annotate src/clojure/set.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ; Copyright (c) Rich Hickey. All rights reserved.
rlm@10 2 ; The use and distribution terms for this software are covered by the
rlm@10 3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 4 ; which can be found in the file epl-v10.html at the root of this distribution.
rlm@10 5 ; By using this software in any fashion, you are agreeing to be bound by
rlm@10 6 ; the terms of this license.
rlm@10 7 ; You must not remove this notice, or any other, from this software.
rlm@10 8
rlm@10 9 (ns ^{:doc "Set operations such as union/intersection."
rlm@10 10 :author "Rich Hickey"}
rlm@10 11 clojure.set)
rlm@10 12
rlm@10 13 (defn- bubble-max-key [k coll]
rlm@10 14 "Move a maximal element of coll according to fn k (which returns a number)
rlm@10 15 to the front of coll."
rlm@10 16 (let [max (apply max-key k coll)]
rlm@10 17 (cons max (remove #(identical? max %) coll))))
rlm@10 18
rlm@10 19 (defn union
rlm@10 20 "Return a set that is the union of the input sets"
rlm@10 21 {:added "1.0"}
rlm@10 22 ([] #{})
rlm@10 23 ([s1] s1)
rlm@10 24 ([s1 s2]
rlm@10 25 (if (< (count s1) (count s2))
rlm@10 26 (reduce conj s2 s1)
rlm@10 27 (reduce conj s1 s2)))
rlm@10 28 ([s1 s2 & sets]
rlm@10 29 (let [bubbled-sets (bubble-max-key count (conj sets s2 s1))]
rlm@10 30 (reduce into (first bubbled-sets) (rest bubbled-sets)))))
rlm@10 31
rlm@10 32 (defn intersection
rlm@10 33 "Return a set that is the intersection of the input sets"
rlm@10 34 {:added "1.0"}
rlm@10 35 ([s1] s1)
rlm@10 36 ([s1 s2]
rlm@10 37 (if (< (count s2) (count s1))
rlm@10 38 (recur s2 s1)
rlm@10 39 (reduce (fn [result item]
rlm@10 40 (if (contains? s2 item)
rlm@10 41 result
rlm@10 42 (disj result item)))
rlm@10 43 s1 s1)))
rlm@10 44 ([s1 s2 & sets]
rlm@10 45 (let [bubbled-sets (bubble-max-key #(- (count %)) (conj sets s2 s1))]
rlm@10 46 (reduce intersection (first bubbled-sets) (rest bubbled-sets)))))
rlm@10 47
rlm@10 48 (defn difference
rlm@10 49 "Return a set that is the first set without elements of the remaining sets"
rlm@10 50 {:added "1.0"}
rlm@10 51 ([s1] s1)
rlm@10 52 ([s1 s2]
rlm@10 53 (if (< (count s1) (count s2))
rlm@10 54 (reduce (fn [result item]
rlm@10 55 (if (contains? s2 item)
rlm@10 56 (disj result item)
rlm@10 57 result))
rlm@10 58 s1 s1)
rlm@10 59 (reduce disj s1 s2)))
rlm@10 60 ([s1 s2 & sets]
rlm@10 61 (reduce difference s1 (conj sets s2))))
rlm@10 62
rlm@10 63
rlm@10 64 (defn select
rlm@10 65 "Returns a set of the elements for which pred is true"
rlm@10 66 {:added "1.0"}
rlm@10 67 [pred xset]
rlm@10 68 (reduce (fn [s k] (if (pred k) s (disj s k)))
rlm@10 69 xset xset))
rlm@10 70
rlm@10 71 (defn project
rlm@10 72 "Returns a rel of the elements of xrel with only the keys in ks"
rlm@10 73 {:added "1.0"}
rlm@10 74 [xrel ks]
rlm@10 75 (set (map #(select-keys % ks) xrel)))
rlm@10 76
rlm@10 77 (defn rename-keys
rlm@10 78 "Returns the map with the keys in kmap renamed to the vals in kmap"
rlm@10 79 {:added "1.0"}
rlm@10 80 [map kmap]
rlm@10 81 (reduce
rlm@10 82 (fn [m [old new]]
rlm@10 83 (if (and (not= old new)
rlm@10 84 (contains? m old))
rlm@10 85 (-> m (assoc new (get m old)) (dissoc old))
rlm@10 86 m))
rlm@10 87 map kmap))
rlm@10 88
rlm@10 89 (defn rename
rlm@10 90 "Returns a rel of the maps in xrel with the keys in kmap renamed to the vals in kmap"
rlm@10 91 {:added "1.0"}
rlm@10 92 [xrel kmap]
rlm@10 93 (set (map #(rename-keys % kmap) xrel)))
rlm@10 94
rlm@10 95 (defn index
rlm@10 96 "Returns a map of the distinct values of ks in the xrel mapped to a
rlm@10 97 set of the maps in xrel with the corresponding values of ks."
rlm@10 98 {:added "1.0"}
rlm@10 99 [xrel ks]
rlm@10 100 (reduce
rlm@10 101 (fn [m x]
rlm@10 102 (let [ik (select-keys x ks)]
rlm@10 103 (assoc m ik (conj (get m ik #{}) x))))
rlm@10 104 {} xrel))
rlm@10 105
rlm@10 106 (defn map-invert
rlm@10 107 "Returns the map with the vals mapped to the keys."
rlm@10 108 {:added "1.0"}
rlm@10 109 [m] (reduce (fn [m [k v]] (assoc m v k)) {} m))
rlm@10 110
rlm@10 111 (defn join
rlm@10 112 "When passed 2 rels, returns the rel corresponding to the natural
rlm@10 113 join. When passed an additional keymap, joins on the corresponding
rlm@10 114 keys."
rlm@10 115 {:added "1.0"}
rlm@10 116 ([xrel yrel] ;natural join
rlm@10 117 (if (and (seq xrel) (seq yrel))
rlm@10 118 (let [ks (intersection (set (keys (first xrel))) (set (keys (first yrel))))
rlm@10 119 [r s] (if (<= (count xrel) (count yrel))
rlm@10 120 [xrel yrel]
rlm@10 121 [yrel xrel])
rlm@10 122 idx (index r ks)]
rlm@10 123 (reduce (fn [ret x]
rlm@10 124 (let [found (idx (select-keys x ks))]
rlm@10 125 (if found
rlm@10 126 (reduce #(conj %1 (merge %2 x)) ret found)
rlm@10 127 ret)))
rlm@10 128 #{} s))
rlm@10 129 #{}))
rlm@10 130 ([xrel yrel km] ;arbitrary key mapping
rlm@10 131 (let [[r s k] (if (<= (count xrel) (count yrel))
rlm@10 132 [xrel yrel (map-invert km)]
rlm@10 133 [yrel xrel km])
rlm@10 134 idx (index r (vals k))]
rlm@10 135 (reduce (fn [ret x]
rlm@10 136 (let [found (idx (rename-keys (select-keys x (keys k)) k))]
rlm@10 137 (if found
rlm@10 138 (reduce #(conj %1 (merge %2 x)) ret found)
rlm@10 139 ret)))
rlm@10 140 #{} s))))
rlm@10 141
rlm@10 142 (defn subset?
rlm@10 143 "Is set1 a subset of set2?"
rlm@10 144 {:added "1.2",
rlm@10 145 :tag Boolean}
rlm@10 146 [set1 set2]
rlm@10 147 (and (<= (count set1) (count set2))
rlm@10 148 (every? set2 set1)))
rlm@10 149
rlm@10 150 (defn superset?
rlm@10 151 "Is set1 a superset of set2?"
rlm@10 152 {:added "1.2",
rlm@10 153 :tag Boolean}
rlm@10 154 [set1 set2]
rlm@10 155 (and (>= (count set1) (count set2))
rlm@10 156 (every? set1 set2)))
rlm@10 157
rlm@10 158 (comment
rlm@10 159 (refer 'set)
rlm@10 160 (def xs #{{:a 11 :b 1 :c 1 :d 4}
rlm@10 161 {:a 2 :b 12 :c 2 :d 6}
rlm@10 162 {:a 3 :b 3 :c 3 :d 8 :f 42}})
rlm@10 163
rlm@10 164 (def ys #{{:a 11 :b 11 :c 11 :e 5}
rlm@10 165 {:a 12 :b 11 :c 12 :e 3}
rlm@10 166 {:a 3 :b 3 :c 3 :e 7 }})
rlm@10 167
rlm@10 168 (join xs ys)
rlm@10 169 (join xs (rename ys {:b :yb :c :yc}) {:a :a})
rlm@10 170
rlm@10 171 (union #{:a :b :c} #{:c :d :e })
rlm@10 172 (difference #{:a :b :c} #{:c :d :e})
rlm@10 173 (intersection #{:a :b :c} #{:c :d :e})
rlm@10 174
rlm@10 175 (index ys [:b])
rlm@10 176 )
rlm@10 177