rlm@10
|
1 ; Copyright (c) Rich Hickey. All rights reserved.
|
rlm@10
|
2 ; The use and distribution terms for this software are covered by the
|
rlm@10
|
3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
|
rlm@10
|
4 ; which can be found in the file epl-v10.html at the root of this distribution.
|
rlm@10
|
5 ; By using this software in any fashion, you are agreeing to be bound by
|
rlm@10
|
6 ; the terms of this license.
|
rlm@10
|
7 ; You must not remove this notice, or any other, from this software.
|
rlm@10
|
8
|
rlm@10
|
9 (ns ^{:doc "Set operations such as union/intersection."
|
rlm@10
|
10 :author "Rich Hickey"}
|
rlm@10
|
11 clojure.set)
|
rlm@10
|
12
|
rlm@10
|
13 (defn- bubble-max-key [k coll]
|
rlm@10
|
14 "Move a maximal element of coll according to fn k (which returns a number)
|
rlm@10
|
15 to the front of coll."
|
rlm@10
|
16 (let [max (apply max-key k coll)]
|
rlm@10
|
17 (cons max (remove #(identical? max %) coll))))
|
rlm@10
|
18
|
rlm@10
|
19 (defn union
|
rlm@10
|
20 "Return a set that is the union of the input sets"
|
rlm@10
|
21 {:added "1.0"}
|
rlm@10
|
22 ([] #{})
|
rlm@10
|
23 ([s1] s1)
|
rlm@10
|
24 ([s1 s2]
|
rlm@10
|
25 (if (< (count s1) (count s2))
|
rlm@10
|
26 (reduce conj s2 s1)
|
rlm@10
|
27 (reduce conj s1 s2)))
|
rlm@10
|
28 ([s1 s2 & sets]
|
rlm@10
|
29 (let [bubbled-sets (bubble-max-key count (conj sets s2 s1))]
|
rlm@10
|
30 (reduce into (first bubbled-sets) (rest bubbled-sets)))))
|
rlm@10
|
31
|
rlm@10
|
32 (defn intersection
|
rlm@10
|
33 "Return a set that is the intersection of the input sets"
|
rlm@10
|
34 {:added "1.0"}
|
rlm@10
|
35 ([s1] s1)
|
rlm@10
|
36 ([s1 s2]
|
rlm@10
|
37 (if (< (count s2) (count s1))
|
rlm@10
|
38 (recur s2 s1)
|
rlm@10
|
39 (reduce (fn [result item]
|
rlm@10
|
40 (if (contains? s2 item)
|
rlm@10
|
41 result
|
rlm@10
|
42 (disj result item)))
|
rlm@10
|
43 s1 s1)))
|
rlm@10
|
44 ([s1 s2 & sets]
|
rlm@10
|
45 (let [bubbled-sets (bubble-max-key #(- (count %)) (conj sets s2 s1))]
|
rlm@10
|
46 (reduce intersection (first bubbled-sets) (rest bubbled-sets)))))
|
rlm@10
|
47
|
rlm@10
|
48 (defn difference
|
rlm@10
|
49 "Return a set that is the first set without elements of the remaining sets"
|
rlm@10
|
50 {:added "1.0"}
|
rlm@10
|
51 ([s1] s1)
|
rlm@10
|
52 ([s1 s2]
|
rlm@10
|
53 (if (< (count s1) (count s2))
|
rlm@10
|
54 (reduce (fn [result item]
|
rlm@10
|
55 (if (contains? s2 item)
|
rlm@10
|
56 (disj result item)
|
rlm@10
|
57 result))
|
rlm@10
|
58 s1 s1)
|
rlm@10
|
59 (reduce disj s1 s2)))
|
rlm@10
|
60 ([s1 s2 & sets]
|
rlm@10
|
61 (reduce difference s1 (conj sets s2))))
|
rlm@10
|
62
|
rlm@10
|
63
|
rlm@10
|
64 (defn select
|
rlm@10
|
65 "Returns a set of the elements for which pred is true"
|
rlm@10
|
66 {:added "1.0"}
|
rlm@10
|
67 [pred xset]
|
rlm@10
|
68 (reduce (fn [s k] (if (pred k) s (disj s k)))
|
rlm@10
|
69 xset xset))
|
rlm@10
|
70
|
rlm@10
|
71 (defn project
|
rlm@10
|
72 "Returns a rel of the elements of xrel with only the keys in ks"
|
rlm@10
|
73 {:added "1.0"}
|
rlm@10
|
74 [xrel ks]
|
rlm@10
|
75 (set (map #(select-keys % ks) xrel)))
|
rlm@10
|
76
|
rlm@10
|
77 (defn rename-keys
|
rlm@10
|
78 "Returns the map with the keys in kmap renamed to the vals in kmap"
|
rlm@10
|
79 {:added "1.0"}
|
rlm@10
|
80 [map kmap]
|
rlm@10
|
81 (reduce
|
rlm@10
|
82 (fn [m [old new]]
|
rlm@10
|
83 (if (and (not= old new)
|
rlm@10
|
84 (contains? m old))
|
rlm@10
|
85 (-> m (assoc new (get m old)) (dissoc old))
|
rlm@10
|
86 m))
|
rlm@10
|
87 map kmap))
|
rlm@10
|
88
|
rlm@10
|
89 (defn rename
|
rlm@10
|
90 "Returns a rel of the maps in xrel with the keys in kmap renamed to the vals in kmap"
|
rlm@10
|
91 {:added "1.0"}
|
rlm@10
|
92 [xrel kmap]
|
rlm@10
|
93 (set (map #(rename-keys % kmap) xrel)))
|
rlm@10
|
94
|
rlm@10
|
95 (defn index
|
rlm@10
|
96 "Returns a map of the distinct values of ks in the xrel mapped to a
|
rlm@10
|
97 set of the maps in xrel with the corresponding values of ks."
|
rlm@10
|
98 {:added "1.0"}
|
rlm@10
|
99 [xrel ks]
|
rlm@10
|
100 (reduce
|
rlm@10
|
101 (fn [m x]
|
rlm@10
|
102 (let [ik (select-keys x ks)]
|
rlm@10
|
103 (assoc m ik (conj (get m ik #{}) x))))
|
rlm@10
|
104 {} xrel))
|
rlm@10
|
105
|
rlm@10
|
106 (defn map-invert
|
rlm@10
|
107 "Returns the map with the vals mapped to the keys."
|
rlm@10
|
108 {:added "1.0"}
|
rlm@10
|
109 [m] (reduce (fn [m [k v]] (assoc m v k)) {} m))
|
rlm@10
|
110
|
rlm@10
|
111 (defn join
|
rlm@10
|
112 "When passed 2 rels, returns the rel corresponding to the natural
|
rlm@10
|
113 join. When passed an additional keymap, joins on the corresponding
|
rlm@10
|
114 keys."
|
rlm@10
|
115 {:added "1.0"}
|
rlm@10
|
116 ([xrel yrel] ;natural join
|
rlm@10
|
117 (if (and (seq xrel) (seq yrel))
|
rlm@10
|
118 (let [ks (intersection (set (keys (first xrel))) (set (keys (first yrel))))
|
rlm@10
|
119 [r s] (if (<= (count xrel) (count yrel))
|
rlm@10
|
120 [xrel yrel]
|
rlm@10
|
121 [yrel xrel])
|
rlm@10
|
122 idx (index r ks)]
|
rlm@10
|
123 (reduce (fn [ret x]
|
rlm@10
|
124 (let [found (idx (select-keys x ks))]
|
rlm@10
|
125 (if found
|
rlm@10
|
126 (reduce #(conj %1 (merge %2 x)) ret found)
|
rlm@10
|
127 ret)))
|
rlm@10
|
128 #{} s))
|
rlm@10
|
129 #{}))
|
rlm@10
|
130 ([xrel yrel km] ;arbitrary key mapping
|
rlm@10
|
131 (let [[r s k] (if (<= (count xrel) (count yrel))
|
rlm@10
|
132 [xrel yrel (map-invert km)]
|
rlm@10
|
133 [yrel xrel km])
|
rlm@10
|
134 idx (index r (vals k))]
|
rlm@10
|
135 (reduce (fn [ret x]
|
rlm@10
|
136 (let [found (idx (rename-keys (select-keys x (keys k)) k))]
|
rlm@10
|
137 (if found
|
rlm@10
|
138 (reduce #(conj %1 (merge %2 x)) ret found)
|
rlm@10
|
139 ret)))
|
rlm@10
|
140 #{} s))))
|
rlm@10
|
141
|
rlm@10
|
142 (defn subset?
|
rlm@10
|
143 "Is set1 a subset of set2?"
|
rlm@10
|
144 {:added "1.2",
|
rlm@10
|
145 :tag Boolean}
|
rlm@10
|
146 [set1 set2]
|
rlm@10
|
147 (and (<= (count set1) (count set2))
|
rlm@10
|
148 (every? set2 set1)))
|
rlm@10
|
149
|
rlm@10
|
150 (defn superset?
|
rlm@10
|
151 "Is set1 a superset of set2?"
|
rlm@10
|
152 {:added "1.2",
|
rlm@10
|
153 :tag Boolean}
|
rlm@10
|
154 [set1 set2]
|
rlm@10
|
155 (and (>= (count set1) (count set2))
|
rlm@10
|
156 (every? set1 set2)))
|
rlm@10
|
157
|
rlm@10
|
158 (comment
|
rlm@10
|
159 (refer 'set)
|
rlm@10
|
160 (def xs #{{:a 11 :b 1 :c 1 :d 4}
|
rlm@10
|
161 {:a 2 :b 12 :c 2 :d 6}
|
rlm@10
|
162 {:a 3 :b 3 :c 3 :d 8 :f 42}})
|
rlm@10
|
163
|
rlm@10
|
164 (def ys #{{:a 11 :b 11 :c 11 :e 5}
|
rlm@10
|
165 {:a 12 :b 11 :c 12 :e 3}
|
rlm@10
|
166 {:a 3 :b 3 :c 3 :e 7 }})
|
rlm@10
|
167
|
rlm@10
|
168 (join xs ys)
|
rlm@10
|
169 (join xs (rename ys {:b :yb :c :yc}) {:a :a})
|
rlm@10
|
170
|
rlm@10
|
171 (union #{:a :b :c} #{:c :d :e })
|
rlm@10
|
172 (difference #{:a :b :c} #{:c :d :e})
|
rlm@10
|
173 (intersection #{:a :b :c} #{:c :d :e})
|
rlm@10
|
174
|
rlm@10
|
175 (index ys [:b])
|
rlm@10
|
176 )
|
rlm@10
|
177
|