rlm@10
|
1 ; Copyright (c) Chris Houser, April 2008. All rights reserved.
|
rlm@10
|
2 ; The use and distribution terms for this software are covered by the
|
rlm@10
|
3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
|
rlm@10
|
4 ; which can be found in the file epl-v10.html at the root of this distribution.
|
rlm@10
|
5 ; By using this software in any fashion, you are agreeing to be bound by
|
rlm@10
|
6 ; the terms of this license.
|
rlm@10
|
7 ; You must not remove this notice, or any other, from this software.
|
rlm@10
|
8
|
rlm@10
|
9 ; Specialization of zip-filter for xml trees.
|
rlm@10
|
10
|
rlm@10
|
11 (ns clojure.contrib.zip-filter.xml
|
rlm@10
|
12 (:require [clojure.contrib.zip-filter :as zf]
|
rlm@10
|
13 [clojure.zip :as zip]
|
rlm@10
|
14 [clojure.xml :as xml]))
|
rlm@10
|
15
|
rlm@10
|
16 (declare xml->)
|
rlm@10
|
17
|
rlm@10
|
18 (defn attr
|
rlm@10
|
19 "Returns the xml attribute named attrname, of the xml node at location loc."
|
rlm@10
|
20 ([attrname] (fn [loc] (attr loc attrname)))
|
rlm@10
|
21 ([loc attrname] (when (zip/branch? loc) (-> loc zip/node :attrs attrname))))
|
rlm@10
|
22
|
rlm@10
|
23 (defn attr=
|
rlm@10
|
24 "Returns a query predicate that matches a node when it has an
|
rlm@10
|
25 attribute named attrname whose value is attrval."
|
rlm@10
|
26 [attrname attrval] (fn [loc] (= attrval (attr loc attrname))))
|
rlm@10
|
27
|
rlm@10
|
28 (defn tag=
|
rlm@10
|
29 "Returns a query predicate that matches a node when its is a tag
|
rlm@10
|
30 named tagname."
|
rlm@10
|
31 [tagname]
|
rlm@10
|
32 (fn [loc]
|
rlm@10
|
33 (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag)))
|
rlm@10
|
34 (if (zf/auto? loc)
|
rlm@10
|
35 (zf/children-auto loc)
|
rlm@10
|
36 (list (zf/auto true loc))))))
|
rlm@10
|
37
|
rlm@10
|
38 (defn text
|
rlm@10
|
39 "Returns the textual contents of the given location, similar to
|
rlm@10
|
40 xpaths's value-of"
|
rlm@10
|
41 [loc]
|
rlm@10
|
42 (.replaceAll
|
rlm@10
|
43 ^String (apply str (xml-> loc zf/descendants zip/node string?))
|
rlm@10
|
44 (str "[\\s" (char 160) "]+") " "))
|
rlm@10
|
45
|
rlm@10
|
46 (defn text=
|
rlm@10
|
47 "Returns a query predicate that matches a node when its textual
|
rlm@10
|
48 content equals s."
|
rlm@10
|
49 [s] (fn [loc] (= (text loc) s)))
|
rlm@10
|
50
|
rlm@10
|
51 (defn seq-test
|
rlm@10
|
52 "Returns a query predicate that matches a node when its xml content
|
rlm@10
|
53 matches the query expresions given."
|
rlm@10
|
54 ^{:private true}
|
rlm@10
|
55 [preds] (fn [loc] (and (seq (apply xml-> loc preds)) (list loc))))
|
rlm@10
|
56
|
rlm@10
|
57 (defn xml->
|
rlm@10
|
58 "The loc is passed to the first predicate. If the predicate returns
|
rlm@10
|
59 a collection, each value of the collection is passed to the next
|
rlm@10
|
60 predicate. If it returns a location, the location is passed to the
|
rlm@10
|
61 next predicate. If it returns true, the input location is passed to
|
rlm@10
|
62 the next predicate. If it returns false or nil, the next predicate
|
rlm@10
|
63 is not called.
|
rlm@10
|
64
|
rlm@10
|
65 This process is repeated, passing the processed results of each
|
rlm@10
|
66 predicate to the next predicate. xml-> returns the final sequence.
|
rlm@10
|
67 The entire chain is evaluated lazily.
|
rlm@10
|
68
|
rlm@10
|
69 There are also special predicates: keywords are converted to tag=,
|
rlm@10
|
70 strings to text=, and vectors to sub-queries that return true if
|
rlm@10
|
71 they match.
|
rlm@10
|
72
|
rlm@10
|
73 See the footer of zip-query.clj for examples."
|
rlm@10
|
74 [loc & preds]
|
rlm@10
|
75 (zf/mapcat-chain loc preds
|
rlm@10
|
76 #(cond (keyword? %) (tag= %)
|
rlm@10
|
77 (string? %) (text= %)
|
rlm@10
|
78 (vector? %) (seq-test %))))
|
rlm@10
|
79
|
rlm@10
|
80 (defn xml1->
|
rlm@10
|
81 "Returns the first item from loc based on the query predicates
|
rlm@10
|
82 given. See xml->"
|
rlm@10
|
83 [loc & preds] (first (apply xml-> loc preds)))
|
rlm@10
|
84
|
rlm@10
|
85
|
rlm@10
|
86 ; === examples ===
|
rlm@10
|
87
|
rlm@10
|
88 (comment
|
rlm@10
|
89
|
rlm@10
|
90 (defn parse-str [s]
|
rlm@10
|
91 (zip/xml-zip (xml/parse (new org.xml.sax.InputSource
|
rlm@10
|
92 (new java.io.StringReader s)))))
|
rlm@10
|
93
|
rlm@10
|
94 (def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?>
|
rlm@10
|
95 <feed xmlns='http://www.w3.org/2005/Atom'>
|
rlm@10
|
96 <id>tag:blogger.com,1999:blog-28403206</id>
|
rlm@10
|
97 <updated>2008-02-14T08:00:58.567-08:00</updated>
|
rlm@10
|
98 <title type='text'>n01senet</title>
|
rlm@10
|
99 <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/>
|
rlm@10
|
100 <entry>
|
rlm@10
|
101 <id>1</id>
|
rlm@10
|
102 <published>2008-02-13</published>
|
rlm@10
|
103 <title type='text'>clojure is the best lisp yet</title>
|
rlm@10
|
104 <author><name>Chouser</name></author>
|
rlm@10
|
105 </entry>
|
rlm@10
|
106 <entry>
|
rlm@10
|
107 <id>2</id>
|
rlm@10
|
108 <published>2008-02-07</published>
|
rlm@10
|
109 <title type='text'>experimenting with vnc</title>
|
rlm@10
|
110 <author><name>agriffis</name></author>
|
rlm@10
|
111 </entry>
|
rlm@10
|
112 </feed>
|
rlm@10
|
113 "))
|
rlm@10
|
114
|
rlm@10
|
115 ; simple single-function filter
|
rlm@10
|
116 (assert (= (xml-> atom1 #((zip/node %) :tag))
|
rlm@10
|
117 '(:feed)))
|
rlm@10
|
118
|
rlm@10
|
119 ; two-stage filter using helpful query prediates
|
rlm@10
|
120 (assert (= (xml-> atom1 (tag= :title) text)
|
rlm@10
|
121 '("n01senet")))
|
rlm@10
|
122
|
rlm@10
|
123 ; same filter as above, this time using keyword shortcut
|
rlm@10
|
124 (assert (= (xml-> atom1 :title text)
|
rlm@10
|
125 '("n01senet")))
|
rlm@10
|
126
|
rlm@10
|
127 ; multi-stage filter
|
rlm@10
|
128 (assert (= (xml-> atom1 :entry :author :name text)
|
rlm@10
|
129 '("Chouser" "agriffis")))
|
rlm@10
|
130
|
rlm@10
|
131 ; test xml1->
|
rlm@10
|
132 (assert (= (xml1-> atom1 :entry :author :name text)
|
rlm@10
|
133 "Chouser"))
|
rlm@10
|
134
|
rlm@10
|
135 ; multi-stage filter with subquery specified using a vector
|
rlm@10
|
136 (assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")]
|
rlm@10
|
137 :id text)
|
rlm@10
|
138 '("2")))
|
rlm@10
|
139
|
rlm@10
|
140 ; same filter as above, this time using a string shortcut
|
rlm@10
|
141 (assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text)
|
rlm@10
|
142 '("2")))
|
rlm@10
|
143
|
rlm@10
|
144 ; attribute access
|
rlm@10
|
145 (assert (= (xml-> atom1 :title (attr :type))
|
rlm@10
|
146 '("text")))
|
rlm@10
|
147
|
rlm@10
|
148 ; attribute filtering
|
rlm@10
|
149 (assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type))
|
rlm@10
|
150 '("text/html")))
|
rlm@10
|
151
|
rlm@10
|
152 ; ancestors
|
rlm@10
|
153 (assert (= (xml-> atom1 zf/descendants :id "2" zf/ancestors zip/node #(:tag %))
|
rlm@10
|
154 '(:id :entry :feed)))
|
rlm@10
|
155
|
rlm@10
|
156 ; ancestors with non-auto tag= (:entry), followed by auto tag= (:id)
|
rlm@10
|
157 (assert (= (xml-> atom1 zf/descendants :name "Chouser" zf/ancestors
|
rlm@10
|
158 :entry :id text)
|
rlm@10
|
159 '("1")))
|
rlm@10
|
160
|
rlm@10
|
161 ; left-locs and detection of returning a single loc (zip/up)
|
rlm@10
|
162 (assert (= (xml-> atom1 zf/descendants :name "Chouser" zip/up
|
rlm@10
|
163 zf/left-locs :id text)
|
rlm@10
|
164 '("1")))
|
rlm@10
|
165
|
rlm@10
|
166 ; right-locs
|
rlm@10
|
167 (assert (= (xml-> atom1 zf/descendants :id zf/right-locs :author text)
|
rlm@10
|
168 '("Chouser" "agriffis")))
|
rlm@10
|
169
|
rlm@10
|
170 )
|