view src/clojure/contrib/zip_filter/xml.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line source
1 ; Copyright (c) Chris Houser, April 2008. All rights reserved.
2 ; The use and distribution terms for this software are covered by the
3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
4 ; which can be found in the file epl-v10.html at the root of this distribution.
5 ; By using this software in any fashion, you are agreeing to be bound by
6 ; the terms of this license.
7 ; You must not remove this notice, or any other, from this software.
9 ; Specialization of zip-filter for xml trees.
11 (ns clojure.contrib.zip-filter.xml
12 (:require [clojure.contrib.zip-filter :as zf]
13 [clojure.zip :as zip]
14 [clojure.xml :as xml]))
16 (declare xml->)
18 (defn attr
19 "Returns the xml attribute named attrname, of the xml node at location loc."
20 ([attrname] (fn [loc] (attr loc attrname)))
21 ([loc attrname] (when (zip/branch? loc) (-> loc zip/node :attrs attrname))))
23 (defn attr=
24 "Returns a query predicate that matches a node when it has an
25 attribute named attrname whose value is attrval."
26 [attrname attrval] (fn [loc] (= attrval (attr loc attrname))))
28 (defn tag=
29 "Returns a query predicate that matches a node when its is a tag
30 named tagname."
31 [tagname]
32 (fn [loc]
33 (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag)))
34 (if (zf/auto? loc)
35 (zf/children-auto loc)
36 (list (zf/auto true loc))))))
38 (defn text
39 "Returns the textual contents of the given location, similar to
40 xpaths's value-of"
41 [loc]
42 (.replaceAll
43 ^String (apply str (xml-> loc zf/descendants zip/node string?))
44 (str "[\\s" (char 160) "]+") " "))
46 (defn text=
47 "Returns a query predicate that matches a node when its textual
48 content equals s."
49 [s] (fn [loc] (= (text loc) s)))
51 (defn seq-test
52 "Returns a query predicate that matches a node when its xml content
53 matches the query expresions given."
54 ^{:private true}
55 [preds] (fn [loc] (and (seq (apply xml-> loc preds)) (list loc))))
57 (defn xml->
58 "The loc is passed to the first predicate. If the predicate returns
59 a collection, each value of the collection is passed to the next
60 predicate. If it returns a location, the location is passed to the
61 next predicate. If it returns true, the input location is passed to
62 the next predicate. If it returns false or nil, the next predicate
63 is not called.
65 This process is repeated, passing the processed results of each
66 predicate to the next predicate. xml-> returns the final sequence.
67 The entire chain is evaluated lazily.
69 There are also special predicates: keywords are converted to tag=,
70 strings to text=, and vectors to sub-queries that return true if
71 they match.
73 See the footer of zip-query.clj for examples."
74 [loc & preds]
75 (zf/mapcat-chain loc preds
76 #(cond (keyword? %) (tag= %)
77 (string? %) (text= %)
78 (vector? %) (seq-test %))))
80 (defn xml1->
81 "Returns the first item from loc based on the query predicates
82 given. See xml->"
83 [loc & preds] (first (apply xml-> loc preds)))
86 ; === examples ===
88 (comment
90 (defn parse-str [s]
91 (zip/xml-zip (xml/parse (new org.xml.sax.InputSource
92 (new java.io.StringReader s)))))
94 (def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?>
95 <feed xmlns='http://www.w3.org/2005/Atom'>
96 <id>tag:blogger.com,1999:blog-28403206</id>
97 <updated>2008-02-14T08:00:58.567-08:00</updated>
98 <title type='text'>n01senet</title>
99 <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/>
100 <entry>
101 <id>1</id>
102 <published>2008-02-13</published>
103 <title type='text'>clojure is the best lisp yet</title>
104 <author><name>Chouser</name></author>
105 </entry>
106 <entry>
107 <id>2</id>
108 <published>2008-02-07</published>
109 <title type='text'>experimenting with vnc</title>
110 <author><name>agriffis</name></author>
111 </entry>
112 </feed>
113 "))
115 ; simple single-function filter
116 (assert (= (xml-> atom1 #((zip/node %) :tag))
117 '(:feed)))
119 ; two-stage filter using helpful query prediates
120 (assert (= (xml-> atom1 (tag= :title) text)
121 '("n01senet")))
123 ; same filter as above, this time using keyword shortcut
124 (assert (= (xml-> atom1 :title text)
125 '("n01senet")))
127 ; multi-stage filter
128 (assert (= (xml-> atom1 :entry :author :name text)
129 '("Chouser" "agriffis")))
131 ; test xml1->
132 (assert (= (xml1-> atom1 :entry :author :name text)
133 "Chouser"))
135 ; multi-stage filter with subquery specified using a vector
136 (assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")]
137 :id text)
138 '("2")))
140 ; same filter as above, this time using a string shortcut
141 (assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text)
142 '("2")))
144 ; attribute access
145 (assert (= (xml-> atom1 :title (attr :type))
146 '("text")))
148 ; attribute filtering
149 (assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type))
150 '("text/html")))
152 ; ancestors
153 (assert (= (xml-> atom1 zf/descendants :id "2" zf/ancestors zip/node #(:tag %))
154 '(:id :entry :feed)))
156 ; ancestors with non-auto tag= (:entry), followed by auto tag= (:id)
157 (assert (= (xml-> atom1 zf/descendants :name "Chouser" zf/ancestors
158 :entry :id text)
159 '("1")))
161 ; left-locs and detection of returning a single loc (zip/up)
162 (assert (= (xml-> atom1 zf/descendants :name "Chouser" zip/up
163 zf/left-locs :id text)
164 '("1")))
166 ; right-locs
167 (assert (= (xml-> atom1 zf/descendants :id zf/right-locs :author text)
168 '("Chouser" "agriffis")))
170 )