annotate src/clojure/contrib/zip_filter/xml.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ; Copyright (c) Chris Houser, April 2008. All rights reserved.
rlm@10 2 ; The use and distribution terms for this software are covered by the
rlm@10 3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 4 ; which can be found in the file epl-v10.html at the root of this distribution.
rlm@10 5 ; By using this software in any fashion, you are agreeing to be bound by
rlm@10 6 ; the terms of this license.
rlm@10 7 ; You must not remove this notice, or any other, from this software.
rlm@10 8
rlm@10 9 ; Specialization of zip-filter for xml trees.
rlm@10 10
rlm@10 11 (ns clojure.contrib.zip-filter.xml
rlm@10 12 (:require [clojure.contrib.zip-filter :as zf]
rlm@10 13 [clojure.zip :as zip]
rlm@10 14 [clojure.xml :as xml]))
rlm@10 15
rlm@10 16 (declare xml->)
rlm@10 17
rlm@10 18 (defn attr
rlm@10 19 "Returns the xml attribute named attrname, of the xml node at location loc."
rlm@10 20 ([attrname] (fn [loc] (attr loc attrname)))
rlm@10 21 ([loc attrname] (when (zip/branch? loc) (-> loc zip/node :attrs attrname))))
rlm@10 22
rlm@10 23 (defn attr=
rlm@10 24 "Returns a query predicate that matches a node when it has an
rlm@10 25 attribute named attrname whose value is attrval."
rlm@10 26 [attrname attrval] (fn [loc] (= attrval (attr loc attrname))))
rlm@10 27
rlm@10 28 (defn tag=
rlm@10 29 "Returns a query predicate that matches a node when its is a tag
rlm@10 30 named tagname."
rlm@10 31 [tagname]
rlm@10 32 (fn [loc]
rlm@10 33 (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag)))
rlm@10 34 (if (zf/auto? loc)
rlm@10 35 (zf/children-auto loc)
rlm@10 36 (list (zf/auto true loc))))))
rlm@10 37
rlm@10 38 (defn text
rlm@10 39 "Returns the textual contents of the given location, similar to
rlm@10 40 xpaths's value-of"
rlm@10 41 [loc]
rlm@10 42 (.replaceAll
rlm@10 43 ^String (apply str (xml-> loc zf/descendants zip/node string?))
rlm@10 44 (str "[\\s" (char 160) "]+") " "))
rlm@10 45
rlm@10 46 (defn text=
rlm@10 47 "Returns a query predicate that matches a node when its textual
rlm@10 48 content equals s."
rlm@10 49 [s] (fn [loc] (= (text loc) s)))
rlm@10 50
rlm@10 51 (defn seq-test
rlm@10 52 "Returns a query predicate that matches a node when its xml content
rlm@10 53 matches the query expresions given."
rlm@10 54 ^{:private true}
rlm@10 55 [preds] (fn [loc] (and (seq (apply xml-> loc preds)) (list loc))))
rlm@10 56
rlm@10 57 (defn xml->
rlm@10 58 "The loc is passed to the first predicate. If the predicate returns
rlm@10 59 a collection, each value of the collection is passed to the next
rlm@10 60 predicate. If it returns a location, the location is passed to the
rlm@10 61 next predicate. If it returns true, the input location is passed to
rlm@10 62 the next predicate. If it returns false or nil, the next predicate
rlm@10 63 is not called.
rlm@10 64
rlm@10 65 This process is repeated, passing the processed results of each
rlm@10 66 predicate to the next predicate. xml-> returns the final sequence.
rlm@10 67 The entire chain is evaluated lazily.
rlm@10 68
rlm@10 69 There are also special predicates: keywords are converted to tag=,
rlm@10 70 strings to text=, and vectors to sub-queries that return true if
rlm@10 71 they match.
rlm@10 72
rlm@10 73 See the footer of zip-query.clj for examples."
rlm@10 74 [loc & preds]
rlm@10 75 (zf/mapcat-chain loc preds
rlm@10 76 #(cond (keyword? %) (tag= %)
rlm@10 77 (string? %) (text= %)
rlm@10 78 (vector? %) (seq-test %))))
rlm@10 79
rlm@10 80 (defn xml1->
rlm@10 81 "Returns the first item from loc based on the query predicates
rlm@10 82 given. See xml->"
rlm@10 83 [loc & preds] (first (apply xml-> loc preds)))
rlm@10 84
rlm@10 85
rlm@10 86 ; === examples ===
rlm@10 87
rlm@10 88 (comment
rlm@10 89
rlm@10 90 (defn parse-str [s]
rlm@10 91 (zip/xml-zip (xml/parse (new org.xml.sax.InputSource
rlm@10 92 (new java.io.StringReader s)))))
rlm@10 93
rlm@10 94 (def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?>
rlm@10 95 <feed xmlns='http://www.w3.org/2005/Atom'>
rlm@10 96 <id>tag:blogger.com,1999:blog-28403206</id>
rlm@10 97 <updated>2008-02-14T08:00:58.567-08:00</updated>
rlm@10 98 <title type='text'>n01senet</title>
rlm@10 99 <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/>
rlm@10 100 <entry>
rlm@10 101 <id>1</id>
rlm@10 102 <published>2008-02-13</published>
rlm@10 103 <title type='text'>clojure is the best lisp yet</title>
rlm@10 104 <author><name>Chouser</name></author>
rlm@10 105 </entry>
rlm@10 106 <entry>
rlm@10 107 <id>2</id>
rlm@10 108 <published>2008-02-07</published>
rlm@10 109 <title type='text'>experimenting with vnc</title>
rlm@10 110 <author><name>agriffis</name></author>
rlm@10 111 </entry>
rlm@10 112 </feed>
rlm@10 113 "))
rlm@10 114
rlm@10 115 ; simple single-function filter
rlm@10 116 (assert (= (xml-> atom1 #((zip/node %) :tag))
rlm@10 117 '(:feed)))
rlm@10 118
rlm@10 119 ; two-stage filter using helpful query prediates
rlm@10 120 (assert (= (xml-> atom1 (tag= :title) text)
rlm@10 121 '("n01senet")))
rlm@10 122
rlm@10 123 ; same filter as above, this time using keyword shortcut
rlm@10 124 (assert (= (xml-> atom1 :title text)
rlm@10 125 '("n01senet")))
rlm@10 126
rlm@10 127 ; multi-stage filter
rlm@10 128 (assert (= (xml-> atom1 :entry :author :name text)
rlm@10 129 '("Chouser" "agriffis")))
rlm@10 130
rlm@10 131 ; test xml1->
rlm@10 132 (assert (= (xml1-> atom1 :entry :author :name text)
rlm@10 133 "Chouser"))
rlm@10 134
rlm@10 135 ; multi-stage filter with subquery specified using a vector
rlm@10 136 (assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")]
rlm@10 137 :id text)
rlm@10 138 '("2")))
rlm@10 139
rlm@10 140 ; same filter as above, this time using a string shortcut
rlm@10 141 (assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text)
rlm@10 142 '("2")))
rlm@10 143
rlm@10 144 ; attribute access
rlm@10 145 (assert (= (xml-> atom1 :title (attr :type))
rlm@10 146 '("text")))
rlm@10 147
rlm@10 148 ; attribute filtering
rlm@10 149 (assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type))
rlm@10 150 '("text/html")))
rlm@10 151
rlm@10 152 ; ancestors
rlm@10 153 (assert (= (xml-> atom1 zf/descendants :id "2" zf/ancestors zip/node #(:tag %))
rlm@10 154 '(:id :entry :feed)))
rlm@10 155
rlm@10 156 ; ancestors with non-auto tag= (:entry), followed by auto tag= (:id)
rlm@10 157 (assert (= (xml-> atom1 zf/descendants :name "Chouser" zf/ancestors
rlm@10 158 :entry :id text)
rlm@10 159 '("1")))
rlm@10 160
rlm@10 161 ; left-locs and detection of returning a single loc (zip/up)
rlm@10 162 (assert (= (xml-> atom1 zf/descendants :name "Chouser" zip/up
rlm@10 163 zf/left-locs :id text)
rlm@10 164 '("1")))
rlm@10 165
rlm@10 166 ; right-locs
rlm@10 167 (assert (= (xml-> atom1 zf/descendants :id zf/right-locs :author text)
rlm@10 168 '("Chouser" "agriffis")))
rlm@10 169
rlm@10 170 )