Mercurial > lasercutter
diff src/clojure/contrib/zip_filter/xml.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/clojure/contrib/zip_filter/xml.clj Sat Aug 21 06:25:44 2010 -0400 1.3 @@ -0,0 +1,170 @@ 1.4 +; Copyright (c) Chris Houser, April 2008. All rights reserved. 1.5 +; The use and distribution terms for this software are covered by the 1.6 +; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 1.7 +; which can be found in the file epl-v10.html at the root of this distribution. 1.8 +; By using this software in any fashion, you are agreeing to be bound by 1.9 +; the terms of this license. 1.10 +; You must not remove this notice, or any other, from this software. 1.11 + 1.12 +; Specialization of zip-filter for xml trees. 1.13 + 1.14 +(ns clojure.contrib.zip-filter.xml 1.15 + (:require [clojure.contrib.zip-filter :as zf] 1.16 + [clojure.zip :as zip] 1.17 + [clojure.xml :as xml])) 1.18 + 1.19 +(declare xml->) 1.20 + 1.21 +(defn attr 1.22 + "Returns the xml attribute named attrname, of the xml node at location loc." 1.23 + ([attrname] (fn [loc] (attr loc attrname))) 1.24 + ([loc attrname] (when (zip/branch? loc) (-> loc zip/node :attrs attrname)))) 1.25 + 1.26 +(defn attr= 1.27 + "Returns a query predicate that matches a node when it has an 1.28 + attribute named attrname whose value is attrval." 1.29 + [attrname attrval] (fn [loc] (= attrval (attr loc attrname)))) 1.30 + 1.31 +(defn tag= 1.32 + "Returns a query predicate that matches a node when its is a tag 1.33 + named tagname." 1.34 + [tagname] 1.35 + (fn [loc] 1.36 + (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag))) 1.37 + (if (zf/auto? loc) 1.38 + (zf/children-auto loc) 1.39 + (list (zf/auto true loc)))))) 1.40 + 1.41 +(defn text 1.42 + "Returns the textual contents of the given location, similar to 1.43 + xpaths's value-of" 1.44 + [loc] 1.45 + (.replaceAll 1.46 + ^String (apply str (xml-> loc zf/descendants zip/node string?)) 1.47 + (str "[\\s" (char 160) "]+") " ")) 1.48 + 1.49 +(defn text= 1.50 + "Returns a query predicate that matches a node when its textual 1.51 + content equals s." 1.52 + [s] (fn [loc] (= (text loc) s))) 1.53 + 1.54 +(defn seq-test 1.55 + "Returns a query predicate that matches a node when its xml content 1.56 + matches the query expresions given." 1.57 + ^{:private true} 1.58 + [preds] (fn [loc] (and (seq (apply xml-> loc preds)) (list loc)))) 1.59 + 1.60 +(defn xml-> 1.61 + "The loc is passed to the first predicate. If the predicate returns 1.62 + a collection, each value of the collection is passed to the next 1.63 + predicate. If it returns a location, the location is passed to the 1.64 + next predicate. If it returns true, the input location is passed to 1.65 + the next predicate. If it returns false or nil, the next predicate 1.66 + is not called. 1.67 + 1.68 + This process is repeated, passing the processed results of each 1.69 + predicate to the next predicate. xml-> returns the final sequence. 1.70 + The entire chain is evaluated lazily. 1.71 + 1.72 + There are also special predicates: keywords are converted to tag=, 1.73 + strings to text=, and vectors to sub-queries that return true if 1.74 + they match. 1.75 + 1.76 + See the footer of zip-query.clj for examples." 1.77 + [loc & preds] 1.78 + (zf/mapcat-chain loc preds 1.79 + #(cond (keyword? %) (tag= %) 1.80 + (string? %) (text= %) 1.81 + (vector? %) (seq-test %)))) 1.82 + 1.83 +(defn xml1-> 1.84 + "Returns the first item from loc based on the query predicates 1.85 + given. See xml->" 1.86 + [loc & preds] (first (apply xml-> loc preds))) 1.87 + 1.88 + 1.89 +; === examples === 1.90 + 1.91 +(comment 1.92 + 1.93 +(defn parse-str [s] 1.94 + (zip/xml-zip (xml/parse (new org.xml.sax.InputSource 1.95 + (new java.io.StringReader s))))) 1.96 + 1.97 +(def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?> 1.98 +<feed xmlns='http://www.w3.org/2005/Atom'> 1.99 + <id>tag:blogger.com,1999:blog-28403206</id> 1.100 + <updated>2008-02-14T08:00:58.567-08:00</updated> 1.101 + <title type='text'>n01senet</title> 1.102 + <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/> 1.103 + <entry> 1.104 + <id>1</id> 1.105 + <published>2008-02-13</published> 1.106 + <title type='text'>clojure is the best lisp yet</title> 1.107 + <author><name>Chouser</name></author> 1.108 + </entry> 1.109 + <entry> 1.110 + <id>2</id> 1.111 + <published>2008-02-07</published> 1.112 + <title type='text'>experimenting with vnc</title> 1.113 + <author><name>agriffis</name></author> 1.114 + </entry> 1.115 +</feed> 1.116 +")) 1.117 + 1.118 +; simple single-function filter 1.119 +(assert (= (xml-> atom1 #((zip/node %) :tag)) 1.120 + '(:feed))) 1.121 + 1.122 +; two-stage filter using helpful query prediates 1.123 +(assert (= (xml-> atom1 (tag= :title) text) 1.124 + '("n01senet"))) 1.125 + 1.126 +; same filter as above, this time using keyword shortcut 1.127 +(assert (= (xml-> atom1 :title text) 1.128 + '("n01senet"))) 1.129 + 1.130 +; multi-stage filter 1.131 +(assert (= (xml-> atom1 :entry :author :name text) 1.132 + '("Chouser" "agriffis"))) 1.133 + 1.134 +; test xml1-> 1.135 +(assert (= (xml1-> atom1 :entry :author :name text) 1.136 + "Chouser")) 1.137 + 1.138 +; multi-stage filter with subquery specified using a vector 1.139 +(assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")] 1.140 + :id text) 1.141 + '("2"))) 1.142 + 1.143 +; same filter as above, this time using a string shortcut 1.144 +(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text) 1.145 + '("2"))) 1.146 + 1.147 +; attribute access 1.148 +(assert (= (xml-> atom1 :title (attr :type)) 1.149 + '("text"))) 1.150 + 1.151 +; attribute filtering 1.152 +(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type)) 1.153 + '("text/html"))) 1.154 + 1.155 +; ancestors 1.156 +(assert (= (xml-> atom1 zf/descendants :id "2" zf/ancestors zip/node #(:tag %)) 1.157 + '(:id :entry :feed))) 1.158 + 1.159 +; ancestors with non-auto tag= (:entry), followed by auto tag= (:id) 1.160 +(assert (= (xml-> atom1 zf/descendants :name "Chouser" zf/ancestors 1.161 + :entry :id text) 1.162 + '("1"))) 1.163 + 1.164 +; left-locs and detection of returning a single loc (zip/up) 1.165 +(assert (= (xml-> atom1 zf/descendants :name "Chouser" zip/up 1.166 + zf/left-locs :id text) 1.167 + '("1"))) 1.168 + 1.169 +; right-locs 1.170 +(assert (= (xml-> atom1 zf/descendants :id zf/right-locs :author text) 1.171 + '("Chouser" "agriffis"))) 1.172 + 1.173 +)