diff src/clojure/contrib/zip_filter/xml.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/contrib/zip_filter/xml.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,170 @@
     1.4 +;   Copyright (c) Chris Houser, April 2008. All rights reserved.
     1.5 +;   The use and distribution terms for this software are covered by the
     1.6 +;   Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
     1.7 +;   which can be found in the file epl-v10.html at the root of this distribution.
     1.8 +;   By using this software in any fashion, you are agreeing to be bound by
     1.9 +;   the terms of this license.
    1.10 +;   You must not remove this notice, or any other, from this software.
    1.11 +
    1.12 +; Specialization of zip-filter for xml trees.
    1.13 +
    1.14 +(ns clojure.contrib.zip-filter.xml
    1.15 +    (:require [clojure.contrib.zip-filter :as zf]
    1.16 +              [clojure.zip :as zip]
    1.17 +              [clojure.xml :as xml]))
    1.18 +
    1.19 +(declare xml->)
    1.20 +
    1.21 +(defn attr
    1.22 +  "Returns the xml attribute named attrname, of the xml node at location loc."
    1.23 +  ([attrname]     (fn [loc] (attr loc attrname)))
    1.24 +  ([loc attrname] (when (zip/branch? loc) (-> loc zip/node :attrs attrname))))
    1.25 +
    1.26 +(defn attr=
    1.27 +  "Returns a query predicate that matches a node when it has an
    1.28 +  attribute named attrname whose value is attrval."
    1.29 +  [attrname attrval] (fn [loc] (= attrval (attr loc attrname))))
    1.30 +
    1.31 +(defn tag=
    1.32 +  "Returns a query predicate that matches a node when its is a tag
    1.33 +  named tagname."
    1.34 +  [tagname]
    1.35 +    (fn [loc]
    1.36 +      (filter #(and (zip/branch? %) (= tagname ((zip/node %) :tag)))
    1.37 +              (if (zf/auto? loc)
    1.38 +                (zf/children-auto loc)
    1.39 +                (list (zf/auto true loc))))))
    1.40 +
    1.41 +(defn text
    1.42 +  "Returns the textual contents of the given location, similar to
    1.43 +  xpaths's value-of"
    1.44 +  [loc]
    1.45 +    (.replaceAll
    1.46 +      ^String (apply str (xml-> loc zf/descendants zip/node string?))
    1.47 +      (str "[\\s" (char 160) "]+") " "))
    1.48 +
    1.49 +(defn text=
    1.50 +  "Returns a query predicate that matches a node when its textual
    1.51 +  content equals s."
    1.52 +  [s] (fn [loc] (= (text loc) s)))
    1.53 +
    1.54 +(defn seq-test
    1.55 +  "Returns a query predicate that matches a node when its xml content
    1.56 +  matches the query expresions given."
    1.57 +  ^{:private true}
    1.58 +  [preds] (fn [loc] (and (seq (apply xml-> loc preds)) (list loc))))
    1.59 +
    1.60 +(defn xml->
    1.61 +  "The loc is passed to the first predicate.  If the predicate returns
    1.62 +  a collection, each value of the collection is passed to the next
    1.63 +  predicate.  If it returns a location, the location is passed to the
    1.64 +  next predicate.  If it returns true, the input location is passed to
    1.65 +  the next predicate.  If it returns false or nil, the next predicate
    1.66 +  is not called.
    1.67 +
    1.68 +  This process is repeated, passing the processed results of each
    1.69 +  predicate to the next predicate.  xml-> returns the final sequence.
    1.70 +  The entire chain is evaluated lazily.
    1.71 +
    1.72 +  There are also special predicates: keywords are converted to tag=,
    1.73 +  strings to text=, and vectors to sub-queries that return true if
    1.74 +  they match.
    1.75 +
    1.76 +  See the footer of zip-query.clj for examples."
    1.77 +  [loc & preds]
    1.78 +    (zf/mapcat-chain loc preds
    1.79 +                     #(cond (keyword? %) (tag= %)
    1.80 +                            (string?  %) (text= %)
    1.81 +                            (vector?  %) (seq-test %))))
    1.82 +
    1.83 +(defn xml1->
    1.84 +  "Returns the first item from loc based on the query predicates
    1.85 +  given.  See xml->"
    1.86 +  [loc & preds] (first (apply xml-> loc preds)))
    1.87 +
    1.88 +
    1.89 +; === examples ===
    1.90 +
    1.91 +(comment
    1.92 +
    1.93 +(defn parse-str [s]
    1.94 +  (zip/xml-zip (xml/parse (new org.xml.sax.InputSource
    1.95 +                               (new java.io.StringReader s)))))
    1.96 +
    1.97 +(def atom1 (parse-str "<?xml version='1.0' encoding='UTF-8'?>
    1.98 +<feed xmlns='http://www.w3.org/2005/Atom'>
    1.99 +  <id>tag:blogger.com,1999:blog-28403206</id>
   1.100 +  <updated>2008-02-14T08:00:58.567-08:00</updated>
   1.101 +  <title type='text'>n01senet</title>
   1.102 +  <link rel='alternate' type='text/html' href='http://n01senet.blogspot.com/'/>
   1.103 +  <entry>
   1.104 +    <id>1</id>
   1.105 +    <published>2008-02-13</published>
   1.106 +    <title type='text'>clojure is the best lisp yet</title>
   1.107 +    <author><name>Chouser</name></author>
   1.108 +  </entry>
   1.109 +  <entry>
   1.110 +    <id>2</id>
   1.111 +    <published>2008-02-07</published>
   1.112 +    <title type='text'>experimenting with vnc</title>
   1.113 +    <author><name>agriffis</name></author>
   1.114 +  </entry>
   1.115 +</feed>
   1.116 +"))
   1.117 +
   1.118 +; simple single-function filter
   1.119 +(assert (= (xml-> atom1 #((zip/node %) :tag))
   1.120 +           '(:feed)))
   1.121 +
   1.122 +; two-stage filter using helpful query prediates
   1.123 +(assert (= (xml-> atom1 (tag= :title) text)
   1.124 +           '("n01senet")))
   1.125 +
   1.126 +; same filter as above, this time using keyword shortcut
   1.127 +(assert (= (xml-> atom1 :title text)
   1.128 +           '("n01senet")))
   1.129 +
   1.130 +; multi-stage filter
   1.131 +(assert (= (xml-> atom1 :entry :author :name text)
   1.132 +           '("Chouser" "agriffis")))
   1.133 +
   1.134 +; test xml1->
   1.135 +(assert (= (xml1-> atom1 :entry :author :name text)
   1.136 +           "Chouser"))
   1.137 +
   1.138 +; multi-stage filter with subquery specified using a vector
   1.139 +(assert (= (xml-> atom1 :entry [:author :name (text= "agriffis")]
   1.140 +                        :id text)
   1.141 +           '("2")))
   1.142 +
   1.143 +; same filter as above, this time using a string shortcut
   1.144 +(assert (= (xml-> atom1 :entry [:author :name "agriffis"] :id text)
   1.145 +           '("2")))
   1.146 +
   1.147 +; attribute access
   1.148 +(assert (= (xml-> atom1 :title (attr :type))
   1.149 +           '("text")))
   1.150 +
   1.151 +; attribute filtering
   1.152 +(assert (= (xml-> atom1 :link [(attr= :rel "alternate")] (attr :type))
   1.153 +           '("text/html")))
   1.154 +
   1.155 +; ancestors
   1.156 +(assert (= (xml-> atom1 zf/descendants :id "2" zf/ancestors zip/node #(:tag %))
   1.157 +           '(:id :entry :feed)))
   1.158 +
   1.159 +; ancestors with non-auto tag= (:entry), followed by auto tag= (:id)
   1.160 +(assert (= (xml-> atom1 zf/descendants :name "Chouser" zf/ancestors
   1.161 +                  :entry :id text)
   1.162 +           '("1")))
   1.163 +
   1.164 +; left-locs and detection of returning a single loc (zip/up)
   1.165 +(assert (= (xml-> atom1 zf/descendants :name "Chouser" zip/up
   1.166 +                  zf/left-locs :id text)
   1.167 +           '("1")))
   1.168 +
   1.169 +; right-locs
   1.170 +(assert (= (xml-> atom1 zf/descendants :id zf/right-locs :author text)
   1.171 +           '("Chouser" "agriffis")))
   1.172 +
   1.173 +)