view src/clojure/xml.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line source
1 ; Copyright (c) Rich Hickey. All rights reserved.
2 ; The use and distribution terms for this software are covered by the
3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
4 ; which can be found in the file epl-v10.html at the root of this distribution.
5 ; By using this software in any fashion, you are agreeing to be bound by
6 ; the terms of this license.
7 ; You must not remove this notice, or any other, from this software.
9 (ns ^{:doc "XML reading/writing."
10 :author "Rich Hickey"}
11 clojure.xml
12 (:import (org.xml.sax ContentHandler Attributes SAXException)
13 (javax.xml.parsers SAXParser SAXParserFactory)))
15 (def *stack*)
16 (def *current*)
17 (def *state*) ; :element :chars :between
18 (def *sb*)
20 (defstruct element :tag :attrs :content)
22 (def tag (accessor element :tag))
23 (def attrs (accessor element :attrs))
24 (def content (accessor element :content))
26 (def content-handler
27 (let [push-content (fn [e c]
28 (assoc e :content (conj (or (:content e) []) c)))
29 push-chars (fn []
30 (when (and (= *state* :chars)
31 (some (complement #(Character/isWhitespace (char %))) (str *sb*)))
32 (set! *current* (push-content *current* (str *sb*)))))]
33 (new clojure.lang.XMLHandler
34 (proxy [ContentHandler] []
35 (startElement [uri local-name q-name ^Attributes atts]
36 (let [attrs (fn [ret i]
37 (if (neg? i)
38 ret
39 (recur (assoc ret
40 (clojure.lang.Keyword/intern (symbol (.getQName atts i)))
41 (.getValue atts (int i)))
42 (dec i))))
43 e (struct element
44 (. clojure.lang.Keyword (intern (symbol q-name)))
45 (when (pos? (.getLength atts))
46 (attrs {} (dec (.getLength atts)))))]
47 (push-chars)
48 (set! *stack* (conj *stack* *current*))
49 (set! *current* e)
50 (set! *state* :element))
51 nil)
52 (endElement [uri local-name q-name]
53 (push-chars)
54 (set! *current* (push-content (peek *stack*) *current*))
55 (set! *stack* (pop *stack*))
56 (set! *state* :between)
57 nil)
58 (characters [^chars ch start length]
59 (when-not (= *state* :chars)
60 (set! *sb* (new StringBuilder)))
61 (let [^StringBuilder sb *sb*]
62 (.append sb ch (int start) (int length))
63 (set! *state* :chars))
64 nil)
65 (setDocumentLocator [locator])
66 (startDocument [])
67 (endDocument [])
68 (startPrefixMapping [prefix uri])
69 (endPrefixMapping [prefix])
70 (ignorableWhitespace [ch start length])
71 (processingInstruction [target data])
72 (skippedEntity [name])
73 ))))
75 (defn startparse-sax [s ch]
76 (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch)))
78 (defn parse
79 "Parses and loads the source s, which can be a File, InputStream or
80 String naming a URI. Returns a tree of the xml/element struct-map,
81 which has the keys :tag, :attrs, and :content. and accessor fns tag,
82 attrs, and content. Other parsers can be supplied by passing
83 startparse, a fn taking a source and a ContentHandler and returning
84 a parser"
85 {:added "1.0"}
86 ([s] (parse s startparse-sax))
87 ([s startparse]
88 (binding [*stack* nil
89 *current* (struct element)
90 *state* :between
91 *sb* nil]
92 (startparse s content-handler)
93 ((:content *current*) 0))))
95 (defn emit-element [e]
96 (if (instance? String e)
97 (println e)
98 (do
99 (print (str "<" (name (:tag e))))
100 (when (:attrs e)
101 (doseq [attr (:attrs e)]
102 (print (str " " (name (key attr)) "='" (val attr)"'"))))
103 (if (:content e)
104 (do
105 (println ">")
106 (doseq [c (:content e)]
107 (emit-element c))
108 (println (str "</" (name (:tag e)) ">")))
109 (println "/>")))))
111 (defn emit [x]
112 (println "<?xml version='1.0' encoding='UTF-8'?>")
113 (emit-element x))
115 ;(export '(tag attrs content parse element emit emit-element))
117 ;(load-file "/Users/rich/dev/clojure/src/xml.clj")
118 ;(def x (xml/parse "http://arstechnica.com/journals.rssx"))