Mercurial > lasercutter
diff src/clojure/xml.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
line wrap: on
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/clojure/xml.clj Sat Aug 21 06:25:44 2010 -0400 1.3 @@ -0,0 +1,118 @@ 1.4 +; Copyright (c) Rich Hickey. All rights reserved. 1.5 +; The use and distribution terms for this software are covered by the 1.6 +; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) 1.7 +; which can be found in the file epl-v10.html at the root of this distribution. 1.8 +; By using this software in any fashion, you are agreeing to be bound by 1.9 +; the terms of this license. 1.10 +; You must not remove this notice, or any other, from this software. 1.11 + 1.12 +(ns ^{:doc "XML reading/writing." 1.13 + :author "Rich Hickey"} 1.14 + clojure.xml 1.15 + (:import (org.xml.sax ContentHandler Attributes SAXException) 1.16 + (javax.xml.parsers SAXParser SAXParserFactory))) 1.17 + 1.18 +(def *stack*) 1.19 +(def *current*) 1.20 +(def *state*) ; :element :chars :between 1.21 +(def *sb*) 1.22 + 1.23 +(defstruct element :tag :attrs :content) 1.24 + 1.25 +(def tag (accessor element :tag)) 1.26 +(def attrs (accessor element :attrs)) 1.27 +(def content (accessor element :content)) 1.28 + 1.29 +(def content-handler 1.30 + (let [push-content (fn [e c] 1.31 + (assoc e :content (conj (or (:content e) []) c))) 1.32 + push-chars (fn [] 1.33 + (when (and (= *state* :chars) 1.34 + (some (complement #(Character/isWhitespace (char %))) (str *sb*))) 1.35 + (set! *current* (push-content *current* (str *sb*)))))] 1.36 + (new clojure.lang.XMLHandler 1.37 + (proxy [ContentHandler] [] 1.38 + (startElement [uri local-name q-name ^Attributes atts] 1.39 + (let [attrs (fn [ret i] 1.40 + (if (neg? i) 1.41 + ret 1.42 + (recur (assoc ret 1.43 + (clojure.lang.Keyword/intern (symbol (.getQName atts i))) 1.44 + (.getValue atts (int i))) 1.45 + (dec i)))) 1.46 + e (struct element 1.47 + (. clojure.lang.Keyword (intern (symbol q-name))) 1.48 + (when (pos? (.getLength atts)) 1.49 + (attrs {} (dec (.getLength atts)))))] 1.50 + (push-chars) 1.51 + (set! *stack* (conj *stack* *current*)) 1.52 + (set! *current* e) 1.53 + (set! *state* :element)) 1.54 + nil) 1.55 + (endElement [uri local-name q-name] 1.56 + (push-chars) 1.57 + (set! *current* (push-content (peek *stack*) *current*)) 1.58 + (set! *stack* (pop *stack*)) 1.59 + (set! *state* :between) 1.60 + nil) 1.61 + (characters [^chars ch start length] 1.62 + (when-not (= *state* :chars) 1.63 + (set! *sb* (new StringBuilder))) 1.64 + (let [^StringBuilder sb *sb*] 1.65 + (.append sb ch (int start) (int length)) 1.66 + (set! *state* :chars)) 1.67 + nil) 1.68 + (setDocumentLocator [locator]) 1.69 + (startDocument []) 1.70 + (endDocument []) 1.71 + (startPrefixMapping [prefix uri]) 1.72 + (endPrefixMapping [prefix]) 1.73 + (ignorableWhitespace [ch start length]) 1.74 + (processingInstruction [target data]) 1.75 + (skippedEntity [name]) 1.76 + )))) 1.77 + 1.78 +(defn startparse-sax [s ch] 1.79 + (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch))) 1.80 + 1.81 +(defn parse 1.82 + "Parses and loads the source s, which can be a File, InputStream or 1.83 + String naming a URI. Returns a tree of the xml/element struct-map, 1.84 + which has the keys :tag, :attrs, and :content. and accessor fns tag, 1.85 + attrs, and content. Other parsers can be supplied by passing 1.86 + startparse, a fn taking a source and a ContentHandler and returning 1.87 + a parser" 1.88 + {:added "1.0"} 1.89 + ([s] (parse s startparse-sax)) 1.90 + ([s startparse] 1.91 + (binding [*stack* nil 1.92 + *current* (struct element) 1.93 + *state* :between 1.94 + *sb* nil] 1.95 + (startparse s content-handler) 1.96 + ((:content *current*) 0)))) 1.97 + 1.98 +(defn emit-element [e] 1.99 + (if (instance? String e) 1.100 + (println e) 1.101 + (do 1.102 + (print (str "<" (name (:tag e)))) 1.103 + (when (:attrs e) 1.104 + (doseq [attr (:attrs e)] 1.105 + (print (str " " (name (key attr)) "='" (val attr)"'")))) 1.106 + (if (:content e) 1.107 + (do 1.108 + (println ">") 1.109 + (doseq [c (:content e)] 1.110 + (emit-element c)) 1.111 + (println (str "</" (name (:tag e)) ">"))) 1.112 + (println "/>"))))) 1.113 + 1.114 +(defn emit [x] 1.115 + (println "<?xml version='1.0' encoding='UTF-8'?>") 1.116 + (emit-element x)) 1.117 + 1.118 +;(export '(tag attrs content parse element emit emit-element)) 1.119 + 1.120 +;(load-file "/Users/rich/dev/clojure/src/xml.clj") 1.121 +;(def x (xml/parse "http://arstechnica.com/journals.rssx"))