diff src/clojure/xml.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/xml.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,118 @@
     1.4 +;   Copyright (c) Rich Hickey. All rights reserved.
     1.5 +;   The use and distribution terms for this software are covered by the
     1.6 +;   Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
     1.7 +;   which can be found in the file epl-v10.html at the root of this distribution.
     1.8 +;   By using this software in any fashion, you are agreeing to be bound by
     1.9 +;   the terms of this license.
    1.10 +;   You must not remove this notice, or any other, from this software.
    1.11 +
    1.12 +(ns ^{:doc "XML reading/writing."
    1.13 +       :author "Rich Hickey"}
    1.14 +  clojure.xml
    1.15 +  (:import (org.xml.sax ContentHandler Attributes SAXException)
    1.16 +           (javax.xml.parsers SAXParser SAXParserFactory)))
    1.17 +
    1.18 +(def *stack*)
    1.19 +(def *current*)
    1.20 +(def *state*) ; :element :chars :between
    1.21 +(def *sb*)
    1.22 +
    1.23 +(defstruct element :tag :attrs :content)
    1.24 +
    1.25 +(def tag (accessor element :tag))
    1.26 +(def attrs (accessor element :attrs))
    1.27 +(def content (accessor element :content))
    1.28 +
    1.29 +(def content-handler
    1.30 +  (let [push-content (fn [e c]
    1.31 +                       (assoc e :content (conj (or (:content e) []) c)))
    1.32 +        push-chars (fn []
    1.33 +                     (when (and (= *state* :chars)
    1.34 +                                (some (complement #(Character/isWhitespace (char %))) (str *sb*)))
    1.35 +                       (set! *current* (push-content *current* (str *sb*)))))]
    1.36 +    (new clojure.lang.XMLHandler
    1.37 +         (proxy [ContentHandler] []
    1.38 +           (startElement [uri local-name q-name ^Attributes atts]
    1.39 +             (let [attrs (fn [ret i]
    1.40 +                           (if (neg? i)
    1.41 +                             ret
    1.42 +                             (recur (assoc ret
    1.43 +                                           (clojure.lang.Keyword/intern (symbol (.getQName atts i)))
    1.44 +                                           (.getValue atts (int i)))
    1.45 +                                    (dec i))))
    1.46 +                   e (struct element
    1.47 +                             (. clojure.lang.Keyword (intern (symbol q-name)))
    1.48 +                             (when (pos? (.getLength atts))
    1.49 +                               (attrs {} (dec (.getLength atts)))))]
    1.50 +               (push-chars)
    1.51 +               (set! *stack* (conj *stack* *current*))
    1.52 +               (set! *current* e)
    1.53 +               (set! *state* :element))
    1.54 +             nil)
    1.55 +           (endElement [uri local-name q-name]
    1.56 +             (push-chars)
    1.57 +             (set! *current* (push-content (peek *stack*) *current*))
    1.58 +             (set! *stack* (pop *stack*))
    1.59 +             (set! *state* :between)
    1.60 +             nil)
    1.61 +           (characters [^chars ch start length]
    1.62 +             (when-not (= *state* :chars)
    1.63 +               (set! *sb* (new StringBuilder)))
    1.64 +             (let [^StringBuilder sb *sb*]
    1.65 +               (.append sb ch (int start) (int length))
    1.66 +               (set! *state* :chars))
    1.67 +             nil)
    1.68 +           (setDocumentLocator [locator])
    1.69 +           (startDocument [])
    1.70 +           (endDocument [])
    1.71 +           (startPrefixMapping [prefix uri])
    1.72 +           (endPrefixMapping [prefix])
    1.73 +           (ignorableWhitespace [ch start length])
    1.74 +           (processingInstruction [target data])
    1.75 +           (skippedEntity [name])
    1.76 +           ))))
    1.77 +
    1.78 +(defn startparse-sax [s ch]
    1.79 +  (.. SAXParserFactory (newInstance) (newSAXParser) (parse s ch)))
    1.80 +
    1.81 +(defn parse
    1.82 +  "Parses and loads the source s, which can be a File, InputStream or
    1.83 +  String naming a URI. Returns a tree of the xml/element struct-map,
    1.84 +  which has the keys :tag, :attrs, and :content. and accessor fns tag,
    1.85 +  attrs, and content. Other parsers can be supplied by passing
    1.86 +  startparse, a fn taking a source and a ContentHandler and returning
    1.87 +  a parser"
    1.88 +  {:added "1.0"}
    1.89 +  ([s] (parse s startparse-sax))
    1.90 +  ([s startparse]
    1.91 +    (binding [*stack* nil
    1.92 +              *current* (struct element)
    1.93 +              *state* :between
    1.94 +              *sb* nil]
    1.95 +      (startparse s content-handler)
    1.96 +      ((:content *current*) 0)))) 
    1.97 +
    1.98 +(defn emit-element [e]
    1.99 +  (if (instance? String e)
   1.100 +    (println e)
   1.101 +    (do
   1.102 +      (print (str "<" (name (:tag e))))
   1.103 +      (when (:attrs e)
   1.104 +	(doseq [attr (:attrs e)]
   1.105 +	  (print (str " " (name (key attr)) "='" (val attr)"'"))))
   1.106 +      (if (:content e)
   1.107 +	(do
   1.108 +	  (println ">")
   1.109 +	  (doseq [c (:content e)]
   1.110 +	    (emit-element c))
   1.111 +	  (println (str "</" (name (:tag e)) ">")))
   1.112 +	(println "/>")))))
   1.113 +
   1.114 +(defn emit [x]
   1.115 +  (println "<?xml version='1.0' encoding='UTF-8'?>")
   1.116 +  (emit-element x))
   1.117 +
   1.118 +;(export '(tag attrs content parse element emit emit-element))
   1.119 +
   1.120 +;(load-file "/Users/rich/dev/clojure/src/xml.clj")
   1.121 +;(def x (xml/parse "http://arstechnica.com/journals.rssx"))