diff src/clojure/contrib/http/agent.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/contrib/http/agent.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,386 @@
     1.4 +;;; http/agent.clj: agent-based asynchronous HTTP client
     1.5 +
     1.6 +;; by Stuart Sierra, http://stuartsierra.com/
     1.7 +;; August 17, 2009
     1.8 +
     1.9 +;; Copyright (c) Stuart Sierra, 2009. All rights reserved.  The use
    1.10 +;; and distribution terms for this software are covered by the Eclipse
    1.11 +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
    1.12 +;; which can be found in the file epl-v10.html at the root of this
    1.13 +;; distribution.  By using this software in any fashion, you are
    1.14 +;; agreeing to be bound by the terms of this license.  You must not
    1.15 +;; remove this notice, or any other, from this software.
    1.16 +
    1.17 +;; DEPRECATED IN 1.2. Use direct Java bits, or take a look at
    1.18 +;;                    http://github.com/technomancy/clojure-http-client
    1.19 +
    1.20 +(ns ^{:deprecated "1.2"
    1.21 +      :doc "Agent-based asynchronous HTTP client.
    1.22 +
    1.23 +  This is a HTTP client library based on Java's HttpURLConnection
    1.24 +  class and Clojure's Agent system.  It allows you to make multiple
    1.25 +  HTTP requests in parallel.
    1.26 +
    1.27 +  Start an HTTP request with the 'http-agent' function, which
    1.28 +  immediately returns a Clojure Agent.  You will never deref this
    1.29 +  agent; that is handled by the accessor functions.  The agent will
    1.30 +  execute the HTTP request on a separate thread.
    1.31 +
    1.32 +  If you pass a :handler function to http-agent, that function will be
    1.33 +  called as soon as the HTTP response body is ready.  The handler
    1.34 +  function is called with one argument, the HTTP agent itself.  The
    1.35 +  handler can read the response body by calling the 'stream' function
    1.36 +  on the agent.
    1.37 +
    1.38 +  The value returned by the handler function becomes part of the state
    1.39 +  of the agent, and you can retrieve it with the 'result' function.
    1.40 +  If you call 'result' before the HTTP request has finished, it will
    1.41 +  block until the handler function returns.
    1.42 +
    1.43 +  If you don't provide a handler function, the default handler will
    1.44 +  buffer the entire response body in memory, which you can retrieve
    1.45 +  with the 'bytes', 'string', or 'stream' functions.  Like 'result',
    1.46 +  these functions will block until the HTTP request is completed.
    1.47 +
    1.48 +  If you want to check if an HTTP request is finished without
    1.49 +  blocking, use the 'done?' function.
    1.50 +
    1.51 +  A single GET request could be as simple as:
    1.52 +
    1.53 +    (string (http-agent \"http://www.stuartsierra.com/\"))
    1.54 +
    1.55 +  A simple POST might look like:
    1.56 +
    1.57 +    (http-agent \"http...\" :method \"POST\" :body \"foo=1\")
    1.58 +
    1.59 +  And you could write the response directly to a file like this:
    1.60 +
    1.61 +    (require '[clojure.contrib.io :as d])
    1.62 +
    1.63 +    (http-agent \"http...\"
    1.64 +                :handler (fn [agnt] 
    1.65 +                           (with-open [w (d/writer \"/tmp/out\")] 
    1.66 +                             (d/copy (stream agnt) w))))
    1.67 +"
    1.68 +       :author "Stuart Sierra"
    1.69 +       }
    1.70 +
    1.71 +  clojure.contrib.http.agent
    1.72 +  (:refer-clojure :exclude [bytes])
    1.73 +  (:require [clojure.contrib.http.connection :as c]
    1.74 +            [clojure.contrib.io :as duck])
    1.75 +  (:import (java.io InputStream ByteArrayOutputStream
    1.76 +                    ByteArrayInputStream)
    1.77 +           (java.net HttpURLConnection)))
    1.78 +
    1.79 +
    1.80 +;;; PRIVATE
    1.81 +
    1.82 +(declare result stream) ; forward declarations: both are defined later in this file
    1.83 +
    1.84 +(defn- setup-http-connection
    1.85 +  "Applies the timeouts, request method, redirect policy, and request
    1.86 +  headers given in options to the HttpURLConnection."
    1.87 +  [^HttpURLConnection conn options]
    1.88 +  (let [connect-ms (:connect-timeout options)
    1.89 +        read-ms    (:read-timeout options)]
    1.90 +    (when connect-ms (.setConnectTimeout conn connect-ms))
    1.91 +    (when read-ms (.setReadTimeout conn read-ms)))
    1.92 +  (.setRequestMethod conn (:method options))
    1.93 +  (.setInstanceFollowRedirects conn (:follow-redirects options))
    1.94 +  (doseq [[header-name header-value] (:headers options)]
    1.95 +    (.setRequestProperty conn header-name header-value)))
    1.96 +
    1.97 +(defn- start-request
    1.98 +  "Agent action that configures the connection and begins the request."
    1.99 +  [state options]
   1.100 +  (doto (::connection state)
   1.101 +    (setup-http-connection options)
   1.102 +    (c/start-http-connection (:body options)))
   1.103 +  (assoc state ::state ::started))
   1.104 +
   1.105 +(defn- connection-success?
   1.106 +  "Returns true if the HttpURLConnection response code is in the 2xx range."
   1.107 +  [^HttpURLConnection conn]
   1.108 +  (= 2 (quot (.getResponseCode conn) 100)))
   1.109 +
   1.110 +(defn- open-response
   1.111 +  "Agent action that opens the response body stream; this will block
   1.112 +  until the response is available.  A 2xx response yields the input
   1.113 +  stream, any other status the error stream."
   1.114 +  [state options]
   1.115 +  (let [^HttpURLConnection conn (::connection state)
   1.116 +        body (if (connection-success? conn)
   1.117 +               (.getInputStream conn)
   1.118 +               (.getErrorStream conn))]
   1.119 +    (assoc state ::response-stream body ::state ::receiving)))
   1.120 +
   1.121 +(defn- handle-response
   1.122 +  "Agent action that calls the provided handler function, with no
   1.123 +  arguments, and sets the ::result key of the agent to the handler's
   1.124 +  return value.  The options argument is accepted for symmetry with
   1.125 +  the other agent actions and is not used."
   1.126 +  [state handler options]
   1.127 +  (assoc state
   1.128 +    ::result (handler)
   1.129 +    ::state ::finished))
   1.130 +
   1.131 +(defn- disconnect
   1.132 +  "Agent action that closes the response body stream, if one is open,
   1.133 +  and disconnects the HttpURLConnection."
   1.134 +  [state options]
   1.135 +  (when-let [^InputStream body (::response-stream state)]
   1.136 +    (.close body))
   1.137 +  (let [^HttpURLConnection conn (::connection state)]
   1.138 +    (.disconnect conn))
   1.139 +  (assoc state ::response-stream nil
   1.140 +               ::state ::disconnected))
   1.141 +
   1.142 +(defn- status-in-range?
   1.143 +  "Returns true if the hundreds digit of the HTTP agent's response
   1.144 +  status code equals digit, an Integer."
   1.145 +  [digit http-agnt]
   1.146 +  (let [^HttpURLConnection conn (::connection @http-agnt)
   1.147 +        code (.getResponseCode conn)]
   1.148 +    (= digit (quot code 100))))
   1.149 +
   1.150 +(defn- ^ByteArrayOutputStream get-byte-buffer [http-agnt]
   1.151 +  (let [handler-result (result http-agnt)] ; waits for the handler to return
   1.152 +    (when-not (instance? ByteArrayOutputStream handler-result)
   1.153 +      (throw (Exception. "Handler result was not a ByteArrayOutputStream")))
   1.154 +    handler-result))
   1.155 +
   1.156 +
   1.157 +(defn buffer-bytes
   1.158 +  "Default :handler for HTTP agents.  Copies the response body into a
   1.159 +  java.io.ByteArrayOutputStream and returns it; the 'stream',
   1.160 +  'string', and 'bytes' functions read from that buffer afterwards."
   1.161 +  [http-agnt]
   1.162 +  (let [body (or (stream http-agnt) "") sink (ByteArrayOutputStream.)]
   1.163 +    (duck/copy body sink)
   1.164 +    sink))
   1.165 +
   1.166 +
   1.167 +;;; CONSTRUCTOR
   1.168 +
   1.169 +(def *http-agent-defaults* ; option defaults; http-agent merges caller options over them
   1.170 +     {:method "GET"
   1.171 +      :headers {}
   1.172 +      :body nil
   1.173 +      :connect-timeout 0      ; milliseconds; zero means no timeout
   1.174 +      :read-timeout 0         ; milliseconds; zero means no timeout
   1.175 +      :follow-redirects true
   1.176 +      :handler buffer-bytes}) ; buffers the whole response body in memory
   1.177 +
   1.178 +(defn http-agent
   1.179 +  "Starts an HTTP request on a separate thread and immediately returns
   1.180 +  the Agent that represents it.
   1.181 +
   1.182 +  options are key/value pairs:
   1.183 +
   1.184 +  :method string
   1.185 +
   1.186 +  The HTTP method name.  Default is \"GET\".
   1.187 +
   1.188 +  :headers h
   1.189 +
   1.190 +  HTTP request headers, as a Map or a sequence of pairs like
   1.191 +  ([key1,value1], [key2,value2]).  Default is an empty map.
   1.192 +
   1.193 +  :body b
   1.194 +
   1.195 +  HTTP request entity body, one of nil, String, byte[], InputStream,
   1.196 +  Reader, or File.  Default is nil.
   1.197 +
   1.198 +  :connect-timeout int
   1.199 +
   1.200 +  Timeout, in milliseconds, for opening a connection to the URL.
   1.201 +  Default is zero, meaning no timeout.
   1.202 +
   1.203 +  :read-timeout int
   1.204 +
   1.205 +  Timeout, in milliseconds, for reading data from the connection.
   1.206 +  Default is zero, meaning no timeout.
   1.207 +
   1.208 +  :follow-redirects boolean
   1.209 +
   1.210 +  If true, HTTP 3xx redirects are followed automatically.  Default is
   1.211 +  true.
   1.212 +
   1.213 +  :handler f
   1.214 +
   1.215 +  Function called when the HTTP response body is ready.  If you do not
   1.216 +  provide a handler, the default (buffer-bytes) collects the entire
   1.217 +  response body in a memory buffer.
   1.218 +
   1.219 +  The handler function is called with the HTTP agent as its argument,
   1.220 +  and can use the 'stream' function to read the response body.  Its
   1.221 +  return value is stored in the state of the agent and can be
   1.222 +  retrieved with the 'result' function.  Any exceptions thrown by the
   1.223 +  handler are added to the agent's error queue (see agent-errors).
   1.224 +
   1.225 +  Defaults for all options come from *http-agent-defaults*.
   1.226 +  "
   1.227 +  ([uri & options]
   1.228 +     (let [opts (merge *http-agent-defaults* (apply array-map options))
   1.229 +           http-agnt (agent {::connection (c/http-connection uri)
   1.230 +                             ::state ::created
   1.231 +                             ::uri uri
   1.232 +                             ::options opts})]
   1.233 +       (-> (send-off http-agnt start-request opts)
   1.234 +           (send-off open-response opts)
   1.235 +           (send-off handle-response (partial (:handler opts) http-agnt) opts)
   1.236 +           (send-off disconnect opts)))))
   1.237 +
   1.238 +
   1.239 +;;; RESPONSE BODY ACCESSORS
   1.240 +
   1.241 +(defn result
   1.242 +  "Blocks until the HTTP request is completed, then returns the value
   1.243 +  that the agent's :handler function returned.  The default handler
   1.244 +  function returns a ByteArrayOutputStream."
   1.245 +  [http-agnt]
   1.246 +  (await http-agnt)
   1.247 +  (get @http-agnt ::result))
   1.248 +
   1.249 +(defn stream
   1.250 +  "Returns an InputStream of the HTTP response body.
   1.251 +
   1.252 +  While the agent is in the ::receiving state (which is when the
   1.253 +  handler function runs) this is the response stream opened on the
   1.254 +  HttpURLConnection.  Otherwise it is a new ByteArrayInputStream over
   1.255 +  the bytes buffered by the default handler (an Exception is thrown
   1.256 +  if the handler returned anything but a ByteArrayOutputStream)."
   1.257 +  [http-agnt]
   1.258 +  (let [{state ::state, body ::response-stream} @http-agnt]
   1.259 +    (if (= state ::receiving)
   1.260 +      body
   1.261 +      (-> http-agnt get-byte-buffer .toByteArray ByteArrayInputStream.))))
   1.262 +
   1.263 +(defn bytes
   1.264 +  "Returns a Java byte array of the content returned by the server;
   1.265 +  blocks until the HTTP request is completed."
   1.266 +  [http-agnt]
   1.267 +  (-> http-agnt get-byte-buffer .toByteArray))
   1.268 +
   1.269 +(defn string
   1.270 +  "Returns the HTTP response body as a string, using the given
   1.271 +  encoding.  If no encoding is given, uses the charset parameter of
   1.272 +  the response's Content-Type header, or
   1.273 +  clojure.contrib.io/*default-encoding* if there is none.  Blocks
   1.274 +  until the HTTP request is completed."
   1.275 +  ([http-agnt]
   1.276 +     (await http-agnt) ;; the response headers must have arrived
   1.277 +     (let [ctype (.getContentType ^HttpURLConnection (::connection @http-agnt))
   1.278 +           ;; Content-Encoding names a compression such as gzip, not a charset.
   1.279 +           [_ charset] (when ctype (re-find #"(?i)charset\s*=\s*\"?([^\s;\"]+)" ctype))]
   1.280 +       (string http-agnt (or charset duck/*default-encoding*))))
   1.281 +  ([http-agnt ^String encoding]
   1.282 +     (.toString (get-byte-buffer http-agnt) encoding)))
   1.283 +
   1.284 +
   1.285 +;;; REQUEST ACCESSORS
   1.286 +
   1.287 +(defn request-uri
   1.288 +  "Returns the uri argument that was given to http-agent for this agent."
   1.289 +  [http-agnt]
   1.290 +  (get @http-agnt ::uri))
   1.291 +
   1.292 +(defn request-headers
   1.293 +  "Returns the :headers option this HTTP agent was created with."
   1.294 +  [http-agnt]
   1.295 +  (get-in @http-agnt [::options :headers]))
   1.296 +
   1.297 +(defn method
   1.298 +  "Returns the :method option (the HTTP method name) of this HTTP agent."
   1.299 +  [http-agnt]
   1.300 +  (-> @http-agnt ::options :method))
   1.301 +
   1.302 +(defn request-body
   1.303 +  "Returns the :body option that was given to this HTTP agent.
   1.304 +
   1.305 +  Note: if the request body was an InputStream or a Reader, it will no
   1.306 +  longer be usable."
   1.307 +  [http-agnt]
   1.308 +  (get-in @http-agnt [::options :body]))
   1.309 +
   1.310 +
   1.311 +;;; RESPONSE ACCESSORS
   1.312 +
   1.313 +(defn done?
   1.314 +  "Returns true once the agent has reached the ::finished or
   1.315 +  ::disconnected state, i.e. the handler has returned; false otherwise."
   1.316 +  [http-agnt]
   1.317 +  (contains? #{::finished ::disconnected} (::state @http-agnt)))
   1.318 +
   1.319 +(defn status
   1.320 +  "Returns the HTTP response status code (e.g. 200, 404) for this
   1.321 +  request, as an Integer, or nil while the request is not yet done
   1.322 +  (see 'done?')."
   1.323 +  [http-agnt]
   1.324 +  (let [^HttpURLConnection conn (::connection @http-agnt)]
   1.325 +    (when (done? http-agnt) (.getResponseCode conn))))
   1.326 +
   1.327 +(defn message
   1.328 +  "Returns the HTTP response message (e.g. 'Not Found') for this
   1.329 +  request, or nil while the request is not yet done (see 'done?')."
   1.330 +  [http-agnt]
   1.331 +  (let [^HttpURLConnection conn (::connection @http-agnt)]
   1.332 +    (when (done? http-agnt) (.getResponseMessage conn))))
   1.333 +
   1.334 +(defn headers
   1.335 +  "Returns a map of HTTP response headers.  Header names are converted
   1.336 +  to lower-case keywords; header values are strings.  If a header
   1.337 +  appears more than once, only the last value is returned.  An entry
   1.338 +  whose name is null (as for the HTTP status line) gets the key nil."
   1.339 +  [http-agnt]
   1.340 +  (let [^HttpURLConnection conn (::connection @http-agnt)]
   1.341 +    (into {} (for [[^String k v] (.getHeaderFields conn)]
   1.342 +               [(when k (keyword (.toLowerCase k))) (last v)]))))
   1.343 +
   1.344 +(defn headers-seq
   1.345 +  "Returns the HTTP response headers in order as a lazy sequence of
   1.346 +  [String,String] pairs.  The first 'header' name may be null for the
   1.347 +  HTTP status line."
   1.348 +  [http-agnt]
   1.349 +  (let [^HttpURLConnection conn (::connection @http-agnt)
   1.350 +        step (fn step [i]
   1.351 +               (lazy-seq ; one header is read per realized element
   1.352 +                (when-let [value (.getHeaderField conn (int i))] ; key may be nil, so test the value
   1.353 +                  (cons [(.getHeaderFieldKey conn (int i)) value]
   1.354 +                        (step (inc i))))))]
   1.355 +    (step 0)))
   1.356 +
   1.357 +
   1.358 +;;; RESPONSE STATUS CODE ACCESSORS
   1.359 +
   1.360 +(defn success?
   1.361 +  "Returns true if the HTTP response status code is 2xx (200-299)."
   1.362 +  [http-agnt]
   1.363 +  (status-in-range? 2 http-agnt))
   1.364 +
   1.365 +(defn redirect?
   1.366 +  "Returns true if the HTTP response status code is 3xx (300-399).
   1.367 +
   1.368 +  Note: if the :follow-redirects option was true (the default),
   1.369 +  redirects will be followed automatically and the agent will never
   1.370 +  return a 3xx response code."
   1.371 +  [http-agnt]
   1.372 +  (status-in-range? 3 http-agnt))
   1.373 +
   1.374 +(defn client-error?
   1.375 +  "Returns true if the HTTP response status code is 4xx (400-499)."
   1.376 +  [http-agnt]
   1.377 +  (status-in-range? 4 http-agnt))
   1.378 +
   1.379 +(defn server-error?
   1.380 +  "Returns true if the HTTP response status code is 5xx (500-599)."
   1.381 +  [http-agnt]
   1.382 +  (status-in-range? 5 http-agnt))
   1.383 +
   1.384 +(defn error?
   1.385 +  "Returns true if the HTTP response status code is a client error
   1.386 +  (400-499) or a server error (500-599)."
   1.387 +  [http-agnt]
   1.388 +  (or (client-error? http-agnt)
   1.389 +      (server-error? http-agnt)))