annotate src/clojure/contrib/http/agent.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;;; http/agent.clj: agent-based asynchronous HTTP client
rlm@10 2
rlm@10 3 ;; by Stuart Sierra, http://stuartsierra.com/
rlm@10 4 ;; August 17, 2009
rlm@10 5
rlm@10 6 ;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use
rlm@10 7 ;; and distribution terms for this software are covered by the Eclipse
rlm@10 8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 9 ;; which can be found in the file epl-v10.html at the root of this
rlm@10 10 ;; distribution. By using this software in any fashion, you are
rlm@10 11 ;; agreeing to be bound by the terms of this license. You must not
rlm@10 12 ;; remove this notice, or any other, from this software.
rlm@10 13
rlm@10 14 ;; DEPRECATED IN 1.2. Use direct Java bits, or take a look at
rlm@10 15 ;; http://github.com/technomancy/clojure-http-client
rlm@10 16
rlm@10 17 (ns ^{:deprecated "1.2"
rlm@10 18 :doc "Agent-based asynchronous HTTP client.
rlm@10 19
rlm@10 20 This is a HTTP client library based on Java's HttpURLConnection
rlm@10 21 class and Clojure's Agent system. It allows you to make multiple
rlm@10 22 HTTP requests in parallel.
rlm@10 23
rlm@10 24 Start an HTTP request with the 'http-agent' function, which
rlm@10 25 immediately returns a Clojure Agent. You will never deref this
rlm@10 26 agent; that is handled by the accessor functions. The agent will
rlm@10 27 execute the HTTP request on a separate thread.
rlm@10 28
rlm@10 29 If you pass a :handler function to http-agent, that function will be
rlm@10 30 called as soon as the HTTP response body is ready. The handler
rlm@10 31 function is called with one argument, the HTTP agent itself. The
rlm@10 32 handler can read the response body by calling the 'stream' function
rlm@10 33 on the agent.
rlm@10 34
rlm@10 35 The value returned by the handler function becomes part of the state
rlm@10 36 of the agent, and you can retrieve it with the 'result' function.
rlm@10 37 If you call 'result' before the HTTP request has finished, it will
rlm@10 38 block until the handler function returns.
rlm@10 39
rlm@10 40 If you don't provide a handler function, the default handler will
rlm@10 41 buffer the entire response body in memory, which you can retrieve
rlm@10 42 with the 'bytes', 'string', or 'stream' functions. Like 'result',
rlm@10 43 these functions will block until the HTTP request is completed.
rlm@10 44
rlm@10 45 If you want to check if an HTTP request is finished without
rlm@10 46 blocking, use the 'done?' function.
rlm@10 47
rlm@10 48 A single GET request could be as simple as:
rlm@10 49
rlm@10 50 (string (http-agent \"http://www.stuartsierra.com/\"))
rlm@10 51
rlm@10 52 A simple POST might look like:
rlm@10 53
rlm@10 54 (http-agent \"http...\" :method \"POST\" :body \"foo=1\")
rlm@10 55
rlm@10 56 And you could write the response directly to a file like this:
rlm@10 57
rlm@10 58 (require '[clojure.contrib.io :as d])
rlm@10 59
rlm@10 60 (http-agent \"http...\"
rlm@10 61 :handler (fn [agnt]
rlm@10 62 (with-open [w (d/writer \"/tmp/out\")]
rlm@10 63 (d/copy (stream agnt) w))))
rlm@10 64 "
rlm@10 65 :author "Stuart Sierra"
rlm@10 66 }
rlm@10 67
rlm@10 68 clojure.contrib.http.agent
rlm@10 69 (:refer-clojure :exclude [bytes])
rlm@10 70 (:require [clojure.contrib.http.connection :as c]
rlm@10 71 [clojure.contrib.io :as duck])
rlm@10 72 (:import (java.io InputStream ByteArrayOutputStream
rlm@10 73 ByteArrayInputStream)
rlm@10 74 (java.net HttpURLConnection)))
rlm@10 75
rlm@10 76
rlm@10 77 ;;; PRIVATE
rlm@10 78
;; Forward declarations: get-byte-buffer and buffer-bytes, defined
;; before these accessors in this file, call them.
(declare stream result)
rlm@10 80
(defn- setup-http-connection
  "Applies the request settings found in options to the
  HttpURLConnection conn: connect and read timeouts (only when
  present), request method, redirect behavior, and every request
  header."
  [^HttpURLConnection conn options]
  (let [connect-ms (:connect-timeout options)
        read-ms    (:read-timeout options)]
    ;; Timeouts are optional; leave the connection's own value alone
    ;; when the option is absent.
    (when connect-ms
      (.setConnectTimeout conn connect-ms))
    (when read-ms
      (.setReadTimeout conn read-ms)))
  (.setRequestMethod conn (:method options))
  (.setInstanceFollowRedirects conn (:follow-redirects options))
  (doseq [[header-name header-value] (:headers options)]
    (.setRequestProperty conn header-name header-value)))
rlm@10 93
(defn- start-request
  "Agent action: configures the connection stored under ::connection
  from options, starts the HTTP request with the :body option as the
  request body, and marks the agent state as ::started."
  [state options]
  (doto (::connection state)
    (setup-http-connection options)
    (c/start-http-connection (:body options)))
  (assoc state ::state ::started))
rlm@10 101
(defn- connection-success?
  "Returns true if the HttpURLConnection response code is in the 2xx
  range."
  ;; The docstring must come before the parameter vector; placed after
  ;; it, the string is just a discarded expression in the function body.
  [^HttpURLConnection conn]
  (= 2 (quot (.getResponseCode conn) 100)))
rlm@10 106
(defn- open-response
  "Agent action that opens the response body stream of the HTTP
  request and marks the agent state as ::receiving. The connection's
  input stream is used for a 2xx response, its error stream otherwise.
  Blocks until the response is available."
  [state options]
  (let [^HttpURLConnection conn (::connection state)
        body (if (connection-success? conn)
               (.getInputStream conn)
               (.getErrorStream conn))]
    (assoc state
      ::response-stream body
      ::state ::receiving)))
rlm@10 117
(defn- handle-response
  "Agent action that calls the provided handler function, with no
  arguments, stores the handler's return value under the ::result key
  of the agent state, and marks the state as ::finished.

  options is accepted for symmetry with the other agent actions and
  is not used here."
  [state handler options]
  ;; The connection is not needed here, so no local is bound for it.
  (assoc state
    ::result (handler)
    ::state ::finished))
rlm@10 127
(defn- disconnect
  "Agent action that closes the response body stream (if one is open),
  disconnects the HttpURLConnection, clears ::response-stream, and
  marks the agent state as ::disconnected."
  [state options]
  (when-let [^InputStream body (::response-stream state)]
    (.close body))
  (let [^HttpURLConnection conn (::connection state)]
    (.disconnect conn))
  (assoc state
    ::response-stream nil
    ::state ::disconnected))
rlm@10 138
(defn- status-in-range?
  "Returns true if the hundreds digit of the HTTP agent's response
  status code equals digit (an integer), e.g. digit 4 matches 400-499."
  [digit http-agnt]
  (let [^HttpURLConnection conn (::connection @http-agnt)
        hundreds (quot (.getResponseCode conn) 100)]
    (= digit hundreds)))
rlm@10 146
(defn- ^ByteArrayOutputStream get-byte-buffer
  "Returns the handler result of the HTTP agent (waiting for it via
  'result'), which must be a ByteArrayOutputStream; throws an
  Exception for any other kind of result."
  [http-agnt]
  (let [handler-value (result http-agnt)]
    (when-not (instance? ByteArrayOutputStream handler-value)
      (throw (Exception. "Handler result was not a ByteArrayOutputStream")))
    handler-value))
rlm@10 152
rlm@10 153
(defn buffer-bytes
  "The default HTTP agent result handler. Copies the response body
  into a fresh java.io.ByteArrayOutputStream and returns that buffer,
  so the body can later be read with the 'stream', 'string', and
  'bytes' functions."
  [http-agnt]
  (let [;; An empty string stands in when no response stream is
        ;; available, yielding an empty buffer.
        source (or (stream http-agnt) "")
        sink   (ByteArrayOutputStream.)]
    (duck/copy source sink)
    sink))
rlm@10 162
rlm@10 163
rlm@10 164 ;;; CONSTRUCTOR
rlm@10 165
;; Option values used by http-agent for every key the caller does not
;; supply. A timeout of 0 means no timeout.
(def *http-agent-defaults*
  {:method           "GET"
   :body             nil
   :headers          {}
   :follow-redirects true
   :connect-timeout  0
   :read-timeout     0
   :handler          buffer-bytes})
rlm@10 174
(defn http-agent
  "Creates (and immediately returns) an Agent representing an HTTP
  request. The request is carried out by agent actions dispatched
  with send-off, so the caller is not blocked.

  options are key/value pairs; missing keys take their values from
  *http-agent-defaults*:

  :method string

  The HTTP method name.  Default is \"GET\".

  :headers h

  HTTP request headers, as a Map or a sequence of pairs like
  ([key1,value1], [key2,value2]).  Default is an empty map.

  :body b

  HTTP request entity body, one of nil, String, byte[], InputStream,
  Reader, or File.  Default is nil.

  :connect-timeout int

  Timeout value, in milliseconds, when opening a connection to the
  URL.  Default is zero, meaning no timeout.

  :read-timeout int

  Timeout value, in milliseconds, when reading data from the
  connection.  Default is zero, meaning no timeout.

  :follow-redirects boolean

  If true, HTTP 3xx redirects will be followed automatically.  Default
  is true.

  :handler f

  Function to be called when the HTTP response body is ready.  If you
  do not provide a handler function, the default (buffer-bytes) is to
  buffer the entire response body in memory.

  The handler function will be called with the HTTP agent as its
  argument, and can use the 'stream' function to read the response
  body.  The return value of this function will be stored in the state
  of the agent and can be retrieved with the 'result' function.  Any
  exceptions thrown by this function will be added to the agent's
  error queue (see agent-errors).
  "
  [uri & options]
  (let [opts (merge *http-agent-defaults* (apply array-map options))
        a    (agent {::connection (c/http-connection uri)
                     ::state ::created
                     ::uri uri
                     ::options opts})]
    ;; The four actions are queued in order on the same agent, so they
    ;; run one after another: start, open, handle, disconnect.
    (doto a
      (send-off start-request opts)
      (send-off open-response opts)
      ;; The handler is pre-bound to the agent so handle-response can
      ;; call it with no arguments.
      (send-off handle-response (partial (:handler opts) a) opts)
      (send-off disconnect opts))))
rlm@10 234
rlm@10 235
rlm@10 236 ;;; RESPONSE BODY ACCESSORS
rlm@10 237
(defn result
  "Blocks until all actions dispatched so far to the HTTP agent have
  completed, then returns the value produced by its :handler function.
  With the default handler that value is a ByteArrayOutputStream."
  [http-agnt]
  (await http-agnt)
  (get (deref http-agnt) ::result))
rlm@10 245
(defn stream
  "Returns an InputStream of the HTTP response body.

  While the agent is in the ::receiving state (i.e. when called from
  the handler function passed to http-agent) this is the raw
  HttpURLConnection stream.

  In any other state this returns a new ByteArrayInputStream over the
  response body buffered by the default handler function."
  [http-agnt]
  (let [snapshot @http-agnt]
    (if-not (= ::receiving (::state snapshot))
      (ByteArrayInputStream.
       (.toByteArray (get-byte-buffer http-agnt)))
      (::response-stream snapshot))))
rlm@10 259
(defn bytes
  "Returns a Java byte array of the response body buffered by the
  default handler. Blocks until the HTTP request is completed; it does
  not return nil early. Throws an Exception if the handler result is
  not a ByteArrayOutputStream (e.g. when a custom :handler was used)."
  [http-agnt]
  (.toByteArray (get-byte-buffer http-agnt)))
rlm@10 265
(defn string
  "Returns the HTTP response body as a string, using the given
  encoding (a charset name such as \"UTF-8\").

  If no encoding is given, uses the charset parameter of the
  Content-Type response header, or clojure.contrib.io/*default-encoding*
  if the server did not specify one."
  ([http-agnt]
     (await http-agnt) ;; response headers are needed to pick the charset
     (let [^HttpURLConnection conn (::connection @http-agnt)
           content-type (.getContentType conn)
           ;; The charset is a parameter of Content-Type. The
           ;; Content-Encoding header names a compression coding
           ;; (gzip, deflate, ...) and is never a valid charset.
           charset (when content-type
                     (second (re-find #"(?i)charset\s*=\s*[\"']?([^\s;\"']+)"
                                      content-type)))]
       (string http-agnt (or charset duck/*default-encoding*))))
  ([http-agnt ^String encoding]
     (.toString (get-byte-buffer http-agnt) encoding)))
rlm@10 280
rlm@10 281
rlm@10 282 ;;; REQUEST ACCESSORS
rlm@10 283
(defn request-uri
  "Returns the URI/URL that was passed to http-agent when this HTTP
  agent was created."
  [http-agnt]
  (-> http-agnt deref ::uri))
rlm@10 288
(defn request-headers
  "Returns the :headers option (the request headers) stored in this
  HTTP agent's options."
  [http-agnt]
  (let [opts (::options @http-agnt)]
    (:headers opts)))
rlm@10 293
(defn method
  "Returns the :method option (the HTTP method name) stored in this
  HTTP agent's options."
  [http-agnt]
  (let [opts (::options @http-agnt)]
    (:method opts)))
rlm@10 298
(defn request-body
  "Returns the :body option (the HTTP request body) that was given to
  this HTTP agent.

  Note: if the request body was an InputStream or a Reader, it will no
  longer be usable."
  [http-agnt]
  (let [opts (::options @http-agnt)]
    (:body opts)))
rlm@10 306
rlm@10 307
rlm@10 308 ;;; RESPONSE ACCESSORS
rlm@10 309
(defn done?
  "Returns true if the HTTP agent's state is ::finished or
  ::disconnected, i.e. the request/response has completed; false
  otherwise. Does not block."
  [http-agnt]
  (let [current (::state @http-agnt)]
    (contains? #{::finished ::disconnected} current)))
rlm@10 315
(defn status
  "Returns the HTTP response status code (e.g. 200, 404) for this
  request, or nil if the request is not yet done (see 'done?')."
  [http-agnt]
  (when (done? http-agnt)
    (let [^HttpURLConnection conn (::connection @http-agnt)]
      (.getResponseCode conn))))
rlm@10 323
(defn message
  "Returns the HTTP response message (e.g. 'Not Found') for this
  request, or nil if the request is not yet done (see 'done?')."
  [http-agnt]
  (when (done? http-agnt)
    (let [^HttpURLConnection conn (::connection @http-agnt)]
      (.getResponseMessage conn))))
rlm@10 330
(defn headers
  "Returns a map of HTTP response headers.  Header names are converted
  to keywords in all lower-case.  Header values are strings.  If a
  header appears more than once, only the last value in its list is
  returned.  A header field with a null name is kept under the key
  nil."
  [http-agnt]
  (let [^HttpURLConnection conn (::connection @http-agnt)]
    (into {}
          (for [[^String field-name field-values] (.getHeaderFields conn)]
            [(when field-name (keyword (.toLowerCase field-name)))
             (last field-values)]))))
rlm@10 340
(defn headers-seq
  "Returns the HTTP response headers in order as a sequence of
  [String,String] pairs.  The first 'header' name may be null for the
  HTTP status line."
  [http-agnt]
  (let [^HttpURLConnection conn (::connection @http-agnt)]
    (lazy-seq
     (loop [idx 0
            pairs []]
       ;; Look at the value first: the key of the first field may be
       ;; nil, so only a missing value marks the end of the headers.
       (if-let [field-value (.getHeaderField conn (int idx))]
         (recur (inc idx)
                (conj pairs [(.getHeaderFieldKey conn (int idx)) field-value]))
         (seq pairs))))))
rlm@10 353
rlm@10 354
rlm@10 355 ;;; RESPONSE STATUS CODE ACCESSORS
rlm@10 356
(defn success?
  "Returns true if the HTTP response status code is a 2xx code
  (200-299)."
  [http-agnt]
  (status-in-range? 2 http-agnt))
rlm@10 361
(defn redirect?
  "Returns true if the HTTP response status code is a 3xx code
  (300-399).

  Note: if the :follow-redirects option was true (the default),
  redirects will be followed automatically and the agent will never
  return a 3xx response code."
  [http-agnt]
  (status-in-range? 3 http-agnt))
rlm@10 370
(defn client-error?
  "Returns true if the HTTP response status code is a 4xx code
  (400-499)."
  [http-agnt]
  (status-in-range? 4 http-agnt))
rlm@10 375
(defn server-error?
  "Returns true if the HTTP response status code is a 5xx code
  (500-599)."
  [http-agnt]
  (status-in-range? 5 http-agnt))
rlm@10 380
(defn error?
  "Returns true if the HTTP response status code is either a client
  error (400-499) or a server error (500-599)."
  [http-agnt]
  (if (client-error? http-agnt)
    true
    (server-error? http-agnt)))