rlm@10
|
1 ;;; http/agent.clj: agent-based asynchronous HTTP client
|
rlm@10
|
2
|
rlm@10
|
3 ;; by Stuart Sierra, http://stuartsierra.com/
|
rlm@10
|
4 ;; August 17, 2009
|
rlm@10
|
5
|
rlm@10
|
6 ;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use
|
rlm@10
|
7 ;; and distribution terms for this software are covered by the Eclipse
|
rlm@10
|
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
|
rlm@10
|
9 ;; which can be found in the file epl-v10.html at the root of this
|
rlm@10
|
10 ;; distribution. By using this software in any fashion, you are
|
rlm@10
|
11 ;; agreeing to be bound by the terms of this license. You must not
|
rlm@10
|
12 ;; remove this notice, or any other, from this software.
|
rlm@10
|
13
|
rlm@10
|
14 ;; DEPRECATED IN 1.2. Use direct Java bits, or take a look at
|
rlm@10
|
15 ;; http://github.com/technomancy/clojure-http-client
|
rlm@10
|
16
|
rlm@10
|
17 (ns ^{:deprecated "1.2"
|
rlm@10
|
18 :doc "Agent-based asynchronous HTTP client.
|
rlm@10
|
19
|
rlm@10
|
20 This is an HTTP client library based on Java's HttpURLConnection
|
rlm@10
|
21 class and Clojure's Agent system. It allows you to make multiple
|
rlm@10
|
22 HTTP requests in parallel.
|
rlm@10
|
23
|
rlm@10
|
24 Start an HTTP request with the 'http-agent' function, which
|
rlm@10
|
25 immediately returns a Clojure Agent. You will never deref this
|
rlm@10
|
26 agent; that is handled by the accessor functions. The agent will
|
rlm@10
|
27 execute the HTTP request on a separate thread.
|
rlm@10
|
28
|
rlm@10
|
29 If you pass a :handler function to http-agent, that function will be
|
rlm@10
|
30 called as soon as the HTTP response body is ready. The handler
|
rlm@10
|
31 function is called with one argument, the HTTP agent itself. The
|
rlm@10
|
32 handler can read the response body by calling the 'stream' function
|
rlm@10
|
33 on the agent.
|
rlm@10
|
34
|
rlm@10
|
35 The value returned by the handler function becomes part of the state
|
rlm@10
|
36 of the agent, and you can retrieve it with the 'result' function.
|
rlm@10
|
37 If you call 'result' before the HTTP request has finished, it will
|
rlm@10
|
38 block until the handler function returns.
|
rlm@10
|
39
|
rlm@10
|
40 If you don't provide a handler function, the default handler will
|
rlm@10
|
41 buffer the entire response body in memory, which you can retrieve
|
rlm@10
|
42 with the 'bytes', 'string', or 'stream' functions. Like 'result',
|
rlm@10
|
43 these functions will block until the HTTP request is completed.
|
rlm@10
|
44
|
rlm@10
|
45 If you want to check if an HTTP request is finished without
|
rlm@10
|
46 blocking, use the 'done?' function.
|
rlm@10
|
47
|
rlm@10
|
48 A single GET request could be as simple as:
|
rlm@10
|
49
|
rlm@10
|
50 (string (http-agent \"http://www.stuartsierra.com/\"))
|
rlm@10
|
51
|
rlm@10
|
52 A simple POST might look like:
|
rlm@10
|
53
|
rlm@10
|
54 (http-agent \"http...\" :method \"POST\" :body \"foo=1\")
|
rlm@10
|
55
|
rlm@10
|
56 And you could write the response directly to a file like this:
|
rlm@10
|
57
|
rlm@10
|
58 (require '[clojure.contrib.io :as d])
|
rlm@10
|
59
|
rlm@10
|
60 (http-agent \"http...\"
|
rlm@10
|
61 :handler (fn [agnt]
|
rlm@10
|
62 (with-open [w (d/writer \"/tmp/out\")]
|
rlm@10
|
63 (d/copy (stream agnt) w))))
|
rlm@10
|
64 "
|
rlm@10
|
65 :author "Stuart Sierra"
|
rlm@10
|
66 }
|
rlm@10
|
67
|
rlm@10
|
68 clojure.contrib.http.agent
|
rlm@10
|
69 (:refer-clojure :exclude [bytes])
|
rlm@10
|
70 (:require [clojure.contrib.http.connection :as c]
|
rlm@10
|
71 [clojure.contrib.io :as duck])
|
rlm@10
|
72 (:import (java.io InputStream ByteArrayOutputStream
|
rlm@10
|
73 ByteArrayInputStream)
|
rlm@10
|
74 (java.net HttpURLConnection)))
|
rlm@10
|
75
|
rlm@10
|
76
|
rlm@10
|
77 ;;; PRIVATE
|
rlm@10
|
78
|
rlm@10
|
;; Forward declarations: `stream` and `result` are defined later in this
;; file but are called by the helpers that come before them.
(declare stream result)
|
rlm@10
|
80
|
rlm@10
|
(defn- setup-http-connection
  "Configures conn from the options map. Applies :connect-timeout and
  :read-timeout when they are present (non-nil), then sets the request
  method from :method, the instance redirect flag from
  :follow-redirects, and one request property for every name/value
  pair in :headers. Returns nil."
  [^HttpURLConnection conn options]
  (let [{:keys [connect-timeout read-timeout headers]} options]
    (when connect-timeout
      (.setConnectTimeout conn connect-timeout))
    (when read-timeout
      (.setReadTimeout conn read-timeout))
    (.setRequestMethod conn (:method options))
    (.setInstanceFollowRedirects conn (:follow-redirects options))
    (doseq [[header-name header-value] headers]
      (.setRequestProperty conn header-name header-value))))
|
rlm@10
|
93
|
rlm@10
|
(defn- start-request
  "Agent action that begins the HTTP request: configures the connection
  stored under ::connection from options, passes it together with the
  request :body to c/start-http-connection, and returns the state with
  ::state set to ::started."
  [state options]
  (doto (::connection state)
    (setup-http-connection options)
    (c/start-http-connection (:body options)))
  (assoc state ::state ::started))
|
rlm@10
|
101
|
rlm@10
|
(defn- connection-success?
  "Returns true if the response code reported by conn is in the 2xx
  range (200-299), false otherwise. Calls .getResponseCode on conn."
  ;; The docstring must come before the argument vector; placed after
  ;; it, the string is only an ignored body expression.
  [^HttpURLConnection conn]
  (= 2 (quot (.getResponseCode conn) 100)))
|
rlm@10
|
106
|
rlm@10
|
(defn- open-response
  "Agent action that obtains the response body stream of the HTTP
  request and stores it in the state under ::response-stream, setting
  ::state to ::receiving. The connection's input stream is used when
  connection-success? is true for it, its error stream otherwise.
  The options argument is not used."
  [state options]
  (let [^HttpURLConnection conn (::connection state)
        body-stream (if (connection-success? conn)
                      (.getInputStream conn)
                      (.getErrorStream conn))]
    (assoc state
      ::state ::receiving
      ::response-stream body-stream)))
|
rlm@10
|
117
|
rlm@10
|
(defn- handle-response
  "Agent action that calls handler with no arguments and returns the
  state with ::result set to the handler's return value and ::state
  set to ::finished. The options argument is not used."
  [state handler options]
  ;; The original bound the connection to a local that was never used;
  ;; the binding has been removed.
  (assoc state
    ::result (handler)
    ::state ::finished))
|
rlm@10
|
127
|
rlm@10
|
(defn- disconnect
  "Agent action that closes the response body stream (if the state has
  one) and disconnects the HttpURLConnection, then returns the state
  with ::response-stream cleared to nil and ::state set to
  ::disconnected. The options argument is not used.

  The connection is disconnected even when closing the stream throws;
  in that case the exception still propagates to the agent."
  [state options]
  (try
    (when-let [^InputStream body (::response-stream state)]
      (.close body))
    (finally
      ;; Always release the connection, even if .close failed above.
      (.disconnect ^HttpURLConnection (::connection state))))
  (assoc state
    ::response-stream nil
    ::state ::disconnected))
|
rlm@10
|
138
|
rlm@10
|
(defn- status-in-range?
  "Returns true if the HTTP response code of the agent's connection,
  integer-divided by 100, equals digit (an integer); e.g. digit 4
  matches codes 400-499. Reads the code with .getResponseCode on the
  connection held in the agent's current state."
  [digit http-agnt]
  (let [^HttpURLConnection conn (::connection @http-agnt)
        code (.getResponseCode conn)]
    (= digit (quot code 100))))
|
rlm@10
|
146
|
rlm@10
|
(defn- ^ByteArrayOutputStream get-byte-buffer
  "Returns the handler result of http-agnt (obtained with 'result',
  which blocks until the agent's pending actions complete) when it is
  a ByteArrayOutputStream; throws an Exception otherwise."
  [http-agnt]
  (let [handler-result (result http-agnt)]
    (when-not (instance? ByteArrayOutputStream handler-result)
      (throw (Exception. "Handler result was not a ByteArrayOutputStream")))
    handler-result))
|
rlm@10
|
152
|
rlm@10
|
153
|
rlm@10
|
(defn buffer-bytes
  "The default HTTP agent result handler. Copies the response body
  (as returned by 'stream' for http-agnt) into a new
  java.io.ByteArrayOutputStream and returns that buffer, so the body
  can later be read with the 'stream', 'string', and 'bytes'
  functions. When 'stream' returns nil, an empty string is copied
  instead."
  [http-agnt]
  (let [sink (ByteArrayOutputStream.)
        body (stream http-agnt)]
    (duck/copy (if body body "") sink)
    sink))
|
rlm@10
|
162
|
rlm@10
|
163
|
rlm@10
|
164 ;;; CONSTRUCTOR
|
rlm@10
|
165
|
rlm@10
|
;; Option values that http-agent uses for every key the caller does not
;; supply; the caller's options are merged over this map.
(def *http-agent-defaults*
  {:method           "GET"
   :headers          {}
   :body             nil
   :connect-timeout  0
   :read-timeout     0
   :follow-redirects true
   :handler          buffer-bytes})
|
rlm@10
|
174
|
rlm@10
|
(defn http-agent
  "Creates an Agent representing an HTTP request to uri and returns it
  immediately. The request is carried out by actions dispatched to the
  agent with send-off (start the request, open the response, run the
  handler, disconnect), so this call does not wait for the response.

  options are key/value pairs, merged over *http-agent-defaults*:

  :method string

  The HTTP method name.  Default is \"GET\".

  :headers h

  HTTP request headers, as a Map or a sequence of pairs like
  ([key1,value1], [key2,value2]).  Default is {}.

  :body b

  HTTP request entity body, one of nil, String, byte[], InputStream,
  Reader, or File.  Default is nil.

  :connect-timeout int

  Timeout value, in milliseconds, when opening a connection to the
  URL.  Default is zero, meaning no timeout.

  :read-timeout int

  Timeout value, in milliseconds, when reading data from the
  connection.  Default is zero, meaning no timeout.

  :follow-redirects boolean

  If true, HTTP 3xx redirects will be followed automatically.  Default
  is true.

  :handler f

  Function to be called when the HTTP response body is ready.  It is
  called with the HTTP agent as its only argument and can use the
  'stream' function to read the response body.  Its return value is
  stored in the state of the agent and can be retrieved with the
  'result' function.  Any exceptions thrown by this function will be
  added to the agent's error queue (see agent-errors).  The default
  handler, buffer-bytes, collects the entire response body in a
  memory buffer.
  "
  [uri & options]
  (let [opts (merge *http-agent-defaults* (apply array-map options))
        agnt (agent {::connection (c/http-connection uri)
                     ::state ::created
                     ::uri uri
                     ::options opts})]
    ;; doto returns agnt, the same agent every send-off call returns.
    (doto agnt
      (send-off start-request opts)
      (send-off open-response opts)
      (send-off handle-response (partial (:handler opts) agnt) opts)
      (send-off disconnect opts))))
|
rlm@10
|
234
|
rlm@10
|
235
|
rlm@10
|
236 ;;; RESPONSE BODY ACCESSORS
|
rlm@10
|
237
|
rlm@10
|
(defn result
  "Waits (with await) for the actions dispatched so far to the HTTP
  agent to complete, then returns the value stored under ::result in
  its state, i.e. the return value of the :handler function. The
  default handler function returns a ByteArrayOutputStream."
  [http-agnt]
  (await http-agnt)
  (-> http-agnt deref ::result))
|
rlm@10
|
245
|
rlm@10
|
(defn stream
  "Returns an InputStream of the HTTP response body.

  While the agent's state is ::receiving (the state in which the
  handler function passed to http-agent runs), this is the raw
  response stream stored in the agent's state by open-response.

  In any other state it returns a new ByteArrayInputStream over the
  buffered response body; this requires the handler result to be a
  ByteArrayOutputStream, as produced by the default handler."
  [http-agnt]
  (let [{current ::state, raw ::response-stream} @http-agnt]
    (if (not= current ::receiving)
      (ByteArrayInputStream.
       (.toByteArray (get-byte-buffer http-agnt)))
      raw)))
|
rlm@10
|
259
|
rlm@10
|
(defn bytes
  "Returns a Java byte array of the content returned by the server.

  Blocks until the HTTP request is completed (the buffer is fetched
  through 'result'); it does not return nil while the content is
  pending. Throws an Exception if the handler result is not a
  ByteArrayOutputStream, e.g. when a custom :handler was supplied."
  [http-agnt]
  (-> http-agnt get-byte-buffer .toByteArray))
|
rlm@10
|
265
|
rlm@10
|
(defn- response-charset
  "Returns the character encoding name declared by the response on
  conn, or nil if none can be determined. Prefers the charset
  parameter of the Content-Type header. For compatibility with the
  previous behavior it falls back to the Content-Encoding header, but
  only when that value names a charset this JVM supports (so content
  codings such as \"gzip\" are ignored)."
  [^HttpURLConnection conn]
  (let [content-type (.getContentType conn)
        declared (when content-type
                   (second (re-find #"(?i);\s*charset\s*=\s*\"?([^\s;\"]+)"
                                    content-type)))
        legacy (.getContentEncoding conn)]
    (or declared
        (when legacy
          (try
            (when (java.nio.charset.Charset/isSupported legacy) legacy)
            ;; thrown for syntactically illegal charset names
            (catch IllegalArgumentException _ nil))))))

(defn string
  "Returns the HTTP response body as a string, using the given
  encoding.

  If no encoding is given, uses the charset declared in the response's
  Content-Type header, or clojure.contrib.io/*default-encoding* if the
  server did not declare one.

  Blocks until the HTTP request is completed, and throws an Exception
  if the handler result is not a ByteArrayOutputStream."
  ([http-agnt]
     (await http-agnt) ;; the response headers must have been received
     (string http-agnt
             (or (response-charset (::connection @http-agnt))
                 duck/*default-encoding*)))
  ([http-agnt ^String encoding]
     (.toString (get-byte-buffer http-agnt) encoding)))
|
rlm@10
|
280
|
rlm@10
|
281
|
rlm@10
|
282 ;;; REQUEST ACCESSORS
|
rlm@10
|
283
|
rlm@10
|
(defn request-uri
  "Returns the URI/URL that was passed to http-agent for this HTTP
  agent, as stored under ::uri in the agent's state."
  [http-agnt]
  (-> http-agnt deref ::uri))
|
rlm@10
|
288
|
rlm@10
|
(defn request-headers
  "Returns the :headers option (the request headers) stored in this
  HTTP agent's options."
  [http-agnt]
  (get-in @http-agnt [::options :headers]))
|
rlm@10
|
293
|
rlm@10
|
(defn method
  "Returns the :method option (the HTTP method name) stored in this
  HTTP agent's options."
  [http-agnt]
  (let [options (::options @http-agnt)]
    (:method options)))
|
rlm@10
|
298
|
rlm@10
|
(defn request-body
  "Returns the :body option (the HTTP request body) that was given to
  this HTTP agent.

  Note: if the request body was an InputStream or a Reader, it will no
  longer be usable."
  [http-agnt]
  (get-in @http-agnt [::options :body]))
|
rlm@10
|
306
|
rlm@10
|
307
|
rlm@10
|
308 ;;; RESPONSE ACCESSORS
|
rlm@10
|
309
|
rlm@10
|
(defn done?
  "Returns true if the HTTP agent's state is ::finished or
  ::disconnected, i.e. the handler function has returned; false
  otherwise. Does not block."
  [http-agnt]
  (let [current (::state @http-agnt)]
    (or (= current ::finished)
        (= current ::disconnected))))
|
rlm@10
|
315
|
rlm@10
|
(defn status
  "Returns the HTTP response status code (e.g. 200, 404) for this
  request, read from the agent's connection, or nil if the request is
  not yet done (see 'done?')."
  [http-agnt]
  (if (done? http-agnt)
    (let [^HttpURLConnection conn (::connection @http-agnt)]
      (.getResponseCode conn))
    nil))
|
rlm@10
|
323
|
rlm@10
|
(defn message
  "Returns the HTTP response message (e.g. 'Not Found') for this
  request, read from the agent's connection, or nil if the request is
  not yet done (see 'done?')."
  [http-agnt]
  (if (done? http-agnt)
    (let [^HttpURLConnection conn (::connection @http-agnt)]
      (.getResponseMessage conn))
    nil))
|
rlm@10
|
330
|
rlm@10
|
(defn headers
  "Returns a map of HTTP response headers. Header names are converted
  to lower-case keywords. Header values are strings. If a header
  appears more than once, only the last value is returned. A header
  entry whose name is null is stored under the key nil."
  [http-agnt]
  (let [^HttpURLConnection conn (::connection @http-agnt)]
    (into {}
          (for [[field-name field-values] (.getHeaderFields conn)]
            [(when field-name
               (keyword (.toLowerCase ^String field-name)))
             (last field-values)]))))
|
rlm@10
|
340
|
rlm@10
|
(defn headers-seq
  "Returns the HTTP response headers, in index order, as a sequence of
  [name value] pairs of strings, stopping at the first index for which
  the connection reports no header value. The first 'header' name may
  be null for the HTTP status line."
  [http-agnt]
  (let [^HttpURLConnection conn (::connection @http-agnt)
        collect (fn collect [^Integer idx]
                  ;; Look the value up before the key: the key at the
                  ;; first index may legitimately be nil, so only a
                  ;; missing value marks the end of the headers.
                  (if-let [field-value (.getHeaderField conn idx)]
                    (cons [(.getHeaderFieldKey conn idx) field-value]
                          (collect (inc idx)))
                    nil))]
    (lazy-seq (collect 0))))
|
rlm@10
|
353
|
rlm@10
|
354
|
rlm@10
|
355 ;;; RESPONSE STATUS CODE ACCESSORS
|
rlm@10
|
356
|
rlm@10
|
(defn success?
  "Returns true if the HTTP response code of this HTTP agent was in
  the 200-299 range."
  [http-agnt]
  (let [hundreds 2]
    (status-in-range? hundreds http-agnt)))
|
rlm@10
|
361
|
rlm@10
|
(defn redirect?
  "Returns true if the HTTP response code of this HTTP agent was in
  the 300-399 range.

  Note: if the :follow-redirects option was true (the default),
  redirects will be followed automatically and the agent will never
  return a 3xx response code."
  [http-agnt]
  (let [hundreds 3]
    (status-in-range? hundreds http-agnt)))
|
rlm@10
|
370
|
rlm@10
|
(defn client-error?
  "Returns true if the HTTP response code of this HTTP agent was in
  the 400-499 range."
  [http-agnt]
  (let [hundreds 4]
    (status-in-range? hundreds http-agnt)))
|
rlm@10
|
375
|
rlm@10
|
(defn server-error?
  "Returns true if the HTTP response code of this HTTP agent was in
  the 500-599 range."
  [http-agnt]
  (let [hundreds 5]
    (status-in-range? hundreds http-agnt)))
|
rlm@10
|
380
|
rlm@10
|
(defn error?
  "Returns true if the HTTP response code of this HTTP agent was in
  the 400-499 range (client-error?) or in the 500-599 range
  (server-error?)."
  [http-agnt]
  (if (client-error? http-agnt)
    true
    (server-error? http-agnt)))
|