comparison src/clojure/contrib/string.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
comparison
equal deleted inserted replaced
9:35cf337adfcf 10:ef7dbbd6452c
1 ;;; string.clj -- functional string utilities for Clojure
2
3 ;; by Stuart Sierra, http://stuartsierra.com/
4 ;; January 26, 2010
5
6 ;; Copyright (c) Stuart Sierra, 2010. All rights reserved. The use
7 ;; and distribution terms for this software are covered by the Eclipse
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
9 ;; which can be found in the file epl-v10.html at the root of this
10 ;; distribution. By using this software in any fashion, you are
11 ;; agreeing to be bound by the terms of this license. You must not
12 ;; remove this notice, or any other, from this software.
13
14 ;; DEPRECATED in 1.2: Many functions have moved to clojure.string.
15
16 (ns ^{:author "Stuart Sierra"
17 :doc "This is a library of string manipulation functions. It
18 is intended as a replacement for clojure.contrib.string.
19
20 You cannot (use 'clojure.contrib.string) because it defines
21 functions with the same names as functions in clojure.core.
22 Instead, do (require '[clojure.contrib.string :as s])
23 or something similar.
24
25 Goals:
26 1. Be functional
27 2. Most significant argument LAST, to work with ->>
28 3. At least O(n) performance for Strings of length n
29
30 Some ideas are borrowed from
31 http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
32 clojure.contrib.string
33 (:refer-clojure :exclude (take replace drop butlast partition
34 contains? get repeat reverse partial))
35 (:import (java.util.regex Pattern)))
36
37
(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character (a Java
  char) in string, in order.  Returns nil.  Does NOT handle Unicode
  supplementary characters (above U+FFFF): such a character is seen as
  two separate surrogate chars.

  The bindings form must be a vector of exactly two elements; this is
  checked at macro-expansion time."
  [bindings & body]
  ;; `vector?` is the predicate.  `(vector bindings)` merely wraps its
  ;; argument in a fresh vector, which is always truthy, so that form of
  ;; the assertion could never fail.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))
52
53
(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string, in order.  Returns nil.
  Handles Unicode supplementary characters (above U+FFFF) correctly:
  a valid surrogate pair is delivered as one code point.  An unpaired
  surrogate is delivered as its own value and does not swallow the
  character that follows it.

  The bindings form must be a vector of exactly two elements; this is
  checked at macro-expansion time."
  [bindings & body]
  ;; `vector?` is the predicate; `(vector bindings)` is always truthy.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           ;; codePointAt yields the full code point for a surrogate pair
           ;; and the plain char value otherwise, so one branch suffices
           ;; and body is expanded only once.
           (let [cp# (.codePointAt s# i#)]
             (let [~character cp#]
               ~@body)
             ;; charCount is 2 only for a real supplementary code point,
             ;; so an unpaired high surrogate advances by a single char.
             (recur (+ i# (Character/charCount cp#)))))))))
77
(defn codepoints
  "Returns a lazy sequence of the integer Unicode code points in s.
  Handles Unicode supplementary characters (above U+FFFF) correctly:
  a valid surrogate pair yields one code point.  An unpaired surrogate
  yields its own value and does not swallow the following character.

  Each element is produced on demand, so arbitrarily long strings can
  be walked without deep recursion."
  [^String s]
  (let [len (.length s)
        step (fn step [i]
               ;; lazy-seq sits INSIDE the recursive step.  Wrapping only
               ;; the outermost call leaves the recursion eager, which
               ;; overflows the stack on long strings.
               (lazy-seq
                 (when (< i len)
                   (let [cp (.codePointAt s i)]
                     (cons cp (step (+ i (Character/charCount cp))))))))]
    (step 0)))
90
(defn ^String escape
  "Builds a new String by looking up every character of s in cmap (a
  function or a map).  A truthy result is appended in place of the
  character; otherwise the character itself is kept."
  {:deprecated "1.2"}
  [cmap ^String s]
  (let [out (StringBuilder. (.length s))]
    ;; plain index loop over the chars of s
    (dotimes [idx (.length s)]
      (let [ch (.charAt s idx)]
        (if-let [mapped (cmap ch)]
          (.append out mapped)
          (.append out ch))))
    (.toString out)))
103
(defn blank?
  "True when s is nil, the empty string, or made up solely of
  characters for which Character/isWhitespace is true."
  {:deprecated "1.2"}
  [^String s]
  (let [whitespace? (fn [^Character ch] (Character/isWhitespace ch))]
    (every? whitespace? s)))
109
(defn ^String take
  "Returns the first n characters of s, or s itself when s has fewer
  than n characters."
  [n ^String s]
  (cond
    (< (count s) n) s
    :else (.substring s 0 n)))
116
(defn ^String drop
  "Returns s without its first n characters.  When n exceeds the
  length of s the result is the empty string."
  [n ^String s]
  (if-not (< (count s) n)
    (.substring s n)
    ""))
124
(defn ^String butlast
  "Returns s with its final n characters removed.  When n exceeds the
  length of s the result is the empty string."
  [n ^String s]
  (let [len (count s)]
    (if (< len n)
      ""
      (.substring s 0 (- len n)))))
132
(defn ^String tail
  "Returns the final n characters of s, or s itself when s has fewer
  than n characters."
  [n ^String s]
  (let [len (count s)]
    (if (< len n)
      s
      (.substring s (- len n)))))
139
(defn ^String repeat
  "Concatenates n copies of s into a single new String."
  [n ^String s]
  (->> s
       (clojure.core/repeat n)
       (apply str)))
144
(defn ^String reverse
  "Returns a String holding the characters of s in reverse order, as
  produced by StringBuilder.reverse()."
  {:deprecated "1.2"}
  [^String s]
  (-> (StringBuilder. s)
      (.reverse)
      (.toString)))
150
(defn replace-str
  "Returns s with every occurrence of the substring a replaced by b,
  via String.replace."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  (. s replace a b))
156
(defn replace-char
  "Returns s with every occurrence of the character a replaced by the
  character b, via String.replace."
  {:deprecated "1.2"}
  [^Character a ^Character b ^String s]
  (. s replace a b))
162
(defn replace-re
  "Returns s with every match of the pattern re replaced by
  replacement, using Matcher.replaceAll."
  {:deprecated "1.2"}
  [re replacement ^String s]
  (let [matcher (re-matcher re s)]
    (.replaceAll matcher replacement)))
168
(defn replace-by
  "Replaces all matches of re in s with the result of
  (f (re-groups the-match)).

  The string returned by f is inserted literally: any '$' or '\\' in
  it is escaped before being handed to Matcher.appendReplacement, so it
  is never interpreted as a group reference."
  {:deprecated "1.2"}
  [re f ^String s]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        (do
          ;; appendReplacement treats $n and \ specially.  Quote the
          ;; result of f so its text is copied verbatim instead of
          ;; throwing or silently substituting a capture group.
          (.appendReplacement
            m buffer
            (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
          (recur))
        (do
          ;; no more matches: copy the rest of the input and finish
          (.appendTail m buffer)
          (.toString buffer))))))
182
(defn replace-first-str
  "Replaces the first occurrence of the substring a with b in s.
  Both a and b are treated as literal text: regex metacharacters in a
  and '$' or '\\' in b have no special meaning.  Returns s unchanged
  (as a String) when a does not occur."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  ;; Pattern/quote only returns a quoted regex *String*; it has to be
  ;; compiled before re-matcher (which requires a Pattern) can use it.
  (let [literal (Pattern/compile (Pattern/quote a))]
    (.replaceFirst (re-matcher literal s)
                   ;; keep b literal too: replaceFirst interprets $ and \
                   (java.util.regex.Matcher/quoteReplacement b))))
188
(defn replace-first-re
  "Returns s with the first match of the pattern re replaced by
  replacement, using Matcher.replaceFirst."
  {:deprecated "1.2"}
  [^Pattern re ^String replacement ^String s]
  (let [matcher (re-matcher re s)]
    (.replaceFirst matcher replacement)))
194
(defn replace-first-by
  "Replaces the first match of re in s with the result of
  (f (re-groups the-match)).  Returns s unchanged when re does not
  match anywhere in s.

  The string returned by f is inserted literally: any '$' or '\\' in
  it is escaped before being handed to Matcher.appendReplacement."
  {:deprecated "1.2"}
  [^Pattern re f ^String s]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            ;; quote so the result of f is never read as a group reference
            rep (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; no match: hand back the input rather than nil
      s)))
207
(defn partition
  "Splits s into a lazy sequence that alternates between the text
  lying between matches of the pattern and the matches themselves (as
  returned by re-groups).  The first element is always the text before
  the first match, which is the empty string when s begins with a
  match.  Trailing text after the last match is included only when it
  is non-empty.

  For example: (partition #\"[a-z]+\" \"abc123def\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^Pattern re ^String s]
  (let [matcher (re-matcher re s)
        walk (fn walk [from]
               (lazy-seq
                 (if (.find matcher)
                   ;; read everything off the matcher before the next find
                   (let [before (.subSequence s from (.start matcher))
                         hit (re-groups matcher)
                         resume (+ (.start matcher) (count (.group matcher)))]
                     (cons before (cons hit (walk resume))))
                   (when (< from (.length s))
                     (list (.subSequence s from (.length s)))))))]
    (walk 0)))
228
(defn ^String join
  "Concatenates the elements of coll into one string, placing
  separator between consecutive elements.  Like Perl's join."
  {:deprecated "1.2"}
  [^String separator coll]
  (->> coll
       (interpose separator)
       (apply str)))
235
(defn ^String chop
  "Returns string without its last character.  A zero-length string
  is returned as is."
  [^String s]
  (let [size (count s)]
    (if (pos? size)
      (subs s 0 (dec size))
      s)))
244
(defn ^String chomp
  "Strips every trailing newline \\n or return \\r character from the
  end of string.  Note: String.trim() is similar and faster.
  Deprecated in 1.2.  Use clojure.string/trim-newline"
  {:deprecated "1.2"}
  [^String s]
  (let [matcher (re-matcher #"[\r\n]+$" s)]
    (.replaceAll matcher "")))
252
(defn ^String swap-case
  "Returns s with lower-case characters turned into upper case and
  every other character turned into lower case.  Iterates by code
  point, so Unicode supplementary characters are converted as whole
  characters.  Conversion goes through the locale-sensitive
  String.toUpperCase() and String.toLowerCase() methods."
  [^String s]
  (let [out (StringBuilder. (.length s))
        ;; one-slot int array used to build a String from one code point
        ^"[I" cp-slot (make-array Integer/TYPE 1)]
    (docodepoints [cp s]
      (aset-int cp-slot 0 cp)
      ;; Character.toUpperCase is not locale-sensitive, but
      ;; String.toUpperCase is; so the code point is converted as a
      ;; one-character String.
      (let [one-char (String. cp-slot 0 1)]
        (.append out (if (Character/isLowerCase cp)
                       (.toUpperCase one-char)
                       (.toLowerCase one-char)))))
    (.toString out)))
270
(defn ^String capitalize
  "Upper-cases the first character of the string and lower-cases all
  the remaining characters."
  {:deprecated "1.2"}
  [^String s]
  (if (< (count s) 2)
    (.toUpperCase s)
    (let [^String head (subs s 0 1)
          ^String remainder (subs s 1)]
      (str (.toUpperCase head) (.toLowerCase remainder)))))
280
(defn ^String ltrim
  "Strips leading whitespace (regex \\s) from string.
  Deprecated in 1.2.  Use clojure.string/triml."
  {:deprecated "1.2"}
  [^String s]
  (let [matcher (re-matcher #"^\s+" s)]
    (.replaceAll matcher "")))
287
(defn ^String rtrim
  "Strips trailing whitespace (regex \\s) from string.
  Deprecated in 1.2.  Use clojure.string/trimr."
  {:deprecated "1.2"}
  [^String s]
  (let [matcher (re-matcher #"\s+$" s)]
    (.replaceAll matcher "")))
294
(defn split-lines
  "Breaks s apart at every \\n or \\r\\n and returns the pieces as a
  sequence of strings."
  {:deprecated "1.2"}
  [^String s]
  (-> #"\r?\n"
      (.split s)
      seq))
300
301 ;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn ^String map-str
  "Calls f on every element of coll and concatenates the results,
  via str, into one String."
  [f coll]
  (->> coll
       (map f)
       (apply str)))
307
308 ;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn grep
  "Keeps the elements of coll whose String representation (obtained
  with str) contains a match for the regular expression re, as tested
  by re-find."
  [re coll]
  (let [hit? (fn [item] (re-find re (str item)))]
    (filter hit? coll)))
314
(defn as-str
  "Works like clojure.core/str, except that an argument which is a
  keyword or symbol (any clojure.lang.Named) contributes its name
  rather than its printed form.

  Example:
    (str :foo :bar)     ;;=> \":foo:bar\"
    (as-str :foo :bar)  ;;=> \"foobar\"

  Only top-level arguments are treated this way; keywords or symbols
  nested inside a data structure are rendered exactly as str renders
  them.

  Example:
    (str {:foo :bar})     ;;=> \"{:foo :bar}\"
    (as-str {:foo :bar})  ;;=> \"{:foo :bar}\" "
  ([] "")
  ([x]
     (if (instance? clojure.lang.Named x)
       (name x)
       (str x)))
  ([x & ys]
     ;; accumulate into one StringBuilder seeded with the first argument
     (loop [^StringBuilder sb (StringBuilder. ^String (as-str x))
            more ys]
       (if more
         (recur (.append sb (as-str (first more))) (next more))
         (str sb)))))
339
340
341 ;;; WRAPPERS
342
343 ;; The following functions are simple wrappers around java.lang.String
344 ;; functions. They are included here for completeness, and for use
345 ;; when mapping over a collection of strings.
346
(defn ^String upper-case
  "Returns string converted to upper case with String.toUpperCase()."
  {:deprecated "1.2"}
  [^String s]
  (. s toUpperCase))
352
(defn ^String lower-case
  "Returns string converted to lower case with String.toLowerCase()."
  {:deprecated "1.2"}
  [^String s]
  (. s toLowerCase))
358
(defn split
  "Splits string around matches of the regular expression re and
  returns the pieces as a sequence.  The optional limit is passed
  through as the limit argument of Pattern.split."
  {:deprecated "1.2"}
  ([^Pattern re ^String s]
     (-> re (.split s) seq))
  ([^Pattern re limit ^String s]
     (-> re (.split s limit) seq)))
365
(defn ^String trim
  "Returns string with leading and trailing whitespace removed, as
  done by String.trim()."
  {:deprecated "1.2"}
  [^String s]
  (. s trim))
371
;; No ^String tag on the var: this returns a boolean, and a String tag
;; makes the compiler insert a cast to String at interop call sites.
(defn substring?
  "True if s contains the substring, as reported by String.contains."
  [substring ^String s]
  (.contains s substring))
376
;; No ^String tag on the var: .charAt yields a char, and a String tag
;; makes the compiler insert a cast to String at interop call sites.
(defn get
  "Gets the i'th character in string (zero-based), via String.charAt.
  Note the argument order: the string comes first, then the index."
  {:deprecated "1.2"}
  [^String s i]
  (.charAt s i))
382