view src/clojure/contrib/string.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line source
1 ;;; string.clj -- functional string utilities for Clojure
3 ;; by Stuart Sierra, http://stuartsierra.com/
4 ;; January 26, 2010
6 ;; Copyright (c) Stuart Sierra, 2010. All rights reserved. The use
7 ;; and distribution terms for this software are covered by the Eclipse
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
9 ;; which can be found in the file epl-v10.html at the root of this
10 ;; distribution. By using this software in any fashion, you are
11 ;; agreeing to be bound by the terms of this license. You must not
12 ;; remove this notice, or any other, from this software.
14 ;; DEPRECATED in 1.2: Many functions have moved to clojure.string.
16 (ns ^{:author "Stuart Sierra"
17 :doc "This is a library of string manipulation functions. It
18 is intented as a replacement for clojure.contrib.string.
20 You cannot (use 'clojure.contrib.string) because it defines
21 functions with the same names as functions in clojure.core.
22 Instead, do (require '[clojure.contrib.string :as s])
23 or something similar.
25 Goals:
26 1. Be functional
27 2. Most significant argument LAST, to work with ->>
28 3. At least O(n) performance for Strings of length n
30 Some ideas are borrowed from
31 http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
32 clojure.contrib.string
33 (:refer-clojure :exclude (take replace drop butlast partition
34 contains? get repeat reverse partial))
35 (:import (java.util.regex Pattern)))
(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character in
  string, in order.  Iterates over UTF-16 chars, so it does NOT handle
  Unicode supplementary characters (above U+FFFF).  Returns nil.

  Throws an AssertionError at macroexpansion time unless bindings is
  a vector of exactly two forms."
  [bindings & body]
  ;; vector? rather than vector: (vector x) builds a one-element vector
  ;; and is always truthy, so it can never reject a bad binding form.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))
(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string.  Handles Unicode
  supplementary characters (above U+FFFF) correctly.  An unpaired
  surrogate is passed through as its own code point and does not
  swallow the character that follows it.  Returns nil.

  Throws an AssertionError at macroexpansion time unless bindings is
  a vector of exactly two forms."
  [bindings & body]
  ;; vector? rather than vector: (vector x) is always truthy.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           ;; codePointAt combines a valid surrogate pair and otherwise
           ;; returns the single char value at i#.
           (let [cp# (.codePointAt s# i#)
                 ~character cp#]
             ~@body
             ;; Advance by the number of chars this code point occupies
             ;; (2 for supplementary characters, else 1) instead of
             ;; assuming every high surrogate starts a pair.
             (recur (+ i# (Character/charCount cp#)))))))))
(defn codepoints
  "Returns a lazy sequence of the integer Unicode code points in s.
  Handles Unicode supplementary characters (above U+FFFF) correctly;
  an unpaired surrogate is returned as its own code point.

  Each element is realized on demand, so long strings do not consume
  stack proportional to their length."
  [^String s]
  (let [len (.length s)
        step (fn step [i]
               ;; lazy-seq wraps every step so the recursion is
               ;; deferred rather than nested on the call stack.
               (lazy-seq
                 (when (< i len)
                   (let [cp (.codePointAt s (int i))]
                     ;; charCount is 2 for a supplementary code point,
                     ;; otherwise 1.
                     (cons cp (step (+ i (Character/charCount cp))))))))]
    (step 0)))
(defn ^String escape
  "Builds a new String from s by calling cmap (a function or a map)
  on each character.  A truthy result is appended in place of the
  character; otherwise the character itself is appended."
  {:deprecated "1.2"}
  [cmap ^String s]
  (let [out (StringBuilder. (.length s))]
    (dochars [ch s]
      (let [substitute (cmap ch)]
        (if substitute
          (.append out substitute)
          (.append out ch))))
    (.toString out)))
(defn blank?
  "True if s is nil, empty, or made up entirely of whitespace
  characters (as judged by Character/isWhitespace)."
  {:deprecated "1.2"}
  [^String s]
  (not-any? (fn [^Character ch]
              (not (Character/isWhitespace ch)))
            s))
(defn ^String take
  "Returns the first n characters of s, or s itself when n exceeds
  the length of s."
  [n ^String s]
  (let [len (count s)]
    (if-not (> n len)
      (.substring s 0 n)
      s)))
(defn ^String drop
  "Returns s without its first n characters, or the empty string
  when n exceeds the length of s."
  [n ^String s]
  (let [len (count s)]
    (if-not (> n len)
      (.substring s n)
      "")))
(defn ^String butlast
  "Returns s with its last n characters removed, or the empty string
  when n exceeds the length of s."
  [n ^String s]
  (let [len (count s)]
    (if-not (> n len)
      (.substring s 0 (- len n))
      "")))
(defn ^String tail
  "Returns the last n characters of s, or s itself when n exceeds
  the length of s."
  [n ^String s]
  (let [len (count s)]
    (if-not (> n len)
      (.substring s (- len n))
      s)))
(defn ^String repeat
  "Returns a new String made of n copies of s concatenated together."
  [n ^String s]
  (->> s
       (clojure.core/repeat n)
       (apply str)))
(defn ^String reverse
  "Returns s with its characters in reverse order, as produced by
  StringBuilder.reverse()."
  {:deprecated "1.2"}
  [^String s]
  (let [builder (StringBuilder. s)]
    (.reverse builder)
    (.toString builder)))
(defn replace-str
  "Returns s with every occurrence of the literal substring a
  replaced by b."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  (let [^CharSequence target a
        ^CharSequence substitute b]
    (.replace s target substitute)))
(defn replace-char
  "Returns s with every occurrence of character a replaced by
  character b."
  {:deprecated "1.2"}
  [^Character a ^Character b ^String s]
  (let [from (.charValue a)
        to (.charValue b)]
    (.replace s from to)))
(defn replace-re
  "Returns s with every match of the regular expression re replaced
  by replacement.  replacement is handed to Matcher.replaceAll."
  {:deprecated "1.2"}
  [re replacement ^String s]
  (let [matcher (re-matcher re s)]
    (.replaceAll matcher replacement)))
(defn replace-by
  "Replaces all matches of re in s with the result of
  (f (re-groups the-match)).

  f must return a String.  The returned text is inserted literally:
  any $ or \\ in it is NOT interpreted as a group reference or
  escape."
  {:deprecated "1.2"}
  [re f ^String s]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        ;; appendReplacement treats $ and \ in its argument specially,
        ;; so quote f's result to get a literal insertion.
        (do (.appendReplacement
              m buffer
              (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
            (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))
(defn replace-first-str
  "Replaces the first occurrence of the literal substring a with the
  literal string b in s.  Neither a nor b is interpreted as regular
  expression or replacement syntax."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  ;; Pattern/quote returns a String (a quoted regex), not a Pattern, so
  ;; it cannot be given to re-matcher; String.replaceFirst accepts the
  ;; regex as a String.  quoteReplacement keeps $ and \ in b literal.
  (.replaceFirst s
                 (Pattern/quote a)
                 (java.util.regex.Matcher/quoteReplacement b)))
(defn replace-first-re
  "Returns s with the first match of re replaced by replacement.
  replacement is handed to Matcher.replaceFirst."
  {:deprecated "1.2"}
  [^Pattern re ^String replacement ^String s]
  (let [matcher (re-matcher re s)]
    (.replaceFirst matcher replacement)))
(defn replace-first-by
  "Replaces the first match of re in s with the result of
  (f (re-groups the-match)).  Returns s unchanged when re does not
  match.

  f must return a String.  The returned text is inserted literally:
  any $ or \\ in it is NOT interpreted as a group reference or
  escape."
  {:deprecated "1.2"}
  [^Pattern re f ^String s]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            ;; quote so appendReplacement does not expand $ or \
            rep (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (.toString buffer))
      ;; No match: hand back the input instead of nil.
      s)))
(defn partition
  "Splits the string into a lazy sequence of substrings, alternating
  between the text between matches and the matches of the pattern
  themselves (as returned by re-groups).  The sequence always starts
  with the text before the first match, which is an empty string when
  the string begins with a match.

  For example: (partition #\"[a-z]+\" \"abc123def\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^Pattern re ^String s]
  (let [matcher (re-matcher re s)
        walk (fn walk [from]
               (lazy-seq
                 (if (.find matcher)
                   (let [before (.subSequence s from (.start matcher))
                         hit (re-groups matcher)
                         ;; index just past the current match
                         resume (.end matcher)]
                     (cons before (cons hit (walk resume))))
                   ;; no further match: emit the remainder, if any
                   (when (< from (.length s))
                     (list (.subSequence s from (.length s)))))))]
    (walk 0)))
(defn ^String join
  "Returns a string of all elements in coll, in order, with separator
  placed between consecutive elements.  Like Perl's join."
  {:deprecated "1.2"}
  [^String separator coll]
  (let [sep (str separator)
        out (StringBuilder.)]
    (loop [items (seq coll)]
      (when items
        ;; str turns nil into "" and anything else into its toString
        (.append out (str (first items)))
        (when-let [more (next items)]
          (.append out sep)
          (recur more))))
    (.toString out)))
(defn ^String chop
  "Returns string without its last character.  A zero-length string
  is returned as is."
  [^String s]
  (let [len (count s)]
    (if (pos? len)
      (.substring s 0 (dec len))
      s)))
(defn ^String chomp
  "Strips every trailing newline \\n or return \\r character from
  string.  Note: String.trim() is similar and faster.
  Deprecated in 1.2.  Use clojure.string/trim-newline"
  {:deprecated "1.2"}
  [^String s]
  (->> s
       (replace-re #"[\r\n]+$" "")))
(defn ^String swap-case
  "Returns s with lower-case characters converted to upper case and
  every other character passed through String.toLowerCase().  Works
  per code point via docodepoints, and uses the locale-sensitive
  String.toUpperCase() and String.toLowerCase() methods."
  [^String s]
  (let [out (StringBuilder. (.length s))
        ;; one-slot scratch array for turning a code point into a String
        ^"[I" cell (int-array 1)]
    (docodepoints [cp s]
      (aset-int cell 0 cp)
      ;; Character.toUpperCase is not locale-sensitive, but
      ;; String.toUpperCase is; so the conversion goes through a String.
      (let [piece (String. cell 0 1)]
        (.append out (if (Character/isLowerCase cp)
                       (.toUpperCase piece)
                       (.toLowerCase piece)))))
    (.toString out)))
(defn ^String capitalize
  "Returns string with its first character upper-cased and all
  remaining characters lower-cased."
  {:deprecated "1.2"}
  [^String s]
  (let [len (count s)]
    (if (>= len 2)
      (let [head (.substring s 0 1)
            remainder (.substring s 1)]
        (str (.toUpperCase head) (.toLowerCase remainder)))
      ;; zero or one character: nothing to lower-case
      (.toUpperCase s))))
(defn ^String ltrim
  "Strips leading whitespace (regex \\s) from string.
  Deprecated in 1.2.  Use clojure.string/triml."
  {:deprecated "1.2"}
  [^String s]
  (->> s
       (replace-re #"^\s+" "")))
(defn ^String rtrim
  "Strips trailing whitespace (regex \\s) from string.
  Deprecated in 1.2.  Use clojure.string/trimr."
  {:deprecated "1.2"}
  [^String s]
  (->> s
       (replace-re #"\s+$" "")))
(defn split-lines
  "Returns a seq of the pieces of s separated by \\n or \\r\\n."
  {:deprecated "1.2"}
  [^String s]
  (-> #"\r?\n"
      (.split s)
      seq))
301 ;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn ^String map-str
  "Calls f on each element of coll, in order, and returns the
  concatenation of the results' string forms (as given by str)."
  [f coll]
  (let [out (StringBuilder.)]
    (doseq [item coll]
      ;; str renders nil as "" and everything else via toString
      (.append out (str (f item))))
    (.toString out)))
308 ;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn grep
  "Returns a lazy sequence of the elements of coll whose String
  representation (with str) contains a match for re, tested with
  re-find."
  [re coll]
  (for [item coll
        :when (re-find re (str item))]
    item))
(defn as-str
  "Like clojure.core/str, except that a keyword or symbol argument
  contributes its name rather than its literal representation.

  Example:
    (str :foo :bar)     ;;=> \":foo:bar\"
    (as-str :foo :bar)  ;;=> \"foobar\"

  Keywords or symbols nested inside data structures are not affected;
  they are rendered exactly as str would render them.

  Example:
    (str {:foo :bar})     ;;=> \"{:foo :bar}\"
    (as-str {:foo :bar})  ;;=> \"{:foo :bar}\" "
  ([] "")
  ([x]
     (cond
       (instance? clojure.lang.Named x) (name x)
       :else (str x)))
  ([x & ys]
     (let [sb (StringBuilder. ^String (as-str x))]
       (doseq [y ys]
         (.append sb ^String (as-str y)))
       (.toString sb))))
341 ;;; WRAPPERS
343 ;; The following functions are simple wrappers around java.lang.String
344 ;; functions. They are included here for completeness, and for use
345 ;; when mapping over a collection of strings.
(defn ^String upper-case
  "Returns string converted to upper case via String.toUpperCase()."
  {:deprecated "1.2"}
  [^String s]
  (. s (toUpperCase)))
(defn ^String lower-case
  "Returns string converted to lower case via String.toLowerCase()."
  {:deprecated "1.2"}
  [^String s]
  (. s (toLowerCase)))
(defn split
  "Splits string around matches of the regular expression re and
  returns a seq of the pieces.  The optional limit is passed to
  Pattern.split as its limit argument."
  {:deprecated "1.2"}
  ;; Pattern.split(input) is defined as split(input, 0).
  ([^Pattern re ^String s] (split re 0 s))
  ([^Pattern re limit ^String s]
     (-> re
         (.split s limit)
         seq)))
(defn ^String trim
  "Returns string with leading and trailing whitespace removed, as
  done by String.trim()."
  {:deprecated "1.2"}
  [^String s]
  (. s (trim)))
;; No ^String tag on the name: the result is a boolean, and a String
;; tag makes interop calls on the result compile against String and
;; fail with ClassCastException.
(defn substring?
  "True if s contains the substring, false otherwise."
  [substring ^String s]
  (.contains s ^CharSequence substring))
;; No ^String tag on the name: .charAt yields a character, and a
;; String tag makes interop calls on the result compile against String
;; and fail with ClassCastException.
(defn get
  "Gets the i'th character (zero-based, as with String.charAt) in
  string.  Note the argument order: the string comes first."
  {:deprecated "1.2"}
  [^String s i]
  (.charAt s i))