Mercurial > lasercutter
comparison src/clojure/contrib/string.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:35cf337adfcf | 10:ef7dbbd6452c |
---|---|
1 ;;; string.clj -- functional string utilities for Clojure | |
2 | |
3 ;; by Stuart Sierra, http://stuartsierra.com/ | |
4 ;; January 26, 2010 | |
5 | |
6 ;; Copyright (c) Stuart Sierra, 2010. All rights reserved. The use | |
7 ;; and distribution terms for this software are covered by the Eclipse | |
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) | |
9 ;; which can be found in the file epl-v10.html at the root of this | |
10 ;; distribution. By using this software in any fashion, you are | |
11 ;; agreeing to be bound by the terms of this license. You must not | |
12 ;; remove this notice, or any other, from this software. | |
13 | |
14 ;; DEPRECATED in 1.2: Many functions have moved to clojure.string. | |
15 | |
16 (ns ^{:author "Stuart Sierra" | |
17 :doc "This is a library of string manipulation functions. It | |
18 is intended as a replacement for clojure.contrib.string. | |
19 | |
20 You cannot (use 'clojure.contrib.string) because it defines | |
21 functions with the same names as functions in clojure.core. | |
22 Instead, do (require '[clojure.contrib.string :as s]) | |
23 or something similar. | |
24 | |
25 Goals: | |
26 1. Be functional | |
27 2. Most significant argument LAST, to work with ->> | |
28 3. At least O(n) performance for Strings of length n | |
29 | |
30 Some ideas are borrowed from | |
31 http://github.com/francoisdevlin/devlinsf-clojure-utils/"} | |
32 clojure.contrib.string | |
33 (:refer-clojure :exclude (take replace drop butlast partition | |
34 contains? get repeat reverse partial)) | |
35 (:import (java.util.regex Pattern))) | |
36 | |
37 | |
(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character in
  string.  Does NOT handle Unicode supplementary characters (above
  U+FFFF); every UTF-16 code unit is visited on its own.

  Throws an AssertionError at macro-expansion time unless bindings is
  a vector of exactly two forms."
  [bindings & body]
  ;; `vector?` is the predicate.  `(vector bindings)` only wraps its
  ;; argument in a fresh vector, which is always truthy, so the old
  ;; check could never fail.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))
52 | |
53 | |
(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string.  Handles Unicode
  supplementary characters (above U+FFFF) correctly.  An unpaired
  surrogate is delivered as its own code unit value and the character
  following it is still visited.

  Throws an AssertionError at macro-expansion time unless bindings is
  a vector of exactly two forms."
  [bindings & body]
  ;; `vector?` is the predicate; `(vector bindings)` is always truthy.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           ;; codePointAt combines a valid surrogate pair and otherwise
           ;; returns the single code unit; charCount then says how
           ;; many chars were consumed (1 or 2).  This keeps body
           ;; spliced in once and never skips a char after a lone
           ;; high surrogate.
           (let [~character (.codePointAt s# i#)
                 next# (+ i# (Character/charCount ~character))]
             ~@body
             (recur next#)))))))
77 | |
(defn codepoints
  "Returns a lazy sequence of integer Unicode code points in s.
  Handles Unicode supplementary characters (above U+FFFF) correctly.
  An unpaired surrogate is returned as its own code unit value and
  the character after it is not skipped."
  [^String s]
  (let [len (.length s)
        ;; Each step is wrapped in lazy-seq so the sequence is realised
        ;; one element at a time.  Previously only the first call was
        ;; lazy and the rest recursed eagerly, overflowing the stack on
        ;; long strings.
        step (fn step [i]
               (lazy-seq
                 (when (< i len)
                   (let [cp (.codePointAt s (int i))]
                     ;; charCount is 2 for a supplementary code point,
                     ;; otherwise 1.
                     (cons cp (step (+ i (Character/charCount cp))))))))]
    (step 0)))
90 | |
(defn ^String escape
  "Builds a new String from s by looking every character up in cmap
  (a function or a map).  A truthy result is appended in place of the
  character; when cmap yields nil the character itself is kept."
  {:deprecated "1.2"}
  [cmap ^String s]
  (let [out (StringBuilder. (.length s))]
    (loop [idx 0]
      (when (< idx (.length s))
        (let [ch (.charAt s idx)
              mapped (cmap ch)]
          (if mapped
            (.append out mapped)
            (.append out ch))
          (recur (inc idx)))))
    (.toString out)))
103 | |
(defn blank?
  "True when s is nil, the empty string, or made up solely of
  whitespace characters."
  {:deprecated "1.2"}
  [^String s]
  (not-any? (fn [^Character ch] (not (Character/isWhitespace ch))) s))
109 | |
(defn ^String take
  "Returns the leading n characters of s, or all of s when it is
  shorter than n."
  [n ^String s]
  (let [len (count s)]
    (cond
      (< len n) s
      :else (.substring s 0 n))))
116 | |
(defn ^String drop
  "Returns s without its leading n characters.  When n exceeds the
  length of s the result is the empty string."
  [n ^String s]
  (let [len (count s)]
    (cond
      (< len n) ""
      :else (.substring s n))))
124 | |
(defn ^String butlast
  "Returns s with its trailing n characters removed.  When n exceeds
  the length of s the result is the empty string."
  [n ^String s]
  (let [len (count s)]
    (cond
      (< len n) ""
      :else (.substring s 0 (- len n)))))
132 | |
(defn ^String tail
  "Returns the trailing n characters of s, or all of s when it is
  shorter than n."
  [n ^String s]
  (let [len (count s)]
    (cond
      (< len n) s
      :else (.substring s (- len n)))))
139 | |
(defn ^String repeat
  "Concatenates n copies of s into a single new String."
  [n ^String s]
  (->> s
       (clojure.core/repeat n)
       (apply str)))
144 | |
(defn ^String reverse
  "Returns a String holding the characters of s in reverse order,
  as produced by StringBuilder.reverse()."
  {:deprecated "1.2"}
  [^String s]
  (let [sb (StringBuilder. s)]
    ;; StringBuilder.reverse mutates in place and returns the builder.
    (.reverse sb)
    (.toString sb)))
150 | |
(defn replace-str
  "Returns s with every occurrence of the literal substring a
  replaced by b (delegates to String.replace)."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  (. s replace a b))
156 | |
(defn replace-char
  "Returns s with every occurrence of character a replaced by
  character b (delegates to String.replace)."
  {:deprecated "1.2"}
  [^Character a ^Character b ^String s]
  (. s replace a b))
162 | |
(defn replace-re
  "Returns s with every match of the pattern re replaced by
  replacement (delegates to Matcher.replaceAll)."
  {:deprecated "1.2"}
  [re replacement ^String s]
  (let [matcher (re-matcher re s)]
    (.replaceAll matcher replacement)))
168 | |
(defn replace-by
  "Replaces all matches of re in s with the result of
  (f (re-groups the-match)).

  The String returned by f is inserted literally: any `$` or `\\` it
  contains is NOT treated as a group reference or escape."
  {:deprecated "1.2"}
  [re f ^String s]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        (do
          ;; appendReplacement interprets `$n` and `\` in its
          ;; replacement argument; quote f's result so arbitrary text
          ;; (for example \"$5.00\") is inserted verbatim rather than
          ;; throwing or being rewritten.
          (.appendReplacement
            m buffer
            (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
          (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))
182 | |
(defn replace-first-str
  "Replace first occurrence of substring a with b in s.  Both a and b
  are treated as literal text, not as regex syntax."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  ;; Pattern/quote only returns a quoted *String*; it must still be
  ;; compiled before re-matcher can use it (passing the String directly
  ;; throws ClassCastException).  The replacement is quoted as well so
  ;; `$` and `\` in b are inserted verbatim.
  (.replaceFirst (re-matcher (Pattern/compile (Pattern/quote a)) s)
                 (java.util.regex.Matcher/quoteReplacement b)))
188 | |
(defn replace-first-re
  "Returns s with only the first match of the pattern re replaced by
  replacement (delegates to Matcher.replaceFirst)."
  {:deprecated "1.2"}
  [^Pattern re ^String replacement ^String s]
  (let [matcher (re-matcher re s)]
    (.replaceFirst matcher replacement)))
194 | |
(defn replace-first-by
  "Replace first match of re in s with the result of
  (f (re-groups the-match)).

  Returns s unchanged when re does not match.  The String returned by
  f is inserted literally: any `$` or `\\` it contains is NOT treated
  as a group reference or escape."
  {:deprecated "1.2"}
  [^Pattern re f ^String s]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            ;; Quote so appendReplacement does not interpret `$n`/`\`.
            rep (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; No match: hand the input back instead of falling through to nil.
      s)))
207 | |
(defn partition
  "Splits the string into a lazy sequence of substrings, alternating
  between the text between matches and the matches of the pattern
  themselves (as returned by re-groups).  The sequence always begins
  with the text before the first match, which is the empty string
  when the string starts with a match.

  For example: (partition #\"[a-z]+\" \"abc123def\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^Pattern re ^String s]
  (let [matcher (re-matcher re s)
        walk (fn walk [from]
               (lazy-seq
                 (if (.find matcher)
                   (let [before (.subSequence s from (.start matcher))
                         hit (re-groups matcher)
                         ;; start + length of the matched text, i.e.
                         ;; where the next gap begins
                         resume (+ (.start matcher)
                                   (count (.group matcher)))]
                     (cons before (cons hit (walk resume))))
                   ;; no more matches: emit the remainder, if any
                   (when (< from (.length s))
                     (list (.subSequence s from (.length s)))))))]
    (walk 0)))
228 | |
(defn ^String join
  "Concatenates the elements of coll into one string, placing
  separator between adjacent elements.  Like Perl's join."
  {:deprecated "1.2"}
  [^String separator coll]
  (->> coll
       (interpose separator)
       (apply str)))
235 | |
(defn ^String chop
  "Returns string without its final character.  A zero-length string
  is returned as is."
  [^String s]
  (let [len (count s)]
    (if (pos? len)
      (subs s 0 (dec len))
      s)))
244 | |
(defn ^String chomp
  "Strips every trailing newline \\n or return \\r character from
  string.  Note: String.trim() is similar and faster.
  Deprecated in 1.2.  Use clojure.string/trim-newline"
  {:deprecated "1.2"}
  [^String s]
  (let [trailing-eols (re-matcher #"[\r\n]+$" s)]
    (.replaceAll trailing-eols "")))
252 | |
(defn ^String swap-case
  "Returns s with the case of every character inverted: lower-case
  characters become upper-case and all others are lower-cased.
  Iterates by code point, so Unicode supplementary characters are
  handled.  Conversion goes through String.toUpperCase() and
  String.toLowerCase(), which are locale-sensitive."
  [^String s]
  (let [out (StringBuilder. (.length s))]
    (docodepoints [cp s]
      ;; Character.toUpperCase is not locale-sensitive, but
      ;; String.toUpperCase is; so each code point is first turned into
      ;; a one-code-point String.
      (let [piece (String. (Character/toChars cp))]
        (.append out (if (Character/isLowerCase cp)
                       (.toUpperCase piece)
                       (.toLowerCase piece)))))
    (.toString out)))
270 | |
(defn ^String capitalize
  "Upper-cases the first character of the string and lower-cases all
  the remaining characters."
  {:deprecated "1.2"}
  [^String s]
  (let [len (count s)]
    (if (>= len 2)
      (let [head (.substring s 0 1)
            remainder (.substring s 1)]
        (str (.toUpperCase head) (.toLowerCase remainder)))
      ;; zero or one character: nothing to lower-case
      (.toUpperCase s))))
280 | |
(defn ^String ltrim
  "Strips leading whitespace from string.
  Deprecated in 1.2.  Use clojure.string/triml."
  {:deprecated "1.2"}
  [^String s]
  (let [leading-ws (re-matcher #"^\s+" s)]
    (.replaceAll leading-ws "")))
287 | |
(defn ^String rtrim
  "Strips trailing whitespace from string.
  Deprecated in 1.2.  Use clojure.string/trimr."
  {:deprecated "1.2"}
  [^String s]
  (let [trailing-ws (re-matcher #"\s+$" s)]
    (.replaceAll trailing-ws "")))
294 | |
(defn split-lines
  "Breaks s into a seq of lines, splitting on \\n or \\r\\n."
  {:deprecated "1.2"}
  [^String s]
  (-> #"\r?\n"
      (.split s)
      seq))
300 | |
301 ;; borrowed from compojure.string, by James Reeves, EPL 1.0 | |
(defn ^String map-str
  "Calls f on every element of coll and concatenates the results
  into a single String."
  [f coll]
  (->> coll
       (map f)
       (apply str)))
307 | |
308 ;; borrowed from compojure.string, by James Reeves, EPL 1.0 | |
(defn grep
  "Keeps the elements of coll whose String representation (via str)
  matches the regular expression re according to re-find."
  [re coll]
  (filter #(re-find re (str %)) coll))
314 | |
(defn as-str
  "Like clojure.core/str, except that a keyword or symbol argument
  contributes its name rather than its literal representation.

  Example:
    (str :foo :bar)     ;;=> \":foo:bar\"
    (as-str :foo :bar)  ;;=> \"foobar\"

  Keywords or symbols nested inside data structures are not affected;
  they are rendered exactly as str would render them.

  Example:
    (str {:foo :bar})     ;;=> \"{:foo :bar}\"
    (as-str {:foo :bar})  ;;=> \"{:foo :bar}\" "
  ([] "")
  ([x]
     (if (instance? clojure.lang.Named x)
       (name x)
       (str x)))
  ([x & ys]
     ;; Accumulate the single-argument rendering of every argument.
     (let [sb (StringBuilder. ^String (as-str x))]
       (doseq [y ys]
         (.append sb ^String (as-str y)))
       (.toString sb))))
339 | |
340 | |
341 ;;; WRAPPERS | |
342 | |
343 ;; The following functions are simple wrappers around java.lang.String | |
344 ;; functions. They are included here for completeness, and for use | |
345 ;; when mapping over a collection of strings. | |
346 | |
(defn ^String upper-case
  "Returns string converted to upper-case via String.toUpperCase()."
  {:deprecated "1.2"}
  [^String s]
  (. s toUpperCase))
352 | |
(defn ^String lower-case
  "Returns string converted to lower-case via String.toLowerCase()."
  {:deprecated "1.2"}
  [^String s]
  (. s toLowerCase))
358 | |
(defn split
  "Splits string around matches of a regular expression and returns
  the pieces as a seq.  The optional limit argument is passed to
  Pattern.split as its limit."
  {:deprecated "1.2"}
  ([^Pattern re ^String s]
     (-> re (.split s) seq))
  ([^Pattern re limit ^String s]
     (-> re (.split s limit) seq)))
365 | |
(defn ^String trim
  "Returns string with whitespace stripped from both ends, using
  String.trim()."
  {:deprecated "1.2"}
  [^String s]
  (. s trim))
371 | |
;; The previous ^String return tag was wrong: String.contains returns
;; a boolean, and the bogus tag made the compiler emit String casts on
;; the result at interop call sites.
(defn substring?
  "True if s contains the substring.  Returns a boolean."
  [^String substring ^String s]
  (.contains s substring))
376 | |
;; The previous ^String return tag was wrong: String.charAt returns a
;; char, and the bogus tag made the compiler emit String casts on the
;; result at interop call sites.
(defn get
  "Gets the i'th character in string.  Returns a Character; throws
  StringIndexOutOfBoundsException when i is outside the string."
  {:deprecated "1.2"}
  [^String s i]
  (.charAt s i))
382 |