Mercurial > lasercutter
comparison src/clojure/contrib/str_utils2.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:35cf337adfcf | 10:ef7dbbd6452c |
---|---|
1 ;;; str_utils2.clj -- functional string utilities for Clojure | |
2 | |
3 ;; by Stuart Sierra, http://stuartsierra.com/ | |
4 ;; August 19, 2009 | |
5 | |
6 ;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use | |
7 ;; and distribution terms for this software are covered by the Eclipse | |
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) | |
9 ;; which can be found in the file epl-v10.html at the root of this | |
10 ;; distribution. By using this software in any fashion, you are | |
11 ;; agreeing to be bound by the terms of this license. You must not | |
12 ;; remove this notice, or any other, from this software. | |
13 | |
14 ;; DEPRECATED in 1.2: Promoted to clojure.java.string. Note that | |
15 ;; many function names and semantics have changed | |
16 | |
17 (ns ^{:author "Stuart Sierra" | |
18 :deprecated "1.2" | |
19 :doc "This is a library of string manipulation functions. It | |
20 is intented as a replacement for clojure.contrib.str-utils. | |
21 | |
22 You cannot (use 'clojure.contrib.str-utils2) because it defines | |
23 functions with the same names as functions in clojure.core. | |
24 Instead, do (require '[clojure.contrib.str-utils2 :as s]) | |
25 or something similar. | |
26 | |
27 Goals: | |
28 1. Be functional | |
29 2. String argument first, to work with -> | |
30 3. Performance linear in string length | |
31 | |
32 Some ideas are borrowed from | |
33 http://github.com/francoisdevlin/devlinsf-clojure-utils/"} | |
34 clojure.contrib.str-utils2 | |
35 (:refer-clojure :exclude (take replace drop butlast partition | |
36 contains? get repeat reverse partial)) | |
37 (:import (java.util.regex Pattern))) | |
38 | |
39 | |
40 (defmacro dochars | |
41 "bindings => [name string] | |
42 | |
43 Repeatedly executes body, with name bound to each character in | |
44 string. Does NOT handle Unicode supplementary characters (above | |
45 U+FFFF)." | |
46 [bindings & body] | |
47 (assert (vector bindings)) | |
48 (assert (= 2 (count bindings))) | |
49 ;; This seems to be the fastest way to iterate over characters. | |
50 `(let [^String s# ~(second bindings)] | |
51 (dotimes [i# (.length s#)] | |
52 (let [~(first bindings) (.charAt s# i#)] | |
53 ~@body)))) | |
54 | |
55 | |
56 (defmacro docodepoints | |
57 "bindings => [name string] | |
58 | |
59 Repeatedly executes body, with name bound to the integer code point | |
60 of each Unicode character in the string. Handles Unicode | |
61 supplementary characters (above U+FFFF) correctly." | |
62 [bindings & body] | |
63 (assert (vector bindings)) | |
64 (assert (= 2 (count bindings))) | |
65 (let [character (first bindings) | |
66 string (second bindings)] | |
67 `(let [^String s# ~string | |
68 len# (.length s#)] | |
69 (loop [i# 0] | |
70 (when (< i# len#) | |
71 (let [~character (.charAt s# i#)] | |
72 (if (Character/isHighSurrogate ~character) | |
73 (let [~character (.codePointAt s# i#)] | |
74 ~@body | |
75 (recur (+ 2 i#))) | |
76 (let [~character (int ~character)] | |
77 ~@body | |
78 (recur (inc i#)))))))))) | |
79 | |
80 (defn codepoints | |
81 "Returns a sequence of integer Unicode code points in s. Handles | |
82 Unicode supplementary characters (above U+FFFF) correctly." | |
83 [^String s] | |
84 (let [len (.length s) | |
85 f (fn thisfn [^String s i] | |
86 (when (< i len) | |
87 (let [c (.charAt s i)] | |
88 (if (Character/isHighSurrogate c) | |
89 (cons (.codePointAt s i) (thisfn s (+ 2 i))) | |
90 (cons (int c) (thisfn s (inc i)))))))] | |
91 (lazy-seq (f s 0)))) | |
92 | |
93 (defn ^String escape | |
94 "Returns a new String by applying cmap (a function or a map) to each | |
95 character in s. If cmap returns nil, the original character is | |
96 added to the output unchanged." | |
97 [^String s cmap] | |
98 (let [buffer (StringBuilder. (.length s))] | |
99 (dochars [c s] | |
100 (if-let [r (cmap c)] | |
101 (.append buffer r) | |
102 (.append buffer c))) | |
103 (.toString buffer))) | |
104 | |
105 (defn blank? | |
106 "True if s is nil, empty, or contains only whitespace." | |
107 [^String s] | |
108 (every? (fn [^Character c] (Character/isWhitespace c)) s)) | |
109 | |
110 (defn ^String take | |
111 "Take first n characters from s, up to the length of s. | |
112 | |
113 Note the argument order is the opposite of clojure.core/take; this | |
114 is to keep the string as the first argument for use with ->" | |
115 [^String s n] | |
116 (if (< (count s) n) | |
117 s | |
118 (.substring s 0 n))) | |
119 | |
120 (defn ^String drop | |
121 "Drops first n characters from s. Returns an empty string if n is | |
122 greater than the length of s. | |
123 | |
124 Note the argument order is the opposite of clojure.core/drop; this | |
125 is to keep the string as the first argument for use with ->" | |
126 [^String s n] | |
127 (if (< (count s) n) | |
128 "" | |
129 (.substring s n))) | |
130 | |
131 (defn ^String butlast | |
132 "Returns s without the last n characters. Returns an empty string | |
133 if n is greater than the length of s. | |
134 | |
135 Note the argument order is the opposite of clojure.core/butlast; | |
136 this is to keep the string as the first argument for use with ->" | |
137 [^String s n] | |
138 (if (< (count s) n) | |
139 "" | |
140 (.substring s 0 (- (count s) n)))) | |
141 | |
142 (defn ^String tail | |
143 "Returns the last n characters of s." | |
144 [^String s n] | |
145 (if (< (count s) n) | |
146 s | |
147 (.substring s (- (count s) n)))) | |
148 | |
149 (defn ^String repeat | |
150 "Returns a new String containing s repeated n times." | |
151 [^String s n] | |
152 (apply str (clojure.core/repeat n s))) | |
153 | |
154 (defn ^String reverse | |
155 "Returns s with its characters reversed." | |
156 [^String s] | |
157 (.toString (.reverse (StringBuilder. s)))) | |
158 | |
159 (defmulti | |
160 ^{:doc "Replaces all instances of pattern in string with replacement. | |
161 | |
162 Allowed argument types for pattern and replacement are: | |
163 1. String and String | |
164 2. Character and Character | |
165 3. regex Pattern and String | |
166 (Uses java.util.regex.Matcher.replaceAll) | |
167 4. regex Pattern and function | |
168 (Calls function with re-groups of each match, uses return | |
169 value as replacement.)" | |
170 :arglists '([string pattern replacement]) | |
171 :tag String} | |
172 replace | |
173 (fn [^String string pattern replacement] | |
174 [(class pattern) (class replacement)])) | |
175 | |
176 (defmethod replace [String String] [^String s ^String a ^String b] | |
177 (.replace s a b)) | |
178 | |
179 (defmethod replace [Character Character] [^String s ^Character a ^Character b] | |
180 (.replace s a b)) | |
181 | |
182 (defmethod replace [Pattern String] [^String s re replacement] | |
183 (.replaceAll (re-matcher re s) replacement)) | |
184 | |
185 (defmethod replace [Pattern clojure.lang.IFn] [^String s re replacement] | |
186 (let [m (re-matcher re s)] | |
187 (let [buffer (StringBuffer. (.length s))] | |
188 (loop [] | |
189 (if (.find m) | |
190 (do (.appendReplacement m buffer (replacement (re-groups m))) | |
191 (recur)) | |
192 (do (.appendTail m buffer) | |
193 (.toString buffer))))))) | |
194 | |
195 (defmulti | |
196 ^{:doc "Replaces the first instance of pattern in s with replacement. | |
197 | |
198 Allowed argument types for pattern and replacement are: | |
199 1. String and String | |
200 2. regex Pattern and String | |
201 (Uses java.util.regex.Matcher.replaceAll) | |
202 3. regex Pattern and function | |
203 " | |
204 :arglists '([s pattern replacement]) | |
205 :tag String} | |
206 replace-first | |
207 (fn [s pattern replacement] | |
208 [(class pattern) (class replacement)])) | |
209 | |
210 (defmethod replace-first [String String] [^String s pattern replacement] | |
211 (.replaceFirst (re-matcher (Pattern/quote pattern) s) replacement)) | |
212 | |
213 (defmethod replace-first [Pattern String] [^String s re replacement] | |
214 (.replaceFirst (re-matcher re s) replacement)) | |
215 | |
216 (defmethod replace-first [Pattern clojure.lang.IFn] [^String s ^Pattern re f] | |
217 (let [m (re-matcher re s)] | |
218 (let [buffer (StringBuffer.)] | |
219 (if (.find m) | |
220 (let [rep (f (re-groups m))] | |
221 (.appendReplacement m buffer rep) | |
222 (.appendTail m buffer) | |
223 (str buffer)))))) | |
224 | |
225 (defn partition | |
226 "Splits the string into a lazy sequence of substrings, alternating | |
227 between substrings that match the patthern and the substrings | |
228 between the matches. The sequence always starts with the substring | |
229 before the first match, or an empty string if the beginning of the | |
230 string matches. | |
231 | |
232 For example: (partition \"abc123def\" #\"[a-z]+\") | |
233 returns: (\"\" \"abc\" \"123\" \"def\")" | |
234 [^String s ^Pattern re] | |
235 (let [m (re-matcher re s)] | |
236 ((fn step [prevend] | |
237 (lazy-seq | |
238 (if (.find m) | |
239 (cons (.subSequence s prevend (.start m)) | |
240 (cons (re-groups m) | |
241 (step (+ (.start m) (count (.group m)))))) | |
242 (when (< prevend (.length s)) | |
243 (list (.subSequence s prevend (.length s))))))) | |
244 0))) | |
245 | |
246 (defn ^String join | |
247 "Returns a string of all elements in coll, separated by | |
248 separator. Like Perl's join." | |
249 [^String separator coll] | |
250 (apply str (interpose separator coll))) | |
251 | |
252 (defn ^String chop | |
253 "Removes the last character of string, does nothing on a zero-length | |
254 string." | |
255 [^String s] | |
256 (let [size (count s)] | |
257 (if (zero? size) | |
258 s | |
259 (subs s 0 (dec (count s)))))) | |
260 | |
261 (defn ^String chomp | |
262 "Removes all trailing newline \\n or return \\r characters from | |
263 string. Note: String.trim() is similar and faster." | |
264 [^String s] | |
265 (replace s #"[\r\n]+$" "")) | |
266 | |
267 (defn title-case [^String s] | |
268 (throw (Exception. "title-case not implemeted yet"))) | |
269 | |
270 (defn ^String swap-case | |
271 "Changes upper case characters to lower case and vice-versa. | |
272 Handles Unicode supplementary characters correctly. Uses the | |
273 locale-sensitive String.toUpperCase() and String.toLowerCase() | |
274 methods." | |
275 [^String s] | |
276 (let [buffer (StringBuilder. (.length s)) | |
277 ;; array to make a String from one code point | |
278 ^"[I" array (make-array Integer/TYPE 1)] | |
279 (docodepoints [c s] | |
280 (aset-int array 0 c) | |
281 (if (Character/isLowerCase c) | |
282 ;; Character.toUpperCase is not locale-sensitive, but | |
283 ;; String.toUpperCase is; so we use a String. | |
284 (.append buffer (.toUpperCase (String. array 0 1))) | |
285 (.append buffer (.toLowerCase (String. array 0 1))))) | |
286 (.toString buffer))) | |
287 | |
288 (defn ^String capitalize | |
289 "Converts first character of the string to upper-case, all other | |
290 characters to lower-case." | |
291 [^String s] | |
292 (if (< (count s) 2) | |
293 (.toUpperCase s) | |
294 (str (.toUpperCase ^String (subs s 0 1)) | |
295 (.toLowerCase ^String (subs s 1))))) | |
296 | |
297 (defn ^String ltrim | |
298 "Removes whitespace from the left side of string." | |
299 [^String s] | |
300 (replace s #"^\s+" "")) | |
301 | |
302 (defn ^String rtrim | |
303 "Removes whitespace from the right side of string." | |
304 [^String s] | |
305 (replace s #"\s+$" "")) | |
306 | |
307 (defn split-lines | |
308 "Splits s on \\n or \\r\\n." | |
309 [^String s] | |
310 (seq (.split #"\r?\n" s))) | |
311 | |
312 ;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0 | |
313 (defn ^String map-str | |
314 "Apply f to each element of coll, concatenate all results into a | |
315 String." | |
316 [f coll] | |
317 (apply str (map f coll))) | |
318 | |
319 ;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0 | |
320 (defn grep | |
321 "Filters elements of coll by a regular expression. The String | |
322 representation (with str) of each element is tested with re-find." | |
323 [re coll] | |
324 (filter (fn [x] (re-find re (str x))) coll)) | |
325 | |
326 (defn partial | |
327 "Like clojure.core/partial for functions that take their primary | |
328 argument first. | |
329 | |
330 Takes a function f and its arguments, NOT INCLUDING the first | |
331 argument. Returns a new function whose first argument will be the | |
332 first argument to f. | |
333 | |
334 Example: (str-utils2/partial str-utils2/take 2) | |
335 ;;=> (fn [s] (str-utils2/take s 2))" | |
336 [f & args] | |
337 (fn [s & more] (apply f s (concat args more)))) | |
338 | |
339 | |
340 ;;; WRAPPERS | |
341 | |
342 ;; The following functions are simple wrappers around java.lang.String | |
343 ;; functions. They are included here for completeness, and for use | |
344 ;; when mapping over a collection of strings. | |
345 | |
346 (defn ^String upper-case | |
347 "Converts string to all upper-case." | |
348 [^String s] | |
349 (.toUpperCase s)) | |
350 | |
351 (defn ^String lower-case | |
352 "Converts string to all lower-case." | |
353 [^String s] | |
354 (.toLowerCase s)) | |
355 | |
356 (defn split | |
357 "Splits string on a regular expression. Optional argument limit is | |
358 the maximum number of splits." | |
359 ([^String s ^Pattern re] (seq (.split re s))) | |
360 ([^String s ^Pattern re limit] (seq (.split re s limit)))) | |
361 | |
362 (defn ^String trim | |
363 "Removes whitespace from both ends of string." | |
364 [^String s] | |
365 (.trim s)) | |
366 | |
367 (defn ^String contains? | |
368 "True if s contains the substring." | |
369 [^String s substring] | |
370 (.contains s substring)) | |
371 | |
372 (defn ^String get | |
373 "Gets the i'th character in string." | |
374 [^String s i] | |
375 (.charAt s i)) | |
376 |