view src/clojure/contrib/str_utils2.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line source
1 ;;; str_utils2.clj -- functional string utilities for Clojure
3 ;; by Stuart Sierra, http://stuartsierra.com/
4 ;; August 19, 2009
6 ;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use
7 ;; and distribution terms for this software are covered by the Eclipse
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
9 ;; which can be found in the file epl-v10.html at the root of this
10 ;; distribution. By using this software in any fashion, you are
11 ;; agreeing to be bound by the terms of this license. You must not
12 ;; remove this notice, or any other, from this software.
14 ;; DEPRECATED in 1.2: Promoted to clojure.java.string. Note that
15 ;; many function names and semantics have changed
17 (ns ^{:author "Stuart Sierra"
18 :deprecated "1.2"
19 :doc "This is a library of string manipulation functions. It
20 is intented as a replacement for clojure.contrib.str-utils.
22 You cannot (use 'clojure.contrib.str-utils2) because it defines
23 functions with the same names as functions in clojure.core.
24 Instead, do (require '[clojure.contrib.str-utils2 :as s])
25 or something similar.
27 Goals:
28 1. Be functional
29 2. String argument first, to work with ->
30 3. Performance linear in string length
32 Some ideas are borrowed from
33 http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
34 clojure.contrib.str-utils2
35 (:refer-clojure :exclude (take replace drop butlast partition
36 contains? get repeat reverse partial))
37 (:import (java.util.regex Pattern)))
40 (defmacro dochars
41 "bindings => [name string]
43 Repeatedly executes body, with name bound to each character in
44 string. Does NOT handle Unicode supplementary characters (above
45 U+FFFF)."
46 [bindings & body]
47 (assert (vector bindings))
48 (assert (= 2 (count bindings)))
49 ;; This seems to be the fastest way to iterate over characters.
50 `(let [^String s# ~(second bindings)]
51 (dotimes [i# (.length s#)]
52 (let [~(first bindings) (.charAt s# i#)]
53 ~@body))))
56 (defmacro docodepoints
57 "bindings => [name string]
59 Repeatedly executes body, with name bound to the integer code point
60 of each Unicode character in the string. Handles Unicode
61 supplementary characters (above U+FFFF) correctly."
62 [bindings & body]
63 (assert (vector bindings))
64 (assert (= 2 (count bindings)))
65 (let [character (first bindings)
66 string (second bindings)]
67 `(let [^String s# ~string
68 len# (.length s#)]
69 (loop [i# 0]
70 (when (< i# len#)
71 (let [~character (.charAt s# i#)]
72 (if (Character/isHighSurrogate ~character)
73 (let [~character (.codePointAt s# i#)]
74 ~@body
75 (recur (+ 2 i#)))
76 (let [~character (int ~character)]
77 ~@body
78 (recur (inc i#))))))))))
80 (defn codepoints
81 "Returns a sequence of integer Unicode code points in s. Handles
82 Unicode supplementary characters (above U+FFFF) correctly."
83 [^String s]
84 (let [len (.length s)
85 f (fn thisfn [^String s i]
86 (when (< i len)
87 (let [c (.charAt s i)]
88 (if (Character/isHighSurrogate c)
89 (cons (.codePointAt s i) (thisfn s (+ 2 i)))
90 (cons (int c) (thisfn s (inc i)))))))]
91 (lazy-seq (f s 0))))
93 (defn ^String escape
94 "Returns a new String by applying cmap (a function or a map) to each
95 character in s. If cmap returns nil, the original character is
96 added to the output unchanged."
97 [^String s cmap]
98 (let [buffer (StringBuilder. (.length s))]
99 (dochars [c s]
100 (if-let [r (cmap c)]
101 (.append buffer r)
102 (.append buffer c)))
103 (.toString buffer)))
105 (defn blank?
106 "True if s is nil, empty, or contains only whitespace."
107 [^String s]
108 (every? (fn [^Character c] (Character/isWhitespace c)) s))
110 (defn ^String take
111 "Take first n characters from s, up to the length of s.
113 Note the argument order is the opposite of clojure.core/take; this
114 is to keep the string as the first argument for use with ->"
115 [^String s n]
116 (if (< (count s) n)
117 s
118 (.substring s 0 n)))
120 (defn ^String drop
121 "Drops first n characters from s. Returns an empty string if n is
122 greater than the length of s.
124 Note the argument order is the opposite of clojure.core/drop; this
125 is to keep the string as the first argument for use with ->"
126 [^String s n]
127 (if (< (count s) n)
128 ""
129 (.substring s n)))
131 (defn ^String butlast
132 "Returns s without the last n characters. Returns an empty string
133 if n is greater than the length of s.
135 Note the argument order is the opposite of clojure.core/butlast;
136 this is to keep the string as the first argument for use with ->"
137 [^String s n]
138 (if (< (count s) n)
139 ""
140 (.substring s 0 (- (count s) n))))
142 (defn ^String tail
143 "Returns the last n characters of s."
144 [^String s n]
145 (if (< (count s) n)
146 s
147 (.substring s (- (count s) n))))
149 (defn ^String repeat
150 "Returns a new String containing s repeated n times."
151 [^String s n]
152 (apply str (clojure.core/repeat n s)))
154 (defn ^String reverse
155 "Returns s with its characters reversed."
156 [^String s]
157 (.toString (.reverse (StringBuilder. s))))
159 (defmulti
160 ^{:doc "Replaces all instances of pattern in string with replacement.
162 Allowed argument types for pattern and replacement are:
163 1. String and String
164 2. Character and Character
165 3. regex Pattern and String
166 (Uses java.util.regex.Matcher.replaceAll)
167 4. regex Pattern and function
168 (Calls function with re-groups of each match, uses return
169 value as replacement.)"
170 :arglists '([string pattern replacement])
171 :tag String}
172 replace
173 (fn [^String string pattern replacement]
174 [(class pattern) (class replacement)]))
176 (defmethod replace [String String] [^String s ^String a ^String b]
177 (.replace s a b))
179 (defmethod replace [Character Character] [^String s ^Character a ^Character b]
180 (.replace s a b))
182 (defmethod replace [Pattern String] [^String s re replacement]
183 (.replaceAll (re-matcher re s) replacement))
185 (defmethod replace [Pattern clojure.lang.IFn] [^String s re replacement]
186 (let [m (re-matcher re s)]
187 (let [buffer (StringBuffer. (.length s))]
188 (loop []
189 (if (.find m)
190 (do (.appendReplacement m buffer (replacement (re-groups m)))
191 (recur))
192 (do (.appendTail m buffer)
193 (.toString buffer)))))))
195 (defmulti
196 ^{:doc "Replaces the first instance of pattern in s with replacement.
198 Allowed argument types for pattern and replacement are:
199 1. String and String
200 2. regex Pattern and String
201 (Uses java.util.regex.Matcher.replaceAll)
202 3. regex Pattern and function
203 "
204 :arglists '([s pattern replacement])
205 :tag String}
206 replace-first
207 (fn [s pattern replacement]
208 [(class pattern) (class replacement)]))
210 (defmethod replace-first [String String] [^String s pattern replacement]
211 (.replaceFirst (re-matcher (Pattern/quote pattern) s) replacement))
213 (defmethod replace-first [Pattern String] [^String s re replacement]
214 (.replaceFirst (re-matcher re s) replacement))
216 (defmethod replace-first [Pattern clojure.lang.IFn] [^String s ^Pattern re f]
217 (let [m (re-matcher re s)]
218 (let [buffer (StringBuffer.)]
219 (if (.find m)
220 (let [rep (f (re-groups m))]
221 (.appendReplacement m buffer rep)
222 (.appendTail m buffer)
223 (str buffer))))))
225 (defn partition
226 "Splits the string into a lazy sequence of substrings, alternating
227 between substrings that match the patthern and the substrings
228 between the matches. The sequence always starts with the substring
229 before the first match, or an empty string if the beginning of the
230 string matches.
232 For example: (partition \"abc123def\" #\"[a-z]+\")
233 returns: (\"\" \"abc\" \"123\" \"def\")"
234 [^String s ^Pattern re]
235 (let [m (re-matcher re s)]
236 ((fn step [prevend]
237 (lazy-seq
238 (if (.find m)
239 (cons (.subSequence s prevend (.start m))
240 (cons (re-groups m)
241 (step (+ (.start m) (count (.group m))))))
242 (when (< prevend (.length s))
243 (list (.subSequence s prevend (.length s)))))))
244 0)))
246 (defn ^String join
247 "Returns a string of all elements in coll, separated by
248 separator. Like Perl's join."
249 [^String separator coll]
250 (apply str (interpose separator coll)))
252 (defn ^String chop
253 "Removes the last character of string, does nothing on a zero-length
254 string."
255 [^String s]
256 (let [size (count s)]
257 (if (zero? size)
258 s
259 (subs s 0 (dec (count s))))))
261 (defn ^String chomp
262 "Removes all trailing newline \\n or return \\r characters from
263 string. Note: String.trim() is similar and faster."
264 [^String s]
265 (replace s #"[\r\n]+$" ""))
267 (defn title-case [^String s]
268 (throw (Exception. "title-case not implemeted yet")))
270 (defn ^String swap-case
271 "Changes upper case characters to lower case and vice-versa.
272 Handles Unicode supplementary characters correctly. Uses the
273 locale-sensitive String.toUpperCase() and String.toLowerCase()
274 methods."
275 [^String s]
276 (let [buffer (StringBuilder. (.length s))
277 ;; array to make a String from one code point
278 ^"[I" array (make-array Integer/TYPE 1)]
279 (docodepoints [c s]
280 (aset-int array 0 c)
281 (if (Character/isLowerCase c)
282 ;; Character.toUpperCase is not locale-sensitive, but
283 ;; String.toUpperCase is; so we use a String.
284 (.append buffer (.toUpperCase (String. array 0 1)))
285 (.append buffer (.toLowerCase (String. array 0 1)))))
286 (.toString buffer)))
288 (defn ^String capitalize
289 "Converts first character of the string to upper-case, all other
290 characters to lower-case."
291 [^String s]
292 (if (< (count s) 2)
293 (.toUpperCase s)
294 (str (.toUpperCase ^String (subs s 0 1))
295 (.toLowerCase ^String (subs s 1)))))
297 (defn ^String ltrim
298 "Removes whitespace from the left side of string."
299 [^String s]
300 (replace s #"^\s+" ""))
302 (defn ^String rtrim
303 "Removes whitespace from the right side of string."
304 [^String s]
305 (replace s #"\s+$" ""))
307 (defn split-lines
308 "Splits s on \\n or \\r\\n."
309 [^String s]
310 (seq (.split #"\r?\n" s)))
312 ;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0
313 (defn ^String map-str
314 "Apply f to each element of coll, concatenate all results into a
315 String."
316 [f coll]
317 (apply str (map f coll)))
319 ;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0
320 (defn grep
321 "Filters elements of coll by a regular expression. The String
322 representation (with str) of each element is tested with re-find."
323 [re coll]
324 (filter (fn [x] (re-find re (str x))) coll))
326 (defn partial
327 "Like clojure.core/partial for functions that take their primary
328 argument first.
330 Takes a function f and its arguments, NOT INCLUDING the first
331 argument. Returns a new function whose first argument will be the
332 first argument to f.
334 Example: (str-utils2/partial str-utils2/take 2)
335 ;;=> (fn [s] (str-utils2/take s 2))"
336 [f & args]
337 (fn [s & more] (apply f s (concat args more))))
340 ;;; WRAPPERS
342 ;; The following functions are simple wrappers around java.lang.String
343 ;; functions. They are included here for completeness, and for use
344 ;; when mapping over a collection of strings.
346 (defn ^String upper-case
347 "Converts string to all upper-case."
348 [^String s]
349 (.toUpperCase s))
351 (defn ^String lower-case
352 "Converts string to all lower-case."
353 [^String s]
354 (.toLowerCase s))
356 (defn split
357 "Splits string on a regular expression. Optional argument limit is
358 the maximum number of splits."
359 ([^String s ^Pattern re] (seq (.split re s)))
360 ([^String s ^Pattern re limit] (seq (.split re s limit))))
362 (defn ^String trim
363 "Removes whitespace from both ends of string."
364 [^String s]
365 (.trim s))
367 (defn ^String contains?
368 "True if s contains the substring."
369 [^String s substring]
370 (.contains s substring))
372 (defn ^String get
373 "Gets the i'th character in string."
374 [^String s i]
375 (.charAt s i))