rlm@10
|
1 ; Copyright (c) Rich Hickey. All rights reserved.
|
rlm@10
|
2 ; The use and distribution terms for this software are covered by the
|
rlm@10
|
3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
|
rlm@10
|
4 ; which can be found in the file epl-v10.html at the root of this distribution.
|
rlm@10
|
5 ; By using this software in any fashion, you are agreeing to be bound by
|
rlm@10
|
6 ; the terms of this license.
|
rlm@10
|
7 ; You must not remove this notice, or any other, from this software.
|
rlm@10
|
8
|
rlm@10
|
9 (ns ^{:doc "Clojure String utilities
|
rlm@10
|
10
|
rlm@10
|
11 It is poor form to (:use clojure.string). Instead, use require
|
rlm@10
|
12 with :as to specify a prefix, e.g.
|
rlm@10
|
13
|
rlm@10
|
14 (ns your.namespace.here
|
rlm@10
|
15 (:require [clojure.string :as str]))
|
rlm@10
|
16
|
rlm@10
|
17 Design notes for clojure.string:
|
rlm@10
|
18
|
rlm@10
|
19 1. Strings are objects (as opposed to sequences). As such, the
|
rlm@10
|
20 string being manipulated is the first argument to a function;
|
rlm@10
|
21 passing nil will result in a NullPointerException unless
|
rlm@10
|
22 documented otherwise. If you want sequence-y behavior instead,
|
rlm@10
|
23 use a sequence.
|
rlm@10
|
24
|
rlm@10
|
25 2. Functions are generally not lazy, and call straight to host
|
rlm@10
|
26 methods where those are available and efficient.
|
rlm@10
|
27
|
rlm@10
|
28 3. Functions take advantage of String implementation details to
|
rlm@10
|
29 write high-performing loop/recurs instead of using higher-order
|
rlm@10
|
30 functions. (This is not idiomatic in general-purpose application
|
rlm@10
|
31 code.)
|
rlm@10
|
32
|
rlm@10
|
33 4. When a function is documented to accept a string argument, it
|
rlm@10
|
34 will take any implementation of the correct *interface* on the
|
rlm@10
|
35 host platform. In Java, this is CharSequence, which is more
|
rlm@10
|
36 general than String. In ordinary usage you will almost always
|
rlm@10
|
37 pass concrete strings. If you are doing something unusual,
|
rlm@10
|
38 e.g. passing a mutable implementation of CharSequence, then
|
rlm@10
|
39 thread-safety is your responsibility."
|
rlm@10
|
40 :author "Stuart Sierra, Stuart Halloway, David Liebke"}
|
rlm@10
|
41 clojure.string
|
rlm@10
|
42 (:refer-clojure :exclude (replace reverse))
|
rlm@10
|
43 (:import (java.util.regex Pattern)
|
rlm@10
|
44 clojure.lang.LazilyPersistentVector))
|
rlm@10
|
45
|
rlm@10
|
(defn ^String reverse
  "Returns a new string holding the characters of s in reverse order."
  {:added "1.2"}
  [^CharSequence s]
  ;; Copy s into a StringBuilder, reverse it in place, then snapshot it.
  (-> (StringBuilder. s)
      .reverse
      .toString))
|
rlm@10
|
51
|
rlm@10
|
(defn- replace-by
  "Returns a string built from s in which every match of the regex re is
  replaced by the string that f returns for that match.

  f is called with (re-groups m) for the current match and must return a
  string. The returned text is inserted literally: a `$` or `\\` in it is
  not treated as a group reference or an escape."
  [^CharSequence s re f]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (while (.find m)
      ;; appendReplacement interprets $ and \ in its argument, so the
      ;; function's result is quoted to make it go in verbatim.
      (.appendReplacement m buffer
                          (java.util.regex.Matcher/quoteReplacement
                           (f (re-groups m)))))
    ;; Copy whatever follows the last match (or all of s when nothing matched).
    (.appendTail m buffer)
    (.toString buffer)))
|
rlm@10
|
62
|
rlm@10
|
(defn ^String replace
  "Returns s with every occurrence of match replaced by replacement.

   The accepted match/replacement pairings are:

   string / string
   char / char
   pattern / (string or function of match).

   Any other kind of match raises IllegalArgumentException.

   See also replace-first."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [text (.toString s)]
    ;; Dispatch on the runtime type of match; the clause order matters only
    ;; in that the fall-through clause rejects unsupported types.
    (condp instance? match
      Character    (.replace text ^Character match ^Character replacement)
      CharSequence (.replace text ^CharSequence match ^CharSequence replacement)
      Pattern      (if-not (instance? CharSequence replacement)
                     ;; Non-string replacement: treat it as a function of the match.
                     (replace-by text match replacement)
                     (.replaceAll (re-matcher ^Pattern match text)
                                  (.toString ^CharSequence replacement)))
      (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
|
rlm@10
|
84
|
rlm@10
|
(defn- replace-first-by
  "Returns a string built from s in which the first match of the regex re
  is replaced by the string that f returns for that match. When re does
  not match at all, the text of s is returned unchanged.

  f is called with (re-groups m) for the match and must return a string.
  The returned text is inserted literally: a `$` or `\\` in it is not
  treated as a group reference or an escape."
  [^CharSequence s ^Pattern re f]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            ;; appendReplacement interprets $ and \, so quote f's result.
            rep (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; No match: hand back the input text rather than nil.
      (str s))))
|
rlm@10
|
94
|
rlm@10
|
(defn- replace-first-char
  "Returns the text of s with the first occurrence of the character match
  swapped for (str replace). When match does not occur, the text of s is
  returned as is."
  [^CharSequence s ^Character match replace]
  (let [text (.toString s)
        pos (.indexOf text (int match))]
    (if (neg? pos)
      text
      ;; Stitch together: everything before the hit, the replacement,
      ;; everything after the hit.
      (let [head (subs text 0 pos)
            tail (subs text (inc pos))]
        (str head replace tail)))))
|
rlm@10
|
102
|
rlm@10
|
(defn ^String replace-first
  "Replaces the first instance of match with replacement in s.

   match/replacement can be:

   char / char
   string / string
   pattern / (string or function of match).

   With a string match, both match and replacement are taken literally.
   With a pattern match and a string replacement, the replacement is
   handed to java.util.regex.Matcher.replaceFirst as is, so $1, $2, etc.
   in it refer to capturing groups of the match.

   Any other kind of match raises IllegalArgumentException.

   See also replace."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [s (.toString s)]
    (cond
     (instance? Character match)
     (replace-first-char s match replacement)
     (instance? CharSequence match)
     ;; Both sides are plain text here: quote the pattern AND the
     ;; replacement so regex metacharacters, $ and \ are taken verbatim,
     ;; the same way replace treats string / string.
     (.replaceFirst s (Pattern/quote (.toString ^CharSequence match))
                    (java.util.regex.Matcher/quoteReplacement
                     (.toString ^CharSequence replacement)))
     (instance? Pattern match)
     (if (instance? CharSequence replacement)
       (.replaceFirst (re-matcher ^Pattern match s)
                      (.toString ^CharSequence replacement))
       ;; Non-string replacement: treat it as a function of the match.
       (replace-first-by s match replacement))
     :else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
|
rlm@10
|
128
|
rlm@10
|
129
|
rlm@10
|
(defn ^String join
  "Concatenates the string form of every element of coll into one string.
  When a separator is supplied, its string form is placed between
  consecutive elements. Like Perl's join."
  {:added "1.2"}
  ([coll]
     (apply str coll))
  ([separator coll]
     (let [items (seq coll)
           ;; Seed the builder with the first element ("" for an empty coll).
           sb (StringBuilder. (str (first items)))
           sep (str separator)]
       (doseq [item (next items)]
         (.append sb sep)
         (.append sb (str item)))
       (.toString sb))))
|
rlm@10
|
145
|
rlm@10
|
(defn ^String capitalize
  "Returns s with its first character upper-cased and every remaining
  character lower-cased."
  {:added "1.2"}
  [^CharSequence s]
  (let [text (.toString s)]
    (if (>= (count text) 2)
      (let [head (subs text 0 1)
            tail (subs text 1)]
        (str (.toUpperCase head) (.toLowerCase tail)))
      ;; Zero or one character: there is no tail to lower-case.
      (.toUpperCase text))))
|
rlm@10
|
156
|
rlm@10
|
(defn ^String upper-case
  "Returns the text of s with every character converted to upper-case."
  {:added "1.2"}
  [^CharSequence s]
  (-> s .toString .toUpperCase))
|
rlm@10
|
162
|
rlm@10
|
(defn ^String lower-case
  "Returns the text of s with every character converted to lower-case."
  {:added "1.2"}
  [^CharSequence s]
  (-> s .toString .toLowerCase))
|
rlm@10
|
168
|
rlm@10
|
(defn split
  "Splits string s around matches of the regular expression re and
  returns the pieces as a vector. Not lazy.

  The optional limit is passed unchanged to java.util.regex.Pattern.split,
  where a positive value caps the number of parts in the result."
  {:added "1.2"}
  ([^CharSequence s ^Pattern re]
     (let [parts (.split re s)]
       ;; The freshly created array is not shared, so the vector may own it.
       (LazilyPersistentVector/createOwning parts)))
  ([^CharSequence s ^Pattern re limit]
     (let [parts (.split re s limit)]
       (LazilyPersistentVector/createOwning parts))))
|
rlm@10
|
177
|
rlm@10
|
(defn split-lines
  "Splits s into lines, breaking on \\n or \\r\\n."
  {:added "1.2"}
  [^CharSequence s]
  ;; An optional carriage return followed by a line feed ends a line.
  (let [line-break #"\r?\n"]
    (split s line-break)))
|
rlm@10
|
183
|
rlm@10
|
(defn ^String trim
  "Returns the text of s with whitespace stripped from both ends, as done
  by String.trim."
  {:added "1.2"}
  [^CharSequence s]
  (-> s .toString .trim))
|
rlm@10
|
189
|
rlm@10
|
(defn ^String triml
  "Returns the text of s with leading whitespace (as judged by
  Character/isWhitespace) removed."
  {:added "1.2"}
  [^CharSequence s]
  (let [len (.length s)]
    ;; Walk forward to the first non-whitespace character.
    (loop [start 0]
      (cond
        (= start len) ""
        (Character/isWhitespace (.charAt s start)) (recur (inc start))
        :else (-> s (.subSequence start len) .toString)))))
|
rlm@10
|
200
|
rlm@10
|
(defn ^String trimr
  "Returns the text of s with trailing whitespace (as judged by
  Character/isWhitespace) removed."
  {:added "1.2"}
  [^CharSequence s]
  ;; end is one past the last character still being kept.
  (loop [end (.length s)]
    (cond
      (zero? end) ""
      (Character/isWhitespace (.charAt s (dec end))) (recur (dec end))
      :else (-> s (.subSequence 0 end) .toString))))
|
rlm@10
|
211
|
rlm@10
|
(defn ^String trim-newline
  "Returns the text of s with every trailing newline \\n or return \\r
  character stripped off. Similar to Perl's chomp."
  {:added "1.2"}
  [^CharSequence s]
  ;; end is one past the last character still being kept.
  (loop [end (.length s)]
    (if (zero? end)
      ""
      (let [last-ch (.charAt s (dec end))
            line-end? (or (= last-ch \newline) (= last-ch \return))]
        (if-not line-end?
          (-> s (.subSequence 0 end) .toString)
          (recur (dec end)))))))
|
rlm@10
|
224
|
rlm@10
|
(defn blank?
  "Returns true when s is nil, has no characters, or consists solely of
  whitespace (as judged by Character/isWhitespace); false otherwise."
  {:added "1.2"}
  [^CharSequence s]
  (or (not s)
      (let [len (.length s)]
        ;; Scan until a non-whitespace character turns up or s runs out.
        (loop [i 0]
          (cond
            (= i len) true
            (Character/isWhitespace (.charAt s i)) (recur (inc i))
            :else false)))))
|
rlm@10
|
237
|
rlm@10
|
(defn ^String escape
  "Builds a new string from s, consulting cmap for each character ch:

   When (cmap ch) is nil (or false), ch itself is copied across.
   Otherwise the value of (cmap ch) is appended in place of ch."
  {:added "1.2"}
  [^CharSequence s cmap]
  (let [len (.length s)
        out (StringBuilder. len)]
    (dotimes [i len]
      (let [ch (.charAt s i)]
        (if-let [substitute (cmap ch)]
          (.append out substitute)
          (.append out ch))))
    (.toString out)))
|