annotate src/clojure/string.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ; Copyright (c) Rich Hickey. All rights reserved.
rlm@10 2 ; The use and distribution terms for this software are covered by the
rlm@10 3 ; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 4 ; which can be found in the file epl-v10.html at the root of this distribution.
rlm@10 5 ; By using this software in any fashion, you are agreeing to be bound by
rlm@10 6 ; the terms of this license.
rlm@10 7 ; You must not remove this notice, or any other, from this software.
rlm@10 8
rlm@10 9 (ns ^{:doc "Clojure String utilities
rlm@10 10
rlm@10 11 It is poor form to (:use clojure.string). Instead, use require
rlm@10 12 with :as to specify a prefix, e.g.
rlm@10 13
rlm@10 14 (ns your.namespace.here
rlm@10 15 (:require [clojure.string :as str]))
rlm@10 16
rlm@10 17 Design notes for clojure.string:
rlm@10 18
rlm@10 19 1. Strings are objects (as opposed to sequences). As such, the
rlm@10 20 string being manipulated is the first argument to a function;
rlm@10 21 passing nil will result in a NullPointerException unless
rlm@10 22 documented otherwise. If you want sequence-y behavior instead,
rlm@10 23 use a sequence.
rlm@10 24
rlm@10 25 2. Functions are generally not lazy, and call straight to host
rlm@10 26 methods where those are available and efficient.
rlm@10 27
rlm@10 28 3. Functions take advantage of String implementation details to
rlm@10 29 write high-performing loop/recurs instead of using higher-order
rlm@10 30 functions. (This is not idiomatic in general-purpose application
rlm@10 31 code.)
rlm@10 32
rlm@10 33 4. When a function is documented to accept a string argument, it
rlm@10 34 will take any implementation of the correct *interface* on the
rlm@10 35 host platform. In Java, this is CharSequence, which is more
rlm@10 36 general than String. In ordinary usage you will almost always
rlm@10 37 pass concrete strings. If you are doing something unusual,
rlm@10 38 e.g. passing a mutable implementation of CharSequence, then
rlm@10 39 thread-safety is your responsibility."
rlm@10 40 :author "Stuart Sierra, Stuart Halloway, David Liebke"}
rlm@10 41 clojure.string
rlm@10 42 (:refer-clojure :exclude (replace reverse))
rlm@10 43 (:import (java.util.regex Pattern)
rlm@10 44 clojure.lang.LazilyPersistentVector))
rlm@10 45
(defn ^String reverse
  "Returns a new string containing the characters of s in reverse order."
  {:added "1.2"}
  [^CharSequence s]
  ;; Copy s into a StringBuilder, reverse it in place, and render it.
  (-> (StringBuilder. s)
      .reverse
      .toString))
rlm@10 51
(defn- replace-by
  "Returns s with every match of the pattern re replaced by the string
  that f returns when called with the match data (the value of
  re-groups for that match).

  The string returned by f is inserted literally."
  [^CharSequence s re f]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        ;; appendReplacement gives $ and \ in its argument a special
        ;; meaning, so the result of f is quoted to be inserted as-is.
        (do (.appendReplacement
             m buffer
             (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
            (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))

(defn ^String replace
  "Replaces all instances of match with replacement in s.

  match/replacement can be:

  string / string
  char / char
  pattern / (string or function of match).

  With a string or char match, match and replacement are both taken
  literally. With a pattern and a string replacement, the replacement
  is handed to the host Matcher.replaceAll, so $1, $2, etc. refer to
  capture groups. With a pattern and a function, the function is
  called with the match data (as returned by re-groups) and the string
  it returns is inserted literally.

  Throws IllegalArgumentException for any other kind of match.

  See also replace-first."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [s (.toString s)]
    (cond
      (instance? Character match)
      (.replace s ^Character match ^Character replacement)

      (instance? CharSequence match)
      (.replace s ^CharSequence match ^CharSequence replacement)

      (instance? Pattern match)
      (if (instance? CharSequence replacement)
        (.replaceAll (re-matcher ^Pattern match s)
                     (.toString ^CharSequence replacement))
        (replace-by s match replacement))

      :else
      (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
rlm@10 84
(defn- replace-first-by
  "Returns s with the first match of the pattern re replaced by the
  string that f returns when called with the match data (the value of
  re-groups for that match). The string returned by f is inserted
  literally. When re does not match, s is returned as a string."
  [^CharSequence s ^Pattern re f]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            ;; appendReplacement gives $ and \ a special meaning, so the
            ;; result of f is quoted to be inserted as-is.
            rep (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; No match: hand back the input rather than nil.
      (.toString s))))

(defn- replace-first-char
  "Returns s with the first occurrence of the character match replaced
  by replace, or s itself (as a string) when match does not occur."
  [^CharSequence s ^Character match replace]
  (let [s (.toString s)
        i (.indexOf s (int match))]
    (if (= -1 i)
      s
      (str (subs s 0 i) replace (subs s (inc i))))))

(defn ^String replace-first
  "Replaces the first instance of match with replacement in s.

  match/replacement can be:

  char / char
  string / string
  pattern / (string or function of match).

  With a string or char match, match and replacement are both taken
  literally. With a pattern and a string replacement, the replacement
  is handed to the host Matcher.replaceFirst, so $1, $2, etc. refer to
  capture groups. With a pattern and a function, the function is
  called with the match data (as returned by re-groups) and the string
  it returns is inserted literally.

  When match is not found, s is returned unchanged (as a string).
  Throws IllegalArgumentException for any other kind of match.

  See also replace."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [s (.toString s)]
    (cond
      (instance? Character match)
      (replace-first-char s match replacement)

      (instance? CharSequence match)
      ;; Quote both sides so that neither regex metacharacters in match
      ;; nor $ and \ in replacement are given a special meaning.
      (.replaceFirst s
                     (Pattern/quote (.toString ^CharSequence match))
                     (java.util.regex.Matcher/quoteReplacement
                      (.toString ^CharSequence replacement)))

      (instance? Pattern match)
      (if (instance? CharSequence replacement)
        (.replaceFirst (re-matcher ^Pattern match s)
                       (.toString ^CharSequence replacement))
        (replace-first-by s match replacement))

      :else
      (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
rlm@10 128
rlm@10 129
(defn ^String join
  "Builds one string from the elements of coll. With a single argument
  the elements are simply concatenated; with a separator, the string
  form of the separator is placed between consecutive elements.
  Like Perl's join."
  {:added "1.2"}
  ([coll]
     (apply str coll))
  ([separator [x & more]]
     (let [sep (str separator)
           sb (StringBuilder. (str x))]
       ;; Every element after the first is preceded by the separator.
       (doseq [item more]
         (.append sb sep)
         (.append sb (str item)))
       (.toString sb))))
rlm@10 145
(defn ^String capitalize
  "Returns s with its first character upper-cased and every remaining
  character lower-cased."
  {:added "1.2"}
  [^CharSequence s]
  (let [text (.toString s)]
    (if (>= (.length text) 2)
      (let [head (.substring text 0 1)
            tail (.substring text 1)]
        (str (.toUpperCase head) (.toLowerCase tail)))
      ;; Empty or single-character input: upper-case it whole.
      (.toUpperCase text))))
rlm@10 156
(defn ^String upper-case
  "Returns s converted to a string with every character in upper case."
  {:added "1.2"}
  [^CharSequence s]
  (.toUpperCase (.toString s)))
rlm@10 162
(defn ^String lower-case
  "Returns s converted to a string with every character in lower case."
  {:added "1.2"}
  [^CharSequence s]
  (.toLowerCase (.toString s)))
rlm@10 168
(defn split
  "Splits the string s around matches of the regular expression re and
  returns the pieces as a vector. Not lazy. The optional limit is
  passed through to the host Pattern.split and bounds the number of
  pieces."
  {:added "1.2"}
  ([^CharSequence s ^Pattern re]
     (let [pieces (.split re s)]
       (LazilyPersistentVector/createOwning pieces)))
  ([^CharSequence s ^Pattern re limit]
     (let [pieces (.split re s limit)]
       (LazilyPersistentVector/createOwning pieces))))
rlm@10 177
(defn split-lines
  "Splits s into a vector of lines, breaking at each \\n or \\r\\n."
  {:added "1.2"}
  [^CharSequence s]
  (let [line-break #"\r?\n"]
    (split s line-break)))
rlm@10 183
(defn ^String trim
  "Returns s with whitespace stripped from both ends, as done by the
  host String.trim method."
  {:added "1.2"}
  [^CharSequence s]
  (.trim (.toString s)))
rlm@10 189
(defn ^String triml
  "Returns s with leading whitespace (per Character/isWhitespace)
  removed. Returns the empty string when s is empty or all whitespace."
  {:added "1.2"}
  [^CharSequence s]
  (let [len (.length s)]
    ;; Scan forward to the first non-whitespace character.
    (loop [start 0]
      (cond
        (= len start) ""
        (Character/isWhitespace (.charAt s start)) (recur (inc start))
        :else (.toString (.subSequence s start len))))))
rlm@10 200
(defn ^String trimr
  "Returns s with trailing whitespace (per Character/isWhitespace)
  removed. Returns the empty string when s is empty or all whitespace."
  {:added "1.2"}
  [^CharSequence s]
  ;; Scan backward; end is one past the last character to keep.
  (loop [end (.length s)]
    (cond
      (zero? end) ""
      (Character/isWhitespace (.charAt s (dec end))) (recur (dec end))
      :else (.toString (.subSequence s 0 end)))))
rlm@10 211
(defn ^String trim-newline
  "Strips every trailing newline \\n and return \\r character from s,
  leaving other trailing whitespace alone. Similar to Perl's chomp."
  {:added "1.2"}
  [^CharSequence s]
  ;; Scan backward; end is one past the last character to keep.
  (loop [end (.length s)]
    (if (zero? end)
      ""
      (let [last-ch (.charAt s (dec end))
            line-end? (or (= last-ch \newline) (= last-ch \return))]
        (if line-end?
          (recur (dec end))
          (.toString (.subSequence s 0 end)))))))
rlm@10 224
(defn blank?
  "Returns true when s is nil, empty, or made up solely of whitespace
  (per Character/isWhitespace); false otherwise."
  {:added "1.2"}
  [^CharSequence s]
  (if-not s
    true
    (let [len (.length s)]
      ;; Blank unless some character is not whitespace.
      (loop [i 0]
        (cond
          (= len i) true
          (Character/isWhitespace (.charAt s i)) (recur (inc i))
          :else false)))))
rlm@10 237
(defn ^String escape
  "Builds a new string from s by looking each character ch up in cmap:

  When (cmap ch) is nil, ch itself is appended.
  When (cmap ch) is non-nil, that replacement is appended in place of ch."
  {:added "1.2"}
  [^CharSequence s cmap]
  (let [len (.length s)
        out (StringBuilder. len)]
    (dotimes [i len]
      (let [ch (.charAt s i)]
        (if-let [replacement (cmap ch)]
          (.append out replacement)
          (.append out ch))))
    (.toString out)))