Mercurial > lasercutter
diff src/clojure/contrib/string.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
line wrap: on
line diff
;;; string.clj -- functional string utilities for Clojure

;; by Stuart Sierra, http://stuartsierra.com/
;; January 26, 2010

;; Copyright (c) Stuart Sierra, 2010. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.

;; DEPRECATED in 1.2: Many functions have moved to clojure.string.

(ns ^{:author "Stuart Sierra"
      :doc "This is a library of string manipulation functions.  It
  is intended as a replacement for clojure.contrib.string.

  You cannot (use 'clojure.contrib.string) because it defines
  functions with the same names as functions in clojure.core.
  Instead, do (require '[clojure.contrib.string :as s])
  or something similar.

  Goals:
    1. Be functional
    2. Most significant argument LAST, to work with ->>
    3. At least O(n) performance for Strings of length n

  Some ideas are borrowed from
  http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
  clojure.contrib.string
  (:refer-clojure :exclude (take replace drop butlast partition
                                 contains? get repeat reverse partial))
  (:import (java.util.regex Matcher Pattern)))


(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character in
  string.  Does NOT handle Unicode supplementary characters (above
  U+FFFF)."
  [bindings & body]
  ;; vector? is a predicate; plain `vector` would wrap its argument and
  ;; always be truthy, so the assertion would never fire.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))


(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string.  Handles Unicode
  supplementary characters (above U+FFFF) correctly.  An unpaired
  surrogate is passed to body as its own code point."
  [bindings & body]
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           ;; codePointAt combines a valid surrogate pair into one code
           ;; point and returns any other char unchanged.
           (let [~character (.codePointAt s# i#)]
             ~@body
             ;; charCount is 2 only for supplementary code points, so an
             ;; unpaired high surrogate no longer swallows the next char.
             (recur (+ i# (Character/charCount ~character)))))))))

(defn codepoints
  "Returns a lazy sequence of integer Unicode code points in s.
  Handles Unicode supplementary characters (above U+FFFF) correctly.
  An unpaired surrogate appears in the result as its own code point."
  [^String s]
  (let [len (.length s)
        ;; Each step is wrapped in lazy-seq so that long strings are
        ;; walked on demand instead of recursing on the stack.
        step (fn step [i]
               (lazy-seq
                (when (< i len)
                  (let [cp (.codePointAt s (int i))]
                    (cons cp (step (+ i (Character/charCount cp))))))))]
    (step 0)))

(defn ^String escape
  "Returns a new String by applying cmap (a function or a map) to each
  character in s.  If cmap returns nil, the original character is
  added to the output unchanged."
  {:deprecated "1.2"}
  [cmap ^String s]
  (let [buffer (StringBuilder. (.length s))]
    (dochars [c s]
      (if-let [r (cmap c)]
        (.append buffer r)
        (.append buffer c)))
    (.toString buffer)))

(defn blank?
  "True if s is nil, empty, or contains only whitespace."
  {:deprecated "1.2"}
  [^String s]
  ;; every? is vacuously true for nil and for the empty string.
  (every? (fn [^Character c] (Character/isWhitespace c)) s))

(defn ^String take
  "Take first n characters from s, up to the length of s."
  [n ^String s]
  (if (< (count s) n)
    s
    (.substring s 0 n)))

(defn ^String drop
  "Drops first n characters from s.  Returns an empty string if n is
  greater than the length of s."
  [n ^String s]
  (if (< (count s) n)
    ""
    (.substring s n)))

(defn ^String butlast
  "Returns s without the last n characters.  Returns an empty string
  if n is greater than the length of s."
  [n ^String s]
  (if (< (count s) n)
    ""
    (.substring s 0 (- (count s) n))))

(defn ^String tail
  "Returns the last n characters of s, or all of s if n is greater
  than the length of s."
  [n ^String s]
  (if (< (count s) n)
    s
    (.substring s (- (count s) n))))

(defn ^String repeat
  "Returns a new String containing s repeated n times."
  [n ^String s]
  (apply str (clojure.core/repeat n s)))

(defn ^String reverse
  "Returns s with its characters reversed."
  {:deprecated "1.2"}
  [^String s]
  (.toString (.reverse (StringBuilder. s))))

(defn replace-str
  "Replaces all instances of substring a with b in s."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  (.replace s a b))

(defn replace-char
  "Replaces all instances of character a with character b in s."
  {:deprecated "1.2"}
  [^Character a ^Character b ^String s]
  (.replace s a b))

(defn replace-re
  "Replaces all matches of re with replacement in s.  The replacement
  is passed to Matcher.replaceAll, so $ and \\ keep their regex
  replacement meaning."
  {:deprecated "1.2"}
  [re replacement ^String s]
  (.replaceAll (re-matcher re s) replacement))

(defn replace-by
  "Replaces all matches of re in s with the result of
  (f (re-groups the-match)).  The result of f is passed to
  Matcher.appendReplacement, so $ and \\ in it keep their regex
  replacement meaning."
  {:deprecated "1.2"}
  [re f ^String s]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        (do (.appendReplacement m buffer (f (re-groups m)))
            (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))

(defn replace-first-str
  "Replace first occurrence of substring a with b in s.  Both a and b
  are treated literally."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  ;; Pattern/quote returns a String, so it must be compiled before it
  ;; can be handed to re-matcher.  The replacement is quoted as well so
  ;; that $ and \ in b are inserted verbatim, matching replace-str.
  (.replaceFirst (re-matcher (Pattern/compile (Pattern/quote a)) s)
                 (Matcher/quoteReplacement b)))

(defn replace-first-re
  "Replace first match of re in s."
  {:deprecated "1.2"}
  [^Pattern re ^String replacement ^String s]
  (.replaceFirst (re-matcher re s) replacement))

(defn replace-first-by
  "Replace first match of re in s with the result of
  (f (re-groups the-match)).  Returns s unchanged when re does not
  match."
  {:deprecated "1.2"}
  [^Pattern re f ^String s]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer.)
            rep (f (re-groups m))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; No match: hand back the input rather than nil.
      s)))

(defn partition
  "Splits the string into a lazy sequence of substrings, alternating
  between substrings that match the pattern and the substrings
  between the matches.  The sequence always starts with the substring
  before the first match, or an empty string if the beginning of the
  string matches.

  For example: (partition #\"[a-z]+\" \"abc123def\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^Pattern re ^String s]
  (let [m (re-matcher re s)]
    ((fn step [prevend]
       (lazy-seq
        (if (.find m)
          (cons (.subSequence s prevend (.start m))
                (cons (re-groups m)
                      ;; .end is read now, before the matcher is
                      ;; advanced by the next step.
                      (step (.end m))))
          (when (< prevend (.length s))
            (list (.subSequence s prevend (.length s)))))))
     0)))

(defn ^String join
  "Returns a string of all elements in coll, separated by
  separator.  Like Perl's join."
  {:deprecated "1.2"}
  [^String separator coll]
  (apply str (interpose separator coll)))

(defn ^String chop
  "Removes the last character of string, does nothing on a zero-length
  string."
  [^String s]
  (let [size (count s)]
    (if (zero? size)
      s
      (subs s 0 (dec size)))))

(defn ^String chomp
  "Removes all trailing newline \\n or return \\r characters from
  string.  Note: String.trim() is similar and faster.
  Deprecated in 1.2. Use clojure.string/trim-newline"
  {:deprecated "1.2"}
  [^String s]
  (replace-re #"[\r\n]+$" "" s))

(defn ^String swap-case
  "Changes upper case characters to lower case and vice-versa.
  Handles Unicode supplementary characters correctly.  Uses the
  locale-sensitive String.toUpperCase() and String.toLowerCase()
  methods."
  [^String s]
  (let [buffer (StringBuilder. (.length s))
        ;; array to make a String from one code point
        ^"[I" array (make-array Integer/TYPE 1)]
    (docodepoints [c s]
      (aset-int array 0 c)
      (if (Character/isLowerCase c)
        ;; Character.toUpperCase is not locale-sensitive, but
        ;; String.toUpperCase is; so we use a String.
        (.append buffer (.toUpperCase (String. array 0 1)))
        (.append buffer (.toLowerCase (String. array 0 1)))))
    (.toString buffer)))

(defn ^String capitalize
  "Converts first character of the string to upper-case, all other
  characters to lower-case."
  {:deprecated "1.2"}
  [^String s]
  (if (< (count s) 2)
    (.toUpperCase s)
    (str (.toUpperCase ^String (subs s 0 1))
         (.toLowerCase ^String (subs s 1)))))

(defn ^String ltrim
  "Removes whitespace from the left side of string.
  Deprecated in 1.2. Use clojure.string/triml."
  {:deprecated "1.2"}
  [^String s]
  (replace-re #"^\s+" "" s))

(defn ^String rtrim
  "Removes whitespace from the right side of string.
  Deprecated in 1.2. Use clojure.string/trimr."
  {:deprecated "1.2"}
  [^String s]
  (replace-re #"\s+$" "" s))

(defn split-lines
  "Splits s on \\n or \\r\\n."
  {:deprecated "1.2"}
  [^String s]
  (seq (.split #"\r?\n" s)))

;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn ^String map-str
  "Apply f to each element of coll, concatenate all results into a
  String."
  [f coll]
  (apply str (map f coll)))

;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn grep
  "Filters elements of coll by a regular expression.  The String
  representation (with str) of each element is tested with re-find."
  [re coll]
  (filter (fn [x] (re-find re (str x))) coll))

(defn as-str
  "Like clojure.core/str, but if an argument is a keyword or symbol,
  its name will be used instead of its literal representation.

  Example:
     (str :foo :bar)     ;;=> \":foo:bar\"
     (as-str :foo :bar)  ;;=> \"foobar\"

  Note that this does not apply to keywords or symbols nested within
  data structures; they will be rendered as with str.

  Example:
     (str {:foo :bar})     ;;=> \"{:foo :bar}\"
     (as-str {:foo :bar})  ;;=> \"{:foo :bar}\" "
  ([] "")
  ([x] (if (instance? clojure.lang.Named x)
         (name x)
         (str x)))
  ([x & ys]
     ((fn [^StringBuilder sb more]
        (if more
          (recur (. sb (append (as-str (first more)))) (next more))
          (str sb)))
      (new StringBuilder ^String (as-str x)) ys)))


;;; WRAPPERS

;; The following functions are simple wrappers around java.lang.String
;; functions.  They are included here for completeness, and for use
;; when mapping over a collection of strings.

(defn ^String upper-case
  "Converts string to all upper-case."
  {:deprecated "1.2"}
  [^String s]
  (.toUpperCase s))

(defn ^String lower-case
  "Converts string to all lower-case."
  {:deprecated "1.2"}
  [^String s]
  (.toLowerCase s))

(defn split
  "Splits string on a regular expression.  Optional argument limit is
  the maximum number of splits."
  {:deprecated "1.2"}
  ([^Pattern re ^String s] (seq (.split re s)))
  ([^Pattern re limit ^String s] (seq (.split re s limit))))

(defn ^String trim
  "Removes whitespace from both ends of string."
  {:deprecated "1.2"}
  [^String s]
  (.trim s))

;; No ^String return hint here: the result is a boolean, and a wrong
;; tag makes hinted call sites cast the result to String.
(defn substring?
  "True if s contains the substring."
  [substring ^String s]
  (.contains s substring))

;; No ^String return hint here: the result is a character.
(defn get
  "Gets the i'th character in string."
  {:deprecated "1.2"}
  [^String s i]
  (.charAt s i))