Mercurial > lasercutter
diff src/clojure/contrib/str_utils2.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
line wrap: on
line diff
;; --- /dev/null Thu Jan 01 00:00:00 1970 +0000
;; +++ b/src/clojure/contrib/str_utils2.clj Sat Aug 21 06:25:44 2010 -0400
;; @@ -0,0 +1,376 @@

;;; str_utils2.clj -- functional string utilities for Clojure

;; by Stuart Sierra, http://stuartsierra.com/
;; August 19, 2009

;; Copyright (c) Stuart Sierra, 2009. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.

;; DEPRECATED in 1.2: Promoted to clojure.java.string.  Note that
;; many function names and semantics have changed

(ns ^{:author "Stuart Sierra"
      :deprecated "1.2"
      :doc "This is a library of string manipulation functions.  It
  is intended as a replacement for clojure.contrib.str-utils.

  You cannot (use 'clojure.contrib.str-utils2) because it defines
  functions with the same names as functions in clojure.core.
  Instead, do (require '[clojure.contrib.str-utils2 :as s])
  or something similar.

  Goals:
    1. Be functional
    2. String argument first, to work with ->
    3. Performance linear in string length

  Some ideas are borrowed from
  http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
  clojure.contrib.str-utils2
  (:refer-clojure :exclude (take replace drop butlast partition
                                 contains? get repeat reverse partial))
  (:import (java.util.regex Matcher Pattern)))


(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character in
  string.  Does NOT handle Unicode supplementary characters (above
  U+FFFF)."
  [bindings & body]
  ;; vector? rather than vector: `vector` builds a new vector and is
  ;; therefore always truthy, so it never rejected bad bindings.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))


(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string.  Handles Unicode
  supplementary characters (above U+FFFF) correctly."
  [bindings & body]
  ;; vector? rather than vector: see dochars.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           (let [~character (.charAt s# i#)]
             (if (Character/isHighSurrogate ~character)
               ;; surrogate pair: read the full code point, skip 2 chars
               (let [~character (.codePointAt s# i#)]
                 ~@body
                 (recur (+ 2 i#)))
               (let [~character (int ~character)]
                 ~@body
                 (recur (inc i#))))))))))

(defn codepoints
  "Returns a lazy sequence of integer Unicode code points in s.
  Handles Unicode supplementary characters (above U+FFFF) correctly."
  [^String s]
  (let [len (.length s)
        ;; lazy-seq wraps every step so the recursion is unrolled on
        ;; demand; a single outer lazy-seq around an eager recursion
        ;; overflows the stack on long strings.
        step (fn step [i]
               (lazy-seq
                (when (< i len)
                  (let [c (.charAt s i)]
                    (if (Character/isHighSurrogate c)
                      (cons (.codePointAt s i) (step (+ 2 i)))
                      (cons (int c) (step (inc i))))))))]
    (step 0)))

(defn ^String escape
  "Returns a new String by applying cmap (a function or a map) to each
  character in s.  If cmap returns nil, the original character is
  added to the output unchanged."
  [^String s cmap]
  (let [buffer (StringBuilder. (.length s))]
    (dochars [c s]
      (if-let [r (cmap c)]
        (.append buffer r)
        (.append buffer c)))
    (.toString buffer)))

(defn blank?
  "True if s is nil, empty, or contains only whitespace."
  [^String s]
  (every? (fn [^Character c] (Character/isWhitespace c)) s))

(defn ^String take
  "Take first n characters from s, up to the length of s.

  Note the argument order is the opposite of clojure.core/take; this
  is to keep the string as the first argument for use with ->"
  [^String s n]
  (if (< (count s) n)
    s
    (.substring s 0 n)))

(defn ^String drop
  "Drops first n characters from s.  Returns an empty string if n is
  greater than the length of s.

  Note the argument order is the opposite of clojure.core/drop; this
  is to keep the string as the first argument for use with ->"
  [^String s n]
  (if (< (count s) n)
    ""
    (.substring s n)))

(defn ^String butlast
  "Returns s without the last n characters.  Returns an empty string
  if n is greater than the length of s.

  Note the argument order is the opposite of clojure.core/butlast;
  this is to keep the string as the first argument for use with ->"
  [^String s n]
  (if (< (count s) n)
    ""
    (.substring s 0 (- (count s) n))))

(defn ^String tail
  "Returns the last n characters of s."
  [^String s n]
  (if (< (count s) n)
    s
    (.substring s (- (count s) n))))

(defn ^String repeat
  "Returns a new String containing s repeated n times."
  [^String s n]
  (apply str (clojure.core/repeat n s)))

(defn ^String reverse
  "Returns s with its characters reversed."
  [^String s]
  (.toString (.reverse (StringBuilder. s))))

(defmulti
  ^{:doc "Replaces all instances of pattern in string with replacement.

  Allowed argument types for pattern and replacement are:
   1. String and String
   2. Character and Character
   3. regex Pattern and String
      (Uses java.util.regex.Matcher.replaceAll)
   4. regex Pattern and function
      (Calls function with re-groups of each match, uses return
       value as the literal replacement text.)"
    :arglists '([string pattern replacement])
    :tag String}
  replace
  (fn [^String string pattern replacement]
    [(class pattern) (class replacement)]))

(defmethod replace [String String] [^String s ^String a ^String b]
  (.replace s a b))

(defmethod replace [Character Character] [^String s ^Character a ^Character b]
  (.replace s a b))

(defmethod replace [Pattern String] [^String s re replacement]
  (.replaceAll (re-matcher re s) replacement))

(defmethod replace [Pattern clojure.lang.IFn] [^String s re replacement]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        ;; quoteReplacement: the function already received the groups,
        ;; so `$` and `\` in its result must be taken literally rather
        ;; than re-interpreted as group references.
        (do (.appendReplacement m buffer
                                (Matcher/quoteReplacement
                                 ^String (replacement (re-groups m))))
            (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))

(defmulti
  ^{:doc "Replaces the first instance of pattern in s with replacement.

  Allowed argument types for pattern and replacement are:
   1. String and String
      (Both are treated literally.)
   2. regex Pattern and String
      (Uses java.util.regex.Matcher.replaceFirst)
   3. regex Pattern and function
      (Calls function with re-groups of the first match, uses return
       value as the literal replacement text.  Returns s unchanged
       when there is no match.)
"
    :arglists '([s pattern replacement])
    :tag String}
  replace-first
  (fn [s pattern replacement]
    [(class pattern) (class replacement)]))

(defmethod replace-first [String String] [^String s ^String pattern ^String replacement]
  ;; Pattern/quote returns a String (not a Pattern), so it goes to
  ;; String.replaceFirst, which compiles it.  The replacement is quoted
  ;; as well so this method is fully literal, like replace [String String].
  (.replaceFirst s (Pattern/quote pattern) (Matcher/quoteReplacement replacement)))

(defmethod replace-first [Pattern String] [^String s re replacement]
  (.replaceFirst (re-matcher re s) replacement))

(defmethod replace-first [Pattern clojure.lang.IFn] [^String s ^Pattern re f]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            rep (Matcher/quoteReplacement ^String (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; no match: hand back the input instead of nil
      s)))

(defn partition
  "Splits the string into a lazy sequence of substrings, alternating
  between substrings that match the pattern and the substrings
  between the matches.  The sequence always starts with the substring
  before the first match, or an empty string if the beginning of the
  string matches.

  For example: (partition \"abc123def\" #\"[a-z]+\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^String s ^Pattern re]
  (let [m (re-matcher re s)]
    ((fn step [prevend]
       (lazy-seq
        (if (.find m)
          (cons (.subSequence s prevend (.start m))
                (cons (re-groups m)
                      ;; end of the current match, captured before the
                      ;; matcher is advanced by the next step
                      (step (.end m))))
          (when (< prevend (.length s))
            (list (.subSequence s prevend (.length s)))))))
     0)))

(defn ^String join
  "Returns a string of all elements in coll, separated by
  separator.  Like Perl's join."
  [^String separator coll]
  (apply str (interpose separator coll)))

(defn ^String chop
  "Removes the last character of string, does nothing on a zero-length
  string."
  [^String s]
  (let [size (count s)]
    (if (zero? size)
      s
      (subs s 0 (dec size)))))

(defn ^String chomp
  "Removes all trailing newline \\n or return \\r characters from
  string.  Note: String.trim() is similar and faster."
  [^String s]
  (replace s #"[\r\n]+$" ""))

(defn title-case [^String s]
  (throw (Exception. "title-case not implemeted yet")))

(defn ^String swap-case
  "Changes upper case characters to lower case and vice-versa.
  Handles Unicode supplementary characters correctly.  Uses the
  locale-sensitive String.toUpperCase() and String.toLowerCase()
  methods."
  [^String s]
  (let [buffer (StringBuilder. (.length s))
        ;; array to make a String from one code point
        ^"[I" array (make-array Integer/TYPE 1)]
    (docodepoints [c s]
      (aset-int array 0 c)
      (if (Character/isLowerCase c)
        ;; Character.toUpperCase is not locale-sensitive, but
        ;; String.toUpperCase is; so we use a String.
        (.append buffer (.toUpperCase (String. array 0 1)))
        (.append buffer (.toLowerCase (String. array 0 1)))))
    (.toString buffer)))

(defn ^String capitalize
  "Converts first character of the string to upper-case, all other
  characters to lower-case."
  [^String s]
  (if (< (count s) 2)
    (.toUpperCase s)
    (str (.toUpperCase ^String (subs s 0 1))
         (.toLowerCase ^String (subs s 1)))))

(defn ^String ltrim
  "Removes whitespace from the left side of string."
  [^String s]
  (replace s #"^\s+" ""))

(defn ^String rtrim
  "Removes whitespace from the right side of string."
  [^String s]
  (replace s #"\s+$" ""))

(defn split-lines
  "Splits s on \\n or \\r\\n."
  [^String s]
  (seq (.split #"\r?\n" s)))

;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0
(defn ^String map-str
  "Apply f to each element of coll, concatenate all results into a
  String."
  [f coll]
  (apply str (map f coll)))

;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0
(defn grep
  "Filters elements of coll by a regular expression.  The String
  representation (with str) of each element is tested with re-find."
  [re coll]
  (filter (fn [x] (re-find re (str x))) coll))

(defn partial
  "Like clojure.core/partial for functions that take their primary
  argument first.

  Takes a function f and its arguments, NOT INCLUDING the first
  argument.  Returns a new function whose first argument will be the
  first argument to f.

  Example: (str-utils2/partial str-utils2/take 2)
           ;;=> (fn [s] (str-utils2/take s 2))"
  [f & args]
  (fn [s & more] (apply f s (concat args more))))


;;; WRAPPERS

;; The following functions are simple wrappers around java.lang.String
;; functions.  They are included here for completeness, and for use
;; when mapping over a collection of strings.

(defn ^String upper-case
  "Converts string to all upper-case."
  [^String s]
  (.toUpperCase s))

(defn ^String lower-case
  "Converts string to all lower-case."
  [^String s]
  (.toLowerCase s))

(defn split
  "Splits string on a regular expression.  Optional argument limit is
  the maximum number of splits."
  ([^String s ^Pattern re] (seq (.split re s)))
  ([^String s ^Pattern re limit] (seq (.split re s limit))))

(defn ^String trim
  "Removes whitespace from both ends of string."
  [^String s]
  (.trim s))

;; No ^String return tag: the result is a boolean, and a wrong tag can
;; make interop call sites that trust it fail with ClassCastException.
(defn contains?
  "True if s contains the substring."
  [^String s substring]
  (.contains s substring))

;; No ^String return tag: the result is a char, not a String.
(defn get
  "Gets the i'th character in string."
  [^String s i]
  (.charAt s i))