Mercurial > lasercutter
view src/clojure/contrib/str_utils2.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
line wrap: on
line source
;;; str_utils2.clj -- functional string utilities for Clojure

;; by Stuart Sierra, http://stuartsierra.com/
;; August 19, 2009

;; Copyright (c) Stuart Sierra, 2009. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.

;; DEPRECATED in 1.2: Promoted to clojure.java.string.  Note that
;; many function names and semantics have changed

(ns ^{:author "Stuart Sierra"
      :deprecated "1.2"
      :doc "This is a library of string manipulation functions.  It
  is intended as a replacement for clojure.contrib.str-utils.

  You cannot (use 'clojure.contrib.str-utils2) because it defines
  functions with the same names as functions in clojure.core.
  Instead, do (require '[clojure.contrib.str-utils2 :as s])
  or something similar.

  Goals:
    1. Be functional
    2. String argument first, to work with ->
    3. Performance linear in string length

  Some ideas are borrowed from
  http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
  clojure.contrib.str-utils2
  (:refer-clojure :exclude (take replace drop butlast partition
                            contains? get repeat reverse partial))
  (:import (java.util.regex Pattern)))


(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character in
  string.  Does NOT handle Unicode supplementary characters (above
  U+FFFF)."
  [bindings & body]
  ;; vector? (not vector): (vector x) is always truthy, so it would
  ;; never reject a malformed binding form.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))


(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string.  Handles Unicode
  supplementary characters (above U+FFFF) correctly.  An unpaired
  surrogate is passed to body as its own code point."
  [bindings & body]
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           (let [cp# (.codePointAt s# i#)]
             (let [~character cp#]
               ~@body)
             ;; Advance by the number of chars this code point really
             ;; occupies (1 or 2), so that an unpaired high surrogate
             ;; does not swallow the character that follows it.
             (recur (+ i# (Character/charCount cp#)))))))))

(defn codepoints
  "Returns a lazy sequence of integer Unicode code points in s.
  Handles Unicode supplementary characters (above U+FFFF) correctly.
  An unpaired surrogate is returned as its own code point."
  [^String s]
  (let [len (.length s)
        ;; lazy-seq wraps every step, so the recursion is trampolined
        ;; by the sequence machinery instead of growing the stack in
        ;; proportion to the length of s.
        step (fn step [i]
               (lazy-seq
                 (when (< i len)
                   (let [cp (.codePointAt s (int i))]
                     (cons cp (step (+ i (Character/charCount cp))))))))]
    (step 0)))

(defn ^String escape
  "Returns a new String by applying cmap (a function or a map) to each
  character in s.  If cmap returns nil, the original character is
  added to the output unchanged."
  [^String s cmap]
  (let [buffer (StringBuilder. (.length s))]
    (dochars [c s]
      (if-let [r (cmap c)]
        (.append buffer r)
        (.append buffer c)))
    (.toString buffer)))

(defn blank?
  "True if s is nil, empty, or contains only whitespace."
  [^String s]
  (every? (fn [^Character c] (Character/isWhitespace c)) s))

(defn ^String take
  "Take first n characters from s, up to the length of s.

  Note the argument order is the opposite of clojure.core/take; this
  is to keep the string as the first argument for use with ->"
  [^String s n]
  (if (< (count s) n)
    s
    (.substring s 0 n)))

(defn ^String drop
  "Drops first n characters from s.  Returns an empty string if n is
  greater than the length of s.

  Note the argument order is the opposite of clojure.core/drop; this
  is to keep the string as the first argument for use with ->"
  [^String s n]
  (if (< (count s) n)
    ""
    (.substring s n)))

(defn ^String butlast
  "Returns s without the last n characters.  Returns an empty string
  if n is greater than the length of s.

  Note the argument order is the opposite of clojure.core/butlast;
  this is to keep the string as the first argument for use with ->"
  [^String s n]
  (if (< (count s) n)
    ""
    (.substring s 0 (- (count s) n))))

(defn ^String tail
  "Returns the last n characters of s, or all of s if n is greater
  than the length of s."
  [^String s n]
  (if (< (count s) n)
    s
    (.substring s (- (count s) n))))

(defn ^String repeat
  "Returns a new String containing s repeated n times."
  [^String s n]
  (apply str (clojure.core/repeat n s)))

(defn ^String reverse
  "Returns s with its characters reversed."
  [^String s]
  (.toString (.reverse (StringBuilder. s))))

(defmulti
  ^{:doc "Replaces all instances of pattern in string with replacement.

  Allowed argument types for pattern and replacement are:
   1. String and String
   2. Character and Character
   3. regex Pattern and String
      (Uses java.util.regex.Matcher.replaceAll)
   4. regex Pattern and function
      (Calls function with re-groups of each match, uses return
       value as replacement.  The return value is handed to
       java.util.regex.Matcher.appendReplacement, so $ and \\ in it
       are interpreted as group references and escapes.)"
    :arglists '([string pattern replacement])
    :tag String}
  replace
  ;; Dispatch on the classes of the pattern and the replacement.
  (fn [^String string pattern replacement]
    [(class pattern) (class replacement)]))

(defmethod replace [String String] [^String s ^String a ^String b]
  (.replace s a b))

(defmethod replace [Character Character] [^String s ^Character a ^Character b]
  (.replace s a b))

(defmethod replace [Pattern String] [^String s re replacement]
  (.replaceAll (re-matcher re s) replacement))

(defmethod replace [Pattern clojure.lang.IFn] [^String s re replacement]
  (let [m (re-matcher re s)]
    (let [buffer (StringBuffer. (.length s))]
      (loop []
        (if (.find m)
          (do (.appendReplacement m buffer (replacement (re-groups m)))
              (recur))
          (do (.appendTail m buffer)
              (.toString buffer)))))))

(defmulti
  ^{:doc "Replaces the first instance of pattern in s with replacement.
  Returns s unchanged if pattern does not occur in it.

  Allowed argument types for pattern and replacement are:
   1. String and String
      (Both are treated literally.)
   2. regex Pattern and String
      (Uses java.util.regex.Matcher.replaceFirst)
   3. regex Pattern and function
      (Calls function with re-groups of the first match, uses return
       value as replacement.  The return value is handed to
       java.util.regex.Matcher.appendReplacement, so $ and \\ in it
       are interpreted as group references and escapes.)"
    :arglists '([s pattern replacement])
    :tag String}
  replace-first
  ;; Dispatch on the classes of the pattern and the replacement.
  (fn [s pattern replacement]
    [(class pattern) (class replacement)]))

(defmethod replace-first [String String] [^String s ^String pattern ^String replacement]
  ;; Pattern/quote returns a String, but re-matcher needs a compiled
  ;; Pattern, so the quoted text must be compiled first.  The
  ;; replacement is quoted too, which keeps this method literal on both
  ;; sides, like the [String String] method of replace.
  (.replaceFirst (re-matcher (Pattern/compile (Pattern/quote pattern)) s)
                 (java.util.regex.Matcher/quoteReplacement replacement)))

(defmethod replace-first [Pattern String] [^String s re replacement]
  (.replaceFirst (re-matcher re s) replacement))

(defmethod replace-first [Pattern clojure.lang.IFn] [^String s ^Pattern re f]
  (let [m (re-matcher re s)]
    (let [buffer (StringBuffer.)]
      (if (.find m)
        (let [rep (f (re-groups m))]
          (.appendReplacement m buffer rep)
          (.appendTail m buffer)
          (str buffer))
        ;; No match: return the input unchanged instead of nil, as the
        ;; [Pattern String] method does.
        s))))

(defn partition
  "Splits the string into a lazy sequence of substrings, alternating
  between substrings that match the pattern and the substrings
  between the matches.  The sequence always starts with the substring
  before the first match, or an empty string if the beginning of the
  string matches.

  For example: (partition \"abc123def\" #\"[a-z]+\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^String s ^Pattern re]
  (let [m (re-matcher re s)]
    ;; The matcher is stateful; each step of the lazy seq advances it
    ;; by one .find, starting from where the previous match ended.
    ((fn step [prevend]
       (lazy-seq
         (if (.find m)
           (cons (.subSequence s prevend (.start m))
                 (cons (re-groups m)
                       (step (+ (.start m) (count (.group m))))))
           (when (< prevend (.length s))
             (list (.subSequence s prevend (.length s)))))))
     0)))

(defn ^String join
  "Returns a string of all elements in coll, separated by
  separator.  Like Perl's join."
  [^String separator coll]
  (apply str (interpose separator coll)))

(defn ^String chop
  "Removes the last character of string, does nothing on a zero-length
  string."
  [^String s]
  (let [size (count s)]
    (if (zero? size)
      s
      (subs s 0 (dec size)))))

(defn ^String chomp
  "Removes all trailing newline \\n or return \\r characters from
  string.  Note: String.trim() is similar and faster."
  [^String s]
  (replace s #"[\r\n]+$" ""))

;; Placeholder: always throws.
(defn title-case [^String s]
  (throw (Exception. "title-case not implemeted yet")))

(defn ^String swap-case
  "Changes upper case characters to lower case and vice-versa.
  Handles Unicode supplementary characters correctly.  Uses the
  locale-sensitive String.toUpperCase() and String.toLowerCase()
  methods."
  [^String s]
  (let [buffer (StringBuilder. (.length s))
        ;; array to make a String from one code point
        ^"[I" array (make-array Integer/TYPE 1)]
    (docodepoints [c s]
      (aset-int array 0 c)
      (if (Character/isLowerCase c)
        ;; Character.toUpperCase is not locale-sensitive, but
        ;; String.toUpperCase is; so we use a String.
        (.append buffer (.toUpperCase (String. array 0 1)))
        (.append buffer (.toLowerCase (String. array 0 1)))))
    (.toString buffer)))

(defn ^String capitalize
  "Converts first character of the string to upper-case, all other
  characters to lower-case."
  [^String s]
  (if (< (count s) 2)
    (.toUpperCase s)
    (str (.toUpperCase ^String (subs s 0 1))
         (.toLowerCase ^String (subs s 1)))))

(defn ^String ltrim
  "Removes whitespace from the left side of string."
  [^String s]
  (replace s #"^\s+" ""))

(defn ^String rtrim
  "Removes whitespace from the right side of string."
  [^String s]
  (replace s #"\s+$" ""))

(defn split-lines
  "Splits s on \\n or \\r\\n."
  [^String s]
  (seq (.split #"\r?\n" s)))

;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0
(defn ^String map-str
  "Apply f to each element of coll, concatenate all results into a
  String."
  [f coll]
  (apply str (map f coll)))

;; borrowed from compojure.str-utils, by James Reeves, EPL 1.0
(defn grep
  "Filters elements of coll by a regular expression.  The String
  representation (with str) of each element is tested with re-find."
  [re coll]
  (filter (fn [x] (re-find re (str x))) coll))

(defn partial
  "Like clojure.core/partial for functions that take their primary
  argument first.

  Takes a function f and its arguments, NOT INCLUDING the first
  argument.  Returns a new function whose first argument will be the
  first argument to f.

  Example: (str-utils2/partial str-utils2/take 2)
           ;;=> (fn [s] (str-utils2/take s 2))"
  [f & args]
  (fn [s & more] (apply f s (concat args more))))


;;; WRAPPERS

;; The following functions are simple wrappers around java.lang.String
;; functions.  They are included here for completeness, and for use
;; when mapping over a collection of strings.

(defn ^String upper-case
  "Converts string to all upper-case."
  [^String s]
  (.toUpperCase s))

(defn ^String lower-case
  "Converts string to all lower-case."
  [^String s]
  (.toLowerCase s))

(defn split
  "Splits string on a regular expression.  Optional argument limit is
  the maximum number of splits."
  ([^String s ^Pattern re] (seq (.split re s)))
  ([^String s ^Pattern re limit] (seq (.split re s limit))))

(defn ^String trim
  "Removes whitespace from both ends of string."
  [^String s]
  (.trim s))

;; Not tagged ^String: String.contains returns a boolean.
(defn contains?
  "True if s contains the substring."
  [^String s substring]
  (.contains s substring))

;; Not tagged ^String: String.charAt returns a character.
(defn get
  "Gets the i'th character in string."
  [^String s i]
  (.charAt s i))