Mercurial > lasercutter
view src/clojure/contrib/string.clj @ 10:ef7dbbd6452c
added clojure source goodness
author | Robert McIntyre <rlm@mit.edu> |
---|---|
date | Sat, 21 Aug 2010 06:25:44 -0400 |
parents | |
children |
line wrap: on
line source
;;; string.clj -- functional string utilities for Clojure

;; by Stuart Sierra, http://stuartsierra.com/
;; January 26, 2010

;; Copyright (c) Stuart Sierra, 2010. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.

;; DEPRECATED in 1.2: Many functions have moved to clojure.string.

(ns ^{:author "Stuart Sierra"
      :doc "This is a library of string manipulation functions.  It
  is intended as a replacement for clojure.contrib.string.

  You cannot (use 'clojure.contrib.string) because it defines
  functions with the same names as functions in clojure.core.
  Instead, do (require '[clojure.contrib.string :as s])
  or something similar.

  Goals:
    1. Be functional
    2. Most significant argument LAST, to work with ->>
    3. At least O(n) performance for Strings of length n

  Some ideas are borrowed from
  http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
  clojure.contrib.string
  (:refer-clojure :exclude (take replace drop butlast partition
                                 contains? get repeat reverse partial))
  (:import (java.util.regex Pattern Matcher)))


(defmacro dochars
  "bindings => [name string]

  Repeatedly executes body, with name bound to each character in
  string.  Does NOT handle Unicode supplementary characters (above
  U+FFFF)."
  [bindings & body]
  ;; vector? is the predicate; plain `vector` would build a vector and
  ;; therefore always be truthy, making the assertion a no-op.
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  ;; This seems to be the fastest way to iterate over characters.
  `(let [^String s# ~(second bindings)]
     (dotimes [i# (.length s#)]
       (let [~(first bindings) (.charAt s# i#)]
         ~@body))))


(defmacro docodepoints
  "bindings => [name string]

  Repeatedly executes body, with name bound to the integer code point
  of each Unicode character in the string.  Handles Unicode
  supplementary characters (above U+FFFF) correctly.  An unpaired
  surrogate is passed to body as its own value and does not cause the
  following character to be skipped."
  [bindings & body]
  (assert (vector? bindings))
  (assert (= 2 (count bindings)))
  (let [character (first bindings)
        string (second bindings)]
    `(let [^String s# ~string
           len# (.length s#)]
       (loop [i# 0]
         (when (< i# len#)
           (let [cp# (.codePointAt s# i#)]
             (let [~character cp#]
               ~@body)
             ;; charCount is 2 only for a real surrogate pair, so a lone
             ;; high surrogate advances by one char instead of two.
             (recur (+ i# (Character/charCount cp#)))))))))

(defn codepoints
  "Returns a lazy sequence of integer Unicode code points in s.  Handles
  Unicode supplementary characters (above U+FFFF) correctly."
  [^String s]
  (let [len (.length s)
        ;; Each step is wrapped in lazy-seq so the sequence is realized
        ;; on demand and long strings do not consume stack.
        step (fn step [i]
               (lazy-seq
                (when (< i len)
                  (let [cp (.codePointAt s i)]
                    (cons cp (step (+ i (Character/charCount cp))))))))]
    (step 0)))

(defn ^String escape
  "Returns a new String by applying cmap (a function or a map) to each
  character in s.  If cmap returns nil, the original character is
  added to the output unchanged."
  {:deprecated "1.2"}
  [cmap ^String s]
  (let [buffer (StringBuilder. (.length s))]
    (dochars [c s]
      (if-let [r (cmap c)]
        (.append buffer r)
        (.append buffer c)))
    (.toString buffer)))

(defn blank?
  "True if s is nil, empty, or contains only whitespace."
  {:deprecated "1.2"}
  [^String s]
  (every? (fn [^Character c] (Character/isWhitespace c)) s))

(defn ^String take
  "Take first n characters from s, up to the length of s."
  [n ^String s]
  (if (< (count s) n)
    s
    (.substring s 0 n)))

(defn ^String drop
  "Drops first n characters from s.  Returns an empty string if n is
  greater than the length of s."
  [n ^String s]
  (if (< (count s) n)
    ""
    (.substring s n)))

(defn ^String butlast
  "Returns s without the last n characters.  Returns an empty string
  if n is greater than the length of s."
  [n ^String s]
  (if (< (count s) n)
    ""
    (.substring s 0 (- (count s) n))))

(defn ^String tail
  "Returns the last n characters of s."
  [n ^String s]
  (if (< (count s) n)
    s
    (.substring s (- (count s) n))))

(defn ^String repeat
  "Returns a new String containing s repeated n times."
  [n ^String s]
  (apply str (clojure.core/repeat n s)))

(defn ^String reverse
  "Returns s with its characters reversed."
  {:deprecated "1.2"}
  [^String s]
  (.toString (.reverse (StringBuilder. s))))

(defn replace-str
  "Replaces all instances of substring a with b in s."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  (.replace s a b))

(defn replace-char
  "Replaces all instances of character a with character b in s."
  {:deprecated "1.2"}
  [^Character a ^Character b ^String s]
  (.replace s a b))

(defn replace-re
  "Replaces all matches of re with replacement in s."
  {:deprecated "1.2"}
  [re replacement ^String s]
  (.replaceAll (re-matcher re s) replacement))

(defn replace-by
  "Replaces all matches of re in s with the result of
  (f (re-groups the-match)).  The result of f is inserted literally;
  $ and \\ in it are not treated as group references or escapes."
  {:deprecated "1.2"}
  [re f ^String s]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        ;; appendReplacement interprets $n and \ in its argument, so the
        ;; computed text must be quoted to be taken literally.
        (do (.appendReplacement m buffer
                                (Matcher/quoteReplacement (f (re-groups m))))
            (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))

(defn replace-first-str
  "Replace first occurrence of substring a with b in s.  Both a and b
  are treated as literal text."
  {:deprecated "1.2"}
  [^String a ^String b ^String s]
  ;; Pattern/quote returns a String (a quoted regex), not a Pattern, so
  ;; it must go to String.replaceFirst rather than re-matcher.
  (.replaceFirst s (Pattern/quote a) (Matcher/quoteReplacement b)))

(defn replace-first-re
  "Replace first match of re in s."
  {:deprecated "1.2"}
  [^Pattern re ^String replacement ^String s]
  (.replaceFirst (re-matcher re s) replacement))

(defn replace-first-by
  "Replace first match of re in s with the result of
  (f (re-groups the-match)).  The result of f is inserted literally.
  Returns s unchanged when re does not match."
  {:deprecated "1.2"}
  [^Pattern re f ^String s]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            rep (Matcher/quoteReplacement (f (re-groups m)))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      s)))

(defn partition
  "Splits the string into a lazy sequence of substrings, alternating
  between substrings that match the pattern and the substrings
  between the matches.  The sequence always starts with the substring
  before the first match, or an empty string if the beginning of the
  string matches.

  For example: (partition #\"[a-z]+\" \"abc123def\")
  returns: (\"\" \"abc\" \"123\" \"def\")"
  [^Pattern re ^String s]
  (let [m (re-matcher re s)]
    ((fn step [prevend]
       (lazy-seq
        (if (.find m)
          (cons (.subSequence s prevend (.start m))
                (cons (re-groups m)
                      ;; (.end m) is read now, before the matcher is
                      ;; advanced by the next step.
                      (step (.end m))))
          (when (< prevend (.length s))
            (list (.subSequence s prevend (.length s)))))))
     0)))

(defn ^String join
  "Returns a string of all elements in coll, separated by
  separator.  Like Perl's join."
  {:deprecated "1.2"}
  [^String separator coll]
  (apply str (interpose separator coll)))

(defn ^String chop
  "Removes the last character of string, does nothing on a zero-length
  string."
  [^String s]
  (let [size (count s)]
    (if (zero? size)
      s
      (subs s 0 (dec size)))))

(defn ^String chomp
  "Removes all trailing newline \\n or return \\r characters from
  string.  Note: String.trim() is similar and faster.
  Deprecated in 1.2. Use clojure.string/trim-newline"
  {:deprecated "1.2"}
  [^String s]
  (replace-re #"[\r\n]+$" "" s))

(defn ^String swap-case
  "Changes upper case characters to lower case and vice-versa.
  Handles Unicode supplementary characters correctly.  Uses the
  locale-sensitive String.toUpperCase() and String.toLowerCase()
  methods."
  [^String s]
  (let [buffer (StringBuilder. (.length s))
        ;; array to make a String from one code point
        ^"[I" array (make-array Integer/TYPE 1)]
    (docodepoints [c s]
      (aset-int array 0 c)
      (if (Character/isLowerCase c)
        ;; Character.toUpperCase is not locale-sensitive, but
        ;; String.toUpperCase is; so we use a String.
        (.append buffer (.toUpperCase (String. array 0 1)))
        (.append buffer (.toLowerCase (String. array 0 1)))))
    (.toString buffer)))

(defn ^String capitalize
  "Converts first character of the string to upper-case, all other
  characters to lower-case."
  {:deprecated "1.2"}
  [^String s]
  (if (< (count s) 2)
    (.toUpperCase s)
    (str (.toUpperCase ^String (subs s 0 1))
         (.toLowerCase ^String (subs s 1)))))

(defn ^String ltrim
  "Removes whitespace from the left side of string.
  Deprecated in 1.2. Use clojure.string/triml."
  {:deprecated "1.2"}
  [^String s]
  (replace-re #"^\s+" "" s))

(defn ^String rtrim
  "Removes whitespace from the right side of string.
  Deprecated in 1.2. Use clojure.string/trimr."
  {:deprecated "1.2"}
  [^String s]
  (replace-re #"\s+$" "" s))

(defn split-lines
  "Splits s on \\n or \\r\\n."
  {:deprecated "1.2"}
  [^String s]
  (seq (.split #"\r?\n" s)))

;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn ^String map-str
  "Apply f to each element of coll, concatenate all results into a
  String."
  [f coll]
  (apply str (map f coll)))

;; borrowed from compojure.string, by James Reeves, EPL 1.0
(defn grep
  "Filters elements of coll by a regular expression.  The String
  representation (with str) of each element is tested with re-find."
  [re coll]
  (filter (fn [x] (re-find re (str x))) coll))

(defn as-str
  "Like clojure.core/str, but if an argument is a keyword or symbol,
  its name will be used instead of its literal representation.

  Example:
     (str :foo :bar)     ;;=> \":foo:bar\"
     (as-str :foo :bar)  ;;=> \"foobar\"

  Note that this does not apply to keywords or symbols nested within
  data structures; they will be rendered as with str.

  Example:
     (str {:foo :bar})     ;;=> \"{:foo :bar}\"
     (as-str {:foo :bar})  ;;=> \"{:foo :bar}\" "
  ([] "")
  ([x] (if (instance? clojure.lang.Named x)
         (name x)
         (str x)))
  ([x & ys]
     ((fn [^StringBuilder sb more]
        (if more
          (recur (. sb (append (as-str (first more)))) (next more))
          (str sb)))
      (new StringBuilder ^String (as-str x)) ys)))


;;; WRAPPERS

;; The following functions are simple wrappers around java.lang.String
;; functions.  They are included here for completeness, and for use
;; when mapping over a collection of strings.

(defn ^String upper-case
  "Converts string to all upper-case."
  {:deprecated "1.2"}
  [^String s]
  (.toUpperCase s))

(defn ^String lower-case
  "Converts string to all lower-case."
  {:deprecated "1.2"}
  [^String s]
  (.toLowerCase s))

(defn split
  "Splits string on a regular expression.  Optional argument limit is
  the maximum number of splits."
  {:deprecated "1.2"}
  ([^Pattern re ^String s] (seq (.split re s)))
  ([^Pattern re limit ^String s] (seq (.split re s limit))))

(defn ^String trim
  "Removes whitespace from both ends of string."
  {:deprecated "1.2"}
  [^String s]
  (.trim s))

;; No ^String return tag here: the result is a boolean.
(defn substring?
  "True if s contains the substring."
  [substring ^String s]
  (.contains s substring))

;; No ^String return tag here: the result is a character.
(defn get
  "Gets the i'th character in string."
  {:deprecated "1.2"}
  [^String s i]
  (.charAt s i))