diff src/clojure/contrib/string.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/contrib/string.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,382 @@
     1.4 +;;; string.clj -- functional string utilities for Clojure
     1.5 +
     1.6 +;; by Stuart Sierra, http://stuartsierra.com/
     1.7 +;; January 26, 2010
     1.8 +
     1.9 +;; Copyright (c) Stuart Sierra, 2010. All rights reserved.  The use
    1.10 +;; and distribution terms for this software are covered by the Eclipse
    1.11 +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
    1.12 +;; which can be found in the file epl-v10.html at the root of this
    1.13 +;; distribution.  By using this software in any fashion, you are
    1.14 +;; agreeing to be bound by the terms of this license.  You must not
    1.15 +;; remove this notice, or any other, from this software.
    1.16 +
    1.17 +;; DEPRECATED in 1.2: Many functions have moved to clojure.string.
    1.18 +
    1.19 +(ns ^{:author "Stuart Sierra"
    1.20 +       :doc "This is a library of string manipulation functions.  It
    1.21 +    is intended as a replacement for clojure.contrib.string.
    1.22 +
    1.23 +    You cannot (use 'clojure.contrib.string) because it defines
    1.24 +    functions with the same names as functions in clojure.core.
    1.25 +    Instead, do (require '[clojure.contrib.string :as s]) 
    1.26 +    or something similar.
    1.27 +
    1.28 +    Goals:
    1.29 +      1. Be functional
    1.30 +      2. Most significant argument LAST, to work with ->>
    1.31 +      3. At least O(n) performance for Strings of length n
    1.32 +
    1.33 +    Some ideas are borrowed from
    1.34 +    http://github.com/francoisdevlin/devlinsf-clojure-utils/"}
    1.35 + clojure.contrib.string
    1.36 + (:refer-clojure :exclude (take replace drop butlast partition
    1.37 +                           contains? get repeat reverse partial))
    1.38 + (:import (java.util.regex Pattern)))
    1.39 +
    1.40 +
    1.41 +(defmacro dochars
    1.42 +  "bindings => [name string]
    1.43 +
    1.44 +  Repeatedly executes body, with name bound to each character in
    1.45 +  string.  Does NOT handle Unicode supplementary characters (above
    1.46 +  U+FFFF).  bindings must be a vector of exactly two forms."
    1.47 +  [bindings & body]
    1.48 +  (assert (vector? bindings)) ; vector? tests the type; (vector x) is always truthy
    1.49 +  (assert (= 2 (count bindings)))
    1.50 +  ;; This seems to be the fastest way to iterate over characters.
    1.51 +  `(let [^String s# ~(second bindings)]
    1.52 +     (dotimes [i# (.length s#)]
    1.53 +       (let [~(first bindings) (.charAt s# i#)]
    1.54 +         ~@body))))
    1.55 +
    1.56 +
    1.57 +(defmacro docodepoints
    1.58 +  "bindings => [name string]
    1.59 +
    1.60 +  Repeatedly executes body, with name bound to the integer code point
    1.61 +  of each Unicode character in the string.  Handles Unicode
    1.62 +  supplementary characters (above U+FFFF) correctly; an unpaired
    1.63 +  surrogate is passed to body as its own code point."
    1.64 +  [bindings & body]
    1.65 +  (assert (vector? bindings))
    1.66 +  (assert (= 2 (count bindings)))
    1.67 +  (let [character (first bindings)
    1.68 +        string (second bindings)]
    1.69 +    `(let [^String s# ~string
    1.70 +           len# (.length s#)]
    1.71 +       (loop [i# 0]
    1.72 +         (when (< i# len#)
    1.73 +           ;; codePointAt joins a valid surrogate pair at i# and returns
    1.74 +           ;; any other char as-is; charCount (1 or 2) is the step, so
    1.75 +           ;; the char following an unpaired surrogate is not skipped.
    1.76 +           (let [cp# (.codePointAt s# i#)
    1.77 +                 ~character cp#]
    1.78 +             ~@body
    1.79 +             (recur (+ i# (Character/charCount cp#)))))))))
    1.80 +
    1.81 +(defn codepoints
    1.82 +  "Returns a lazy sequence of integer Unicode code points in s.  Handles
    1.83 +  Unicode supplementary characters (above U+FFFF) correctly."
    1.84 +  [^String s]
    1.85 +  (let [len (.length s)
    1.86 +        ;; One lazy-seq per element, so long strings cannot overflow the stack.
    1.87 +        step (fn step [i]
    1.88 +               (lazy-seq
    1.89 +                (when (< i len)
    1.90 +                  (let [cp (.codePointAt s (int i))]
    1.91 +                    (cons cp (step (+ i (Character/charCount cp))))))))]
    1.92 +    (step 0)))
    1.93 +
    1.94 +(defn ^String escape
    1.95 +  "Builds a new String from s by passing each character through cmap
    1.96 +   (a function or a map); a character for which cmap yields nil is kept."
    1.97 +  {:deprecated "1.2"}
    1.98 +  [cmap ^String s]
    1.99 +  (let [out (StringBuilder. (.length s))]
   1.100 +    (dochars [ch s]
   1.101 +      (let [replacement (cmap ch)]
   1.102 +        (if replacement
   1.103 +          (.append out replacement)
   1.104 +          (.append out ch))))
   1.105 +    (str out)))
   1.106 +
   1.107 +(defn blank?
   1.108 +  "Returns true when s is nil, the empty string, or all whitespace."
   1.109 +  {:deprecated "1.2"}
   1.110 +  [^String s]
   1.111 +  (not-any? (fn [^Character ch] (not (Character/isWhitespace ch))) s))
   1.112 +
   1.113 +(defn ^String take
   1.114 +  "Returns the leading n characters of s, or s itself when s has
   1.115 +  fewer than n characters."
   1.116 +  [n ^String s]
   1.117 +  (let [too-short? (< (count s) n)]
   1.118 +    (if too-short? s (.substring s 0 n))))
   1.119 +
   1.120 +(defn ^String drop
   1.121 +  "Returns s minus its first n characters; the result is the empty
   1.122 +  string when n exceeds the length of s."
   1.123 +  [n ^String s]
   1.124 +  (let [len (count s)]
   1.125 +    (cond (< len n) ""
   1.126 +          :else (.substring s n))))
   1.127 +
   1.128 +(defn ^String butlast
   1.129 +  "Returns s with its final n characters removed; the result is the
   1.130 +  empty string when n exceeds the length of s."
   1.131 +  [n ^String s]
   1.132 +  (let [len (count s)]
   1.133 +    (cond (< len n) ""
   1.134 +          :else (.substring s 0 (- len n)))))
   1.135 +
   1.136 +(defn ^String tail
   1.137 +  "Returns the trailing n characters of s, or s itself when s has
   1.138 +  fewer than n characters."
   1.139 +  [n ^String s]
   1.140 +  (let [len (count s)]
   1.141 +    (if (< len n) s (.substring s (- len n)))))
   1.142 +
   1.143 +(defn ^String repeat
   1.144 +  "Builds a new String made of n consecutive copies of s."
   1.145 +  [n ^String s]
   1.146 +  (->> s (clojure.core/repeat n) (apply str)))
   1.147 +
   1.148 +(defn ^String reverse
   1.149 +  "Returns a copy of s with the order of its characters reversed."
   1.150 +  {:deprecated "1.2"}
   1.151 +  [^String s]
   1.152 +  (-> (StringBuilder. s) .reverse str))
   1.153 +
   1.154 +(defn replace-str
   1.155 +  "Returns s with every occurrence of substring a replaced by b."
   1.156 +  {:deprecated "1.2"}
   1.157 +  [^String a ^String b ^String s]
   1.158 +  (. s (replace a b)))
   1.159 +
   1.160 +(defn replace-char
   1.161 +  "Returns s with every occurrence of character a replaced by character b."
   1.162 +  {:deprecated "1.2"}
   1.163 +  [^Character a ^Character b ^String s]
   1.164 +  (. s (replace a b)))
   1.165 +
   1.166 +(defn replace-re
   1.167 +  "Returns s with every match of re replaced by replacement."
   1.168 +  {:deprecated "1.2"}
   1.169 +  [re replacement ^String s]
   1.170 +  (-> (re-matcher re s) (.replaceAll replacement)))
   1.171 +
   1.172 +(defn replace-by
   1.173 +  "Replaces all matches of re in s with the result of
   1.174 +  (f (re-groups the-match)).  That result is inserted literally:
   1.175 +  $ and \\ in it are not treated as group references or escapes."
   1.176 +  {:deprecated "1.2"}
   1.177 +  [re f ^String s]
   1.178 +  (let [m (re-matcher re s)
   1.179 +        buffer (StringBuffer. (.length s))]
   1.180 +    (while (.find m)
   1.181 +      (.appendReplacement
   1.182 +       m buffer (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))))
   1.183 +    (.appendTail m buffer)
   1.184 +    (.toString buffer)))
   1.185 +
   1.186 +(defn replace-first-str
   1.187 +  "Replaces the first occurrence of substring a in s with b (both literal)."
   1.188 +  {:deprecated "1.2"}
   1.189 +  [^String a ^String b ^String s]
   1.190 +  (.replaceFirst s (Pattern/quote a) (java.util.regex.Matcher/quoteReplacement b)))
   1.191 +
   1.192 +(defn replace-first-re
   1.193 +  "Returns s with only the first match of re replaced by replacement."
   1.194 +  {:deprecated "1.2"}
   1.195 +  [^Pattern re ^String replacement ^String s]
   1.196 +  (-> (re-matcher re s) (.replaceFirst replacement)))
   1.197 +
   1.198 +(defn replace-first-by
   1.199 +  "Replace first match of re in s with the literal result of
   1.200 +  (f (re-groups the-match)).  Returns s unchanged if nothing matches."
   1.201 +  {:deprecated "1.2"}
   1.202 +  [^Pattern re f ^String s]
   1.203 +  (let [m (re-matcher re s)]
   1.204 +    (if-not (.find m)
   1.205 +      s ; no match: hand back the input rather than nil
   1.206 +      (let [buffer (StringBuffer. (.length s))]
   1.207 +        (.appendReplacement
   1.208 +         m buffer (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
   1.209 +        (str (.appendTail m buffer))))))
   1.210 +
   1.211 +(defn partition
   1.212 +  "Splits the string into a lazy sequence of substrings, alternating
   1.213 +  between the text between matches and the matches of the pattern.
   1.214 +  The sequence always begins with the text before the first match,
   1.215 +  which is an empty string when the string starts with a match.
   1.216 +
   1.217 +  For example: (partition #\"[a-z]+\" \"abc123def\")
   1.218 +  returns: (\"\" \"abc\" \"123\" \"def\")"
   1.219 +  [^Pattern re ^String s]
   1.220 +  (let [matcher (re-matcher re s)
   1.221 +        total (.length s)]
   1.222 +    (letfn [(chunks [from]
   1.223 +              (lazy-seq
   1.224 +               (if (.find matcher)
   1.225 +                 (list* (.subSequence s from (.start matcher))
   1.226 +                        (re-groups matcher)
   1.227 +                        (chunks (.end matcher)))
   1.228 +                 (when (< from total)
   1.229 +                   (list (.subSequence s from total))))))]
   1.230 +      (chunks 0))))
   1.231 +
   1.232 +(defn ^String join
   1.233 +  "Concatenates the elements of coll into one string, placing
   1.234 +  separator between neighbouring elements.  Like Perl's join."
   1.235 +  {:deprecated "1.2"}
   1.236 +  [^String separator coll]
   1.237 +  (->> coll (interpose separator) (apply str)))
   1.238 +
   1.239 +(defn ^String chop
   1.240 +  "Returns string without its final character; a zero-length string
   1.241 +  is returned unchanged."
   1.242 +  [^String s]
   1.243 +  (let [len (count s)]
   1.244 +    (if (pos? len)
   1.245 +      (subs s 0 (dec len))
   1.246 +      s)))
   1.247 +
   1.248 +(defn ^String chomp
   1.249 +  "Strips every trailing newline \\n and return \\r character from
   1.250 +  string.  Note: String.trim() is similar and faster.
   1.251 +  Deprecated in 1.2. Use clojure.string/trim-newline"
   1.252 +  {:deprecated "1.2"}
   1.253 +  [^String s]
   1.254 +  (.replaceAll (re-matcher #"[\r\n]+$" s) ""))
   1.255 +
   1.256 +(defn ^String swap-case
   1.257 +  "Returns s with every lower-case character upper-cased and every
   1.258 +  other character lower-cased.  Works per code point, so Unicode
   1.259 +  supplementary characters are handled.  Case conversion goes through
   1.260 +  the locale-sensitive String.toUpperCase() / String.toLowerCase()."
   1.261 +  [^String s]
   1.262 +  (let [out (StringBuilder. (.length s))
   1.263 +        ;; one-slot int array: lets us build a String from one code point
   1.264 +        ^"[I" cp-array (make-array Integer/TYPE 1)]
   1.265 +    (docodepoints [cp s]
   1.266 +      (aset-int cp-array 0 cp)
   1.267 +      ;; Character.toUpperCase ignores the locale, so convert a String.
   1.268 +      (let [^String one (String. cp-array 0 1)]
   1.269 +        (.append out (if (Character/isLowerCase cp)
   1.270 +                       (.toUpperCase one)
   1.271 +                       (.toLowerCase one)))))
   1.272 +    (str out)))
   1.273 +
   1.274 +(defn ^String capitalize
   1.275 +  "Upper-cases the first character of the string and lower-cases
   1.276 +  every character after it."
   1.277 +  {:deprecated "1.2"}
   1.278 +  [^String s]
   1.279 +  ;; Strings of length 0 or 1 are simply upper-cased as a whole.
   1.280 +  (cond (< (count s) 2) (.toUpperCase s)
   1.281 +        :else (str (.toUpperCase (.substring s 0 1))
   1.282 +                   (.toLowerCase (.substring s 1)))))
   1.283 +
   1.284 +(defn ^String ltrim
   1.285 +  "Strips leading whitespace from string.
   1.286 +   Deprecated in 1.2. Use clojure.string/triml."
   1.287 +  {:deprecated "1.2"}
   1.288 +  [^String s]
   1.289 +  (-> (re-matcher #"^\s+" s) (.replaceAll "")))
   1.290 +
   1.291 +(defn ^String rtrim
   1.292 +  "Strips trailing whitespace from string.
   1.293 +   Deprecated in 1.2. Use clojure.string/trimr."
   1.294 +  {:deprecated "1.2"}
   1.295 +  [^String s]
   1.296 +  (-> (re-matcher #"\s+$" s) (.replaceAll "")))
   1.297 +
   1.298 +(defn split-lines
   1.299 +  "Returns a seq of the lines of s, split on \\n or \\r\\n."
   1.300 +  {:deprecated "1.2"}
   1.301 +  [^String s]
   1.302 +  (-> #"\r?\n" (.split s) seq))
   1.303 +
   1.304 +;; borrowed from compojure.string, by James Reeves, EPL 1.0
   1.305 +(defn ^String map-str
   1.306 +  "Calls f on every element of coll and concatenates the results
   1.307 +  into a single String."
   1.308 +  [f coll]
   1.309 +  (->> coll (map f) (apply str)))
   1.310 +
   1.311 +;; borrowed from compojure.string, by James Reeves, EPL 1.0
   1.312 +(defn grep
   1.313 +  "Keeps the elements of coll whose String form (as given by str)
   1.314 +  contains a match for the regular expression, tested with re-find."
   1.315 +  [re coll]
   1.316 +  (filter #(re-find re (str %)) coll))
   1.317 +
   1.318 +(defn as-str
   1.319 +  "Like clojure.core/str, except that a keyword or symbol argument
   1.320 +  contributes its name rather than its literal representation.
   1.321 +
   1.322 +  Example:
   1.323 +     (str :foo :bar)     ;;=> \":foo:bar\"
   1.324 +     (as-str :foo :bar)  ;;=> \"foobar\"
   1.325 +
   1.326 +  Only top-level arguments are treated this way; keywords or symbols
   1.327 +  nested inside data structures are rendered exactly as str would.
   1.328 +
   1.329 +  Example:
   1.330 +     (str {:foo :bar})     ;;=> \"{:foo :bar}\"
   1.331 +     (as-str {:foo :bar})  ;;=> \"{:foo :bar}\" "
   1.332 +  ([] "")
   1.333 +  ([x] (cond (instance? clojure.lang.Named x) (name x)
   1.334 +             :else (str x)))
   1.335 +  ([x & ys]
   1.336 +     ;; Append each remaining argument's as-str form to one builder.
   1.337 +     (loop [^StringBuilder sb (StringBuilder. ^String (as-str x))
   1.338 +            more ys]
   1.339 +       (if more
   1.340 +         (recur (.append sb (as-str (first more))) (next more))
   1.341 +         (str sb)))))
   1.342 +
   1.343 +
   1.344 +;;; WRAPPERS
   1.345 +
   1.346 +;; The following functions are simple wrappers around java.lang.String
   1.347 +;; functions.  They are included here for completeness, and for use
   1.348 +;; when mapping over a collection of strings.
   1.349 +
   1.350 +(defn ^String upper-case
   1.351 +  "Returns string with all of its characters converted to upper-case."
   1.352 +  {:deprecated "1.2"}
   1.353 +  [^String s]
   1.354 +  (. s (toUpperCase)))
   1.355 +
   1.356 +(defn ^String lower-case
   1.357 +  "Returns string with all of its characters converted to lower-case."
   1.358 +  {:deprecated "1.2"}
   1.359 +  [^String s]
   1.360 +  (. s (toLowerCase)))
   1.361 +
   1.362 +(defn split
   1.363 +  "Splits string around matches of a regular expression.  The optional
   1.364 +  limit argument is the maximum number of splits."
   1.365 +  {:deprecated "1.2"}
   1.366 +  ([^Pattern re ^String s] (-> re (.split s) seq))
   1.367 +  ([^Pattern re limit ^String s] (-> re (.split s limit) seq)))
   1.368 +
   1.369 +(defn ^String trim
   1.370 +  "Returns string with leading and trailing whitespace removed."
   1.371 +  {:deprecated "1.2"}
   1.372 +  [^String s]
   1.373 +  (. s (trim)))
   1.374 +
   1.375 +(defn substring? ; untagged on purpose: .contains yields a boolean, so ^String was wrong
   1.376 +  "True if s contains the substring.  Returns a boolean."
   1.377 +  [substring ^String s]
   1.378 +  (.contains s substring))
   1.379 +
   1.380 +(defn get ; untagged on purpose: .charAt yields a char, so ^String was wrong
   1.381 +  "Gets the i'th character in string.  Returns a char, not a String."
   1.382 +  {:deprecated "1.2"}
   1.383 +  [^String s i]
   1.384 +  (.charAt s i))
   1.385 +