diff src/clojure/string.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/clojure/string.clj	Sat Aug 21 06:25:44 2010 -0400
     1.3 @@ -0,0 +1,254 @@
     1.4 +;   Copyright (c) Rich Hickey. All rights reserved.
     1.5 +;   The use and distribution terms for this software are covered by the
     1.6 +;   Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
     1.7 +;   which can be found in the file epl-v10.html at the root of this distribution.
     1.8 +;   By using this software in any fashion, you are agreeing to be bound by
     1.9 +;   the terms of this license.
    1.10 +;   You must not remove this notice, or any other, from this software.
    1.11 +
    1.12 +(ns ^{:doc "Clojure String utilities
    1.13 +
    1.14 +It is poor form to (:use clojure.string). Instead, use require
    1.15 +with :as to specify a prefix, e.g.
    1.16 +
    1.17 +(ns your.namespace.here
    1.18 +  (:require [clojure.string :as str]))
    1.19 +
    1.20 +Design notes for clojure.string:
    1.21 +
    1.22 +1. Strings are objects (as opposed to sequences). As such, the
    1.23 +   string being manipulated is the first argument to a function;
    1.24 +   passing nil will result in a NullPointerException unless
    1.25 +   documented otherwise. If you want sequence-y behavior instead,
    1.26 +   use a sequence.
    1.27 +
    1.28 +2. Functions are generally not lazy, and call straight to host
    1.29 +   methods where those are available and efficient.
    1.30 +
    1.31 +3. Functions take advantage of String implementation details to
    1.32 +   write high-performing loop/recurs instead of using higher-order
    1.33 +   functions. (This is not idiomatic in general-purpose application
    1.34 +   code.)
    1.35 +
    1.36 +4. When a function is documented to accept a string argument, it
    1.37 +   will take any implementation of the correct *interface* on the
    1.38 +   host platform. In Java, this is CharSequence, which is more
    1.39 +   general than String. In ordinary usage you will almost always
    1.40 +   pass concrete strings. If you are doing something unusual,
    1.41 +   e.g. passing a mutable implementation of CharSequence, then
    1.42 +   thread-safety is your responsibility."
    1.43 +      :author "Stuart Sierra, Stuart Halloway, David Liebke"}
    1.44 +  clojure.string
    1.45 +  (:refer-clojure :exclude (replace reverse))
    1.46 +  (:import (java.util.regex Pattern)
    1.47 +           clojure.lang.LazilyPersistentVector))
    1.48 +
    1.49 +(defn ^String reverse
    1.50 +  "Returns a new String holding the characters of s in reverse order."
    1.51 +  {:added "1.2"}
    1.52 +  [^CharSequence s]
    1.53 +  (-> (StringBuilder. s) .reverse .toString))
    1.54 +
    1.55 +(defn- replace-by
    1.56 +  "Replaces each match of re in s with the literal string (f (re-groups m))."
    1.57 +  [^CharSequence s re f]
    1.58 +  (let [m (re-matcher re s)
    1.59 +        buffer (StringBuffer. (.length s))]
    1.60 +    (while (.find m)
    1.61 +      ;; quote f's result so `$` and `\` are not parsed as group references/escapes
    1.62 +      (.appendReplacement m buffer (java.util.regex.Matcher/quoteReplacement (f (re-groups m)))))
    1.63 +    (.appendTail m buffer)
    1.64 +    (.toString buffer)))
    1.65 +
    1.66 +(defn ^String replace
    1.67 +  "Replaces every occurrence of match in s with replacement.
    1.68 +
    1.69 +   Supported match / replacement pairs:
    1.70 +
    1.71 +   string / string
    1.72 +   char / char
    1.73 +   pattern / (string or function of match).
    1.74 +
    1.75 +   See also replace-first."
    1.76 +  {:added "1.2"}
    1.77 +  [^CharSequence s match replacement]
    1.78 +  (let [text (.toString s)]
    1.79 +    (condp instance? match
    1.80 +      Character    (.replace text ^Character match ^Character replacement)
    1.81 +      CharSequence (.replace text ^CharSequence match ^CharSequence replacement)
    1.82 +      Pattern      (if-not (instance? CharSequence replacement)
    1.83 +                     (replace-by text match replacement)
    1.84 +                     (.replaceAll (re-matcher ^Pattern match text)
    1.85 +                                  (.toString ^CharSequence replacement)))
    1.86 +      (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
    1.87 +
    1.88 +(defn- replace-first-by
    1.89 +  "Replaces the first match of re in s with the literal (f (re-groups m)); returns s as a String when nothing matches."
    1.90 +  [^CharSequence s ^Pattern re f]
    1.91 +  (let [m (re-matcher re s)]
    1.92 +    (if-not (.find m)
    1.93 +      (.toString s) ; no match: hand back the input instead of falling through to nil
    1.94 +      (let [buffer (StringBuffer. (.length s))]
    1.95 +        (.appendReplacement m buffer (java.util.regex.Matcher/quoteReplacement (f (re-groups m))))
    1.96 +        (str (.appendTail m buffer))))))
    1.97 +
    1.98 +(defn- replace-first-char
    1.99 +  "Swaps the first occurrence of the char match in s for replace; s is returned as-is when match is absent."
   1.100 +  [^CharSequence s ^Character match replace]
   1.101 +  (let [text (.toString s), pos (.indexOf text (int match))]
   1.102 +    (if (neg? pos)
   1.103 +      text
   1.104 +      (str (subs text 0 pos) replace (subs text (inc pos))))))
   1.105 +
   1.106 +(defn ^String replace-first
   1.107 +  "Replaces the first instance of match with replacement in s.
   1.108 +
   1.109 +   match/replacement can be:
   1.110 +
   1.111 +   char / char
   1.112 +   string / string (both taken literally)
   1.113 +   pattern / (string or function of match).
   1.114 +
   1.115 +   See also replace."
   1.116 +  {:added "1.2"}
   1.117 +  [^CharSequence s match replacement]
   1.118 +  (let [s (.toString s)]
   1.119 +    (cond
   1.120 +     (instance? Character match)
   1.121 +     (replace-first-char s match replacement)
   1.122 +     (instance? CharSequence match) ; literal text on both sides: quote pattern and replacement
   1.123 +     (.replaceFirst s (Pattern/quote (.toString ^CharSequence match))
   1.124 +                    (java.util.regex.Matcher/quoteReplacement (.toString ^CharSequence replacement)))
   1.125 +     (instance? Pattern match)
   1.126 +     (if (instance? CharSequence replacement)
   1.127 +       (.replaceFirst (re-matcher ^Pattern match s)
   1.128 +                      (.toString ^CharSequence replacement))
   1.129 +       (replace-first-by s match replacement))
   1.130 +     :else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))
   1.131 +
   1.132 +
   1.133 +(defn ^String join
   1.134 +  "Concatenates the string forms of the elements of coll, placing
   1.135 +   the optional separator between neighbours.  Like Perl's join."
   1.136 +  {:added "1.2"}
   1.137 +  ([coll]
   1.138 +     (apply str coll))
   1.139 +  ([separator [x & more]]
   1.140 +     ;; seed the builder with the first element, then append
   1.141 +     ;; separator + element for each remaining one
   1.142 +     (let [sb    (StringBuilder. (str x))
   1.143 +           delim (str separator)]
   1.144 +       (doseq [item more]
   1.145 +         (.append sb delim)
   1.146 +         (.append sb (str item)))
   1.147 +       (str sb))))
   1.148 +
   1.149 +(defn ^String capitalize
   1.150 +  "Upper-cases the first character of s and lower-cases every
   1.151 +  character after it."
   1.152 +  {:added "1.2"}
   1.153 +  [^CharSequence s]
   1.154 +  (let [text (.toString s)]
   1.155 +    (if (>= (.length text) 2)
   1.156 +      (str (.toUpperCase (.substring text 0 1))
   1.157 +           (.toLowerCase (.substring text 1)))
   1.158 +      (.toUpperCase text))))
   1.159 +
   1.160 +(defn ^String upper-case
   1.161 +  "Returns s as a String with every character converted to upper-case."
   1.162 +  {:added "1.2"}
   1.163 +  [^CharSequence s]
   1.164 +  (.toUpperCase (.toString s)))
   1.165 +
   1.166 +(defn ^String lower-case
   1.167 +  "Returns s as a String with every character converted to lower-case."
   1.168 +  {:added "1.2"}
   1.169 +  [^CharSequence s]
   1.170 +  (.toLowerCase (.toString s)))
   1.171 +
   1.172 +(defn split
   1.173 +  "Cuts s apart at every match of the regular expression re and eagerly returns
   1.174 +  the pieces as a vector.  The optional limit is passed through to Pattern.split."
   1.175 +  {:added "1.2"}
   1.176 +  ([^CharSequence s ^Pattern re]
   1.177 +     (-> (.split re s) LazilyPersistentVector/createOwning))
   1.178 +  ([^CharSequence s ^Pattern re limit]
   1.179 +     (-> (.split re s limit) LazilyPersistentVector/createOwning)))
   1.180 +
   1.181 +(defn split-lines
   1.182 +  "Splits s into lines, breaking on \\n or \\r\\n, by delegating to split."
   1.183 +  {:added "1.2"}
   1.184 +  [^CharSequence s]
   1.185 +  (split s #"\r?\n"))
   1.186 +
   1.187 +(defn ^String trim
   1.188 +  "Returns s as a String with whitespace stripped from both ends (via String.trim)."
   1.189 +  {:added "1.2"}
   1.190 +  [^CharSequence s]
   1.191 +  (.trim (.toString s)))
   1.192 +
   1.193 +(defn ^String triml
   1.194 +  "Strips leading whitespace, as judged by Character/isWhitespace,
   1.195 +  from s."
   1.196 +  {:added "1.2"}
   1.197 +  [^CharSequence s]
   1.198 +  (loop [start (int 0)]
   1.199 +    (cond
   1.200 +      (= (.length s) start) ""
   1.201 +      (Character/isWhitespace (.charAt s start)) (recur (inc start))
   1.202 +      :else (.toString (.subSequence s start (.length s))))))
   1.203 +
   1.204 +(defn ^String trimr
   1.205 +  "Strips trailing whitespace, as judged by Character/isWhitespace,
   1.206 +  from s."
   1.207 +  {:added "1.2"}
   1.208 +  [^CharSequence s]
   1.209 +  (loop [end (.length s)]
   1.210 +    (cond
   1.211 +      (zero? end) ""
   1.212 +      (Character/isWhitespace (.charAt s (dec end))) (recur (dec end))
   1.213 +      :else (.toString (.subSequence s 0 end)))))
   1.214 +
   1.215 +(defn ^String trim-newline
   1.216 +  "Strips every trailing \\n and \\r character from s, in the manner
   1.217 +  of Perl's chomp."
   1.218 +  {:added "1.2"}
   1.219 +  [^CharSequence s]
   1.220 +  (loop [end (.length s)]
   1.221 +    (if (pos? end)
   1.222 +      (let [last-ch (.charAt s (dec end))]
   1.223 +        (if (contains? #{\newline \return} last-ch)
   1.224 +          (recur (dec end))
   1.225 +          (.toString (.subSequence s 0 end))))
   1.226 +      "")))
   1.227 +
   1.228 +(defn blank?
   1.229 +  "Returns true when s is nil, has no characters, or is made up
   1.230 +  entirely of whitespace (per Character/isWhitespace); else false."
   1.231 +  {:added "1.2"}
   1.232 +  [^CharSequence s]
   1.233 +  ;; a missing (nil) string counts as blank
   1.234 +  (or (not s)
   1.235 +      (loop [pos (int 0)]
   1.236 +        (cond
   1.237 +          (= (.length s) pos) true
   1.238 +          (Character/isWhitespace (.charAt s pos)) (recur (inc pos))
   1.239 +          :else false))))
   1.240 +
   1.241 +(defn ^String escape
   1.242 +  "Produces a new string by passing each character ch of s through cmap:
   1.243 +
   1.244 +   when (cmap ch) is nil, ch itself is appended to the result;
   1.245 +   otherwise (cmap ch) is appended in place of ch."
   1.246 +  {:added "1.2"}
   1.247 +  [^CharSequence s cmap]
   1.248 +  (let [out (StringBuilder. (.length s))]
   1.249 +    (loop [pos (int 0)]
   1.250 +      (when-not (= (.length s) pos)
   1.251 +        (let [ch (.charAt s pos)
   1.252 +              mapped (cmap ch)]
   1.253 +          (if mapped
   1.254 +            (.append out mapped)
   1.255 +            (.append out ch)))
   1.256 +        (recur (inc pos))))
   1.257 +    (.toString out)))