;   Copyright (c) Rich Hickey. All rights reserved.
;   The use and distribution terms for this software are covered by the
;   Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;   which can be found in the file epl-v10.html at the root of this distribution.
;   By using this software in any fashion, you are agreeing to be bound by
;   the terms of this license.
;   You must not remove this notice, or any other, from this software.

(ns ^{:doc "Clojure String utilities

It is poor form to (:use clojure.string). Instead, use require
with :as to specify a prefix, e.g.

(ns your.namespace.here
  (:require [clojure.string :as str]))

Design notes for clojure.string:

1. Strings are objects (as opposed to sequences). As such, the
   string being manipulated is the first argument to a function;
   passing nil will result in a NullPointerException unless
   documented otherwise. If you want sequence-y behavior instead,
   use a sequence.

2. Functions are generally not lazy, and call straight to host
   methods where those are available and efficient.

3. Functions take advantage of String implementation details to
   write high-performing loop/recurs instead of using higher-order
   functions. (This is not idiomatic in general-purpose application
   code.)

4. When a function is documented to accept a string argument, it
   will take any implementation of the correct *interface* on the
   host platform. In Java, this is CharSequence, which is more
   general than String. In ordinary usage you will almost always
   pass concrete strings. If you are doing something unusual,
   e.g. passing a mutable implementation of CharSequence, then
   thread-safety is your responsibility."
      :author "Stuart Sierra, Stuart Halloway, David Liebke"}
  clojure.string
  (:refer-clojure :exclude (replace reverse))
  (:import (java.util.regex Pattern)
           clojure.lang.LazilyPersistentVector))

(defn ^String reverse
  "Returns s with its characters reversed."
  {:added "1.2"}
  [^CharSequence s]
  (.toString (.reverse (StringBuilder. s))))

(defn- replace-by
  "Replaces every match of re in s with (f (re-groups m)).
  The value returned by f is handed to Matcher.appendReplacement as-is,
  so any $ or \\ in it is interpreted by the Java regex engine."
  [^CharSequence s re f]
  (let [m (re-matcher re s)
        buffer (StringBuffer. (.length s))]
    (loop []
      (if (.find m)
        (do (.appendReplacement m buffer (f (re-groups m)))
            (recur))
        (do (.appendTail m buffer)
            (.toString buffer))))))

(defn ^String replace
  "Replaces all instances of match with replacement in s.

   match/replacement can be:

   string / string
   char / char
   pattern / (string or function of match).

   Throws IllegalArgumentException for any other type of match.

   See also replace-first."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [s (.toString s)]
    (cond
     ;; Unbox explicitly so the String.replace(char, char) overload is used.
     (instance? Character match)
     (.replace s (.charValue ^Character match) (.charValue ^Character replacement))

     ;; String.replace(CharSequence, CharSequence) is a literal replacement.
     (instance? CharSequence match)
     (.replace s ^CharSequence match ^CharSequence replacement)

     (instance? Pattern match)
     (if (instance? CharSequence replacement)
       (.replaceAll (re-matcher ^Pattern match s)
                    (.toString ^CharSequence replacement))
       (replace-by s match replacement))

     :else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))

(defn- replace-first-by
  "Replaces the first match of re in s with (f (re-groups m)).
  Returns s (as a String) unchanged when re does not match."
  [^CharSequence s ^Pattern re f]
  (let [m (re-matcher re s)]
    (if (.find m)
      (let [buffer (StringBuffer. (.length s))
            rep (f (re-groups m))]
        (.appendReplacement m buffer rep)
        (.appendTail m buffer)
        (str buffer))
      ;; No match: hand back the input rather than nil.
      (.toString s))))

(defn- replace-first-char
  "Replaces the first occurrence of the character match in s with replace."
  [^CharSequence s ^Character match replace]
  (let [s (.toString s)
        i (.indexOf s (int match))]
    (if (= -1 i)
      s
      (str (subs s 0 i) replace (subs s (inc i))))))

(defn- replace-first-str
  "Replaces the first literal occurrence of the string match in s with
  replacement. Neither argument is treated as a regular expression, so
  characters such as $ and \\ in replacement are inserted verbatim."
  [^CharSequence s ^String match ^String replacement]
  (let [^String s (.toString s)
        i (.indexOf s match)]
    (if (= -1 i)
      s
      (str (subs s 0 i) replacement (subs s (+ i (.length match)))))))

(defn ^String replace-first
  "Replaces the first instance of match with replacement in s.

   match/replacement can be:

   char / char
   string / string
   pattern / (string or function of match).

   Returns s unchanged when match does not occur.
   Throws IllegalArgumentException for any other type of match.

   See also replace."
  {:added "1.2"}
  [^CharSequence s match replacement]
  (let [s (.toString s)]
    (cond
     (instance? Character match)
     (replace-first-char s match replacement)

     ;; Literal string replacement; deliberately avoids String.replaceFirst,
     ;; which would interpret $ and \ in the replacement.
     (instance? CharSequence match)
     (replace-first-str s
                        (.toString ^CharSequence match)
                        (.toString ^CharSequence replacement))

     (instance? Pattern match)
     (if (instance? CharSequence replacement)
       (.replaceFirst (re-matcher ^Pattern match s)
                      (.toString ^CharSequence replacement))
       (replace-first-by s match replacement))

     :else (throw (IllegalArgumentException. (str "Invalid match arg: " match))))))


(defn ^String join
  "Returns a string of all elements in coll, separated by
   an optional separator.  Like Perl's join."
  {:added "1.2"}
  ([coll]
     (apply str coll))
  ([separator [x & more]]
     ;; Seed the builder with the first element, then append
     ;; separator + element for each remaining one.
     (loop [sb (StringBuilder. (str x))
            more more
            sep (str separator)]
       (if more
         (recur (-> sb (.append sep) (.append (str (first more))))
                (next more)
                sep)
         (str sb)))))

(defn ^String capitalize
  "Converts first character of the string to upper-case, all other
  characters to lower-case."
  {:added "1.2"}
  [^CharSequence s]
  (let [s (.toString s)]
    (if (< (count s) 2)
      (.toUpperCase s)
      (str (.toUpperCase (subs s 0 1))
           (.toLowerCase (subs s 1))))))

(defn ^String upper-case
  "Converts string to all upper-case."
  {:added "1.2"}
  [^CharSequence s]
  (.. s toString toUpperCase))

(defn ^String lower-case
  "Converts string to all lower-case."
  {:added "1.2"}
  [^CharSequence s]
  (.. s toString toLowerCase))

(defn split
  "Splits string on a regular expression.  Optional argument limit is
  the maximum number of splits. Not lazy. Returns vector of the splits."
  {:added "1.2"}
  ([^CharSequence s ^Pattern re]
     (LazilyPersistentVector/createOwning (.split re s)))
  ([^CharSequence s ^Pattern re limit]
     (LazilyPersistentVector/createOwning (.split re s limit))))

(defn split-lines
  "Splits s on \\n or \\r\\n."
  {:added "1.2"}
  [^CharSequence s]
  (split s #"\r?\n"))

(defn ^String trim
  "Removes whitespace from both ends of string."
  {:added "1.2"}
  [^CharSequence s]
  (.. s toString trim))

(defn ^String triml
  "Removes whitespace from the left side of string."
  {:added "1.2"}
  [^CharSequence s]
  ;; Scan forward to the first non-whitespace character.
  (loop [index (int 0)]
    (if (= (.length s) index)
      ""
      (if (Character/isWhitespace (.charAt s index))
        (recur (inc index))
        (.. s (subSequence index (.length s)) toString)))))

(defn ^String trimr
  "Removes whitespace from the right side of string."
  {:added "1.2"}
  [^CharSequence s]
  ;; index is one past the last character still under consideration.
  (loop [index (.length s)]
    (if (zero? index)
      ""
      (if (Character/isWhitespace (.charAt s (dec index)))
        (recur (dec index))
        (.. s (subSequence 0 index) toString)))))

(defn ^String trim-newline
  "Removes all trailing newline \\n or return \\r characters from
  string.  Similar to Perl's chomp."
  {:added "1.2"}
  [^CharSequence s]
  (loop [index (.length s)]
    (if (zero? index)
      ""
      (let [ch (.charAt s (dec index))]
        (if (or (= ch \newline) (= ch \return))
          (recur (dec index))
          (.. s (subSequence 0 index) toString))))))

(defn blank?
  "True if s is nil, empty, or contains only whitespace."
  {:added "1.2"}
  [^CharSequence s]
  (if s
    (loop [index (int 0)]
      (if (= (.length s) index)
        true
        (if (Character/isWhitespace (.charAt s index))
          (recur (inc index))
          false)))
    true))

(defn ^String escape
  "Return a new string, using cmap to escape each character ch
   from s as follows:

   If (cmap ch) is nil, append ch to the new string.
   If (cmap ch) is non-nil, append (str (cmap ch)) instead."
  {:added "1.2"}
  [^CharSequence s cmap]
  (loop [index (int 0)
         buffer (StringBuilder. (.length s))]
    (if (= (.length s) index)
      (.toString buffer)
      (let [ch (.charAt s index)]
        (if-let [replacement (cmap ch)]
          (.append buffer replacement)
          (.append buffer ch))
        (recur (inc index) buffer)))))