annotate src/clojure/contrib/str_utils.clj @ 10:ef7dbbd6452c

added clojure source goodness
author Robert McIntyre <rlm@mit.edu>
date Sat, 21 Aug 2010 06:25:44 -0400
parents
children
rev   line source
rlm@10 1 ;;; str_utils.clj -- string utilities for Clojure
rlm@10 2
rlm@10 3 ;; by Stuart Sierra <mail@stuartsierra.com>
rlm@10 4 ;; April 8, 2008
rlm@10 5
rlm@10 6 ;; Copyright (c) Stuart Sierra, 2008. All rights reserved. The use
rlm@10 7 ;; and distribution terms for this software are covered by the Eclipse
rlm@10 8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
rlm@10 9 ;; which can be found in the file epl-v10.html at the root of this
rlm@10 10 ;; distribution. By using this software in any fashion, you are
rlm@10 11 ;; agreeing to be bound by the terms of this license. You must not
rlm@10 12 ;; remove this notice, or any other, from this software.
rlm@10 13
rlm@10 14 ;; DEPRECATED in 1.2: Promoted to clojure.java.string. Note that
rlm@10 15 ;; many function names and semantics have changed
rlm@10 16
rlm@10 17 (ns
rlm@10 18 ^{:author "Stuart Sierra",
rlm@10 19 :deprecated "1.2"
rlm@10 20 :doc "String utilities for Clojure"}
rlm@10 21 clojure.contrib.str-utils
rlm@10 22 (:import (java.util.regex Pattern)))
rlm@10 23
(defn re-split
  "Splits 'string' around matches of 'pattern' and returns the pieces
  as a sequence of strings (nil when there are no pieces).  The
  optional 'limit' is handed straight to java.util.regex.Pattern.split;
  omitting it is the same as passing 0.  Like Perl's 'split'."
  ([^Pattern pattern string]
     ;; Pattern.split(input) is defined as Pattern.split(input, 0).
     (re-split pattern string 0))
  ([^Pattern pattern string limit]
     (seq (.split pattern string limit))))
rlm@10 30
(defn re-partition
  "Splits the string into a lazy sequence of substrings, alternating
  between substrings that match the pattern and the substrings
  between the matches.  The sequence always starts with the substring
  before the first match, or an empty string if the beginning of the
  string matches.

  Matching elements are whatever re-groups returns for that match: a
  string when the pattern has no capturing groups, otherwise a vector
  of the whole match followed by its groups.

  For example: (re-partition #\"[a-z]+\" \"abc123def\")

  Returns: (\"\" \"abc\" \"123\" \"def\")"
  ;; The CharSequence hint lets .subSequence/.length resolve at compile
  ;; time (no reflection) while still accepting any CharSequence.
  [^Pattern re ^CharSequence string]
  (let [m (re-matcher re string)]
    ;; A single Matcher is advanced as the lazy sequence is realized;
    ;; 'prevend' is the index just past the previous match.
    ((fn step [prevend]
       (lazy-seq
        (if (.find m)
          ;; Text between the previous match and this one, then the match.
          (cons (.subSequence string prevend (.start m))
                (cons (re-groups m)
                      (step (.end m))))
          ;; No more matches: emit the remaining tail, if any.
          (when (< prevend (.length string))
            (list (.subSequence string prevend (.length string)))))))
     0)))
rlm@10 52
(defn re-gsub
  "Replaces every match of 'regex' in 'string' with 'replacement'.
  Like Ruby's 'String#gsub'.

  When (ifn? replacement) is true, 'replacement' is called with each
  match (as produced by re-partition) and its result is spliced into
  the output; otherwise 'replacement' is handed to Matcher.replaceAll."
  [^java.util.regex.Pattern regex replacement ^String string]
  (if-not (ifn? replacement)
    (.replaceAll (re-matcher regex string) replacement)
    ;; re-partition alternates non-match / match, so the matches sit at
    ;; the odd indices of its result.
    (apply str
           (map-indexed (fn [idx piece]
                          (if (odd? idx)
                            (replacement piece)
                            piece))
                        (re-partition regex string)))))
rlm@10 68
(defn re-sub
  "Replaces the first match of 'regex' in 'string' with 'replacement'.
  Like Ruby's 'String#sub'.

  When (ifn? replacement) is true, 'replacement' is called with the
  match (the value of re-groups) and its result is spliced in;
  otherwise 'replacement' is handed to Matcher.replaceFirst.  If a
  function is given and nothing matches, 'string' is returned as is."
  [^Pattern regex replacement ^String string]
  (let [m (re-matcher regex string)]
    (cond
      ;; Plain replacement text: let the regex engine do the work.
      (not (ifn? replacement))
      (.replaceFirst m replacement)

      ;; Function replacement: rebuild the string around the first match.
      (.find m)
      (str (subs string 0 (.start m))
           (replacement (re-groups m))
           (subs string (.end m)))

      :else
      string)))
rlm@10 85
rlm@10 86
(defn str-join
  "Concatenates the elements of 'sequence' into a single string with
  'separator' placed between consecutive elements.  Like Perl's 'join'."
  [separator sequence]
  (->> sequence
       (interpose separator)
       (apply str)))
rlm@10 92
rlm@10 93
(defn chop
  "Removes the last character of string.  An empty (or nil) string
  yields the empty string rather than throwing."
  [s]
  ;; Guard first: (subs \"\" 0 -1) would throw StringIndexOutOfBoundsException.
  (if (empty? s)
    ""
    (subs s 0 (dec (count s)))))
rlm@10 98
(defn chomp
  "Strips the run of newline \\n and return \\r characters at the end
  of the string, if there is one.  Note: String.trim() is similar and
  faster."
  [s]
  ;; Replace the trailing run of line terminators (if any) with nothing.
  (.replaceFirst (re-matcher #"[\r\n]+$" s) ""))