rlm@10
|
1 ;;; str_utils.clj -- string utilities for Clojure
|
rlm@10
|
2
|
rlm@10
|
3 ;; by Stuart Sierra <mail@stuartsierra.com>
|
rlm@10
|
4 ;; April 8, 2008
|
rlm@10
|
5
|
rlm@10
|
6 ;; Copyright (c) Stuart Sierra, 2008. All rights reserved. The use
|
rlm@10
|
7 ;; and distribution terms for this software are covered by the Eclipse
|
rlm@10
|
8 ;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
|
rlm@10
|
9 ;; which can be found in the file epl-v10.html at the root of this
|
rlm@10
|
10 ;; distribution. By using this software in any fashion, you are
|
rlm@10
|
11 ;; agreeing to be bound by the terms of this license. You must not
|
rlm@10
|
12 ;; remove this notice, or any other, from this software.
|
rlm@10
|
13
|
rlm@10
|
14 ;; DEPRECATED in 1.2: Promoted to clojure.java.string. Note that
|
rlm@10
|
15 ;; many function names and semantics have changed
|
rlm@10
|
16
|
rlm@10
|
17 (ns
|
rlm@10
|
18 ^{:author "Stuart Sierra",
|
rlm@10
|
19 :deprecated "1.2"
|
rlm@10
|
20 :doc "String utilities for Clojure"}
|
rlm@10
|
21 clojure.contrib.str-utils
|
rlm@10
|
22 (:import (java.util.regex Pattern)))
|
rlm@10
|
23
|
rlm@10
|
(defn re-split
  "Breaks 'string' apart wherever 'pattern' matches and returns the
  pieces as a sequence of strings. The optional 'limit' argument,
  when given, is handed unchanged to java.util.regex.Pattern.split to
  bound the splitting. Like Perl's 'split'."
  ([^Pattern pattern string]
     (seq (.split pattern string)))
  ([^Pattern pattern string limit]
     (seq (.split pattern string limit))))
|
rlm@10
|
30
|
rlm@10
|
(defn re-partition
  "Lazily cuts 'string' into alternating pieces: the text between
  matches of the pattern and the matches themselves. The first
  element is always the text that precedes the first match, which is
  an empty string when the string begins with a match. Text left over
  after the last match becomes the final element; nothing is appended
  when no text is left.

  For example: (re-partition #\"[a-z]+\" \"abc123def\")

  Returns: (\"\" \"abc\" \"123\" \"def\")"
  [^Pattern re string]
  (let [matcher (re-matcher re string)]
    (letfn [(pieces [from]
              (lazy-seq
               (if (.find matcher)
                 ;; Everything read from the matcher is captured
                 ;; before the tail is realized, because the next
                 ;; .find overwrites the matcher's state.
                 (list* (.subSequence string from (.start matcher))
                        (re-groups matcher)
                        (pieces (.end matcher)))
                 ;; No further match: emit the remaining text, if any.
                 (when (< from (.length string))
                   (list (.subSequence string from (.length string)))))))]
      (pieces 0))))
|
rlm@10
|
52
|
rlm@10
|
(defn re-gsub
  "Replaces all instances of 'pattern' in 'string' with
  'replacement'. Like Ruby's 'String#gsub'.

  If (ifn? replacement) is true, the replacement is called with each
  match and its result is spliced into the output; otherwise the
  replacement is handed to java.util.regex.Matcher.replaceAll.
  "
  [^java.util.regex.Pattern regex replacement ^String string]
  (if-not (ifn? replacement)
    (.replaceAll (.matcher regex string) replacement)
    ;; re-partition alternates non-match / match, so the matches sit
    ;; at the odd indices. The partition is realized in full before
    ;; any replacement call is made.
    (let [pieces (vec (re-partition regex string))]
      (apply str
             (map-indexed (fn [idx piece]
                            (if (odd? idx)
                              (replacement piece)
                              piece))
                          pieces)))))
|
rlm@10
|
68
|
rlm@10
|
(defn re-sub
  "Replaces the first instance of 'pattern' in 'string' with
  'replacement'. Like Ruby's 'String#sub'.

  If (ifn? replacement) is true, the replacement is called with
  the match and its result is spliced in; otherwise the replacement
  is handed to java.util.regex.Matcher.replaceFirst. The string is
  returned unchanged when a function replacement finds no match.
  "
  [^Pattern regex replacement ^String string]
  (if-not (ifn? replacement)
    (.replaceFirst (.matcher regex string) replacement)
    (let [matcher (.matcher regex string)]
      (if-not (.find matcher)
        string
        ;; prefix + transformed match + suffix
        (str (subs string 0 (.start matcher))
             (replacement (re-groups matcher))
             (subs string (.end matcher)))))))
|
rlm@10
|
85
|
rlm@10
|
86
|
rlm@10
|
(defn str-join
  "Concatenates the elements of 'sequence' into a single string with
  'separator' placed between each adjacent pair. Like Perl's 'join'."
  [separator sequence]
  (->> sequence
       (interpose separator)
       (apply str)))
|
rlm@10
|
92
|
rlm@10
|
93
|
rlm@10
|
(defn chop
  "Removes the last character of string. Does nothing on a
  zero-length string: the input is returned unchanged instead of
  raising StringIndexOutOfBoundsException. Because (count nil) is 0,
  nil is likewise returned as-is."
  [s]
  (let [size (count s)]
    (if (zero? size)
      s
      (subs s 0 (dec size)))))
|
rlm@10
|
98
|
rlm@10
|
(defn chomp
  "Strips every newline \\n or return \\r character found at the end
  of the string. Note: String.trim() is similar and faster."
  [s]
  ;; One replaceFirst is enough: the pattern greedily takes the whole
  ;; trailing run of line terminators.
  (.replaceFirst (re-matcher #"[\r\n]+$" s) ""))
|