Split a string into words separated by whitespace or other
delimiters. See the function
split in Python or
Perl.
(string-split " abc d e f ") ==> ("abc" "d" "e" "f")
(string-split " abc d e f " '() 1) ==> ("abc d e f ")
(string-split " abc d e f " '() 0) ==> ()
(string-split ":" '(#\:)) ==> ("" "")
(string-split ":abc:d:e::f:" '(#\:))
==> ("" "abc" "d" "e" "" "f" "")
(string-split "root:x:0:0:Lord" '(#\:) 2)
==> ("root" "x:0:0:Lord")
(string-split "/usr/local/bin:/usr/bin:/usr/ucb/bin" '(#\:))
==> ("/usr/local/bin" "/usr/bin" "/usr/ucb/bin")
(string-split "/usr/local/bin" '(#\/))
==> ("" "usr" "local" "bin")
string-split STRING -> STRINGS
string-split STRING '() -> STRINGS
string-split STRING '() MAXSPLIT -> STRINGS
These procedures return a list of whitespace-delimited words in
STRING. Leading and trailing whitespaces of the words are trimmed. If
STRING is empty or contains only whitespace, the empty list is returned.
string-split STRING CHARSET -> STRINGS
string-split STRING CHARSET MAXSPLIT -> STRINGS
These procedures return a list of words in
STRING delimited by the
characters in
CHARSET. The latter is a list of characters to be treated as delimiters. Leading or trailing delimiters of the words are not trimmed. That is, the resulting list will have as many initial empty string elements as there are leading delimiters in
STRING.
If
MAXSPLIT is specified and positive, the resulting list will contain at most
MAXSPLIT elements, the last of which is the string remaining after
(MAXSPLIT - 1) splits. If
MAXSPLIT is specified and non-positive, the empty list is returned. "In time-critical applications it behooves you not to split into more fields than you really need."
; string-split STRING [CHARSET [MAXSPLIT]] -> list of strings
;
; With no CHARSET, or with CHARSET = '(), STRING is split on runs of
; whitespace (char-whitespace?) and no empty words are produced.
; Otherwise CHARSET is a list of delimiter characters and every single
; delimiter terminates a field, so leading, trailing and adjacent
; delimiters yield empty strings.
; MAXSPLIT, when given and positive, caps the length of the result; the
; last element is then the unsplit remainder of STRING.  A non-positive
; MAXSPLIT, or an empty STRING, yields '().
;
; Relies on `inc' (increment by one) and `string-null?' being supplied
; by the host Scheme system.
(define (string-split str . rest)

  ; Whitespace mode.  maxsplit is a positive number.
  (define (split-by-whitespace str maxsplit)
    ; Skip blanks from index i; the first non-blank character opens a word.
    (define (skip-ws i yet-to-split-count)
      (cond
        ((>= i (string-length str)) '())
        ((char-whitespace? (string-ref str i))
         (skip-ws (inc i) yet-to-split-count))
        (else (scan-beg-word (inc i) i yet-to-split-count))))
    ; A word starts at `from'.  When no splits are left, the rest of the
    ; string (trailing whitespace included) becomes the final element.
    (define (scan-beg-word i from yet-to-split-count)
      (cond
        ((zero? yet-to-split-count)
         (cons (substring str from (string-length str)) '()))
        (else (scan-word i from yet-to-split-count))))
    ; Advance i to the end of the word that started at `from'.
    (define (scan-word i from yet-to-split-count)
      (cond
        ((>= i (string-length str))
         (cons (substring str from i) '()))
        ((char-whitespace? (string-ref str i))
         (cons (substring str from i)
               (skip-ws (inc i) (- yet-to-split-count 1))))
        (else (scan-word (inc i) from yet-to-split-count))))
    (skip-ws 0 (- maxsplit 1)))

  ; Delimiter mode.  delimiters is a non-empty list of characters,
  ; maxsplit is a positive number.
  (define (split-by-charset str delimiters maxsplit)
    ; A field starts at `from'.  Reaching the end of the string here means
    ; the previous character was a delimiter, hence one trailing "".
    (define (scan-beg-word from yet-to-split-count)
      (cond
        ((>= from (string-length str)) '(""))
        ((zero? yet-to-split-count)
         (cons (substring str from (string-length str)) '()))
        (else (scan-word from from yet-to-split-count))))
    ; Advance i to the next delimiter or to the end of the string.
    (define (scan-word i from yet-to-split-count)
      (cond
        ((>= i (string-length str))
         (cons (substring str from i) '()))
        ; memv, not memq: characters must be compared with eqv?, because
        ; eq? on characters is implementation-dependent.
        ((memv (string-ref str i) delimiters)
         (cons (substring str from i)
               (scan-beg-word (inc i) (- yet-to-split-count 1))))
        (else (scan-word (inc i) from yet-to-split-count))))
    (scan-beg-word 0 (- maxsplit 1)))

  ; Dispatch on the optional arguments.  The default maxsplit of
  ; length + 1 exceeds the number of splits any string can need, so it
  ; acts as "no limit".
  (if (string-null? str) '()
      (if (null? rest)
          (split-by-whitespace str (inc (string-length str)))
          (let ((charset (car rest))
                (maxsplit
                 (if (pair? (cdr rest)) (cadr rest) (inc (string-length str)))))
            (cond
              ((not (positive? maxsplit)) '())
              ((null? charset) (split-by-whitespace str maxsplit))
              (else (split-by-charset str charset maxsplit))))))
)
Here
inc is a macro or a function that returns the incremented argument. On many Scheme systems, it can be implemented more efficiently than merely
(+ 1 x) if we assume that
x is a fixnum.
http://pobox.com/~oleg/ftp/Scheme/util.html#string-split
Portability note: the delimiter test should use memv rather than memq, because eq? (and therefore memq) is not guaranteed to work on characters; on some Scheme systems (eq? #\A #\A) ==> #f. --
JohnRussell - 02 Mar 2005
If you only need to split on 1 character and you never need to split portions of the string, here's a simpler and potentially more efficient function:
; Split STR on the single character CH and return the list of non-empty
; pieces.  Leading, trailing and repeated occurrences of CH are dropped,
; so no empty strings ever appear in the result.
(define (str-split str ch)
  (define end (string-length str))
  ; `start' is where the current piece begins, `pos' is the scan position.
  (let scan ((start 0) (pos 0))
    (cond
      ((< pos end)
       (cond
         ; ordinary character: keep extending the current piece
         ((not (char=? ch (string-ref str pos)))
          (scan start (+ 1 pos)))
         ; delimiter with nothing collected yet: step over it
         ((= start pos)
          (scan (+ 1 start) (+ 1 pos)))
         ; delimiter closing a piece: emit it, then rescan from the
         ; delimiter itself so the clause above steps over it
         (else
          (cons (substring str start pos) (scan pos pos)))))
      ; end of string: emit the pending piece unless it is empty
      ((= start pos) '())
      (else (list (substring str start pos))))))
--
DougHoyte - 20 Apr 2006
The version above by
DougHoyte doesn't entirely follow the original spec: it eats the delimiters, so leading, trailing and repeated delimiters never produce empty strings:
> (str-split " a b c" #\space)
("a" "b" "c")
This version doesn't eat them:
; Split STR on the single character CH without swallowing delimiters:
; every occurrence of CH ends a field, so leading, trailing and adjacent
; delimiters produce empty strings.  The empty string yields ("").
(define (str-split-1 str ch)
  (define len (string-length str))
  ; Return the list of fields of STR beginning at index `start'.
  (define (fields-from start)
    (let find-end ((pos start))
      (cond
        ; end of string: whatever was collected (possibly "") is the last field
        ((>= pos len)
         (list (substring str start pos)))
        ; delimiter: close the current field and continue just past it
        ((char=? ch (string-ref str pos))
         (cons (substring str start pos)
               (fields-from (+ 1 pos))))
        (else (find-end (+ 1 pos))))))
  (fields-from 0))
Example:
> (str-split-1 " a  b c" #\space)
("" "a" "" "b" "c")
--
NoelWelsh - 19 May 2006
To understand the (delimiter-eating) version above by
DougHoyte - 20 Apr 2006 I added comments etc:
; Delimiter-eating split: return the non-empty segments of WHOLE-STR that
; lie between occurrences of the character DELIMITER.  Leading, trailing
; and repeated delimiters contribute nothing to the result.
(define (str-split whole-str delimiter)
  (let ((str-len (string-length whole-str)))
    ; seg-start .. seg-end is the segment currently being scanned;
    ; `found' holds the finished segments, most recent first.
    (let loop ((seg-start 0) (seg-end 0) (found '()))
      (cond
        ; end of string: flush the pending segment unless it is empty
        ((>= seg-end str-len)
         (reverse
          (if (= seg-start seg-end)
              found
              (cons (substring whole-str seg-start seg-end) found))))
        ; ordinary character: grow the segment
        ((not (char=? delimiter (string-ref whole-str seg-end)))
         (loop seg-start (+ 1 seg-end) found))
        ; delimiter at the very start of a segment: step over it
        ((= seg-start seg-end)
         (loop (+ 1 seg-start) (+ 1 seg-end) found))
        ; delimiter closing a non-empty segment: record it and restart
        ; at the delimiter, which the previous clause then steps over
        (else
         (loop seg-end
               seg-end
               (cons (substring whole-str seg-start seg-end) found)))))))
> (str-split " abc d e f " #\space)
("abc" "d" "e" "f")
--
GeorgeHerson - 18 Mar 2007
--
OlegK - 14 Sep 2004