summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ori Bernstein <ori@eigenstate.org> 2016-05-25 00:07:06 -0700
committer Ori Bernstein <ori@eigenstate.org> 2016-05-25 00:07:06 -0700
commit 336413341a83ffd0294b16e2debe1ec5132fa19b (patch)
tree 82993c3418c11c395035b5c2377d9b5f729e8f47
parent a849073c6d1c7174ba648155bcca935a00faab0f (diff)
download mc-336413341a83ffd0294b16e2debe1ec5132fa19b.tar.gz
Add more string split utility functions.
-rw-r--r-- lib/std/strsplit.myr 67
-rw-r--r-- lib/std/test/strsplit.myr 21
2 files changed, 77 insertions, 11 deletions
diff --git a/lib/std/strsplit.myr b/lib/std/strsplit.myr
index ba15615..60fd039 100644
--- a/lib/std/strsplit.myr
+++ b/lib/std/strsplit.myr
@@ -10,40 +10,78 @@ use "utf"
pkg std =
/* split s at every occurrence of delim; the result slice is grown as needed */
const strsplit : (s : byte[:], delim : byte[:] -> byte[:][:])
/* like strsplit, but the pieces are stored in the caller-supplied buffer sp */
+ const bstrsplit : (sp : byte[:][:], s : byte[:], delim : byte[:] -> byte[:][:])
/* split s into whitespace-separated tokens; the result slice is grown as needed */
const strtok : (s : byte[:] -> byte[:][:])
/* like strtok, but the tokens are stored in the caller-supplied buffer sp */
+ const bstrtok : (sp : byte[:][:], s : byte[:] -> byte[:][:])
;;
extern const put : (fmt : byte[:], args : ... -> size)
/*
 Splits s at every occurrence of delim and returns the pieces in order.

 Starts from an empty slice and hands its address to dostrsplit with
 grow = true, so every piece is appended with slpush. Each piece is a
 sub-slice of s, not a copy. An empty s gives an empty result, and a
 trailing delimiter gives a trailing empty piece.

 NOTE(review): the returned slice is built by slpush, so the caller
 presumably owns it and must free it -- confirm against slpush.
 */
const strsplit = {s, delim
- var last
var sp
sp = [][:]
+ -> dostrsplit(&sp, s, delim, true)
+}
+
/*
 Buffered variant of strsplit: splits s at delim, storing the pieces in
 the caller-supplied buffer sp (dostrsplit is called with grow = false).

 Returns the filled prefix of sp. If s has more pieces than sp has
 slots, the last slot receives the remaining, unsplit tail of s.

 NOTE(review): what happens with a zero-length sp is decided inside
 dostrsplit; pass a buffer with at least one slot.
 */
+const bstrsplit = {sp, s, delim
+ -> dostrsplit(&sp, s, delim, false)
+}
+
+/*
+ Shared worker behind strsplit and bstrsplit.
+
+ sp    : pointer to the output slice. With grow = true it is appended to
+         with slpush; with grow = false it is treated as a fixed-size
+         buffer and written by index.
+ s     : the string to split. Pieces are sub-slices of s, not copies.
+ delim : the separator, located with strfind.
+ grow  : selects the growing (true) or buffered (false) mode.
+
+ Returns the first idx elements of sp#, where idx is the number of
+ pieces stored. An empty s yields an empty result. In buffered mode the
+ final slot receives the unsplit remainder once the buffer is full, and
+ a zero-length buffer yields an empty result.
+ */
+const dostrsplit : (sp : byte[:][:]#, s : byte[:], delim : byte[:], grow : bool -> byte[:][:]) = {sp : byte[:][:]#, s, delim, grow
+ var idx
+
+ idx = 0
+
+ /*
+ A fixed buffer with no slots cannot hold even the remainder piece.
+ Return it empty instead of computing sp#.len - 1 on a zero length
+ and storing through sp#[idx] past the end of the buffer.
+ */
+ if !grow && sp#.len == 0
+ -> sp#[:0]
+ ;;
+
if s.len == 0
- -> sp
+ -> sp#[:idx]
;;
- last = 0
+
while true
match strfind(s, delim)
| `Some i:
+ if grow
+ slpush(sp, s[:i])
+ /* keep the last slot free for the remainder; sp#.len >= 1 here in buffered mode */
+ elif idx < sp#.len - 1
+ sp#[idx] = s[:i]
+ else
+ /* buffer is full apart from the last slot: stop splitting */
+ goto donesplit
+ ;;
- slpush(&sp, s[:i])
s = s[i + delim.len:]
+ idx++
| `None:
goto donesplit
;;
;;
:donesplit
- slpush(&sp, s[:])
- -> sp
+ /* whatever is left of s (possibly empty) becomes the final piece */
+ if grow
+ slpush(sp, s)
+ else
+ sp#[idx] = s
+ ;;
+ idx++
+ -> sp#[:idx]
}
/*
 Splits s into tokens and returns them in order.

 Starts from an empty slice and hands its address to dostrtok with
 grow = true, so tokens are appended with slpush. Tokens are sub-slices
 of s. The visible part of dostrtok skips characters for which isspace
 holds, and the tests show empty or all-whitespace input giving an
 empty result.
 */
const strtok = {s
- var i, j
var toks
+ toks = [][:]
+
+ -> dostrtok(&toks, s, true)
+}
+
/*
 Buffered variant of strtok: tokenizes s into the caller-supplied buffer
 toks (dostrtok is called with grow = false) and returns the filled
 prefix of that buffer.

 When the buffer is down to its last slot, dostrtok stores the rest of
 s, starting at the current token, in that slot and stops.

 NOTE(review): dostrtok compares against toks#.len - 1, so a zero-length
 buffer looks unsafe -- pass at least one slot and confirm.
 */
+const bstrtok = {toks, s
+ -> dostrtok(&toks, s, false)
+}
+
+const dostrtok = {toks, s, grow
+ var i, j
+ var idx
i = 0
- toks = [][:]
+ idx = 0
while i != s.len
while isspace(std.decode(s[i:])) && i < s.len
i++
@@ -53,9 +91,18 @@ const strtok = {s
j++
;;
if i != j
- slpush(&toks, s[i:j])
+ if grow
+ slpush(toks, s[i:j])
+ elif idx < toks#.len - 1
+ toks#[idx] = s[i:j]
+ else
+ toks#[idx] = s[i:]
+ idx++
+ break
+ ;;
+ idx++
;;
i = j
;;
- -> toks
+ -> toks#[:idx]
}
diff --git a/lib/std/test/strsplit.myr b/lib/std/test/strsplit.myr
index 8c4a97c..e6dd7a8 100644
--- a/lib/std/test/strsplit.myr
+++ b/lib/std/test/strsplit.myr
@@ -1,13 +1,32 @@
use std
/*
 Test driver for std.strsplit, std.bstrsplit, std.strtok and std.bstrtok.
 Each case passes the actual result and the expected slice to check,
 which calls std.fatal when an element differs.
 */
const main = {
/* six-slot scratch buffer shared by the buffered variants */
+ var b : byte[:][6]
+
+ /* dynamic str split */
check(std.strsplit("", ","), [][:])
check(std.strsplit("a,b,c ,,d,", ","), \
["a", "b", "c ", "", "d", ""][:])
/* NOTE(review): this check repeats the previous one verbatim */
+ check(std.strsplit("a,b,c ,,d,", ","), \
+ ["a", "b", "c ", "", "d", ""][:])
+
+ /* buffered str split */
+ check(std.bstrsplit(b[:], "a,b", ","), \
+ ["a", "b"][:])
+ check(std.bstrsplit(b[:], "a,b,c ,,d,", ","), \
+ ["a", "b", "c ", "", "d", ""][:])
/* more pieces than slots: the sixth slot receives the unsplit remainder */
+ check(std.bstrsplit(b[:], "a,b,c,d,e,f,g,h", ","), \
+ ["a", "b", "c", "d", "e", "f,g,h",][:])
+
+ /* tokenizing */
check(std.strtok(""), [][:])
check(std.strtok(" "), [][:])
check(std.strtok("\t"), [][:])
check(std.strtok("a b c\td"), ["a", "b", "c", "d"][:])
+
+ /* buffered tokenizing */
+ check(std.bstrtok(b[:], "a b c\td"), ["a", "b", "c", "d"][:])
/* two-slot buffer: the second slot receives the rest of the input */
+ check(std.bstrtok(b[:2], "a b c\td"), ["a", "b c\td"][:])
}
const check = {a, b
@@ -17,7 +36,7 @@ const check = {a, b
;;
for var i = 0; i < a.len; i++
if !std.sleq(a[i], b[i])
- std.fatal("element {} mismatched: {} != {}\n", i, a[i], b[i])
+ std.fatal("element {} mismatched: '{}' != '{}'\n", i, a[i], b[i])
;;
;;
}