summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ori Bernstein <ori@eigenstate.org>, 2016-05-23 14:17:36 -0700
committer: Ori Bernstein <ori@eigenstate.org>, 2016-05-23 14:17:36 -0700
commit: 167228d8551ba8b4dc282489258ad2e50363a56f (patch)
tree: 27c8d6257bfc63bb9d6523d96ee282309259deba
parent: 31247e29fb596b44fdd5c18140bc945b6a4b71ce (diff)
download: mc-167228d8551ba8b4dc282489258ad2e50363a56f.tar.gz
add substitution for full matches.
-rw-r--r--  lib/regex/interp.myr          49
-rw-r--r--  lib/regex/test/basic.myr       9
-rw-r--r--  lib/regex/test/bld.sub         8
-rw-r--r--  lib/regex/test/subst.myr      52
-rw-r--r--  lib/regex/test/testmatch.myr  55
5 files changed, 162 insertions, 11 deletions
diff --git a/lib/regex/interp.myr b/lib/regex/interp.myr
index c52ed25..fbc685e 100644
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -5,6 +5,8 @@ use "types"
pkg regex =
const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
const search : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
+ const sub : (re : regex#, str : byte[:], subst : byte[:][:] -> std.option(byte[:]))
+ const sbsub : (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> bool)
const matchfree : (pat : byte[:][:] -> void)
;;
@@ -42,6 +44,53 @@ const search = {re, str
-> m
}
+/*
+ * Matches `re` against `str` and returns the substituted text.
+ *
+ * `subst` supplies the replacement strings handed on to sbsub. The
+ * result is `std.None when sbsub reports failure, otherwise `std.Some
+ * wrapping whatever std.sbfin produces from the string buffer.
+ */
+const sub = {re, str, subst
+	var sb
+
+	sb = std.mksb()
+	if !sbsub(sb, re, str, subst)
+		/* nobody takes ownership of sb on the failure path; release it */
+		std.sbfree(sb)
+		-> `std.None
+	;;
+	-> `std.Some std.sbfin(sb)
+}
+
+/*
+ * Runs `re` over `str` and, when run() hands back a thread, lets
+ * dosubst append the substituted text to `sb`.
+ *
+ * Returns false, leaving `sb` untouched, when `subst` does not hold
+ * exactly re.nmatch - 1 entries or when run() yields Zthr. Otherwise
+ * returns the result of dosubst.
+ */
+const sbsub = {sb, re, str, subst
+	var t, ok
+
+	/* slot 0 of the match array is the full match, hence the +1 */
+	if subst.len + 1 != re.nmatch
+		-> false
+	;;
+
+	re.str = str
+	re.strp = 0
+	t = run(re, true)
+	ok = false
+	if t != Zthr
+		ok = dosubst(sb, re, t, subst)
+	;;
+	/* cleanup happens on both the match and the no-match path */
+	cleanup(re)
+	-> ok
+}
+
+/*
+ * Appends re.str to `sb`, replacing the span of every capture group
+ * that has both a start and an end recorded in `thr` with the matching
+ * entry of `subst` (group i uses subst[i - 1]; group 0 is skipped).
+ * Text between and after the replaced spans is copied through
+ * unchanged. Frees `thr` and always returns true.
+ *
+ * NOTE(review): groups are visited in index order and each copy slices
+ * from the end of the previous replaced group to the start of the
+ * current one. This presumably relies on groups not nesting or
+ * overlapping; confirm what happens when a group starts before `pos`.
+ */
+const dosubst = {sb, re, thr, subst
+	var pos, grp
+
+	pos = 0
+	grp = 1
+	while grp < re.nmatch
+		if !(thr.mstart[grp] == -1 || thr.mend[grp] == -1)
+			/* untouched text leading up to this group, then its replacement */
+			std.sbputs(sb, re.str[pos:thr.mstart[grp]])
+			std.sbputs(sb, subst[grp - 1])
+			pos = thr.mend[grp]
+		;;
+		grp++
+	;;
+	/* whatever follows the last replaced group */
+	std.sbputs(sb, re.str[pos:])
+	thrfree(re, thr)
+	-> true
+}
+
const cleanup = {re
var thr, next
diff --git a/lib/regex/test/basic.myr b/lib/regex/test/basic.myr
index 506b7c0..9506a90 100644
--- a/lib/regex/test/basic.myr
+++ b/lib/regex/test/basic.myr
@@ -19,27 +19,20 @@ const main = {
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
][:], "")
- std.put("hi\n")
testmatch(".*bc", "Abc", `std.Some [][:])
- std.put("1\n")
- dbgmatch("(a*)*", "a", `std.Some ["a"][:])
- std.put("2\n")
+ testmatch("(a*)*", "a", `std.Some ["a"][:])
testmatch("(aa|aab?)*", s, `std.Some ["aa"][:])
- std.put("3\n")
/* greedy matches */
testmatch("(<.*>).*", "<a foo> blah <bar>", `std.Some [
"<a foo> blah <bar>",
][:])
- std.put("3\n")
testmatch("(<.+>).*", "<a foo> blah <bar>", `std.Some [
"<a foo> blah <bar>",
][:])
- std.put("4\n")
/* reluctant matches */
testmatch("(<.*?>).*", "<a foo> blah <bar>", `std.Some [
"<a foo>",
][:])
- std.put("5\n")
testmatch("(<.+?>).*", "<a foo> blah <bar>", `std.Some [
"<a foo>",
][:])
diff --git a/lib/regex/test/bld.sub b/lib/regex/test/bld.sub
index 4551085..be1a868 100644
--- a/lib/regex/test/bld.sub
+++ b/lib/regex/test/bld.sub
@@ -55,3 +55,11 @@ test search =
lib @/lib/sys:sys
lib @/lib/regex:regex
;;
+
+test subst =
+ subst.myr
+ testmatch.myr
+ lib @/lib/std:std
+ lib @/lib/sys:sys
+ lib @/lib/regex:regex
+;;
diff --git a/lib/regex/test/subst.myr b/lib/regex/test/subst.myr
new file mode 100644
index 0000000..0235ad1
--- /dev/null
+++ b/lib/regex/test/subst.myr
@@ -0,0 +1,52 @@
+use std
+
+use "testmatch"
+
+/*
+ * Test driver for substitution: each testsub call gives a pattern, an
+ * input text, the replacement strings, and the expected outcome.
+ */
+const main = {
+ var s : byte[:]
+ var e : byte[:]
+
+ /* s: twelve runs of 'a' joined with an empty separator */
+ s = std.strjoin([
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+ ][:], "")
+ /* e: s with its last two bytes dropped and "XYZ" appended */
+ e = std.strcat(s[:s.len - 2], "XYZ")
+
+ /* no replacements supplied; the text is expected back unchanged */
+ testsub(".*bc", "Abc", [][:], `std.Some "Abc")
+ testsub("(a*)*", "a", ["b"][:], `std.Some "b")
+ /* expects only the final two bytes of s to be replaced */
+ testsub("(aa|aab?)*", s, ["XYZ"][:], `std.Some e)
+
+ /* greedy matches */
+ testsub("(<.*>).*", \
+ "<a foo> blah <bar>", \
+ ["some shite"][:], \
+ `std.Some "some shite")
+
+ /* reluctant matches */
+ testsub("(<.*?>).*", \
+ "<a foo> blah <bar>", \
+ ["<some tag>"][:], \
+ `std.Some "<some tag> blah <bar>")
+
+ /* multiple subs */
+ testsub("([a-z]*)[^a-z]*([a-z]*)foo", \
+ "abc123foofoo", \
+ ["XYZ", "ABC"][:], \
+ `std.Some "XYZ123ABCfoo")
+
+ /* failure: this input is expected to produce no substitution at all */
+ testsub("([a-z]*).*([a-z]*)foo", \
+ "abc123foobar", \
+ ["XYZ", "ABC"][:], \
+ `std.None)
+}
diff --git a/lib/regex/test/testmatch.myr b/lib/regex/test/testmatch.myr
index 2ee7605..a556557 100644
--- a/lib/regex/test/testmatch.myr
+++ b/lib/regex/test/testmatch.myr
@@ -2,9 +2,30 @@ use std
use regex
pkg =
- const testmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
- const testsearch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
- const dbgmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
+ const testmatch : (\
+ pat : byte[:], \
+ text : byte[:], \
+ expected : std.option(byte[:][:]) \
+ -> void)
+
+ const testsub : ( \
+ pat : byte[:], \
+ text : byte[:], \
+ sub : byte[:][:], \
+ expected : std.option(byte[:]) \
+ -> void)
+
+ const testsearch : ( \
+ pat : byte[:], \
+ text : byte[:], \
+ expected : std.option(byte[:][:]) \
+ -> void)
+
+ const dbgmatch : ( \
+ pat : byte[:], \
+ text : byte[:], \
+ expected : std.option(byte[:][:]) \
+ -> void)
;;
const testmatch = {pat, text, expected
@@ -15,10 +36,38 @@ const testsearch = {pat, text, expected
run(regex.compile(pat), pat, text, expected, true)
}
+/*
+ * Compiles `pat` and hands the compile result, together with the
+ * pattern, input text, replacements and expected outcome, to subst.
+ */
+const testsub = {pat, text, sub, expected
+	var compiled
+
+	compiled = regex.compile(pat)
+	subst(compiled, pat, text, sub, expected)
+}
+
const dbgmatch = {pat, text, expected
run(regex.dbgcompile(pat, true), pat, text, expected, false)
}
+/*
+ * Unwraps the compile result `regex` with std.try, runs regex.sub on
+ * `text` with the replacements in `sub`, and checks the outcome against
+ * `expected`:
+ *   - a result where a result was expected must compare equal (std.sleq)
+ *   - a result where none was expected is fatal
+ *   - no result where one was expected is fatal
+ *   - no result where none was expected passes silently
+ * Every result that comes back is also printed with a "res: " prefix.
+ * `pat` is accepted but not used here.
+ */
+const subst = {regex, pat, text, sub, expected
+	var re
+
+	re = std.try(regex)
+	match regex.sub(re, text, sub)
+	| `std.Some res:
+		std.put("res: {}\n", res)
+		match expected
+		| `std.Some e:
+			if !std.sleq(res, e)
+				std.fatal("bad subst: expected {}, got {}\n", e, res)
+			;;
+		| `std.None:
+			/* newline added so this message ends like the other fatal messages */
+			std.fatal("expected no subst, got {}\n", res)
+		;;
+	| `std.None:
+		match expected
+		| `std.Some e:
+			std.fatal("got no sub, expected {}\n", e)
+		| `std.None:
+		;;
+	;;
+}
+
const run = {regex, pat, text, expected, search
var i, re, r