diff options
author | Ori Bernstein <ori@eigenstate.org> | 2016-05-23 14:17:36 -0700 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2016-05-23 14:17:36 -0700 |
commit | 167228d8551ba8b4dc282489258ad2e50363a56f (patch) | |
tree | 27c8d6257bfc63bb9d6523d96ee282309259deba /lib/regex | |
parent | 31247e29fb596b44fdd5c18140bc945b6a4b71ce (diff) | |
download | mc-167228d8551ba8b4dc282489258ad2e50363a56f.tar.gz |
add substitution for full matches.
Diffstat (limited to 'lib/regex')
-rw-r--r-- | lib/regex/interp.myr | 49 | ||||
-rw-r--r-- | lib/regex/test/basic.myr | 9 | ||||
-rw-r--r-- | lib/regex/test/bld.sub | 8 | ||||
-rw-r--r-- | lib/regex/test/subst.myr | 52 | ||||
-rw-r--r-- | lib/regex/test/testmatch.myr | 55 |
5 files changed, 162 insertions, 11 deletions
diff --git a/lib/regex/interp.myr b/lib/regex/interp.myr index c52ed25..fbc685e 100644 --- a/lib/regex/interp.myr +++ b/lib/regex/interp.myr @@ -5,6 +5,8 @@ use "types" pkg regex = const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:])) const search : (re : regex#, str : byte[:] -> std.option(byte[:][:])) + const sub : (re : regex#, str : byte[:], subst : byte[:][:] -> std.option(byte[:])) + const sbsub : (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> bool) const matchfree : (pat : byte[:][:] -> void) ;; @@ -42,6 +44,53 @@ const search = {re, str -> m } +const sub = {re, str, subst + var sb + + sb = std.mksb() + if !sbsub(sb, re, str, subst) + -> `std.None + else + -> `std.Some std.sbfin(sb) + ;; +} + +const sbsub = {sb, re, str, subst + var thr, m + + /* we always have m[0] as the full match */ + if re.nmatch != subst.len + 1 + -> false + ;; + + re.str = str + re.strp = 0 + thr = run(re, true) + if thr == Zthr + m = false + else + m = dosubst(sb, re, thr, subst) + ;; + cleanup(re) + -> m +} + +const dosubst = {sb, re, thr, subst + var off + + off = 0 + for var i = 1; i < re.nmatch; i++ + if thr.mstart[i] != -1 && thr.mend[i] != -1 + std.sbputs(sb, re.str[off:thr.mstart[i]]) + std.sbputs(sb, subst[i - 1]) + off = thr.mend[i] + ;; + ;; + std.sbputs(sb, re.str[off:]) + thrfree(re, thr) + -> true +} + const cleanup = {re var thr, next diff --git a/lib/regex/test/basic.myr b/lib/regex/test/basic.myr index 506b7c0..9506a90 100644 --- a/lib/regex/test/basic.myr +++ b/lib/regex/test/basic.myr @@ -19,27 +19,20 @@ const main = { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", ][:], "") - std.put("hi\n") testmatch(".*bc", "Abc", `std.Some [][:]) - std.put("1\n") - dbgmatch("(a*)*", "a", `std.Some ["a"][:]) - std.put("2\n") + testmatch("(a*)*", "a", `std.Some ["a"][:]) testmatch("(aa|aab?)*", s, `std.Some ["aa"][:]) - std.put("3\n") /* greedy matches */ testmatch("(<.*>).*", "<a foo> blah <bar>", `std.Some [ "<a foo> blah <bar>", ][:]) - std.put("3\n") testmatch("(<.+>).*", "<a foo> blah <bar>", `std.Some [ "<a foo> blah <bar>", ][:]) - std.put("4\n") /* reluctant matches */ testmatch("(<.*?>).*", "<a foo> blah <bar>", `std.Some [ "<a foo>", ][:]) - std.put("5\n") testmatch("(<.+?>).*", "<a foo> blah <bar>", `std.Some [ "<a foo>", ][:]) diff --git a/lib/regex/test/bld.sub b/lib/regex/test/bld.sub index 4551085..be1a868 100644 --- a/lib/regex/test/bld.sub +++ b/lib/regex/test/bld.sub @@ -55,3 +55,11 @@ test search = lib @/lib/sys:sys lib @/lib/regex:regex ;; + +test subst = + subst.myr + testmatch.myr + lib @/lib/std:std + lib @/lib/sys:sys + lib @/lib/regex:regex +;; diff --git a/lib/regex/test/subst.myr b/lib/regex/test/subst.myr new file mode 100644 index 0000000..0235ad1 --- /dev/null +++ b/lib/regex/test/subst.myr @@ -0,0 +1,52 @@ +use std + +use "testmatch" + +const main = { + var s : byte[:] + var e : byte[:] + + s = std.strjoin([ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + ][:], "") + e = std.strcat(s[:s.len - 2], "XYZ") + + testsub(".*bc", "Abc", [][:], `std.Some "Abc") + testsub("(a*)*", "a", ["b"][:], `std.Some "b") + testsub("(aa|aab?)*", s, ["XYZ"][:], `std.Some e) + + /* greedy matches */ + testsub("(<.*>).*", \ + "<a foo> blah <bar>", \ + ["some shite"][:], \ + `std.Some "some shite") + + /* reluctant matches */ + testsub("(<.*?>).*", \ + "<a foo> blah <bar>", \ + ["<some tag>"][:], \ + `std.Some "<some tag> blah <bar>") + + /* multiple subs */ + testsub("([a-z]*)[^a-z]*([a-z]*)foo", \ + "abc123foofoo", \ + ["XYZ", "ABC"][:], \ + `std.Some "XYZ123ABCfoo") + + /* failure */ + testsub("([a-z]*).*([a-z]*)foo", \ + "abc123foobar", \ + ["XYZ", "ABC"][:], \ + `std.None) +} diff --git a/lib/regex/test/testmatch.myr b/lib/regex/test/testmatch.myr index 2ee7605..a556557 100644 --- a/lib/regex/test/testmatch.myr +++ b/lib/regex/test/testmatch.myr @@ -2,9 +2,30 @@ use std use regex pkg = - const testmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) - const testsearch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) - const dbgmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) + const testmatch : (\ + pat : byte[:], \ + text : byte[:], \ + expected : std.option(byte[:][:]) \ + -> void) + + const testsub : ( \ + pat : byte[:], \ + text : byte[:], \ + sub : byte[:][:], \ + expected : std.option(byte[:]) \ + -> void) + + const testsearch : ( \ + pat : byte[:], \ + text : byte[:], \ + expected : std.option(byte[:][:]) \ + -> void) + + const dbgmatch : ( \ + pat : byte[:], \ + text : byte[:], \ + expected : std.option(byte[:][:]) \ + -> void) ;; const testmatch = {pat, text, expected @@ -15,10 +36,38 @@ const testsearch = {pat, text, expected run(regex.compile(pat), pat, text, expected, true) } +const testsub = {pat, text, sub, expected + subst(regex.compile(pat), pat, text, sub, expected) +} + const dbgmatch = {pat, text, expected run(regex.dbgcompile(pat, true), pat, text, expected, false) } +const subst = {regex, pat, text, sub, expected + var re + + re = std.try(regex) + match regex.sub(re, text, sub) + | `std.Some res: + std.put("res: {}\n", res) + match expected + | `std.Some e: + if !std.sleq(res, e) + std.fatal("bad subst: expected {}, got {}\n", e, res) + ;; + | `std.None: + std.fatal("expected no subst, got {}", res) + ;; + | `std.None: + match expected + | `std.Some e: + std.fatal("got no sub, expected {}\n", e) + | `std.None: + ;; + ;; +} + const run = {regex, pat, text, expected, search var i, re, r |