diff options
author | Ori Bernstein <ori@eigenstate.org> | 2016-05-11 16:03:33 -0700 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2016-05-11 16:13:25 -0700 |
commit | 107e78e61117bdadc667fb5bdece373b90a26015 (patch) | |
tree | a5f0aa5c3d920c06a32f249392e8045eb6fdfdaa /lib/regex | |
parent | 709e7754dcec96e08464e52bb4c1551e427b5c06 (diff) | |
download | mc-107e78e61117bdadc667fb5bdece373b90a26015.tar.gz |
Implement regex.search()
Diffstat (limited to 'lib/regex')
-rw-r--r-- | lib/regex/interp.myr | 49 |
-rw-r--r-- | lib/regex/test/bld.sub | 8 |
-rw-r--r-- | lib/regex/test/search.myr | 27 |
-rw-r--r-- | lib/regex/test/testmatch.myr | 22 |
4 files changed, 92 insertions, 14 deletions
diff --git a/lib/regex/interp.myr b/lib/regex/interp.myr index 8000ac8..4458370 100644 --- a/lib/regex/interp.myr +++ b/lib/regex/interp.myr @@ -4,6 +4,7 @@ use "types" pkg regex = const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:])) + const search : (re : regex#, str : byte[:] -> std.option(byte[:][:])) const matchfree : (pat : byte[:][:] -> void) /* FIXME: implement. This should scan for a possible start char in the @@ -21,7 +22,7 @@ const exec = {re, str re.str = str re.strp = 0 - thr = run(re) + thr = run(re, true) if thr != Zthr m = getmatches(re, thr) thrfree(re, thr) @@ -33,6 +34,26 @@ const exec = {re, str ;; } +const search = {re, str + var thr + var m + + for var i = 0; i < str.len; i++ + re.str = str[i:] + re.strp = 0 + thr = run(re, false) + if thr != Zthr + m = getmatches(re, thr) + thrfree(re, thr) + cleanup(re) + -> `std.Some m + else + cleanup(re) + ;; + ;; + -> `std.None +} + const cleanup = {re var thr, next @@ -66,12 +87,14 @@ const getmatches = {re, thr /* returns a matching thread, or Zthr if no threads matched */ -const run = {re - var ip +const run = {re, wholestr + var bestmatch var consumed - var thr var states + var thr + var ip + bestmatch = Zthr states = std.mkbs() re.runq = mkthread(re, 0) re.runq.mstart = std.slalloc(re.nmatch) @@ -99,9 +122,18 @@ const run = {re if thr.dead thrfree(re, thr) - elif thr.matched && re.strp == re.str.len - std.bsfree(states) - -> thr + elif thr.matched + trace(re, thr, "new bestmatch\n") + if bestmatch != Zthr + thrfree(re, bestmatch) + ;; + + if re.strp == re.str.len + bestmatch = thr + goto done + elif !wholestr + bestmatch = thr + ;; elif !thr.matched std.bsput(states, thr.ip) if re.expired == Zthr @@ -122,8 +154,9 @@ const run = {re re.expiredtail = Zthr re.strp++ ;; +:done std.bsfree(states) - -> Zthr + -> bestmatch } /* diff --git a/lib/regex/test/bld.sub b/lib/regex/test/bld.sub index 8307b99..4551085 100644 --- a/lib/regex/test/bld.sub +++ b/lib/regex/test/bld.sub @@ -47,3 +47,11 @@ 
test unicode = lib @/lib/sys:sys lib @/lib/regex:regex ;; + +test search = + search.myr + testmatch.myr + lib @/lib/std:std + lib @/lib/sys:sys + lib @/lib/regex:regex +;; diff --git a/lib/regex/test/search.myr b/lib/regex/test/search.myr new file mode 100644 index 0000000..33790de --- /dev/null +++ b/lib/regex/test/search.myr @@ -0,0 +1,27 @@ +use std + +use "testmatch" + +const main = { + var s : byte[:] + + s = std.strjoin([ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + ][:], "") + testsearch("bc", "Abcd", `std.Some [][:]) + testsearch("Abc", "Abc", `std.Some [][:]) + testsearch("(bc)", "Abc", `std.Some ["bc"][:]) + testsearch("(bc.*)", "Abcde", `std.Some ["bcde"][:]) + testsearch("(b.*c)", "ABbasdfcrap", `std.Some ["basdfc"][:]) +} diff --git a/lib/regex/test/testmatch.myr b/lib/regex/test/testmatch.myr index 4600410..5730fc1 100644 --- a/lib/regex/test/testmatch.myr +++ b/lib/regex/test/testmatch.myr @@ -3,22 +3,32 @@ use regex pkg = const testmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) + const testsearch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) const dbgmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) ;; const testmatch = {pat, text, expected - run(regex.compile(pat), pat, text, expected) + run(regex.compile(pat), pat, text, expected, false) +} + +const testsearch = {pat, text, expected + run(regex.compile(pat), pat, text, expected, true) } const dbgmatch = {pat, text, expected - run(regex.dbgcompile(pat), pat, text, 
expected) + run(regex.dbgcompile(pat), pat, text, expected, false) } -const run = {regex, pat, text, expected - var i, re +const run = {regex, pat, text, expected, search + var i, re, r re = std.try(regex) - match regex.exec(re, text) + if search + r = regex.search(re, text) + else + r = regex.exec(re, text) + ;; + match r | `std.Some res: match expected | `std.None: @@ -27,7 +37,7 @@ const run = {regex, pat, text, expected std.put("\t{}: {}\n", i, res[i]) ;; | `std.Some exp: - if !std.sleq(res[0], text) + if !search && !std.sleq(res[0], text) std.put("whole match does not match text!\n") std.fatal("failed matching {} over {}\n", pat, text) ;; |