summary refs log tree commit diff
path: root/lib/regex
diff options
context:
space:
mode:
author Ori Bernstein <ori@eigenstate.org> 2016-05-11 16:03:33 -0700
committer Ori Bernstein <ori@eigenstate.org> 2016-05-11 16:13:25 -0700
commit 107e78e61117bdadc667fb5bdece373b90a26015 (patch)
tree a5f0aa5c3d920c06a32f249392e8045eb6fdfdaa /lib/regex
parent 709e7754dcec96e08464e52bb4c1551e427b5c06 (diff)
download mc-107e78e61117bdadc667fb5bdece373b90a26015.tar.gz
Implement regex.search()
Diffstat (limited to 'lib/regex')
-rw-r--r-- lib/regex/interp.myr 49
-rw-r--r-- lib/regex/test/bld.sub 8
-rw-r--r-- lib/regex/test/search.myr 27
-rw-r--r-- lib/regex/test/testmatch.myr 22
4 files changed, 92 insertions, 14 deletions
diff --git a/lib/regex/interp.myr b/lib/regex/interp.myr
index 8000ac8..4458370 100644
--- a/lib/regex/interp.myr
+++ b/lib/regex/interp.myr
@@ -4,6 +4,7 @@ use "types"
pkg regex =
const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
+ const search : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
const matchfree : (pat : byte[:][:] -> void)
/*
FIXME: implement. This should scan for a possible start char in the
@@ -21,7 +22,7 @@ const exec = {re, str
re.str = str
re.strp = 0
- thr = run(re)
+ thr = run(re, true)
if thr != Zthr
m = getmatches(re, thr)
thrfree(re, thr)
@@ -33,6 +34,26 @@ const exec = {re, str
;;
}
+/*
+  Scans `str` for a match of `re`, trying successive start offsets from
+  0 up to and including str.len. At each offset the suffix str[i:] is
+  handed to run() with wholestr=false, so a match is allowed to end
+  before the end of the input. Returns `std.Some holding the result of
+  getmatches() for the first offset that yields a matching thread, or
+  `std.None if no offset does.
+*/
+const search = {re, str
+	var thr
+	var m
+
+	/*
+	  `<=` rather than `<`: the empty suffix at str.len must be tried as
+	  well, otherwise an empty `str` is never run at all and a pattern
+	  that can only match the empty string at the end of the input is
+	  reported as not matching.
+	*/
+	for var i = 0; i <= str.len; i++
+		re.str = str[i:]
+		re.strp = 0
+		thr = run(re, false)
+		if thr != Zthr
+			m = getmatches(re, thr)
+			thrfree(re, thr)
+			cleanup(re)
+			-> `std.Some m
+		;;
+		/* no match from this offset: reset interpreter state before retrying */
+		cleanup(re)
+	;;
+	-> `std.None
+}
+
const cleanup = {re
var thr, next
@@ -66,12 +87,14 @@ const getmatches = {re, thr
/* returns a matching thread, or Zthr if no threads matched */
-const run = {re
- var ip
+const run = {re, wholestr
+ var bestmatch
var consumed
- var thr
var states
+ var thr
+ var ip
+ bestmatch = Zthr
states = std.mkbs()
re.runq = mkthread(re, 0)
re.runq.mstart = std.slalloc(re.nmatch)
@@ -99,9 +122,18 @@ const run = {re
if thr.dead
thrfree(re, thr)
- elif thr.matched && re.strp == re.str.len
- std.bsfree(states)
- -> thr
+ elif thr.matched
+ trace(re, thr, "new bestmatch\n")
+ if bestmatch != Zthr
+ thrfree(re, bestmatch)
+ ;;
+
+ if re.strp == re.str.len
+ bestmatch = thr
+ goto done
+ elif !wholestr
+ bestmatch = thr
+ ;;
elif !thr.matched
std.bsput(states, thr.ip)
if re.expired == Zthr
@@ -122,8 +154,9 @@ const run = {re
re.expiredtail = Zthr
re.strp++
;;
+:done
std.bsfree(states)
- -> Zthr
+ -> bestmatch
}
/*
diff --git a/lib/regex/test/bld.sub b/lib/regex/test/bld.sub
index 8307b99..4551085 100644
--- a/lib/regex/test/bld.sub
+++ b/lib/regex/test/bld.sub
@@ -47,3 +47,11 @@ test unicode =
lib @/lib/sys:sys
lib @/lib/regex:regex
;;
+
+test search =
+ search.myr
+ testmatch.myr
+ lib @/lib/std:std
+ lib @/lib/sys:sys
+ lib @/lib/regex:regex
+;;
diff --git a/lib/regex/test/search.myr b/lib/regex/test/search.myr
new file mode 100644
index 0000000..33790de
--- /dev/null
+++ b/lib/regex/test/search.myr
@@ -0,0 +1,27 @@
+use std
+
+use "testmatch"
+
+/*
+  Exercises regex.search() on patterns whose match starts and/or ends
+  strictly inside the text, with and without capture groups.
+*/
+const main = {
+	testsearch("bc", "Abcd", `std.Some [][:])
+	testsearch("Abc", "Abc", `std.Some [][:])
+	testsearch("(bc)", "Abc", `std.Some ["bc"][:])
+	testsearch("(bc.*)", "Abcde", `std.Some ["bcde"][:])
+	testsearch("(b.*c)", "ABbasdfcrap", `std.Some ["basdfc"][:])
+}
diff --git a/lib/regex/test/testmatch.myr b/lib/regex/test/testmatch.myr
index 4600410..5730fc1 100644
--- a/lib/regex/test/testmatch.myr
+++ b/lib/regex/test/testmatch.myr
@@ -3,22 +3,32 @@ use regex
pkg =
const testmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
+ const testsearch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
const dbgmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void)
;;
const testmatch = {pat, text, expected
- run(regex.compile(pat), pat, text, expected)
+ run(regex.compile(pat), pat, text, expected, false)
+}
+
+const testsearch = {pat, text, expected
+ run(regex.compile(pat), pat, text, expected, true) /* true: run() calls regex.search() instead of regex.exec() */
}
const dbgmatch = {pat, text, expected
- run(regex.dbgcompile(pat), pat, text, expected)
+ run(regex.dbgcompile(pat), pat, text, expected, false)
}
-const run = {regex, pat, text, expected
- var i, re
+const run = {regex, pat, text, expected, search
+ var i, re, r
re = std.try(regex)
- match regex.exec(re, text)
+ if search
+ r = regex.search(re, text)
+ else
+ r = regex.exec(re, text)
+ ;;
+ match r
| `std.Some res:
match expected
| `std.None:
@@ -27,7 +37,7 @@ const run = {regex, pat, text, expected
std.put("\t{}: {}\n", i, res[i])
;;
| `std.Some exp:
- if !std.sleq(res[0], text)
+ if !search && !std.sleq(res[0], text)
std.put("whole match does not match text!\n")
std.fatal("failed matching {} over {}\n", pat, text)
;;