summary | refs | log | tree | commit | diff
path: root/lib/regex/compile.myr
diff options
context:
space:
mode:
Diffstat (limited to 'lib/regex/compile.myr')
-rw-r--r-- lib/regex/compile.myr 80
1 files changed, 43 insertions, 37 deletions
diff --git a/lib/regex/compile.myr b/lib/regex/compile.myr
index 7c4c56b..023d7ae 100644
--- a/lib/regex/compile.myr
+++ b/lib/regex/compile.myr
@@ -85,6 +85,10 @@ const free = {re
if re.debug
std.htfree(re.astloc)
std.slfree(re.pcidx)
+ for bs in re.traces
+ std.bsfree(bs)
+ ;;
+ std.slfree(re.traces)
;;
std.free(re)
}
@@ -93,13 +97,13 @@ const free = {re
/* generates bytecode from an AST */
const gen = {re, t
match t#
- |`Alt (a, b): genalt(re, a, b)
+ |`Alt (a, b): genalt(re, a, b, t)
|`Cat (a, b): gen(re, a); gen(re, b)
/* repetition */
- |`Star a: genstar(re, a, false)
- |`Rstar a: genstar(re, a, true)
- |`Plus a: gen(re, a); genstar(re, a, false)
- |`Rplus a: gen(re, a); genstar(re, a, true)
+ |`Star a: genstar(re, a, false, t)
+ |`Rstar a: genstar(re, a, true, t)
+ |`Plus a: gen(re, a); genstar(re, a, false, t)
+ |`Rplus a: gen(re, a); genstar(re, a, true, t)
|`Quest a: genquest(re, a)
/* end matches */
@@ -290,7 +294,7 @@ const jmpdist = {n
}
/* generates an alternation */
-const genalt = {re, l, r
+const genalt = {re, l, r, t
var alt
var jmp
var l0
@@ -298,10 +302,10 @@ const genalt = {re, l, r
var l2
alt = re.proglen
- l0 = append(re, `Ifork (-1, -1), l) /* needs to be replaced */
+ l0 = append(re, `Ifork (-1, -1), t) /* needs to be replaced */
gen(re, l)
jmp = re.proglen
- l1 = append(re, `Ijmp -1, r) /* needs to be replaced */
+ l1 = append(re, `Ijmp -1, t) /* needs to be replaced */
l2 = gen(re, r)
re.prog[alt] = `Ifork(l0, l1)
@@ -310,7 +314,7 @@ const genalt = {re, l, r
}
/* generates a repetition operator */
-const genstar = {re, rep, reluct
+const genstar = {re, rep, reluct, t
var alt
var jmp
var l0
@@ -319,9 +323,9 @@ const genstar = {re, rep, reluct
l0 = re.proglen
alt = re.proglen
- l1 = append(re, `Ifork (-1, -1), rep) /* needs to be replaced */
+ l1 = append(re, `Ifork (-1, -1), t) /* needs to be replaced */
jmp = gen(re, rep)
- l2 = append(re, `Ijmp -1, rep)
+ l2 = append(re, `Ijmp -1, t)
/* reluctant matches should prefer jumping to the end. */
@@ -419,6 +423,7 @@ const dump = {re, t, indent
for var i = 0; i < indent; i++
std.put(" ")
;;
+ std.put("{}/", std.htgetv(re.astloc, t, -1))
match t#
| `Alt (a, b):
std.put("Alt\n")
@@ -454,7 +459,7 @@ const dump = {re, t, indent
std.put("Eow\n")
/* end matches */
| `Chr c:
- std.put("Char {}\n", c)
+ std.put("Chr {}\n", c)
| `Ranges rl:
std.put("Ranges")
for r in rl
@@ -495,8 +500,8 @@ const altexpr = {re
match catexpr(re)
| `Some t:
ret = t
+ idx = re.idx
if matchc(re, '|')
- idx = re.idx
match altexpr(re)
| `Some rhs:
ret = mk(re, `Alt (ret, rhs), idx)
@@ -517,9 +522,9 @@ const catexpr = {re
var ret
var idx
+ idx = re.idx
match repexpr(re)
| `Some t:
- idx = re.idx
ret = t
match catexpr(re)
| `Some rhs:
@@ -569,6 +574,7 @@ const baseexpr = {re
if re.pat.len == re.idx
-> `None
;;
+ idx = re.idx
match peekc(re)
/* lower prec operators */
| '|': -> `None
@@ -581,10 +587,9 @@ const baseexpr = {re
| '$': getc(re); ret = mk(re, `Eol, re.idx)
| '.':
getc(re);
- ret = mk(re, `Ranges std.sldup([[0, std.Maxcharval]][:]), re.idx)
+ ret = mk(re, `Ranges std.sldup([[0, std.Maxcharval]][:]), idx)
| '(':
m = re.nmatch++
- idx = re.idx
getc(re)
match altexpr(re)
| `Some s:
@@ -604,44 +609,45 @@ const baseexpr = {re
-> escaped(re)
| c:
getc(re)
- ret = mk(re, `Chr c, re.idx)
+ ret = mk(re, `Chr c, idx)
;;
-> `Some ret
}
const escaped = {re
- var ret
+ var ret, idx
+ idx = re.idx
match getc(re)
/* character classes */
- | 'd': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciidigit[:]), re.idx)
- | 'x': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciixdigit[:]), re.idx)
- | 's': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciispace[:]), re.idx)
- | 'w': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiword[:]), re.idx)
- | 'h': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiblank[:]), re.idx)
+ | 'd': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciidigit[:]), idx)
+ | 'x': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciixdigit[:]), idx)
+ | 's': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciispace[:]), idx)
+ | 'w': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiword[:]), idx)
+ | 'h': ret = `Some mk(re, `Ranges std.sldup(_ranges.tabasciiblank[:]), idx)
/* negated character classes */
- | 'W': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiword[:]), re.idx)
- | 'S': ret = `Some mk(re, `Ranges negate(_ranges.tabasciispace[:]), re.idx)
- | 'D': ret = `Some mk(re, `Ranges negate(_ranges.tabasciidigit[:]), re.idx)
- | 'X': ret = `Some mk(re, `Ranges negate(_ranges.tabasciixdigit[:]), re.idx)
- | 'H': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiblank[:]), re.idx)
+ | 'W': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiword[:]), idx)
+ | 'S': ret = `Some mk(re, `Ranges negate(_ranges.tabasciispace[:]), idx)
+ | 'D': ret = `Some mk(re, `Ranges negate(_ranges.tabasciidigit[:]), idx)
+ | 'X': ret = `Some mk(re, `Ranges negate(_ranges.tabasciixdigit[:]), idx)
+ | 'H': ret = `Some mk(re, `Ranges negate(_ranges.tabasciiblank[:]), idx)
/* unicode character classes */
| 'p': ret = unicodeclass(re, false)
| 'P': ret = unicodeclass(re, true)
/* operators that need an escape */
- | '<': ret = `Some mk(re, `Bow, re.idx)
- | '>': ret = `Some mk(re, `Eow, re.idx)
+ | '<': ret = `Some mk(re, `Bow, idx)
+ | '>': ret = `Some mk(re, `Eow, idx)
/* escaped metachars */
- | '^': ret = `Some mk(re, `Chr '^', re.idx)
- | '$': ret = `Some mk(re, `Chr '$', re.idx)
- | '.': ret = `Some mk(re, `Chr '.', re.idx)
- | '+': ret = `Some mk(re, `Chr '+', re.idx)
- | '?': ret = `Some mk(re, `Chr '?', re.idx)
- | '*': ret = `Some mk(re, `Chr '*', re.idx)
+ | '^': ret = `Some mk(re, `Chr '^', idx)
+ | '$': ret = `Some mk(re, `Chr '$', idx)
+ | '.': ret = `Some mk(re, `Chr '.', idx)
+ | '+': ret = `Some mk(re, `Chr '+', idx)
+ | '?': ret = `Some mk(re, `Chr '?', idx)
+ | '*': ret = `Some mk(re, `Chr '*', idx)
| chr: ret = `Fail `Badescape chr
;;
-> ret
@@ -709,8 +715,8 @@ const chrclass = {re
var t
/* we know we saw '[' on entry */
- matchc(re, '[')
idx = re.idx
+ matchc(re, '[')
neg = false
if matchc(re, '^')
neg = true