summary refs log tree commit diff
diff options
context:
space:
mode:
author Ori Bernstein <ori@eigenstate.org> 2015-04-29 21:23:39 -0700
committer Ori Bernstein <ori@eigenstate.org> 2015-04-29 21:23:39 -0700
commit ff18725b46d903e6dcedd0c6262aea47fe6c8869 (patch)
tree cfdee0a795df83e672a3709b285ff40ca4a9d7d0
parent 404f711755c8dbe4104eedbae62cddfbba6bbfa8 (diff)
download mc-ff18725b46d903e6dcedd0c6262aea47fe6c8869.tar.gz
Add regex parsing to libregex.
I'd like to use the same regex parser for source indexing and parser generation.
-rw-r--r-- libregex/compile.myr 62
-rw-r--r-- libregex/types.myr 26
-rw-r--r-- parse/gram.y 5
-rw-r--r-- parse/infer.c 4
4 files changed, 56 insertions, 41 deletions
diff --git a/libregex/compile.myr b/libregex/compile.myr
index c2f6a47..898c28f 100644
--- a/libregex/compile.myr
+++ b/libregex/compile.myr
@@ -4,61 +4,50 @@ use "types.use"
use "ranges.use"
pkg regex =
+ const parse : (re : byte[:] -> std.result(ast#, status))
const compile : (re : byte[:] -> std.result(regex#, status))
const dbgcompile : (re : byte[:] -> std.result(regex#, status))
const free : (re : regex# -> void)
;;
-type tree = union
- /* basic string building */
- `Alt (tree#, tree#)
- `Cat (tree#, tree#)
-
- /* repetition */
- `Star tree#
- `Rstar tree#
- `Plus tree#
- `Rplus tree#
- `Quest tree#
-
- /* end matches */
- `Byte byte
- `Chr char
- `Ranges char[2][:]
-
- /* meta */
- `Cap (std.size, tree#) /* id, tree */
- `Bol /* beginning of line */
- `Eol /* end of line */
- `Bow /* beginning of word */
- `Eow /* end of word */
-;;
-
type parseresult = union
- `Some tree#
+ `Some ast#
`None
`Fail status
;;
/* Compiles a pattern into a regex */
const compile = {pat
- -> regexcompile(std.zalloc(), pat)
+ -> regexcompile(std.mk([.pat = pat, .nmatch = 1]))
+}
+
+const parse = {pat
+ var re
+
+ re = std.mk([.pat = pat, .nmatch = 1])
+ match regexparse(re)
+ | `None: -> `std.Fail `Incomplete
+ | `Fail f: -> `std.Fail f
+ | `Some t:
+ if re.pat.len > 0
+ -> `std.Fail `Incomplete
+ else
+ -> `std.Ok t
+ ;;
+ ;;
}
/* Compiles a pattern into a debug regex. This can be verbose. */
const dbgcompile = {pat
var re
- re = std.zalloc()
- re.debug = true
- -> regexcompile(re, pat)
+ re = std.mk([.pat = pat, .nmatch = 1, .debug = true])
+ -> regexcompile(re)
}
/* compiles a pattern into an allocated regex */
-const regexcompile = {re, pat
- re.pat = pat
- re.nmatch = 1 /* whole match */
- match parse(re)
+const regexcompile = {re
+ match regexparse(re)
| `None: -> `std.Fail (`Incomplete)
| `Fail f: -> `std.Fail f
| `Some t:
@@ -67,6 +56,7 @@ const regexcompile = {re, pat
an incorrectly encoded char
*/
if re.pat.len > 0
+ astfree(t)
-> `std.Fail (`Incomplete)
;;
dump(re, t, 0)
@@ -473,7 +463,7 @@ const dump = {re, t, indent
}
/* parses an expression */
-const parse = {re
+const regexparse = {re
match altexpr(re)
| `Some t:
if re.pat.len == 0
@@ -490,7 +480,7 @@ const parse = {re
}
const altexpr = {re
- var ret : tree#
+ var ret
match catexpr(re)
| `Some t:
diff --git a/libregex/types.myr b/libregex/types.myr
index 24e9842..a0f8ceb 100644
--- a/libregex/types.myr
+++ b/libregex/types.myr
@@ -11,6 +11,32 @@ pkg regex =
`Badescape
;;
+ type ast = union
+ /* basic string building */
+ `Alt (ast#, ast#)
+ `Cat (ast#, ast#)
+
+ /* repetition */
+ `Star ast#
+ `Rstar ast#
+ `Plus ast#
+ `Rplus ast#
+ `Quest ast#
+
+ /* end matches */
+ `Byte byte
+ `Chr char
+ `Ranges char[2][:]
+
+ /* meta */
+ `Cap (std.size, ast#) /* id, ast */
+ `Bol /* beginning of line */
+ `Eol /* end of line */
+ `Bow /* beginning of word */
+ `Eow /* end of word */
+ ;;
+
+
type regex = struct
/* compile state */
debug : bool
diff --git a/parse/gram.y b/parse/gram.y
index 74f24c6..8190e9c 100644
--- a/parse/gram.y
+++ b/parse/gram.y
@@ -467,10 +467,7 @@ typaramlist
compoundtype
: functype {$$ = $1;}
| type Tosqbrac Tcolon Tcsqbrac {$$ = mktyslice($2->loc, $1);}
- | type Tosqbrac expr Tcsqbrac {
- $3->expr.type = mktype($3->loc, Tyuint32);
- $$ = mktyarray($2->loc, $1, $3);
- }
+ | type Tosqbrac expr Tcsqbrac {$$ = mktyarray($2->loc, $1, $3);}
| type Tosqbrac Tellipsis Tcsqbrac {$$ = mktyarray($2->loc, $1, NULL);}
| type Tderef {$$ = mktyptr($2->loc, $1);}
| Tat Tident {$$ = mktyparam($1->loc, $2->id);}
diff --git a/parse/infer.c b/parse/infer.c
index b60d79d..576ac3f 100644
--- a/parse/infer.c
+++ b/parse/infer.c
@@ -1712,8 +1712,10 @@ static Type *tyfix(Inferstate *st, Node *ctx, Type *orig, int noerr)
st->intype--;
} else if (t->type == Tyunion) {
for (i = 0; i < t->nmemb; i++) {
- if (t->udecls[i]->etype)
+ if (t->udecls[i]->etype) {
+ tyresolve(st, t->udecls[i]->etype);
t->udecls[i]->etype = tyfix(st, ctx, t->udecls[i]->etype, noerr);
+ }
}
} else if (t->type == Tyname) {
for (i = 0; i < t->narg; i++)