diff options
author | Mura Li <github@ctli.io> | 2019-10-23 15:14:24 +0800 |
---|---|---|
committer | Mura Li <github@ctli.io> | 2019-10-29 10:02:52 +0800 |
commit | 16b894430a5b71f4956ee305229bbbffd13605d0 (patch) | |
tree | 64af8620c0ef314e9eb8fd83d8bcfeac0b8e6007 /mi | |
parent | e9b59bee4efb648e323dfb6269209b59c575806f (diff) | |
download | mc-16b894430a5b71f4956ee305229bbbffd13605d0.tar.gz |
Merge decision tree nodes when possible
(Tested on Linux/AMD64)
Sample count: 506
Dtree Refcnt avg: 5.38 95th percentile: 3.00 maximum: 100
Dtree Size avg: 5.23 95th percentile: 3.00 maximum: 84
Dtree Height avg: 1.39 95th percentile: 1.00 maximum: 12
References:
Mikael Pettersson. A term pattern-match compiler inspired by finite automata
theory. (p.6 "Step 3: Optimizing the DFA")
Diffstat (limited to 'mi')
-rw-r--r-- | mi/match.c | 169 | ||||
-rw-r--r-- | mi/match_test.c | 4 |
2 files changed, 145 insertions, 28 deletions
@@ -14,11 +14,12 @@ #include "parse.h" #include "mi.h" -Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid); +Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl); void dtreedump(FILE *fd, Dtree *dt); -static int ndtree; +static size_t ndtree; +static Dtree **dtree; /* Path is a integer sequence that labels a subtree of a subject tree. * For example, @@ -178,10 +179,82 @@ mkdtree(Srcloc loc, Node *lbl) t = zalloc(sizeof(Dtree)); t->lbl = lbl; t->loc = loc; - t->id = ndtree++; + t->id = ndtree; + lappend(&dtree, &ndtree, t); return t; } +static int +loadeq(Node *a, Node *b) +{ + if (a == b) + return 1; + + if (exprop(a) != exprop(b)) + return 0; + + switch (exprop(a)) { + case Outag: + case Oudata: + case Oderef: + return loadeq(a->expr.args[0], b->expr.args[0]); + case Omemb: + return loadeq(a->expr.args[0], b->expr.args[0]) && nameeq(a->expr.args[1], b->expr.args[1]); + case Otupget: + case Oidx: + return loadeq(a->expr.args[0], b->expr.args[0]) && liteq(a->expr.args[1]->expr.args[0], b->expr.args[1]->expr.args[0]); + case Ovar: + return a == b; + default: + die("unreachable"); + } +} + +static int +pateq(Node *a, Node *b) +{ + if (exprop(a) != exprop(b)) + return 0; + + switch (exprop(a)) { + case Olit: + return liteq(a->expr.args[0], b->expr.args[0]); + case Ogap: + case Ovar: + return 1; + default: + die("unreachable"); + } + return 0; +} + +static int +dtreusable(Dtree *dt, Node *load, Node **pat, size_t npat, Dtree **next, size_t nnext, Dtree *any) +{ + size_t i; + + if (!loadeq(dt->load, load)) + return 0; + + if (dt->npat != npat) + return 0; + + for (i = 0; i < npat; i++) { + if (dt->next[i]->id != next[i]->id) + return 0; + if (!pateq(dt->pat[i], pat[i])) + return 0; + } + + if (dt->any == NULL && any == NULL) + return 1; + + if (dt->any->id != any->id) + return 0; + + return 1; +} + void dtreedumplit(FILE *fd, Dtree *dt, Node *n, size_t depth) { @@ -260,24 +333,6 @@ patheq(Path *a, Path *b) return 1; } -static int -pateq(Node *a, Node *b) -{ - if (exprop(a) != exprop(b)) - return 0; - - switch (exprop(a)) { - case Olit: - return liteq(a->expr.args[0], b->expr.args[0]); - case Ogap: - case Ovar: - return 1; - default: - die("unreachable"); - } - return 0; -} - char * pathfmt(Path *p) { @@ -683,7 +738,24 @@ pi_found: any = NULL; /* construct the result dtree */ - out = mkdtree(slot->pat->loc, genlbl(slot->pat->loc)); + out = NULL; + + /* TODO + * use a hash table to avoid the quadratic complexity + * when we have a large N and the bottleneck becomes obvious. + */ + for (i = 0; i < ndtree; i++) { + if (!dtree[i]->accept && dtreusable(dtree[i], slot->load, pat, npat, edge, nedge, any)) { + out = dtree[i]; + out->refcnt++; + break; + } + } + if (out == NULL) { + out = mkdtree(slot->pat->loc, genlbl(slot->pat->loc)); + out->refcnt++; + } + out->load = slot->load; out->npat = npat, out->pat = pat, @@ -749,8 +821,49 @@ dtheight(Dtree *dt) return m+1; } +static size_t +refsum(Dtree *dt) +{ + size_t i; + size_t sum; + + if (dt == NULL) + return 0; + + dt->emitted = 1; + + /* NOTE + * MATCH nodes are always pre-allocated and shared, + * so counted as 1 for the size measurement. + */ + if (dt->accept) + return 1; + + sum = 0; + for (i = 0; i < dt->nnext; i++) + if (!dt->next[i]->emitted) + sum += refsum(dt->next[i]); + if (dt->any && !dt->any->emitted) + sum += refsum(dt->any); + + return dt->refcnt + sum; +} + +static void +clearemit(Dtree *dt) +{ + size_t i; + + if (dt == NULL) + return; + for (i = 0; i < dt->nnext; i++) + clearemit(dt->next[i]); + clearemit(dt->any); + dt->emitted = 0; +} + Dtree * -gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid) +gendtree(Node *m, Node *val, Node **lbl, size_t nlbl) { Dtree *root; Node **pat; @@ -762,7 +875,8 @@ gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid) char *dbgloc, *dbgfn, *dbgln; - ndtree = startid; + ndtree = 0; + dtree = NULL; pat = m->matchstmt.matches; npat = m->matchstmt.nmatches; @@ -799,8 +913,11 @@ gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid) if (getenv("MATCH_STATS")) { csv = fopen("match.csv", "a"); assert(csv != NULL); - fprintf(csv, "%s@L%d, %d, %ld\n", fname(m->loc), lnum(m->loc), ndtree, dtheight(root)); + fprintf(csv, "%s@L%d, %ld, %zd, %zd\n", fname(m->loc), lnum(m->loc), refsum(root), ndtree, dtheight(root)); fclose(csv); + + /* clear 'emitted' so it can be reused by genmatchcode. */ + clearemit(root); } return root; @@ -887,7 +1004,7 @@ genmatch(Node *m, Node *val, Node ***out, size_t *nout) endlbl = genlbl(m->loc); - dt = gendtree(m, val, lbl, nlbl, 0); + dt = gendtree(m, val, lbl, nlbl); genmatchcode(dt, out, nout); for (i = 0; i < npat; i++) { diff --git a/mi/match_test.c b/mi/match_test.c index dda8fdb..d190aba 100644 --- a/mi/match_test.c +++ b/mi/match_test.c @@ -26,7 +26,7 @@ File file; char debugopt[128]; typedef struct Dtree Dtree; -extern Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid); +extern Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl); extern void dtreedump(FILE *fd, Dtree *dt); @@ -250,7 +250,7 @@ test_match(int idx, Node *val, Node **pat, Dtree *want) lappend(&lbl, &nlbl, genlbl(pat[i]->match.block->loc)); } - dt = gendtree(m, v, lbl, nlbl, 0); + dt = gendtree(m, v, lbl, nlbl); if (getenv("d")) { fprintf(stderr, "dtree >>\n"); dtreedump(stderr, dt); |