summaryrefslogtreecommitdiff
path: root/mi
diff options
context:
space:
mode:
authorMura Li <github@ctli.io>2019-10-23 15:14:24 +0800
committerMura Li <github@ctli.io>2019-10-29 10:02:52 +0800
commit16b894430a5b71f4956ee305229bbbffd13605d0 (patch)
tree64af8620c0ef314e9eb8fd83d8bcfeac0b8e6007 /mi
parente9b59bee4efb648e323dfb6269209b59c575806f (diff)
downloadmc-16b894430a5b71f4956ee305229bbbffd13605d0.tar.gz
Merge decision tree nodes when possible
(Tested on Linux/AMD64) Sample count: 506 Dtree Refcnt avg: 5.38 95th percentile: 3.00 maximum: 100 Dtree Size avg: 5.23 95th percentile: 3.00 maximum: 84 Dtree Height avg: 1.39 95th percentile: 1.00 maximum: 12 References: Mikael Pettersson. A term pattern-match compiler inspired by finite automata theory. (p.6 "Step 3: Optimizing the DFA")
Diffstat (limited to 'mi')
-rw-r--r--mi/match.c169
-rw-r--r--mi/match_test.c4
2 files changed, 145 insertions, 28 deletions
diff --git a/mi/match.c b/mi/match.c
index cc34390..94c6f8d 100644
--- a/mi/match.c
+++ b/mi/match.c
@@ -14,11 +14,12 @@
#include "parse.h"
#include "mi.h"
-Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid);
+Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl);
void dtreedump(FILE *fd, Dtree *dt);
-static int ndtree;
+static size_t ndtree;
+static Dtree **dtree;
/* Path is a integer sequence that labels a subtree of a subject tree.
* For example,
@@ -178,10 +179,82 @@ mkdtree(Srcloc loc, Node *lbl)
t = zalloc(sizeof(Dtree));
t->lbl = lbl;
t->loc = loc;
- t->id = ndtree++;
+ t->id = ndtree;
+ lappend(&dtree, &ndtree, t);
return t;
}
+static int
+loadeq(Node *a, Node *b)
+{
+ if (a == b)
+ return 1;
+
+ if (exprop(a) != exprop(b))
+ return 0;
+
+ switch (exprop(a)) {
+ case Outag:
+ case Oudata:
+ case Oderef:
+ return loadeq(a->expr.args[0], b->expr.args[0]);
+ case Omemb:
+ return loadeq(a->expr.args[0], b->expr.args[0]) && nameeq(a->expr.args[1], b->expr.args[1]);
+ case Otupget:
+ case Oidx:
+ return loadeq(a->expr.args[0], b->expr.args[0]) && liteq(a->expr.args[1]->expr.args[0], b->expr.args[1]->expr.args[0]);
+ case Ovar:
+ return a == b;
+ default:
+ die("unreachable");
+ }
+}
+
+static int
+pateq(Node *a, Node *b)
+{
+ if (exprop(a) != exprop(b))
+ return 0;
+
+ switch (exprop(a)) {
+ case Olit:
+ return liteq(a->expr.args[0], b->expr.args[0]);
+ case Ogap:
+ case Ovar:
+ return 1;
+ default:
+ die("unreachable");
+ }
+ return 0;
+}
+
+static int
+dtreusable(Dtree *dt, Node *load, Node **pat, size_t npat, Dtree **next, size_t nnext, Dtree *any)
+{
+ size_t i;
+
+ if (!loadeq(dt->load, load))
+ return 0;
+
+ if (dt->npat != npat)
+ return 0;
+
+ for (i = 0; i < npat; i++) {
+ if (dt->next[i]->id != next[i]->id)
+ return 0;
+ if (!pateq(dt->pat[i], pat[i]))
+ return 0;
+ }
+
+ if (dt->any == NULL && any == NULL)
+ return 1;
+
+ if (dt->any->id != any->id)
+ return 0;
+
+ return 1;
+}
+
void
dtreedumplit(FILE *fd, Dtree *dt, Node *n, size_t depth)
{
@@ -260,24 +333,6 @@ patheq(Path *a, Path *b)
return 1;
}
-static int
-pateq(Node *a, Node *b)
-{
- if (exprop(a) != exprop(b))
- return 0;
-
- switch (exprop(a)) {
- case Olit:
- return liteq(a->expr.args[0], b->expr.args[0]);
- case Ogap:
- case Ovar:
- return 1;
- default:
- die("unreachable");
- }
- return 0;
-}
-
char *
pathfmt(Path *p)
{
@@ -683,7 +738,24 @@ pi_found:
any = NULL;
/* construct the result dtree */
- out = mkdtree(slot->pat->loc, genlbl(slot->pat->loc));
+ out = NULL;
+
+ /* TODO
+ * use a hash table to avoid the quadratic complexity
+ * when we have a large N and the bottleneck becomes obvious.
+ */
+ for (i = 0; i < ndtree; i++) {
+ if (!dtree[i]->accept && dtreusable(dtree[i], slot->load, pat, npat, edge, nedge, any)) {
+ out = dtree[i];
+ out->refcnt++;
+ break;
+ }
+ }
+ if (out == NULL) {
+ out = mkdtree(slot->pat->loc, genlbl(slot->pat->loc));
+ out->refcnt++;
+ }
+
out->load = slot->load;
out->npat = npat,
out->pat = pat,
@@ -749,8 +821,49 @@ dtheight(Dtree *dt)
return m+1;
}
+static size_t
+refsum(Dtree *dt)
+{
+ size_t i;
+ size_t sum;
+
+ if (dt == NULL)
+ return 0;
+
+ dt->emitted = 1;
+
+ /* NOTE
+ * MATCH nodes are always pre-allocated and shared,
+ * so counted as 1 for the size measurement.
+ */
+ if (dt->accept)
+ return 1;
+
+ sum = 0;
+ for (i = 0; i < dt->nnext; i++)
+ if (!dt->next[i]->emitted)
+ sum += refsum(dt->next[i]);
+ if (dt->any && !dt->any->emitted)
+ sum += refsum(dt->any);
+
+ return dt->refcnt + sum;
+}
+
+static void
+clearemit(Dtree *dt)
+{
+ size_t i;
+
+ if (dt == NULL)
+ return;
+ for (i = 0; i < dt->nnext; i++)
+ clearemit(dt->next[i]);
+ clearemit(dt->any);
+ dt->emitted = 0;
+}
+
Dtree *
-gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid)
+gendtree(Node *m, Node *val, Node **lbl, size_t nlbl)
{
Dtree *root;
Node **pat;
@@ -762,7 +875,8 @@ gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid)
char *dbgloc, *dbgfn, *dbgln;
- ndtree = startid;
+ ndtree = 0;
+ dtree = NULL;
pat = m->matchstmt.matches;
npat = m->matchstmt.nmatches;
@@ -799,8 +913,11 @@ gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid)
if (getenv("MATCH_STATS")) {
csv = fopen("match.csv", "a");
assert(csv != NULL);
- fprintf(csv, "%s@L%d, %d, %ld\n", fname(m->loc), lnum(m->loc), ndtree, dtheight(root));
+ fprintf(csv, "%s@L%d, %ld, %zd, %zd\n", fname(m->loc), lnum(m->loc), refsum(root), ndtree, dtheight(root));
fclose(csv);
+
+ /* clear 'emitted' so it can be reused by genmatchcode. */
+ clearemit(root);
}
return root;
@@ -887,7 +1004,7 @@ genmatch(Node *m, Node *val, Node ***out, size_t *nout)
endlbl = genlbl(m->loc);
- dt = gendtree(m, val, lbl, nlbl, 0);
+ dt = gendtree(m, val, lbl, nlbl);
genmatchcode(dt, out, nout);
for (i = 0; i < npat; i++) {
diff --git a/mi/match_test.c b/mi/match_test.c
index dda8fdb..d190aba 100644
--- a/mi/match_test.c
+++ b/mi/match_test.c
@@ -26,7 +26,7 @@ File file;
char debugopt[128];
typedef struct Dtree Dtree;
-extern Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl, int startid);
+extern Dtree *gendtree(Node *m, Node *val, Node **lbl, size_t nlbl);
extern void dtreedump(FILE *fd, Dtree *dt);
@@ -250,7 +250,7 @@ test_match(int idx, Node *val, Node **pat, Dtree *want)
lappend(&lbl, &nlbl, genlbl(pat[i]->match.block->loc));
}
- dt = gendtree(m, v, lbl, nlbl, 0);
+ dt = gendtree(m, v, lbl, nlbl);
if (getenv("d")) {
fprintf(stderr, "dtree >>\n");
dtreedump(stderr, dt);