summaryrefslogtreecommitdiff
path: root/support
diff options
context:
space:
mode:
authorMura Li <github@ctli.io>2019-09-23 17:54:24 +0800
committerMura Li <github@ctli.io>2019-10-23 11:58:52 +0800
commit1f645d217c2faf9cd08b5f4910ca10479821b434 (patch)
treeae958d7382718d880f6a8fdb95c6002451c9fe88 /support
parentf2d12752c483648d554700e6ac7dc100801dfa59 (diff)
downloadmc-1f645d217c2faf9cd08b5f4910ca10479821b434.tar.gz
Add a new match compiler implementation
References:
  "When Do Match-Compilation Heuristics Matter?" by Kevin Scott and Norman Ramsey

Stats:
  Sample count: 506
  Dtree Size    avg: 5.38  95th percentile: 3.00  maximum: 100
  Dtree Height  avg: 1.39  95th percentile: 1.00  maximum: 12

Sample generation:
  $ MATCH_STATS=1 make bootstrap && mbld -R support/matchstats.myr ./match.csv
Diffstat (limited to 'support')
-rwxr-xr-xsupport/matchstats.myr105
1 files changed, 105 insertions, 0 deletions
diff --git a/support/matchstats.myr b/support/matchstats.myr
new file mode 100755
index 0000000..ae20ae9
--- /dev/null
+++ b/support/matchstats.myr
@@ -0,0 +1,105 @@
+use std
+use bio
+use math
+
+/*
+ * Parses `s` as an integer with std.intparse and returns the value.
+ *
+ * Aborts the program through std.fatal when `s` is not a valid integer.
+ * The message names the rejected text so a malformed field in the input
+ * can be located.
+ */
+const atoi = {s
+	match std.intparse(s)
+	| `std.Some v:	-> v
+	| `std.None:	std.fatal("invalid integer '{}'\n", s)
+	;;
+}
+
+/*
+ * Returns the arithmetic mean of the values in `xs` as a flt64:
+ * the total of all elements divided by xs.len.
+ */
+const avg = {xs
+	var total, i
+
+	total = 0
+	for i = 0; i < xs.len; i++
+		total += xs[i]
+	;;
+	-> (total : flt64) / (xs.len : flt64)
+}
+
+/*
+ * Three-way comparison of `a` and `b`, used as the comparator for
+ * std.sort.
+ *
+ * Returns `std.Before when a < b, `std.After when a > b and `std.Equal
+ * otherwise, so that sorting with it yields ascending order. The two
+ * unequal results were previously swapped, which made the sort
+ * descending and the percentile lookup read from the wrong end.
+ */
+const intcmp = {a, b
+	if a < b
+		-> `std.Before
+	elif a > b
+		-> `std.After
+	else
+		-> `std.Equal
+	;;
+}
+
+/*
+ * Returns the `percent`-th percentile of the values in `xs` as a flt64.
+ *
+ * The values are sorted in ascending order and the rank is computed as
+ * (percent / 100) * len:
+ *   - a whole-number rank r selects the r-th smallest value (1-based);
+ *   - a fractional rank selects the mean of the values at 1-based
+ *     positions floor(rank) and floor(rank) + 1.
+ *
+ * Aborts through std.fatal when the rank is below 1 or above the
+ * number of samples (which includes an empty `xs`), since no element
+ * exists at such a rank.
+ */
+const percentile = {percent, xs
+	var sorted
+	var idx, i
+	var ret
+
+	/* std.numcmp orders numbers ascending, which the rank lookup needs */
+	sorted = std.sort(xs, std.numcmp)
+	idx = ((percent : flt64) / 100.0) * (sorted.len : flt64)
+	i = (math.floor(idx) : int)
+	if i < 1 || idx > (sorted.len : flt64)
+		/* checked first so that neither branch below indexes outside sorted */
+		std.fatal("percentile out-of-bounds\n")
+	elif idx == math.floor(idx)
+		ret = (sorted[i-1] : flt64)
+	else
+		/* convert before dividing so a .5 midpoint is not truncated away */
+		ret = ((sorted[i-1] : flt64) + (sorted[i] : flt64)) / 2.0
+	;;
+	-> ret
+}
+
+/*
+ * Returns the largest value in `xs`.
+ *
+ * The first element seeds the running maximum, so `xs` must hold at
+ * least one element.
+ */
+const maximum = {xs
+	var best, i
+
+	best = xs[0]
+	for i = 0; i < xs.len; i++
+		best = std.max(best, xs[i])
+	;;
+	-> best
+}
+
+/*
+ * Entry point: reads the match statistics file named by args[1] and
+ * prints summary statistics.
+ *
+ * Each record is read as three fields: a location terminated by ",",
+ * a size terminated by "," and a height terminated by a newline. Size
+ * and height are parsed as integers with atoi. Reading stops at end of
+ * file on a record boundary; any other read error is fatal.
+ *
+ * Output is the sample count, followed by the average, 95th percentile
+ * and maximum of the sizes and of the heights.
+ *
+ * Exits with status 1 when no file argument is given, and aborts
+ * through std.fatal when the file cannot be opened or holds no samples.
+ */
+const main = {args : byte[:][:]
+	var f, locs, sizes, heights, count
+
+	if args.len < 2
+		std.put("need input file\n")
+		std.exit(1)
+	;;
+
+	match bio.open(args[1], bio.Rd)
+	| `std.Ok fd:	f = fd
+	/* report the path that failed to open, not the program name in args[0] */
+	| `std.Err e:	std.fatal("error opening {}: {}\n", args[1], e)
+	;;
+
+	locs = [][:]
+	sizes = [][:]
+	heights = [][:]
+	count = 0
+
+	while true
+		/* end of file is only a clean stop at the start of a record */
+		match bio.readto(f, ",")
+		| `std.Ok loc:	std.slpush(&locs, std.strstrip(loc))
+		| `std.Err `bio.Eof:	break
+		| `std.Err e:	std.fatal("error read loc: {}\n", e)
+		;;
+
+		match bio.readto(f, ",")
+		| `std.Ok size:	std.slpush(&sizes, atoi(std.strstrip(size)))
+		| `std.Err e:	std.fatal("error read size: {}\n", e)
+		;;
+
+		match bio.readto(f, "\n")
+		| `std.Ok height:	std.slpush(&heights, atoi(std.strstrip(height)))
+		| `std.Err e:	std.fatal("error read height: {}\n", e)
+		;;
+		count++
+	;;
+
+	/* all input has been consumed; the statistics below use only the slices */
+	bio.close(f)
+
+	/* maximum reads xs[0] and avg divides by xs.len, so an empty sample set cannot be summarised */
+	if count == 0
+		std.fatal("no samples found in {}\n", args[1])
+	;;
+
+	std.put("Sample count: {}\n", count)
+	std.put("Dtree Size\tavg: {s=3}\t95th percentile: {s=3}\t maximum: {}\n", avg(sizes), percentile(95, sizes), maximum(sizes))
+	std.put("Dtree Height\tavg: {s=3}\t95th percentile: {s=3}\t maximum: {}\n", avg(heights), percentile(95, heights), maximum(heights))
+}
+