diff options
author | Ori Bernstein <ori@eigenstate.org> | 2015-08-26 12:20:58 -0700 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2015-08-26 12:20:58 -0700 |
commit | 2bc852bda98762d3bc01548bf972e3f1b137fbfb (patch) | |
tree | 74831deed3c9057c5fe0cbb8790d220e855bc792 /lib/regex | |
parent | 3de952510eb2a23350d24ed926f19c0cf72a12f2 (diff) | |
download | mc-2bc852bda98762d3bc01548bf972e3f1b137fbfb.tar.gz |
Move Myrddin libs to lib/ subdirectory.
Diffstat (limited to 'lib/regex')
-rw-r--r-- | lib/regex/bld.sub | 21 | ||||
-rw-r--r-- | lib/regex/compile.myr | 848 | ||||
-rwxr-xr-x | lib/regex/configure | 52 | ||||
-rw-r--r-- | lib/regex/doc/Makefile | 5 | ||||
-rw-r--r-- | lib/regex/doc/myr-regex.3 | 198 | ||||
-rw-r--r-- | lib/regex/interp.myr | 311 | ||||
-rw-r--r-- | lib/regex/ranges.myr | 2386 | ||||
-rw-r--r-- | lib/regex/redump.myr | 87 | ||||
-rw-r--r-- | lib/regex/test/basic.myr | 39 | ||||
-rw-r--r-- | lib/regex/test/bld.sub | 7 | ||||
-rw-r--r-- | lib/regex/test/boundaries.myr | 36 | ||||
-rw-r--r-- | lib/regex/test/capture.myr | 17 | ||||
-rw-r--r-- | lib/regex/test/class.myr | 67 | ||||
-rw-r--r-- | lib/regex/test/failmatch.myr | 7 | ||||
-rw-r--r-- | lib/regex/test/negclass.myr | 72 | ||||
-rwxr-xr-x | lib/regex/test/runtest.sh | 124 | ||||
-rw-r--r-- | lib/regex/test/testmatch.myr | 58 | ||||
-rw-r--r-- | lib/regex/test/tests | 29 | ||||
-rw-r--r-- | lib/regex/test/unicode.myr | 19 | ||||
-rw-r--r-- | lib/regex/types.myr | 88 |
20 files changed, 4471 insertions, 0 deletions
diff --git a/lib/regex/bld.sub b/lib/regex/bld.sub new file mode 100644 index 0000000..0078d09 --- /dev/null +++ b/lib/regex/bld.sub @@ -0,0 +1,21 @@ +lib regex = + compile.myr + interp.myr + ranges.myr + types.myr + + lib ../std:std +;; + +bin redump {noinst} = + redump.myr + lib ../std:std + lib ../bio:bio + lib regex +;; + +gen ranges.myr {durable} = + mkchartab -a -p_ranges UnicodeData.txt -o ranges.myr +;; + +sub = test ;; diff --git a/lib/regex/compile.myr b/lib/regex/compile.myr new file mode 100644 index 0000000..28d7ce8 --- /dev/null +++ b/lib/regex/compile.myr @@ -0,0 +1,848 @@ +use std + +use "types.use" +use "ranges.use" + +pkg regex = + const parse : (re : byte[:] -> std.result(ast#, status)) + const compile : (re : byte[:] -> std.result(regex#, status)) + const dbgcompile : (re : byte[:] -> std.result(regex#, status)) + const free : (re : regex# -> void) + const failmsg : (st : status -> byte[:]) +;; + +type parseresult = union + `Some ast# + `None + `Fail status +;; + +/* Compiles a pattern into a regex */ +const compile = {pat + -> regexcompile(std.mk([.pat = pat, .nmatch = 1]), 0) +} + +const parse = {pat + var re + + re = std.mk([.pat = pat, .nmatch = 1]) + match regexparse(re) + | `None: -> `std.Fail `Incomplete + | `Fail f: -> `std.Fail f + | `Some t: + if re.pat.len > 0 + -> `std.Fail `Incomplete + else + -> `std.Ok t + ;; + ;; +} + +/* Compiles a pattern into a debug regex. This can be verbose. */ +const dbgcompile = {pat + var re + + re = std.mk([.pat = pat, .nmatch = 1, .debug = true]) + -> regexcompile(re, 0) +} + +/* compiles a pattern into an allocated regex */ +const regexcompile = {re, id + match regexparse(re) + | `None: -> `std.Fail (`Incomplete) + | `Fail f: -> `std.Fail f + | `Some t: + /* + we can stop early if we get + an incorrectly encoded char + */ + if re.pat.len > 0 + astfree(t) + -> `std.Fail (`Incomplete) + ;; + dump(re, t, 0) + append(re, `Ilbra 0) + gen(re, t) + append(re, `Irbra 0) + append(re, `Imatch id) + idump(re) + astfree(t) + -> `std.Ok re + ;; + -> `std.Fail (`Noimpl) +} + +const free = {re + /* all the threads should be dead, + so we shouldn't have to free any*/ + std.slfree(re.prog) + std.free(re) +} + + +/* generates bytecode from an AST */ +const gen = {re, t + match t# + |`Alt (a, b): genalt(re, a, b) + |`Cat (a, b): gen(re, a); gen(re, b) + /* repetition */ + |`Star a: genstar(re, a, false) + |`Rstar a: genstar(re, a, true) + |`Plus a: gen(re, a); genstar(re, a, false) + |`Rplus a: gen(re, a); genstar(re, a, true) + |`Quest a: genquest(re, a) + + /* end matches */ + |`Chr c: genchar(re, c) + |`Ranges sl: genranges(re, sl) + + /* meta */ + |`Bol: append(re, `Ibol) + |`Eol: append(re, `Ibol) + |`Bow: append(re, `Ibow) + |`Eow: append(re, `Ieow) + |`Cap (m, a): + append(re, `Ilbra m) + gen(re, a) + append(re, `Irbra m) + ;; + -> re.proglen +} + +const genranges = {re, sl + var lbuf : byte[4], hbuf : byte[4], boundbuf : byte[4] + var lsz, hsz, bsz, i + var rt : rangetrie# + + /* generate a trie of ranges */ + rt = std.zalloc() + for r in sl + /* + encode: + lo => bounds[loidx] - 1 + bounds[loidx] => bounds[loidx + 1] - 1 + ... + bounds[hiidx - 1] => hi + */ + lsz = std.encode(lbuf[:], r[0]) + hsz = std.encode(hbuf[:], r[1]) + for i = lsz; i < hsz; i++ + bsz = bound(boundbuf[:], i, 0xff) + rtinsert(rt, lbuf[:lsz], boundbuf[:bsz]) + lsz = bound(lbuf[:], i + 1, 0x00) + ;; + rtinsert(rt, lbuf[:lsz], hbuf[:hsz]) + ;; + if re.debug + rtdump(rt, 0) + ;; + rangegen(re, rt, rt.ranges, rt.link, rangeprogsize(rt) + re.proglen) + rtfree(rt) + -> re.proglen +} + +const bound = {buf, len, fill + var i, s + + if len == 1 + buf[0] = 0x7f + else + s = len castto(byte) + buf[0] = (0xff << (8 - s)) | (fill >> (s + 1)) + for i = 1; i < len; i++ + buf[i] = 0x80 | (fill >> 2) + ;; + ;; + -> len +} + +type rangetrie = struct + ranges : (byte, byte)[:] + link : rangetrie#[:] + end : bool +;; + +const rtdump = {rt, ind + var i + var l, h + + indent(ind) + std.put("Range (end = {}) {{\n", rt.end) + for i = 0; i < rt.ranges.len; i++ + indent(ind + 1) + (l, h) = rt.ranges[i] + std.put("0x{x}-0x{x}: \n", l, h) + rtdump(rt.link[i], ind + 1) + ;; + indent(ind) + std.put("}\n") +} + +const indent = {ind + var i + for i = 0; i < ind; i++ + std.put("\t") + ;; +} + +const rtinsert = {rt, lo, hi + var a, b + var n + + std.assert(lo.len == hi.len, "range sizes differ") + if lo.len == 0 + rt.end = true + -> + ;; + + n = rt.ranges.len + if n == 0 + rt.ranges = std.slpush(rt.ranges, (lo[0], hi[0])) + rt.link = std.slpush(rt.link, std.zalloc()) + else + /* + this is a safe way to compare because we know that ranges + should always be coming in ordered. This means that equal + values will be added one after the other. + */ + (a, b) = rt.ranges[n - 1] + if a != lo[0] || b != hi[0] + rt.ranges = std.slpush(rt.ranges, (lo[0], hi[0])) + rt.link = std.slpush(rt.link, std.zalloc()) + ;; + ;; + + rtinsert(rt.link[rt.link.len - 1], lo[1:], hi[1:]) +} + +const rtfree = {rt + for l in rt.link + rtfree(l) + ;; + std.slfree(rt.link) + std.slfree(rt.ranges) + std.free(rt) +} + +const rangegen = {re, rt, ranges, links, end + var alt, l0, l1, l2 + var a, b + var n + + n = ranges.len + if n == 0 + -> re.proglen + elif n == 1 + (a, b) = ranges[0] + append(re, `Irange (a, b)) + if links[0].end + if links[0].ranges.len > 0 + append(re, `Ifork (re.prog.len + 1, end)) + else + append(re, `Ijmp end) + ;; + ;; + rangegen(re, links[0], links[0].ranges, links[0].link, end) + else + alt = re.proglen + l0 = append(re, `Ifork (-1, -1)) + l1 = rangegen(re, rt, ranges[0:n/2], links[0:n/2], end) + l2 = rangegen(re, rt, ranges[n/2:n], links[n/2:n], end) + re.prog[alt] = `Ifork (l0, l1) + ;; + -> re.proglen +} + +const rangeprogsize = {rt + var sz + + if rt.ranges.len == 0 + sz = 0 + else + sz = 2*rt.ranges.len - 1 + for l in rt.link + sz += rangeprogsize(l) + ;; + ;; + if rt.end + sz += 1 + ;; + -> sz +} + +/* calculates the forward jump distance for a utf8 character range */ +const jmpdist = {n + var d + var i + + d = n - 1 + for i = n - 1; i > 0; i-- + d += i + ;; + -> d +} + +/* generates an alternation */ +const genalt = {re, l, r + var alt + var jmp + var l0 + var l1 + var l2 + + alt = re.proglen + l0 = append(re, `Ifork (-1, -1)) /* needs to be replaced */ + gen(re, l) + jmp = re.proglen + l1 = append(re, `Ijmp -1) /* needs to be replaced */ + l2 = gen(re, r) + + re.prog[alt] = `Ifork(l0, l1) + re.prog[jmp] = `Ijmp l2 + -> re.proglen +} + +/* generates a repetition operator */ +const genstar = {re, rep, reluct + var alt + var jmp + var l0 + var l1 + var l2 + + l0 = re.proglen + alt = re.proglen + l1 = append(re, `Ifork (-1, -1)) /* needs to be replaced */ + jmp = gen(re, rep) + l2 = append(re, `Ijmp -1) + + + /* reluctant matches should prefer jumping to the end. */ + if reluct + re.prog[alt] = `Ifork (l2, l1) + else + re.prog[alt] = `Ifork (l1, l2) + ;; + re.prog[jmp] = `Ijmp l0 + -> re.proglen +} + +/* generates a question mark operator */ +const genquest = {re, q + var alt + var l0 + var l1 + + alt = re.proglen + l0 = append(re, `Ifork (-1, -1)) /* needs to be replaced */ + l1 = gen(re, q) + re.prog[alt] = `Ifork (l0, l1) + -> re.proglen +} + +/* generates a single char match */ +const genchar = {re, c + var b : byte[4] + var n + var i + + n = std.encode(b[:], c) + std.assert(n > 0 && n < 4, "non-utf character in regex\n") + for i = 0; i < n; i++ + append(re, `Ibyte b[i]) + ;; + -> re.proglen +} + +/* appends an instructon to an re program */ +const append = {re, insn + if re.proglen == re.prog.len + re.prog = std.slgrow(re.prog, std.max(1, 2*re.proglen)) + ;; + re.prog[re.proglen] = insn + re.proglen++ + -> re.proglen +} + +/* instruction dump */ +const idump = {re + var i + + if !re.debug + -> + ;; + for i = 0; i < re.proglen; i++ + std.put("{}:\t", i) + match re.prog[i] + /* Char matching. Consume exactly one byte from the string. */ + | `Ibyte b: std.put("`Ibyte {} ({})\n", b, b castto(char)) + | `Irange (start, end): + std.put("`Irange ({},{})", start, end) + if std.isalnum(start castto(char)) && std.isalnum(end castto(char)) + std.put("\t/* {}-{} */", start castto(char), end castto(char)) + ;; + std.put("\n") + /* capture groups */ + | `Ilbra m: std.put("`Ilbra {}\n", m) + | `Irbra m: std.put("`Irbra {}\n", m) + /* anchors */ + | `Ibol: std.put("`Ibol\n") + | `Ieol: std.put("`Ieol\n") + | `Ibow: std.put("`Ibow\n") + | `Ieow: std.put("`Ieow\n") + /* control flow */ + | `Ifork (lip, rip): std.put("`Ifork ({},{})\n", lip, rip) + | `Ijmp ip: std.put("`Ijmp {}\n", ip) + | `Imatch id: std.put("`Imatch {}\n", id) + ;; + ;; +} + +/* AST dump */ +const dump = {re, t, indent + var i + + if !re.debug + -> + ;; + for i = 0; i < indent; i++ + std.put(" ") + ;; + match t# + | `Alt (a, b): + std.put("Alt\n") + dump(re, a, indent + 1) + dump(re, b, indent + 1) + | `Cat (a, b): + std.put("Cat\n") + dump(re, a, indent + 1) + dump(re, b, indent + 1) + /* repetition */ + | `Star a: + std.put("Star\n") + dump(re, a, indent + 1) + | `Rstar a: + std.put("Rstar\n") + dump(re, a, indent + 1) + | `Plus a: + std.put("Plus\n") + dump(re, a, indent + 1) + | `Rplus a: + std.put("Rplus\n") + dump(re, a, indent + 1) + | `Quest a: + std.put("Quest\n") + dump(re, a, indent + 1) + | `Bol: + std.put("Bol\n") + | `Eol: + std.put("Eol\n") + | `Bow: + std.put("Bow\n") + | `Eow: + std.put("Eow\n") + /* end matches */ + | `Chr c: + std.put("Char {}\n", c) + | `Ranges rl: + std.put("Ranges") + for r in rl + for i = 0; i < indent + 1; i++ + std.put(" ") + ;; + std.put("\t({}-{})\n", r[0], r[1]) + ;; + + /* meta */ + | `Cap (m, a): + std.put("Cap {}\n", m) + dump(re, a, indent + 1) + ;; +} + +/* parses an expression */ +const regexparse = {re + match altexpr(re) + | `Some t: + if re.pat.len == 0 + -> `Some t + else + astfree(t) + -> `Fail `Incomplete + ;; + | `None: + -> `None + | `Fail st: + -> `Fail st + ;; +} + +const altexpr = {re + var ret + + match catexpr(re) + | `Some t: + ret = t + if matchc(re, '|') + match altexpr(re) + | `Some rhs: + ret = mk(`Alt (ret, rhs)) + | `None: + astfree(ret) + -> `Fail (`Incomplete) + | `Fail f: + -> `Fail f + ;; + ;; + | other: + -> other + ;; + -> `Some ret +} + +const catexpr = {re + var ret + + match repexpr(re) + | `Some t: + ret = t + match catexpr(re) + | `Some rhs: + ret = mk(`Cat (t, rhs)) + | `Fail f: -> `Fail f + | `None: /* nothing */ + ;; + | other: + -> other + ;; + -> `Some ret +} + +const repexpr = {re + var ret + + match baseexpr(re) + | `Some t: + if matchc(re, '*') + if matchc(re, '?') + ret = mk(`Rstar t) + else + ret = mk(`Star t) + ;; + elif matchc(re, '+') + if matchc(re, '?') + ret = mk(`Rplus t) + else + ret = mk(`Plus t) + ;; + elif matchc(re, '?') + ret = mk(`Quest t) + else + ret = t + ;; + | other: + -> other + ;; + -> `Some ret +} + +const baseexpr = {re + var ret, m + + if re.pat.len == 0 + -> `None + ;; + match peekc(re) + /* lower prec operators */ + | '|': -> `None + | ')': -> `None + | '*': -> `Fail `Badrep + | '+': -> `Fail `Badrep + | '?': -> `Fail `Badrep + | '[': -> chrclass(re) + | '.': getc(re); ret = mk(`Ranges std.slpush([][:], [0, std.Maxcharval])) + | '^': getc(re); ret = mk(`Bol) + | '$': getc(re); ret = mk(`Eol) + | '(': + m = re.nmatch++ + getc(re) + match altexpr(re) + | `Some s: + if matchc(re, ')') + -> `Some mk(`Cap (m, s)) + else + -> `Fail `Unbalanced + ;; + | `None: -> `Fail `Emptyparen + | `Fail st: -> `Fail st + ;; + | '\\': + getc(re) /* consume the slash */ + if re.pat.len == 0 + -> `Fail `Incomplete + ;; + -> escaped(re) + | c: + getc(re) + ret = mk(`Chr c) + ;; + -> `Some ret +} + +const escaped = {re + var ret + + match getc(re) + /* character classes */ + | 'd': ret = `Some mk(`Ranges std.sldup(_ranges.tabasciidigit[:])) + | 'x': ret = `Some mk(`Ranges std.sldup(_ranges.tabasciixdigit[:])) + | 's': ret = `Some mk(`Ranges std.sldup(_ranges.tabasciispace[:])) + | 'w': ret = `Some mk(`Ranges std.sldup(_ranges.tabasciiword[:])) + | 'h': ret = `Some mk(`Ranges std.sldup(_ranges.tabasciiblank[:])) + + /* negated character classes */ + | 'W': ret = `Some mk(`Ranges negate(_ranges.tabasciiword[:])) + | 'S': ret = `Some mk(`Ranges negate(_ranges.tabasciispace[:])) + | 'D': ret = `Some mk(`Ranges negate(_ranges.tabasciidigit[:])) + | 'X': ret = `Some mk(`Ranges negate(_ranges.tabasciixdigit[:])) + | 'H': ret = `Some mk(`Ranges negate(_ranges.tabasciiblank[:])) + + /* unicode character classes */ + | 'p': ret = unicodeclass(re, false) + | 'P': ret = unicodeclass(re, true) + + /* operators that need an escape */ + | '<': ret = `Some mk(`Bow) + | '>': ret = `Some mk(`Eow) + + /* escaped metachars */ + | '^': ret = `Some mk(`Chr '^') + | '$': ret = `Some mk(`Chr '$') + | '.': ret = `Some mk(`Chr '.') + | '+': ret = `Some mk(`Chr '+') + | '?': ret = `Some mk(`Chr '?') + | chr: ret = `Fail `Badescape + ;; + -> ret +} + +const unicodeclass = {re, neg + var c, s + var tab + var t + var n + + if re.pat.len == 0 + -> `Fail (`Incomplete) + ;; + n = 0 + s = re.pat + /* either a single char pattern, or {pat} */ + match getc(re) + | '{': + s = s[1:] + while re.pat.len > 0 + c = getc(re) + if c == '}' + break + ;; + n += std.charlen(c) + ;; + | r: + n += std.charlen(r) + ;; + s = s[:n] + /* letters */ + if std.sleq(s, "L") || std.sleq(s, "Letter") + tab = _ranges.tabalpha[:] + elif std.sleq(s, "Lu") || std.sleq(s, "Uppercase_Letter") + tab = _ranges.tabupper[:] + elif std.sleq(s, "Ll") || std.sleq(s, "Lowercase_Letter") + tab = _ranges.tablower[:] + elif std.sleq(s, "Lt") || std.sleq(s, "Titlecase_Letter") + tab = _ranges.tablower[:] + /* numbers (incomplete) */ + elif std.sleq(s, "N") || std.sleq(s, "Number") + tab = _ranges.tabdigit[:] + elif std.sleq(s, "Z") || std.sleq(s, "Separator") + tab = _ranges.tabspace[:] + elif std.sleq(s, "Zs") || std.sleq(s, "Space_Separator") + tab = _ranges.tabblank[:] + else + -> `Fail (`Badrange) + ;; + if !neg + t = mk(`Ranges std.sldup(tab)) + else + t = mk(`Ranges negate(tab)) + ;; + -> `Some t +} + +const chrclass = {re + var rl, m, n + var neg + var t + + /* we know we saw '[' on entry */ + matchc(re, '[') + neg = false + if matchc(re, '^') + neg = true + ;; + rl = rangematch(re, [][:]) + while peekc(re) != ']' && re.pat.len > 0 + rl = rangematch(re, rl) + ;; + if !matchc(re, ']') + std.slfree(rl) + -> `Fail `Unbalanced + ;; + + std.sort(rl, {a, b; + if a[0] < b[0] + -> `std.Before + elif a[0] == b[0] + -> `std.Equal + else + -> `std.After + ;;}) + m = merge(rl) + std.slfree(rl) + if neg + n = negate(m) + std.slfree(m) + t = mk(`Ranges n) + else + t = mk(`Ranges m) + ;; + -> `Some t +} + +const rangematch = {re, sl + var lo + var hi + + lo = getc(re) + if matchc(re, '-') + hi = getc(re) + if lo <= hi + -> std.slpush(sl, [lo, hi]) + else + -> std.slpush(sl, [hi, lo]) + ;; + else + -> std.slpush(sl, [lo, lo]) + ;; +} + +const negate = {rng + var start, end, next + var neg + + neg = [][:] + start = 0 + next = 0 /* if we have no ranges */ + for r in rng + (end, next) = (r[0], r[1]) + neg = std.slpush(neg, [start, end - 1]) + start = next + 1 + ;; + neg = std.slpush(neg, [next + 1, std.Maxcharval]) + -> neg +} + +/* rl is a sorted list of ranges */ +const merge = {rl + var lo, hi + var ret + + if rl.len == 0 + -> [][:] + ;; + ret = [][:] + lo = rl[0][0] + hi = rl[0][1] + for r in rl[1:] + /* if it overlaps or abuts, merge */ + if r[0] <= hi + 1 + hi = r[1] + else + ret = std.slpush(ret, [lo, hi]) + lo = r[0] + hi = r[1] + ;; + ;; + -> std.slpush(ret, [lo, hi]) +} + + +const matchc = {re, c + var str + var chr + + (chr, str) = std.striter(re.pat) + if chr != c + -> false + ;; + re.pat = str + -> true +} + +const getc = {re + var c + + (c, re.pat) = std.striter(re.pat) + -> c +} + +const peekc = {re + var c + + (c, _) = std.striter(re.pat) + -> c +} + +const mk = {v + var t + + t = std.alloc() + t# = v + -> t +} + +const astfree = {t + match t# + | `Alt (a, b): astfree(a); astfree(b) + | `Cat (a, b): astfree(a); astfree(b) + /* repetition */ + | `Star a: astfree(a) + | `Rstar a: astfree(a) + | `Plus a: astfree(a) + | `Rplus a: astfree(a) + | `Quest a: astfree(a) + + /* end matches */ + | `Chr c: + | `Ranges rl: std.slfree(rl) + + /* meta */ + | `Cap (m, a): astfree(a) + | _: /* other types have no suballocations */ + ;; + std.free(t) +} + +const failmsg = {st + match st + | `Noimpl: -> "no implementation" + | `Incomplete: -> "regex ended before input fully parsed" + | `Unbalanced: -> "unbalanced bracket" + | `Emptyparen: -> "empty parentheses" + | `Badrep: -> "invalid repetition" + | `Badrange: -> "invalid range" + | `Badescape: -> "invalid escape code" + + ;; +} + diff --git a/lib/regex/configure b/lib/regex/configure new file mode 100755 index 0000000..37fe623 --- /dev/null +++ b/lib/regex/configure @@ -0,0 +1,52 @@ +#!/bin/sh + +prefix="/usr/local" + +for i in `seq 300`; do + echo "Lots of output to emulate automake... ok" + echo "Testing for things you'll never use... fail" + echo "Satisfying the fortran77 lobby... ok" + echo "Burning CPU time checking for the bloody obvious... ok" +done +echo "Automake emulated successfully" + +INST_ROOT='/usr/local' + +for arg in $*; do + shift 1 + case $arg in + "--prefix" | "-p") + prefix=shift $* + ;; + --prefix=*) + prefix=`echo $arg | sed 's/^--prefix=//g'` + ;; + "--help" | "-h") + echo "Usage:" + echo " --prefix | -p: The prefix to install to" + break; + ;; + *) echo "Unrecognized argument $arg";; + esac +done + +OS=`uname` + +echo export INST_ROOT=$prefix > config.mk +case $OS in + *Linux*) + echo 'export SYS=linux' >> config.mk + ;; + *Darwin*) + echo 'export SYS=osx' >> config.mk + ;; + *) + echo 'Unknown architecture.' + ;; +esac + +cat << EOF + Building with: + prefix=$prefix +EOF + diff --git a/lib/regex/doc/Makefile b/lib/regex/doc/Makefile new file mode 100644 index 0000000..c1749d3 --- /dev/null +++ b/lib/regex/doc/Makefile @@ -0,0 +1,5 @@ +MAN=myr-regex.3 \ + +include ../config.mk +include ../mk/myr.mk + diff --git a/lib/regex/doc/myr-regex.3 b/lib/regex/doc/myr-regex.3 new file mode 100644 index 0000000..c0d0da0 --- /dev/null +++ b/lib/regex/doc/myr-regex.3 @@ -0,0 +1,198 @@ +.TH MYR REGEX 1 +.SH NAME +regex myr-regex +.SH LIBRARY +regex +.SH SYNOPSIS +.B use regex +.I const compile : (re : byte[:] -> std.error(regex#, status)) +.I const dbgcompile : (re : byte[:] -> std.error(regex#, status)) +.I const free : (re : regex# -> void) +.br +.I const exec : (re : regex#, str : byte[:] -> bool) +.I const search : (re : regex#, str : byte[:] -> bool) +.SH DESCRIPTION +.PP +The regex library provides functions for compiling and evaluating regular +expressions, as described later in this document, or in myr-regex(7). +.PP +.I regex.compile will take a string describing a regex, and will attempt +to compile it, returing +.I `std.Success regex# +if the regex is valid, and there were no error conditions encountered during +compilation. If the compilation failed, +.I `std.Failure regex.status +will be returned, where regex.status is a failure code. + +.PP +.I regex.dbgcompile +is identical to +.I regex.compile, +however, it will print debugging information as it compiles, and each +time the regex is evaluated. + +.PP +.I regex.exec +will take the regex passed to it, and evaluate it over the text provided, +returning the +.I `std.Some matches, +or +.I `std.None +if there were no matches found. The matches must span the whole string. + +.PP +.I regex.search +is similar to regex.exec, but it will attempt to find a match somewhere +within the string, instead of attempting to find a match spanning the whole +string. + +.SH REGEX SYNTAX +.PP +The grammar used by libregex is below: + +.EX + regex : altexpr + altexpr : catexpr ('|' altexpr)+ + catexpr : repexpr (catexpr)+ + repexpr : baseexpr[*+?] + baseexpr : literal + | charclass + | charrange + | escaped + | '.' + | '^' + | '$' + | '(' regex ')' + charclass : see below + charrange : '[' (literal('-' literal)?)+']' +.EE + +The following metacharacters have the meanings listed below: +.TP +. +Matches a single unicode character +.TP +^ +Matches the beginning of a line. Does not consume any characters. +.TP +$ +Matches the end of a line. Does not consume any characters. +.TP +* +Matches any number of repetitions of the preceding regex fragment. +.TP +*? +Reluctantly matches any number of repetitions of the preceding regex fragment. +.TP ++ +Matches one or more repetitions of the preceding regex fragment. +.TP ++? +Reluctantly matches one or more repetitions of the preceding regex fragment. +.TP +? +Matches zero or one of the preceding regex fragment. + +.PP +In order to match a literal metacharacter, it needs to be preceded by +a '\\' character. + +The following character classes are supported: +.TP +\\d +ASCII digits +.TP +\\D +Negation of ASCII digits +.TP +\\x +ASCII Hex digits +.TP +\\X +Negation of ASCII Hex digits +.TP +\\s +ASCII spaces +.TP +\\S +Negation of ASCII spaces +.TP +\\w +ASCII word characters +.TP +\\W +Negation of ASCII word characters +.TP +\\h +ASCII whitespace characters +.TP +\\H +Negation of ASCII whitespace characters +.TP +\\pX, \\p{X} +Characters with unicode property 'X' +.TP +\\PX, \\P{X} +Negation of characters with unicode property 'X' + +.PP +Unicode properties that are supported are listed below: + +.TP +L, Letter +Unicode letter property +.TP +Lu, Uppercase_Letter +Uppercase letter unicode property +.TP +Ll, Lowercase_Letter +Lowercase letter unicode property +.TP +Lt, Titlecase_Letter +Titlecase letter unicode property +.TP +N, Number +Number unicode property +.TP +Z, Separator +Any separator character unicode property +.TP +Zs, Space_Separator +Space separator unicode property + + +.SH EXAMPLE +.EX + use std + use regex + + const main = { + match regex.compile(pat) + var i + | `std.Success re: + match regex.exec(re, text) + | `std.Some matches: + for i = 0; i < matches.len; i++ + std.put("Match %i: %s\n", i, match[i]) + ;; + | `std.None: std.put("Text did not match\n") + ;; + | `std.Failure err: + std.put("failed to compile regex") + ;; + } +.EE + +.SH FILES +The source code for this compiler is available from +.B git://git.eigenstate.org/git/ori/libregex.git + +.SH SEE ALSO +.IR mc(1) + +.SH BUGS +.PP +This code is insufficiently tested. + +.PP +This code does not support all of the regex features that one would expect. diff --git a/lib/regex/interp.myr b/lib/regex/interp.myr new file mode 100644 index 0000000..fc179c0 --- /dev/null +++ b/lib/regex/interp.myr @@ -0,0 +1,311 @@ +use std + +use "types.use" + +pkg regex = + const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:])) + /* + FIXME: implement. This should scan for a possible start char in the + regex and use that to optimize. + const search : (re : regex#, str : byte[:] -> std.option(byte[:][:])) + */ +;; + +/* Ugly: for performance. std.option() should be used instead when unions get faster. */ +const Zthr = 0 castto(rethread#) + +const exec = {re, str + var thr + var m + + re.str = str + re.strp = 0 + thr = run(re) + if thr != Zthr + m = getmatches(re, thr) + cleanup(re) + -> `std.Some m + else + cleanup(re) + -> `std.None + ;; +} + +const cleanup = {re + var thr, next + + for thr = re.runq; thr != Zthr; thr = next + next = thr.next + thrfree(re, thr) + ;; + for thr = re.expired; thr != Zthr; thr = next + next = thr.next + thrfree(re, thr) + ;; +} + +const getmatches = {re, thr + var ret + var i + + ret = std.slalloc(re.nmatch) + for i = 0; i < re.nmatch; i++ + if thr.mstart[i] != -1 && thr.mend[i] != -1 + ret[i] = re.str[thr.mstart[i]:thr.mend[i]] + else + ret[i] = [][:] + ;; + ;; + -> ret +} + + +/* returns a matching thread, or Zthr if no threads matched */ +const run = {re + var i, ip + var consumed + var thr + var states + + states = std.mkbs() + re.runq = mkthread(re, 0) + re.runq.mstart = std.slalloc(re.nmatch) + re.runq.mend = std.slalloc(re.nmatch) + for i = 0; i < re.nmatch; i++ + re.runq.mstart[i] = -1 + re.runq.mend[i] = -1 + ;; + while re.nthr > 0 + while re.runq != Zthr + /* set up the next thread */ + thr = re.runq + re.runq = thr.next + + trace(re, thr, "\nrunning tid={}, ip={}, s[{}]={}\n", thr.tid, thr.ip, re.strp, std.decode(re.str[re.strp:])) + ip = thr.ip + consumed = step(re, thr, -1) + while !consumed + consumed = step(re, thr, ip) + ;; + + if std.bshas(states, thr.ip) + die(re, thr, "there can be only one") + ;; + + if thr.dead + thrfree(re, thr) + elif thr.matched && re.strp == re.str.len + -> thr + elif !thr.matched + std.bsput(states, thr.ip) + if re.expired == Zthr + re.expired = thr + ;; + if re.expiredtail != Zthr + re.expiredtail.next = thr + ;; + re.expiredtail = thr + thr.next = Zthr + + ;; + ;; + std.bsclear(states) + trace(re, thr, "switch\n") + re.runq = re.expired + re.expired = Zthr + re.expiredtail = Zthr + re.strp++ + ;; + -> Zthr +} + +/* + Steps forward one instruction. Returns true if a byte of input was + consumed, false otherwise. +*/ +const step = {re, thr, curip + var str + var mstart + var mend + + str = re.str + match re.prog[thr.ip] + /* Char matching. Consume exactly one byte from the string. */ + | `Ibyte b: + trace(re, thr, "\t{}:\tByte {} ({})\n", thr.ip, b, b castto(char)) + if !within(re, str) + die(re, thr, "end of string") + elif b != str[re.strp] + die(re, thr, "not right char") + else + thr.ip++ + trace(re, thr, "\t\tmatched {} with {}\n", b, str[re.strp]) + ;; + | `Irange (start, end): + trace(re, thr, "\t{}:\tRange ({}, {}) /* {} - {} */\n", thr.ip, start, end, start castto(char), end castto(char)) + if !within(re, str) || start > str[re.strp] || end < str[re.strp] + die(re, thr, "bad range") + else + thr.ip++ + ;; + /* + Non-consuming. All of these return false, and expect step to be + called again until exactly one byte is consumed from the string. + */ + | `Ibol: + trace(re, thr, "\t{}:\tBol\n", thr.ip) + if re.strp == 0 || str[re.strp - 1] == '\n' castto(byte) + thr.ip++ + -> false + else + die(re, thr, "not beginning of line") + ;; + | `Ieol: + trace(re, thr, "\t{}:\tEol\n", thr.ip) + if re.strp == str.len || str[re.strp] == '\n' castto(byte) + thr.ip++ + -> false + else + die(re, thr, "not end of line") + ;; + /* check for word characters */ + | `Ibow: + trace(re, thr, "\t{}:\tBow\n", thr.ip) + if iswordchar(str[re.strp:]) && (re.strp == 0 || !iswordchar(prevchar(str, re.strp))) + thr.ip++ + -> false + else + die(re, thr, "not beginning of word") + ;; + | `Ieow: + trace(re, thr, "\t{}:\tEow\n", thr.ip) + if re.strp == str.len && iswordchar(prevchar(str, re.strp)) + thr.ip++ + -> false + elif re.strp > 0 && !iswordchar(str[re.strp:]) && iswordchar(prevchar(str, re.strp)) + thr.ip++ + -> false + else + die(re, thr, "not end of word") + ;; + | `Ilbra m: + trace(re, thr, "\t{}:\tLbra {}\n", thr.ip, m) + trace(re, thr, "\t\tmatch start = {}\n", re.strp) + thr.mstart[m] = re.strp + thr.ip++ + -> false + | `Irbra m: + trace(re, thr, "\t{}:\tRbra {}\n", thr.ip, m) + thr.mend[m] = re.strp + thr.ip++ + -> false + | `Ifork (lip, rip): + trace(re, thr, "\t{}:\tFork ({}, {})\n", thr.ip, lip, rip) + mstart = std.sldup(thr.mstart) + mend = std.sldup(thr.mend) + fork(re, thr, rip, curip, mstart, mend) + thr.ip = lip + -> false + | `Ijmp ip: + trace(re, thr, "\t{}:\tJmp {}\n", thr.ip, ip) + thr.ip = ip + -> false + | `Imatch id: + trace(re, thr, "\t{}:\tMatch\n", thr.ip) + finish(re, thr) + -> true + ;; + -> true +} + +const fork = {re, thr, ip, curip, mstart, mend + var thr + + if ip == curip /* loop detection */ + -> + ;; + thr = mkthread(re, ip) + thr.next = re.runq + thr.mstart = mstart + thr.mend = mend + re.runq = thr +} + +const die = {re, thr, msg + /* + we can have die called on a thread + multiple times, eg, if it has a bad + range *and* end in a state that another + thread is in. We should only decrement + the number of threads for that once. + */ + trace(re, thr, "\t\tdie {}: {}\n", thr.tid, msg) + if !thr.dead + re.nthr-- + ;; + thr.dead = true +} + +const finish = {re, thr + trace(re, thr, "finish {}\n", thr.tid) + thr.matched = true + re.nthr-- +} + +var nexttid = 0 +const mkthread = {re, ip + var thr : rethread# + + thr = std.alloc() + + thr.next = Zthr + + thr.ip = ip + thr.tid = nexttid++ + thr.dead = false + thr.matched = false + + thr.mstart = [][:] + thr.mend = [][:] + + re.nthr++ + + -> thr +} + +const thrfree = {re, thr + trace(re, thr, "\t\tcleanup {}\n", thr.tid) + std.slfree(thr.mstart) + std.slfree(thr.mend) + std.free(thr) +} + +const within = {re, str + -> re.strp < str.len +} + +const trace : (re : regex#, thr : rethread#, msg : byte[:], args : ... -> void) = {re, thr, msg, args + var ap + + if re.debug + ap = std.vastart(&args) + std.putv(msg, &ap) + ;; +} + +/* must be called with i >= 1 */ +const prevchar = {s, i + std.assert(i != 0, "prevchar must be called with i >= 1\n") + i-- + while i != 0 && s[i] >= 0x80 + i-- + ;; + -> s[i:] +} + +const iswordchar = {s + var c + + c = std.decode(s) + -> std.isalpha(c) || std.isdigit(c) || c == '_' +} diff --git a/lib/regex/ranges.myr b/lib/regex/ranges.myr new file mode 100644 index 0000000..3efae43 --- /dev/null +++ b/lib/regex/ranges.myr @@ -0,0 +1,2386 @@ +/* + This set of unicode tables was automatically generated + by the following command: + mkchartab -a -p_ranges UnicodeData.txt -o ranges.myr + editing it manually is probably a waste of time. +*/ + +pkg _ranges = + const tabasciialpha + const tabasciiupper + const tabasciilower + const tabasciiword + const tabasciidigit + const tabasciixdigit + const tabasciispace + const tabasciiblank + const tabalpha + const tabupper + const tablower + const tabtitle + const tabword + const tabdigit + const tabxdigit + const tabspace + const tabblank +;; +const tabasciialpha = [ + ['\u{41}','\u{5a}'], + ['\u{61}','\u{7a}'], +] + +const tabasciiupper = [ + ['\u{41}','\u{5a}'], +] + +const tabasciilower = [ + ['\u{61}','\u{7a}'], +] + +const tabasciiword = [ + ['\u{30}','\u{39}'], + ['\u{41}','\u{5a}'], + ['\u{5f}','\u{5f}'], + ['\u{61}','\u{7a}'], +] + +const tabasciidigit = [ + ['\u{30}','\u{39}'], +] + +const tabasciixdigit = [ + ['\u{30}','\u{39}'], + ['\u{41}','\u{46}'], + ['\u{61}','\u{66}'], +] + +const tabasciispace = [ + ['\u{9}','\u{d}'], + ['\u{20}','\u{20}'], +] + +const tabasciiblank = [ + ['\u{9}','\u{9}'], + ['\u{20}','\u{20}'], +] + +const tabalpha = [ + ['\u{41}','\u{5a}'], + ['\u{61}','\u{7a}'], + ['\u{aa}','\u{aa}'], + ['\u{b5}','\u{b5}'], + ['\u{ba}','\u{ba}'], + ['\u{c0}','\u{d6}'], + ['\u{d8}','\u{f6}'], + ['\u{f8}','\u{2c1}'], + ['\u{2c6}','\u{2d1}'], + ['\u{2e0}','\u{2e4}'], + ['\u{2ec}','\u{2ec}'], + ['\u{2ee}','\u{2ee}'], + ['\u{370}','\u{374}'], + ['\u{376}','\u{377}'], + ['\u{37a}','\u{37d}'], + ['\u{386}','\u{386}'], + ['\u{388}','\u{38a}'], + ['\u{38c}','\u{38c}'], + ['\u{38e}','\u{3a1}'], + ['\u{3a3}','\u{3f5}'], + ['\u{3f7}','\u{481}'], + ['\u{48a}','\u{527}'], + ['\u{531}','\u{556}'], + ['\u{559}','\u{559}'], + ['\u{561}','\u{587}'], + ['\u{5d0}','\u{5ea}'], + ['\u{5f0}','\u{5f2}'], + ['\u{620}','\u{64a}'], + ['\u{66e}','\u{66f}'], + ['\u{671}','\u{6d3}'], + ['\u{6d5}','\u{6d5}'], + ['\u{6e5}','\u{6e6}'], + ['\u{6ee}','\u{6ef}'], + ['\u{6fa}','\u{6fc}'], + ['\u{6ff}','\u{6ff}'], + ['\u{710}','\u{710}'], + ['\u{712}','\u{72f}'], + ['\u{74d}','\u{7a5}'], + ['\u{7b1}','\u{7b1}'], + ['\u{7ca}','\u{7ea}'], + ['\u{7f4}','\u{7f5}'], + ['\u{7fa}','\u{7fa}'], + ['\u{800}','\u{815}'], + ['\u{81a}','\u{81a}'], + ['\u{824}','\u{824}'], + ['\u{828}','\u{828}'], + ['\u{840}','\u{858}'], + ['\u{8a0}','\u{8a0}'], + ['\u{8a2}','\u{8ac}'], + ['\u{904}','\u{939}'], + ['\u{93d}','\u{93d}'], + ['\u{950}','\u{950}'], + ['\u{958}','\u{961}'], + ['\u{971}','\u{977}'], + ['\u{979}','\u{97f}'], + ['\u{985}','\u{98c}'], + ['\u{98f}','\u{990}'], + ['\u{993}','\u{9a8}'], + ['\u{9aa}','\u{9b0}'], + ['\u{9b2}','\u{9b2}'], + ['\u{9b6}','\u{9b9}'], + ['\u{9bd}','\u{9bd}'], + ['\u{9ce}','\u{9ce}'], + ['\u{9dc}','\u{9dd}'], + ['\u{9df}','\u{9e1}'], + ['\u{9f0}','\u{9f1}'], + ['\u{a05}','\u{a0a}'], + ['\u{a0f}','\u{a10}'], + ['\u{a13}','\u{a28}'], + ['\u{a2a}','\u{a30}'], + ['\u{a32}','\u{a33}'], + ['\u{a35}','\u{a36}'], + ['\u{a38}','\u{a39}'], + ['\u{a59}','\u{a5c}'], + ['\u{a5e}','\u{a5e}'], + ['\u{a72}','\u{a74}'], + ['\u{a85}','\u{a8d}'], + ['\u{a8f}','\u{a91}'], + ['\u{a93}','\u{aa8}'], + ['\u{aaa}','\u{ab0}'], + ['\u{ab2}','\u{ab3}'], + ['\u{ab5}','\u{ab9}'], + ['\u{abd}','\u{abd}'], + ['\u{ad0}','\u{ad0}'], + ['\u{ae0}','\u{ae1}'], + ['\u{b05}','\u{b0c}'], + ['\u{b0f}','\u{b10}'], + ['\u{b13}','\u{b28}'], + ['\u{b2a}','\u{b30}'], + ['\u{b32}','\u{b33}'], + ['\u{b35}','\u{b39}'], + ['\u{b3d}','\u{b3d}'], + ['\u{b5c}','\u{b5d}'], + ['\u{b5f}','\u{b61}'], + ['\u{b71}','\u{b71}'], + ['\u{b83}','\u{b83}'], + ['\u{b85}','\u{b8a}'], + ['\u{b8e}','\u{b90}'], + ['\u{b92}','\u{b95}'], + ['\u{b99}','\u{b9a}'], + ['\u{b9c}','\u{b9c}'], + ['\u{b9e}','\u{b9f}'], + ['\u{ba3}','\u{ba4}'], + ['\u{ba8}','\u{baa}'], + ['\u{bae}','\u{bb9}'], + ['\u{bd0}','\u{bd0}'], + ['\u{c05}','\u{c0c}'], + ['\u{c0e}','\u{c10}'], + ['\u{c12}','\u{c28}'], + ['\u{c2a}','\u{c33}'], + ['\u{c35}','\u{c39}'], + ['\u{c3d}','\u{c3d}'], + ['\u{c58}','\u{c59}'], + ['\u{c60}','\u{c61}'], + ['\u{c85}','\u{c8c}'], + ['\u{c8e}','\u{c90}'], + ['\u{c92}','\u{ca8}'], + ['\u{caa}','\u{cb3}'], + ['\u{cb5}','\u{cb9}'], + ['\u{cbd}','\u{cbd}'], + ['\u{cde}','\u{cde}'], + ['\u{ce0}','\u{ce1}'], + ['\u{cf1}','\u{cf2}'], + ['\u{d05}','\u{d0c}'], + ['\u{d0e}','\u{d10}'], + ['\u{d12}','\u{d3a}'], + ['\u{d3d}','\u{d3d}'], + ['\u{d4e}','\u{d4e}'], + ['\u{d60}','\u{d61}'], + ['\u{d7a}','\u{d7f}'], + ['\u{d85}','\u{d96}'], + ['\u{d9a}','\u{db1}'], + ['\u{db3}','\u{dbb}'], + ['\u{dbd}','\u{dbd}'], + ['\u{dc0}','\u{dc6}'], + ['\u{e01}','\u{e30}'], + ['\u{e32}','\u{e33}'], + ['\u{e40}','\u{e46}'], + ['\u{e81}','\u{e82}'], + ['\u{e84}','\u{e84}'], + ['\u{e87}','\u{e88}'], + ['\u{e8a}','\u{e8a}'], + ['\u{e8d}','\u{e8d}'], + ['\u{e94}','\u{e97}'], + ['\u{e99}','\u{e9f}'], + ['\u{ea1}','\u{ea3}'], + ['\u{ea5}','\u{ea5}'], + ['\u{ea7}','\u{ea7}'], + ['\u{eaa}','\u{eab}'], + ['\u{ead}','\u{eb0}'], + ['\u{eb2}','\u{eb3}'], + ['\u{ebd}','\u{ebd}'], + ['\u{ec0}','\u{ec4}'], + ['\u{ec6}','\u{ec6}'], + ['\u{edc}','\u{edf}'], + ['\u{f00}','\u{f00}'], + ['\u{f40}','\u{f47}'], + ['\u{f49}','\u{f6c}'], + ['\u{f88}','\u{f8c}'], + ['\u{1000}','\u{102a}'], + ['\u{103f}','\u{103f}'], + ['\u{1050}','\u{1055}'], + ['\u{105a}','\u{105d}'], + ['\u{1061}','\u{1061}'], + ['\u{1065}','\u{1066}'], + ['\u{106e}','\u{1070}'], + ['\u{1075}','\u{1081}'], + ['\u{108e}','\u{108e}'], + ['\u{10a0}','\u{10c5}'], + ['\u{10c7}','\u{10c7}'], + ['\u{10cd}','\u{10cd}'], + ['\u{10d0}','\u{10fa}'], + ['\u{10fc}','\u{1248}'], + ['\u{124a}','\u{124d}'], + ['\u{1250}','\u{1256}'], + ['\u{1258}','\u{1258}'], + ['\u{125a}','\u{125d}'], + ['\u{1260}','\u{1288}'], + ['\u{128a}','\u{128d}'], + ['\u{1290}','\u{12b0}'], + ['\u{12b2}','\u{12b5}'], + ['\u{12b8}','\u{12be}'], + ['\u{12c0}','\u{12c0}'], + ['\u{12c2}','\u{12c5}'], + ['\u{12c8}','\u{12d6}'], + ['\u{12d8}','\u{1310}'], + ['\u{1312}','\u{1315}'], + ['\u{1318}','\u{135a}'], + ['\u{1380}','\u{138f}'], + ['\u{13a0}','\u{13f4}'], + ['\u{1401}','\u{166c}'], + ['\u{166f}','\u{167f}'], + ['\u{1681}','\u{169a}'], + ['\u{16a0}','\u{16ea}'], + ['\u{1700}','\u{170c}'], + ['\u{170e}','\u{1711}'], + ['\u{1720}','\u{1731}'], + ['\u{1740}','\u{1751}'], + ['\u{1760}','\u{176c}'], + ['\u{176e}','\u{1770}'], + ['\u{1780}','\u{17b3}'], + ['\u{17d7}','\u{17d7}'], + ['\u{17dc}','\u{17dc}'], + ['\u{1820}','\u{1877}'], + ['\u{1880}','\u{18a8}'], + ['\u{18aa}','\u{18aa}'], + ['\u{18b0}','\u{18f5}'], + ['\u{1900}','\u{191c}'], + ['\u{1950}','\u{196d}'], + ['\u{1970}','\u{1974}'], + ['\u{1980}','\u{19ab}'], + ['\u{19c1}','\u{19c7}'], + ['\u{1a00}','\u{1a16}'], + ['\u{1a20}','\u{1a54}'], + ['\u{1aa7}','\u{1aa7}'], + ['\u{1b05}','\u{1b33}'], + ['\u{1b45}','\u{1b4b}'], + ['\u{1b83}','\u{1ba0}'], + ['\u{1bae}','\u{1baf}'], + ['\u{1bba}','\u{1be5}'], + ['\u{1c00}','\u{1c23}'], + ['\u{1c4d}','\u{1c4f}'], + ['\u{1c5a}','\u{1c7d}'], + ['\u{1ce9}','\u{1cec}'], + ['\u{1cee}','\u{1cf1}'], + ['\u{1cf5}','\u{1cf6}'], + ['\u{1d00}','\u{1dbf}'], + ['\u{1e00}','\u{1f15}'], + ['\u{1f18}','\u{1f1d}'], + ['\u{1f20}','\u{1f45}'], + ['\u{1f48}','\u{1f4d}'], + ['\u{1f50}','\u{1f57}'], + ['\u{1f59}','\u{1f59}'], + ['\u{1f5b}','\u{1f5b}'], + ['\u{1f5d}','\u{1f5d}'], + ['\u{1f5f}','\u{1f7d}'], + ['\u{1f80}','\u{1fb4}'], + ['\u{1fb6}','\u{1fbc}'], + ['\u{1fbe}','\u{1fbe}'], + ['\u{1fc2}','\u{1fc4}'], + ['\u{1fc6}','\u{1fcc}'], + ['\u{1fd0}','\u{1fd3}'], + ['\u{1fd6}','\u{1fdb}'], + ['\u{1fe0}','\u{1fec}'], + ['\u{1ff2}','\u{1ff4}'], + ['\u{1ff6}','\u{1ffc}'], + ['\u{2071}','\u{2071}'], + ['\u{207f}','\u{207f}'], + ['\u{2090}','\u{209c}'], + ['\u{2102}','\u{2102}'], + ['\u{2107}','\u{2107}'], + ['\u{210a}','\u{2113}'], + ['\u{2115}','\u{2115}'], + ['\u{2119}','\u{211d}'], + ['\u{2124}','\u{2124}'], + ['\u{2126}','\u{2126}'], + ['\u{2128}','\u{2128}'], + ['\u{212a}','\u{212d}'], + ['\u{212f}','\u{2139}'], + ['\u{213c}','\u{213f}'], + ['\u{2145}','\u{2149}'], + ['\u{214e}','\u{214e}'], + ['\u{2183}','\u{2184}'], + ['\u{2c00}','\u{2c2e}'], + ['\u{2c30}','\u{2c5e}'], + ['\u{2c60}','\u{2ce4}'], + ['\u{2ceb}','\u{2cee}'], + ['\u{2cf2}','\u{2cf3}'], + ['\u{2d00}','\u{2d25}'], + ['\u{2d27}','\u{2d27}'], + ['\u{2d2d}','\u{2d2d}'], + ['\u{2d30}','\u{2d67}'], + ['\u{2d6f}','\u{2d6f}'], + ['\u{2d80}','\u{2d96}'], + ['\u{2da0}','\u{2da6}'], + ['\u{2da8}','\u{2dae}'], + ['\u{2db0}','\u{2db6}'], + ['\u{2db8}','\u{2dbe}'], + ['\u{2dc0}','\u{2dc6}'], + ['\u{2dc8}','\u{2dce}'], + ['\u{2dd0}','\u{2dd6}'], + ['\u{2dd8}','\u{2dde}'], + ['\u{2e2f}','\u{2e2f}'], + ['\u{3005}','\u{3006}'], + ['\u{3031}','\u{3035}'], + ['\u{303b}','\u{303c}'], + ['\u{3041}','\u{3096}'], + ['\u{309d}','\u{309f}'], + ['\u{30a1}','\u{30fa}'], + ['\u{30fc}','\u{30ff}'], + ['\u{3105}','\u{312d}'], + ['\u{3131}','\u{318e}'], + ['\u{31a0}','\u{31ba}'], + ['\u{31f0}','\u{31ff}'], + ['\u{3400}','\u{4db5}'], + ['\u{4e00}','\u{9fcc}'], + ['\u{a000}','\u{a48c}'], + ['\u{a4d0}','\u{a4fd}'], + ['\u{a500}','\u{a60c}'], + ['\u{a610}','\u{a61f}'], + ['\u{a62a}','\u{a62b}'], + ['\u{a640}','\u{a66e}'], + ['\u{a67f}','\u{a697}'], + ['\u{a6a0}','\u{a6e5}'], + ['\u{a717}','\u{a71f}'], + ['\u{a722}','\u{a788}'], + ['\u{a78b}','\u{a78e}'], + ['\u{a790}','\u{a793}'], + ['\u{a7a0}','\u{a7aa}'], + ['\u{a7f8}','\u{a801}'], + ['\u{a803}','\u{a805}'], + ['\u{a807}','\u{a80a}'], + ['\u{a80c}','\u{a822}'], + ['\u{a840}','\u{a873}'], + ['\u{a882}','\u{a8b3}'], + ['\u{a8f2}','\u{a8f7}'], + ['\u{a8fb}','\u{a8fb}'], + ['\u{a90a}','\u{a925}'], + ['\u{a930}','\u{a946}'], + ['\u{a960}','\u{a97c}'], + ['\u{a984}','\u{a9b2}'], + ['\u{a9cf}','\u{a9cf}'], + ['\u{aa00}','\u{aa28}'], + ['\u{aa40}','\u{aa42}'], + ['\u{aa44}','\u{aa4b}'], + ['\u{aa60}','\u{aa76}'], + ['\u{aa7a}','\u{aa7a}'], + ['\u{aa80}','\u{aaaf}'], + ['\u{aab1}','\u{aab1}'], + ['\u{aab5}','\u{aab6}'], + ['\u{aab9}','\u{aabd}'], + ['\u{aac0}','\u{aac0}'], + ['\u{aac2}','\u{aac2}'], + ['\u{aadb}','\u{aadd}'], + ['\u{aae0}','\u{aaea}'], + ['\u{aaf2}','\u{aaf4}'], + ['\u{ab01}','\u{ab06}'], + ['\u{ab09}','\u{ab0e}'], + ['\u{ab11}','\u{ab16}'], + ['\u{ab20}','\u{ab26}'], + ['\u{ab28}','\u{ab2e}'], + ['\u{abc0}','\u{abe2}'], + ['\u{ac00}','\u{d7a3}'], + ['\u{d7b0}','\u{d7c6}'], + ['\u{d7cb}','\u{d7fb}'], + ['\u{f900}','\u{fa6d}'], + ['\u{fa70}','\u{fad9}'], + ['\u{fb00}','\u{fb06}'], + ['\u{fb13}','\u{fb17}'], + ['\u{fb1d}','\u{fb1d}'], + ['\u{fb1f}','\u{fb28}'], + ['\u{fb2a}','\u{fb36}'], + ['\u{fb38}','\u{fb3c}'], + ['\u{fb3e}','\u{fb3e}'], + ['\u{fb40}','\u{fb41}'], + ['\u{fb43}','\u{fb44}'], + ['\u{fb46}','\u{fbb1}'], + ['\u{fbd3}','\u{fd3d}'], + ['\u{fd50}','\u{fd8f}'], + ['\u{fd92}','\u{fdc7}'], + ['\u{fdf0}','\u{fdfb}'], + ['\u{fe70}','\u{fe74}'], + ['\u{fe76}','\u{fefc}'], + ['\u{ff21}','\u{ff3a}'], + ['\u{ff41}','\u{ff5a}'], + ['\u{ff66}','\u{ffbe}'], + ['\u{ffc2}','\u{ffc7}'], + ['\u{ffca}','\u{ffcf}'], + ['\u{ffd2}','\u{ffd7}'], + ['\u{ffda}','\u{ffdc}'], + ['\u{10000}','\u{1000b}'], + ['\u{1000d}','\u{10026}'], + ['\u{10028}','\u{1003a}'], + ['\u{1003c}','\u{1003d}'], + ['\u{1003f}','\u{1004d}'], + ['\u{10050}','\u{1005d}'], + ['\u{10080}','\u{100fa}'], + ['\u{10280}','\u{1029c}'], + ['\u{102a0}','\u{102d0}'], + ['\u{10300}','\u{1031e}'], + ['\u{10330}','\u{10340}'], + ['\u{10342}','\u{10349}'], + ['\u{10380}','\u{1039d}'], + ['\u{103a0}','\u{103c3}'], + ['\u{103c8}','\u{103cf}'], + ['\u{10400}','\u{1049d}'], + ['\u{10800}','\u{10805}'], + ['\u{10808}','\u{10808}'], + ['\u{1080a}','\u{10835}'], + ['\u{10837}','\u{10838}'], + ['\u{1083c}','\u{1083c}'], + ['\u{1083f}','\u{10855}'], + ['\u{10900}','\u{10915}'], + ['\u{10920}','\u{10939}'], + ['\u{10980}','\u{109b7}'], + ['\u{109be}','\u{109bf}'], + ['\u{10a00}','\u{10a00}'], + ['\u{10a10}','\u{10a13}'], + ['\u{10a15}','\u{10a17}'], + ['\u{10a19}','\u{10a33}'], + ['\u{10a60}','\u{10a7c}'], + ['\u{10b00}','\u{10b35}'], + ['\u{10b40}','\u{10b55}'], + ['\u{10b60}','\u{10b72}'], + ['\u{10c00}','\u{10c48}'], + ['\u{11003}','\u{11037}'], + ['\u{11083}','\u{110af}'], + ['\u{110d0}','\u{110e8}'], + ['\u{11103}','\u{11126}'], + ['\u{11183}','\u{111b2}'], + ['\u{111c1}','\u{111c4}'], + ['\u{11680}','\u{116aa}'], + ['\u{12000}','\u{1236e}'], + ['\u{13000}','\u{1342e}'], + ['\u{16800}','\u{16a38}'], + ['\u{16f00}','\u{16f44}'], + ['\u{16f50}','\u{16f50}'], + ['\u{16f93}','\u{16f9f}'], + ['\u{1b000}','\u{1b001}'], + ['\u{1d400}','\u{1d454}'], + ['\u{1d456}','\u{1d49c}'], + ['\u{1d49e}','\u{1d49f}'], + ['\u{1d4a2}','\u{1d4a2}'], + ['\u{1d4a5}','\u{1d4a6}'], + ['\u{1d4a9}','\u{1d4ac}'], + ['\u{1d4ae}','\u{1d4b9}'], + ['\u{1d4bb}','\u{1d4bb}'], + ['\u{1d4bd}','\u{1d4c3}'], + ['\u{1d4c5}','\u{1d505}'], + ['\u{1d507}','\u{1d50a}'], + ['\u{1d50d}','\u{1d514}'], + ['\u{1d516}','\u{1d51c}'], + ['\u{1d51e}','\u{1d539}'], + ['\u{1d53b}','\u{1d53e}'], + ['\u{1d540}','\u{1d544}'], + ['\u{1d546}','\u{1d546}'], + ['\u{1d54a}','\u{1d550}'], + ['\u{1d552}','\u{1d6a5}'], + ['\u{1d6a8}','\u{1d6c0}'], + ['\u{1d6c2}','\u{1d6da}'], + ['\u{1d6dc}','\u{1d6fa}'], + ['\u{1d6fc}','\u{1d714}'], + ['\u{1d716}','\u{1d734}'], + ['\u{1d736}','\u{1d74e}'], + ['\u{1d750}','\u{1d76e}'], + ['\u{1d770}','\u{1d788}'], + ['\u{1d78a}','\u{1d7a8}'], + ['\u{1d7aa}','\u{1d7c2}'], + ['\u{1d7c4}','\u{1d7cb}'], + ['\u{1ee00}','\u{1ee03}'], + ['\u{1ee05}','\u{1ee1f}'], + ['\u{1ee21}','\u{1ee22}'], + ['\u{1ee24}','\u{1ee24}'], + ['\u{1ee27}','\u{1ee27}'], + ['\u{1ee29}','\u{1ee32}'], + ['\u{1ee34}','\u{1ee37}'], + ['\u{1ee39}','\u{1ee39}'], + ['\u{1ee3b}','\u{1ee3b}'], + ['\u{1ee42}','\u{1ee42}'], + ['\u{1ee47}','\u{1ee47}'], + ['\u{1ee49}','\u{1ee49}'], + ['\u{1ee4b}','\u{1ee4b}'], + ['\u{1ee4d}','\u{1ee4f}'], + ['\u{1ee51}','\u{1ee52}'], + ['\u{1ee54}','\u{1ee54}'], + ['\u{1ee57}','\u{1ee57}'], + ['\u{1ee59}','\u{1ee59}'], + ['\u{1ee5b}','\u{1ee5b}'], + ['\u{1ee5d}','\u{1ee5d}'], + ['\u{1ee5f}','\u{1ee5f}'], + ['\u{1ee61}','\u{1ee62}'], + ['\u{1ee64}','\u{1ee64}'], + ['\u{1ee67}','\u{1ee6a}'], + ['\u{1ee6c}','\u{1ee72}'], + ['\u{1ee74}','\u{1ee77}'], + ['\u{1ee79}','\u{1ee7c}'], + ['\u{1ee7e}','\u{1ee7e}'], + ['\u{1ee80}','\u{1ee89}'], + ['\u{1ee8b}','\u{1ee9b}'], + ['\u{1eea1}','\u{1eea3}'], + ['\u{1eea5}','\u{1eea9}'], + ['\u{1eeab}','\u{1eebb}'], + ['\u{20000}','\u{2a6d6}'], + ['\u{2a700}','\u{2b734}'], + ['\u{2b740}','\u{2b81d}'], + ['\u{2f800}','\u{2fa1d}'], +] + +const tabupper = [ + ['\u{41}','\u{5a}'], + ['\u{c0}','\u{d6}'], + ['\u{d8}','\u{de}'], + ['\u{100}','\u{100}'], + ['\u{102}','\u{102}'], + ['\u{104}','\u{104}'], + ['\u{106}','\u{106}'], + ['\u{108}','\u{108}'], + ['\u{10a}','\u{10a}'], + ['\u{10c}','\u{10c}'], + ['\u{10e}','\u{10e}'], + ['\u{110}','\u{110}'], + ['\u{112}','\u{112}'], + ['\u{114}','\u{114}'], + ['\u{116}','\u{116}'], + ['\u{118}','\u{118}'], + ['\u{11a}','\u{11a}'], + ['\u{11c}','\u{11c}'], + ['\u{11e}','\u{11e}'], + ['\u{120}','\u{120}'], + ['\u{122}','\u{122}'], + ['\u{124}','\u{124}'], + ['\u{126}','\u{126}'], + ['\u{128}','\u{128}'], + ['\u{12a}','\u{12a}'], + ['\u{12c}','\u{12c}'], + ['\u{12e}','\u{12e}'], + ['\u{130}','\u{130}'], + ['\u{132}','\u{132}'], + ['\u{134}','\u{134}'], + ['\u{136}','\u{136}'], + ['\u{139}','\u{139}'], + ['\u{13b}','\u{13b}'], + ['\u{13d}','\u{13d}'], + ['\u{13f}','\u{13f}'], + ['\u{141}','\u{141}'], + ['\u{143}','\u{143}'], + ['\u{145}','\u{145}'], + ['\u{147}','\u{147}'], + ['\u{14a}','\u{14a}'], + ['\u{14c}','\u{14c}'], + ['\u{14e}','\u{14e}'], + ['\u{150}','\u{150}'], + ['\u{152}','\u{152}'], + ['\u{154}','\u{154}'], + ['\u{156}','\u{156}'], + ['\u{158}','\u{158}'], + ['\u{15a}','\u{15a}'], + ['\u{15c}','\u{15c}'], + ['\u{15e}','\u{15e}'], + ['\u{160}','\u{160}'], + ['\u{162}','\u{162}'], + ['\u{164}','\u{164}'], + ['\u{166}','\u{166}'], + ['\u{168}','\u{168}'], + ['\u{16a}','\u{16a}'], + ['\u{16c}','\u{16c}'], + ['\u{16e}','\u{16e}'], + ['\u{170}','\u{170}'], + ['\u{172}','\u{172}'], + ['\u{174}','\u{174}'], + ['\u{176}','\u{176}'], + ['\u{178}','\u{179}'], + ['\u{17b}','\u{17b}'], + ['\u{17d}','\u{17d}'], + ['\u{181}','\u{182}'], + ['\u{184}','\u{184}'], + ['\u{186}','\u{187}'], + ['\u{189}','\u{18b}'], + ['\u{18e}','\u{191}'], + ['\u{193}','\u{194}'], + ['\u{196}','\u{198}'], + ['\u{19c}','\u{19d}'], + ['\u{19f}','\u{1a0}'], + ['\u{1a2}','\u{1a2}'], + ['\u{1a4}','\u{1a4}'], + ['\u{1a6}','\u{1a7}'], + ['\u{1a9}','\u{1a9}'], + ['\u{1ac}','\u{1ac}'], + ['\u{1ae}','\u{1af}'], + ['\u{1b1}','\u{1b3}'], + ['\u{1b5}','\u{1b5}'], + ['\u{1b7}','\u{1b8}'], + ['\u{1bc}','\u{1bc}'], + ['\u{1c4}','\u{1c4}'], + ['\u{1c7}','\u{1c7}'], + ['\u{1ca}','\u{1ca}'], + ['\u{1cd}','\u{1cd}'], + ['\u{1cf}','\u{1cf}'], + ['\u{1d1}','\u{1d1}'], + ['\u{1d3}','\u{1d3}'], + ['\u{1d5}','\u{1d5}'], + ['\u{1d7}','\u{1d7}'], + ['\u{1d9}','\u{1d9}'], + ['\u{1db}','\u{1db}'], + ['\u{1de}','\u{1de}'], + ['\u{1e0}','\u{1e0}'], + ['\u{1e2}','\u{1e2}'], + ['\u{1e4}','\u{1e4}'], + ['\u{1e6}','\u{1e6}'], + ['\u{1e8}','\u{1e8}'], + ['\u{1ea}','\u{1ea}'], + ['\u{1ec}','\u{1ec}'], + ['\u{1ee}','\u{1ee}'], + ['\u{1f1}','\u{1f1}'], + ['\u{1f4}','\u{1f4}'], + ['\u{1f6}','\u{1f8}'], + ['\u{1fa}','\u{1fa}'], + ['\u{1fc}','\u{1fc}'], + ['\u{1fe}','\u{1fe}'], + ['\u{200}','\u{200}'], + ['\u{202}','\u{202}'], + ['\u{204}','\u{204}'], + ['\u{206}','\u{206}'], + ['\u{208}','\u{208}'], + ['\u{20a}','\u{20a}'], + ['\u{20c}','\u{20c}'], + ['\u{20e}','\u{20e}'], + ['\u{210}','\u{210}'], + ['\u{212}','\u{212}'], + ['\u{214}','\u{214}'], + ['\u{216}','\u{216}'], + ['\u{218}','\u{218}'], + ['\u{21a}','\u{21a}'], + ['\u{21c}','\u{21c}'], + ['\u{21e}','\u{21e}'], + ['\u{220}','\u{220}'], + ['\u{222}','\u{222}'], + ['\u{224}','\u{224}'], + ['\u{226}','\u{226}'], + ['\u{228}','\u{228}'], + ['\u{22a}','\u{22a}'], + ['\u{22c}','\u{22c}'], + ['\u{22e}','\u{22e}'], + ['\u{230}','\u{230}'], + ['\u{232}','\u{232}'], + ['\u{23a}','\u{23b}'], + ['\u{23d}','\u{23e}'], + ['\u{241}','\u{241}'], + ['\u{243}','\u{246}'], + ['\u{248}','\u{248}'], + ['\u{24a}','\u{24a}'], + ['\u{24c}','\u{24c}'], + ['\u{24e}','\u{24e}'], + ['\u{370}','\u{370}'], + ['\u{372}','\u{372}'], + ['\u{376}','\u{376}'], + ['\u{386}','\u{386}'], + ['\u{388}','\u{38a}'], + ['\u{38c}','\u{38c}'], + ['\u{38e}','\u{38f}'], + ['\u{391}','\u{3a1}'], + ['\u{3a3}','\u{3ab}'], + ['\u{3cf}','\u{3cf}'], + ['\u{3d2}','\u{3d4}'], + ['\u{3d8}','\u{3d8}'], + ['\u{3da}','\u{3da}'], + ['\u{3dc}','\u{3dc}'], + ['\u{3de}','\u{3de}'], + ['\u{3e0}','\u{3e0}'], + ['\u{3e2}','\u{3e2}'], + ['\u{3e4}','\u{3e4}'], + ['\u{3e6}','\u{3e6}'], + ['\u{3e8}','\u{3e8}'], + ['\u{3ea}','\u{3ea}'], + ['\u{3ec}','\u{3ec}'], + ['\u{3ee}','\u{3ee}'], + ['\u{3f4}','\u{3f4}'], + ['\u{3f7}','\u{3f7}'], + ['\u{3f9}','\u{3fa}'], + ['\u{3fd}','\u{42f}'], + ['\u{460}','\u{460}'], + ['\u{462}','\u{462}'], + ['\u{464}','\u{464}'], + ['\u{466}','\u{466}'], + ['\u{468}','\u{468}'], + ['\u{46a}','\u{46a}'], + ['\u{46c}','\u{46c}'], + ['\u{46e}','\u{46e}'], + ['\u{470}','\u{470}'], + ['\u{472}','\u{472}'], + ['\u{474}','\u{474}'], + ['\u{476}','\u{476}'], + ['\u{478}','\u{478}'], + ['\u{47a}','\u{47a}'], + ['\u{47c}','\u{47c}'], + ['\u{47e}','\u{47e}'], + ['\u{480}','\u{480}'], + ['\u{48a}','\u{48a}'], + ['\u{48c}','\u{48c}'], + ['\u{48e}','\u{48e}'], + ['\u{490}','\u{490}'], + ['\u{492}','\u{492}'], + ['\u{494}','\u{494}'], + ['\u{496}','\u{496}'], + ['\u{498}','\u{498}'], + ['\u{49a}','\u{49a}'], + ['\u{49c}','\u{49c}'], + ['\u{49e}','\u{49e}'], + ['\u{4a0}','\u{4a0}'], + ['\u{4a2}','\u{4a2}'], + ['\u{4a4}','\u{4a4}'], + ['\u{4a6}','\u{4a6}'], + ['\u{4a8}','\u{4a8}'], + ['\u{4aa}','\u{4aa}'], + ['\u{4ac}','\u{4ac}'], + ['\u{4ae}','\u{4ae}'], + ['\u{4b0}','\u{4b0}'], + ['\u{4b2}','\u{4b2}'], + ['\u{4b4}','\u{4b4}'], + ['\u{4b6}','\u{4b6}'], + ['\u{4b8}','\u{4b8}'], + ['\u{4ba}','\u{4ba}'], + ['\u{4bc}','\u{4bc}'], + ['\u{4be}','\u{4be}'], + ['\u{4c0}','\u{4c1}'], + ['\u{4c3}','\u{4c3}'], + ['\u{4c5}','\u{4c5}'], + ['\u{4c7}','\u{4c7}'], + ['\u{4c9}','\u{4c9}'], + ['\u{4cb}','\u{4cb}'], + ['\u{4cd}','\u{4cd}'], + ['\u{4d0}','\u{4d0}'], + ['\u{4d2}','\u{4d2}'], + ['\u{4d4}','\u{4d4}'], + ['\u{4d6}','\u{4d6}'], + ['\u{4d8}','\u{4d8}'], + ['\u{4da}','\u{4da}'], + ['\u{4dc}','\u{4dc}'], + ['\u{4de}','\u{4de}'], + ['\u{4e0}','\u{4e0}'], + ['\u{4e2}','\u{4e2}'], + ['\u{4e4}','\u{4e4}'], + ['\u{4e6}','\u{4e6}'], + ['\u{4e8}','\u{4e8}'], + ['\u{4ea}','\u{4ea}'], + ['\u{4ec}','\u{4ec}'], + ['\u{4ee}','\u{4ee}'], + ['\u{4f0}','\u{4f0}'], + ['\u{4f2}','\u{4f2}'], + ['\u{4f4}','\u{4f4}'], + ['\u{4f6}','\u{4f6}'], + ['\u{4f8}','\u{4f8}'], + ['\u{4fa}','\u{4fa}'], + ['\u{4fc}','\u{4fc}'], + ['\u{4fe}','\u{4fe}'], + ['\u{500}','\u{500}'], + ['\u{502}','\u{502}'], + ['\u{504}','\u{504}'], + ['\u{506}','\u{506}'], + ['\u{508}','\u{508}'], + ['\u{50a}','\u{50a}'], + ['\u{50c}','\u{50c}'], + ['\u{50e}','\u{50e}'], + ['\u{510}','\u{510}'], + ['\u{512}','\u{512}'], + ['\u{514}','\u{514}'], + ['\u{516}','\u{516}'], + ['\u{518}','\u{518}'], + ['\u{51a}','\u{51a}'], + ['\u{51c}','\u{51c}'], + ['\u{51e}','\u{51e}'], + ['\u{520}','\u{520}'], + ['\u{522}','\u{522}'], + ['\u{524}','\u{524}'], + ['\u{526}','\u{526}'], + ['\u{531}','\u{556}'], + ['\u{10a0}','\u{10c5}'], + ['\u{10c7}','\u{10c7}'], + ['\u{10cd}','\u{10cd}'], + ['\u{1e00}','\u{1e00}'], + ['\u{1e02}','\u{1e02}'], + ['\u{1e04}','\u{1e04}'], + ['\u{1e06}','\u{1e06}'], + ['\u{1e08}','\u{1e08}'], + ['\u{1e0a}','\u{1e0a}'], + ['\u{1e0c}','\u{1e0c}'], + ['\u{1e0e}','\u{1e0e}'], + ['\u{1e10}','\u{1e10}'], + ['\u{1e12}','\u{1e12}'], + ['\u{1e14}','\u{1e14}'], + ['\u{1e16}','\u{1e16}'], + ['\u{1e18}','\u{1e18}'], + ['\u{1e1a}','\u{1e1a}'], + ['\u{1e1c}','\u{1e1c}'], + ['\u{1e1e}','\u{1e1e}'], + ['\u{1e20}','\u{1e20}'], + ['\u{1e22}','\u{1e22}'], + ['\u{1e24}','\u{1e24}'], + ['\u{1e26}','\u{1e26}'], + ['\u{1e28}','\u{1e28}'], + ['\u{1e2a}','\u{1e2a}'], + ['\u{1e2c}','\u{1e2c}'], + ['\u{1e2e}','\u{1e2e}'], + ['\u{1e30}','\u{1e30}'], + ['\u{1e32}','\u{1e32}'], + ['\u{1e34}','\u{1e34}'], + ['\u{1e36}','\u{1e36}'], + ['\u{1e38}','\u{1e38}'], + ['\u{1e3a}','\u{1e3a}'], + ['\u{1e3c}','\u{1e3c}'], + ['\u{1e3e}','\u{1e3e}'], + ['\u{1e40}','\u{1e40}'], + ['\u{1e42}','\u{1e42}'], + ['\u{1e44}','\u{1e44}'], + ['\u{1e46}','\u{1e46}'], + ['\u{1e48}','\u{1e48}'], + ['\u{1e4a}','\u{1e4a}'], + ['\u{1e4c}','\u{1e4c}'], + ['\u{1e4e}','\u{1e4e}'], + ['\u{1e50}','\u{1e50}'], + ['\u{1e52}','\u{1e52}'], + ['\u{1e54}','\u{1e54}'], + ['\u{1e56}','\u{1e56}'], + ['\u{1e58}','\u{1e58}'], + ['\u{1e5a}','\u{1e5a}'], + ['\u{1e5c}','\u{1e5c}'], + ['\u{1e5e}','\u{1e5e}'], + ['\u{1e60}','\u{1e60}'], + ['\u{1e62}','\u{1e62}'], + ['\u{1e64}','\u{1e64}'], + ['\u{1e66}','\u{1e66}'], + ['\u{1e68}','\u{1e68}'], + ['\u{1e6a}','\u{1e6a}'], + ['\u{1e6c}','\u{1e6c}'], + ['\u{1e6e}','\u{1e6e}'], + ['\u{1e70}','\u{1e70}'], + ['\u{1e72}','\u{1e72}'], + ['\u{1e74}','\u{1e74}'], + ['\u{1e76}','\u{1e76}'], + ['\u{1e78}','\u{1e78}'], + ['\u{1e7a}','\u{1e7a}'], + ['\u{1e7c}','\u{1e7c}'], + ['\u{1e7e}','\u{1e7e}'], + ['\u{1e80}','\u{1e80}'], + ['\u{1e82}','\u{1e82}'], + ['\u{1e84}','\u{1e84}'], + ['\u{1e86}','\u{1e86}'], + ['\u{1e88}','\u{1e88}'], + ['\u{1e8a}','\u{1e8a}'], + ['\u{1e8c}','\u{1e8c}'], + ['\u{1e8e}','\u{1e8e}'], + ['\u{1e90}','\u{1e90}'], + ['\u{1e92}','\u{1e92}'], + ['\u{1e94}','\u{1e94}'], + ['\u{1e9e}','\u{1e9e}'], + ['\u{1ea0}','\u{1ea0}'], + ['\u{1ea2}','\u{1ea2}'], + ['\u{1ea4}','\u{1ea4}'], + ['\u{1ea6}','\u{1ea6}'], + ['\u{1ea8}','\u{1ea8}'], + ['\u{1eaa}','\u{1eaa}'], + ['\u{1eac}','\u{1eac}'], + ['\u{1eae}','\u{1eae}'], + ['\u{1eb0}','\u{1eb0}'], + ['\u{1eb2}','\u{1eb2}'], + ['\u{1eb4}','\u{1eb4}'], + ['\u{1eb6}','\u{1eb6}'], + ['\u{1eb8}','\u{1eb8}'], + ['\u{1eba}','\u{1eba}'], + ['\u{1ebc}','\u{1ebc}'], + ['\u{1ebe}','\u{1ebe}'], + ['\u{1ec0}','\u{1ec0}'], + ['\u{1ec2}','\u{1ec2}'], + ['\u{1ec4}','\u{1ec4}'], + ['\u{1ec6}','\u{1ec6}'], + ['\u{1ec8}','\u{1ec8}'], + ['\u{1eca}','\u{1eca}'], + ['\u{1ecc}','\u{1ecc}'], + ['\u{1ece}','\u{1ece}'], + ['\u{1ed0}','\u{1ed0}'], + ['\u{1ed2}','\u{1ed2}'], + ['\u{1ed4}','\u{1ed4}'], + ['\u{1ed6}','\u{1ed6}'], + ['\u{1ed8}','\u{1ed8}'], + ['\u{1eda}','\u{1eda}'], + ['\u{1edc}','\u{1edc}'], + ['\u{1ede}','\u{1ede}'], + ['\u{1ee0}','\u{1ee0}'], + ['\u{1ee2}','\u{1ee2}'], + ['\u{1ee4}','\u{1ee4}'], + ['\u{1ee6}','\u{1ee6}'], + ['\u{1ee8}','\u{1ee8}'], + ['\u{1eea}','\u{1eea}'], + ['\u{1eec}','\u{1eec}'], + ['\u{1eee}','\u{1eee}'], + ['\u{1ef0}','\u{1ef0}'], + ['\u{1ef2}','\u{1ef2}'], + ['\u{1ef4}','\u{1ef4}'], + ['\u{1ef6}','\u{1ef6}'], + ['\u{1ef8}','\u{1ef8}'], + ['\u{1efa}','\u{1efa}'], + ['\u{1efc}','\u{1efc}'], + ['\u{1efe}','\u{1efe}'], + ['\u{1f08}','\u{1f0f}'], + ['\u{1f18}','\u{1f1d}'], + ['\u{1f28}','\u{1f2f}'], + ['\u{1f38}','\u{1f3f}'], + ['\u{1f48}','\u{1f4d}'], + ['\u{1f59}','\u{1f59}'], + ['\u{1f5b}','\u{1f5b}'], + ['\u{1f5d}','\u{1f5d}'], + ['\u{1f5f}','\u{1f5f}'], + ['\u{1f68}','\u{1f6f}'], + ['\u{1fb8}','\u{1fbb}'], + ['\u{1fc8}','\u{1fcb}'], + ['\u{1fd8}','\u{1fdb}'], + ['\u{1fe8}','\u{1fec}'], + ['\u{1ff8}','\u{1ffb}'], + ['\u{2102}','\u{2102}'], + ['\u{2107}','\u{2107}'], + ['\u{210b}','\u{210d}'], + ['\u{2110}','\u{2112}'], + ['\u{2115}','\u{2115}'], + ['\u{2119}','\u{211d}'], + ['\u{2124}','\u{2124}'], + ['\u{2126}','\u{2126}'], + ['\u{2128}','\u{2128}'], + ['\u{212a}','\u{212d}'], + ['\u{2130}','\u{2133}'], + ['\u{213e}','\u{213f}'], + ['\u{2145}','\u{2145}'], + ['\u{2183}','\u{2183}'], + ['\u{2c00}','\u{2c2e}'], + ['\u{2c60}','\u{2c60}'], + ['\u{2c62}','\u{2c64}'], + ['\u{2c67}','\u{2c67}'], + ['\u{2c69}','\u{2c69}'], + ['\u{2c6b}','\u{2c6b}'], + ['\u{2c6d}','\u{2c70}'], + ['\u{2c72}','\u{2c72}'], + ['\u{2c75}','\u{2c75}'], + ['\u{2c7e}','\u{2c80}'], + ['\u{2c82}','\u{2c82}'], + ['\u{2c84}','\u{2c84}'], + ['\u{2c86}','\u{2c86}'], + ['\u{2c88}','\u{2c88}'], + ['\u{2c8a}','\u{2c8a}'], + ['\u{2c8c}','\u{2c8c}'], + ['\u{2c8e}','\u{2c8e}'], + ['\u{2c90}','\u{2c90}'], + ['\u{2c92}','\u{2c92}'], + ['\u{2c94}','\u{2c94}'], + ['\u{2c96}','\u{2c96}'], + ['\u{2c98}','\u{2c98}'], + ['\u{2c9a}','\u{2c9a}'], + ['\u{2c9c}','\u{2c9c}'], + ['\u{2c9e}','\u{2c9e}'], + ['\u{2ca0}','\u{2ca0}'], + ['\u{2ca2}','\u{2ca2}'], + ['\u{2ca4}','\u{2ca4}'], + ['\u{2ca6}','\u{2ca6}'], + ['\u{2ca8}','\u{2ca8}'], + ['\u{2caa}','\u{2caa}'], + ['\u{2cac}','\u{2cac}'], + ['\u{2cae}','\u{2cae}'], + ['\u{2cb0}','\u{2cb0}'], + ['\u{2cb2}','\u{2cb2}'], + ['\u{2cb4}','\u{2cb4}'], + ['\u{2cb6}','\u{2cb6}'], + ['\u{2cb8}','\u{2cb8}'], + ['\u{2cba}','\u{2cba}'], + ['\u{2cbc}','\u{2cbc}'], + ['\u{2cbe}','\u{2cbe}'], + ['\u{2cc0}','\u{2cc0}'], + ['\u{2cc2}','\u{2cc2}'], + ['\u{2cc4}','\u{2cc4}'], + ['\u{2cc6}','\u{2cc6}'], + ['\u{2cc8}','\u{2cc8}'], + ['\u{2cca}','\u{2cca}'], + ['\u{2ccc}','\u{2ccc}'], + ['\u{2cce}','\u{2cce}'], + ['\u{2cd0}','\u{2cd0}'], + ['\u{2cd2}','\u{2cd2}'], + ['\u{2cd4}','\u{2cd4}'], + ['\u{2cd6}','\u{2cd6}'], + ['\u{2cd8}','\u{2cd8}'], + ['\u{2cda}','\u{2cda}'], + ['\u{2cdc}','\u{2cdc}'], + ['\u{2cde}','\u{2cde}'], + ['\u{2ce0}','\u{2ce0}'], + ['\u{2ce2}','\u{2ce2}'], + ['\u{2ceb}','\u{2ceb}'], + ['\u{2ced}','\u{2ced}'], + ['\u{2cf2}','\u{2cf2}'], + ['\u{a640}','\u{a640}'], + ['\u{a642}','\u{a642}'], + ['\u{a644}','\u{a644}'], + ['\u{a646}','\u{a646}'], + ['\u{a648}','\u{a648}'], + ['\u{a64a}','\u{a64a}'], + ['\u{a64c}','\u{a64c}'], + ['\u{a64e}','\u{a64e}'], + ['\u{a650}','\u{a650}'], + ['\u{a652}','\u{a652}'], + ['\u{a654}','\u{a654}'], + ['\u{a656}','\u{a656}'], + ['\u{a658}','\u{a658}'], + ['\u{a65a}','\u{a65a}'], + ['\u{a65c}','\u{a65c}'], + ['\u{a65e}','\u{a65e}'], + ['\u{a660}','\u{a660}'], + ['\u{a662}','\u{a662}'], + ['\u{a664}','\u{a664}'], + ['\u{a666}','\u{a666}'], + ['\u{a668}','\u{a668}'], + ['\u{a66a}','\u{a66a}'], + ['\u{a66c}','\u{a66c}'], + ['\u{a680}','\u{a680}'], + ['\u{a682}','\u{a682}'], + ['\u{a684}','\u{a684}'], + ['\u{a686}','\u{a686}'], + ['\u{a688}','\u{a688}'], + ['\u{a68a}','\u{a68a}'], + ['\u{a68c}','\u{a68c}'], + ['\u{a68e}','\u{a68e}'], + ['\u{a690}','\u{a690}'], + ['\u{a692}','\u{a692}'], + ['\u{a694}','\u{a694}'], + ['\u{a696}','\u{a696}'], + ['\u{a722}','\u{a722}'], + ['\u{a724}','\u{a724}'], + ['\u{a726}','\u{a726}'], + ['\u{a728}','\u{a728}'], + ['\u{a72a}','\u{a72a}'], + ['\u{a72c}','\u{a72c}'], + ['\u{a72e}','\u{a72e}'], + ['\u{a732}','\u{a732}'], + ['\u{a734}','\u{a734}'], + ['\u{a736}','\u{a736}'], + ['\u{a738}','\u{a738}'], + ['\u{a73a}','\u{a73a}'], + ['\u{a73c}','\u{a73c}'], + ['\u{a73e}','\u{a73e}'], + ['\u{a740}','\u{a740}'], + ['\u{a742}','\u{a742}'], + ['\u{a744}','\u{a744}'], + ['\u{a746}','\u{a746}'], + ['\u{a748}','\u{a748}'], + ['\u{a74a}','\u{a74a}'], + ['\u{a74c}','\u{a74c}'], + ['\u{a74e}','\u{a74e}'], + ['\u{a750}','\u{a750}'], + ['\u{a752}','\u{a752}'], + ['\u{a754}','\u{a754}'], + ['\u{a756}','\u{a756}'], + ['\u{a758}','\u{a758}'], + ['\u{a75a}','\u{a75a}'], + ['\u{a75c}','\u{a75c}'], + ['\u{a75e}','\u{a75e}'], + ['\u{a760}','\u{a760}'], + ['\u{a762}','\u{a762}'], + ['\u{a764}','\u{a764}'], + ['\u{a766}','\u{a766}'], + ['\u{a768}','\u{a768}'], + ['\u{a76a}','\u{a76a}'], + ['\u{a76c}','\u{a76c}'], + ['\u{a76e}','\u{a76e}'], + ['\u{a779}','\u{a779}'], + ['\u{a77b}','\u{a77b}'], + ['\u{a77d}','\u{a77e}'], + ['\u{a780}','\u{a780}'], + ['\u{a782}','\u{a782}'], + ['\u{a784}','\u{a784}'], + ['\u{a786}','\u{a786}'], + ['\u{a78b}','\u{a78b}'], + ['\u{a78d}','\u{a78d}'], + ['\u{a790}','\u{a790}'], + ['\u{a792}','\u{a792}'], + ['\u{a7a0}','\u{a7a0}'], + ['\u{a7a2}','\u{a7a2}'], + ['\u{a7a4}','\u{a7a4}'], + ['\u{a7a6}','\u{a7a6}'], + ['\u{a7a8}','\u{a7a8}'], + ['\u{a7aa}','\u{a7aa}'], + ['\u{ff21}','\u{ff3a}'], + ['\u{10400}','\u{10427}'], + ['\u{1d400}','\u{1d419}'], + ['\u{1d434}','\u{1d44d}'], + ['\u{1d468}','\u{1d481}'], + ['\u{1d49c}','\u{1d49c}'], + ['\u{1d49e}','\u{1d49f}'], + ['\u{1d4a2}','\u{1d4a2}'], + ['\u{1d4a5}','\u{1d4a6}'], + ['\u{1d4a9}','\u{1d4ac}'], + ['\u{1d4ae}','\u{1d4b5}'], + ['\u{1d4d0}','\u{1d4e9}'], + ['\u{1d504}','\u{1d505}'], + ['\u{1d507}','\u{1d50a}'], + ['\u{1d50d}','\u{1d514}'], + ['\u{1d516}','\u{1d51c}'], + ['\u{1d538}','\u{1d539}'], + ['\u{1d53b}','\u{1d53e}'], + ['\u{1d540}','\u{1d544}'], + ['\u{1d546}','\u{1d546}'], + ['\u{1d54a}','\u{1d550}'], + ['\u{1d56c}','\u{1d585}'], + ['\u{1d5a0}','\u{1d5b9}'], + ['\u{1d5d4}','\u{1d5ed}'], + ['\u{1d608}','\u{1d621}'], + ['\u{1d63c}','\u{1d655}'], + ['\u{1d670}','\u{1d689}'], + ['\u{1d6a8}','\u{1d6c0}'], + ['\u{1d6e2}','\u{1d6fa}'], + ['\u{1d71c}','\u{1d734}'], + ['\u{1d756}','\u{1d76e}'], + ['\u{1d790}','\u{1d7a8}'], + ['\u{1d7ca}','\u{1d7ca}'], +] + +const tablower = [ + ['\u{61}','\u{7a}'], + ['\u{b5}','\u{b5}'], + ['\u{df}','\u{f6}'], + ['\u{f8}','\u{ff}'], + ['\u{101}','\u{101}'], + ['\u{103}','\u{103}'], + ['\u{105}','\u{105}'], + ['\u{107}','\u{107}'], + ['\u{109}','\u{109}'], + ['\u{10b}','\u{10b}'], + ['\u{10d}','\u{10d}'], + ['\u{10f}','\u{10f}'], + ['\u{111}','\u{111}'], + ['\u{113}','\u{113}'], + ['\u{115}','\u{115}'], + ['\u{117}','\u{117}'], + ['\u{119}','\u{119}'], + ['\u{11b}','\u{11b}'], + ['\u{11d}','\u{11d}'], + ['\u{11f}','\u{11f}'], + ['\u{121}','\u{121}'], + ['\u{123}','\u{123}'], + ['\u{125}','\u{125}'], + ['\u{127}','\u{127}'], + ['\u{129}','\u{129}'], + ['\u{12b}','\u{12b}'], + ['\u{12d}','\u{12d}'], + ['\u{12f}','\u{12f}'], + ['\u{131}','\u{131}'], + ['\u{133}','\u{133}'], + ['\u{135}','\u{135}'], + ['\u{137}','\u{138}'], + ['\u{13a}','\u{13a}'], + ['\u{13c}','\u{13c}'], + ['\u{13e}','\u{13e}'], + ['\u{140}','\u{140}'], + ['\u{142}','\u{142}'], + ['\u{144}','\u{144}'], + ['\u{146}','\u{146}'], + ['\u{148}','\u{149}'], + ['\u{14b}','\u{14b}'], + ['\u{14d}','\u{14d}'], + ['\u{14f}','\u{14f}'], + ['\u{151}','\u{151}'], + ['\u{153}','\u{153}'], + ['\u{155}','\u{155}'], + ['\u{157}','\u{157}'], + ['\u{159}','\u{159}'], + ['\u{15b}','\u{15b}'], + ['\u{15d}','\u{15d}'], + ['\u{15f}','\u{15f}'], + ['\u{161}','\u{161}'], + ['\u{163}','\u{163}'], + ['\u{165}','\u{165}'], + ['\u{167}','\u{167}'], + ['\u{169}','\u{169}'], + ['\u{16b}','\u{16b}'], + ['\u{16d}','\u{16d}'], + ['\u{16f}','\u{16f}'], + ['\u{171}','\u{171}'], + ['\u{173}','\u{173}'], + ['\u{175}','\u{175}'], + ['\u{177}','\u{177}'], + ['\u{17a}','\u{17a}'], + ['\u{17c}','\u{17c}'], + ['\u{17e}','\u{180}'], + ['\u{183}','\u{183}'], + ['\u{185}','\u{185}'], + ['\u{188}','\u{188}'], + ['\u{18c}','\u{18d}'], + ['\u{192}','\u{192}'], + ['\u{195}','\u{195}'], + ['\u{199}','\u{19b}'], + ['\u{19e}','\u{19e}'], + ['\u{1a1}','\u{1a1}'], + ['\u{1a3}','\u{1a3}'], + ['\u{1a5}','\u{1a5}'], + ['\u{1a8}','\u{1a8}'], + ['\u{1aa}','\u{1ab}'], + ['\u{1ad}','\u{1ad}'], + ['\u{1b0}','\u{1b0}'], + ['\u{1b4}','\u{1b4}'], + ['\u{1b6}','\u{1b6}'], + ['\u{1b9}','\u{1ba}'], + ['\u{1bd}','\u{1bf}'], + ['\u{1c6}','\u{1c6}'], + ['\u{1c9}','\u{1c9}'], + ['\u{1cc}','\u{1cc}'], + ['\u{1ce}','\u{1ce}'], + ['\u{1d0}','\u{1d0}'], + ['\u{1d2}','\u{1d2}'], + ['\u{1d4}','\u{1d4}'], + ['\u{1d6}','\u{1d6}'], + ['\u{1d8}','\u{1d8}'], + ['\u{1da}','\u{1da}'], + ['\u{1dc}','\u{1dd}'], + ['\u{1df}','\u{1df}'], + ['\u{1e1}','\u{1e1}'], + ['\u{1e3}','\u{1e3}'], + ['\u{1e5}','\u{1e5}'], + ['\u{1e7}','\u{1e7}'], + ['\u{1e9}','\u{1e9}'], + ['\u{1eb}','\u{1eb}'], + ['\u{1ed}','\u{1ed}'], + ['\u{1ef}','\u{1f0}'], + ['\u{1f3}','\u{1f3}'], + ['\u{1f5}','\u{1f5}'], + ['\u{1f9}','\u{1f9}'], + ['\u{1fb}','\u{1fb}'], + ['\u{1fd}','\u{1fd}'], + ['\u{1ff}','\u{1ff}'], + ['\u{201}','\u{201}'], + ['\u{203}','\u{203}'], + ['\u{205}','\u{205}'], + ['\u{207}','\u{207}'], + ['\u{209}','\u{209}'], + ['\u{20b}','\u{20b}'], + ['\u{20d}','\u{20d}'], + ['\u{20f}','\u{20f}'], + ['\u{211}','\u{211}'], + ['\u{213}','\u{213}'], + ['\u{215}','\u{215}'], + ['\u{217}','\u{217}'], + ['\u{219}','\u{219}'], + ['\u{21b}','\u{21b}'], + ['\u{21d}','\u{21d}'], + ['\u{21f}','\u{21f}'], + ['\u{221}','\u{221}'], + ['\u{223}','\u{223}'], + ['\u{225}','\u{225}'], + ['\u{227}','\u{227}'], + ['\u{229}','\u{229}'], + ['\u{22b}','\u{22b}'], + ['\u{22d}','\u{22d}'], + ['\u{22f}','\u{22f}'], + ['\u{231}','\u{231}'], + ['\u{233}','\u{239}'], + ['\u{23c}','\u{23c}'], + ['\u{23f}','\u{240}'], + ['\u{242}','\u{242}'], + ['\u{247}','\u{247}'], + ['\u{249}','\u{249}'], + ['\u{24b}','\u{24b}'], + ['\u{24d}','\u{24d}'], + ['\u{24f}','\u{293}'], + ['\u{295}','\u{2af}'], + ['\u{371}','\u{371}'], + ['\u{373}','\u{373}'], + ['\u{377}','\u{377}'], + ['\u{37b}','\u{37d}'], + ['\u{390}','\u{390}'], + ['\u{3ac}','\u{3ce}'], + ['\u{3d0}','\u{3d1}'], + ['\u{3d5}','\u{3d7}'], + ['\u{3d9}','\u{3d9}'], + ['\u{3db}','\u{3db}'], + ['\u{3dd}','\u{3dd}'], + ['\u{3df}','\u{3df}'], + ['\u{3e1}','\u{3e1}'], + ['\u{3e3}','\u{3e3}'], + ['\u{3e5}','\u{3e5}'], + ['\u{3e7}','\u{3e7}'], + ['\u{3e9}','\u{3e9}'], + ['\u{3eb}','\u{3eb}'], + ['\u{3ed}','\u{3ed}'], + ['\u{3ef}','\u{3f3}'], + ['\u{3f5}','\u{3f5}'], + ['\u{3f8}','\u{3f8}'], + ['\u{3fb}','\u{3fc}'], + ['\u{430}','\u{45f}'], + ['\u{461}','\u{461}'], + ['\u{463}','\u{463}'], + ['\u{465}','\u{465}'], + ['\u{467}','\u{467}'], + ['\u{469}','\u{469}'], + ['\u{46b}','\u{46b}'], + ['\u{46d}','\u{46d}'], + ['\u{46f}','\u{46f}'], + ['\u{471}','\u{471}'], + ['\u{473}','\u{473}'], + ['\u{475}','\u{475}'], + ['\u{477}','\u{477}'], + ['\u{479}','\u{479}'], + ['\u{47b}','\u{47b}'], + ['\u{47d}','\u{47d}'], + ['\u{47f}','\u{47f}'], + ['\u{481}','\u{481}'], + ['\u{48b}','\u{48b}'], + ['\u{48d}','\u{48d}'], + ['\u{48f}','\u{48f}'], + ['\u{491}','\u{491}'], + ['\u{493}','\u{493}'], + ['\u{495}','\u{495}'], + ['\u{497}','\u{497}'], + ['\u{499}','\u{499}'], + ['\u{49b}','\u{49b}'], + ['\u{49d}','\u{49d}'], + ['\u{49f}','\u{49f}'], + ['\u{4a1}','\u{4a1}'], + ['\u{4a3}','\u{4a3}'], + ['\u{4a5}','\u{4a5}'], + ['\u{4a7}','\u{4a7}'], + ['\u{4a9}','\u{4a9}'], + ['\u{4ab}','\u{4ab}'], + ['\u{4ad}','\u{4ad}'], + ['\u{4af}','\u{4af}'], + ['\u{4b1}','\u{4b1}'], + ['\u{4b3}','\u{4b3}'], + ['\u{4b5}','\u{4b5}'], + ['\u{4b7}','\u{4b7}'], + ['\u{4b9}','\u{4b9}'], + ['\u{4bb}','\u{4bb}'], + ['\u{4bd}','\u{4bd}'], + ['\u{4bf}','\u{4bf}'], + ['\u{4c2}','\u{4c2}'], + ['\u{4c4}','\u{4c4}'], + ['\u{4c6}','\u{4c6}'], + ['\u{4c8}','\u{4c8}'], + ['\u{4ca}','\u{4ca}'], + ['\u{4cc}','\u{4cc}'], + ['\u{4ce}','\u{4cf}'], + ['\u{4d1}','\u{4d1}'], + ['\u{4d3}','\u{4d3}'], + ['\u{4d5}','\u{4d5}'], + ['\u{4d7}','\u{4d7}'], + ['\u{4d9}','\u{4d9}'], + ['\u{4db}','\u{4db}'], + ['\u{4dd}','\u{4dd}'], + ['\u{4df}','\u{4df}'], + ['\u{4e1}','\u{4e1}'], + ['\u{4e3}','\u{4e3}'], + ['\u{4e5}','\u{4e5}'], + ['\u{4e7}','\u{4e7}'], + ['\u{4e9}','\u{4e9}'], + ['\u{4eb}','\u{4eb}'], + ['\u{4ed}','\u{4ed}'], + ['\u{4ef}','\u{4ef}'], + ['\u{4f1}','\u{4f1}'], + ['\u{4f3}','\u{4f3}'], + ['\u{4f5}','\u{4f5}'], + ['\u{4f7}','\u{4f7}'], + ['\u{4f9}','\u{4f9}'], + ['\u{4fb}','\u{4fb}'], + ['\u{4fd}','\u{4fd}'], + ['\u{4ff}','\u{4ff}'], + ['\u{501}','\u{501}'], + ['\u{503}','\u{503}'], + ['\u{505}','\u{505}'], + ['\u{507}','\u{507}'], + ['\u{509}','\u{509}'], + ['\u{50b}','\u{50b}'], + ['\u{50d}','\u{50d}'], + ['\u{50f}','\u{50f}'], + ['\u{511}','\u{511}'], + ['\u{513}','\u{513}'], + ['\u{515}','\u{515}'], + ['\u{517}','\u{517}'], + ['\u{519}','\u{519}'], + ['\u{51b}','\u{51b}'], + ['\u{51d}','\u{51d}'], + ['\u{51f}','\u{51f}'], + ['\u{521}','\u{521}'], + ['\u{523}','\u{523}'], + ['\u{525}','\u{525}'], + ['\u{527}','\u{527}'], + ['\u{561}','\u{587}'], + ['\u{1d00}','\u{1d2b}'], + ['\u{1d6b}','\u{1d77}'], + ['\u{1d79}','\u{1d9a}'], + ['\u{1e01}','\u{1e01}'], + ['\u{1e03}','\u{1e03}'], + ['\u{1e05}','\u{1e05}'], + ['\u{1e07}','\u{1e07}'], + ['\u{1e09}','\u{1e09}'], + ['\u{1e0b}','\u{1e0b}'], + ['\u{1e0d}','\u{1e0d}'], + ['\u{1e0f}','\u{1e0f}'], + ['\u{1e11}','\u{1e11}'], + ['\u{1e13}','\u{1e13}'], + ['\u{1e15}','\u{1e15}'], + ['\u{1e17}','\u{1e17}'], + ['\u{1e19}','\u{1e19}'], + ['\u{1e1b}','\u{1e1b}'], + ['\u{1e1d}','\u{1e1d}'], + ['\u{1e1f}','\u{1e1f}'], + ['\u{1e21}','\u{1e21}'], + ['\u{1e23}','\u{1e23}'], + ['\u{1e25}','\u{1e25}'], + ['\u{1e27}','\u{1e27}'], + ['\u{1e29}','\u{1e29}'], + ['\u{1e2b}','\u{1e2b}'], + ['\u{1e2d}','\u{1e2d}'], + ['\u{1e2f}','\u{1e2f}'], + ['\u{1e31}','\u{1e31}'], + ['\u{1e33}','\u{1e33}'], + ['\u{1e35}','\u{1e35}'], + ['\u{1e37}','\u{1e37}'], + ['\u{1e39}','\u{1e39}'], + ['\u{1e3b}','\u{1e3b}'], + ['\u{1e3d}','\u{1e3d}'], + ['\u{1e3f}','\u{1e3f}'], + ['\u{1e41}','\u{1e41}'], + ['\u{1e43}','\u{1e43}'], + ['\u{1e45}','\u{1e45}'], + ['\u{1e47}','\u{1e47}'], + ['\u{1e49}','\u{1e49}'], + ['\u{1e4b}','\u{1e4b}'], + ['\u{1e4d}','\u{1e4d}'], + ['\u{1e4f}','\u{1e4f}'], + ['\u{1e51}','\u{1e51}'], + ['\u{1e53}','\u{1e53}'], + ['\u{1e55}','\u{1e55}'], + ['\u{1e57}','\u{1e57}'], + ['\u{1e59}','\u{1e59}'], + ['\u{1e5b}','\u{1e5b}'], + ['\u{1e5d}','\u{1e5d}'], + ['\u{1e5f}','\u{1e5f}'], + ['\u{1e61}','\u{1e61}'], + ['\u{1e63}','\u{1e63}'], + ['\u{1e65}','\u{1e65}'], + ['\u{1e67}','\u{1e67}'], + ['\u{1e69}','\u{1e69}'], + ['\u{1e6b}','\u{1e6b}'], + ['\u{1e6d}','\u{1e6d}'], + ['\u{1e6f}','\u{1e6f}'], + ['\u{1e71}','\u{1e71}'], + ['\u{1e73}','\u{1e73}'], + ['\u{1e75}','\u{1e75}'], + ['\u{1e77}','\u{1e77}'], + ['\u{1e79}','\u{1e79}'], + ['\u{1e7b}','\u{1e7b}'], + ['\u{1e7d}','\u{1e7d}'], + ['\u{1e7f}','\u{1e7f}'], + ['\u{1e81}','\u{1e81}'], + ['\u{1e83}','\u{1e83}'], + ['\u{1e85}','\u{1e85}'], + ['\u{1e87}','\u{1e87}'], + ['\u{1e89}','\u{1e89}'], + ['\u{1e8b}','\u{1e8b}'], + ['\u{1e8d}','\u{1e8d}'], + ['\u{1e8f}','\u{1e8f}'], + ['\u{1e91}','\u{1e91}'], + ['\u{1e93}','\u{1e93}'], + ['\u{1e95}','\u{1e9d}'], + ['\u{1e9f}','\u{1e9f}'], + ['\u{1ea1}','\u{1ea1}'], + ['\u{1ea3}','\u{1ea3}'], + ['\u{1ea5}','\u{1ea5}'], + ['\u{1ea7}','\u{1ea7}'], + ['\u{1ea9}','\u{1ea9}'], + ['\u{1eab}','\u{1eab}'], + ['\u{1ead}','\u{1ead}'], + ['\u{1eaf}','\u{1eaf}'], + ['\u{1eb1}','\u{1eb1}'], + ['\u{1eb3}','\u{1eb3}'], + ['\u{1eb5}','\u{1eb5}'], + ['\u{1eb7}','\u{1eb7}'], + ['\u{1eb9}','\u{1eb9}'], + ['\u{1ebb}','\u{1ebb}'], + ['\u{1ebd}','\u{1ebd}'], + ['\u{1ebf}','\u{1ebf}'], + ['\u{1ec1}','\u{1ec1}'], + ['\u{1ec3}','\u{1ec3}'], + ['\u{1ec5}','\u{1ec5}'], + ['\u{1ec7}','\u{1ec7}'], + ['\u{1ec9}','\u{1ec9}'], + ['\u{1ecb}','\u{1ecb}'], + ['\u{1ecd}','\u{1ecd}'], + ['\u{1ecf}','\u{1ecf}'], + ['\u{1ed1}','\u{1ed1}'], + ['\u{1ed3}','\u{1ed3}'], + ['\u{1ed5}','\u{1ed5}'], + ['\u{1ed7}','\u{1ed7}'], + ['\u{1ed9}','\u{1ed9}'], + ['\u{1edb}','\u{1edb}'], + ['\u{1edd}','\u{1edd}'], + ['\u{1edf}','\u{1edf}'], + ['\u{1ee1}','\u{1ee1}'], + ['\u{1ee3}','\u{1ee3}'], + ['\u{1ee5}','\u{1ee5}'], + ['\u{1ee7}','\u{1ee7}'], + ['\u{1ee9}','\u{1ee9}'], + ['\u{1eeb}','\u{1eeb}'], + ['\u{1eed}','\u{1eed}'], + ['\u{1eef}','\u{1eef}'], + ['\u{1ef1}','\u{1ef1}'], + ['\u{1ef3}','\u{1ef3}'], + ['\u{1ef5}','\u{1ef5}'], + ['\u{1ef7}','\u{1ef7}'], + ['\u{1ef9}','\u{1ef9}'], + ['\u{1efb}','\u{1efb}'], + ['\u{1efd}','\u{1efd}'], + ['\u{1eff}','\u{1f07}'], + ['\u{1f10}','\u{1f15}'], + ['\u{1f20}','\u{1f27}'], + ['\u{1f30}','\u{1f37}'], + ['\u{1f40}','\u{1f45}'], + ['\u{1f50}','\u{1f57}'], + ['\u{1f60}','\u{1f67}'], + ['\u{1f70}','\u{1f7d}'], + ['\u{1f80}','\u{1f87}'], + ['\u{1f90}','\u{1f97}'], + ['\u{1fa0}','\u{1fa7}'], + ['\u{1fb0}','\u{1fb4}'], + ['\u{1fb6}','\u{1fb7}'], + ['\u{1fbe}','\u{1fbe}'], + ['\u{1fc2}','\u{1fc4}'], + ['\u{1fc6}','\u{1fc7}'], + ['\u{1fd0}','\u{1fd3}'], + ['\u{1fd6}','\u{1fd7}'], + ['\u{1fe0}','\u{1fe7}'], + ['\u{1ff2}','\u{1ff4}'], + ['\u{1ff6}','\u{1ff7}'], + ['\u{210a}','\u{210a}'], + ['\u{210e}','\u{210f}'], + ['\u{2113}','\u{2113}'], + ['\u{212f}','\u{212f}'], + ['\u{2134}','\u{2134}'], + ['\u{2139}','\u{2139}'], + ['\u{213c}','\u{213d}'], + ['\u{2146}','\u{2149}'], + ['\u{214e}','\u{214e}'], + ['\u{2184}','\u{2184}'], + ['\u{2c30}','\u{2c5e}'], + ['\u{2c61}','\u{2c61}'], + ['\u{2c65}','\u{2c66}'], + ['\u{2c68}','\u{2c68}'], + ['\u{2c6a}','\u{2c6a}'], + ['\u{2c6c}','\u{2c6c}'], + ['\u{2c71}','\u{2c71}'], + ['\u{2c73}','\u{2c74}'], + ['\u{2c76}','\u{2c7b}'], + ['\u{2c81}','\u{2c81}'], + ['\u{2c83}','\u{2c83}'], + ['\u{2c85}','\u{2c85}'], + ['\u{2c87}','\u{2c87}'], + ['\u{2c89}','\u{2c89}'], + ['\u{2c8b}','\u{2c8b}'], + ['\u{2c8d}','\u{2c8d}'], + ['\u{2c8f}','\u{2c8f}'], + ['\u{2c91}','\u{2c91}'], + ['\u{2c93}','\u{2c93}'], + ['\u{2c95}','\u{2c95}'], + ['\u{2c97}','\u{2c97}'], + ['\u{2c99}','\u{2c99}'], + ['\u{2c9b}','\u{2c9b}'], + ['\u{2c9d}','\u{2c9d}'], + ['\u{2c9f}','\u{2c9f}'], + ['\u{2ca1}','\u{2ca1}'], + ['\u{2ca3}','\u{2ca3}'], + ['\u{2ca5}','\u{2ca5}'], + ['\u{2ca7}','\u{2ca7}'], + ['\u{2ca9}','\u{2ca9}'], + ['\u{2cab}','\u{2cab}'], + ['\u{2cad}','\u{2cad}'], + ['\u{2caf}','\u{2caf}'], + ['\u{2cb1}','\u{2cb1}'], + ['\u{2cb3}','\u{2cb3}'], + ['\u{2cb5}','\u{2cb5}'], + ['\u{2cb7}','\u{2cb7}'], + ['\u{2cb9}','\u{2cb9}'], + ['\u{2cbb}','\u{2cbb}'], + ['\u{2cbd}','\u{2cbd}'], + ['\u{2cbf}','\u{2cbf}'], + ['\u{2cc1}','\u{2cc1}'], + ['\u{2cc3}','\u{2cc3}'], + ['\u{2cc5}','\u{2cc5}'], + ['\u{2cc7}','\u{2cc7}'], + ['\u{2cc9}','\u{2cc9}'], + ['\u{2ccb}','\u{2ccb}'], + ['\u{2ccd}','\u{2ccd}'], + ['\u{2ccf}','\u{2ccf}'], + ['\u{2cd1}','\u{2cd1}'], + ['\u{2cd3}','\u{2cd3}'], + ['\u{2cd5}','\u{2cd5}'], + ['\u{2cd7}','\u{2cd7}'], + ['\u{2cd9}','\u{2cd9}'], + ['\u{2cdb}','\u{2cdb}'], + ['\u{2cdd}','\u{2cdd}'], + ['\u{2cdf}','\u{2cdf}'], + ['\u{2ce1}','\u{2ce1}'], + ['\u{2ce3}','\u{2ce4}'], + ['\u{2cec}','\u{2cec}'], + ['\u{2cee}','\u{2cee}'], + ['\u{2cf3}','\u{2cf3}'], + ['\u{2d00}','\u{2d25}'], + ['\u{2d27}','\u{2d27}'], + ['\u{2d2d}','\u{2d2d}'], + ['\u{a641}','\u{a641}'], + ['\u{a643}','\u{a643}'], + ['\u{a645}','\u{a645}'], + ['\u{a647}','\u{a647}'], + ['\u{a649}','\u{a649}'], + ['\u{a64b}','\u{a64b}'], + ['\u{a64d}','\u{a64d}'], + ['\u{a64f}','\u{a64f}'], + ['\u{a651}','\u{a651}'], + ['\u{a653}','\u{a653}'], + ['\u{a655}','\u{a655}'], + ['\u{a657}','\u{a657}'], + ['\u{a659}','\u{a659}'], + ['\u{a65b}','\u{a65b}'], + ['\u{a65d}','\u{a65d}'], + ['\u{a65f}','\u{a65f}'], + ['\u{a661}','\u{a661}'], + ['\u{a663}','\u{a663}'], + ['\u{a665}','\u{a665}'], + ['\u{a667}','\u{a667}'], + ['\u{a669}','\u{a669}'], + ['\u{a66b}','\u{a66b}'], + ['\u{a66d}','\u{a66d}'], + ['\u{a681}','\u{a681}'], + ['\u{a683}','\u{a683}'], + ['\u{a685}','\u{a685}'], + ['\u{a687}','\u{a687}'], + ['\u{a689}','\u{a689}'], + ['\u{a68b}','\u{a68b}'], + ['\u{a68d}','\u{a68d}'], + ['\u{a68f}','\u{a68f}'], + ['\u{a691}','\u{a691}'], + ['\u{a693}','\u{a693}'], + ['\u{a695}','\u{a695}'], + ['\u{a697}','\u{a697}'], + ['\u{a723}','\u{a723}'], + ['\u{a725}','\u{a725}'], + ['\u{a727}','\u{a727}'], + ['\u{a729}','\u{a729}'], + ['\u{a72b}','\u{a72b}'], + ['\u{a72d}','\u{a72d}'], + ['\u{a72f}','\u{a731}'], + ['\u{a733}','\u{a733}'], + ['\u{a735}','\u{a735}'], + ['\u{a737}','\u{a737}'], + ['\u{a739}','\u{a739}'], + ['\u{a73b}','\u{a73b}'], + ['\u{a73d}','\u{a73d}'], + ['\u{a73f}','\u{a73f}'], + ['\u{a741}','\u{a741}'], + ['\u{a743}','\u{a743}'], + ['\u{a745}','\u{a745}'], + ['\u{a747}','\u{a747}'], + ['\u{a749}','\u{a749}'], + ['\u{a74b}','\u{a74b}'], + ['\u{a74d}','\u{a74d}'], + ['\u{a74f}','\u{a74f}'], + ['\u{a751}','\u{a751}'], + ['\u{a753}','\u{a753}'], + ['\u{a755}','\u{a755}'], + ['\u{a757}','\u{a757}'], + ['\u{a759}','\u{a759}'], + ['\u{a75b}','\u{a75b}'], + ['\u{a75d}','\u{a75d}'], + ['\u{a75f}','\u{a75f}'], + ['\u{a761}','\u{a761}'], + ['\u{a763}','\u{a763}'], + ['\u{a765}','\u{a765}'], + ['\u{a767}','\u{a767}'], + ['\u{a769}','\u{a769}'], + ['\u{a76b}','\u{a76b}'], + ['\u{a76d}','\u{a76d}'], + ['\u{a76f}','\u{a76f}'], + ['\u{a771}','\u{a778}'], + ['\u{a77a}','\u{a77a}'], + ['\u{a77c}','\u{a77c}'], + ['\u{a77f}','\u{a77f}'], + ['\u{a781}','\u{a781}'], + ['\u{a783}','\u{a783}'], + ['\u{a785}','\u{a785}'], + ['\u{a787}','\u{a787}'], + ['\u{a78c}','\u{a78c}'], + ['\u{a78e}','\u{a78e}'], + ['\u{a791}','\u{a791}'], + ['\u{a793}','\u{a793}'], + ['\u{a7a1}','\u{a7a1}'], + ['\u{a7a3}','\u{a7a3}'], + ['\u{a7a5}','\u{a7a5}'], + ['\u{a7a7}','\u{a7a7}'], + ['\u{a7a9}','\u{a7a9}'], + ['\u{a7fa}','\u{a7fa}'], + ['\u{fb00}','\u{fb06}'], + ['\u{fb13}','\u{fb17}'], + ['\u{ff41}','\u{ff5a}'], + ['\u{10428}','\u{1044f}'], + ['\u{1d41a}','\u{1d433}'], + ['\u{1d44e}','\u{1d454}'], + ['\u{1d456}','\u{1d467}'], + ['\u{1d482}','\u{1d49b}'], + ['\u{1d4b6}','\u{1d4b9}'], + ['\u{1d4bb}','\u{1d4bb}'], + ['\u{1d4bd}','\u{1d4c3}'], + ['\u{1d4c5}','\u{1d4cf}'], + ['\u{1d4ea}','\u{1d503}'], + ['\u{1d51e}','\u{1d537}'], + ['\u{1d552}','\u{1d56b}'], + ['\u{1d586}','\u{1d59f}'], + ['\u{1d5ba}','\u{1d5d3}'], + ['\u{1d5ee}','\u{1d607}'], + ['\u{1d622}','\u{1d63b}'], + ['\u{1d656}','\u{1d66f}'], + ['\u{1d68a}','\u{1d6a5}'], + ['\u{1d6c2}','\u{1d6da}'], + ['\u{1d6dc}','\u{1d6e1}'], + ['\u{1d6fc}','\u{1d714}'], + ['\u{1d716}','\u{1d71b}'], + ['\u{1d736}','\u{1d74e}'], + ['\u{1d750}','\u{1d755}'], + ['\u{1d770}','\u{1d788}'], + ['\u{1d78a}','\u{1d78f}'], + ['\u{1d7aa}','\u{1d7c2}'], + ['\u{1d7c4}','\u{1d7c9}'], + ['\u{1d7cb}','\u{1d7cb}'], +] + +const tabtitle = [ + ['\u{1c5}','\u{1c5}'], + ['\u{1c8}','\u{1c8}'], + ['\u{1cb}','\u{1cb}'], + ['\u{1f2}','\u{1f2}'], + ['\u{1f88}','\u{1f8f}'], + ['\u{1f98}','\u{1f9f}'], + ['\u{1fa8}','\u{1faf}'], + ['\u{1fbc}','\u{1fbc}'], + ['\u{1fcc}','\u{1fcc}'], + ['\u{1ffc}','\u{1ffc}'], +] + +const tabword = [ + ['\u{30}','\u{39}'], + ['\u{41}','\u{5a}'], + ['\u{5f}','\u{5f}'], + ['\u{61}','\u{7a}'], + ['\u{aa}','\u{aa}'], + ['\u{b5}','\u{b5}'], + ['\u{ba}','\u{ba}'], + ['\u{c0}','\u{d6}'], + ['\u{d8}','\u{f6}'], + ['\u{f8}','\u{2c1}'], + ['\u{2c6}','\u{2d1}'], + ['\u{2e0}','\u{2e4}'], + ['\u{2ec}','\u{2ec}'], + ['\u{2ee}','\u{2ee}'], + ['\u{370}','\u{374}'], + ['\u{376}','\u{377}'], + ['\u{37a}','\u{37d}'], + ['\u{386}','\u{386}'], + ['\u{388}','\u{38a}'], + ['\u{38c}','\u{38c}'], + ['\u{38e}','\u{3a1}'], + ['\u{3a3}','\u{3f5}'], + ['\u{3f7}','\u{481}'], + ['\u{48a}','\u{527}'], + ['\u{531}','\u{556}'], + ['\u{559}','\u{559}'], + ['\u{561}','\u{587}'], + ['\u{5d0}','\u{5ea}'], + ['\u{5f0}','\u{5f2}'], + ['\u{620}','\u{64a}'], + ['\u{660}','\u{669}'], + ['\u{66e}','\u{66f}'], + ['\u{671}','\u{6d3}'], + ['\u{6d5}','\u{6d5}'], + ['\u{6e5}','\u{6e6}'], + ['\u{6ee}','\u{6fc}'], + ['\u{6ff}','\u{6ff}'], + ['\u{710}','\u{710}'], + ['\u{712}','\u{72f}'], + ['\u{74d}','\u{7a5}'], + ['\u{7b1}','\u{7b1}'], + ['\u{7c0}','\u{7ea}'], + ['\u{7f4}','\u{7f5}'], + ['\u{7fa}','\u{7fa}'], + ['\u{800}','\u{815}'], + ['\u{81a}','\u{81a}'], + ['\u{824}','\u{824}'], + ['\u{828}','\u{828}'], + ['\u{840}','\u{858}'], + ['\u{8a0}','\u{8a0}'], + ['\u{8a2}','\u{8ac}'], + ['\u{904}','\u{939}'], + ['\u{93d}','\u{93d}'], + ['\u{950}','\u{950}'], + ['\u{958}','\u{961}'], + ['\u{966}','\u{96f}'], + ['\u{971}','\u{977}'], + ['\u{979}','\u{97f}'], + ['\u{985}','\u{98c}'], + ['\u{98f}','\u{990}'], + ['\u{993}','\u{9a8}'], + ['\u{9aa}','\u{9b0}'], + ['\u{9b2}','\u{9b2}'], + ['\u{9b6}','\u{9b9}'], + ['\u{9bd}','\u{9bd}'], + ['\u{9ce}','\u{9ce}'], + ['\u{9dc}','\u{9dd}'], + ['\u{9df}','\u{9e1}'], + ['\u{9e6}','\u{9f1}'], + ['\u{a05}','\u{a0a}'], + ['\u{a0f}','\u{a10}'], + ['\u{a13}','\u{a28}'], + ['\u{a2a}','\u{a30}'], + ['\u{a32}','\u{a33}'], + ['\u{a35}','\u{a36}'], + ['\u{a38}','\u{a39}'], + ['\u{a59}','\u{a5c}'], + ['\u{a5e}','\u{a5e}'], + ['\u{a66}','\u{a6f}'], + ['\u{a72}','\u{a74}'], + ['\u{a85}','\u{a8d}'], + ['\u{a8f}','\u{a91}'], + ['\u{a93}','\u{aa8}'], + ['\u{aaa}','\u{ab0}'], + ['\u{ab2}','\u{ab3}'], + ['\u{ab5}','\u{ab9}'], + ['\u{abd}','\u{abd}'], + ['\u{ad0}','\u{ad0}'], + ['\u{ae0}','\u{ae1}'], + ['\u{ae6}','\u{aef}'], + ['\u{b05}','\u{b0c}'], + ['\u{b0f}','\u{b10}'], + ['\u{b13}','\u{b28}'], + ['\u{b2a}','\u{b30}'], + ['\u{b32}','\u{b33}'], + ['\u{b35}','\u{b39}'], + ['\u{b3d}','\u{b3d}'], + ['\u{b5c}','\u{b5d}'], + ['\u{b5f}','\u{b61}'], + ['\u{b66}','\u{b6f}'], + ['\u{b71}','\u{b71}'], + ['\u{b83}','\u{b83}'], + ['\u{b85}','\u{b8a}'], + ['\u{b8e}','\u{b90}'], + ['\u{b92}','\u{b95}'], + ['\u{b99}','\u{b9a}'], + ['\u{b9c}','\u{b9c}'], + ['\u{b9e}','\u{b9f}'], + ['\u{ba3}','\u{ba4}'], + ['\u{ba8}','\u{baa}'], + ['\u{bae}','\u{bb9}'], + ['\u{bd0}','\u{bd0}'], + ['\u{be6}','\u{bef}'], + ['\u{c05}','\u{c0c}'], + ['\u{c0e}','\u{c10}'], + ['\u{c12}','\u{c28}'], + ['\u{c2a}','\u{c33}'], + ['\u{c35}','\u{c39}'], + ['\u{c3d}','\u{c3d}'], + ['\u{c58}','\u{c59}'], + ['\u{c60}','\u{c61}'], + ['\u{c66}','\u{c6f}'], + ['\u{c85}','\u{c8c}'], + ['\u{c8e}','\u{c90}'], + ['\u{c92}','\u{ca8}'], + ['\u{caa}','\u{cb3}'], + ['\u{cb5}','\u{cb9}'], + ['\u{cbd}','\u{cbd}'], + ['\u{cde}','\u{cde}'], + ['\u{ce0}','\u{ce1}'], + ['\u{ce6}','\u{cef}'], + ['\u{cf1}','\u{cf2}'], + ['\u{d05}','\u{d0c}'], + ['\u{d0e}','\u{d10}'], + ['\u{d12}','\u{d3a}'], + ['\u{d3d}','\u{d3d}'], + ['\u{d4e}','\u{d4e}'], + ['\u{d60}','\u{d61}'], + ['\u{d66}','\u{d6f}'], + ['\u{d7a}','\u{d7f}'], + ['\u{d85}','\u{d96}'], + ['\u{d9a}','\u{db1}'], + ['\u{db3}','\u{dbb}'], + ['\u{dbd}','\u{dbd}'], + ['\u{dc0}','\u{dc6}'], + ['\u{e01}','\u{e30}'], + ['\u{e32}','\u{e33}'], + ['\u{e40}','\u{e46}'], + ['\u{e50}','\u{e59}'], + ['\u{e81}','\u{e82}'], + ['\u{e84}','\u{e84}'], + ['\u{e87}','\u{e88}'], + ['\u{e8a}','\u{e8a}'], + ['\u{e8d}','\u{e8d}'], + ['\u{e94}','\u{e97}'], + ['\u{e99}','\u{e9f}'], + ['\u{ea1}','\u{ea3}'], + ['\u{ea5}','\u{ea5}'], + ['\u{ea7}','\u{ea7}'], + ['\u{eaa}','\u{eab}'], + ['\u{ead}','\u{eb0}'], + ['\u{eb2}','\u{eb3}'], + ['\u{ebd}','\u{ebd}'], + ['\u{ec0}','\u{ec4}'], + ['\u{ec6}','\u{ec6}'], + ['\u{ed0}','\u{ed9}'], + ['\u{edc}','\u{edf}'], + ['\u{f00}','\u{f00}'], + ['\u{f20}','\u{f29}'], + ['\u{f40}','\u{f47}'], + ['\u{f49}','\u{f6c}'], + ['\u{f88}','\u{f8c}'], + ['\u{1000}','\u{102a}'], + ['\u{103f}','\u{1049}'], + ['\u{1050}','\u{1055}'], + ['\u{105a}','\u{105d}'], + ['\u{1061}','\u{1061}'], + ['\u{1065}','\u{1066}'], + ['\u{106e}','\u{1070}'], + ['\u{1075}','\u{1081}'], + ['\u{108e}','\u{108e}'], + ['\u{1090}','\u{1099}'], + ['\u{10a0}','\u{10c5}'], + ['\u{10c7}','\u{10c7}'], + ['\u{10cd}','\u{10cd}'], + ['\u{10d0}','\u{10fa}'], + ['\u{10fc}','\u{1248}'], + ['\u{124a}','\u{124d}'], + ['\u{1250}','\u{1256}'], + ['\u{1258}','\u{1258}'], + ['\u{125a}','\u{125d}'], + ['\u{1260}','\u{1288}'], + ['\u{128a}','\u{128d}'], + ['\u{1290}','\u{12b0}'], + ['\u{12b2}','\u{12b5}'], + ['\u{12b8}','\u{12be}'], + ['\u{12c0}','\u{12c0}'], + ['\u{12c2}','\u{12c5}'], + ['\u{12c8}','\u{12d6}'], + ['\u{12d8}','\u{1310}'], + ['\u{1312}','\u{1315}'], + ['\u{1318}','\u{135a}'], + ['\u{1380}','\u{138f}'], + ['\u{13a0}','\u{13f4}'], + ['\u{1401}','\u{166c}'], + ['\u{166f}','\u{167f}'], + ['\u{1681}','\u{169a}'], + ['\u{16a0}','\u{16ea}'], + ['\u{1700}','\u{170c}'], + ['\u{170e}','\u{1711}'], + ['\u{1720}','\u{1731}'], + ['\u{1740}','\u{1751}'], + ['\u{1760}','\u{176c}'], + ['\u{176e}','\u{1770}'], + ['\u{1780}','\u{17b3}'], + ['\u{17d7}','\u{17d7}'], + ['\u{17dc}','\u{17dc}'], + ['\u{17e0}','\u{17e9}'], + ['\u{1810}','\u{1819}'], + ['\u{1820}','\u{1877}'], + ['\u{1880}','\u{18a8}'], + ['\u{18aa}','\u{18aa}'], + ['\u{18b0}','\u{18f5}'], + ['\u{1900}','\u{191c}'], + ['\u{1946}','\u{196d}'], + ['\u{1970}','\u{1974}'], + ['\u{1980}','\u{19ab}'], + ['\u{19c1}','\u{19c7}'], + ['\u{19d0}','\u{19d9}'], + ['\u{1a00}','\u{1a16}'], + ['\u{1a20}','\u{1a54}'], + ['\u{1a80}','\u{1a89}'], + ['\u{1a90}','\u{1a99}'], + ['\u{1aa7}','\u{1aa7}'], + ['\u{1b05}','\u{1b33}'], + ['\u{1b45}','\u{1b4b}'], + ['\u{1b50}','\u{1b59}'], + ['\u{1b83}','\u{1ba0}'], + ['\u{1bae}','\u{1be5}'], + ['\u{1c00}','\u{1c23}'], + ['\u{1c40}','\u{1c49}'], + ['\u{1c4d}','\u{1c7d}'], + ['\u{1ce9}','\u{1cec}'], + ['\u{1cee}','\u{1cf1}'], + ['\u{1cf5}','\u{1cf6}'], + ['\u{1d00}','\u{1dbf}'], + ['\u{1e00}','\u{1f15}'], + ['\u{1f18}','\u{1f1d}'], + ['\u{1f20}','\u{1f45}'], + ['\u{1f48}','\u{1f4d}'], + ['\u{1f50}','\u{1f57}'], + ['\u{1f59}','\u{1f59}'], + ['\u{1f5b}','\u{1f5b}'], + ['\u{1f5d}','\u{1f5d}'], + ['\u{1f5f}','\u{1f7d}'], + ['\u{1f80}','\u{1fb4}'], + ['\u{1fb6}','\u{1fbc}'], + ['\u{1fbe}','\u{1fbe}'], + ['\u{1fc2}','\u{1fc4}'], + ['\u{1fc6}','\u{1fcc}'], + ['\u{1fd0}','\u{1fd3}'], + ['\u{1fd6}','\u{1fdb}'], + ['\u{1fe0}','\u{1fec}'], + ['\u{1ff2}','\u{1ff4}'], + ['\u{1ff6}','\u{1ffc}'], + ['\u{2071}','\u{2071}'], + ['\u{207f}','\u{207f}'], + ['\u{2090}','\u{209c}'], + ['\u{2102}','\u{2102}'], + ['\u{2107}','\u{2107}'], + ['\u{210a}','\u{2113}'], + ['\u{2115}','\u{2115}'], + ['\u{2119}','\u{211d}'], + ['\u{2124}','\u{2124}'], + ['\u{2126}','\u{2126}'], + ['\u{2128}','\u{2128}'], + ['\u{212a}','\u{212d}'], + ['\u{212f}','\u{2139}'], + ['\u{213c}','\u{213f}'], + ['\u{2145}','\u{2149}'], + ['\u{214e}','\u{214e}'], + ['\u{2183}','\u{2184}'], + ['\u{2c00}','\u{2c2e}'], + ['\u{2c30}','\u{2c5e}'], + ['\u{2c60}','\u{2ce4}'], + ['\u{2ceb}','\u{2cee}'], + ['\u{2cf2}','\u{2cf3}'], + ['\u{2d00}','\u{2d25}'], + ['\u{2d27}','\u{2d27}'], + ['\u{2d2d}','\u{2d2d}'], + ['\u{2d30}','\u{2d67}'], + ['\u{2d6f}','\u{2d6f}'], + ['\u{2d80}','\u{2d96}'], + ['\u{2da0}','\u{2da6}'], + ['\u{2da8}','\u{2dae}'], + ['\u{2db0}','\u{2db6}'], + ['\u{2db8}','\u{2dbe}'], + ['\u{2dc0}','\u{2dc6}'], + ['\u{2dc8}','\u{2dce}'], + ['\u{2dd0}','\u{2dd6}'], + ['\u{2dd8}','\u{2dde}'], + ['\u{2e2f}','\u{2e2f}'], + ['\u{3005}','\u{3006}'], + ['\u{3031}','\u{3035}'], + ['\u{303b}','\u{303c}'], + ['\u{3041}','\u{3096}'], + ['\u{309d}','\u{309f}'], + ['\u{30a1}','\u{30fa}'], + ['\u{30fc}','\u{30ff}'], + ['\u{3105}','\u{312d}'], + ['\u{3131}','\u{318e}'], + ['\u{31a0}','\u{31ba}'], + ['\u{31f0}','\u{31ff}'], + ['\u{3400}','\u{4db5}'], + ['\u{4e00}','\u{9fcc}'], + ['\u{a000}','\u{a48c}'], + ['\u{a4d0}','\u{a4fd}'], + ['\u{a500}','\u{a60c}'], + ['\u{a610}','\u{a62b}'], + ['\u{a640}','\u{a66e}'], + ['\u{a67f}','\u{a697}'], + ['\u{a6a0}','\u{a6e5}'], + ['\u{a717}','\u{a71f}'], + ['\u{a722}','\u{a788}'], + ['\u{a78b}','\u{a78e}'], + ['\u{a790}','\u{a793}'], + ['\u{a7a0}','\u{a7aa}'], + ['\u{a7f8}','\u{a801}'], + ['\u{a803}','\u{a805}'], + ['\u{a807}','\u{a80a}'], + ['\u{a80c}','\u{a822}'], + ['\u{a840}','\u{a873}'], + ['\u{a882}','\u{a8b3}'], + ['\u{a8d0}','\u{a8d9}'], + ['\u{a8f2}','\u{a8f7}'], + ['\u{a8fb}','\u{a8fb}'], + ['\u{a900}','\u{a925}'], + ['\u{a930}','\u{a946}'], + ['\u{a960}','\u{a97c}'], + ['\u{a984}','\u{a9b2}'], + ['\u{a9cf}','\u{a9d9}'], + ['\u{aa00}','\u{aa28}'], + ['\u{aa40}','\u{aa42}'], + ['\u{aa44}','\u{aa4b}'], + ['\u{aa50}','\u{aa59}'], + ['\u{aa60}','\u{aa76}'], + ['\u{aa7a}','\u{aa7a}'], + ['\u{aa80}','\u{aaaf}'], + ['\u{aab1}','\u{aab1}'], + ['\u{aab5}','\u{aab6}'], + ['\u{aab9}','\u{aabd}'], + ['\u{aac0}','\u{aac0}'], + ['\u{aac2}','\u{aac2}'], + ['\u{aadb}','\u{aadd}'], + ['\u{aae0}','\u{aaea}'], + ['\u{aaf2}','\u{aaf4}'], + ['\u{ab01}','\u{ab06}'], + ['\u{ab09}','\u{ab0e}'], + ['\u{ab11}','\u{ab16}'], + ['\u{ab20}','\u{ab26}'], + ['\u{ab28}','\u{ab2e}'], + ['\u{abc0}','\u{abe2}'], + ['\u{abf0}','\u{abf9}'], + ['\u{ac00}','\u{d7a3}'], + ['\u{d7b0}','\u{d7c6}'], + ['\u{d7cb}','\u{d7fb}'], + ['\u{f900}','\u{fa6d}'], + ['\u{fa70}','\u{fad9}'], + ['\u{fb00}','\u{fb06}'], + ['\u{fb13}','\u{fb17}'], + ['\u{fb1d}','\u{fb1d}'], + ['\u{fb1f}','\u{fb28}'], + ['\u{fb2a}','\u{fb36}'], + ['\u{fb38}','\u{fb3c}'], + ['\u{fb3e}','\u{fb3e}'], + ['\u{fb40}','\u{fb41}'], + ['\u{fb43}','\u{fb44}'], + ['\u{fb46}','\u{fbb1}'], + ['\u{fbd3}','\u{fd3d}'], + ['\u{fd50}','\u{fd8f}'], + ['\u{fd92}','\u{fdc7}'], + ['\u{fdf0}','\u{fdfb}'], + ['\u{fe70}','\u{fe74}'], + ['\u{fe76}','\u{fefc}'], + ['\u{ff10}','\u{ff19}'], + ['\u{ff21}','\u{ff3a}'], + ['\u{ff41}','\u{ff5a}'], + ['\u{ff66}','\u{ffbe}'], + ['\u{ffc2}','\u{ffc7}'], + ['\u{ffca}','\u{ffcf}'], + ['\u{ffd2}','\u{ffd7}'], + ['\u{ffda}','\u{ffdc}'], + ['\u{10000}','\u{1000b}'], + ['\u{1000d}','\u{10026}'], + ['\u{10028}','\u{1003a}'], + ['\u{1003c}','\u{1003d}'], + ['\u{1003f}','\u{1004d}'], + ['\u{10050}','\u{1005d}'], + ['\u{10080}','\u{100fa}'], + ['\u{10280}','\u{1029c}'], + ['\u{102a0}','\u{102d0}'], + ['\u{10300}','\u{1031e}'], + ['\u{10330}','\u{10340}'], + ['\u{10342}','\u{10349}'], + ['\u{10380}','\u{1039d}'], + ['\u{103a0}','\u{103c3}'], + ['\u{103c8}','\u{103cf}'], + ['\u{10400}','\u{1049d}'], + ['\u{104a0}','\u{104a9}'], + ['\u{10800}','\u{10805}'], + ['\u{10808}','\u{10808}'], + ['\u{1080a}','\u{10835}'], + ['\u{10837}','\u{10838}'], + ['\u{1083c}','\u{1083c}'], + ['\u{1083f}','\u{10855}'], + ['\u{10900}','\u{10915}'], + ['\u{10920}','\u{10939}'], + ['\u{10980}','\u{109b7}'], + ['\u{109be}','\u{109bf}'], + ['\u{10a00}','\u{10a00}'], + ['\u{10a10}','\u{10a13}'], + ['\u{10a15}','\u{10a17}'], + ['\u{10a19}','\u{10a33}'], + ['\u{10a60}','\u{10a7c}'], + ['\u{10b00}','\u{10b35}'], + ['\u{10b40}','\u{10b55}'], + ['\u{10b60}','\u{10b72}'], + ['\u{10c00}','\u{10c48}'], + ['\u{11003}','\u{11037}'], + ['\u{11066}','\u{1106f}'], + ['\u{11083}','\u{110af}'], + ['\u{110d0}','\u{110e8}'], + ['\u{110f0}','\u{110f9}'], + ['\u{11103}','\u{11126}'], + ['\u{11136}','\u{1113f}'], + ['\u{11183}','\u{111b2}'], + ['\u{111c1}','\u{111c4}'], + ['\u{111d0}','\u{111d9}'], + ['\u{11680}','\u{116aa}'], + ['\u{116c0}','\u{116c9}'], + ['\u{12000}','\u{1236e}'], + ['\u{13000}','\u{1342e}'], + ['\u{16800}','\u{16a38}'], + ['\u{16f00}','\u{16f44}'], + ['\u{16f50}','\u{16f50}'], + ['\u{16f93}','\u{16f9f}'], + ['\u{1b000}','\u{1b001}'], + ['\u{1d400}','\u{1d454}'], + ['\u{1d456}','\u{1d49c}'], + ['\u{1d49e}','\u{1d49f}'], + ['\u{1d4a2}','\u{1d4a2}'], + ['\u{1d4a5}','\u{1d4a6}'], + ['\u{1d4a9}','\u{1d4ac}'], + ['\u{1d4ae}','\u{1d4b9}'], + ['\u{1d4bb}','\u{1d4bb}'], + ['\u{1d4bd}','\u{1d4c3}'], + ['\u{1d4c5}','\u{1d505}'], + ['\u{1d507}','\u{1d50a}'], + ['\u{1d50d}','\u{1d514}'], + ['\u{1d516}','\u{1d51c}'], + ['\u{1d51e}','\u{1d539}'], + ['\u{1d53b}','\u{1d53e}'], + ['\u{1d540}','\u{1d544}'], + ['\u{1d546}','\u{1d546}'], + ['\u{1d54a}','\u{1d550}'], + ['\u{1d552}','\u{1d6a5}'], + ['\u{1d6a8}','\u{1d6c0}'], + ['\u{1d6c2}','\u{1d6da}'], + ['\u{1d6dc}','\u{1d6fa}'], + ['\u{1d6fc}','\u{1d714}'], + ['\u{1d716}','\u{1d734}'], + ['\u{1d736}','\u{1d74e}'], + ['\u{1d750}','\u{1d76e}'], + ['\u{1d770}','\u{1d788}'], + ['\u{1d78a}','\u{1d7a8}'], + ['\u{1d7aa}','\u{1d7c2}'], + ['\u{1d7c4}','\u{1d7cb}'], + ['\u{1d7ce}','\u{1d7ff}'], + ['\u{1ee00}','\u{1ee03}'], + ['\u{1ee05}','\u{1ee1f}'], + ['\u{1ee21}','\u{1ee22}'], + ['\u{1ee24}','\u{1ee24}'], + ['\u{1ee27}','\u{1ee27}'], + ['\u{1ee29}','\u{1ee32}'], + ['\u{1ee34}','\u{1ee37}'], + ['\u{1ee39}','\u{1ee39}'], + ['\u{1ee3b}','\u{1ee3b}'], + ['\u{1ee42}','\u{1ee42}'], + ['\u{1ee47}','\u{1ee47}'], + ['\u{1ee49}','\u{1ee49}'], + ['\u{1ee4b}','\u{1ee4b}'], + ['\u{1ee4d}','\u{1ee4f}'], + ['\u{1ee51}','\u{1ee52}'], + ['\u{1ee54}','\u{1ee54}'], + ['\u{1ee57}','\u{1ee57}'], + ['\u{1ee59}','\u{1ee59}'], + ['\u{1ee5b}','\u{1ee5b}'], + ['\u{1ee5d}','\u{1ee5d}'], + ['\u{1ee5f}','\u{1ee5f}'], + ['\u{1ee61}','\u{1ee62}'], + ['\u{1ee64}','\u{1ee64}'], + ['\u{1ee67}','\u{1ee6a}'], + ['\u{1ee6c}','\u{1ee72}'], + ['\u{1ee74}','\u{1ee77}'], + ['\u{1ee79}','\u{1ee7c}'], + ['\u{1ee7e}','\u{1ee7e}'], + ['\u{1ee80}','\u{1ee89}'], + ['\u{1ee8b}','\u{1ee9b}'], + ['\u{1eea1}','\u{1eea3}'], + ['\u{1eea5}','\u{1eea9}'], + ['\u{1eeab}','\u{1eebb}'], + ['\u{20000}','\u{2a6d6}'], + ['\u{2a700}','\u{2b734}'], + ['\u{2b740}','\u{2b81d}'], + ['\u{2f800}','\u{2fa1d}'], +] + +const tabdigit = [ + ['\u{30}','\u{39}'], + ['\u{660}','\u{669}'], + ['\u{6f0}','\u{6f9}'], + ['\u{7c0}','\u{7c9}'], + ['\u{966}','\u{96f}'], + ['\u{9e6}','\u{9ef}'], + ['\u{a66}','\u{a6f}'], + ['\u{ae6}','\u{aef}'], + ['\u{b66}','\u{b6f}'], + ['\u{be6}','\u{bef}'], + ['\u{c66}','\u{c6f}'], + ['\u{ce6}','\u{cef}'], + ['\u{d66}','\u{d6f}'], + ['\u{e50}','\u{e59}'], + ['\u{ed0}','\u{ed9}'], + ['\u{f20}','\u{f29}'], + ['\u{1040}','\u{1049}'], + ['\u{1090}','\u{1099}'], + ['\u{17e0}','\u{17e9}'], + ['\u{1810}','\u{1819}'], + ['\u{1946}','\u{194f}'], + ['\u{19d0}','\u{19d9}'], + ['\u{1a80}','\u{1a89}'], + ['\u{1a90}','\u{1a99}'], + ['\u{1b50}','\u{1b59}'], + ['\u{1bb0}','\u{1bb9}'], + ['\u{1c40}','\u{1c49}'], + ['\u{1c50}','\u{1c59}'], + ['\u{a620}','\u{a629}'], + ['\u{a8d0}','\u{a8d9}'], + ['\u{a900}','\u{a909}'], + ['\u{a9d0}','\u{a9d9}'], + ['\u{aa50}','\u{aa59}'], + ['\u{abf0}','\u{abf9}'], + ['\u{ff10}','\u{ff19}'], + ['\u{104a0}','\u{104a9}'], + ['\u{11066}','\u{1106f}'], + ['\u{110f0}','\u{110f9}'], + ['\u{11136}','\u{1113f}'], + ['\u{111d0}','\u{111d9}'], + ['\u{116c0}','\u{116c9}'], + ['\u{1d7ce}','\u{1d7ff}'], +] + +const tabxdigit = [ + ['\u{30}','\u{39}'], + ['\u{41}','\u{46}'], + ['\u{61}','\u{66}'], +] + +const tabspace = [ + ['\u{9}','\u{d}'], + ['\u{20}','\u{20}'], + ['\u{85}','\u{85}'], + ['\u{a0}','\u{a0}'], + ['\u{1680}','\u{1680}'], + ['\u{2000}','\u{200a}'], + ['\u{2028}','\u{2029}'], + ['\u{202f}','\u{202f}'], + ['\u{205f}','\u{205f}'], + ['\u{3000}','\u{3000}'], +] + +const tabblank = [ + ['\u{9}','\u{9}'], + ['\u{20}','\u{20}'], + ['\u{a0}','\u{a0}'], + ['\u{1680}','\u{1680}'], + ['\u{2000}','\u{200a}'], + ['\u{202f}','\u{202f}'], + ['\u{205f}','\u{205f}'], + ['\u{3000}','\u{3000}'], +] + diff --git a/lib/regex/redump.myr b/lib/regex/redump.myr new file mode 100644 index 0000000..7233864 --- /dev/null +++ b/lib/regex/redump.myr @@ -0,0 +1,87 @@ +use std +use bio +use regex + +const main = {args + var cmd, comp + var verbose + var fd + + verbose = false + cmd = std.optparse(args, &[ + .argdesc = "regex [inputs...]", + .minargs = 1, + .maxargs = 1, + .opts = [ + [.opt='v', .desc="dump verbose regex output"] + ][:], + ]) + for opt in cmd.opts + match opt + | ('v', _): verbose = true + | _: std.fatal("Unknown argument") + ;; + ;; + if verbose + comp = regex.dbgcompile(cmd.args[0]) + else + comp = regex.compile(cmd.args[0]) + ;; + match comp + | `std.Fail m: + std.fatal("unable to compile regex: {}\n", regex.failmsg(m)) + | `std.Ok re: + if cmd.args.len > 1 + runall(re, cmd.args) + else + fd = bio.mkfile(0, bio.Rd) + dump(re, fd) + bio.close(fd) + ;; + ;; +} + +const runall = {re, files + + for f in files + match bio.open(f, bio.Rd) + | `std.Ok fd: + dump(re, fd) + bio.close(fd) + | `std.Fail m: + std.fatal("failed to open {}: {}\n", f, m) + ;; + ;; +} + +const dump = {re, fd + while true + match bio.readln(fd) + | `std.Some ln: + show(re, ln, regex.exec(re, ln)) + std.slfree(ln) + | `std.None: + break + ;; + ;; +} + +const show = {re, ln, mg + var i + + match mg + | `std.Some rl: + std.put("Matched: {}\n", rl[0]) + for i = 1; i < rl.len; i++ + std.put("\tgroup {}: {}\n", i, rl[i]) + ;; + | `std.None: + std.put("Match failed:\n") + std.put("\t{}\n", ln) + std.put("\t") + for i = 0; i < re.strp - 1; i++ + std.put("~") + ;; + std.put("^\n") + ;; +} diff --git a/lib/regex/test/basic.myr b/lib/regex/test/basic.myr new file mode 100644 index 0000000..168b547 --- /dev/null +++ b/lib/regex/test/basic.myr @@ -0,0 +1,39 @@ +use std + +use "testmatch.use" + +const main = { + var s : byte[:] + + s = std.strjoin([ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + ][:], "") + testmatch(".*bc", "Abc", `std.Some [][:]) + testmatch("(a*)*", "a", `std.Some ["a"][:]) + testmatch("(aa|aab?)*", s, `std.Some ["aa"][:]) + /* greedy matches */ + testmatch("(<.*>).*", "<a foo> blah <bar>", `std.Some [ + "<a foo> blah <bar>", + ][:]) + testmatch("(<.+>).*", "<a foo> blah <bar>", `std.Some [ + "<a foo> blah <bar>", + ][:]) + /* reluctant matches */ + testmatch("(<.*?>).*", "<a foo> blah <bar>", `std.Some [ + "<a foo>", + ][:]) + testmatch("(<.+?>).*", "<a foo> blah <bar>", `std.Some [ + "<a foo>", + ][:]) +} diff --git a/lib/regex/test/bld.sub b/lib/regex/test/bld.sub new file mode 100644 index 0000000..12f8056 --- /dev/null +++ b/lib/regex/test/bld.sub @@ -0,0 +1,7 @@ +test basic {inc=../libstd,inc=..} = basic.myr testmatch.myr;; +test boundaries {inc=../libstd,inc=..} = boundaries.myr testmatch.myr;; +test capture {inc=../libstd,inc=..} = capture.myr testmatch.myr;; +test class {inc=../libstd,inc=..} = class.myr testmatch.myr;; +test failmatch {inc=../libstd,inc=..} = failmatch.myr testmatch.myr;; +test negclass {inc=../libstd,inc=..} = negclass.myr testmatch.myr;; +test unicode {inc=../libstd,inc=..} = unicode.myr testmatch.myr;; diff --git a/lib/regex/test/boundaries.myr b/lib/regex/test/boundaries.myr new file mode 100644 index 0000000..196d197 --- /dev/null +++ b/lib/regex/test/boundaries.myr @@ -0,0 +1,36 @@ +use std +use "testmatch.use" + +const main = { + /* expected matches */ + testmatch("\\<([a-z]*)\\>", "abcdef", `std.Some [ + "abcdef", + ][:]) + testmatch(".*(\\<.*\\>).*", "!m!", `std.Some [ /* single char word in midstring */ + "m", + ][:]) + testmatch(".*(\\<.*\\>).*", "!m", `std.Some [ /* single char word at end of string */ + "m", + ][:]) + testmatch(".*(\\<.*\\>).*", "m!", `std.Some [ /* single char word at start of string */ + "m", + ][:]) + testmatch(".*(\\<.*\\>).*", "!@#!!matches!!%!", `std.Some [ /* word in midstring */ + "matches", + ][:]) + testmatch(".*(\\<.*\\>).*", "matches!!%!", `std.Some [ /* word at start of string */ + "matches", + ][:]) + testmatch(".*(\\<.*\\>).*", "!@#!!matches", `std.Some [ /* word at end of string */ + "matches", + ][:]) + testmatch(".*(\\<.*\\>).*", "!@#!!matches!!%!foo", `std.Some [ /* matches last word in string */ + "foo", + ][:]) + testmatch(".*(\\<.*\\>).*", "123", `std.Some [ /* numbers are also word bounds */ + "123", + ][:]) + + /* nonmatches */ + testmatch("\\<([a-z]*)\\>foo", "abcdefoo", `std.None) /* word boundary needed in midstring */ +} diff --git a/lib/regex/test/capture.myr b/lib/regex/test/capture.myr new file mode 100644 index 0000000..f270428 --- /dev/null +++ b/lib/regex/test/capture.myr @@ -0,0 +1,17 @@ +use std +use "testmatch.use" + +const main = { + testmatch("A(.*)", "Abc", `std.Some ["bc"][:]) + testmatch("A(.*)e", "Abcde", `std.Some ["bcd"][:]) + testmatch("(a|b)+", "abab", `std.Some ["b"][:]) + testmatch("A(b(.*)d)e", "Abcde", `std.Some [ + "bcd", + "c" + ][:]) + testmatch("(a?)(a*)(a?)", "aaaa", `std.Some [ + "a", + "aaa", + "" + ][:]) +} diff --git a/lib/regex/test/class.myr b/lib/regex/test/class.myr new file mode 100644 index 0000000..284f440 --- /dev/null +++ b/lib/regex/test/class.myr @@ -0,0 +1,67 @@ +use std + +use "testmatch.use" + +const main = { + asciiclass() + set() + /* + unicodeclass() + negasciiclass() + negasciirange() + negset() + */ +} + +const asciiclass = { + /* \d success */ + testmatch("\\d", "1", `std.Some [][:]) + testmatch("\\d\\d", "13", `std.Some [][:]) + testmatch("\\d+", "13688", `std.Some [][:]) + /* \d fail */ + testmatch("\\d", "x", `std.None) + testmatch("\\d\\d", "x3", `std.None) + testmatch("\\d+", "1368f", `std.None) + + /* \x success */ + testmatch("\\x", "a", `std.Some [][:]) + testmatch("\\x\\x", "1F", `std.Some [][:]) + testmatch("\\x+", "13b8cDEf", `std.Some [][:]) + /* \x fail */ + testmatch("\\x", "Z", `std.None) + testmatch("\\x\\x", "fg", `std.None) + testmatch("\\x+", "13b8cg", `std.None) + + /* \s success */ + testmatch("\\s", " ", `std.Some [][:]) + testmatch("\\s\\s", "\t\n", `std.Some [][:]) + testmatch("\\s+", "\t\n\r \t", `std.Some [][:]) + /* \s fail */ + testmatch("\\s", "a", `std.None) + testmatch("\\s\\s", "i\n", `std.None) + testmatch("\\s+", "\t\n\r.\t", `std.None) + + /* word success */ + testmatch("\\w+", "abcABC0123_", `std.Some [][:]) + /* word fail */ + testmatch("\\w+", "abcABC0123_.", `std.None) + + /* \h success */ + testmatch("\\h", " ", `std.Some [][:]) + testmatch("\\h\\h", "\t ", `std.Some [][:]) + testmatch("\\h+", "\t \t ", `std.Some [][:]) + /* \h fail */ + testmatch("\\h", "\n", `std.None) + testmatch("\\h\\h", "\t\r", `std.None) + testmatch("\\h+", "\t \t.", `std.None) +} + +const set = { + /* ranges */ + testmatch("[a-z]*", "abcd", `std.Some [][:]) + testmatch("[a-zA-Z]*", "abCD", `std.Some [][:]) + testmatch("[a-zA-Z0-9_]*", "_abCD018", `std.Some [][:]) + + testmatch("[abc]*", "abba", `std.Some [][:]) + testmatch("[a-zABC]*", "abBa", `std.Some [][:]) +} diff --git a/lib/regex/test/failmatch.myr b/lib/regex/test/failmatch.myr new file mode 100644 index 0000000..c501d00 --- /dev/null +++ b/lib/regex/test/failmatch.myr @@ -0,0 +1,7 @@ +use std + +use "testmatch.use" + +const main = { + testmatch(".*bcd", "Abc", `std.None) +} diff --git a/lib/regex/test/negclass.myr b/lib/regex/test/negclass.myr new file mode 100644 index 0000000..19404fa --- /dev/null +++ b/lib/regex/test/negclass.myr @@ -0,0 +1,72 @@ +use std + +use "testmatch.use" + +const main = { + asciiclass() + set() + /* + unicodeclass() + negasciiclass() + negasciirange() + negset() + */ +} + +const asciiclass = { + /* \D success */ + testmatch("\\D", "x", `std.Some [][:]) + testmatch("\\D+", "xa!#^cs", `std.Some [][:]) + + /* \D fail: end of ranges chars */ + testmatch("\\D", "0", `std.None) + testmatch("\\D", "9", `std.None) + testmatch("\\D+", "a35x", `std.None) + testmatch("\\D+", "13688", `std.None) + + /* \X success */ + testmatch("\\X", "Z", `std.Some [][:]) + testmatch("\\X\\X", "gg", `std.Some [][:]) + /* \X fail */ + testmatch("\\X", "a", `std.None) + testmatch("\\X+", "zz13b8cDEf", `std.None) + + /* \S success */ + testmatch("\\S", "a", `std.Some [][:]) + testmatch("\\S\\S", "i%", `std.Some [][:]) + testmatch("\\S+", "alskd690!#!!", `std.Some [][:]) + + /* \S fail */ + testmatch("\\S", " ", `std.None) + testmatch("\\S\\S", "\t\n", `std.None) + testmatch("\\S+", "\t \nkait", `std.None) + + /* word success */ + testmatch("\\W+", "!%!^^@@!^", `std.Some [][:]) + /* word fail */ + testmatch("\\W+", "a^#$bcABC0123_", `std.None) + + /* \H success */ + testmatch("\\H", "\n", `std.Some [][:]) + testmatch("\\H\\H", "\n\r", `std.Some [][:]) + /* \H fail */ + testmatch("\\H+", "\t \t.", `std.None) + testmatch("\\H\\H", "\t ", `std.None) + testmatch("\\H+", "\ta35 \t ", `std.None) +} + +const set = { + /* ranges: should succeed */ + testmatch("[^a-z]*", "ABCD", `std.Some [][:]) + testmatch("[^a-zA-Z]*", "1234", `std.Some [][:]) + testmatch("[^a-zA-Z0-9_]*", "-^^-", `std.Some [][:]) + testmatch("[^abc]*", "d6d", `std.Some [][:]) + testmatch("[^a-zABC]*", "!^!!))#", `std.Some [][:]) + + /* ranges: should fail */ + testmatch("[^a-z]*", "abcd", `std.None) + testmatch("[^a-zA-Z]*", "abCD", `std.None) + testmatch("[^a-zA-Z0-9_]*", "_abCD018", `std.None) + testmatch("[^abc]*", "abba", `std.None) + testmatch("[^a-zABC]*", "abBa", `std.None) +} diff --git a/lib/regex/test/runtest.sh b/lib/regex/test/runtest.sh new file mode 100755 index 0000000..95f548f --- /dev/null +++ b/lib/regex/test/runtest.sh @@ -0,0 +1,124 @@ +#!/bin/bash +NFAILURES=0 +NPASSES=0 + +function build { + rm -f $1 $1.o $1.s $1.use + myrbuild $FLAGS -b $1 $1.myr $EXTRA_SRC +} + +function pass { + PASSED="$PASSED $1" + NPASSED=$[$NPASSED + 1] +} + +function fail { + echo "FAIL: $1" + FAILED="$FAILED $1" + NFAILED=$[$NFAILED + 1] +} + +function expectstatus { + ./$1 $3 + if [ $? -eq $2 ]; then + pass $1 + return + else + fail $1 + fi +} + +function expectprint { + if [ "`./$1 $3`" != "$2" ]; then + fail $1 + else + pass $1 + fi +} + + +function expectcompare { + if [ x"" != x"$TMPDIR" ]; then + t=$TMPDIR/myrtest-$1-$RANDOM + else + t=/tmp/myrtest-$1-$RANDOM + fi + ./$1 $3 > $t + if cmp $t data/$1-expected; then + pass $1 + else + fail $1 + fi + rm -f $t +} + +function expectfcompare { + ./$1 $3 + if cmp data/$1-expected $2; then + pass $1 + else + fail $1 + fi +} + +function shouldskip { + if [ -z $ARGS ]; then + return 1 + fi + + for i in $ARGS; do + if [ $i = $1 ]; then + return 1 + fi + done + return 0 +} + + +# Should build and run +function B { + if shouldskip $1; then + return + fi + + test="$1"; shift + type="$1"; shift + res="$1"; shift + if [ $# > 0 ]; then + args="$1"; shift + fi + build $test + case $type in + "E") expectstatus "$test" "$res" "$input";; + "P") expectprint "$test" "$res" "$input";; + "C") expectcompare "$test" "$res" "$input";; + "F") expectfcompare "$test" "$res" "$args";; + esac +} + +# Should fail +function F { + if shouldskip $1; then + return + fi + (build $1) > /dev/null + if [ $? -eq '1' ]; then + pass $1 + else + fail $1 + fi +} + +# Should generate a usefile +function U { + return +} + +source tests + +echo "PASSED ($NPASSED): $PASSED" +if [ -z "$NFAILED" ]; then + echo "SUCCESS" +else + echo "FAILURES ($NFAILED): $FAILED" +fi diff --git a/lib/regex/test/testmatch.myr b/lib/regex/test/testmatch.myr new file mode 100644 index 0000000..4600410 --- /dev/null +++ b/lib/regex/test/testmatch.myr @@ -0,0 +1,58 @@ +use std +use regex + +pkg = + const testmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) + const dbgmatch : (pat : byte[:], text : byte[:], expected : std.option(byte[:][:]) -> void) +;; + +const testmatch = {pat, text, expected + run(regex.compile(pat), pat, text, expected) +} + +const dbgmatch = {pat, text, expected + run(regex.dbgcompile(pat), pat, text, expected) +} + +const run = {regex, pat, text, expected + var i, re + + re = std.try(regex) + match regex.exec(re, text) + | `std.Some res: + match expected + | `std.None: + std.fatal("expected no match, got:") + for i = 0; i < res.len; i++ + std.put("\t{}: {}\n", i, res[i]) + ;; + | `std.Some exp: + if !std.sleq(res[0], text) + std.put("whole match does not match text!\n") + std.fatal("failed matching {} over {}\n", pat, text) + ;; + res = res[1:] + if res.len != exp.len + std.put("mismatch: expected {} matches, got {}\n", exp.len, res.len) + std.fatal("failed matching {} over {}\n", pat, text) + ;; + for i = 0; i < exp.len; i++ + if !std.sleq(res[i], exp[i]) + std.put("mismatch on {}: expected {}, got {}\n", i, exp[i], res[i]) + std.fatal("failed matching {} over {}\n", pat, text) + ;; + ;; + ;; + | `std.None: + match expected + | `std.None: /* : expected failure */ + | `std.Some matches: + std.put("expected matches:\n") + for i = 0; i < matches.len; i++ + std.put("\t{}: {}\n", i, matches[i]) + ;; + std.fatal("no match found\n") + ;; + ;; + regex.free(re) +} diff --git a/lib/regex/test/tests b/lib/regex/test/tests new file mode 100644 index 0000000..a5f70f7 --- /dev/null +++ b/lib/regex/test/tests @@ -0,0 +1,29 @@ +FLAGS=-I../ +EXTRA_SRC=testmatch.myr +# Format: +# [B|F] testname [E|P] result +# [B|F]: Compiler outcome. +# B: Expect that this test will build. +# F: Expect that this test will not build. +# testname: Test case +# The test that will run. We will try to +# compile 'testname.myr' to 'testname', +# and then execute it, verifying the result +# [E|P|C]: Result type +# E tells us that the result is an exit status +# P tells us that the result is on stdout, +# and should be compared to the value on the +# line +# C tells us that the result is on stdout, +# and should be compared to the contents of +# the file passed on the line. +# result: Result value +# What we compare with. This should be self- +# evident. +B basic C +B boundaries C +B capture C +B class C +B failmatch C +B negclass C +B unicode C diff --git a/lib/regex/test/unicode.myr b/lib/regex/test/unicode.myr new file mode 100644 index 0000000..5916a1c --- /dev/null +++ b/lib/regex/test/unicode.myr @@ -0,0 +1,19 @@ +use std +use regex + +use "testmatch.use" + +const main = { + testmatch(".*bæc", "Abæc", `std.Some [][:]) + testmatch("(\\p{L}*)bæc\\P{L}*", \ + "Aabæc%!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", \ + `std.Some ["Aa"][:]) + /* test various syntaxen */ + testmatch("(\\pL*)bæc\\PL*", \ + "Aabæc%!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", \ + `std.Some ["Aa"][:]) + testmatch("(\\p{Letter}*)bæc\\P{Uppercase_Letter}*", \ + "Aabæc%!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", \ + `std.Some ["Aa"][:]) + testmatch(".", "æ", `std.Some [][:]) +} diff --git a/lib/regex/types.myr b/lib/regex/types.myr new file mode 100644 index 0000000..b283d2a --- /dev/null +++ b/lib/regex/types.myr @@ -0,0 +1,88 @@ +use std + +pkg regex = + type status = union + `Noimpl + `Incomplete + `Unbalanced + `Emptyparen + `Badrep + `Badrange + `Badescape + ;; + + type ast = union + /* basic string building */ + `Alt (ast#, ast#) + `Cat (ast#, ast#) + + /* repetition */ + `Star ast# + `Rstar ast# + `Plus ast# + `Rplus ast# + `Quest ast# + + /* end matches */ + `Chr char + `Ranges char[2][:] + + /* meta */ + `Cap (std.size, ast#) /* id, ast */ + `Bol /* beginning of line */ + `Eol /* end of line */ + `Bow /* beginning of word */ + `Eow /* end of word */ + ;; + + + type regex = struct + /* compile state */ + debug : bool + pat : byte[:] + nmatch : std.size + + /* VM state */ + runq : rethread# + expired : rethread# + expiredtail : rethread# + proglen : std.size + prog : reinst[:] + nthr : std.size + str : byte[:] + strp : std.size + ;; + + type rethread = struct + next : rethread# /* run queue link */ + + tid : std.size /* just for debugging */ + ip : std.size /* the instruction pointer */ + dead : bool /* thread died */ + matched : bool /* thread matched */ + + mstart : std.size[:] /* match starts */ + mend : std.size[:] /* match ends */ + ;; + + pkglocal type reinst = union + /* direct consumers */ + `Ibyte byte + `Irange (byte, byte) + + /* groups */ + `Ilbra std.size + `Irbra std.size + + /* anchors */ + `Ibol + `Ieol + `Ibow + `Ieow + + /* control flow */ + `Ifork (std.size, std.size) + `Ijmp std.size + `Imatch std.size + ;; +;; |