summaryrefslogtreecommitdiff
path: root/doc/api/libregex/index.txt
diff options
context:
space:
mode:
Diffstat (limited to 'doc/api/libregex/index.txt')
-rw-r--r--doc/api/libregex/index.txt279
1 files changed, 0 insertions, 279 deletions
diff --git a/doc/api/libregex/index.txt b/doc/api/libregex/index.txt
deleted file mode 100644
index 5cf4716..0000000
--- a/doc/api/libregex/index.txt
+++ /dev/null
@@ -1,279 +0,0 @@
-{
- title: libregex
- description: Libregex API documentation.
-}
-
-Summary
--------
-
- pkg regex =
- type ast = union
- /* basic string building */
- `Alt (ast#, ast#)
- `Cat (ast#, ast#)
-
- /* repetition */
- `Star ast#
- `Rstar ast#
- `Plus ast#
- `Rplus ast#
- `Quest ast#
-
- /* end matches */
- `Chr char
- `Ranges char[2][:]
-
- /* meta */
- `Cap (std.size, ast#) /* id, ast */
- `Bol /* beginning of line */
- `Eol /* end of line */
- `Bow /* beginning of word */
- `Eow /* end of word */
- ;;
-
- type status = union
- `Noimpl
- `Incomplete
- `Unbalanced char
- `Emptyparen
- `Badrep char
- `Badrange byte[:]
- `Badescape char
- ;;
-
- /* regex compilation */
- const parse : (re : byte[:] -> std.result(ast#, status))
- const compile : (re : byte[:] -> std.result(regex#, status))
- const dbgcompile : (re : byte[:] -> std.result(regex#, status))
- const free : (re : regex# -> void)
-
- /* regex execution */
- const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
- const search : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
-
- const sub : (re : regex#, str : byte[:], subst : byte[:][:] -> std.option(byte[:]))
- const sbsub : (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> bool)
- const suball : (re : regex#, str : byte[:], subst : byte[:][:] -> byte[:])
- const sbsuball : (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> void)
-
- const matchfree : (pat : byte[:][:] -> void)
- ;;
-
-
-Overview
---------
-
-Libregex is a simple regex API that uses a parallel NFA implementation. This
-means that while it is not blazingly fast, it does not exhibit pathological
-behavior on regexes like `(aa|aab?)\*` that many common regex APIs will see.
-
-Regex Syntax
--------------
-
-The grammar for regexes that are accepted is sketched out below.
-
- regex : altexpr
- altexpr : catexpr ('|' altexpr)+
- catexpr : repexpr (catexpr)+
- repexpr : baseexpr[*+?][?]
- baseexpr : literal
- | charclass
- | charrange
- | '.'
- | '^'
- | '$'
- | '(' regex ')'
- charclass : see below
- charrange : '[' (literal('-' literal)?)+']'
-
-The following metacharacters have the meanings listed below:
-
-Matches a single unicode character
-
-<table>
- <tr><tr><th>Metachar</th> <th>Description</th></tr>
- <tr><td><code>^</td></code> <td>Matches the beginning of a line. Does not consume any characters.</td></tr>
- <tr><td><code>$</td></code> <td>Matches the end of a line. Does not consume any characters.</td></tr>
- <tr><td><code>*</td></code> <td>Matches any number of repetitions of the preceding regex fragment.</td></tr>
- <tr><td><code>+</td></code> <td>Matches one or more repetitions of the preceding regex fragment.</td></tr>
- <tr><td><code>?</td></code> <td>Matches zero or one of the preceding regex fragment.</td></tr>
-</table>
-
-In order to match a literal metacharacter, it needs to be preceded by a '\' character.
-
-The following character classes are supported:
-
-<table>
- <tr><tr><th>Charclass</th> <th>Description</th></tr>
- <tr><td><code>\d </code></td> <td>ASCII digits</td></tr>
- <tr><td><code>\D </code></td> <td>Negation of ASCII digits</td></tr>
- <tr><td><code>\x </code></td> <td>ASCII Hex digits</td></tr>
- <tr><td><code>\X </code></td> <td>Negation of ASCII Hex digits</td></tr>
- <tr><td><code>\s </code></td> <td>ASCII spaces</td></tr>
- <tr><td><code>\S </code></td> <td>Negation of ASCII spaces</td></tr>
- <tr><td><code>\w </code></td> <td>ASCII word characters</td></tr>
- <tr><td><code>\W </code></td> <td>Negation of ASCII word characters</td></tr>
- <tr><td><code>\h </code></td> <td>ASCII whitespace characters</td></tr>
- <tr><td><code>\H </code></td> <td>Negation of ASCII whitespace characters</td></tr>
- <tr><td><code>\pX</code></td> <td>Characters with unicode property 'X'</td></tr>
- <tr><td><code>\PX</code></td> <td>Negation of characters with property 'X'</td></tr>
-</table>
-
-The current list of supported Unicode character classes `X` are
-
-<table>
- <tr><th>Abbrev</th> <th>Full name</th> <th>Description</th></tr>
- <tr>
- <td><code>L</code></td> <td><code>Letter</code></td>
- <td>All letters, including lowercase, uppercase, titlecase,
- and uncased.</td>
- </tr>
- <tr>
- <td><code>Lu</code></td> <td><code>Uppercase_Letter</code></td>
- <td>All uppercase letters.</td>
- </tr>
- <tr>
- <td><code>Ll</code></td> <td><code>Lowercase_Letter</code></td>
- <td>All lowercase letters.</td>
- </tr>
- <tr>
- <td><code>Lt</code></td> <td><code>Titlecase_Letter</code></td>
- <td>All titlecase letters.</td>
- </tr>
- <tr>
- <td><code>N</code></td> <td><code>Number</code></td>
- <td>All numbers.</td>
- </tr>
- <tr>
- <td><code>Z</code></td> <td><code>Separator</code></td>
- <td>All separators, including spaces and control characers.</td>
- </tr>
- <tr>
- <td><code>Zs</code></td> <td><code>Space_Separator</code></td>
- <td>All space separators, including tabs and ASCII spaces.</td>
- </tr>
-</table>
-
-Functions
----------
-
- const parse : (re : byte[:] -> std.result(ast#, status))
-
-Parse takes a regex string, and converts it to a regex syntax tree, returning
-`\`std.Success ast#` if the regex was valid, or a `\`std.Failure r` if the
-regex could not be parsed. This AST can be used to further process the regex,
-possibly turning it into a multiregex as in Hairless, or using it for NFA and
-DFA tricks.
-
- const compile : (re : byte[:] -> std.result(regex#, status))
- const dbgcompile : (re : byte[:] -> std.result(regex#, status))
-
-`compile` takes a regex string, and converts it to a compiled regex, returning
-`\`std.Success regex` if the regex was valid, or a `\`std.Failure r` with the
-reason that the compilation failed. `dbgcompile` is similar, however, the
-regex is compiled so it will spit out a good deal of debugging output. Unless
-you are intent on debugging the internals of the regex engine, this is likely
-only of academic interest.
-
- const free : (re : regex# -> void)
-
-`free` must be called on a compiled regex to release it's resources after you
-are finished using it.
-
- const exec : (re : regex#, str : byte[:] -> std.option(byte[:][:])
-
-`exec` runs the regex over the specified text, returning an `\`std.Some matches`
-if the text matched, or `std.None` if the text did not match. matches[0] is
-always the full text that was matched, and will always be returned regardless
-of whether capture groups are specified.
-
- const search : (re : regex#, str : byte[:] -> std.option(byte[:][:]))
-
-`search` searches for a matching sub-segment of the regex over the specified
-text, returning an `\`std.Some matches` if the text matched, or `std.None` if
-the text did not match. matches[0] is always the full text that was matched,
-and will always be returned regardless of whether capture groups are
-specified. `search` returns the the earliest match in the string provided.
-
-
- const sub : (re : regex#, str : byte[:], subst : byte[:][:] -> std.option(byte[:]))
- const sbsub : (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> bool)
-
-`sub` will take a pattern, an input string, and a set of substitutions, and
-attempt to match. If the match is successful, it will replace each group
-within `str` with `subst`, returning a freshly allocated string. `sbsub`
-behaves identically, however it inserts the new string into the string
-buffer provided, instead of allocating a new string.
-
-If there is no match, then `\`std.None` will be returned.
-
- const suball : (re : regex#, str : byte[:], subst : byte[:][:] -> byte[:])
- const sbsuball : (sb : std.strbuf#, re : regex#, str : byte[:], subst : byte[:][:] -> void)
-
-`suball` replaces every match within the string using the given substitutions.
-Only captured groups will be substituted. The remaining text will be left in
-place.
-
-Example
-------
-
-#### Pattern matching
-
-```{runmyr regex}
-use std
-use regex
-
-const main = {
- match regex.compile("ab(c+)")
- | `std.Ok re: runwith(re, "abccc")
- | `std.Fail m: std.fatal("Failed to compile regex\n")
- ;;
-}
-
-const runwith = {re, txt
- match regex.exec(re, txt)
- | `std.Some matches:
- std.put("matched {}, got {} matches\n", txt, matches.len)
- for m in matches
- std.put("Match: {}\n", m)
- ;;
- regex.matchfree(matches)
- | `std.None:
- std.put("%s did not match\n")
- ;;
-}
-```
-
-#### Substitution
-
-```{runmyr regex}
-use std
-use regex
-
-const main = {
- var re
-
- re = std.try(regex.compile("(a*)bc(d)e"))
- match regex.sub(re, "aaabcdef", ["HEY", "X"][:])
- | `std.Some sub:
- std.put("{}\n", sub[0])
- regex.matchfree(matches)
- | `std.None:
- std.fatal("should have matched")
- ;;
-}
-```
-
-```{runmyr regex}
-use std
-use regex
-
-const main = {
- var re, sub
-
- re = std.try(regex.compile("(b|e)"))
- sub = regex.suball(re, "aaabbbcdef", ["SUB"][:])
- std.put("subst: {}\n", sub)
- std.slfree(sub)
-}
-```