summaryrefslogtreecommitdiff
path: root/lib/regex/redump.myr
blob: af8e2bfd38d8d90ae476cafcedc421d8a36c7cdd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
use std
use bio
use regex

/*
 * Entry point. Parses the command line, compiles the regex given
 * as the first positional argument, and dumps match information
 * for every line of input. Input comes from the files named by the
 * remaining positional arguments, or from fd 0 when there are none.
 */
const main = {args
	var cmd, comp
	var verbose
	var fd

	verbose = false
	cmd = std.optparse(args, &[
		.argdesc = "regex [inputs...]",
		.minargs = 1,
		.opts = [
			[.opt='v', .desc="dump verbose regex output"]
		][:],
	])
	for opt : cmd.opts
		match opt
		| ('v', _):	verbose = true
		| _:	std.fatal("Unknown argument\n")
		;;
	;;
	/* the -v flag is handed straight through as dbgcompile's second argument */
	comp = regex.dbgcompile(cmd.args[0], verbose)
	match comp
	| `std.Err m:	
		std.fatal("unable to compile regex: {}\n", m)
	| `std.Ok re:
		if cmd.args.len > 1
			/* args[0] is the pattern itself; only the rest name input files */
			runall(re, cmd.args[1:])
		else
			fd = bio.mkfile(0, bio.Rd)
			dump(re, fd)
			bio.close(fd)
		;;
	;;
}

/*
 * Opens every path in `files` for reading and dumps the regex
 * results for its contents, closing the file afterwards. A path
 * that cannot be opened aborts the program.
 */
const runall = {re, files
	for path : files
		match bio.open(path, bio.Rd)
		| `std.Err why:
			std.fatal("failed to open {}: {}\n", path, why)
		| `std.Ok input:
			dump(re, input)
			bio.close(input)
		;;
	;;
}

/*
 * Reads `fd` one line at a time until end of file or a read error,
 * runs the regex against each line, and prints the outcome with
 * show(). Each line is freed once it has been shown.
 */
const dump = {re, fd 
	while true
		match bio.readln(fd)
		| `std.Ok ln:
			/*
			 * NOTE(review): the result of regex.exec is handed to show()
			 * and never released in this file; confirm whether it needs
			 * to be freed.
			 */
			show(re, ln, regex.exec(re, ln))
			std.slfree(ln)
		| `std.Err `bio.Eof:
			break
		| `std.Err e:
			/* end the message with a newline so later output starts on its own line */
			std.put("error reading from input: {}\n", e)
			break
		;;
	;;
}

/*
 * Renders a match in a way that's pleasant to read. There are
 * two cases here.
 *
 * 1) The pattern matched. In this case, we want to show the
 *    regions of the pattern that contributed to the match.
 *
 * 2) The pattern did not match. In this case, we want to show
 *    the location of the failed match.
 *
 * In both cases, we render a caret that describes the position
 * of the match. Unfortunately, for the coverage code we don't
 * have a great way of mapping whole subranges, so the caret can
 * be slightly truncated. Fixing this isn't worth the complexity.
 */
const show = {re, ln, mg
	match mg
	| `std.Some rl:
		/* rl[0] is printed as the overall match, the rest as numbered groups */
		std.put("Matched: {}\n", rl[0])
		for var i = 1; i < rl.len; i++
			std.put("\tgroup {}: {}\n", i, rl[i])
		;;
		std.put("coverage:\n")
		std.put("\t{}\n", re.pat)
		showcoverage(re)
	| `std.None:
		std.put("Match failed at {}:\n", re.lastip)
		/* first the pattern, with a caret under the failing instruction's pattern index */
		std.put("\t{}\n", re.pat)
		showpos(re, re.pcidx[re.lastip])
		/* then the input line, with a caret one position before re.strp */
		std.put("\t{}\n", ln)
		showpos(re, re.strp - 1)
	;;
}

/* 
 * Simple position carets for failures: Draws out
 * an arrow of the form:
 *
 *    ~~~~~^
 * 
 * from the start of the line.
 */
const showpos = {re, idx
	var col

	/* indent to line up with the tab-prefixed text printed above */
	std.put("\t")
	/* one '~' for every column before the target position */
	col = 0
	while col < idx
		std.put("~")
		col++
	;;
	std.put("^\n")
}

/*
 * Coverage carets for success. This tries to output
 * a '^' for every section of the string that matched.
 *
 *   (this|that)
 *    ^^^^
 */
const showcoverage = {re
	var hit
	var idx

	/* one zero-initialized flag per byte of the pattern */
	hit = std.slzalloc(re.pat.len)
	for var ip = 0; ip < re.proglen; ip++
		/* only instructions recorded in the last thread's trace count */
		if !std.bshas(re.traces[re.lastthr], ip)
			continue
		;;
		idx = re.pcidx[ip]
		/* ignore indices that fall outside the pattern text */
		if idx >= 0 && idx < hit.len
			hit[idx] = true
		;;
	;;

	std.put("\t")
	for h : hit
		if h
			std.put("^")
		else
			std.put(" ")
		;;
	;;
	std.put("\n")
	/* hit was allocated above and is not used after this point */
	std.slfree(hit)
}