summary | refs | log | tree | commit | diff
path: root/parse
diff options
context:
space:
mode:
author: Ori Bernstein <ori@eigenstate.org> 2014-12-13 23:21:11 -0800
committer: Ori Bernstein <ori@eigenstate.org> 2014-12-13 23:21:44 -0800
commit: 24a3d753f2ac998c0b10579191984d79cfed0b7d (patch)
tree: 1b9733502a435451ce5ecf8785c14bbe1fc6705a /parse
parent: 008887ccd998a43db6980fa57f1e210ec26c4acf (diff)
download: mc-24a3d753f2ac998c0b10579191984d79cfed0b7d.tar.gz
Add plan9 instruction formats.
Work towards a plan9 port. This commit also fixes a bug with strings that contain '\0'. The commits got tangled, and I'm too lazy to detangle them.
Diffstat (limited to 'parse')
-rw-r--r-- parse/dump.c 2
-rw-r--r-- parse/gram.y 48
-rw-r--r-- parse/htab.c 33
-rw-r--r-- parse/node.c 9
-rw-r--r-- parse/parse.h 17
-rw-r--r-- parse/tok.c 26
-rw-r--r-- parse/use.c 6
-rw-r--r-- parse/util.c 14
8 files changed, 110 insertions, 45 deletions
diff --git a/parse/dump.c b/parse/dump.c
index d8b4257..6b6c40d 100644
--- a/parse/dump.c
+++ b/parse/dump.c
@@ -202,7 +202,7 @@ static void outnode(Node *n, FILE *fd, int depth)
case Lbool: fprintf(fd, " Lbool %s\n", n->lit.boolval ? "true" : "false"); break;
case Lint: fprintf(fd, " Lint %llu\n", n->lit.intval); break;
case Lflt: fprintf(fd, " Lflt %lf\n", n->lit.fltval); break;
- case Lstr: fprintf(fd, " Lstr %s\n", n->lit.strval); break;
+ case Lstr: fprintf(fd, " Lstr %s\n", n->lit.strval.buf); break;
case Llbl: fprintf(fd, " Llbl %s\n", n->lit.lblval); break;
case Lfunc:
fprintf(fd, " Lfunc\n");
diff --git a/parse/gram.y b/parse/gram.y
index 0c24c78..7313e26 100644
--- a/parse/gram.y
+++ b/parse/gram.y
@@ -253,7 +253,7 @@ decl : attrs Tvar decllist {
attrs : /* empty */ {$$.nstr = 0; $$.str = NULL;}
| Tattr attrs {
$$ = $2;
- lappend(&$$.str, &$$.nstr, strdup($1->str));
+ lappend(&$$.str, &$$.nstr, strdup($1->id));
}
;
@@ -267,8 +267,8 @@ decllist: declbody {
}
;
-use : Tuse Tident {$$ = mkuse($1->loc, $2->str, 0);}
- | Tuse Tstrlit {$$ = mkuse($1->loc, $2->str, 1);}
+use : Tuse Tident {$$ = mkuse($1->loc, $2->id, 0);}
+ | Tuse Tstrlit {$$ = mkuse($1->loc, $2->strval.buf, 1);}
;
optident: Tident {$$ = $1;}
@@ -279,8 +279,8 @@ package : Tpkg optident Tasn pkgbody Tendblk {
if (file->file.exports->name)
lfatal($1->loc, "Package already declared\n");
if ($2) {
- updatens(file->file.exports, $2->str);
- updatens(file->file.globls, $2->str);
+ updatens(file->file.exports, $2->id);
+ updatens(file->file.globls, $2->id);
}
}
;
@@ -339,8 +339,8 @@ typedeclcore
: name Tcolon type {$$ = mkdecl($1->loc, $1, $3);}
;
-name : Tident {$$ = mkname($1->loc, $1->str);}
- | Tident Tdot name {$$ = $3; setns($3, $1->str);}
+name : Tident {$$ = mkname($1->loc, $1->id);}
+ | Tident Tdot name {$$ = $3; setns($3, $1->id);}
;
implstmt: Timpl name type {
@@ -357,7 +357,7 @@ implbody
| implbody Tident Tasn exprln optendlns {
Node *d;
$$ = $1;
- d = mkdecl($2->loc, mkname($2->loc, $2->str), mktyvar($2->loc));
+ d = mkdecl($2->loc, mkname($2->loc, $2->id), mktyvar($2->loc));
d->decl.init = $4;
d->decl.isconst = 1;
lappend(&$$.nl, &$$.nn, d);
@@ -365,11 +365,11 @@ implbody
;
traitdef: Ttrait Tident generictype /* trait prototype */ {
- $$ = mktrait($1->loc, mkname($2->loc, $2->str), $3, NULL, 0, NULL, 0, 1);
+ $$ = mktrait($1->loc, mkname($2->loc, $2->id), $3, NULL, 0, NULL, 0, 1);
}
| Ttrait Tident generictype Tasn traitbody Tendblk /* trait definition */ {
size_t i;
- $$ = mktrait($1->loc, mkname($2->loc, $2->str), $3, NULL, 0, $5.nl, $5.nn, 0);
+ $$ = mktrait($1->loc, mkname($2->loc, $2->id), $3, NULL, 0, $5.nl, $5.nn, 0);
for (i = 0; i < $5.nn; i++) {
$5.nl[i]->decl.trait = $$;
$5.nl[i]->decl.isgeneric = 1;
@@ -382,7 +382,7 @@ traitbody
| traitbody Tident Tcolon type optendlns {
Node *d;
$$ = $1;
- d = mkdecl($2->loc, mkname($2->loc, $2->str), $4);
+ d = mkdecl($2->loc, mkname($2->loc, $2->id), $4);
d->decl.isgeneric = 1;
lappend(&$$.nl, &$$.nn, d);
}
@@ -398,13 +398,13 @@ tydef : Ttype typeid {$$ = $2;}
typeid : Tident {
$$.loc = $1->loc;
- $$.name = $1->str;
+ $$.name = $1->id;
$$.params = NULL;
$$.type = NULL;
}
| Tident Toparen typarams Tcparen {
$$.loc = $1->loc;
- $$.name = $1->str;
+ $$.name = $1->id;
$$.params = $3.types;
$$.nparams = $3.ntypes;
$$.type = NULL;
@@ -427,14 +427,14 @@ type : structdef
;
generictype
- : Ttyparam {$$ = mktyparam($1->loc, $1->str);}
+ : Ttyparam {$$ = mktyparam($1->loc, $1->id);}
| Ttyparam Twith name {
- $$ = mktyparam($1->loc, $1->str);
+ $$ = mktyparam($1->loc, $1->id);
addtrait($$, $3->name.name);
}
| Ttyparam Twith Toparen typaramlist Tcparen {
size_t i;
- $$ = mktyparam($1->loc, $1->str);
+ $$ = mktyparam($1->loc, $1->id);
for (i = 0; i < $4.nn; i++)
addtrait($$, $4.nl[i]->name.name);
}
@@ -453,7 +453,7 @@ compoundtype
| type Tosqbrac Tcolon Tcsqbrac {$$ = mktyslice($2->loc, $1);}
| type Tosqbrac expr Tcsqbrac {$$ = mktyarray($2->loc, $1, $3);}
| type Tderef {$$ = mktyptr($2->loc, $1);}
- | Tat Tident {$$ = mktyparam($1->loc, $2->str);}
+ | Tat Tident {$$ = mktyparam($1->loc, $2->id);}
| name {$$ = mktyunres($1->loc, $1, NULL, 0);}
| name Toparen typelist Tcparen {$$ = mktyunres($1->loc, $1, $3.types, $3.ntypes);}
;
@@ -539,7 +539,7 @@ unionelt /* nb: the ucon union type gets filled in when we have context */
| Tendln {$$ = NULL;}
;
-goto : Tgoto Tident {$$ = mkexpr($1->loc, Ojmp, mklbl($2->loc, $2->str), NULL);}
+goto : Tgoto Tident {$$ = mkexpr($1->loc, Ojmp, mklbl($2->loc, $2->id), NULL);}
;
retexpr : Tret expr {$$ = mkexpr($1->loc, Oret, $2, NULL);}
@@ -659,7 +659,7 @@ prefixexpr
postfixexpr
: postfixexpr Tdot Tident
- {$$ = mkexpr($1->loc, Omemb, $1, mkname($3->loc, $3->str), NULL);}
+ {$$ = mkexpr($1->loc, Omemb, $1, mkname($3->loc, $3->id), NULL);}
| postfixexpr Tinc
{$$ = mkexpr($1->loc, Opostinc, $1, NULL);}
| postfixexpr Tdec
@@ -685,7 +685,7 @@ arglist : asnexpr
atomicexpr
: Tident
- {$$ = mkexpr($1->loc, Ovar, mkname($1->loc, $1->str), NULL);}
+ {$$ = mkexpr($1->loc, Ovar, mkname($1->loc, $1->id), NULL);}
| literal
| Toparen expr Tcparen
{$$ = $2;}
@@ -719,10 +719,10 @@ literal : funclit {$$ = mkexpr($1->loc, Olit, $1, NULL);}
tuplit : Toparen tupbody Tcparen
{$$ = mkexprl($1->loc, Otup, $2.nl, $2.nn);}
-littok : Tstrlit {$$ = mkstr($1->loc, $1->str);}
+littok : Tstrlit {$$ = mkstr($1->loc, $1->strval);}
| Tchrlit {$$ = mkchar($1->loc, $1->chrval);}
| Tfloatlit {$$ = mkfloat($1->loc, $1->fltval);}
- | Tboollit {$$ = mkbool($1->loc, !strcmp($1->str, "true"));}
+ | Tboollit {$$ = mkbool($1->loc, !strcmp($1->id, "true"));}
| Tintlit {
$$ = mkint($1->loc, $1->intval);
if ($1->inttype)
@@ -778,7 +778,7 @@ structelts
;
structelt: optendlns Tdot Tident Tasn expr optendlns
- {$$ = mkidxinit($2->loc, mkname($3->loc, $3->str), $5);}
+ {$$ = mkidxinit($2->loc, mkname($3->loc, $3->id), $5);}
;
optendlns : /* none */
@@ -882,7 +882,7 @@ blkbody : decl {
;
label : Tcolon Tident
- {$$ = mklbl($2->loc, $2->str);}
+ {$$ = mklbl($2->loc, $2->id);}
;
%%
diff --git a/parse/htab.c b/parse/htab.c
index 0a8c740..c33fbcd 100644
--- a/parse/htab.c
+++ b/parse/htab.c
@@ -227,6 +227,39 @@ int streq(void *a, void *b)
return !strcmp(a, b);
}
+/* ELF-style shift-and-fold hash over all s->len bytes of a Str key. */
+ulong strlithash(void *_s)
+{
+	Str *str = _s;
+	ulong hash = 0;
+	ulong top, pos;
+
+	for (pos = 0; pos < str->len; pos++) {
+		hash = (hash << 4) + str->buf[pos];
+		/* fold bits 28-31 back into the low bits, then clear them */
+		top = hash & 0xF0000000;
+		if (top)
+			hash ^= top >> 24;
+		hash &= ~top;
+	}
+	return hash;
+}
+
+/* Hash-table equality for Str keys: same length and same bytes; embedded '\0' bytes count. */
+int strliteq(void *_a, void *_b)
+{
+	Str *a = _a, *b = _b;
+
+	if (a == b)
+		return 1;
+	if (a == NULL || b == NULL)
+		return 0;
+	if (a->len != b->len)
+		return 0;
+	/* compare the string contents, not the Str headers (len + pointer) */
+	return !memcmp(a->buf, b->buf, a->len);
+}
+
ulong ptrhash(void *key)
{
return inthash((intptr_t)key);
diff --git a/parse/node.c b/parse/node.c
index 8d9a173..65f93e5 100644
--- a/parse/node.c
+++ b/parse/node.c
@@ -214,13 +214,15 @@ Node *mklbl(Srcloc loc, char *lbl)
return mkexpr(loc, Olit, n, NULL);
}
+/*
+ * Builds an Lstr literal node holding a private copy of val.
+ * The copy is val.len bytes (embedded '\0' allowed) plus one extra
+ * terminating '\0' that is not counted in strval.len, so callers that
+ * print strval.buf with "%s" stay in bounds.
+ */
-Node *mkstr(Srcloc loc, char *val)
+Node *mkstr(Srcloc loc, Str val)
{
Node *n;
n = mknode(loc, Nlit);
n->lit.littype = Lstr;
- n->lit.strval = strdup(val);
+	n->lit.strval.len = val.len;
+	/* len + 1 also keeps the allocation non-empty when val.len == 0 */
+	n->lit.strval.buf = malloc(val.len + 1);
+	memcpy(n->lit.strval.buf, val.buf, val.len);
+	n->lit.strval.buf[val.len] = '\0';
return n;
}
@@ -368,7 +370,8 @@ int liteq(Node *a, Node *b)
case Lflt:
return a->lit.fltval == b->lit.fltval;
case Lstr:
- return !strcmp(a->lit.strval, b->lit.strval);
+ return a->lit.strval.len == b->lit.strval.len &&
+ !memcmp(a->lit.strval.buf, b->lit.strval.buf, a->lit.strval.len);
case Lfunc:
return a->lit.fnval == b->lit.fnval;
case Llbl:
diff --git a/parse/parse.h b/parse/parse.h
index 2026da8..2aa228e 100644
--- a/parse/parse.h
+++ b/parse/parse.h
@@ -15,6 +15,7 @@ typedef struct Srcloc Srcloc;
typedef struct Bitset Bitset;
typedef struct Htab Htab;
typedef struct Optctx Optctx;
+typedef struct Str Str;
typedef struct Tok Tok;
typedef struct Node Node;
@@ -64,6 +65,11 @@ struct Srcloc {
int file;
};
+struct Str { /* length-counted byte string, so it can hold embedded '\0' bytes */
+ size_t len; /* number of bytes in buf; a trailing '\0', if present, is not counted */
+ char *buf; /* the string bytes */
+};
+
typedef enum {
Visintern,
Visexport,
@@ -95,13 +101,14 @@ struct Htab {
struct Tok {
int type;
Srcloc loc;
- char *str;
+ char *id; /* token text as a C string; yyerror prints it in its "near" message when non-NULL */
/* values parsed out */
vlong intval;
Ty inttype; /* for explicitly specified suffixes */
double fltval;
uint32_t chrval;
+ Str strval; /* string literal contents; len excludes the terminating '\0' the lexer appends */
};
struct Stab {
@@ -222,7 +229,7 @@ struct Node {
uvlong intval;
double fltval;
uint32_t chrval;
- char *strval;
+ Str strval;
char *lblval;
int boolval;
Node *fnval;
@@ -396,6 +403,8 @@ void **htkeys(Htab *ht, size_t *nkeys);
int liteq(Node *a, Node *b);
ulong strhash(void *key);
int streq(void *a, void *b);
+ulong strlithash(void *key);
+int strliteq(void *a, void *b);
ulong ptrhash(void *key);
int ptreq(void *a, void *b);
ulong inthash(uint64_t key);
@@ -506,7 +515,7 @@ Node *mkidxinit(Srcloc l, Node *idx, Node *init);
Node *mkbool(Srcloc l, int val);
Node *mkint(Srcloc l, uint64_t val);
Node *mkchar(Srcloc l, uint32_t val);
-Node *mkstr(Srcloc l, char *s);
+Node *mkstr(Srcloc l, Str str);
Node *mkfloat(Srcloc l, double flt);
Node *mkfunc(Srcloc l, Node **args, size_t nargs, Type *ret, Node *body);
Node *mkname(Srcloc l, char *name);
@@ -582,6 +591,8 @@ void wrint(FILE *fd, long val);
long rdint(FILE *fd);
void wrstr(FILE *fd, char *val);
char *rdstr(FILE *fd);
+void wrstrbuf(FILE *fd, Str str);
+void rdstrbuf(FILE *fd, Str *str);
void wrflt(FILE *fd, double val);
double rdflt(FILE *fd);
void wrbool(FILE *fd, int val);
diff --git a/parse/tok.c b/parse/tok.c
index db9895f..f4ff221 100644
--- a/parse/tok.c
+++ b/parse/tok.c
@@ -233,7 +233,7 @@ static Tok *kwident(void)
if (!identstr(buf, sizeof buf))
return NULL;
t = mktok(kwd(buf));
- t->str = strdup(buf);
+ t->id = strdup(buf);
return t;
}
@@ -248,7 +248,8 @@ static void append(char **buf, size_t *len, size_t *sz, int c)
*buf = realloc(*buf, *sz);
}
- buf[0][len[0]++] = c;
+ buf[0][*len] = c;
+ (*len)++;
}
@@ -408,10 +409,13 @@ static Tok *strlit(void)
else
append(&buf, &len, &sz, c);
};
- append(&buf, &len, &sz, '\0');
-
t = mktok(Tstrlit);
- t->str = buf;
+ t->strval.len = len;
+
+ /* null terminator should not count towards length */
+ append(&buf, &len, &sz, '\0');
+ t->strval.buf = buf;
+ t->id = buf;
return t;
}
@@ -472,7 +476,7 @@ static Tok *charlit(void)
t = mktok(Tchrlit);
t->chrval = val;
- t->str = buf;
+ t->id = buf;
return t;
}
@@ -656,11 +660,11 @@ static Tok *number(int base)
/* we only support base 10 floats */
if (isfloat && base == 10) {
t = mktok(Tfloatlit);
- t->str = strdupn(&fbuf[start], fidx - start);
+ t->id = strdupn(&fbuf[start], fidx - start);
t->fltval = strtod(buf, NULL);
} else {
t = mktok(Tintlit);
- t->str = strdupn(&fbuf[start], fidx - start);
+ t->id = strdupn(&fbuf[start], fidx - start);
t->intval = strtoull(buf, NULL, base);
/* check suffixes:
* u -> unsigned
@@ -748,7 +752,7 @@ static Tok *typaram(void)
if (!identstr(buf, 1024))
return NULL;
t = mktok(Ttyparam);
- t->str = strdup(buf);
+ t->id = strdup(buf);
return t;
}
@@ -829,8 +833,8 @@ int yylex(void)
void yyerror(const char *s) /* prints "file:line: s" to stderr and exits with status 1 */
{
fprintf(stderr, "%s:%d: %s", filename, curloc.line, s);
- if (curtok->str)
- fprintf(stderr, " near \"%s\"", curtok->str);
+ if (curtok->id) /* only add the "near" hint when the current token has text */
+ fprintf(stderr, " near \"%s\"", curtok->id);
fprintf(stderr, "\n");
exit(1);
}
diff --git a/parse/use.c b/parse/use.c
index e0157d7..51007ad 100644
--- a/parse/use.c
+++ b/parse/use.c
@@ -453,7 +453,7 @@ static void pickle(FILE *fd, Node *n)
case Lchr: wrint(fd, n->lit.chrval); break;
case Lint: wrint(fd, n->lit.intval); break;
case Lflt: wrflt(fd, n->lit.fltval); break;
- case Lstr: wrstr(fd, n->lit.strval); break;
+ case Lstr: wrstrbuf(fd, n->lit.strval); break;
case Llbl: wrstr(fd, n->lit.lblval); break;
case Lbool: wrbool(fd, n->lit.boolval); break;
case Lfunc: pickle(fd, n->lit.fnval); break;
@@ -585,10 +585,10 @@ static Node *unpickle(FILE *fd)
case Lchr: n->lit.chrval = rdint(fd); break;
case Lint: n->lit.intval = rdint(fd); break;
case Lflt: n->lit.fltval = rdflt(fd); break;
- case Lstr: n->lit.strval = rdstr(fd); break;
+ case Lstr: rdstrbuf(fd, &n->lit.strval); break;
case Llbl: n->lit.lblval = rdstr(fd); break;
case Lbool: n->lit.boolval = rdbool(fd); break;
- case Lfunc: n->lit.fnval = unpickle(fd); break;
+ case Lfunc: n->lit.fnval = unpickle(fd); break;
}
break;
case Nloopstmt:
diff --git a/parse/util.c b/parse/util.c
index e9aa687..b43ee43 100644
--- a/parse/util.c
+++ b/parse/util.c
@@ -316,6 +316,20 @@ char *rdstr(FILE *fd)
}
}
+void wrstrbuf(FILE *fd, Str str) /* writes str as a length followed by its bytes; rdstrbuf reads in the same order */
+{
+ wrint(fd, str.len); /* length first, so the reader knows how many bytes to expect */
+ wrbuf(fd, str.buf, str.len); /* then str.len bytes of content */
+}
+
+/*
+ * Reads a string stored as a length followed by that many bytes.
+ * Fills *str with the length and a freshly allocated buffer that has one
+ * extra '\0' after the data; the terminator is not counted in str->len.
+ */
+void rdstrbuf(FILE *fd, Str *str)
+{
+	size_t n = rdint(fd);
+	char *bytes = xalloc(n + 1);
+
+	rdbuf(fd, bytes, n);
+	bytes[n] = '\0';
+	str->len = n;
+	str->buf = bytes;
+}
+
void wrflt(FILE *fd, double val)
{
byte buf[8];