diff options
author | Ori Bernstein <ori@eigenstate.org> | 2014-06-18 01:10:30 -0400 |
---|---|---|
committer | Ori Bernstein <ori@eigenstate.org> | 2014-06-18 01:10:30 -0400 |
commit | f6e53709407611de940092c9d73e63926723252f (patch) | |
tree | 320c6d9474ba1eb51f2dc88640e9edd0d1672a55 | |
download | mc-f6e53709407611de940092c9d73e63926723252f.tar.gz |
Merge pull request #1 from akoshibe/master
added sockproto values
252 files changed, 26282 insertions, 0 deletions
diff --git a/6/Makefile b/6/Makefile new file mode 100644 index 0000000..a3c69c0 --- /dev/null +++ b/6/Makefile @@ -0,0 +1,11 @@ +INSTBIN=6m +BIN=6m +OBJ=isel.o \ + locs.o \ + main.o \ + ra.o \ + simp.o \ + +DEPS=../parse/libparse.a ../mi/libmi.a + +include ../mk/c.mk @@ -0,0 +1,234 @@ +#define Maxarg 4 /* maximum number of args an insn can have */ +#define Maxuse (2*Maxarg) /* maximum number of registers an insn can use or def */ +#define Maxdef (2*Maxarg) /* maximum number of registers an insn can use or def */ +#define Wordsz 4 /* the size of a "natural int" */ +#define Ptrsz 8 /* the size of a machine word (ie, pointer size) */ +#define Nsaved 13 /* number of registers saved in the ABI */ + +typedef size_t regid; + +typedef struct Insn Insn; +typedef struct Loc Loc; +typedef struct Func Func; +typedef struct Blob Blob; +typedef struct Isel Isel; +typedef struct Asmbb Asmbb; + +typedef enum { +#define Insn(val, fmt, use, def) val, +#include "insns.def" +#undef Insn +} AsmOp; + +typedef enum { +#define Reg(r, name, mode) r, +#include "regs.def" +#undef Reg + Nreg +} Reg; + +typedef enum { + Locnone, + Loclbl, /* label */ + Locreg, /* register */ + Locmem, /* reg offset mem */ + Locmeml, /* label offset mem */ + Loclit, /* literal value */ + Loclitl /* label address */ +} Loctype; + +typedef enum { + ModeNone, + ModeB, /* byte */ + ModeW, /* short */ + ModeL, /* long */ + ModeQ, /* quad */ + ModeF, /* float32 */ + ModeD, /* float64 */ + Nmode, +} Mode; + +typedef enum { + Classbad, + Classint, + Classflt, + Nclass, +} Rclass; + +/* a register, label, or memory location */ +struct Loc { + Loctype type; /* the type of loc */ + Mode mode; /* the mode of this location */ + void *list; + union { + char *lbl; /* for Loclbl, Loclitl */ + struct { /* for Locreg */ + regid id; + Reg colour; + } reg; + long lit; /* for Loclit */ + /* + * for Locmem, Locmeml. 
+ * address format is + * disp(base + index) + */ + struct { + /* only one of lbldisp and constdisp may be used */ + char *lbldisp; + long constdisp; + int scale; /* 0,1,2,4, or 8 */ + Loc *base; /* needed */ + Loc *idx; /* optional */ + } mem; + }; +}; + +struct Insn { + AsmOp op; + Loc *args[Maxarg]; + size_t nargs; +}; + +struct Func { + char *name; /* function name */ + int isexport; /* is this exported from the asm? */ + size_t stksz; /* stack size */ + Type *type; /* type of function */ + Htab *stkoff; /* Loc* -> int stackoff map */ + Node *ret; /* return value */ + Cfg *cfg; /* flow graph */ +}; + +struct Asmbb { + int id; /* unique identifier */ + char **lbls; /* list of BB labels */ + size_t nlbls; /* number of labels */ + Insn **il; /* instructions */ + size_t ni; /* number of instructions */ + + Bitset *pred; /* set of predecessor BB ids */ + Bitset *succ; /* set of successor BB ids */ + Bitset *use; /* registers used by this BB */ + Bitset *def; /* registers defined by this BB */ + Bitset *livein; /* variables live on entrance to BB */ + Bitset *liveout; /* variables live on exit from BB */ +}; + +/* instruction selection state */ +struct Isel { + Cfg *cfg; /* cfg built with nodes */ + + Asmbb **bb; /* 1:1 mappings with the Node bbs in the CFG */ + size_t nbb; + Asmbb *curbb; + + Node *ret; /* we store the return into here */ + Htab *spillslots; /* reg id => int stkoff */ + Htab *reglocs; /* decl id => Loc *reg */ + Htab *stkoff; /* decl id => int stkoff */ + Htab *globls; /* decl id => char *globlname */ + + /* increased when we spill */ + Loc *stksz; + Loc *calleesave[Nsaved]; + + /* register allocator state */ + + size_t *gbits; /* igraph matrix repr */ + regid **gadj; /* igraph adj set repr */ + size_t *ngadj; + int *degree; /* degree of nodes */ + Loc **aliasmap; /* mapping of aliases */ + + Bitset *shouldspill; /* the first registers we should try to spill */ + Bitset *neverspill; /* registers we should never spill */ + + Insn ***rmoves; + size_t 
*nrmoves; + + /* move sets */ + Insn **mcoalesced; + size_t nmcoalesced; + + Insn **mconstrained; + size_t nmconstrained; + + Insn **mfrozen; + size_t nmfrozen; + + Insn **mactive; + size_t nmactive; + + + /* worklists */ + Insn **wlmove; + size_t nwlmove; + + Loc **wlspill; + size_t nwlspill; + + Loc **wlfreeze; + size_t nwlfreeze; + + Loc **wlsimp; + size_t nwlsimp; + + Loc **selstk; + size_t nselstk; + + Bitset *coalesced; + Bitset *spilled; + Bitset *prepainted; /* locations that need to be a specific colour */ + Bitset *initial; /* initial set of locations used by this fn */ +}; + +/* entry points */ +void genblob(FILE *fd, Node *blob, Htab *globls, Htab *strtab); +void genasm(FILE *fd, Func *fn, Htab *globls, Htab *strtab); +void genstrings(FILE *fd, Htab *strtab); +void gen(Node *file, char *out); + +/* location generation */ +extern size_t maxregid; +extern Loc **locmap; /* mapping from reg id => Loc * */ + +char *genlblstr(char *buf, size_t sz); +Node *genlbl(void); +Loc *loclbl(Node *lbl); +Loc *locstrlbl(char *lbl); +Loc *locreg(Mode m); +Loc *locphysreg(Reg r); +Loc *locmem(long disp, Loc *base, Loc *idx, Mode mode); +Loc *locmeml(char *disp, Loc *base, Loc *idx, Mode mode); +Loc *locmems(long disp, Loc *base, Loc *idx, int scale, Mode mode); +Loc *locmemls(char *disp, Loc *base, Loc *idx, int scale, Mode mode); +Loc *loclit(long val, Mode m); +Loc *loclitl(char *lbl); +Loc *coreg(Reg r, Mode m); + +void locprint(FILE *fd, Loc *l, char spec); +void iprintf(FILE *fd, Insn *insn); + +/* emitting instructions */ +Insn *mkinsn(AsmOp op, ...); + +/* register allocation */ +extern char *regnames[]; /* name table */ +extern Mode regmodes[]; /* mode table */ +extern size_t modesize[]; /* mode size table */ +void regalloc(Isel *s); +Rclass rclass(Loc *l); + + +/* useful functions */ +size_t tysize(Type *t); +size_t size(Node *n); +int stacktype(Type *t); +int floattype(Type *t); +int stacknode(Node *n); +int floatnode(Node *n); +void breakhere(); +void 
dumpasm(Isel *s, FILE *fd); + +size_t alignto(size_t sz, Type *t); + diff --git a/6/insns.def b/6/insns.def new file mode 100644 index 0000000..c820269 --- /dev/null +++ b/6/insns.def @@ -0,0 +1,96 @@ +/* Table of instructions. Each instruction + is defined by the following macro: + Insn(enumval, fmt, use, def) + The format string 'fmt' has the following expansions: + %r - int reg + %f - xmm reg + %m - mem + %i - imm + %v - reg/mem + %u - reg/imm + %x - reg/freg/mem/imm + %[1-9]*t - Mode of an operand. The optional number + preceding it is the operand desired for + the mode. + 'use' and 'def' list what the instruction reads and writes: .l holds + operand indices and .r holds fixed registers. + Currently, there aren't any other attrs, because none were needed yet. + Eventually, they'll probably include flag setting and so on. + + For technical reasons, the indexing on use and def statements is 1-based, + instead of 0-based. (0 is the sentinel value). +*/ + +Insn(Inone, "BAD_INSN", Use(), Def()) +/* Note, the mov instruction is specified in an overly general manner. */ +Insn(Imov, "\tmov%t %x,%x\n", Use(.l={1}), Def(.l={2})) +Insn(Imovt, "PSEUDO: TRUNCATE\n", Use(.l={1}), Def(.l={2})) +Insn(Imovzx, "\tmovz%1t%2t %x,%x\n", Use(.l={1}), Def(.l={2})) +Insn(Imovsx, "\tmovs%1t%2t %x,%x\n", Use(.l={1}), Def(.l={2})) +Insn(Irepmovsb, "\trep movsb\n", Use(.r={Rrcx,Rrsi,Rrdi}), Def()) +Insn(Irepmovsw, "\trep movsw\n", Use(.r={Rrcx,Rrsi,Rrdi}), Def()) +Insn(Irepmovsl, "\trep movsl\n", Use(.r={Rrcx,Rrsi,Rrdi}), Def()) +Insn(Irepmovsq, "\trep movsq\n", Use(.r={Rrcx,Rrsi,Rrdi}), Def()) +Insn(Ilea, "\tlea%2t %m,%r\n", Use(.l={1}), Def(.l={2})) + +Insn(Iadd, "\tadd%t %x,%r\n", Use(.l={1,2}), Def(.l={2})) +Insn(Isub, "\tsub%t %x,%r\n", Use(.l={1,2}), Def(.l={2})) +Insn(Iimul, "\timul%t %x,%r\n", Use(.l={1,2}), Def(.l={2})) +/* there is no imul for 8 bit values. 
*/
+Insn(Iimul_r, "\timul%t %r\n", Use(.l={1},.r={Ral}), Def(.r={Rax}))
+Insn(Imul, "\tmul%t %r\n", Use(.l={1},.r={Reax}), Def(.r={Reax,Redx}))
+Insn(Idiv, "\tdiv%t %r\n", Use(.l={1},.r={Reax,Redx}), Def(.r={Reax,Redx}))
+Insn(Ineg, "\tneg%t %r\n", Use(.l={1}), Def(.l={1}))
+Insn(Iand, "\tand%t %x,%r\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Ior, "\tor%t %x,%r\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Ixor, "\txor%t %x,%r\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Inot, "\tnot%t %v\n", Use(.l={1}), Def(.l={1}))
+Insn(Ishl, "\tsal%2t %u,%r\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Isar, "\tsar%2t %u,%r\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Ishr, "\tshr%2t %u,%r\n", Use(.l={1,2}), Def(.l={2}))
+
+/* comparisons: only the flags are written, so nothing is def'd */
+Insn(Itest, "\ttest%t %x,%r\n", Use(.l={1,2}), Def(.l={}))
+Insn(Icmp, "\tcmp%t %x,%r\n", Use(.l={1,2}), Def(.l={}))
+
+Insn(Ipush, "\tpush%t %r\n", Use(.l={1}), Def())
+Insn(Ipop, "\tpop%t %r\n", Use(.l={1}), Def())
+
+/* set-byte-on-condition instructions (materialize a flag into operand 1) */
+Insn(Isetz, "\tsetz %v\n", Use(), Def(.l={1}))
+Insn(Isetnz, "\tsetnz %v\n", Use(), Def(.l={1}))
+Insn(Isetl, "\tsetl %v\n", Use(), Def(.l={1}))
+Insn(Isetle, "\tsetle %v\n", Use(), Def(.l={1}))
+Insn(Isetg, "\tsetg %v\n", Use(), Def(.l={1}))
+Insn(Isetge, "\tsetge %v\n", Use(), Def(.l={1}))
+Insn(Isetb, "\tsetb %v\n", Use(), Def(.l={1}))
+Insn(Isetbe, "\tsetbe %v\n", Use(), Def(.l={1}))
+Insn(Iseta, "\tseta %v\n", Use(), Def(.l={1}))
+Insn(Isetae, "\tsetae %v\n", Use(), Def(.l={1}))
+
+/* fp specific instructions */
+Insn(Imovs, "\tmovs%1t %x,%x\n", Use(.l={1}), Def(.l={2}))
+Insn(Icvttsd2si, "\tcvttsd2si%2t %x,%r\n", Use(.l={1}), Def(.l={2}))
+/* NOTE(review): operand 2 is a float register here, so %2t expands to the
+ * float mode suffix and the emitted text looks like "cvttsi2sdd"; the x86
+ * int-to-float conversions are documented as cvtsi2ss/cvtsi2sd. Confirm the
+ * intended mnemonic before relying on this entry. */
+Insn(Icvttsi2sd, "\tcvttsi2sd%2t %x,%f\n", Use(.l={1}), Def(.l={2}))
+Insn(Iadds, "\tadds%t %x,%f\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Isubs, "\tsubs%t %x,%f\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Imuls, "\tmuls%t %x,%f\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Idivs, "\tdivs%t %x,%f\n", Use(.l={1,2}), Def(.l={2}))
+Insn(Icomis, "\tcomis%t %x,%f\n", Use(.l={1,2}), Def())
+/* bitwise xor of float registers: %t appends the mode suffix, giving
+ * xorps/xorpd (this entry previously emitted "muls", a copy of Imuls) */
+Insn(Ixorp, "\txorp%t %x,%f\n", 
Use(.l={1,2}), Def(.l={2})) + +/* branch instructions */ +Insn(Icall, "\tcall %v\n", Use(.l={1}), Def(.r={Rrax,Reax,Rax,Ral,Rah})) +Insn(Icallind, "\tcall *%v\n", Use(.l={1}), Def(.r={Rrax,Reax,Rax,Ral,Rah})) +Insn(Ijmp, "\tjmp %v\n", Use(.l={1}), Def()) +Insn(Ijz, "\tjz %v\n", Use(.l={1}), Def()) +Insn(Ijnz, "\tjnz %v\n", Use(.l={1}), Def()) +Insn(Ijl, "\tjl %v\n", Use(.l={1}), Def()) +Insn(Ijle, "\tjle %v\n", Use(.l={1}), Def()) +Insn(Ijg, "\tjg %v\n", Use(.l={1}), Def()) +Insn(Ijge, "\tjge %v\n", Use(.l={1}), Def()) +Insn(Ijb, "\tjb %v\n", Use(.l={1}), Def()) +Insn(Ijbe, "\tjbe %v\n", Use(.l={1}), Def()) +Insn(Ija, "\tja %v\n", Use(.l={1}), Def()) +Insn(Ijae, "\tjae %v\n", Use(.l={1}), Def()) +Insn(Iret, "\tret\n", Use(), Def()) + +/* not really an insn... */ +Insn(Ilbl, "%v:\n", Use(), Def()) diff --git a/6/isel.c b/6/isel.c new file mode 100644 index 0000000..6e3f323 --- /dev/null +++ b/6/isel.c @@ -0,0 +1,1221 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <inttypes.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" +#include "opt.h" +#include "asm.h" + +#include "platform.h" + +/* string tables */ +char *insnfmts[] = { +#define Insn(val, fmt, use, def) fmt, +#include "insns.def" +#undef Insn +}; + +char* modenames[] = { + [ModeB] = "b", + [ModeW] = "w", + [ModeL] = "l", + [ModeQ] = "q", + [ModeF] = "s", + [ModeD] = "d" +}; + +/* forward decls */ +Loc *selexpr(Isel *s, Node *n); +static size_t writeblob(FILE *fd, Htab *globls, Htab *strtab, Node *blob); + +/* used to decide which operator is appropriate + * for implementing various conditional operators */ +struct { + AsmOp test; + AsmOp jmp; + AsmOp getflag; +} reloptab[Numops] = { + [Olnot] = {Itest, Ijz, Isetz}, /* lnot invalid for floats */ + /* signed int */ + [Oeq] = {Icmp, Ijz, Isetz}, + [One] = {Icmp, Ijnz, Isetnz}, + [Ogt] = 
{Icmp, Ijg, Isetg}, + [Oge] = {Icmp, Ijge, Isetge}, + [Olt] = {Icmp, Ijl, Isetl}, + [Ole] = {Icmp, Ijle, Isetle}, + /* unsigned int */ + [Oueq] = {Icmp, Ijz, Isetz}, + [Oune] = {Icmp, Ijnz, Isetnz}, + [Ougt] = {Icmp, Ija, Iseta}, + [Ouge] = {Icmp, Ijae, Isetae}, + [Oult] = {Icmp, Ijb, Isetb}, + [Oule] = {Icmp, Ijbe, Isetbe}, + /* float */ + [Ofeq] = {Icomis, Ijz, Isetz}, + [Ofne] = {Icomis, Ijnz, Isetnz}, + [Ofgt] = {Icomis, Ija, Iseta}, + [Ofge] = {Icomis, Ijae, Isetae}, + [Oflt] = {Icomis, Ijb, Isetb}, + [Ofle] = {Icomis, Ijbe, Isetbe}, +}; + +static Mode mode(Node *n) +{ + Type *t; + + t = tybase(exprtype(n)); + /* FIXME: What should the mode for, say, structs be when we have no + * intention of loading /through/ the pointer? For now, we'll just say it's + * the pointer mode, since we expect to address through the pointer */ + switch (t->type) { + case Tyfloat32: return ModeF; break; + case Tyfloat64: return ModeD; break; + default: + if (stacktype(t)) + return ModeQ; + switch (size(n)) { + case 1: return ModeB; break; + case 2: return ModeW; break; + case 4: return ModeL; break; + case 8: return ModeQ; break; + } + break; + } + return ModeQ; +} + +static int isintmode(Mode m) +{ + return m == ModeB || m == ModeW || m == ModeL || m == ModeQ; +} + +static int isfloatmode(Mode m) +{ + return m == ModeF || m == ModeD; +} + +static Loc *loc(Isel *s, Node *n) +{ + ssize_t stkoff; + Loc *l, *rip; + Node *v; + + switch (exprop(n)) { + case Ovar: + if (hthas(s->globls, n)) { + rip = locphysreg(Rrip); + l = locmeml(htget(s->globls, n), rip, NULL, mode(n)); + } else if (hthas(s->stkoff, n)) { + stkoff = ptoi(htget(s->stkoff, n)); + l = locmem(-stkoff, locphysreg(Rrbp), NULL, mode(n)); + } else { + if (!hthas(s->reglocs, n)) + htput(s->reglocs, n, locreg(mode(n))); + return htget(s->reglocs, n); + } + break; + case Olit: + v = n->expr.args[0]; + switch (v->lit.littype) { + case Lchr: l = loclit(v->lit.chrval, mode(n)); break; + case Lbool: l = loclit(v->lit.boolval, 
mode(n)); break;
+                case Lint: l = loclit(v->lit.intval, mode(n)); break;
+                default:
+                    die("Literal type %s should be blob", litstr(v->lit.littype));
+            }
+            break;
+        default:
+            die("Node %s not leaf in loc()", opstr(exprop(n)));
+            break;
+    }
+    return l;
+}
+
+/*
+ * Builds an Insn for 'op' from a va_list of Loc* operands terminated by a
+ * NULL sentinel. The operands are stored in order in args[] and counted in
+ * nargs. The caller owns the returned instruction.
+ */
+static Insn *mkinsnv(AsmOp op, va_list ap)
+{
+    Loc *l;
+    Insn *i;
+    size_t n;
+
+    i = malloc(sizeof *i);
+    if (!i)
+        die("Out of memory allocating instruction");
+    i->op = op;
+    n = 0;
+    while ((l = va_arg(ap, Loc*)) != NULL) {
+        /* args[] holds only Maxarg entries: an overlong operand list or a
+         * forgotten NULL sentinel must not scribble past the array */
+        if (n >= Maxarg) {
+            die("Too many operands for instruction (max %d)", Maxarg);
+            break;
+        }
+        i->args[n++] = l;
+    }
+    i->nargs = n;
+    return i;
+}
+
+/* Variadic front end to mkinsnv(): the operand list must end with NULL. */
+Insn *mkinsn(AsmOp op, ...)
+{
+    va_list ap;
+    Insn *i;
+
+    va_start(ap, op);
+    i = mkinsnv(op, ap);
+    va_end(ap);
+    return i;
+}
+
+/*
+ * Generates an instruction from the NULL-terminated operand list and appends
+ * it to the current basic block of 's'. With debugopt['i'] set, the
+ * instruction is also printed to stdout.
+ */
+void g(Isel *s, AsmOp op, ...)
+{
+    va_list ap;
+    Insn *i;
+
+    va_start(ap, op);
+    i = mkinsnv(op, ap);
+    va_end(ap);
+    if (debugopt['i']) {
+        printf("GEN ");
+        iprintf(stdout, i);
+    }
+    lappend(&s->curbb->il, &s->curbb->ni, i);
+}
+
+/* Moves src to dst, zero extending when the two modes differ. */
+static void movz(Isel *s, Loc *src, Loc *dst)
+{
+    if (src->mode == dst->mode)
+        g(s, Imov, src, dst, NULL);
+    else
+        g(s, Imovzx, src, dst, NULL);
+}
+
+/*
+ * Emits a move from 'a' into the register 'b', picking the float or integer
+ * move from b's mode. A register source is wrapped in a memory operand.
+ */
+static void load(Isel *s, Loc *a, Loc *b)
+{
+    Loc *l;
+
+    assert(b->type == Locreg);
+    /* NOTE(review): this addresses through the destination 'b' rather than
+     * the source register 'a', unlike stor() which addresses through its
+     * destination; looks like it may have been meant to be 'a' -- confirm. */
+    if (a->type == Locreg)
+        l = locmem(0, b, NULL, a->mode);
+    else
+        l = a;
+    if (isfloatmode(b->mode))
+        g(s, Imovs, l, b, NULL);
+    else
+        g(s, Imov, l, b, NULL);
+}
+
+/*
+ * Emits a move of the register or literal 'a' into 'b'. A register
+ * destination is treated as an address and wrapped in a memory operand.
+ */
+static void stor(Isel *s, Loc *a, Loc *b)
+{
+    Loc *l;
+
+    assert(a->type == Locreg || a->type == Loclit);
+    if (b->type == Locreg)
+        l = locmem(0, b, NULL, b->mode);
+    else
+        l = b;
+    if (isfloatmode(b->mode))
+        g(s, Imovs, a, l, NULL);
+    else
+        g(s, Imov, a, l, NULL);
+}
+
+/* ensures that a location is within a reg: always copies 'a' into a newly
+ * allocated register of the same mode and returns that register */
+static Loc *newr(Isel *s, Loc *a)
+{
+    Loc *r;
+
+    r = locreg(a->mode);
+    if (a->type == Locreg) {
+        if (isfloatmode(a->mode))
+            g(s, Imovs, a, r, NULL);
+        else
+            g(s, Imov, a, r, NULL);
+    } else {
+        load(s, a, r);
+    }
+    return r;
+}
+
+/* Returns 'a' itself when it is already a register, else a copy via newr(). */
+static Loc *inr(Isel *s, Loc *a)
+{
+    if (a->type == Locreg)
+        return a;
+    return newr(s, a);
+}
+
+/* ensures that a location is within a reg or an imm */
+static Loc 
*inri(Isel *s, Loc *a) +{ + if (a->type == Locreg || a->type == Loclit) + return a; + else + return newr(s, a); +} + +/* If we're testing equality, etc, it's a bit silly + * to generate the test, store it to a bite, expand it + * to the right width, and then test it again. Try to optimize + * for these common cases. + * + * if we're doing the optimization to avoid + * multiple tests, we want to eval the children + * of the first arg, instead of the first arg + * directly */ +static void selcjmp(Isel *s, Node *n, Node **args) +{ + Loc *a, *b; + Loc *l1, *l2; + AsmOp cond, jmp; + + cond = reloptab[exprop(args[0])].test; + jmp = reloptab[exprop(args[0])].jmp; + /* if we have a cond, we're knocking off the redundant test, + * and want to eval the children */ + if (cond) { + a = selexpr(s, args[0]->expr.args[0]); + if (args[0]->expr.nargs == 2) + b = selexpr(s, args[0]->expr.args[1]); + else + b = a; + a = newr(s, a); + } else { + cond = Itest; + jmp = Ijnz; + b = newr(s, selexpr(s, args[0])); /* cond */ + a = b; + } + + /* the jump targets will always be evaluated the same way */ + l1 = loclbl(args[1]); /* if true */ + l2 = loclbl(args[2]); /* if false */ + + g(s, cond, b, a, NULL); + g(s, jmp, l1, NULL); + g(s, Ijmp, l2, NULL); +} + +static Loc *binop(Isel *s, AsmOp op, Node *x, Node *y) +{ + Loc *a, *b; + + a = selexpr(s, x); + b = selexpr(s, y); + a = newr(s, a); + g(s, op, b, a, NULL); + return a; +} + +/* We have a few common cases to optimize here: + * Oaddr(expr) + * or: + * Oadd( + * reg, + * reg||const) + * + * or: + * Oadd( + * reg, + * Omul(reg, + * 2 || 4 || 8))) + */ +static int ismergablemul(Node *n, int *r) +{ + int v; + + if (exprop(n) != Omul) + return 0; + if (exprop(n->expr.args[1]) != Olit) + return 0; + if (n->expr.args[1]->expr.args[0]->type != Nlit) + return 0; + if (n->expr.args[1]->expr.args[0]->lit.littype != Lint) + return 0; + v = n->expr.args[1]->expr.args[0]->lit.intval; + if (v != 2 && v != 4 && v != 8) + return 0; + *r = v; + return 1; 
+} + +static Loc *memloc(Isel *s, Node *e, Mode m) +{ + Node **args; + Loc *l, *b, *o; /* location, base, offset */ + int scale; + + scale = 1; + l = NULL; + args = e->expr.args; + if (exprop(e) == Oadd) { + b = selexpr(s, args[0]); + if (ismergablemul(args[1], &scale)) + o = selexpr(s, args[1]->expr.args[0]); + else + o = selexpr(s, args[1]); + + if (b->type != Locreg) + b = inr(s, b); + if (o->type == Loclit) { + l = locmem(scale*o->lit, b, Rnone, m); + } else { + b = inr(s, b); + o = inr(s, o); + l = locmems(0, b, o, scale, m); + } + } else { + l = selexpr(s, e); + l = inr(s, l); + l = locmem(0, l, Rnone, m); + } + assert(l != NULL); + return l; +} + +static void blit(Isel *s, Loc *to, Loc *from, size_t dstoff, size_t srcoff, size_t sz) +{ + Loc *sp, *dp, *len; /* pointers to src, dst */ + + len = loclit(sz, ModeQ); + sp = newr(s, from); + dp = newr(s, to); + + /* length to blit */ + g(s, Imov, len, locphysreg(Rrcx), NULL); + /* source address with offset */ + if (srcoff) + g(s, Ilea, locmem(srcoff, sp, NULL, ModeQ), locphysreg(Rrsi), NULL); + else + g(s, Imov, sp, locphysreg(Rrsi), NULL); + /* dest address with offset */ + if (dstoff) + g(s, Ilea, locmem(dstoff, dp, NULL, ModeQ), locphysreg(Rrdi), NULL); + else + g(s, Imov, dp, locphysreg(Rrdi), NULL); + g(s, Irepmovsb, NULL); +} + +static int isfunc(Isel *s, Node *n) +{ + Node *d; + + if (exprop(n) != Ovar) + return 0; + if (!hthas(s->globls, n)) + return 0; + d = decls[n->expr.did]; + if (d && d->decl.isconst) + return tybase(decltype(d))->type == Tyfunc; + return 0; +} + +static void call(Isel *s, Node *n) +{ + AsmOp op; + Loc *f; + + if (isfunc(s, n)) { + op = Icall; + f = locmeml(htget(s->globls, n), NULL, NULL, mode(n)); + } else { + op = Icallind; + f = selexpr(s, n); + } + g(s, op, f, NULL); +} + +static Loc *gencall(Isel *s, Node *n) +{ + Loc *src, *dst, *arg; /* values we reduced */ + Loc *retloc, *rsp, *ret; /* hard-coded registers */ + Loc *stkbump; /* calculated stack offset */ + int argsz, argoff; 
+ size_t i; + + rsp = locphysreg(Rrsp); + if (tybase(exprtype(n))->type == Tyvoid) { + retloc = NULL; + ret = NULL; + } else if (stacktype(exprtype(n))) { + retloc = locphysreg(Rrax); + ret = locreg(ModeQ); + } else if (floattype(exprtype(n))) { + retloc = coreg(Rxmm0d, mode(n)); + ret = locreg(mode(n)); + } else { + retloc = coreg(Rrax, mode(n)); + ret = locreg(mode(n)); + } + argsz = 0; + /* Have to calculate the amount to bump the stack + * pointer by in one pass first, otherwise if we push + * one at a time, we evaluate the args in reverse order. + * Not good. + * + * Skip the first operand, since it's the function itself */ + for (i = 1; i < n->expr.nargs; i++) { + argsz = align(argsz, min(size(n->expr.args[i]), Ptrsz)); + argsz += size(n->expr.args[i]); + } + argsz = align(argsz, 16); + stkbump = loclit(argsz, ModeQ); + if (argsz) + g(s, Isub, stkbump, rsp, NULL); + + /* Now, we can evaluate the arguments */ + argoff = 0; + for (i = 1; i < n->expr.nargs; i++) { + arg = selexpr(s, n->expr.args[i]); + argoff = align(argoff, min(size(n->expr.args[i]), Ptrsz)); + if (stacknode(n->expr.args[i])) { + src = locreg(ModeQ); + g(s, Ilea, arg, src, NULL); + blit(s, rsp, src, argoff, 0, size(n->expr.args[i])); + } else { + dst = locmem(argoff, rsp, NULL, arg->mode); + arg = inri(s, arg); + stor(s, arg, dst); + } + argoff += size(n->expr.args[i]); + } + call(s, n->expr.args[0]); + if (argsz) + g(s, Iadd, stkbump, rsp, NULL); + if (retloc) { + if (isfloatmode(retloc->mode)) + g(s, Imovs, retloc, ret, NULL); + else + g(s, Imov, retloc, ret, NULL); + } + return ret; +} + +Loc *selexpr(Isel *s, Node *n) +{ + Loc *a, *b, *c, *d, *r; + Loc *eax, *edx, *cl; /* x86 wants some hard-coded regs */ + Node **args; + + args = n->expr.args; + eax = locphysreg(Reax); + edx = locphysreg(Redx); + cl = locphysreg(Rcl); + r = NULL; + switch (exprop(n)) { + case Oadd: r = binop(s, Iadd, args[0], args[1]); break; + case Osub: r = binop(s, Isub, args[0], args[1]); break; + case Obor: r = 
binop(s, Ior, args[0], args[1]); break;
+        case Oband: r = binop(s, Iand, args[0], args[1]); break;
+        case Obxor: r = binop(s, Ixor, args[0], args[1]); break;
+        case Omul:
+            if (size(args[0]) == 1) {
+                /* byte-sized multiply goes through the one-operand imul,
+                 * which works on al */
+                a = selexpr(s, args[0]);
+                b = selexpr(s, args[1]);
+
+                c = locphysreg(Ral);
+                r = locreg(a->mode);
+                g(s, Imov, a, c, NULL);
+                g(s, Iimul_r, b, NULL);
+                g(s, Imov, c, r, NULL);
+            } else {
+                r = binop(s, Iimul, args[0], args[1]);
+            }
+            break;
+        case Odiv:
+        case Omod:
+            /* these get clobbered by the div insn */
+            a = selexpr(s, args[0]);
+            b = selexpr(s, args[1]);
+            b = newr(s, b);
+            c = coreg(Reax, mode(n));
+            r = locreg(a->mode);
+            /* clear the upper half of the dividend before loading it */
+            if (r->mode == ModeB)
+                g(s, Ixor, eax, eax, NULL);
+            else
+                g(s, Ixor, edx, edx, NULL);
+            g(s, Imov, a, c, NULL);
+            g(s, Idiv, b, NULL);
+            /* pick the result register: the eax-class register for Odiv,
+             * the edx-class one for Omod (ah for byte-sized Omod) */
+            if (exprop(n) == Odiv)
+                d = coreg(Reax, mode(n));
+            else if (r->mode != ModeB)
+                d = coreg(Redx, mode(n));
+            else
+                d = locphysreg(Rah);
+            g(s, Imov, d, r, NULL);
+            break;
+        case Oneg:
+            r = selexpr(s, args[0]);
+            r = newr(s, r);
+            g(s, Ineg, r, NULL);
+            break;
+
+        /* fp expressions */
+        case Ofadd: r = binop(s, Iadds, args[0], args[1]); break;
+        case Ofsub: r = binop(s, Isubs, args[0], args[1]); break;
+        case Ofmul: r = binop(s, Imuls, args[0], args[1]); break;
+        case Ofdiv: r = binop(s, Idivs, args[0], args[1]); break;
+        case Ofneg:
+            /* negate by xoring the sign bit (bit 31 or bit 63) */
+            r = selexpr(s, args[0]);
+            r = newr(s, r);
+            a = NULL;
+            b = NULL;
+            if (mode(args[0]) == ModeF) {
+                a = locreg(ModeF);
+                b = loclit(1LL << (31), ModeF);
+                /* g() reads operands until it sees NULL, so the sentinel
+                 * is mandatory */
+                g(s, Imovs, r, a, NULL);
+            } else if (mode(args[0]) == ModeD) {
+                a = locreg(ModeQ);
+                /* shift in unsigned arithmetic: 1LL << 63 overflows a
+                 * signed long long, which is undefined behavior */
+                b = loclit((long)(1ULL << 63), ModeQ);
+                g(s, Imov, r, a, NULL);
+            }
+            g(s, Ixor, b, a, NULL);
+            g(s, Imov, a, r, NULL);
+            break;
+        case Obsl:
+        case Obsr:
+            a = newr(s, selexpr(s, args[0]));
+            b = selexpr(s, args[1]);
+            /* a non-literal shift count has to be moved into cl */
+            if (b->type == Loclit) {
+                d = b;
+            } else {
+                c = coreg(Rcl, b->mode);
+                g(s, Imov, b, c, NULL);
+                d = cl;
+            }
+            if (exprop(n) == Obsr) {
+                if (istysigned(n->expr.type))
+                    g(s, Isar, d, a, NULL);
+                else
+                    g(s, Ishr, d, a, 
NULL); + } else { + g(s, Ishl, d, a, NULL); + } + r = a; + break; + case Obnot: + r = selexpr(s, args[0]); + r = newr(s, r); + g(s, Inot, r, NULL); + break; + + case Oderef: + r = memloc(s, args[0], mode(n)); + break; + + case Oaddr: + a = selexpr(s, args[0]); + if (a->type == Loclbl || (a->type == Locmeml && !a->mem.base)) { + r = loclitl(a->lbl); + } else { + r = locreg(ModeQ); + g(s, Ilea, a, r, NULL); + } + break; + + case Olnot: + a = newr(s, selexpr(s, args[0])); + b = locreg(ModeB); + r = locreg(mode(n)); + /* lnot only valid for integer-like values */ + g(s, reloptab[exprop(n)].test, a, a, NULL); + g(s, reloptab[exprop(n)].getflag, b, NULL); + movz(s, b, r); + break; + + case Oeq: case One: case Ogt: case Oge: case Olt: case Ole: + case Ofeq: case Ofne: case Ofgt: case Ofge: case Oflt: case Ofle: + case Oueq: case Oune: case Ougt: case Ouge: case Oult: case Oule: + a = selexpr(s, args[0]); + b = selexpr(s, args[1]); + a = newr(s, a); + c = locreg(ModeB); + r = locreg(mode(n)); + g(s, reloptab[exprop(n)].test, b, a, NULL); + g(s, reloptab[exprop(n)].getflag, c, NULL); + movz(s, c, r); + return r; + + case Oasn: /* relabel */ + die("Unimplemented op %s", opstr(exprop(n))); + break; + case Oset: + assert(exprop(args[0]) == Ovar || exprop(args[0]) == Oderef); + b = selexpr(s, args[1]); + if (exprop(args[0]) == Oderef) + a = memloc(s, args[0]->expr.args[0], mode(n)); + else + a = selexpr(s, args[0]); + b = inri(s, b); + if (isfloatmode(b->mode)) + g(s, Imovs, b, a, NULL); + else + g(s, Imov, b, a, NULL); + r = b; + break; + case Ocall: + r = gencall(s, n); + break; + case Ojmp: + g(s, Ijmp, a = loclbl(args[0]), NULL); + break; + case Ocjmp: + selcjmp(s, n, args); + break; + + case Olit: /* fall through */ + r = loc(s, n); + break; + case Ovar: + if (isfunc(s, n)) { + r = locreg(ModeQ); + a = loc(s, n); + g(s, Ilea, a, r, NULL); + } else { + r = loc(s, n); + } + break; + case Olbl: + r = loclbl(args[0]); + break; + case Oblit: + a = selexpr(s, args[0]); + r = 
selexpr(s, args[1]); + blit(s, a, r, 0, 0, args[2]->expr.args[0]->lit.intval); + break; + case Otrunc: + a = selexpr(s, args[0]); + a = inr(s, a); + r = locreg(mode(n)); + g(s, Imov, a, r, NULL); + break; + case Ozwiden: + a = selexpr(s, args[0]); + a = inr(s, a); + r = locreg(mode(n)); + movz(s, a, r); + break; + case Oswiden: + a = selexpr(s, args[0]); + a = inr(s, a); + r = locreg(mode(n)); + g(s, Imovsx, a, r, NULL); + break; + case Oint2flt: + a = selexpr(s, args[0]); + b = locreg(ModeQ); + r = locreg(mode(n)); + g(s, Imovs, a, b, NULL); + g(s, Icvttsi2sd, b, r, NULL); + break; + case Oflt2int: + a = selexpr(s, args[0]); + b = locreg(ModeQ); + r = locreg(mode(n)); + g(s, Icvttsd2si, a, b, NULL); + g(s, Imov, b, r, NULL); + break; + + /* These operators should never show up in the reduced trees, + * since they should have been replaced with more primitive + * expressions by now */ + case Obad: case Oret: case Opreinc: case Opostinc: case Opredec: + case Opostdec: case Olor: case Oland: case Oaddeq: + case Osubeq: case Omuleq: case Odiveq: case Omodeq: case Oboreq: + case Obandeq: case Obxoreq: case Obsleq: case Obsreq: case Omemb: + case Oslice: case Oidx: case Osize: case Numops: + case Oucon: case Ouget: case Otup: case Oarr: case Ostruct: + case Oslbase: case Osllen: case Ocast: + case Obreak: case Ocontinue: + dump(n, stdout); + die("Should not see %s in isel", opstr(exprop(n))); + break; + } + return r; +} + +void locprint(FILE *fd, Loc *l, char spec) +{ + assert(l->mode); + switch (l->type) { + case Loclitl: + assert(spec == 'i' || spec == 'x' || spec == 'u'); + fprintf(fd, "$%s", l->lbl); + break; + case Loclbl: + assert(spec == 'm' || spec == 'v' || spec == 'x'); + fprintf(fd, "%s", l->lbl); + break; + case Locreg: + assert((spec == 'r' && isintmode(l->mode)) || + (spec == 'f' && isfloatmode(l->mode)) || + spec == 'v' || + spec == 'x' || + spec == 'u'); + if (l->reg.colour == Rnone) + fprintf(fd, "%%P.%zd", l->reg.id); + else + fprintf(fd, "%s", 
regnames[l->reg.colour]);
+            break;
+        case Locmem:
+        case Locmeml:
+            assert(spec == 'm' || spec == 'v' || spec == 'x');
+            /* displacement: a constant for Locmem, a label for Locmeml */
+            if (l->type == Locmem) {
+                if (l->mem.constdisp)
+                    fprintf(fd, "%ld", l->mem.constdisp);
+            } else {
+                if (l->mem.lbldisp)
+                    fprintf(fd, "%s", l->mem.lbldisp);
+            }
+            /* "(base[,idx][,scale])" */
+            if (l->mem.base) {
+                fprintf(fd, "(");
+                locprint(fd, l->mem.base, 'r');
+                if (l->mem.idx) {
+                    fprintf(fd, ",");
+                    locprint(fd, l->mem.idx, 'r');
+                }
+                if (l->mem.scale > 1)
+                    fprintf(fd, ",%d", l->mem.scale);
+                fprintf(fd, ")");
+            } else if (l->type != Locmeml) {
+                die("Only locmeml can have unspecified base reg");
+            }
+            break;
+        case Loclit:
+            assert(spec == 'i' || spec == 'x' || spec == 'u');
+            fprintf(fd, "$%ld", l->lit);
+            break;
+        case Locnone:
+            die("Bad location in locprint()");
+            break;
+    }
+}
+
+/* True when a and b are in the same register class but have different modes. */
+int subreg(Loc *a, Loc *b)
+{
+    return rclass(a) == rclass(b) && a->mode != b->mode;
+}
+
+/*
+ * Prints 'insn' to 'fd' by expanding its format string from insnfmts.
+ * Before printing, a few instructions are adjusted in place (the Insn is
+ * modified) and moves from a register to itself are dropped entirely.
+ */
+void iprintf(FILE *fd, Insn *insn)
+{
+    char *p;
+    int i;
+    int modeidx;
+
+    /* x64 has a quirk; it has no movzlq because mov zero extends. This
+     * means that we need to do a movl when we really want a movzlq. Since
+     * we don't know the name of the reg to use, we need to sub it in when
+     * writing... */
+    switch (insn->op) {
+        case Imovzx:
+            if (insn->args[0]->mode == ModeL && insn->args[1]->mode == ModeQ) {
+                if (insn->args[1]->reg.colour) {
+                    insn->op = Imov;
+                    insn->args[1] = coreg(insn->args[1]->reg.colour, ModeL);
+                }
+            }
+            break;
+        case Imovs:
+            /* Imovs is also used for loads and stores. For a memory
+             * operand reg.colour aliases other members of the Loc union,
+             * so only compare colours of real, already coloured registers;
+             * otherwise a genuine move could be dropped by accident. */
+            if (insn->args[0]->type != Locreg || insn->args[1]->type != Locreg)
+                break;
+            if (insn->args[0]->reg.colour == Rnone || insn->args[1]->reg.colour == Rnone)
+                break;
+            /* moving a reg to itself is dumb. */
+            if (insn->args[0]->reg.colour == insn->args[1]->reg.colour)
+                return;
+            break;
+        case Imov:
+            assert(!isfloatmode(insn->args[1]->mode));
+            if (insn->args[0]->type != Locreg || insn->args[1]->type != Locreg)
+                break;
+            if (insn->args[0]->reg.colour == Rnone || insn->args[1]->reg.colour == Rnone)
+                break;
+            /* if one reg is a subreg of another, we can just use the right
+             * mode to move between them. 
*/ + if (subreg(insn->args[0], insn->args[1])) + insn->args[0] = coreg(insn->args[0]->reg.colour, insn->args[1]->mode); + /* moving a reg to itself is dumb. */ + if (insn->args[0]->reg.colour == insn->args[1]->reg.colour) + return; + break; + default: + break; + } + p = insnfmts[insn->op]; + i = 0; + modeidx = 0; + for (; *p; p++) { + if (*p != '%') { + fputc(*p, fd); + continue; + } + + /* %-formating */ + p++; + switch (*p) { + case '\0': + goto done; /* skip the final p++ */ + case 'r': /* int register */ + case 'f': /* float register */ + case 'm': /* memory */ + case 'i': /* imm */ + case 'v': /* reg/mem */ + case 'u': /* reg/imm */ + case 'x': /* reg/mem/imm */ + locprint(fd, insn->args[i], *p); + i++; + break; + case 't': + default: + /* the asm description uses 1-based indexing, so that 0 + * can be used as a sentinel. */ + if (isdigit(*p)) + modeidx = strtol(p, &p, 10) - 1; + + if (*p == 't') + fputs(modenames[insn->args[modeidx]->mode], fd); + else + die("Invalid %%-specifier '%c'", *p); + break; + } + } +done: + return; +} + +static void isel(Isel *s, Node *n) +{ + switch (n->type) { + case Nexpr: + selexpr(s, n); + break; + case Ndecl: + break; + default: + die("Bad node type in isel()"); + break; + } +} + +Reg savedregs[] = { + Rrcx, Rrdx, Rrbx, Rrsi, Rrdi, Rr8, Rr9, Rr10, Rr11, Rr12, Rr13, Rr14, Rr15, + /* + Rxmm0d, Rxmm1d, Rxmm2d, Rxmm3d, Rxmm4d, Rxmm5d, Rxmm6d, Rxmm7d, + Rxmm8d, Rxmm9d, Rxmm10d, Rxmm11d, Rxmm12d, Rxmm13d, Rxmm14d, Rxmm15d, + */ +}; + +static void prologue(Isel *s, size_t sz) +{ + Loc *rsp; + Loc *rbp; + Loc *stksz; + size_t i; + + rsp = locphysreg(Rrsp); + rbp = locphysreg(Rrbp); + stksz = loclit(sz, ModeQ); + /* enter function */ + g(s, Ipush, rbp, NULL); + g(s, Imov, rsp, rbp, NULL); + g(s, Isub, stksz, rsp, NULL); + /* save registers */ + for (i = 0; i < sizeof(savedregs)/sizeof(savedregs[0]); i++) { + s->calleesave[i] = locreg(ModeQ); + g(s, Imov, locphysreg(savedregs[i]), s->calleesave[i], NULL); + } + s->stksz = stksz; /* need 
to update if we spill */
+}
+
+/*
+ * Emits the function exit sequence: moves the return value (if any) into
+ * the return register, restores the registers saved in s->calleesave,
+ * resets the stack pointer from rbp, pops rbp and returns.
+ */
+static void epilogue(Isel *s)
+{
+    Loc *rsp, *rbp;
+    Loc *ret;
+    size_t i;
+
+    rsp = locphysreg(Rrsp);
+    rbp = locphysreg(Rrbp);
+    if (s->ret) {
+        ret = loc(s, s->ret);
+        /* floats are returned through xmm0, everything else through the
+         * rax family */
+        if (floattype(exprtype(s->ret)))
+            g(s, Imovs, ret, coreg(Rxmm0d, ret->mode), NULL);
+        else
+            g(s, Imov, ret, coreg(Rax, ret->mode), NULL);
+    }
+    /* restore registers */
+    for (i = 0; i < Nsaved; i++)
+        g(s, Imov, s->calleesave[i], locphysreg(savedregs[i]), NULL);
+    /* leave function */
+    g(s, Imov, rbp, rsp, NULL);
+    g(s, Ipop, rbp, NULL);
+    g(s, Iret, NULL);
+}
+
+/*
+ * Writes the assembly for 'fn' to 'fd': an optional .globl directive (for
+ * exported functions and for Symprefix "main"), the function label, then
+ * the labels and instructions of every basic block in order.
+ */
+static void writeasm(FILE *fd, Isel *s, Func *fn)
+{
+    size_t i, j;
+
+    if (fn->isexport || !strcmp(fn->name, Symprefix "main"))
+        fprintf(fd, ".globl %s\n", fn->name);
+    fprintf(fd, "%s:\n", fn->name);
+    for (j = 0; j < s->cfg->nbb; j++) {
+        for (i = 0; i < s->bb[j]->nlbls; i++)
+            fprintf(fd, "%s:\n", s->bb[j]->lbls[i]);
+        for (i = 0; i < s->bb[j]->ni; i++)
+            iprintf(fd, s->bb[j]->il[i]);
+    }
+}
+
+/*
+ * Creates the Asmbb that mirrors 'bb': same id, duplicated pred/succ sets
+ * and a copy of the label array. The instruction list starts out empty.
+ */
+static Asmbb *mkasmbb(Bb *bb)
+{
+    Asmbb *as;
+
+    as = zalloc(sizeof(Asmbb));
+    as->id = bb->id;
+    as->pred = bsdup(bb->pred);
+    as->succ = bsdup(bb->succ);
+    as->lbls = memdup(bb->lbls, bb->nlbls*sizeof(char*));
+    as->nlbls = bb->nlbls;
+    return as;
+}
+
+/*
+ * Writes the 'sz' bytes at 'p' as .ascii directives, 60 bytes per
+ * directive. Quotes and backslashes are backslash-escaped, and bytes that
+ * are not printable are written as three-digit octal escapes.
+ */
+static void writebytes(FILE *fd, char *p, size_t sz)
+{
+    size_t i;
+
+    for (i = 0; i < sz; i++) {
+        if (i % 60 == 0)
+            fprintf(fd, "\t.ascii \"");
+        if (p[i] == '"' || p[i] == '\\')
+            fprintf(fd, "\\");
+        /* plain char may be signed; isprint() is only defined for values
+         * representable as unsigned char (or EOF) */
+        if (isprint((unsigned char)p[i]))
+            fprintf(fd, "%c", p[i]);
+        else
+            fprintf(fd, "\\%03o", (uint8_t)p[i] & 0xff);
+        /* line wrapping for readability */
+        if (i % 60 == 59 || i == sz - 1)
+            fprintf(fd, "\"\n");
+    }
+}
+
+/*
+ * Writes the literal node 'v' to 'fd' as assembler data, sized according to
+ * 'ty', and returns tysize(ty).
+ */
+static size_t writelit(FILE *fd, Htab *strtab, Node *v, Type *ty)
+{
+    char buf[128];
+    char *lbl;
+    size_t sz;
+    /* data directive for each supported integer size in bytes */
+    char *intsz[] = {
+        [1] = ".byte",
+        [2] = ".short",
+        [4] = ".long",
+        [8] = ".quad"
+    };
+    /* used to reinterpret float bits as integers for printing */
+    union {
+        float fv;
+        double dv;
+        uint64_t qv;
+        uint32_t lv;
+    } u;
+
+    assert(v->type == Nlit);
+    sz = tysize(ty);
+    switch (v->lit.littype) {
+
case Lint: fprintf(fd, "\t%s %lld\n", intsz[sz], v->lit.intval); break; + case Lbool: fprintf(fd, "\t.byte %d\n", v->lit.boolval); break; + case Lchr: fprintf(fd, "\t.long %d\n", v->lit.chrval); break; + case Lflt: + if (tybase(v->lit.type)->type == Tyfloat32) { + u.fv = v->lit.fltval; + fprintf(fd, "\t.long 0x%"PRIx32"\n", u.lv); + } else if (tybase(v->lit.type)->type == Tyfloat64) { + u.dv = v->lit.fltval; + fprintf(fd, "\t.quad 0x%"PRIx64"\n", u.qv); + } + break; + case Lstr: + if (hthas(strtab, v->lit.strval)) { + lbl = htget(strtab, v->lit.strval); + } else { + lbl = genlblstr(buf, sizeof buf); + htput(strtab, v->lit.strval, strdup(lbl)); + } + fprintf(fd, "\t.quad %s\n", lbl); + fprintf(fd, "\t.quad %zd\n", strlen(v->lit.strval)); + break; + case Lfunc: + die("Generating this shit ain't ready yet "); + break; + case Llbl: + die("Can't generate literal labels, ffs. They're not data."); + break; + } + return sz; +} + +static size_t writepad(FILE *fd, size_t sz) +{ + assert((ssize_t)sz >= 0); + if (sz > 0) + fprintf(fd, "\t.fill %zd,1,0\n", sz); + return sz; +} + +static size_t getintlit(Node *n, char *failmsg) +{ + if (exprop(n) != Olit) + fatal(n->line, "%s", failmsg); + n = n->expr.args[0]; + if (n->lit.littype != Lint) + fatal(n->line, "%s", failmsg); + return n->lit.intval; +} + +static size_t writeslice(FILE *fd, Htab *globls, Htab *strtab, Node *n) +{ + Node *base, *lo, *hi; + ssize_t loval, hival, sz; + char *lbl; + + base = n->expr.args[0]; + lo = n->expr.args[1]; + hi = n->expr.args[2]; + + /* by this point, all slicing operations should have had their bases + * pulled out, and we should have vars with their pseudo-decls in their + * place */ + if (exprop(base) != Ovar || !base->expr.isconst) + fatal(base->line, "slice base is not a constant value"); + loval = getintlit(lo, "lower bound in slice is not constant literal"); + hival = getintlit(hi, "upper bound in slice is not constant literal"); + sz = tysize(tybase(exprtype(base))->sub[0]); + + lbl = 
htget(globls, base); + fprintf(fd, "\t.quad %s + (%zd*%zd)\n", lbl, loval, sz); + fprintf(fd, "\t.quad %zd\n", (hival - loval)); + return size(n); +} + +static size_t writestruct(FILE *fd, Htab *globls, Htab *strtab, Node *n) +{ + Type *t; + Node **dcl; + int found; + size_t i, j; + size_t sz, pad, end; + size_t ndcl; + + sz = 0; + t = tybase(exprtype(n)); + assert(t->type == Tystruct); + dcl = t->sdecls; + ndcl = t->nmemb; + for (i = 0; i < ndcl; i++) { + pad = alignto(sz, decltype(dcl[i])); + sz += writepad(fd, pad - sz); + found = 0; + for (j = 0; j < n->expr.nargs; j++) + if (!strcmp(namestr(n->expr.args[j]->expr.idx), declname(dcl[i]))) { + found = 1; + sz += writeblob(fd, globls, strtab, n->expr.args[j]); + } + if (!found) + sz += writepad(fd, size(dcl[i])); + } + end = alignto(sz, t); + sz += writepad(fd, end - sz); + return sz; +} + +static size_t writeblob(FILE *fd, Htab *globls, Htab *strtab, Node *n) +{ + size_t i, sz; + + switch(exprop(n)) { + case Otup: + case Oarr: + sz = 0; + for (i = 0; i < n->expr.nargs; i++) + sz += writeblob(fd, globls, strtab, n->expr.args[i]); + break; + case Ostruct: + sz = writestruct(fd, globls, strtab, n); + break; + case Olit: + sz = writelit(fd, strtab, n->expr.args[0], exprtype(n)); + break; + case Oslice: + sz = writeslice(fd, globls, strtab, n); + break; + default: + dump(n, stdout); + die("Nonliteral initializer for global"); + break; + } + return sz; +} + +void genblob(FILE *fd, Node *blob, Htab *globls, Htab *strtab) +{ + char *lbl; + + /* lits and such also get wrapped in decls */ + assert(blob->type == Ndecl); + + lbl = htget(globls, blob); + if (blob->decl.vis != Visintern) + fprintf(fd, ".globl %s\n", lbl); + fprintf(fd, "%s:\n", lbl); + if (blob->decl.init) + writeblob(fd, globls, strtab, blob->decl.init); + else + writepad(fd, size(blob)); +} + +/* genasm requires all nodes in 'nl' to map cleanly to operations that are + * natively supported, as promised in the output of reduce(). 
No 64-bit + * operations on x32, no structures, and so on. */ +void genasm(FILE *fd, Func *fn, Htab *globls, Htab *strtab) +{ + Isel is = {0,}; + size_t i, j; + char buf[128]; + + is.reglocs = mkht(varhash, vareq); + is.stkoff = fn->stkoff; + is.globls = globls; + is.ret = fn->ret; + is.cfg = fn->cfg; + /* ensure that all physical registers have a loc created, so we + * don't get any surprises referring to them in the allocator */ + for (i = 0; i < Nreg; i++) + locphysreg(i); + + for (i = 0; i < fn->cfg->nbb; i++) + lappend(&is.bb, &is.nbb, mkasmbb(fn->cfg->bb[i])); + + is.curbb = is.bb[0]; + prologue(&is, fn->stksz); + for (j = 0; j < fn->cfg->nbb - 1; j++) { + is.curbb = is.bb[j]; + for (i = 0; i < fn->cfg->bb[j]->nnl; i++) { + /* put in a comment that says where this line comes from */ + snprintf(buf, sizeof buf, "\n\t# bb = %zd, bbidx = %zd, line=%d", + j, i, fn->cfg->bb[j]->nl[i]->line); + g(&is, Ilbl, locstrlbl(buf), NULL); + isel(&is, fn->cfg->bb[j]->nl[i]); + } + } + is.curbb = is.bb[is.nbb - 1]; + epilogue(&is); + regalloc(&is); + + if (debugopt['i']) + writeasm(stdout, &is, fn); + writeasm(fd, &is, fn); +} + +void genstrings(FILE *fd, Htab *strtab) +{ + void **k; + size_t i, nk; + + k = htkeys(strtab, &nk); + for (i = 0; i < nk; i++) { + fprintf(fd, "%s:\n", (char*)htget(strtab, k[i])); + writebytes(fd, k[i], strlen(k[i])); + } +} diff --git a/6/locs.c b/6/locs.c new file mode 100644 index 0000000..90892cd --- /dev/null +++ b/6/locs.c @@ -0,0 +1,303 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" +#include "opt.h" +#include "asm.h" + +Mode regmodes[] = { +#define Reg(r, name, mode) mode, +#include "regs.def" +#undef Reg +}; + +char *regnames[] = { +#define Reg(r, name, mode) name, +#include "regs.def" +#undef Reg +}; + +size_t modesize[Nmode] = { + 
[ModeNone] = 0, + [ModeB] = 1, + [ModeW] = 2, + [ModeL] = 4, + [ModeQ] = 8, + [ModeF] = 4, + [ModeD] = 8, +}; + + +const Reg reginterferes[Nreg][Nmode + 1] = { + /* byte */ + [Ral] = {Ral, Rax, Reax}, + [Rcl] = {Rcl, Rcx, Recx}, + [Rdl] = {Rdl, Rdx, Redx}, + [Rbl] = {Rbl, Rbx, Rebx}, + + /* word */ + [Rax] = {Ral, Rax, Reax}, + [Rcx] = {Rcl, Rcx, Recx}, + [Rdx] = {Rdl, Rdx, Redx}, + [Rbx] = {Rbl, Rbx, Rebx}, + [Rsi] = {Rsi, Resi}, + [Rdi] = {Rdi, Redi}, + + /* dword */ + [Reax] = {Ral, Rax, Reax}, + [Recx] = {Rcl, Rcx, Recx}, + [Redx] = {Rdl, Rdx, Redx}, + [Rebx] = {Rbl, Rbx, Rebx}, + [Resi] = {Rsi, Resi}, + [Redi] = {Rdi, Redi}, + [Resp] = {Resp}, + [Rebp] = {Rebp}, +}; + +char *genlblstr(char *buf, size_t sz) +{ + static int nextlbl; + snprintf(buf, 128, ".L%d", nextlbl++); + return buf; +} + +Node *genlbl(void) +{ + char buf[128]; + + genlblstr(buf, 128); + return mklbl(-1, buf); +} + +Loc *locstrlbl(char *lbl) +{ + Loc *l; + + l = zalloc(sizeof(Loc)); + l->type = Loclbl; + l->mode = ModeQ; + l->lbl = strdup(lbl); + return l; +} + +Loc *loclitl(char *lbl) +{ + Loc *l; + + l = zalloc(sizeof(Loc)); + l->type = Loclitl; + l->mode = ModeQ; + l->lbl = strdup(lbl); + return l; +} + +Loc *loclbl(Node *e) +{ + Node *lbl; + assert(e->type == Nexpr); + lbl = e->expr.args[0]; + assert(lbl->type == Nlit); + assert(lbl->lit.littype = Llbl); + return locstrlbl(lbl->lit.lblval); +} + +Loc **locmap = NULL; +size_t maxregid = 0; + +static Loc *locregid(regid id, Mode m) +{ + Loc *l; + + l = zalloc(sizeof(Loc)); + l->type = Locreg; + l->mode = m; + l->reg.id = id; + locmap = xrealloc(locmap, maxregid * sizeof(Loc*)); + locmap[l->reg.id] = l; + return l; +} + +Loc *locreg(Mode m) +{ + return locregid(maxregid++, m); +} + +Loc *locphysreg(Reg r) +{ + static Loc *physregs[Nreg] = {0,}; + + if (physregs[r]) + return physregs[r]; + physregs[r] = locreg(regmodes[r]); + physregs[r]->reg.colour = r; + return physregs[r]; +} + +Loc *locmem(long disp, Loc *base, Loc *idx, Mode mode) +{ + 
Loc *l; + + l = zalloc(sizeof(Loc)); + l->type = Locmem; + l->mode = mode; + l->mem.constdisp = disp; + l->mem.base = base; + l->mem.idx = idx; + l->mem.scale = 0; + return l; +} + +Loc *locmems(long disp, Loc *base, Loc *idx, int scale, Mode mode) +{ + Loc *l; + + l = locmem(disp, base, idx, mode); + l->mem.scale = scale; + return l; +} + +Loc *locmeml(char *disp, Loc *base, Loc *idx, Mode mode) +{ + Loc *l; + + l = zalloc(sizeof(Loc)); + l->type = Locmeml; + l->mode = mode; + l->mem.lbldisp = strdup(disp); + l->mem.base = base; + l->mem.idx = idx; + l->mem.scale = 0; + return l; +} + +Loc *locmemls(char *disp, Loc *base, Loc *idx, int scale, Mode mode) +{ + Loc *l; + + l = locmeml(disp, base, idx, mode); + l->mem.scale = scale; + return l; +} + + +Loc *loclit(long val, Mode m) +{ + Loc *l; + + l = zalloc(sizeof(Loc)); + l->type = Loclit; + l->mode = m; + l->lit = val; + return l; +} + +Loc *coreg(Reg r, Mode m) +{ + Reg crtab[][Nmode + 1] = { + [Ral] = {Rnone, Ral, Rax, Reax, Rrax}, + [Rcl] = {Rnone, Rcl, Rcx, Recx, Rrcx}, + [Rdl] = {Rnone, Rdl, Rdx, Redx, Rrdx}, + [Rbl] = {Rnone, Rbl, Rbx, Rebx, Rrbx}, + [Rsil] = {Rnone, Rsil, Rsi, Resi, Rrsi}, + [Rdil] = {Rnone, Rdil, Rdi, Redi, Rrdi}, + [Rr8b] = {Rnone, Rr8b, Rr8w, Rr8d, Rr8}, + [Rr9b] = {Rnone, Rr9b, Rr9w, Rr9d, Rr9}, + [Rr10b] = {Rnone, Rr10b, Rr10w, Rr10d, Rr10}, + [Rr11b] = {Rnone, Rr11b, Rr11w, Rr11d, Rr11}, + [Rr12b] = {Rnone, Rr12b, Rr12w, Rr12d, Rr12}, + [Rr13b] = {Rnone, Rr13b, Rr13w, Rr13d, Rr13}, + [Rr14b] = {Rnone, Rr14b, Rr14w, Rr14d, Rr14}, + [Rr15b] = {Rnone, Rr15b, Rr15w, Rr15d, Rr15}, + + [Rax] = {Rnone, Ral, Rax, Reax, Rrax}, + [Rcx] = {Rnone, Rcl, Rcx, Recx, Rrcx}, + [Rdx] = {Rnone, Rdl, Rdx, Redx, Rrdx}, + [Rbx] = {Rnone, Rbl, Rbx, Rebx, Rrbx}, + [Rsi] = {Rnone, Rsil, Rsi, Resi, Rrsi}, + [Rdi] = {Rnone, Rsil, Rdi, Redi, Rrdi}, + [Rr8w] = {Rnone, Rr8b, Rr8w, Rr8d, Rr8}, + [Rr9w] = {Rnone, Rr9b, Rr9w, Rr9d, Rr9}, + [Rr10w] = {Rnone, Rr10b, Rr10w, Rr10d, Rr10}, + [Rr11w] = {Rnone, Rr11b, 
Rr11w, Rr11d, Rr11}, + [Rr12w] = {Rnone, Rr12b, Rr12w, Rr12d, Rr12}, + [Rr13w] = {Rnone, Rr13b, Rr13w, Rr13d, Rr13}, + [Rr14w] = {Rnone, Rr14b, Rr14w, Rr14d, Rr14}, + [Rr15w] = {Rnone, Rr15b, Rr15w, Rr15d, Rr15}, + + [Reax] = {Rnone, Ral, Rax, Reax, Rrax}, + [Recx] = {Rnone, Rcl, Rcx, Recx, Rrcx}, + [Redx] = {Rnone, Rdl, Rdx, Redx, Rrdx}, + [Rebx] = {Rnone, Rbl, Rbx, Rebx, Rrbx}, + [Resi] = {Rnone, Rsil, Rsi, Resi, Rrsi}, + [Redi] = {Rnone, Rsil, Rdi, Redi, Rrdi}, + [Rr8d] = {Rnone, Rr8b, Rr8w, Rr8d, Rr8}, + [Rr9d] = {Rnone, Rr9b, Rr9w, Rr9d, Rr9}, + [Rr10d] = {Rnone, Rr10b, Rr10w, Rr10d, Rr10}, + [Rr11d] = {Rnone, Rr11b, Rr11w, Rr11d, Rr11}, + [Rr12d] = {Rnone, Rr12b, Rr12w, Rr12d, Rr12}, + [Rr13d] = {Rnone, Rr13b, Rr13w, Rr13d, Rr13}, + [Rr14d] = {Rnone, Rr14b, Rr14w, Rr14d, Rr14}, + [Rr15d] = {Rnone, Rr15b, Rr15w, Rr15d, Rr15}, + + [Rrax] = {Rnone, Ral, Rax, Reax, Rrax}, + [Rrcx] = {Rnone, Rcl, Rcx, Recx, Rrcx}, + [Rrdx] = {Rnone, Rdl, Rdx, Redx, Rrdx}, + [Rrbx] = {Rnone, Rbl, Rbx, Rebx, Rrbx}, + [Rrsi] = {Rnone, Rsil, Rsi, Resi, Rrsi}, + [Rrdi] = {Rnone, Rsil, Rdi, Redi, Rrdi}, + [Rr8] = {Rnone, Rr8b, Rr8w, Rr8d, Rr8}, + [Rr9] = {Rnone, Rr9b, Rr9w, Rr9d, Rr9}, + [Rr10] = {Rnone, Rr10b, Rr10w, Rr10d, Rr10}, + [Rr11] = {Rnone, Rr11b, Rr11w, Rr11d, Rr11}, + [Rr12] = {Rnone, Rr12b, Rr12w, Rr12d, Rr12}, + [Rr13] = {Rnone, Rr13b, Rr13w, Rr13d, Rr13}, + [Rr14] = {Rnone, Rr14b, Rr14w, Rr14d, Rr14}, + [Rr15] = {Rnone, Rr15b, Rr15w, Rr15d, Rr15}, + + [Rxmm0f] = {[ModeF] = Rxmm0f, [ModeD] = Rxmm0d}, + [Rxmm1f] = {[ModeF] = Rxmm1f, [ModeD] = Rxmm1d}, + [Rxmm2f] = {[ModeF] = Rxmm2f, [ModeD] = Rxmm2d}, + [Rxmm3f] = {[ModeF] = Rxmm3f, [ModeD] = Rxmm3d}, + [Rxmm4f] = {[ModeF] = Rxmm4f, [ModeD] = Rxmm4d}, + [Rxmm5f] = {[ModeF] = Rxmm5f, [ModeD] = Rxmm5d}, + [Rxmm6f] = {[ModeF] = Rxmm6f, [ModeD] = Rxmm6d}, + [Rxmm7f] = {[ModeF] = Rxmm7f, [ModeD] = Rxmm7d}, + [Rxmm8f] = {[ModeF] = Rxmm8f, [ModeD] = Rxmm8d}, + [Rxmm9f] = {[ModeF] = Rxmm9f, [ModeD] = Rxmm9d}, + [Rxmm10f] = {[ModeF] 
= Rxmm0f, [ModeD] = Rxmm0d}, + [Rxmm11f] = {[ModeF] = Rxmm1f, [ModeD] = Rxmm1d}, + [Rxmm12f] = {[ModeF] = Rxmm2f, [ModeD] = Rxmm2d}, + [Rxmm13f] = {[ModeF] = Rxmm3f, [ModeD] = Rxmm3d}, + [Rxmm14f] = {[ModeF] = Rxmm4f, [ModeD] = Rxmm4d}, + [Rxmm15f] = {[ModeF] = Rxmm5f, [ModeD] = Rxmm5d}, + + [Rxmm0d] = {[ModeF] = Rxmm0f, [ModeD] = Rxmm0d}, + [Rxmm1d] = {[ModeF] = Rxmm1f, [ModeD] = Rxmm1d}, + [Rxmm2d] = {[ModeF] = Rxmm2f, [ModeD] = Rxmm2d}, + [Rxmm3d] = {[ModeF] = Rxmm3f, [ModeD] = Rxmm3d}, + [Rxmm4d] = {[ModeF] = Rxmm4f, [ModeD] = Rxmm4d}, + [Rxmm5d] = {[ModeF] = Rxmm5f, [ModeD] = Rxmm5d}, + [Rxmm6d] = {[ModeF] = Rxmm6f, [ModeD] = Rxmm6d}, + [Rxmm7d] = {[ModeF] = Rxmm7f, [ModeD] = Rxmm7d}, + [Rxmm8d] = {[ModeF] = Rxmm8f, [ModeD] = Rxmm8d}, + [Rxmm9d] = {[ModeF] = Rxmm9f, [ModeD] = Rxmm9d}, + [Rxmm10d] = {[ModeF] = Rxmm0f, [ModeD] = Rxmm0d}, + [Rxmm11d] = {[ModeF] = Rxmm1f, [ModeD] = Rxmm1d}, + [Rxmm12d] = {[ModeF] = Rxmm2f, [ModeD] = Rxmm2d}, + [Rxmm13d] = {[ModeF] = Rxmm3f, [ModeD] = Rxmm3d}, + [Rxmm14d] = {[ModeF] = Rxmm4f, [ModeD] = Rxmm4d}, + [Rxmm15d] = {[ModeF] = Rxmm5f, [ModeD] = Rxmm5d}, + }; + + assert(crtab[r][m] != Rnone); + return locphysreg(crtab[r][m]); +} diff --git a/6/main.c b/6/main.c new file mode 100644 index 0000000..408507a --- /dev/null +++ b/6/main.c @@ -0,0 +1,150 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <err.h> + +#include "parse.h" +#include "opt.h" +#include "asm.h" + +#include "../config.h" + +/* FIXME: move into one place...? 
*/ +Node *file; +char debugopt[128]; +int writeasm; +char *outfile; +char **incpaths; +size_t nincpaths; + +static void usage(char *prog) +{ + printf("%s [-h] [-o outfile] [-d[dbgopts]] inputs\n", prog); + printf("\t-h\tPrint this help\n"); + printf("\t-S\tWrite out `input.s` when compiling\n"); + printf("\t-I path\tAdd 'path' to use search path\n"); + printf("\t-d\tPrint debug dumps. Recognized options: f r p i\n"); + printf("\t\t\tf: log folded trees\n"); + printf("\t\t\tl: log lowered pre-cfg trees\n"); + printf("\t\t\tT: log tree immediately\n"); + printf("\t\t\tr: log register allocation activity\n"); + printf("\t\t\ti: log instruction selection activity\n"); + printf("\t\t\tu: log type unifications\n"); + printf("\t-o\tOutput to outfile\n"); + printf("\t-S\tGenerate assembly instead of object code\n"); +} + +static void assem(char *asmsrc, char *input) +{ + char objfile[1024]; + char cmd[2048]; + + swapsuffix(objfile, 1024, input, ".myr", ".o"); + snprintf(cmd, 1024, Asmcmd, objfile, asmsrc); + if (system(cmd) == -1) + die("Couldn't run assembler"); +} + +static char *gentemp(char *buf, size_t bufsz, char *path, char *suffix) +{ + char *tmpdir; + char *base; + + tmpdir = getenv("TMPDIR"); + if (!tmpdir) + tmpdir = "/tmp"; + base = strrchr(path, '/'); + if (base) + base++; + else + base = path; + snprintf(buf, bufsz, "%s/tmp%lx-%s%s", tmpdir, random(), base, suffix); + return buf; +} + +static void genuse(char *path) +{ + FILE *f; + char buf[1024]; + + swapsuffix(buf, sizeof buf, path, ".myr", ".use"); + f = fopen(buf, "w"); + if (!f) + err(1, "Could not open path %s\n", buf); + writeuse(f, file); + fclose(f); +} + +int main(int argc, char **argv) +{ + int opt; + int i; + Stab *globls; + char buf[1024]; + + while ((opt = getopt(argc, argv, "d:hSo:I:")) != -1) { + switch (opt) { + case 'o': + outfile = optarg; + break; + case 'S': + writeasm = 1; + break; + case 'h': + usage(argv[0]); + exit(0); + break; + case 'd': + while (optarg && *optarg) { + if (*optarg 
== 'y') + yydebug = 1; + debugopt[*optarg++ & 0x7f]++; + } + break; + case 'I': + lappend(&incpaths, &nincpaths, optarg); + break; + default: + usage(argv[0]); + exit(0); + break; + } + } + + lappend(&incpaths, &nincpaths, Instroot "/lib/myr"); + for (i = optind; i < argc; i++) { + globls = mkstab(); + tyinit(globls); + tokinit(argv[i]); + file = mkfile(argv[i]); + file->file.exports = mkstab(); + file->file.globls = globls; + yyparse(); + + /* before we do anything to the parse */ + if (debugopt['T']) + dump(file, stdout); + infer(file); + /* after all type inference */ + if (debugopt['t']) + dump(file, stdout); + + if (writeasm) { + swapsuffix(buf, sizeof buf, argv[i], ".myr", ".s"); + } else { + gentemp(buf, sizeof buf, argv[i], ".s"); + } + gen(file, buf); + assem(buf, argv[i]); + genuse(argv[i]); + } + + return 0; +} diff --git a/6/platform.h b/6/platform.h new file mode 100644 index 0000000..75c03ad --- /dev/null +++ b/6/platform.h @@ -0,0 +1,9 @@ +#if defined(__APPLE__) && defined(__MACH__) +/* for OSX */ +# define Asmcmd "as -g -o %s %s" +# define Symprefix "_" +#else +/* Default to linux */ +# define Asmcmd "as -g -o %s %s" +# define Symprefix "" +#endif @@ -0,0 +1,1333 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <assert.h> +#include <limits.h> +#include <string.h> + +#include "parse.h" +#include "opt.h" +#include "asm.h" + +#define Sizetbits (CHAR_BIT*sizeof(size_t)) /* used in graph reprs */ + +typedef struct Usemap Usemap; +struct Usemap { + int l[Maxarg + 1]; /* location of arg used in instruction's arg list */ + int r[Maxarg + 1]; /* list of registers used implicitly by instruction */ +}; + +void wlprint(FILE *fd, char *name, Loc **wl, size_t nwl); +static int moverelated(Isel *s, regid n); +static void printedge(FILE *fd, char *msg, size_t a, size_t b); + +/* tables of uses/defs by instruction */ +Usemap usetab[] = { +#define Use(...) 
{__VA_ARGS__} +#define Insn(i, fmt, use, def) use, +#include "insns.def" +#undef Insn +#undef Use +}; + +Usemap deftab[] = { +#define Def(...) {__VA_ARGS__} +#define Insn(i, fmt, use, def) def, +#include "insns.def" +#undef Insn +#undef Def +}; + +/* A map of which registers interfere */ +#define Northogonal 32 +Reg regmap[Northogonal][Nmode] = { + [0] = {Rnone, Ral, Rax, Reax, Rrax}, + [1] = {Rnone, Rcl, Rcx, Recx, Rrcx}, + [2] = {Rnone, Rdl, Rdx, Redx, Rrdx}, + [3] = {Rnone, Rbl, Rbx, Rebx, Rrbx}, + [4] = {Rnone, Rsil, Rsi, Resi, Rrsi}, + [5] = {Rnone, Rdil, Rdi, Redi, Rrdi}, + [6] = {Rnone, Rr8b, Rr8w, Rr8d, Rr8}, + [7] = {Rnone, Rr9b, Rr9w, Rr9d, Rr9}, + [8] = {Rnone, Rr10b, Rr10w, Rr10d, Rr10}, + [9] = {Rnone, Rr11b, Rr11w, Rr11d, Rr11}, + [10] = {Rnone, Rr12b, Rr12w, Rr12d, Rr12}, + [11] = {Rnone, Rr13b, Rr13w, Rr13d, Rr13}, + [12] = {Rnone, Rr14b, Rr14w, Rr14d, Rr14}, + [13] = {Rnone, Rr15b, Rr15w, Rr15d, Rr15}, + [14] = {Rnone, Rnone, Rnone, Rnone, Rnone}, + [15] = {Rnone, Rnone, Rnone, Rnone, Rnone}, + [16] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm0f, Rxmm0d}, + [17] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm1f, Rxmm1d}, + [18] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm2f, Rxmm2d}, + [19] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm3f, Rxmm3d}, + [20] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm4f, Rxmm4d}, + [21] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm5f, Rxmm5d}, + [22] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm6f, Rxmm6d}, + [23] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm7f, Rxmm7d}, + [24] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm8f, Rxmm8d}, + [25] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm9f, Rxmm9d}, + [26] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm10f, Rxmm10d}, + [27] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm11f, Rxmm11d}, + [28] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm12f, Rxmm12d}, + [29] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm13f, Rxmm13d}, + [30] = {Rnone, Rnone, Rnone, Rnone, Rnone, Rxmm14f, Rxmm14d}, + [31] = {Rnone, Rnone, Rnone, 
Rnone, Rnone, Rxmm15f, Rxmm15d}, +}; + +/* Which regmap entry a register maps to */ +int colourmap[Nreg] = { + /* byte */ + [Ral] = 0, [Rax] = 0, [Reax] = 0, [Rrax] = 0, + [Rcl] = 1, [Rcx] = 1, [Recx] = 1, [Rrcx] = 1, + [Rdl] = 2, [Rdx] = 2, [Redx] = 2, [Rrdx] = 2, + [Rbl] = 3, [Rbx] = 3, [Rebx] = 3, [Rrbx] = 3, + [Rsil] = 4, [Rsi] = 4, [Resi] = 4, [Rrsi] = 4, + [Rdil] = 5, [Rdi] = 5, [Redi] = 5, [Rrdi] = 5, + [Rr8b] = 6, [Rr8w] = 6, [Rr8d] = 6, [Rr8] = 6, + [Rr9b] = 7, [Rr9w] = 7, [Rr9d] = 7, [Rr9] = 7, + [Rr10b] = 8, [Rr10w] = 8, [Rr10d] = 8, [Rr10] = 8, + [Rr11b] = 9, [Rr11w] = 9, [Rr11d] = 9, [Rr11] = 9, + [Rr12b] = 10, [Rr12w] = 10, [Rr12d] = 10, [Rr12] = 10, + [Rr13b] = 11, [Rr13w] = 11, [Rr13d] = 11, [Rr13] = 11, + [Rr14b] = 12, [Rr14w] = 12, [Rr14d] = 12, [Rr14] = 12, + [Rr15b] = 13, [Rr15w] = 13, [Rr15d] = 13, [Rr15] = 13, + + /* float */ + [Rxmm0f] = 16, [Rxmm0d] = 16, + [Rxmm1f] = 17, [Rxmm1d] = 17, + [Rxmm2f] = 18, [Rxmm2d] = 18, + [Rxmm3f] = 19, [Rxmm3d] = 19, + [Rxmm4f] = 20, [Rxmm4d] = 20, + [Rxmm5f] = 21, [Rxmm5d] = 21, + [Rxmm6f] = 22, [Rxmm6d] = 22, + [Rxmm7f] = 23, [Rxmm7d] = 23, + [Rxmm8f] = 24, [Rxmm8d] = 24, + [Rxmm9f] = 25, [Rxmm9d] = 25, + [Rxmm10f] = 26, [Rxmm10d] = 26, + [Rxmm11f] = 27, [Rxmm11d] = 27, + [Rxmm12f] = 28, [Rxmm12d] = 28, + [Rxmm13f] = 29, [Rxmm13d] = 29, + [Rxmm14f] = 30, [Rxmm14d] = 30, + [Rxmm15f] = 31, [Rxmm15d] = 31, +}; + +static int _K[Nclass] = { + [Classbad] = 0, + [Classint] = 14, + [Classflt] = 16, +}; + +Rclass rclass(Loc *l) +{ + switch (l->mode) { + case ModeNone: return Classbad; + case Nmode: return Classbad; + case ModeB: return Classint; + case ModeW: return Classint; + case ModeL: return Classint; + case ModeQ: return Classint; + + case ModeF: return Classflt; + case ModeD: return Classflt; + } + return Classbad; +} + +/* %esp, %ebp are not in the allocatable pool */ +static int isfixreg(Loc *l) +{ + if (l->reg.colour == Resp) + return 1; + if (l->reg.colour == Rebp) + return 1; + return 0; +} + +static 
size_t uses(Insn *insn, regid *u) +{ + size_t i, j; + int k; + Loc *m; + + j = 0; + /* Add all the registers used and defined. Duplicates + * in this list are fine, since they're being added to + * a set anyways */ + for (i = 0; i < Maxarg; i++) { + if (!usetab[insn->op].l[i]) + break; + k = usetab[insn->op].l[i] - 1; + /* non-registers are handled later */ + if (insn->args[k]->type == Locreg) + if (!isfixreg(insn->args[k])) + u[j++] = insn->args[k]->reg.id; + } + /* some insns don't reflect their defs in the args. + * These are explictly listed in the insn description */ + for (i = 0; i < Maxarg; i++) { + if (!usetab[insn->op].r[i]) + break; + /* not a leak; physical registers get memoized */ + u[j++] = locphysreg(usetab[insn->op].r[i])->reg.id; + } + /* If the registers are in an address calculation, + * they're used no matter what. */ + for (i = 0; i < insn->nargs; i++) { + m = insn->args[i]; + if (m->type != Locmem && m->type != Locmeml) + continue; + if (m->mem.base) + if (!isfixreg(m->mem.base)) + u[j++] = m->mem.base->reg.id; + if (m->mem.idx) + if (!isfixreg(m->mem.base)) + u[j++] = m->mem.idx->reg.id; + } + return j; +} + +static size_t defs(Insn *insn, regid *d) +{ + size_t i, j; + int k; + + j = 0; + /* Add all the registers dsed and defined. Duplicates + * in this list are fine, since they're being added to + * a set anyways */ + for (i = 0; i < Maxarg; i++) { + if (!deftab[insn->op].l[i]) + break; + k = deftab[insn->op].l[i] - 1; + if (insn->args[k]->type == Locreg) + if (!isfixreg(insn->args[k])) + d[j++] = insn->args[k]->reg.id; + } + /* some insns don't reflect their defs in the args. + * These are explictly listed in the insn description */ + for (i = 0; i < Maxarg; i++) { + if (!deftab[insn->op].r[i]) + break; + /* not a leak; physical registers get memoized */ + d[j++] = locphysreg(deftab[insn->op].r[i])->reg.id; + } + return j; +} + +/* The uses and defs for an entire BB. 
*/ +static void udcalc(Asmbb *bb) +{ + /* up to 2 registers per memloc, so + * 2*Maxarg is the maximum number of + * uses or defs we can see */ + regid u[Maxuse], d[Maxdef]; + size_t nu, nd; + size_t i, j; + + bb->use = bsclear(bb->use); + bb->def = bsclear(bb->def); + for (i = 0; i < bb->ni; i++) { + nu = uses(bb->il[i], u); + nd = defs(bb->il[i], d); + for (j = 0; j < nu; j++) + if (!bshas(bb->def, u[j])) + bsput(bb->use, u[j]); + for (j = 0; j < nd; j++) + bsput(bb->def, d[j]); + } +} + +static int istrivial(Isel *s, regid r) +{ + return s->degree[r] < _K[rclass(locmap[r])]; +} + +static void liveness(Isel *s) +{ + Bitset *old; + Asmbb **bb; + ssize_t nbb; + ssize_t i; + size_t j; + int changed; + + bb = s->bb; + nbb = s->nbb; + for (i = 0; i < nbb; i++) { + udcalc(s->bb[i]); + bb[i]->livein = bsclear(bb[i]->livein); + bb[i]->liveout = bsclear(bb[i]->liveout); + } + + changed = 1; + while (changed) { + changed = 0; + for (i = nbb - 1; i >= 0; i--) { + old = bsdup(bb[i]->liveout); + /* liveout[b] = U(s in succ) livein[s] */ + for (j = 0; bsiter(bb[i]->succ, &j); j++) + bsunion(bb[i]->liveout, bb[j]->livein); + /* livein[b] = use[b] U (out[b] \ def[b]) */ + bb[i]->livein = bsclear(bb[i]->livein); + bsunion(bb[i]->livein, bb[i]->liveout); + bsdiff(bb[i]->livein, bb[i]->def); + bsunion(bb[i]->livein, bb[i]->use); + if (!changed) + changed = !bseq(old, bb[i]->liveout); + } + } +} + +/* we're only interested in register->register moves */ +static int ismove(Insn *i) +{ + if (i->op != Imov) + return 0; + return i->args[0]->type == Locreg && i->args[1]->type == Locreg; +} + +static int gbhasedge(Isel *s, size_t u, size_t v) +{ + size_t i; + i = (maxregid * v) + u; + return (s->gbits[i/Sizetbits] & (1ULL <<(i % Sizetbits))) != 0; +} + +static void gbputedge(Isel *s, size_t u, size_t v) +{ + size_t i, j; + + i = (maxregid * u) + v; + j = (maxregid * v) + u; + s->gbits[i/Sizetbits] |= 1ULL << (i % Sizetbits); + s->gbits[j/Sizetbits] |= 1ULL << (j % Sizetbits); + 
assert(gbhasedge(s, u, v) && gbhasedge(s, v, u)); +} + +static int wlfind(Loc **wl, size_t nwl, regid v, size_t *idx) +{ + size_t i; + + for (i = 0; i < nwl; i++) { + if (wl[i]->reg.id == v) { + *idx = i; + return 1; + } + } + *idx = -1; + return 0; +} + +/* + * If we have an edge between two aliasing registers, + * we should not increment the degree, since that would + * be double counting. + */ +static int degreechange(Isel *s, regid u, regid v) +{ + regid phys, virt, r; + size_t i; + + if (bshas(s->prepainted, u)) { + phys = u; + virt = v; + } else if (bshas(s->prepainted, v)) { + phys = v; + virt = u; + } else { + return 1; + } + + for (i = 0; i < Nmode; i++) { + r = regmap[colourmap[phys]][i]; + if (r != phys && gbhasedge(s, virt, regmap[colourmap[phys]][i])) { + return 0; + } + } + return 1; +} + +static void alputedge(Isel *s, regid u, regid v) +{ + s->ngadj[u]++; + s->gadj[u] = xrealloc(s->gadj[u], s->ngadj[u]*sizeof(regid)); + s->gadj[u][s->ngadj[u] - 1] = v; +} + +static void wlput(Loc ***wl, size_t *nwl, Loc *l) +{ + lappend(wl, nwl, l); + l->list = wl; +} + +static void wldel(Isel *s, Loc ***wl, size_t *nwl, size_t idx) +{ + (*wl)[idx]->list = NULL; + ldel(wl, nwl, idx); +} + +static void wlputset(Bitset *bs, regid r) +{ + bsput(bs, r); + locmap[r]->list = bs; +} + + +static void addedge(Isel *s, regid u, regid v) +{ + if (u == v || gbhasedge(s, u, v)) + return; + if (u == Rrbp || u == Rrsp || u == Rrip) + return; + if (v == Rrbp || v == Rrsp || v == Rrip) + return; + + gbputedge(s, u, v); + gbputedge(s, v, u); + if (!bshas(s->prepainted, u)) { + alputedge(s, u, v); + s->degree[u] += degreechange(s, v, u); + } + if (!bshas(s->prepainted, v)) { + alputedge(s, v, u); + s->degree[v] += degreechange(s, u, v); + } +} + +static void setup(Isel *s) +{ + size_t gchunks; + size_t i; + + free(s->gbits); + gchunks = (maxregid*maxregid)/Sizetbits + 1; + s->gbits = zalloc(gchunks*sizeof(size_t)); + /* fresh adj list repr. 
*/ + free(s->gadj); + free(s->ngadj); + s->gadj = zalloc(maxregid * sizeof(regid*)); + s->ngadj = zalloc(maxregid * sizeof(size_t)); + + s->spilled = bsclear(s->spilled); + s->coalesced = bsclear(s->coalesced); + lfree(&s->wlspill, &s->nwlspill); + lfree(&s->wlfreeze, &s->nwlfreeze); + lfree(&s->wlsimp, &s->nwlsimp); + + free(s->aliasmap); + free(s->degree); + free(s->rmoves); + free(s->nrmoves); + + s->aliasmap = zalloc(maxregid * sizeof(size_t)); + s->degree = zalloc(maxregid * sizeof(int)); + s->rmoves = zalloc(maxregid * sizeof(Loc **)); + s->nrmoves = zalloc(maxregid * sizeof(size_t)); + + for (i = 0; bsiter(s->prepainted, &i); i++) + s->degree[i] = 1<<16; +} + +static void build(Isel *s) +{ + regid u[Maxuse], d[Maxdef]; + size_t nu, nd; + size_t i, k, a; + ssize_t j; + Bitset *live; + Asmbb **bb; + size_t nbb; + Insn *insn; + size_t l; + + /* set up convenience vars */ + bb = s->bb; + nbb = s->nbb; + + for (i = 0; i < nbb; i++) { + live = bsdup(bb[i]->liveout); + for (j = bb[i]->ni - 1; j >= 0; j--) { + insn = bb[i]->il[j]; + nu = uses(insn, u); + nd = defs(insn, d); + + /* add these to the initial set */ + for (k = 0; k < nu; k++) { + if (!bshas(s->prepainted, u[k])) + wlputset(s->initial, u[k]); + } + for (k = 0; k < nd; k++) { + if (!bshas(s->prepainted, d[k])) + wlputset(s->initial, d[k]); + } + + /* moves get special treatment, since we don't want spurious + * edges between the src and dest */ + //iprintf(stdout, insn); + if (ismove(insn)) { + /* live \= uses(i) */ + for (k = 0; k < nu; k++) { + /* remove all physical register aliases */ + if (bshas(s->prepainted, u[k])) { + for (a = 0; a < Nmode; a++) + bsdel(live, regmap[colourmap[u[k]]][a]); + } else { + bsdel(live, u[k]); + } + } + + for (k = 0; k < nu; k++) + lappend(&s->rmoves[u[k]], &s->nrmoves[u[k]], insn); + for (k = 0; k < nd; k++) + lappend(&s->rmoves[d[k]], &s->nrmoves[d[k]], insn); + lappend(&s->wlmove, &s->nwlmove, insn); + } + /* live = live U def(i) */ + for (k = 0; k < nd; k++) + 
bsput(live, d[k]); + + for (k = 0; k < nd; k++) + for (l = 0; bsiter(live, &l); l++) + addedge(s, d[k], l); + /* live = use(i) U (live \ def(i)) */ + for (k = 0; k < nd; k++) + bsdel(live, d[k]); + for (k = 0; k < nu; k++) + bsput(live, u[k]); + } + } +} + +static int adjavail(Isel *s, regid r) +{ + if (bshas(s->coalesced, r)) + return 0; + if (locmap[r]->list == &s->selstk) + return 0; + return 1; +} + +static size_t nodemoves(Isel *s, regid n, Insn ***pil) +{ + size_t i, j; + size_t count; + + /* FIXME: inefficient. Do I care? */ + count = 0; + if (pil) + *pil = NULL; + for (i = 0; i < s->nrmoves[n]; i++) { + for (j = 0; j < s->nmactive; j++) { + if (s->mactive[j] == s->rmoves[n][i]) { + if (pil) + lappend(pil, &count, s->rmoves[n][i]); + else + count++; + } + } + for (j = 0; j < s->nwlmove; j++) { + if (s->wlmove[j] == s->rmoves[n][i]) { + if (pil) + lappend(pil, &count, s->rmoves[n][i]); + else + count++; + } + } + } + return count; +} + +static int moverelated(Isel *s, regid n) +{ + return nodemoves(s, n, NULL) != 0; +} + +static void mkworklist(Isel *s) +{ + size_t i; + + for (i = 0; bsiter(s->initial, &i); i++) { + if (bshas(s->prepainted, i)) + continue; + else if (!istrivial(s, i)) + wlput(&s->wlspill, &s->nwlspill, locmap[i]); + else if (moverelated(s, i)) { + wlput(&s->wlfreeze, &s->nwlfreeze, locmap[i]); + } + else + wlput(&s->wlsimp, &s->nwlsimp, locmap[i]); + locmap[i]->reg.colour = 0; + } +} + +static void enablemove(Isel *s, regid n) +{ + size_t i, j; + Insn **il; + size_t ni; + + ni = nodemoves(s, n, &il); + for (i = 0; i < ni; i++) { + for (j = 0; j < s->nmactive; j++) { + if (il[i] == s->mactive[j]) { + ldel(&s->mactive, &s->nmactive, j); + lappend(&s->wlmove, &s->nwlmove, il[i]); + } + } + } +} + +static void decdegree(Isel *s, regid m) +{ + int before, after; + int found; + size_t idx, i; + regid n; + + assert(m < maxregid); + before = istrivial(s, m); + s->degree[m]--; + after = istrivial(s, m); + + if (before != after) { + enablemove(s, m); + 
for (i = 0; i < s->ngadj[m]; i++) { + n = s->gadj[m][i]; + if (adjavail(s, n)) + enablemove(s, n); + } + + /* Subtle: + * + * If this code is being called from coalesce(), + * then the degree could have been bumped up only + * temporarily. This means that the node can already + * be on wlfreeze or wlsimp. + * + * Therefore, if we don't find it on wlspill, we assert + * that the node is already on the list that we'd be + * moving it to. + */ + found = wlfind(s->wlspill, s->nwlspill, m, &idx); + if (found) + wldel(s, &s->wlspill, &s->nwlspill, idx); + if (moverelated(s, m)) { + if (!found) + assert(wlfind(s->wlfreeze, s->nwlfreeze, m, &idx)); + else + wlput(&s->wlfreeze, &s->nwlfreeze, locmap[m]); + } else { + if (!found) + assert(wlfind(s->wlsimp, s->nwlsimp, m, &idx)); + else + wlput(&s->wlsimp, &s->nwlsimp, locmap[m]); + } + } +} + +static void simp(Isel *s) +{ + Loc *l; + regid m; + size_t i; + + l = lpop(&s->wlsimp, &s->nwlsimp); + wlput(&s->selstk, &s->nselstk, l); + for (i = 0; i < s->ngadj[l->reg.id]; i++) { + m = s->gadj[l->reg.id][i]; + if (adjavail(s, m)) + decdegree(s, m); + } +} + +static regid getalias(Isel *s, regid id) +{ + while (1) { + if (!s->aliasmap[id]) + break; + id = s->aliasmap[id]->reg.id; + }; + return id; +} + +static void wladd(Isel *s, regid u) +{ + size_t i; + + if (bshas(s->prepainted, u)) + return; + if (moverelated(s, u)) + return; + if (!istrivial(s, u)) + return; + + assert(locmap[u]->list == &s->wlfreeze || locmap[u]->list == &s->wlsimp); + if (wlfind(s->wlfreeze, s->nwlfreeze, u, &i)) + wldel(s, &s->wlfreeze, &s->nwlfreeze, i); + wlput(&s->wlsimp, &s->nwlsimp, locmap[u]); +} + +static int conservative(Isel *s, regid u, regid v) +{ + int k; + size_t i; + regid n; + + k = 0; + for (i = 0; i < s->ngadj[u]; i++) { + n = s->gadj[u][i]; + if (adjavail(s, n) && !istrivial(s, n)) + k++; + } + for (i = 0; i < s->ngadj[v]; i++) { + n = s->gadj[v][i]; + if (adjavail(s, n) && !istrivial(s, n)) + k++; + } + return k < _K[rclass(locmap[u])]; 
+} + +/* FIXME: is this actually correct? */ +static int ok(Isel *s, regid t, regid r) +{ + return istrivial(s, t) || bshas(s->prepainted, t) || gbhasedge(s, t, r); +} + +static int combinable(Isel *s, regid u, regid v) +{ + regid t; + size_t i; + + /* Regs of different modes can't be combined as things stand. + * In principle they should be combinable, but it confused the + * whole mode dance. */ + if (locmap[u]->mode != locmap[v]->mode) + return 0; + /* if u isn't prepainted, can we conservatively coalesce? */ + if (!bshas(s->prepainted, u) && conservative(s, u, v)) + return 1; + + /* if it is, are the adjacent nodes ok to combine with this? */ + for (i = 0; i < s->ngadj[v]; i++) { + t = s->gadj[v][i]; + if (adjavail(s, t) && !ok(s, t, u)) + return 0; + } + return 1; +} + +static void combine(Isel *s, regid u, regid v) +{ + regid t; + size_t idx; + size_t i, j; + int has; + + if (debugopt['r'] > 2) + printedge(stdout, "combining:", u, v); + if (wlfind(s->wlfreeze, s->nwlfreeze, v, &idx)) + wldel(s, &s->wlfreeze, &s->nwlfreeze, idx); + else if (wlfind(s->wlspill, s->nwlspill, v, &idx)) { + wldel(s, &s->wlspill, &s->nwlspill, idx); + } + wlputset(s->coalesced, v); + s->aliasmap[v] = locmap[u]; + + /* nodemoves[u] = nodemoves[u] U nodemoves[v] */ + for (i = 0; i < s->nrmoves[v]; i++) { + has = 0; + for (j = 0; j < s->nrmoves[u]; j++) { + if (s->rmoves[v][i] == s->rmoves[u][j]) { + has = 1; + break; + } + } + if (!has) + lappend(&s->rmoves[u], &s->nrmoves[u], s->rmoves[v][i]); + } + + for (i = 0; i < s->ngadj[v]; i++) { + t = s->gadj[v][i]; + if (!adjavail(s, t)) + continue; + if (debugopt['r'] > 2) + printedge(stdout, "combine-putedge:", t, u); + addedge(s, t, u); + decdegree(s, t); + } + if (!istrivial(s, u) && wlfind(s->wlfreeze, s->nwlfreeze, u, &idx)) { + wldel(s, &s->wlfreeze, &s->nwlfreeze, idx); + wlput(&s->wlspill, &s->nwlspill, locmap[u]); + } +} + +static int constrained(Isel *s, regid u, regid v) +{ + size_t i; + + if (bshas(s->prepainted, v)) + return 1; 
+ if (bshas(s->prepainted, u)) + for (i = 0; i < Nmode; i++) + if (regmap[colourmap[u]][i] && gbhasedge(s, regmap[colourmap[u]][i], v)) + return 1; + return gbhasedge(s, u, v); +} + +static void coalesce(Isel *s) +{ + Insn *m; + regid u, v, tmp; + + m = lpop(&s->wlmove, &s->nwlmove); + u = getalias(s, m->args[0]->reg.id); + v = getalias(s, m->args[1]->reg.id); + + if (bshas(s->prepainted, v)) { + tmp = u; + u = v; + v = tmp; + } + + if (u == v) { + lappend(&s->mcoalesced, &s->nmcoalesced, m); + wladd(s, u); + wladd(s, v); + } else if (constrained(s, u, v)) { + lappend(&s->mconstrained, &s->nmconstrained, m); + wladd(s, u); + wladd(s, v); + } else if (combinable(s, u, v)) { + lappend(&s->mcoalesced, &s->nmcoalesced, m); + combine(s, u, v); + wladd(s, u); + } else { + lappend(&s->mactive, &s->nmactive, m); + } +} + +static int mldel(Insn ***ml, size_t *nml, Insn *m) +{ + size_t i; + for (i = 0; i < *nml; i++) { + if (m == (*ml)[i]) { + ldel(ml, nml, i); + return 1; + } + } + return 0; +} + +static void freezemoves(Isel *s, Loc *u) +{ + size_t i; + Insn **ml; + Insn *m; + size_t nml; + size_t idx; + Loc *v; + + nml = nodemoves(s, u->reg.id, &ml); + for (i = 0; i < nml; i++) { + m = ml[i]; + if (getalias(s, m->args[0]->reg.id) == getalias(s, u->reg.id)) + v = locmap[getalias(s, m->args[1]->reg.id)]; + else + v = locmap[getalias(s, m->args[0]->reg.id)]; + + if (!mldel(&s->mactive, &s->nmactive, m)) + mldel(&s->wlmove, &s->nwlmove, m); + lappend(&s->mfrozen, &s->nmfrozen, m); + if (!nodemoves(s, v->reg.id, NULL) && istrivial(s, v->reg.id)) { + if (!wlfind(s->wlfreeze, s->nwlfreeze, v->reg.id, &idx)) + die("Reg %zd not in freeze wl\n", v->reg.id); + wldel(s, &s->wlfreeze, &s->nwlfreeze, idx); + wlput(&s->wlsimp, &s->nwlsimp, v); + } + + } + lfree(&ml, &nml); +} + +static void freeze(Isel *s) +{ + Loc *l; + + l = lpop(&s->wlfreeze, &s->nwlfreeze); + wlput(&s->wlsimp, &s->nwlsimp, l); + freezemoves(s, l); +} + +/* Select the spill candidates */ +static void selspill(Isel 
*s) +{ + size_t i; + Loc *m; + + /* FIXME: pick a better heuristic for spilling */ + m = NULL; + for (i = 0; i < s->nwlspill; i++) { + if (!bshas(s->shouldspill, s->wlspill[i]->reg.id)) + continue; + m = s->wlspill[i]; + wldel(s, &s->wlspill, &s->nwlspill, i); + break; + } + if (!m) { + for (i = 0; i < s->nwlspill; i++) { + if (bshas(s->neverspill, s->wlspill[i]->reg.id)) { + printf("Not spilling %zd\n", s->wlspill[i]->reg.id); + continue; + } + m = s->wlspill[i]; + wldel(s, &s->wlspill, &s->nwlspill, i); + break; + } + } + assert(m != NULL); + wlput(&s->wlsimp, &s->nwlsimp, m); + freezemoves(s, m); +} + +/* + * Selects the colours for registers, spilling to the + * stack if no free registers can be found. + */ +static int paint(Isel *s) +{ + int taken[Nreg]; + Loc *n, *w; + regid l; + size_t i, j; + int spilled; + int found; + + spilled = 0; + while (s->nselstk) { + bzero(taken, Nreg*sizeof(int)); + n = lpop(&s->selstk, &s->nselstk); + + for (j = 0; j < s->ngadj[n->reg.id];j++) { + l = s->gadj[n->reg.id][j]; + if (debugopt['r'] > 1) + printedge(stdout, "paint-edge:", n->reg.id, l); + w = locmap[getalias(s, l)]; + if (w->reg.colour) + taken[colourmap[w->reg.colour]] = 1; + } + + found = 0; + for (i = 0; i < Northogonal; i++) { + if (regmap[i][n->mode] && !taken[i]) { + if (debugopt['r']) { + fprintf(stdout, "\tselecting "); + locprint(stdout, n, 'x'); + fprintf(stdout, " = %s\n", regnames[regmap[i][n->mode]]); + } + n->reg.colour = regmap[i][n->mode]; + found = 1; + break; + } + } + if (!found) { + spilled = 1; + wlputset(s->spilled, n->reg.id); + } + } + for (l = 0; bsiter(s->coalesced, &l); l++) { + n = locmap[getalias(s, l)]; + locmap[l]->reg.colour = n->reg.colour; + } + return spilled; +} + +typedef struct Remapping Remapping; +struct Remapping { + regid oldreg; + Loc *newreg; +}; + +static Loc *mapfind(Loc *old, Remapping *r, size_t nr) +{ + Loc *new; + Loc *base; + Loc *idx; + size_t i; + + if (!old) + return NULL; + + new = NULL; + if (old->type == Locreg) 
{ + for (i = 0; i < nr; i++) { + if (old->reg.id == r[i].oldreg) { + return r[i].newreg; + } + } + } else if (old->type == Locmem || old->type == Locmeml) { + base = mapfind(old->mem.base, r, nr); + idx = mapfind(old->mem.idx, r, nr); + if (base != old->mem.base || idx != old->mem.idx) { + if (old->type == Locmem) + new = locmems(old->mem.constdisp, base, idx, old->mem.scale, old->mode); + else + new = locmemls(old->mem.lbldisp, base, idx, old->mem.scale, old->mode); + } + if (new) + return new; + } + return old; +} + +static Loc *spillslot(Isel *s, regid reg) +{ + size_t stkoff; + + stkoff = ptoi(htget(s->spillslots, itop(reg))); + return locmem(-stkoff, locphysreg(Rrbp), NULL, locmap[reg]->mode); +} + +static void updatelocs(Isel *s, Insn *insn, Remapping *use, size_t nuse, Remapping *def, size_t ndef) +{ + size_t i; + + for (i = 0; i < insn->nargs; i++) { + insn->args[i] = mapfind(insn->args[i], use, nuse); + insn->args[i] = mapfind(insn->args[i], def, ndef); + } +} + +/* + * Takes two tables for remappings, of size Maxuse/Maxdef, + * and fills them, storign the number of uses or defs. Returns + * whether there are any remappings at all. + */ +static int remap(Isel *s, Insn *insn, Remapping *use, size_t *nuse, Remapping *def, size_t *ndef) +{ + regid u[Maxuse], d[Maxdef]; + size_t nu, nd; + size_t useidx, defidx; + size_t i, j, k; + int found; + + useidx = 0; + nu = uses(insn, u); + nd = defs(insn, d); + for (i = 0; i < nu; i++) { + if (!bshas(s->spilled, u[i])) + continue; + use[useidx].oldreg = u[i]; + use[useidx].newreg = locreg(locmap[u[i]]->mode); + bsput(s->neverspill, use[useidx].newreg->reg.id); + useidx++; + } + + defidx = 0; + for (i = 0; i < nd; i++) { + if (!bshas(s->spilled, d[i])) + continue; + def[defidx].oldreg = d[i]; + + /* if we already have remapped a use for this register, we want to + * store the same register from the def. 
*/ + found = 0; + for (j = 0; j < defidx; j++) { + for (k = 0; i < useidx; k++) { + if (use[j].oldreg == d[k]) { + def[defidx].newreg = use[j].newreg; + bsput(s->neverspill, def[defidx].newreg->reg.id); + found = 1; + } + } + } + if (!found) { + def[defidx].newreg = locreg(locmap[d[i]]->mode); + bsput(s->neverspill, def[defidx].newreg->reg.id); + } + + defidx++; + } + + *nuse = useidx; + *ndef = defidx; + return useidx > 0 || defidx > 0; +} + +/* + * Rewrite instructions using spilled registers, inserting + * appropriate loads and stores into the BB + */ +static void rewritebb(Isel *s, Asmbb *bb) +{ + Remapping use[Maxuse], def[Maxdef]; + Insn *insn; + size_t nuse, ndef; + size_t i, j; + Insn **new; + size_t nnew; + + new = NULL; + nnew = 0; + for (j = 0; j < bb->ni; j++) { + /* if there is a remapping, insert the loads and stores as needed */ + if (remap(s, bb->il[j], use, &nuse, def, &ndef)) { + for (i = 0; i < nuse; i++) { + insn = mkinsn(Imov, spillslot(s, use[i].oldreg), use[i].newreg, NULL); + lappend(&new, &nnew, insn); + if (debugopt['r']) { + printf("loading "); + locprint(stdout, locmap[use[i].oldreg], 'x'); + printf(" -> "); + locprint(stdout, use[i].newreg, 'x'); + printf("\n"); + } + } + insn = bb->il[j]; + updatelocs(s, insn, use, nuse, def, ndef); + lappend(&new, &nnew, insn); + for (i = 0; i < ndef; i++) { + insn = mkinsn(Imov, def[i].newreg, spillslot(s, def[i].oldreg), NULL); + lappend(&new, &nnew, insn); + if (debugopt['r']) { + printf("storing "); + locprint(stdout, locmap[def[i].oldreg], 'x'); + printf(" -> "); + locprint(stdout, def[i].newreg, 'x'); + printf("\n"); + } + } + } else { + lappend(&new, &nnew, bb->il[j]); + } + } + lfree(&bb->il, &bb->ni); + bb->il = new; + bb->ni = nnew; +} + +static void addspill(Isel *s, Loc *l) +{ + s->stksz->lit += modesize[l->mode]; + s->stksz->lit = align(s->stksz->lit, modesize[l->mode]); + if (debugopt['r']) { + printf("spill "); + locprint(stdout, l, 'x'); + printf(" to %zd(%%rbp)\n", s->stksz->lit); + } 
+ htput(s->spillslots, itop(l->reg.id), itop(s->stksz->lit)); +} + +/* + * Rewrites the function code so that it no longer contains + * references to spilled registers. Every use of spilled regs + * + * insn %rX,%rY + * + * is rewritten to look like: + * + * mov 123(%rsp),%rZ + * insn %rZ,%rW + * mov %rW,234(%rsp) + */ +static void rewrite(Isel *s) +{ + size_t i; + + s->spillslots = mkht(ptrhash, ptreq); + /* set up stack locations for all spilled registers. */ + for (i = 0; bsiter(s->spilled, &i); i++) + addspill(s, locmap[i]); + + /* rewrite instructions using them */ + for (i = 0; i < s->nbb; i++) + rewritebb(s, s->bb[i]); + htfree(s->spillslots); + bsclear(s->spilled); +} + +/* + * Coalescing registers leaves a lot + * of moves that look like + * + * mov %r123,%r123. + * + * This is useless. This deletes them. + */ +static void delnops(Isel *s) +{ + Insn *insn; + Asmbb *bb; + Insn **new; + size_t nnew; + size_t i, j; + + for (i = 0; i < s->nbb; i++) { + new = NULL; + nnew = 0; + bb = s->bb[i]; + for (j = 0; j < bb->ni; j++) { + insn = bb->il[j]; + if (ismove(insn) && insn->args[0]->reg.colour == insn->args[1]->reg.colour) + continue; + lappend(&new, &nnew, insn); + } + lfree(&bb->il, &bb->ni); + bb->il = new; + bb->ni = nnew; + } + if (debugopt['r']) + dumpasm(s, stdout); +} + +void regalloc(Isel *s) +{ + int spilled; + size_t i; + + /* Initialize the list of prepainted registers */ + s->prepainted = mkbs(); + bsput(s->prepainted, 0); + for (i = 0; i < Nreg; i++) + bsput(s->prepainted, i); + + s->shouldspill = mkbs(); + s->neverspill = mkbs(); + s->initial = mkbs(); + for (i = 0; i < Nsaved; i++) + bsput(s->shouldspill, s->calleesave[i]->reg.id); + do { + setup(s); + liveness(s); + build(s); + mkworklist(s); + if (debugopt['r']) + dumpasm(s, stdout); + do { + if (s->nwlsimp) + simp(s); + else if (s->nwlmove) + coalesce(s); + else if (s->nwlfreeze) + freeze(s); + else if (s->nwlspill) + selspill(s); + } while (s->nwlsimp || s->nwlmove || s->nwlfreeze || 
s->nwlspill); + spilled = paint(s); + if (spilled) + rewrite(s); + } while (spilled); + delnops(s); + bsfree(s->prepainted); + bsfree(s->shouldspill); + bsfree(s->neverspill); +} + +void wlprint(FILE *fd, char *name, Loc **wl, size_t nwl) +{ + size_t i; + char *sep; + + sep = ""; + fprintf(fd, "%s = [", name); + for (i = 0; i < nwl; i++) { + fprintf(fd, "%s", sep); + locprint(fd, wl[i], 'x'); + fprintf(fd, "(%zd)", wl[i]->reg.id); + sep = ","; + } + fprintf(fd, "]\n"); +} + +static void setprint(FILE *fd, Bitset *s) +{ + char *sep; + size_t i; + + sep = ""; + for (i = 0; i < bsmax(s); i++) { + if (bshas(s, i)) { + fprintf(fd, "%s%zd", sep, i); + sep = ","; + } + } + fprintf(fd, "\n"); +} + +static void locsetprint(FILE *fd, Bitset *s) +{ + char *sep; + size_t i; + + sep = ""; + for (i = 0; i < bsmax(s); i++) { + if (bshas(s, i)) { + fprintf(fd, "%s", sep); + locprint(fd, locmap[i], 'x'); + sep = ","; + } + } + fprintf(fd, "\n"); +} + +static void printedge(FILE *fd, char *msg, size_t a, size_t b) +{ + fprintf(fd, "\t%s ", msg); + locprint(fd, locmap[a], 'x'); + fprintf(fd, " -- "); + locprint(fd, locmap[b], 'x'); + fprintf(fd, "\n"); +} + +void dumpasm(Isel *s, FILE *fd) +{ + size_t i, j; + char *sep; + Asmbb *bb; + + fprintf(fd, "WORKLISTS -- \n"); + wlprint(stdout, "spill", s->wlspill, s->nwlspill); + wlprint(stdout, "simp", s->wlsimp, s->nwlsimp); + wlprint(stdout, "freeze", s->wlfreeze, s->nwlfreeze); + /* noisy to dump this all the time; only dump for higher debug levels */ + if (debugopt['r'] > 2) { + fprintf(fd, "IGRAPH ----- \n"); + for (i = 0; i < maxregid; i++) { + for (j = i; j < maxregid; j++) { + if (gbhasedge(s, i, j)) + printedge(stdout, "", i, j); + } + } + } + fprintf(fd, "ASM -------- \n"); + for (j = 0; j < s->nbb; j++) { + bb = s->bb[j]; + fprintf(fd, "\n"); + fprintf(fd, "Bb: %d labels=(", bb->id); + sep = ""; + for (i = 0; i < bb->nlbls; i++) {; + fprintf(fd, "%s%s", bb->lbls[i], sep); + sep = ","; + } + fprintf(fd, ")\n"); + + fprintf(fd, 
"Pred: "); + setprint(fd, bb->pred); + fprintf(fd, "Succ: "); + setprint(fd, bb->succ); + + fprintf(fd, "Use: "); + locsetprint(fd, bb->use); + fprintf(fd, "Def: "); + locsetprint(fd, bb->def); + fprintf(fd, "Livein: "); + locsetprint(fd, bb->livein); + fprintf(fd, "Liveout: "); + locsetprint(fd, bb->liveout); + for (i = 0; i < bb->ni; i++) + iprintf(fd, bb->il[i]); + } + fprintf(fd, "ENDASM -------- \n"); +} + diff --git a/6/regs.def b/6/regs.def new file mode 100644 index 0000000..aa08e07 --- /dev/null +++ b/6/regs.def @@ -0,0 +1,117 @@ +Reg(Rnone, "%NOREG", ModeB) +/* byte regs */ +Reg(Ral, "%al", ModeB) +Reg(Rcl, "%cl", ModeB) +Reg(Rdl, "%dl", ModeB) +Reg(Rbl, "%bl", ModeB) +Reg(Rsil, "%sil", ModeB) +Reg(Rdil, "%dil", ModeB) +Reg(Rspl, "%spl", ModeB) +Reg(Rbpl, "%bpl", ModeB) +Reg(Rr8b, "%r8b", ModeB) +Reg(Rr9b, "%r9b", ModeB) +Reg(Rr10b, "%r10b", ModeB) +Reg(Rr11b, "%r11b", ModeB) +Reg(Rr12b, "%r12b", ModeB) +Reg(Rr13b, "%r13b", ModeB) +Reg(Rr14b, "%r14b", ModeB) +Reg(Rr15b, "%r15b", ModeB) + +/* high byte regs. 
We *NEVER* allocate these */ +Reg(Rah, "%ah", ModeB) +Reg(Rch, "%ch", ModeB) +Reg(Rdh, "%dh", ModeB) +Reg(Rbh, "%bh", ModeB) + +/* short regs */ +Reg(Rax, "%ax", ModeW) +Reg(Rbx, "%bx", ModeW) +Reg(Rcx, "%cx", ModeW) +Reg(Rdx, "%dx", ModeW) +Reg(Rsi, "%si", ModeW) +Reg(Rdi, "%di", ModeW) +Reg(Rsp, "%sp", ModeW) +Reg(Rbp, "%bp", ModeW) +Reg(Rr8w, "%r8w", ModeW) +Reg(Rr9w, "%r9w", ModeW) +Reg(Rr10w, "%r10w", ModeW) +Reg(Rr11w, "%r11w", ModeW) +Reg(Rr12w, "%r12w", ModeW) +Reg(Rr13w, "%r13w", ModeW) +Reg(Rr14w, "%r14w", ModeW) +Reg(Rr15w, "%r15w", ModeW) + + +/* long regs */ +Reg(Reax, "%eax", ModeL) +Reg(Recx, "%ecx", ModeL) +Reg(Redx, "%edx", ModeL) +Reg(Rebx, "%ebx", ModeL) +Reg(Resi, "%esi", ModeL) +Reg(Redi, "%edi", ModeL) +Reg(Resp, "%esp", ModeL) +Reg(Rebp, "%ebp", ModeL) +Reg(Rr8d, "%r8d", ModeL) +Reg(Rr9d, "%r9d", ModeL) +Reg(Rr10d, "%r10d", ModeL) +Reg(Rr11d, "%r11d", ModeL) +Reg(Rr12d, "%r12d", ModeL) +Reg(Rr13d, "%r13d", ModeL) +Reg(Rr14d, "%r14d", ModeL) +Reg(Rr15d, "%r15d", ModeL) + +/* quad regs */ +Reg(Rrax, "%rax", ModeQ) +Reg(Rrcx, "%rcx", ModeQ) +Reg(Rrdx, "%rdx", ModeQ) +Reg(Rrbx, "%rbx", ModeQ) +Reg(Rrsi, "%rsi", ModeQ) +Reg(Rrdi, "%rdi", ModeQ) +Reg(Rr8, "%r8", ModeQ) +Reg(Rr9, "%r9", ModeQ) +Reg(Rr10, "%r10", ModeQ) +Reg(Rr11, "%r11", ModeQ) +Reg(Rr12, "%r12", ModeQ) +Reg(Rr13, "%r13", ModeQ) +Reg(Rr14, "%r14", ModeQ) +Reg(Rr15, "%r15", ModeQ) + +/* floating point registers */ +Reg(Rxmm0f, "%xmm0", ModeF) +Reg(Rxmm1f, "%xmm1", ModeF) +Reg(Rxmm2f, "%xmm2", ModeF) +Reg(Rxmm3f, "%xmm3", ModeF) +Reg(Rxmm4f, "%xmm4", ModeF) +Reg(Rxmm5f, "%xmm5", ModeF) +Reg(Rxmm6f, "%xmm6", ModeF) +Reg(Rxmm7f, "%xmm7", ModeF) +Reg(Rxmm8f, "%xmm8", ModeF) +Reg(Rxmm9f, "%xmm9", ModeF) +Reg(Rxmm10f, "%xmm10", ModeF) +Reg(Rxmm11f, "%xmm11", ModeF) +Reg(Rxmm12f, "%xmm12", ModeF) +Reg(Rxmm13f, "%xmm13", ModeF) +Reg(Rxmm14f, "%xmm14", ModeF) +Reg(Rxmm15f, "%xmm15", ModeF) + +/* double precision floating point registers */ +Reg(Rxmm0d, "%xmm0", ModeD) +Reg(Rxmm1d, "%xmm1", 
ModeD) +Reg(Rxmm2d, "%xmm2", ModeD) +Reg(Rxmm3d, "%xmm3", ModeD) +Reg(Rxmm4d, "%xmm4", ModeD) +Reg(Rxmm5d, "%xmm5", ModeD) +Reg(Rxmm6d, "%xmm6", ModeD) +Reg(Rxmm7d, "%xmm7", ModeD) +Reg(Rxmm8d, "%xmm8", ModeD) +Reg(Rxmm9d, "%xmm9", ModeD) +Reg(Rxmm10d, "%xmm10", ModeD) +Reg(Rxmm11d, "%xmm11", ModeD) +Reg(Rxmm12d, "%xmm12", ModeD) +Reg(Rxmm13d, "%xmm13", ModeD) +Reg(Rxmm14d, "%xmm14", ModeD) +Reg(Rxmm15d, "%xmm15", ModeD) + +Reg(Rrip, "%rip", ModeQ) +Reg(Rrsp, "%rsp", ModeQ) +Reg(Rrbp, "%rbp", ModeQ) diff --git a/6/simp.c b/6/simp.c new file mode 100644 index 0000000..0eeb992 --- /dev/null +++ b/6/simp.c @@ -0,0 +1,1836 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" +#include "opt.h" +#include "asm.h" + +#include "platform.h" /* HACK. We need some platform specific code gen behavior. *sigh.* */ + + +/* takes a list of nodes, and reduces it (and it's subnodes) to a list + * following these constraints: + * - All nodes are expression nodes + * - Nodes with side effects are root nodes + * - All nodes operate on machine-primitive types and tuples + */ +typedef struct Simp Simp; +struct Simp { + int isglobl; + + Node **stmts; + size_t nstmts; + + /* return handling */ + Node *endlbl; + Node *ret; + int isbigret; + + /* pre/postinc handling */ + Node **incqueue; + size_t nqueue; + + /* break/continue handling */ + Node **loopstep; + size_t nloopstep; + Node **loopexit; + size_t nloopexit; + + /* location handling */ + Node **blobs; + size_t nblobs; + size_t stksz; + size_t argsz; + Htab *globls; + Htab *stkoff; +}; + +static char *asmname(Node *n); +static Node *simp(Simp *s, Node *n); +static Node *rval(Simp *s, Node *n, Node *dst); +static Node *lval(Simp *s, Node *n); +static Node *assign(Simp *s, Node *lhs, Node *rhs); +static void simpcond(Simp *s, Node *n, Node *ltrue, Node 
*lfalse); +static void simpconstinit(Simp *s, Node *dcl); +static Node *simpcast(Simp *s, Node *val, Type *to); +static Node *simpslice(Simp *s, Node *n, Node *dst); +static Node *idxaddr(Simp *s, Node *seq, Node *idx); +static void umatch(Simp *s, Node *pat, Node *val, Type *t, Node *iftrue, Node *iffalse); + +/* useful constants */ +static Type *tyintptr; +static Type *tyword; +static Type *tyvoid; + +size_t alignto(size_t sz, Type *t) +{ + size_t a; + size_t i; + + t = tybase(t); + a = 0; + switch (t->type) { + case Tyarray: + a = alignto(1, t->sub[0]); + case Tytuple: + for (i = 0; i < t->nsub; i++) + a = max(alignto(1, t->sub[i]), a); + break; + case Tystruct: + for (i = 0; i < t->nmemb; i++) + a = max(alignto(1, decltype(t->sdecls[i])), a); + break; + default: + a = tysize(t); + break; + } + + return align(sz, min(a, Ptrsz)); +} + +static Type *base(Type *t) +{ + assert(t->nsub == 1); + return t->sub[0]; +} + +static Node *add(Node *a, Node *b) +{ + Node *n; + + assert(size(a) == size(b)); + n = mkexpr(a->line, Oadd, a, b, NULL); + n->expr.type = a->expr.type; + return n; +} + +static Node *addk(Node *n, uvlong v) +{ + Node *k; + + k = mkintlit(n->line, v); + k->expr.type = exprtype(n); + return add(n, k); +} + +static Node *sub(Node *a, Node *b) +{ + Node *n; + + n = mkexpr(a->line, Osub, a, b, NULL); + n->expr.type = a->expr.type; + return n; +} + +static Node *subk(Node *n, uvlong v) +{ + Node *k; + + k = mkintlit(n->line, v); + k->expr.type = exprtype(n); + return sub(n, k); +} + +static Node *mul(Node *a, Node *b) +{ + Node *n; + + n = mkexpr(a->line, Omul, a, b, NULL); + n->expr.type = a->expr.type; + return n; +} + +static int addressable(Simp *s, Node *a) +{ + if (a->type == Ndecl || (a->type == Nexpr && exprop(a) == Ovar)) + return hthas(s->stkoff, a) || hthas(s->globls, a); + else + return stacknode(a); +} + +int stacktype(Type *t) +{ + /* the types are arranged in types.def such that this is true */ + t = tybase(t); + return t->type >= Tyslice; +} 
+ +int floattype(Type *t) +{ + t = tybase(t); + return t->type == Tyfloat32 || t->type == Tyfloat64; +} + +int stacknode(Node *n) +{ + if (n->type == Nexpr) + return stacktype(n->expr.type); + else + return stacktype(n->decl.type); +} + +int floatnode(Node *n) +{ + if (n->type == Nexpr) + return floattype(n->expr.type); + else + return floattype(n->decl.type); +} + +static void forcelocal(Simp *s, Node *n) +{ + assert(n->type == Ndecl || (n->type == Nexpr && exprop(n) == Ovar)); + s->stksz += size(n); + s->stksz = align(s->stksz, min(size(n), Ptrsz)); + if (debugopt['i']) { + dump(n, stdout); + printf("declared at %zd, size = %zd\n", s->stksz, size(n)); + } + htput(s->stkoff, n, itop(s->stksz)); +} + +static void declarelocal(Simp *s, Node *n) +{ + if (stacknode(n)) + forcelocal(s, n); +} + +/* takes the address of a node, possibly converting it to + * a pointer to the base type 'bt' */ +static Node *addr(Simp *s, Node *a, Type *bt) +{ + Node *n; + + n = mkexpr(a->line, Oaddr, a, NULL); + if (!addressable(s, a)) + forcelocal(s, a); + if (!bt) + n->expr.type = mktyptr(a->line, a->expr.type); + else + n->expr.type = mktyptr(a->line, bt); + return n; +} + +static Node *load(Node *a) +{ + Node *n; + + assert(a->expr.type->type == Typtr); + n = mkexpr(a->line, Oderef, a, NULL); + n->expr.type = base(a->expr.type); + return n; +} + +static Node *deref(Node *a) +{ + Node *n; + + assert(a->expr.type->type == Typtr); + n = mkexpr(a->line, Oderef, a, NULL); + n->expr.type = base(a->expr.type); + return n; +} + +static Node *set(Node *a, Node *b) +{ + Node *n; + + assert(a != NULL && b != NULL); + assert(exprop(a) == Ovar || exprop(a) == Oderef); + n = mkexpr(a->line, Oset, a, b, NULL); + n->expr.type = exprtype(a); + return n; +} + +static Node *disp(int line, uint v) +{ + Node *n; + + n = mkintlit(line, v); + n->expr.type = tyintptr; + return n; +} + +static Node *word(int line, uint v) +{ + Node *n; + + n = mkintlit(line, v); + n->expr.type = tyword; + return n; +} + 
+static void append(Simp *s, Node *n) +{ + lappend(&s->stmts, &s->nstmts, n); +} + +static int ispure(Node *n) +{ + return ispureop[exprop(n)]; +} + +static int isconstfn(Node *s) +{ + return s->decl.isconst && decltype(s)->type == Tyfunc; +} + +/* For x86, the assembly names are generated as follows: + * local symbols: .name + * un-namespaced symbols: <symprefix>name + * namespaced symbols: <symprefix>namespace$name + */ +static char *asmname(Node *n) +{ + char *s; + int len; + + len = strlen(Symprefix); + if (n->name.ns) + len += strlen(n->name.ns) + 1; /* +1 for separator */ + len += strlen(n->name.name) + 1; + + s = xalloc(len + 1); + s[0] = '\0'; + if (n->name.ns) + snprintf(s, len, "%s%s$%s", Symprefix, n->name.ns, n->name.name); + else if (n->name.name[0] == '.') + snprintf(s, len, "%s", n->name.name); + else + snprintf(s, len, "%s%s", Symprefix, n->name.name); + return s; +} + +size_t tysize(Type *t) +{ + size_t sz; + size_t i; + + sz = 0; + if (!t) + die("size of empty type => bailing."); + switch (t->type) { + case Tyvoid: + die("void has no size"); + return 1; + case Tybool: case Tyint8: + case Tybyte: case Tyuint8: + return 1; + case Tyint16: case Tyuint16: + return 2; + case Tyint: case Tyint32: + case Tyuint: case Tyuint32: + case Tychar: /* utf32 */ + return 4; + + case Typtr: case Tyfunc: + case Tyvalist: /* ptr to first element of valist */ + return Ptrsz; + + case Tyint64: case Tylong: + case Tyuint64: case Tyulong: + return 8; + + /*end integer types*/ + case Tyfloat32: + return 4; + case Tyfloat64: + return 8; + + case Tyslice: + return 2*Ptrsz; /* len; ptr */ + case Tyname: + return tysize(t->sub[0]); + case Tyarray: + t->asize = fold(t->asize, 1); + assert(exprop(t->asize) == Olit); + return t->asize->expr.args[0]->lit.intval * tysize(t->sub[0]); + case Tytuple: + for (i = 0; i < t->nsub; i++) { + sz = alignto(sz, t->sub[i]); + sz += tysize(t->sub[i]); + } + sz = alignto(sz, t); + return sz; + break; + case Tystruct: + for (i = 0; i < 
t->nmemb; i++) { + sz = alignto(sz, decltype(t->sdecls[i])); + sz += size(t->sdecls[i]); + } + sz = alignto(sz, t); + return sz; + break; + case Tyunion: + sz = Wordsz; + for (i = 0; i < t->nmemb; i++) + if (t->udecls[i]->etype) + sz = max(sz, tysize(t->udecls[i]->etype) + Wordsz); + return align(sz, Ptrsz); + break; + case Tybad: case Tyvar: case Typaram: case Tyunres: case Ntypes: + die("Type %s does not have size; why did it get down to here?", tystr(t)); + break; + } + return -1; +} + +size_t size(Node *n) +{ + Type *t; + + if (n->type == Nexpr) + t = n->expr.type; + else + t = n->decl.type; + return tysize(t); +} + +static Node *gentemp(Simp *simp, Node *e, Type *ty, Node **dcl) +{ + char buf[128]; + static int nexttmp; + Node *t, *r, *n; + + snprintf(buf, 128, ".t%d", nexttmp++); + n = mkname(e->line, buf); + t = mkdecl(e->line, n, ty); + r = mkexpr(e->line, Ovar, n, NULL); + r->expr.type = t->decl.type; + r->expr.did = t->decl.did; + if (dcl) + *dcl = t; + return r; +} + +static Node *temp(Simp *simp, Node *e) +{ + Node *t, *dcl; + + assert(e->type == Nexpr); + t = gentemp(simp, e, e->expr.type, &dcl); + if (stacknode(e)) + declarelocal(simp, dcl); + return t; +} + +static void jmp(Simp *s, Node *lbl) +{ + append(s, mkexpr(lbl->line, Ojmp, lbl, NULL)); +} + +static void cjmp(Simp *s, Node *cond, Node *iftrue, Node *iffalse) +{ + Node *jmp; + + jmp = mkexpr(cond->line, Ocjmp, cond, iftrue, iffalse, NULL); + append(s, jmp); +} + +static Node *slicelen(Simp *s, Node *sl) +{ + /* *(&sl + sizeof(size_t)) */ + return load(addk(addr(s, sl, tyintptr), Ptrsz)); +} + + +static Node *seqlen(Simp *s, Node *n, Type *ty) +{ + Node *t, *r; + + if (exprtype(n)->type == Tyslice) { + t = slicelen(s, n); + r = simpcast(s, t, ty); + } else if (exprtype(n)->type == Tyarray) { + t = exprtype(n)->asize; + r = simpcast(s, t, ty); + } else { + r = NULL; + } + return r; +} + +/* if foo; bar; else baz;; + * => cjmp (foo) :bar :baz */ +static void simpif(Simp *s, Node *n, Node *exit) 
+{ + Node *l1, *l2, *l3; + Node *iftrue, *iffalse; + + l1 = genlbl(); + l2 = genlbl(); + if (exit) + l3 = exit; + else + l3 = genlbl(); + + iftrue = n->ifstmt.iftrue; + iffalse = n->ifstmt.iffalse; + + simpcond(s, n->ifstmt.cond, l1, l2); + simp(s, l1); + simp(s, iftrue); + jmp(s, l3); + simp(s, l2); + /* because lots of bunched up end labels are ugly, + * coalesce them by handling 'elif'-like constructs + * separately */ + if (iffalse && iffalse->type == Nifstmt) { + simpif(s, iffalse, exit); + } else { + simp(s, iffalse); + jmp(s, l3); + } + + if (!exit) + simp(s, l3); +} + +/* init; while cond; body;; + * => init + * jmp :cond + * :body + * ...body... + * ...step... + * :cond + * ...cond... + * cjmp (cond) :body :end + * :end + */ +static void simploop(Simp *s, Node *n) +{ + Node *lbody; + Node *lend; + Node *lcond; + Node *lstep; + + lbody = genlbl(); + lcond = genlbl(); + lstep = genlbl(); + lend = genlbl(); + + lappend(&s->loopstep, &s->nloopstep, lstep); + lappend(&s->loopexit, &s->nloopexit, lend); + + simp(s, n->loopstmt.init); /* init */ + jmp(s, lcond); /* goto test */ + simp(s, lbody); /* body lbl */ + simp(s, n->loopstmt.body); /* body */ + simp(s, lstep); /* test lbl */ + simp(s, n->loopstmt.step); /* step */ + simp(s, lcond); /* test lbl */ + simpcond(s, n->loopstmt.cond, lbody, lend); /* repeat? */ + simp(s, lend); /* exit */ + + s->nloopstep--; + s->nloopexit--; +} + +/* pat; seq; + * body;; + * + * => + * .pseudo = seqinit + * jmp :cond + * :body + * ...body... + * :step + * ...step... + * :cond + * ...cond... + * cjmp (cond) :match :end + * :match + * ...match... 
+ * cjmp (match) :body :step + * :end + */ +static void simpiter(Simp *s, Node *n) +{ + Node *lbody, *lstep, *lcond, *lmatch, *lend; + Node *idx, *len, *dcl, *seq, *val, *done; + Node *zero; + + lbody = genlbl(); + lstep = genlbl(); + lcond = genlbl(); + lmatch = genlbl(); + lend = genlbl(); + + lappend(&s->loopstep, &s->nloopstep, lstep); + lappend(&s->loopexit, &s->nloopexit, lend); + + zero = mkintlit(n->line, 0); + zero->expr.type = tyintptr; + + seq = rval(s, n->iterstmt.seq, NULL); + idx = gentemp(s, n, tyintptr, &dcl); + declarelocal(s, dcl); + + /* setup */ + append(s, assign(s, idx, zero)); + jmp(s, lcond); + simp(s, lbody); + /* body */ + simp(s, n->iterstmt.body); + /* step */ + simp(s, lstep); + simp(s, assign(s, idx, addk(idx, 1))); + /* condition */ + simp(s, lcond); + len = seqlen(s, seq, tyintptr); + done = mkexpr(n->line, Olt, idx, len, NULL); + cjmp(s, done, lmatch, lend); + simp(s, lmatch); + val = load(idxaddr(s, seq, idx)); + umatch(s, n->iterstmt.elt, val, val->expr.type, lbody, lstep); + simp(s, lend); + + s->nloopstep--; + s->nloopexit--; +} + +static Ucon *finducon(Node *n) +{ + size_t i; + Type *t; + Ucon *uc; + + t = tybase(n->expr.type); + if (exprop(n) != Oucon) + return NULL; + for (i = 0; i < t->nmemb; i++) { + uc = t->udecls[i]; + if (!strcmp(namestr(uc->name), namestr(n->expr.args[0]))) + return uc; + } + die("No ucon?!?"); + return NULL; +} + +static Node *uconid(Simp *s, Node *n) +{ + Ucon *uc; + + if (exprop(n) != Oucon) + return load(addr(s, n, mktype(n->line, Tyuint))); + + uc = finducon(n); + return word(uc->line, uc->id); +} + +static Node *patval(Simp *s, Node *n, Type *t) +{ + if (exprop(n) == Oucon) + return n->expr.args[1]; + else if (exprop(n) == Olit) + return n; + else + return load(addk(addr(s, n, t), Wordsz)); +} + +static void umatch(Simp *s, Node *pat, Node *val, Type *t, Node *iftrue, Node *iffalse) +{ + Node *v, *x, *y; + Node *deeper, *next; + Node **patarg, *lit, *idx; + char *str; + size_t len; + Ucon *uc; + 
size_t i; + size_t off; + + assert(pat->type == Nexpr); + t = tybase(t); + if (exprop(pat) == Ovar && !decls[pat->expr.did]->decl.isconst) { + v = assign(s, pat, val); + append(s, v); + jmp(s, iftrue); + return; + } + switch (t->type) { + /* Never supported */ + case Tyvoid: case Tybad: case Tyvalist: case Tyvar: + case Typaram: case Tyunres: case Tyname: case Ntypes: + die("Unsupported type for pattern"); + break; + /* only valid for string literals */ + case Tyslice: + lit = pat->expr.args[0]; + if (exprop(pat) != Olit || lit->lit.littype != Lstr) + die("Unsupported pattern"); + str = lit->lit.strval; + + /* load slice length */ + next = genlbl(); + x = slicelen(s, val); + len = strlen(str); + y = mkintlit(lit->line, len); + y->expr.type = tyintptr; + v = mkexpr(pat->line, Oeq, x, y, NULL); + cjmp(s, v, next, iffalse); + append(s, next); + + for (i = 0; i < len; i++) { + next = genlbl(); + x = mkintlit(pat->line, str[i]); + x->expr.type = mktype(-1, Tybyte); + idx = mkintlit(pat->line, i); + idx->expr.type = tyintptr; + y = load(idxaddr(s, val, idx)); + v = mkexpr(pat->line, Oeq, x, y, NULL); + v->expr.type = mktype(pat->line, Tybool); + cjmp(s, v, next, iffalse); + append(s, next); + } + jmp(s, iftrue); + break; + case Tybool: case Tychar: case Tybyte: + case Tyint8: case Tyint16: case Tyint32: case Tyint: + case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint: + case Tyint64: case Tyuint64: case Tylong: case Tyulong: + case Tyfloat32: case Tyfloat64: + case Typtr: case Tyfunc: + v = mkexpr(pat->line, Oeq, pat, val, NULL); + v->expr.type = mktype(pat->line, Tybool); + cjmp(s, v, iftrue, iffalse); + break; + /* We got lucky. 
The structure of tuple, array, and struct literals + * is the same, so long as we don't inspect the type, so we can + * share the code*/ + case Tystruct: case Tytuple: case Tyarray: + patarg = pat->expr.args; + off = 0; + for (i = 0; i < pat->expr.nargs; i++) { + off = alignto(off, exprtype(patarg[i])); + next = genlbl(); + v = load(addk(addr(s, val, exprtype(patarg[i])), off)); + umatch(s, patarg[i], v, exprtype(patarg[i]), next, iffalse); + append(s, next); + off += size(patarg[i]); + } + jmp(s, iftrue); + break; + case Tyunion: + uc = finducon(pat); + if (!uc) + uc = finducon(val); + + deeper = genlbl(); + + x = uconid(s, pat); + y = uconid(s, val); + v = mkexpr(pat->line, Oeq, x, y, NULL); + v->expr.type = tyintptr; + cjmp(s, v, deeper, iffalse); + append(s, deeper); + if (uc->etype) { + pat = patval(s, pat, uc->etype); + val = patval(s, val, uc->etype); + umatch(s, pat, val, uc->etype, iftrue, iffalse); + } + break; + } +} + +static void simpmatch(Simp *s, Node *n) +{ + Node *end, *cur, *next; /* labels */ + Node *val, *tmp; + Node *m; + size_t i; + + end = genlbl(); + val = temp(s, n->matchstmt.val); + tmp = rval(s, n->matchstmt.val, val); + if (val != tmp) + append(s, assign(s, val, tmp)); + for (i = 0; i < n->matchstmt.nmatches; i++) { + m = n->matchstmt.matches[i]; + + /* check pattern */ + cur = genlbl(); + next = genlbl(); + umatch(s, m->match.pat, val, val->expr.type, cur, next); + + /* do the action if it matches */ + append(s, cur); + simp(s, m->match.block); + jmp(s, end); + append(s, next); + } + append(s, end); +} + +static void simpblk(Simp *s, Node *n) +{ + size_t i; + + pushstab(n->block.scope); + for (i = 0; i < n->block.nstmts; i++) { + n->block.stmts[i] = fold(n->block.stmts[i], 0); + simp(s, n->block.stmts[i]); + } + popstab(); +} + +static Node *simpblob(Simp *s, Node *blob, Node ***l, size_t *nl) +{ + Node *n, *d, *r; + char lbl[128]; + + n = mkname(blob->line, genlblstr(lbl, 128)); + d = mkdecl(blob->line, n, blob->expr.type); + r = 
mkexpr(blob->line, Ovar, n, NULL); + + d->decl.init = blob; + d->decl.type = blob->expr.type; + d->decl.isconst = 1; + htput(s->globls, d, strdup(lbl)); + + r->expr.did = d->decl.did; + r->expr.type = blob->expr.type; + r->expr.isconst = 1; + + lappend(l, nl, d); + return r; +} + +/* gets the byte offset of 'memb' within the aggregate type 'aggr' */ +static size_t offset(Node *aggr, Node *memb) +{ + Type *ty; + size_t i; + size_t off; + + ty = tybase(exprtype(aggr)); + if (ty->type == Typtr) + ty = tybase(ty->sub[0]); + + assert(ty->type == Tystruct); + off = 0; + for (i = 0; i < ty->nmemb; i++) { + off = alignto(off, decltype(ty->sdecls[i])); + if (!strcmp(namestr(memb), declname(ty->sdecls[i]))) + return off; + off += size(ty->sdecls[i]); + } + die("Could not find member %s in struct", namestr(memb)); + return -1; +} + +static Node *ptrsized(Simp *s, Node *v) +{ + if (size(v) == Ptrsz) + return v; + else if (size(v) < Ptrsz) + v = mkexpr(v->line, Ozwiden, v, NULL); + else if (size(v) > Ptrsz) + v = mkexpr(v->line, Otrunc, v, NULL); + v->expr.type = tyintptr; + return v; +} + +static Node *membaddr(Simp *s, Node *n) +{ + Node *t, *u, *r; + Node **args; + Type *ty; + + args = n->expr.args; + ty = tybase(exprtype(args[0])); + if (ty->type == Typtr) { + t = lval(s, args[0]); + } else { + t = addr(s, lval(s, args[0]), exprtype(n)); + } + u = disp(n->line, offset(args[0], args[1])); + r = add(t, u); + r->expr.type = mktyptr(n->line, n->expr.type); + return r; +} + +static Node *idxaddr(Simp *s, Node *seq, Node *idx) +{ + Node *a, *t, *u, *v; /* temps */ + Node *r; /* result */ + Type *ty; + size_t sz; + + a = rval(s, seq, NULL); + ty = exprtype(seq)->sub[0]; + if (exprtype(seq)->type == Tyarray) + t = addr(s, a, ty); + else if (seq->expr.type->type == Tyslice) + t = load(addr(s, a, mktyptr(seq->line, ty))); + else + die("Can't index type %s\n", tystr(seq->expr.type)); + assert(t->expr.type->type == Typtr); + u = rval(s, idx, NULL); + u = ptrsized(s, u); + sz = 
tysize(ty); + v = mul(u, disp(seq->line, sz)); + r = add(t, v); + return r; +} + +static Node *slicebase(Simp *s, Node *n, Node *off) +{ + Node *t, *u, *v; + Type *ty; + int sz; + + t = rval(s, n, NULL); + u = NULL; + ty = tybase(exprtype(n)); + switch (ty->type) { + case Typtr: u = t; break; + case Tyarray: u = addr(s, t, base(exprtype(n))); break; + case Tyslice: u = load(addr(s, t, mktyptr(n->line, base(exprtype(n))))); break; + default: die("Unslicable type %s", tystr(n->expr.type)); + } + /* safe: all types we allow here have a sub[0] that we want to grab */ + if (off) { + off = ptrsized(s, rval(s, off, NULL)); + sz = tysize(n->expr.type->sub[0]); + v = mul(off, disp(n->line, sz)); + return add(u, v); + } else { + return u; + } +} + +static Node *lval(Simp *s, Node *n) +{ + Node *r; + + switch (exprop(n)) { + case Ovar: r = n; break; + case Oidx: r = deref(idxaddr(s, n->expr.args[0], n->expr.args[1])); break; + case Oderef: r = deref(rval(s, n->expr.args[0], NULL)); break; + case Omemb: r = deref(membaddr(s, n)); break; + default: + die("%s cannot be an lval", opstr(exprop(n))); + break; + } + return r; +} + +static void simpcond(Simp *s, Node *n, Node *ltrue, Node *lfalse) +{ + Node **args; + Node *v, *lnext; + + args = n->expr.args; + switch (exprop(n)) { + case Oland: + lnext = genlbl(); + simpcond(s, args[0], lnext, lfalse); + append(s, lnext); + simpcond(s, args[1], ltrue, lfalse); + break; + case Olor: + lnext = genlbl(); + simpcond(s, args[0], ltrue, lnext); + append(s, lnext); + simpcond(s, args[1], ltrue, lfalse); + break; + case Olnot: + simpcond(s, args[0], lfalse, ltrue); + break; + default: + v = rval(s, n, NULL); + cjmp(s, v, ltrue, lfalse); + break; + } +} + +static Node *intconvert(Simp *s, Node *from, Type *to, int issigned) +{ + Node *r; + size_t fromsz, tosz; + + fromsz = size(from); + tosz = tysize(to); + r = rval(s, from, NULL); + if (fromsz > tosz) { + r = mkexpr(from->line, Otrunc, r, NULL); + } else if (tosz > fromsz) { + if (issigned) 
+ r = mkexpr(from->line, Oswiden, r, NULL); + else + r = mkexpr(from->line, Ozwiden, r, NULL); + } + r->expr.type = to; + return r; +} + +static Node *simpcast(Simp *s, Node *val, Type *to) +{ + Node *r; + Type *t; + + r = NULL; + /* do the type conversion */ + switch (tybase(to)->type) { + case Tybool: + case Tyint8: case Tyint16: case Tyint32: case Tyint64: + case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64: + case Tyint: case Tyuint: case Tylong: case Tyulong: + case Tychar: case Tybyte: + case Typtr: + t = tybase(exprtype(val)); + switch (t->type) { + /* ptr -> slice conversion is disallowed */ + case Tyslice: + if (t->type == Typtr) + fatal(val->line, "Bad cast from %s to %s", + tystr(exprtype(val)), tystr(to)); + r = slicebase(s, val, NULL); + break; + /* signed conversions */ + case Tyint8: case Tyint16: case Tyint32: case Tyint64: + case Tyint: case Tylong: + r = intconvert(s, val, to, 1); + break; + /* unsigned conversions */ + case Tybool: + case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64: + case Tyuint: case Tyulong: case Tychar: case Tybyte: + case Typtr: + r = intconvert(s, val, to, 0); + break; + case Tyfloat32: case Tyfloat64: + if (tybase(to)->type == Typtr) + fatal(val->line, "Bad cast from %s to %s", + tystr(exprtype(val)), tystr(to)); + r = mkexpr(val->line, Oflt2int, rval(s, val, NULL), NULL); + r->expr.type = to; + break; + default: + fatal(val->line, "Bad cast from %s to %s", + tystr(exprtype(val)), tystr(to)); + } + break; + case Tyfloat32: case Tyfloat64: + t = tybase(exprtype(val)); + switch (t->type) { + case Tyint8: case Tyint16: case Tyint32: case Tyint64: + case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64: + case Tyint: case Tyuint: case Tylong: case Tyulong: + case Tychar: case Tybyte: + r = mkexpr(val->line, Oflt2int, rval(s, val, NULL), NULL); + r->expr.type = to; + break; + default: + fatal(val->line, "Bad cast from %s to %s", + tystr(exprtype(val)), tystr(to)); + break; + } + break; + /* no other 
destination types are handled as things stand */ + default: + fatal(val->line, "Bad cast from %s to %s", + tystr(exprtype(val)), tystr(to)); + } + return r; +} + +/* Simplifies taking a slice of an array, pointer, + * or other slice down to primitive pointer operations */ +static Node *simpslice(Simp *s, Node *n, Node *dst) +{ + Node *t; + Node *start, *end; + Node *base, *sz, *len; + Node *stbase, *stlen; + + if (dst) + t = dst; + else + t = temp(s, n); + /* *(&slice) = (void*)base + off*sz */ + base = slicebase(s, n->expr.args[0], n->expr.args[1]); + start = ptrsized(s, rval(s, n->expr.args[1], NULL)); + end = ptrsized(s, rval(s, n->expr.args[2], NULL)); + len = sub(end, start); + /* we can be storing through a pointer, in the case + * of '*foo = bar'. */ + if (tybase(exprtype(t))->type == Typtr) { + stbase = set(simpcast(s, t, mktyptr(t->line, tyintptr)), base); + sz = addk(simpcast(s, t, mktyptr(t->line, tyintptr)), Ptrsz); + } else { + stbase = set(deref(addr(s, t, tyintptr)), base); + sz = addk(addr(s, t, tyintptr), Ptrsz); + } + /* *(&slice + ptrsz) = len */ + stlen = set(deref(sz), len); + append(s, stbase); + append(s, stlen); + return t; +} + +static Node *visit(Simp *s, Node *n) +{ + size_t i; + Node *r; + + for (i = 0; i < n->expr.nargs; i++) + n->expr.args[i] = rval(s, n->expr.args[i], NULL); + if (ispure(n)) { + r = n; + } else { + if (exprtype(n)->type == Tyvoid) { + r = NULL; + append(s, n); + } else { + r = temp(s, n); + append(s, set(r, n)); + } + } + return r; +} + +/* Takes a tuple and binds the i'th element of it to the + * i'th name on the rhs of the assignment. 
*/ +static Node *destructure(Simp *s, Node *lhs, Node *rhs) +{ + Node *plv, *prv, *lv, *sz, *stor, **args; + size_t off, i; + + args = lhs->expr.args; + rhs = rval(s, rhs, NULL); + off = 0; + for (i = 0; i < lhs->expr.nargs; i++) { + lv = lval(s, args[i]); + off = alignto(off, exprtype(lv)); + prv = add(addr(s, rhs, exprtype(args[i])), disp(rhs->line, off)); + if (stacknode(args[i])) { + sz = disp(lhs->line, size(lv)); + plv = addr(s, lv, exprtype(lv)); + stor = mkexpr(lhs->line, Oblit, plv, prv, sz, NULL); + } else { + stor = set(lv, load(prv)); + } + append(s, stor); + off += size(lv); + } + + return NULL; +} + +static Node *assign(Simp *s, Node *lhs, Node *rhs) +{ + Node *t, *u, *v, *r; + + if (exprop(lhs) == Otup) { + r = destructure(s, lhs, rhs); + } else { + t = lval(s, lhs); + u = rval(s, rhs, t); + + /* if we stored the result into t, rval() should return that, + * so we know our work is done. */ + if (u == t) { + r = t; + } else if (stacknode(lhs)) { + t = addr(s, t, exprtype(lhs)); + u = addr(s, u, exprtype(lhs)); + v = disp(lhs->line, size(lhs)); + r = mkexpr(lhs->line, Oblit, t, u, v, NULL); + } else { + r = set(t, u); + } + } + return r; +} + +static Node *assignat(Simp *s, Node *r, size_t off, Node *val) +{ + Node *pval, *pdst; + Node *sz; + Node *st; + + val = rval(s, val, NULL); + pdst = add(r, disp(val->line, off)); + + if (stacknode(val)) { + sz = disp(val->line, size(val)); + pval = addr(s, val, exprtype(val)); + st = mkexpr(val->line, Oblit, pdst, pval, sz, NULL); + } else { + st = set(deref(pdst), val); + } + append(s, st); + return r; +} + +/* Simplify tuple construction to a stack allocated + * value by evaluating the rvalue of each node on the + * rhs and assigning it to the correct offset from the + * head of the tuple. 
*/ +static Node *simptup(Simp *s, Node *n, Node *dst) +{ + Node **args; + Node *r; + size_t i, off; + + args = n->expr.args; + if (!dst) + dst = temp(s, n); + r = addr(s, dst, exprtype(dst)); + + off = 0; + for (i = 0; i < n->expr.nargs; i++) { + off = alignto(off, exprtype(args[i])); + assignat(s, r, off, args[i]); + off += size(args[i]); + } + return dst; +} + +static Node *simpucon(Simp *s, Node *n, Node *dst) +{ + Node *tmp, *u, *tag, *elt, *sz; + Node *r; + Type *ty; + Ucon *uc; + size_t i; + + /* find the ucon we're constructing here */ + ty = tybase(n->expr.type); + uc = NULL; + for (i = 0; i < ty->nmemb; i++) { + if (!strcmp(namestr(n->expr.args[0]), namestr(ty->udecls[i]->name))) { + uc = ty->udecls[i]; + break; + } + } + if (!uc) + die("Couldn't find union constructor"); + + if (dst) + tmp = dst; + else + tmp = temp(s, n); + + /* Set the tag on the ucon */ + u = addr(s, tmp, mktype(n->line, Tyuint)); + tag = mkintlit(n->line, uc->id); + tag->expr.type = mktype(n->line, Tyuint); + append(s, set(deref(u), tag)); + + + /* fill the value, if needed */ + if (!uc->etype) + return tmp; + elt = rval(s, n->expr.args[1], NULL); + u = addk(u, Wordsz); + if (stacktype(uc->etype)) { + elt = addr(s, elt, uc->etype); + sz = disp(n->line, tysize(uc->etype)); + r = mkexpr(n->line, Oblit, u, elt, sz, NULL); + } else { + r = set(deref(u), elt); + } + append(s, r); + return tmp; +} + +static Node *simpuget(Simp *s, Node *n, Node *dst) +{ + die("No uget simplification yet"); +} + +/* simplifies + * a || b + * to + * if a || b + * t = true + * else + * t = false + * ;; + */ +static Node *simplazy(Simp *s, Node *n) +{ + Node *r, *t, *u; + Node *ltrue, *lfalse, *ldone; + + /* set up temps and labels */ + r = temp(s, n); + ltrue = genlbl(); + lfalse = genlbl(); + ldone = genlbl(); + + /* simp the conditional */ + simpcond(s, n, ltrue, lfalse); + + /* if true */ + append(s, ltrue); + u = mkexpr(n->line, Olit, mkbool(n->line, 1), NULL); + u->expr.type = mktype(n->line, Tybool); + t 
= set(r, u); + append(s, t); + jmp(s, ldone); + + /* if false */ + append(s, lfalse); + u = mkexpr(n->line, Olit, mkbool(n->line, 0), NULL); + u->expr.type = mktype(n->line, Tybool); + t = set(r, u); + append(s, t); + jmp(s, ldone); + + /* finish */ + append(s, ldone); + return r; +} + +static Node *rval(Simp *s, Node *n, Node *dst) +{ + Node *r; /* expression result */ + Node *t, *u, *v; /* temporary nodes */ + Node **args; + size_t i; + const Op fusedmap[Numops] = { + [Oaddeq] = Oadd, + [Osubeq] = Osub, + [Omuleq] = Omul, + [Odiveq] = Odiv, + [Omodeq] = Omod, + [Oboreq] = Obor, + [Obandeq] = Oband, + [Obxoreq] = Obxor, + [Obsleq] = Obsl, + [Obsreq] = Obsr, + }; + const Op cmpmap[Numops][3] = { + [Oeq] = {Oeq, Ofeq, Oueq}, + [One] = {One, Ofne, Oune}, + [Ogt] = {Ogt, Ofgt, Ougt}, + [Oge] = {Oge, Ofge, Ouge}, + [Olt] = {Olt, Oflt, Oult}, + [Ole] = {Ole, Ofle, Oule} + }; + + r = NULL; + args = n->expr.args; + switch (exprop(n)) { + case Olor: case Oland: + r = simplazy(s, n); + break; + case Osize: + r = mkintlit(n->line, size(args[0])); + r->expr.type = exprtype(n); + break; + case Oslice: + r = simpslice(s, n, dst); + break; + case Oidx: + t = idxaddr(s, n->expr.args[0], n->expr.args[1]); + r = load(t); + break; + /* array.len slice.len are magic 'virtual' members. + * they need to be special cased. 
*/ + case Omemb: + if (exprtype(args[0])->type == Tyslice || exprtype(args[0])->type == Tyarray) { + r = seqlen(s, args[0], exprtype(n)); + } else { + t = membaddr(s, n); + r = load(t); + } + break; + case Oucon: + r = simpucon(s, n, dst); + break; + case Ouget: + r = simpuget(s, n, dst); + break; + case Otup: + r = simptup(s, n, dst); + break; + case Oarr: + if (!dst) + dst = temp(s, n); + t = addr(s, dst, exprtype(dst)); + for (i = 0; i < n->expr.nargs; i++) + assignat(s, t, size(n->expr.args[i])*i, n->expr.args[i]); + r = dst; + break; + case Ostruct: + if (!dst) + dst = temp(s, n); + t = addr(s, dst, exprtype(dst)); + for (i = 0; i < n->expr.nargs; i++) + assignat(s, t, offset(n, n->expr.args[i]->expr.idx), n->expr.args[i]); + r = dst; + break; + case Ocast: + r = simpcast(s, args[0], exprtype(n)); + break; + + /* fused ops: + * foo ?= blah + * => + * foo = foo ? blah*/ + case Oaddeq: case Osubeq: case Omuleq: case Odiveq: case Omodeq: + case Oboreq: case Obandeq: case Obxoreq: case Obsleq: case Obsreq: + assert(fusedmap[exprop(n)] != Obad); + u = rval(s, args[0], NULL); + v = rval(s, args[1], NULL); + v = mkexpr(n->line, fusedmap[exprop(n)], u, v, NULL); + v->expr.type = u->expr.type; + r = set(u, v); + break; + + /* ++expr(x) + * => args[0] = args[0] + 1 + * expr(x) */ + case Opreinc: + v = assign(s, args[0], addk(args[0], 1)); + append(s, v); + r = rval(s, args[0], NULL); + break; + case Opredec: + v = assign(s, args[0], subk(args[0], 1)); + append(s, v); + r = rval(s, args[0], NULL); + break; + + /* expr(x++) + * => expr + * x = x + 1 + */ + case Opostinc: + r = lval(s, args[0]); + t = assign(s, r, addk(r, 1)); + lappend(&s->incqueue, &s->nqueue, t); + break; + case Opostdec: + r = lval(s, args[0]); + t = assign(s, r, subk(r, 1)); + lappend(&s->incqueue, &s->nqueue, t); + break; + case Olit: + switch (args[0]->lit.littype) { + case Lchr: case Lbool: case Llbl: + r = n; + break; + case Lint: + /* we can only have up to 4 byte immediates, but they + * can be 
moved into 64 bit regs */ + if (args[0]->lit.intval < 0xffffffffULL) + r = n; + else + r = simpblob(s, n, &s->blobs, &s->nblobs); + break; + case Lstr: case Lflt: + r = simpblob(s, n, &s->blobs, &s->nblobs); + break; + case Lfunc: + r = simpblob(s, n, &file->file.stmts, &file->file.nstmts); + break; + } + break; + case Ovar: + r = n; + break; + case Oret: + if (s->isbigret) { + t = rval(s, args[0], NULL); + t = addr(s, t, exprtype(args[0])); + u = disp(n->line, size(args[0])); + v = mkexpr(n->line, Oblit, s->ret, t, u, NULL); + append(s, v); + } else if (n->expr.nargs && n->expr.args[0]) { + t = s->ret; + t = set(t, rval(s, args[0], NULL)); + append(s, t); + } + /* drain the increment queue before we return */ + for (i = 0; i < s->nqueue; i++) + append(s, s->incqueue[i]); + lfree(&s->incqueue, &s->nqueue); + jmp(s, s->endlbl); + break; + case Oasn: + r = assign(s, args[0], args[1]); + break; + case Ocall: + if (exprtype(n)->type != Tyvoid && stacktype(exprtype(n))) { + r = temp(s, n); + linsert(&n->expr.args, &n->expr.nargs, 1, addr(s, r, exprtype(n))); + for (i = 0; i < n->expr.nargs; i++) + n->expr.args[i] = rval(s, n->expr.args[i], NULL); + append(s, n); + } else { + r = visit(s, n); + } + break; + case Oaddr: + t = lval(s, args[0]); + if (exprop(t) == Ovar) /* Ovar is the only one that doesn't return Oderef(Oaddr(...)) */ + r = addr(s, t, exprtype(t)); + else + r = t->expr.args[0]; + break; + case Oneg: + if (istyfloat(exprtype(n))) { + t =mkfloat(n->line, -1.0); + u = mkexpr(n->line, Olit, t, NULL); + t->lit.type = n->expr.type; + u->expr.type = n->expr.type; + v = simpblob(s, u, &s->blobs, &s->nblobs); + r = mkexpr(n->line, Ofmul, v, rval(s, args[0], NULL), NULL); + r->expr.type = n->expr.type; + } else { + r = visit(s, n); + } + break; + case Obreak: + if (s->nloopexit == 0) + fatal(n->line, "trying to break when not in loop"); + jmp(s, s->loopexit[s->nloopexit - 1]); + break; + case Ocontinue: + if (s->nloopstep == 0) + fatal(n->line, "trying to continue 
when not in loop"); + jmp(s, s->loopstep[s->nloopstep - 1]); + break; + case Oeq: case One: case Ogt: case Oge: case Olt: case Ole: + if (istyfloat(tybase(exprtype(args[0])))) + i = 1; + else if (istysigned(tybase(exprtype(args[0])))) + i = 0; + else + i = 2; + n->expr.op = cmpmap[n->expr.op][i]; + r = visit(s, n); + break; + default: + if (istyfloat(exprtype(n))) { + switch (exprop(n)) { + case Oadd: n->expr.op = Ofadd; break; + case Osub: n->expr.op = Ofsub; break; + case Omul: n->expr.op = Ofmul; break; + case Odiv: n->expr.op = Ofdiv; break; + default: break; + } + } + r = visit(s, n); + break; + case Obad: + die("Bad operator"); + break; + } + return r; +} + +static void declarearg(Simp *s, Node *n) +{ + assert(n->type == Ndecl || (n->type == Nexpr && exprop(n) == Ovar)); + s->argsz = align(s->argsz, min(size(n), Ptrsz)); + htput(s->stkoff, n, itop(-(s->argsz + 2*Ptrsz))); + if (debugopt['i']) { + dump(n, stdout); + printf("declared at %zd\n", -(s->argsz + 2*Ptrsz)); + } + s->argsz += size(n); +} + +static int islbl(Node *n) +{ + Node *l; + if (exprop(n) != Olit) + return 0; + l = n->expr.args[0]; + return l->type == Nlit && l->lit.littype == Llbl; +} + +static Node *simp(Simp *s, Node *n) +{ + Node *r, *t, *u; + size_t i; + + if (!n) + return NULL; + r = NULL; + switch (n->type) { + case Nblock: simpblk(s, n); break; + case Nifstmt: simpif(s, n, NULL); break; + case Nloopstmt: simploop(s, n); break; + case Niterstmt: simpiter(s, n); break; + case Nmatchstmt: simpmatch(s, n); break; + case Nexpr: + if (islbl(n)) + append(s, n); + else + r = rval(s, n, NULL); + if (r) + append(s, r); + /* drain the increment queue for this expr */ + for (i = 0; i < s->nqueue; i++) + append(s, s->incqueue[i]); + lfree(&s->incqueue, &s->nqueue); + break; + + case Ndecl: + declarelocal(s, n); + if (!n->decl.init) + break; + t = mkexpr(n->line, Ovar, n->decl.name, NULL); + u = mkexpr(n->line, Oasn, t, n->decl.init, NULL); + u->expr.type = n->decl.type; + t->expr.type = 
n->decl.type; + t->expr.did = n->decl.did; + simp(s, u); + break; + default: + die("Bad node passsed to simp()"); + break; + } + return r; +} + +/* + * Turns a deeply nested function body into a flatter + * and simpler representation, which maps easily and + * directly to assembly instructions. + */ +static void flatten(Simp *s, Node *f) +{ + Node *dcl; + Type *ty; + size_t i; + + assert(f->type == Nfunc); + s->nstmts = 0; + s->stmts = NULL; + s->endlbl = genlbl(); + s->ret = NULL; + + /* make a temp for the return type */ + ty = f->func.type->sub[0]; + if (stacktype(ty)) { + s->isbigret = 1; + s->ret = gentemp(s, f, mktyptr(f->line, ty), &dcl); + declarearg(s, dcl); + } else if (ty->type != Tyvoid) { + s->isbigret = 0; + s->ret = gentemp(s, f, ty, &dcl); + } + + for (i = 0; i < f->func.nargs; i++) { + declarearg(s, f->func.args[i]); + } + simp(s, f->func.body); + + append(s, s->endlbl); +} + +static Func *simpfn(Simp *s, char *name, Node *n, Vis vis) +{ + size_t i; + Func *fn; + Cfg *cfg; + + if(debugopt['i'] || debugopt['F'] || debugopt['f']) + printf("\n\nfunction %s\n", name); + + /* set up the simp context */ + /* unwrap to the function body */ + n = n->expr.args[0]; + n = n->lit.fnval; + pushstab(n->func.scope); + flatten(s, n); + popstab(); + + if (debugopt['f'] || debugopt['F']) + for (i = 0; i < s->nstmts; i++) + dump(s->stmts[i], stdout); + for (i = 0; i < s->nstmts; i++) { + if (s->stmts[i]->type != Nexpr) + continue; + if (debugopt['f']) { + printf("FOLD FROM ----------\n"); + dump(s->stmts[i], stdout); + } + s->stmts[i] = fold(s->stmts[i], 0); + if (debugopt['f']) { + printf("TO ------------\n"); + dump(s->stmts[i], stdout); + printf("DONE ----------------\n"); + } + } + + cfg = mkcfg(s->stmts, s->nstmts); + if (debugopt['t'] || debugopt['s']) + dumpcfg(cfg, stdout); + + fn = zalloc(sizeof(Func)); + fn->name = strdup(name); + if (vis != Visintern) + fn->isexport = 1; + fn->stksz = align(s->stksz, 8); + fn->stkoff = s->stkoff; + fn->ret = s->ret; + 
fn->cfg = cfg; + return fn; +} + +static void fillglobls(Stab *st, Htab *globls) +{ + void **k; + size_t i, nk; + Stab *stab; + Node *s; + + k = htkeys(st->dcl, &nk); + for (i = 0; i < nk; i++) { + s = htget(st->dcl, k[i]); + htput(globls, s, asmname(s->decl.name)); + } + free(k); + + k = htkeys(st->ns, &nk); + for (i = 0; i < nk; i++) { + stab = htget(st->ns, k[i]); + fillglobls(stab, globls); + } + free(k); +} + +static void extractsub(Simp *s, Node ***blobs, size_t *nblobs, Node *e) +{ + size_t i; + + switch (exprop(e)) { + case Oslice: + if (exprop(e->expr.args[0]) == Oarr) + e->expr.args[0] = simpblob(s, e->expr.args[0], blobs, nblobs); + break; + case Oarr: + case Ostruct: + for (i = 0; i < e->expr.nargs; i++) + extractsub(s, blobs, nblobs, e->expr.args[i]); + break; + default: + break; + } +} + +static void simpconstinit(Simp *s, Node *dcl) +{ + Node *e; + + dcl->decl.init = fold(dcl->decl.init, 1);; + e = dcl->decl.init; + if (e && exprop(e) == Olit) { + if (e->expr.args[0]->lit.littype == Lfunc) + simpblob(s, e, &file->file.stmts, &file->file.nstmts); + else + lappend(&s->blobs, &s->nblobs, dcl); + } else if (dcl->decl.isconst) { + switch (exprop(e)) { + case Oarr: + case Ostruct: + case Oslice: + extractsub(s, &s->blobs, &s->nblobs, e); + lappend(&s->blobs, &s->nblobs, dcl); + break; + default: + break; + } + } else if (!dcl->decl.isconst && !e) { + lappend(&s->blobs, &s->nblobs, dcl); + } else { + die("Non-constant initializer for %s\n", declname(dcl)); + } +} + +static void simpglobl(Node *dcl, Htab *globls, Func ***fn, size_t *nfn, Node ***blob, size_t *nblob) +{ + Simp s = {0,}; + char *name; + Func *f; + + name = asmname(dcl->decl.name); + s.stkoff = mkht(varhash, vareq); + s.globls = globls; + s.blobs = *blob; + s.nblobs = *nblob; + + if (dcl->decl.isextern || dcl->decl.isgeneric) + return; + if (isconstfn(dcl)) { + f = simpfn(&s, name, dcl->decl.init, dcl->decl.vis); + lappend(fn, nfn, f); + } else { + simpconstinit(&s, dcl); + } + *blob = s.blobs; 
+ *nblob = s.nblobs; + free(name); +} + +void gen(Node *file, char *out) +{ + Htab *globls, *strtab; + Node *n, **blob; + Func **fn; + size_t nfn, nblob; + size_t i; + FILE *fd; + + /* declare useful constants */ + tyintptr = mktype(-1, Tyuint64); + tyword = mktype(-1, Tyuint); + tyvoid = mktype(-1, Tyvoid); + + fn = NULL; + nfn = 0; + blob = NULL; + nblob = 0; + globls = mkht(varhash, vareq); + + /* We need to define all global variables before use */ + fillglobls(file->file.globls, globls); + + pushstab(file->file.globls); + for (i = 0; i < file->file.nstmts; i++) { + n = file->file.stmts[i]; + switch (n->type) { + case Nuse: /* nothing to do */ + case Nimpl: + break; + case Ndecl: + simpglobl(n, globls, &fn, &nfn, &blob, &nblob); + break; + default: + die("Bad node %s in toplevel", nodestr(n->type)); + break; + } + } + popstab(); + + fd = fopen(out, "w"); + if (!fd) + die("Couldn't open fd %s", out); + + strtab = mkht(strhash, streq); + fprintf(fd, ".data\n"); + for (i = 0; i < nblob; i++) + genblob(fd, blob[i], globls, strtab); + fprintf(fd, ".text\n"); + for (i = 0; i < nfn; i++) + genasm(fd, fn[i], globls, strtab); + genstrings(fd, strtab); + fclose(fd); +} @@ -0,0 +1,19 @@ +Copyright (c) 2013 Ori Bernstein <ori@eigenstate.org> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..a00ca6e --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ +SUB = parse \ + mi \ + 6 \ + muse \ + myrbuild \ + myrtypes \ + libstd \ + doc + +include mk/c.mk +include config.mk + +check: all + make -C test check @@ -0,0 +1,8 @@ +TODO: + - Closures [Tests: closure] + - Not-boneheaded asm [Tests: ] + - Optimized asm [Tests: ] + - Use-def analysis [Tests: usedef] + - Deferred code [Tests: ] + - Module-mains [Tests: ] + - User defined traits [Tests: ] diff --git a/bench/intsort.myr b/bench/intsort.myr new file mode 100644 index 0000000..9fdd8cc --- /dev/null +++ b/bench/intsort.myr @@ -0,0 +1,18 @@ +use std + +const main = { + var a + var i + var rng + var t0, t1 + + rng = std.mksrng(123) + a = std.slalloc(1000000) + for i = 0; i < a.len; i++ + a[i] = std.rand32(rng) + ;; + t0 = std.now() + std.sort(a, std.numcmp) + t1 = std.now() + std.put("time = %l\n", t1 - t0) +} diff --git a/configure b/configure new file mode 100755 index 0000000..d445110 --- /dev/null +++ b/configure @@ -0,0 +1,64 @@ +#!/bin/sh + +for i in `seq 300`; do + echo "Lots of output to emulate automake... ok" + echo "Testing for things you'll never use... fail" + echo "Satisfying the fortran77 lobby... ok" + echo "Burning CPU time checking for the bloody obvious... 
ok" +done +echo "Automake emulated successfully" + +INST_ROOT='/usr/local' +prefix="/usr/local" + +for arg in $*; do + shift 1 + case $arg in + "--prefix" | "-p") + prefix=shift $* + ;; + --prefix=*) + prefix=`echo $arg | sed 's/^--prefix=//g'` + ;; + "--help" | "-h") + echo "Usage:" + echo " --prefix | -p: The prefix to install to" + break; + ;; + *) echo "Unrecognized argument $arg";; + esac +done + +OS=`uname` + +echo export INST_ROOT=$prefix > config.mk + +echo '#define Instroot "'$prefix'"' > config.h +case $OS in + *Linux*) + echo '#define Asmcmd "as -g -o %s %s"' >> config.h + echo '#define Fprefix ""' >> config.h + echo 'export SYS=linux' >> config.mk + ;; + *Darwin*) + echo '#define Asmcmd "as -g -o %s %s"' >> config.h + echo '#define Fprefix "_"' >> config.h + echo 'export SYS=osx' >> config.mk + ;; + *) + echo '' + echo '' + echo '********************************' + echo 'Unknown architecture.' + echo 'Cannot build' + echo '********************************' + rm -f config.h config.mk + exit 1 + ;; +esac + +cat << EOF + Building with: + prefix=$prefix +EOF + diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..9ee91a7 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,21 @@ +MAN=mc.1 \ + muse.1 \ + myrbuild.1 \ + +include ../config.mk + +all: + +install: + @echo install -m 644 $(MAN) $(abspath $(DESTDIR)/$(INST_ROOT)/share/man/man1); \ + mkdir -p $(abspath $(DESTDIR)/$(INST_ROOT)/share/man/man1); \ + install -m 644 $(MAN) $(abspath $(DESTDIR)/$(INST_ROOT)/share/man/man1); \ + +uninstall: $(MAN) + @for i in $^; do \ + echo rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/share/man/man1/$$i); \ + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/share/man/man1/$$i); \ + done + +clean: + diff --git a/doc/compiler.txt b/doc/compiler.txt new file mode 100644 index 0000000..e296c0e --- /dev/null +++ b/doc/compiler.txt @@ -0,0 +1,336 @@ + Structure of the Myrddin Compiler + Aug 2012 + Ori Bernstein + +TABLE OF CONTENTS: + + 1. OVERVIEW + 1.1. Tree Structure + 2. 
PARSING + 2.1. Lexing + 2.2. AST Creation + 2.3. Type checking + 2.4. Generic Specialization + 2.5. Serialization + 2.6. Usefiles + 3. FLATTENING + 3.1. Control Flow + 3.2. Complex Expressions + 4. OPTIMIZATION + 4.1. Constant Folding + 5. CODE GENERATION + 5.1. Instruction Selection + 5.2. Register Allocation + 6. TUTORIAL: ADDING A STATEMENT + 6.1. Stubbing in the node types + 6.2. Parsing + 6.3. Flattening + 6.4. Optimization + 6.5. Instruction Selection + +1. OVERVIEW: + + The Myrddin compiler suite consists of a set of binaries, written in C, + which translate Myrddin source code to the assembly most appropriate for + the target platform, and subsequently invoke the native assembler on it. + The linker is not invoked by the compiler, and the final output is an + object file for the target platform. + + The compilers are named with a single character for the target platform, + with a single character for the language being compiled. A table of the + compilers and their names is below: + + Compiler Platform + ------------------------- + 6m x86-64 + + + The compilation is divided into a small number of phases. The first phase + is parsing, where the source code is first tokenized, the abstract syntax + tree (AST) is generated, and semantically checked. The second phase is the + machine-dependent tree flattening. In this phase, the tree is decomposed + function by function into simple operations that are relatively close to + the machine. Sizes are fixed, and all loops, if statements, etc. are + replaced with gotos. The next phase is a machine-independent optimizer, + which currently does nothing other than simply folding trees. In the final + phase, the instructions are selected and the registers are allocated. + + So, to recap, the phases are as follows: + + parse Tokenize, parse, and analyze the source + flatten Rewrite the complex nodes into simple ones + opt Optimize the flattened source trees + gen Generate the assembly code + + 1.1.
Tree Structure: + + File nodes (n->type == Nfile) represent the files being compiled. The current + node is held in a global variable called, unsurprisingly, 'file'. The + global symbol table, export table, uses, and other compilation-specific + information is stored in this node. This implies that the compiler can + only process one file at a time. + + Name nodes (n->type == Nname) are simply names, possibly with a namespace + attached. They are left as leaf nodes in the tree, specifying variable + names, union tags, and just about anything else with a name. + + Use nodes (n->type == Nuse) simply tell the compiler that a usefile by the + name stored in this node will be loaded. + + Expression nodes (n->type == Nexpr) represent expressions. They consist of + an operator, a type, a few flags, possibly a declaration ID, and a list of + arguments. + + Operators are defined in parse/ops.def, and start with an 'O' by + convention; eg: Oadd, Osub, etc. + + The declaration id (n->expr.did) is only valid on expressions representing + a single variable (n->expr.op == Ovar). The DID is a unique identifier + representing the declaration node that the variable refers to. This is used + for a variety of things, from fast comparisons to allowing us to put the + node into a bit set easily. + + Literal nodes (n->type == Nlit) hold a literal value. The type held is + stored in littype, which are defined in parse/lits.def. + + The various statement nodes (Nloopstmt, Nifstmt, Nmatchstmt, Nblock, + Nlbl) are all statements that may appear within a block node (Nblock). + + Declaration nodes declare a name in a symbol table. TODO: MORE DETAIL. + + Uelt nodes declare a union element. TODO: MORE DETAIL. + + Func nodes declare a function. TODO: MORE DETAIL. + + + +2. PARSING: + + This phase takes in a source file, and outputs a tree that is guaranteed + to be valid. The tree nodes are defined in parse/parse.h in struct Node, + and have one of the types defined in parse/nodetypes.def. 
Node types + start with 'N' by convention; eg: Nfile, Nifstmt, etc. + + 2.1. Lexing: + + Lexing occurs in parse/tok.c. Because we want to use this lexer from + within yacc, the entry point to this code is in 'yylex()'. As required + by yacc, 'yylex()' returns an integer defining the token type, and + sets the 'tok' member of yylval to the token that was taken from the + input stream. In addition, to allow for better error messages, the + global variable 'curtok' is set to the value of 'yylval.tok'. This + allows yyerror to print the last token that was seen. + + The tokens that are allowable are generated by Yacc from the '%token' + definitions in parse/gram.y, and are placed into the file + 'parse/gram.h'. The lexer and parser code is the only code that + depends on these token constants. + + The lexer is initialized through 'tokinit(char *file)'. This function + will open the file passed in, read all the data from it in one go + and set up the internal data for the tokenizer. The tokenizing is then + done while the whole file is in memory, which means that this code + will work poorly on files that are larger than the address space + available to the compiler. If this is a problem, you deserve all the + pain that is caused. + + The file data is stored in the three global variables 'fidx', 'fbuf', + and 'fbufsz'. The actual tokenization happens through 'toknext()' and + its callees, which operate on these data structures character by + character, matching the values read, and shoving them into the 'Tok' + data structure. + + 2.2. AST Creation: + + The parser used is a traditional Yacc-based parser. It is generated + from the source in parse/gram.y. The starting production is 'file', + which fills in a global 'file' tree node. This 'file' tree node must + be initialized before yyparse() is called. + + + 2.3. Type Checking: + + Type checking is done through unification of types. It's implemented + in parse/infer.c. 
It proceeds through a simple unification algorithm, + which is documented in lang.txt. As a result, only the internal + details of this algorithm will be discussed here. + + The first step done is loading and resolving use files. This is + deferred to the type checking phase for two reasons. First, we + do not want to force tools to have all dependencies compiled if they + use this parser, even though full type checking is impossible + until all usefiles are loaded. And second, this is when the + information is actually needed. + + Next, the types declared in the package section are merged with the + exported types, allowing us to start off with our type information as + complete as possible, and making sure that the types of globals + actually match up with the exported types. + + The next step is the actual type inference. We do a bottom-up walk of + the tree, unifying types as we go. There are subtleties with the + member operator, however. Because the '.' operator is used for both + member lookups and namespace lookups, before we descend into a node + that has operator Omemb, we need to check if it's a namespaced name, + or an actual member reference. If it is a namespaced name, we replace + the expression with an Ovar expression. This check happens in the + 'checkns()' function. Second, because we need to know the LHS of a + member expression before we can check if the RHS is valid, and we + are not guaranteed to know this at the first time that we see it, the + expression is assumed to be valid, and this assumption is verified in + a post-processing pass. Casts are validated in a deferred manner + similarly. + + Generic variables are added to a list of generic callsites to + specialize when they are seen as a leaf of an Ovar node. + + The type inference, to this point, has only built up a mapping + of types. 
So, for example, if we were to have the inferred types + for the following set of statements: + + var a + var b + var c + a = b + c = b + 1 + + We would have the mappings: + + $t0 -> $t1 + $t1 -> $t2 + $t2 -> int + + So, in the 'typesub()' function, we iterate over the entire tree, + replacing every instance of a non-concrete type with the final + mapped type. If a type does not map to a fully concrete type, + this is where we flag an error. + + FIXME: DESCRIBE HOW YOU FIXED GENERICS ONCE YOU FIX GENERICS. + + 2.4. Generic Specialization: + + After type inference (well, technically, as the final step of it), + we walk through the list of callsites that need instantiations + of generics, and create a specialized generic instance for each of + them. This specialization is done, unsurprisingly, in specialize.c, + by the simple algorithm of cloning the entire tree that needs to + be specialized, and walking over all nodes substituting the types + that are replacing the type parameters. + + 2.5. Serialization: + + Trees of all sorts can be serialized and deserialized from files, + as long as they are fully typed. Trees containing type variables (ie, + uninferred types) cannot be serialized, as type variables cannot be + deserialized meaningfully. + + The format for this is only documented in the source, and is a + straightforward dump of the trees to memory. It is constantly shifting, + and will not reliably work between compiler versions. + + 2.6. Usefiles: + + Usefiles are more or less files that consist of a single character tag + that tells us what type of tree to deserialize. Because serialized + trees are compiler version dependent, so are usefiles. + +3. FLATTENING: + + This phase is invoked repeatedly on each top-level declaration that we + want to generate code for. 
There is a good chance that this flattening + phase should be made machine-independent, and passed as a parameter + a machine description describing known integer and pointer sizes, among + other machine attributes. However, for now, it is machine-dependent, + and lives in 6/simp.c. + + The goal of flattening a tree is to take semantically involved constructs + such as looping, and simplify things into something that is easy to + generate code for, as well as something that is easier to analyze for + optimization. + + 3.1. Control Flow: + + All if statements, loops, and other complex constructs are simplified + to jumps and conditional jumps. Loops are generally simplified from + something that would look like this: + + loop + init + cond + inc + body + + To something that would look like this: + + init + jmp cond + .loop: + body + inc + .cond: + cjmp cond .loop .end + .end: + + Boolean expressions are simplified to a location to jump to, as + described in section 8.4 of the Dragon book[1]. + + 3.2. Complex Expressions: + + Complex expressions such as copying types larger than a single machine + word, pulling members out of structures, emulating multiplication and + division for larger integer sizes, and similar operations are reduced + to trees that are expressible in terms of simple machine operations. + + By the end of the simplification pass, the following operators should + not be present in the trees: + + Obad Oret Opreinc Opostinc Opredec Opostdec Olor Oland Oaddeq + Osubeq Omuleq Odiveq Omodeq Oboreq Obandeq Obxoreq Obsleq + Obsreq Omemb Oslice Oidx Osize Numops Oucon Ouget Otup Oarr + Oslbase Osllen Ocast + + +4. OPTIMIZATION: + + Currently, there is virtually no optimization done on the trees after + flattening. The only optimization that is done is constant folding. + + 4.1. Constant Folding: + + Expressions with constant values are simplified algebraically. 
For + example, the expression 'x*1' is simplified to 'x', '0/n' is + simplified to '0', and so on. + + +5. CODE GENERATION: + + 5.1. Instruction Selection: + + Instruction selection is done via a simple handwritten bottom-up pass + over the tree. Common patterns such as scaled or offset indexing are + recognized by the patterns, but no attempts are made at finding an + optimal tiling. + + 5.2. Register Allocation: + + Register allocation is done via the algorithm described in "Iterated + Register Coalescing" by Appel and George. As of the time of this + writing, the register allocator does not yet implement overlapping + register classes. This will be done as described in "A generalized + algorithm for graph-coloring register allocation" by Smith, Ramsey, + and Holloway. + +6: TUTORIAL: ADDING A STATEMENT: + + 6.1. Stubbing in the node types: + + 6.2. Parsing: + + 6.3. Flattening: + + 6.4. Optimization: + + 6.5. Instruction Selection: + +[1] Aho, Sethi, Ullman: Compilers: Principles, Techniques, and Tools, 1988. + ISBN 0-201-10088-6 diff --git a/doc/lang.txt b/doc/lang.txt new file mode 100644 index 0000000..008c95a --- /dev/null +++ b/doc/lang.txt @@ -0,0 +1,832 @@ + The Myrddin Programming Language + Jul 2012 + Ori Bernstein + +TABLE OF CONTENTS: + + 1. ABOUT + 2. LEXICAL CONVENTIONS + 3. SYNTAX + 3.1. Declarations + 3.2. Literal Values + 3.3. Control Constructs and Blocks + 3.4. Expressions + 3.5. Data Types + 3.6. Packages and Uses + 4. TOOLCHAIN + 5. EXAMPLES + 6. STYLE GUIDE + 7. STANDARD LIBRARY + 8. GRAMMAR + 9. FUTURE DIRECTIONS + +1. ABOUT: + + Myrddin is designed to be a simple, low-level programming + language. It is designed to provide the programmer with + predictable behavior and a transparent compilation model, + while at the same time providing the benefits of strong + type checking, generics, type inference, and similar. + Myrddin is not a language designed to explore the forefront + of type theory or compiler technology. 
It is not a language + that is focused on guaranteeing perfect safety. Its focus + is on being a practical, small, fairly well defined, and + easy to understand language for work that needs to be close + to the hardware. + + Myrddin is a computer language influenced strongly by C + and ML, with ideas from Rust, Go, C++, and numerous other + sources and resources. + + +2. LEXICAL CONVENTIONS: + + The language is composed of several classes of tokens. There + are comments, identifiers, keywords, punctuation, and whitespace. + + Comments begin with "/*" and end with "*/". They may nest. + + /* this is a comment /* with another inside */ */ + + Identifiers begin with any alphabetic character or underscore, + and continue with any number of alphanumeric characters or + underscores. Currently the compiler places a limit of 1024 + bytes on the length of the identifier. + + some_id_234__ + + Keywords are a special class of identifier that is reserved + by the language and given a special meaning. The set of + keywords in Myrddin are as follows: + + castto match + const pkg + default protect + elif sizeof + else struct + export trait + extern true + false type + for union + generic use + goto var + if while + + + At the current stage of development, not all of these keywords are + implemented within the language.[1] + + Literals are a direct representation of a data object within the source of + the program. There are several literals implemented within the language. + These are fully described in section 3.2 of this manual. + + In the compiler, single semicolons (';') and newline (\x0a) + characters are treated identically, and are therefore interchangeable. + They will both be referred to as "endline"s throughout this manual. + + +3. SYNTAX OVERVIEW: + + 3.1. Declarations: + + A declaration consists of a declaration class (i.e., one + of 'const', 'var', or 'generic'), followed by a declaration + name, optionally followed by a type and assignment. 
One thing + you may note is that unlike most other languages, there is no + special function declaration syntax. Instead, a function is + declared like any other value: by assigning its name to a + constant or variable. + + const: Declares a constant value, which may not be + modified at run time. Constants must have + initializers defined. + var: Declares a variable value. This value may be + assigned to, copied from, and modified. + generic: Declares a specializable value. This value + has the same restrictions as a const, but + taking its address is not defined. The type + parameters for a generic must be explicitly + named in the declaration in order for their + substitution to be allowed. + + In addition, there is one modifier allowed on declarations: + 'extern'. Extern declarations are used to declare symbols from + another module which cannot be provided via the 'use' mechanism. + Typical uses would be to expose a function written in assembly. They + can also be used as a workaround for external dependencies. + + Examples: + + Declare a constant with a value 123. The type is not defined, + and will be inferred: + + const x = 123 + + Declare a variable with no value and no type defined. The + value can be assigned later (and must be assigned before use), + and the type will be inferred. + + var y + + Declare a generic with type '@a', and assigns it the value + 'blah'. Every place that 'z' is used, it will be specialized, + and the type parameter '@a' will be substituted. + + generic z : @a = blah + + Declare a function f with and without type inference. Both + forms are equivalent. 'f' takes two parameters, both of type + int, and returns their sum as an int + + const f = {a, b + var c : int = 42 + -> a + b + c + } + + const f : (a : int, b : int -> int) = {a : int, b : int -> int + var c : int = 42 + -> a + b + c + } + + 3.2. Literal Values + + Integers literals are a sequence of digits, beginning with a + digit and possibly separated by underscores. 
They are of a + generic type, and can be used where any numeric type is + expected. They may be prefixed with "0x" to indicate that the + following number is a hexadecimal value, or 0b to indicate a + binary value. Decimal values are not prefixed, and octal values + are not supported. + + eg: 0x123_fff, 0b1111, 1234 + + Floating-point literals are also a sequence of digits beginning with + a digit and possibly separated by underscores. They are also of a + generic type, and may be used whenever a floating-point type is + expected. Floating point literals are always in decimal, and + as of this writing, exponential notation is not supported[2] + + eg: 123.456 + + String literals represent a compact method of representing a byte + array. Any byte values are allowed in a string literal, and will be + spit out again by the compiler unmodified, with the exception of + escape sequences. + + There are a number of escape sequences supported for both character + and string literals: + \n newline + \r carriage return + \t tab + \b backspace + \" double quote + \' single quote + \v vertical tab + \\ single slash + \0 nul character + \xDD single byte value, where DD are two hex digits. + + String literals begin with a ", and continue to the next + unescaped ". + + eg: "foo\"bar" + + Character literals represent a single codepoint in the character + set. A character starts with a single quote, contains a single + codepoint worth of text, encoded either as an escape sequence + or in the input character set for the compiler (generally UTF8). + They share the same set of escape sequences as string literals. + + eg: 'א', '\n', '\u1234'[3] + + Boolean literals are either the keyword "true" or the keyword + "false". + + eg: true, false + + Function literals describe a function. They begin with a '{', + followed by a newline-terminated argument list, followed by a + body and closing '}'. They will be described in more detail + later in this manual. 
+ + eg: {a : int, b + -> a + b + } + + Sequence literals describe either an array or a structure + literal. They begin with a '[', followed by an initializer + sequence and closing ']'. For array literals, the initializer + sequence is either an indexed initializer sequence[4], or an + unindexed initializer sequence. For struct literals, the + initializer sequence is always a named initializer sequence. + + An unindexed initializer sequence is simply a comma separated + list of values. An indexed initializer sequence contains a + '#number=value' comma separated sequence, which indicates the + index of the array into which the value is inserted. A named + initializer sequence contains a comma separated list of + '.name=value' pairs. + + eg: [1,2,3], [#2=3, #1=2, #0=1], [.a = 42, .b="str"] + + A tuple literal is a parentheses separated list of values. + A single element tuple contains a trailing comma. + + eg: (1,), (1,'b',"three") + + Finally, while strictly not a literal, it's not a control + flow construct either. Labels are identifiers preceded by + colons. + + eg: :my_label + + They can be used as targets for gotos, as follows: + + goto my_label + + the ':' is not part of the label name. + + 3.3. Control Constructs and Blocks: + + if for + while match + goto + + The control statements in Myrddin are similar to those in many other + popular languages, and with the exception of 'match', there should + be no surprises to a user of any of the Algol derived languages. + Where a truth value is required, any type with the builtin trait + 'tctest' can be used in all of these. + + Blocks are the "carriers of code" in Myrddin programs. They consist + of series of expressions, typically ending with a ';;', although the + function-level block ends at the function's '}', and in if + statements, an 'elif' may terminate a block. They can contain any + number of declarations, expressions, control constructs, and empty + lines. 
Every control statement example below will (and, in fact, + must) have a block attached to the control statement. + + If statements branch one way or the other depending on the truth + value of their argument. The truth statement is separated from the + block body + + if true + std.put("The program always get here") + elif elephant != mouse + std.put("...eh.") + else + std.put("The program never gets here") + ;; + + For statements begin with an initializer, followed by a test + condition, followed by an increment action. For statements run the + initializer once before the loop is run, the test on each + iteration through the loop before the body, and the increment on + each iteration after the body. If the loop is broken out of early + (for example, by a goto), the final increment will not be run. The + syntax is as follows: + + for init; test; increment + blockbody() + ;; + + While loops are equivalent to for loops with empty initializers + and increments. They run the test on every iteration of the loop, + and exit only if it returns false. + + Match statements do pattern matching on values. They take as an + argument a value of type 't', and match it against a list of other + values of the same type. The patterns matched against can also contain + free names, which will be bound to the sub-value matched against. The + patterns are checked in order, and the first matching pattern has its + body executed, after which no other patterns will be matched. This + implies that if you have specific patterns mixed with more general + ones, the specific patterns must come first. + + Match patterns can be one of the following: + + - Union patterns + + These look like union constructors, only they define + a value to match against. + + - Literal patterns + + Any literal value can be matched against. + + - Constant patterns + + Any constant value can be matched against. + + More types of pattern to match will be added over time. 
+ + Match statements consist of the keyword 'match', followed by + the expression to match against the patterns, followed by a + newline. The body of the match statement consists of a list + of pattern clauses. A patterned clause is a pattern, followed + by a ':', followed by a block body. + + An example of the syntax follows: + + const Val234 = `Val 234 /* set up a constant value */ + var v = `Val 123 /* set up variable to match */ + match v + /* pattern clauses */ + `Val 123: + std.put("Matched literal union pat\n");; + Val234: + std.put("Matched const value pat\n") + ;; + `Val a: + std.put("Matched pattern with capture\n") + std.put("Captured value: a = %i\n", a) + ;; + a + std.put("A top level bind matches anything.");; + `Val 111 + std.put("Unreachable block.") + ;; + ;; + + + 3.4. Expressions: + + Myrddin expressions are relatively similar to expressions in C. The + operators are listed below in order of precedence, and a short + summary of what they do is given. For the sake of clarity, + 'x' will stand in for any expression composed entirely of + subexpressions with higher precedence than the current + operator. 'e' will stand in for any expression. Unless marked + otherwise, expressions are left associative. + + BUG: There are too many precedence levels. 
+ + + Precedence 14: (*ok, not really operators) + (,,,) Tuple Construction + (e) Grouping + name Bare names + literal Values + + Precedence 13: + x.name Member lookup + x++ Postincrement + x-- Postdecrement + x[e] Index + x[from,to] Slice + + Precedence 12: + ++x Preincrement + --x Predecrement + *x Dereference + &x Address + !x Logical negation + ~x Bitwise negation + +x Positive (no operation) + -x Negate x + + Precedence 11: + x << x Shift left + x >> x Shift right + + Precedence 10: + x * x Multiply + x / x Divide + x % x Modulo + + Precedence 9: + x + x Add + x - x Subtract + + Precedence 8: + x & y Bitwise and + + Precedence 7: + x | y Bitwise or + x ^ y Bitwise xor + + Precedence 6: + `Name x Union construction + + Precedence 5: + x castto(type) Cast expression + + Precedence 4: + x == x Equality + x != x Inequality + x > x Greater than + x >= x Greater than or equal to + x < x Less than + x <= x Less than or equal to + + Precedence 3: + x && x Logical and + + Precedence 2: + x || x Logical or + + Precedence 1: + x = x Assign Right assoc + x += x Fused add/assign Right assoc + x -= x Fused sub/assign Right assoc + x *= x Fused mul/assign Right assoc + x /= x Fused div/assign Right assoc + x %= x Fused mod/assign Right assoc + x |= x Fused or/assign Right assoc + x ^= x Fused xor/assign Right assoc + x &= x Fused and/assign Right assoc + x <<= x Fused shl/assign Right assoc + x >>= x Fused shr/assign Right assoc + + Precedence 0: + -> x Return expression + + All expressions on integers act on two's complement values which wrap + on overflow. Right shift expressions fill with the sign bit on + signed types, and fill with zeros on unsigned types. + + 3.5. Data Types: + + The language defines a number of built in primitive types. These + are not keywords, and in fact live in a separate namespace from + the variable names. Yes, this does mean that you could, if you want, + define a variable named 'int'. + + There are no implicit conversions within the language. 
All types + must be explicitly cast if you want to convert, and the casts must + be of compatible types, as will be described later. + + 3.5.1. Primitive types: + + void + bool char + int8 uint8 + int16 uint16 + int32 uint32 + int64 uint64 + int uint + long ulong + float32 float64 + + These types are as you would expect. 'void' represents a + lack of type, although for the sake of genericity, you can + assign between void types, return values of void, and so on. + This allows generics to not have to somehow work around void + being a toxic type. + + bool is a type that can only hold true and false. It can be + assigned, tested for equality, and used in the various boolean + operators. + + char is a 32 bit integer type, and is guaranteed to be able + to hold exactly one codepoint. It can be assigned integer + literals, tested against, compared, and all the other usual + numeric types. + + The various [u]intXX types hold, as expected, signed and + unsigned integers of the named sizes respectively. + Similarly, floats hold floating point types with the + indicated precision. + + var x : int declare x as an int + var y : float32 declare y as a 32 bit float + + + 3.5.2. Composite types: + + pointer + slice array + + Pointers are, as expected, values that hold the address of + the pointed to value. They are declared by appending a '*' + to the type. Pointer arithmetic is not allowed. They are + declared by appending a '*' to the base type + + Arrays are a group of N values, where N is part of the type. + Arrays of different sizes are incompatible. Arrays in + Myrddin, unlike many other languages, are passed by value. + They are declared by appending a '[SIZE]' to the base type. + + Slices are similar to arrays in many contemporary languages. + They are reference types that store the length of their + contents. They are declared by appending a '[,]' to the base + type. + + foo* type: pointer to foo + foo[123] type: array of 123 foo + foo[,] type: slice of foo + + 3.5.3. 
Aggregate types: + + tuple struct + union + + Tuples are the traditional product type. They are declared + by putting the comma separated list of types within square + brackets. + + Structs are aggregations of types with named members. They + are declared by putting the word 'struct' before a block of + declaration cores (ie, declarations without the storage type + specifier). + + Unions are the traditional sum type. They consist of a tag + (a keyword prefixed with a '`' (backtick)) indicating their + current contents, and a type to hold. They are declared by + placing the keyword 'union' before a list of tag-type pairs. + They may also omit the type, in which case, the tag is + sufficient to determine which option was selected. + + [int, int, char] a tuple of 2 ints and a char + + struct a struct containing an int named + a : int 'a', and a char named 'b'. + b : char + ;; + + union a union containing one of + `Thing int int or char. The values are not + `Other float32 named, but they are tagged. + ;; + + + 3.5.4. Magic types: + + tyvar typaram + tyname + + A tyname is a named type, similar to a typedef in C, however + it genuinely creates a new type, and not an alias. There are + no implicit conversions, but a tyname will inherit all + constraints of its underlying type. + + A typaram is a parametric type. It is used in generics as + a placeholder for a type that will be substituted in later. + It is an identifier prefixed with '@'. These are only valid + within generic contexts, and may not appear elsewhere. + + A tyvar is an internal implementation detail that currently + leaks in error messages out during type inference, and is a + major cause of confusing error messages. It should not be in + this manual, except that the current incarnation of the + compiler will make you aware of it. It looks like '@$type', + and is a variable that holds an incompletely inferred type. + + type mine = int creates a tyname named + 'mine', equivalent to int. 
+ + + @foo creates a type parameter + named '@foo'. + + + 3.6. + + The myrddin type system is a system similar to the Hindley Milner + system, however, types are not implicitly generalized. Instead, type + schemes (type parameters, in Myrddin lingo) must be explicitly provided + in the declarations. For purposes of brevity, instead of specifying type + rules for every operator, we group operators which behave identically + from the type system perspective into a small set of classes, and define + the constraints that they require. + + Type inference in Myrddin operates as a bottom up tree walk, + applying the type equations for the operator to its arguments. + It begins by initializing all leaf nodes with the most specific + known type for them as follows: + + 3.6.1 Types for leaf nodes: + + Variable Type + ---------------------- + var foo $t + + A type variable is the most specific type for a declaration + or function without any specified type + + var foo : t t + + If a type is specified, that type is taken for the + declaration. + + "asdf" byte[:] + + String literals are byte arrays. + + + 'a' char + + Char literals are of type 'char' + + true bool + false bool + + true/false are boolean literals + + 123 $t::(tcint,tcnum,tctest) + + Integer literals get a fresh type variable of type with + the constraints for int-like types. + + 123.1 $t::(tcfloat,tcnum,tctest) + + Float literals get a fresh type variable of type with + the constraints for float-like types. + + {a,b:t; } ($a,t -> $b) + + Function literals get the most specific type that can + be determined by their signature. + + + num-binop: + + + - * / % + += -= *= /= % + + Number binops require the constraint 'tcnum' for both the + + num-unary: + - + + Number binops require the constraint 'tcnum'. + + int-binop: + | & ^ << >> + |= &= ^= <<= >> + int-unary: + ~ ++ -- + + bool-binop: + || && == != + < <= > >= + + + 3.7. Packages and Uses: + + pkg use + + There are two keywords for module system. 
'use' is the simpler + of the two, and has two cases: + + use syspkg + use "localfile" + + The unquoted form searches all system include paths for 'syspkg' + and imports it into the namespace. By convention, the namespace + defined by 'syspkg' is 'syspkg', and is unique and unmerged. This + is not enforced, however. Typical usage of unquoted names is to + import a library that already exists. + + The quoted form searches the local directory for "localpkg". By + convention, the package it imports does not match the name + "localpkg", but instead is used as part of the definition of the + importer's package. This is a confusing description. + + A typical use of a quoted import is to allow splitting one package + into multiple files. In order to support this behavior, if a package + is defined in the current file, and a use statement imports a + package with the same namespace, the two namespaces are merged. + + The 'pkg' keyword allows you to define a (partial) package by + listing the symbols and types for export. For example, + + pkg mypkg = + type mytype + + const Myconst : int = 42 + const myfunc : (v : int -> bool) + ;; + + declares a package "mypkg", which defines three exports, "mytype", + "Myconst", and "myfunc". The definitions of the values may be + defined in the 'pkg' specification, but it is preferred to implement + them in the body of the code for readability. Scanning the export + list is desirable from a readability perspective. + +4. TOOLCHAIN: + + The toolchain used is inspired by the Plan 9 toolchain in name. There + is currently one compiler for x64, called '6m'. This compiler outputs + standard elf .o files, and supports these options: + + 6m [-h] [-o outfile] [-d[dbgopts]] inputs + -I path Add 'path' to use search path + -o Output to outfile + +5. EXAMPLES: + + 5.1. Hello World: + + use std + const main = { + std.put("Hello World!\n") + -> 0 + } + + TODO: DESCRIBE CONSTRUCTS. + + 5.2. 
Conditions: + + use std + const intmax = {a, b + if a > b + -> a + else + -> b + ;; + } + + const main = { + var x = 123 + var y = 456 + std.put("The max of %i, %i is %i\n", x, y, intmax(x, y)) + } + + TODO: DESCRIBE CONSTRUCTS. + + 5.3. Looping: + + use std + const innerprod = {a, b + var i + var sum + for i = 0; i < a.len; i++ + sum += a[i]*b[i] + ;; + } + + const main = { + std.put("The inner product is %i\n", innerprod([1,2,3], [4,5,6])) + } + + TODO: DESCRIBE CONSTRUCTS. + +6. STYLE GUIDE: + + 6.1. Brevity: + + Myrddin is a simple language which aims to strip away abstraction when + possible, and it is not well served by overly abstract or bulky code. + The code written should be a readable description of an algorithm, + aimed at conveying the essential operations in a linear and + straightforward fashion. + + Write for humans, not machines. Write linearly, so that an algorithm + can be understood with minimal function-chasing. + + 6.2. Naming: + + Names should be brief and evocative. A good name serves as a reminder + of what the function does. For functions, a single verb is ideal. For + local variables, a single character might suffice. Compact notation + is simpler to read, typographically. + + Variable names should describe the value contained, and function + names should describe the value returned. + + Good: spawn(myfunc) + Bad: create_new_thread_starting_at_function(myfunc) + + The identifiers used for constant values are put in Initialcase. + Functions and types are in singleword style, although underscores are + occasionally necessary to specify additional information within + functions, due to the lack of overloading.
+ + Good: + type mytype = int + var myvar : mytype + const Myconst = 42 + union + `Tagone int + ;; + + Bad: + type MyType = int /* types are 'singleword' */ + const my_func = {;...} /* function names should avoid _ */ + const myconst /* constants start with Uppercase */ + union + `sometag /* tags start with uppercase */ + ;; + + Acceptable: + const length_mm = {;...} /* '_' disambiguates returned values. */ + const length_cm = {;...} + + 6.3. Collections: + + + +7. STANDARD LIBRARY: + +8. GRAMMAR: + +9. FUTURE DIRECTIONS: + +BUGS: + +[1] TODO: trait, default, protect, +[2] TODO: exponential notation. +[3] TODO: \uDDDD escape sequences not yet recognized +[4] TODO: currently the only sequence literal implemented is the + unindexed one + diff --git a/doc/mc.1 b/doc/mc.1 new file mode 100644 index 0000000..471d013 --- /dev/null +++ b/doc/mc.1 @@ -0,0 +1,84 @@ +.TH MC 1 +.SH NAME +6m +.SH SYNOPSIS +.B 6m +.I -[dhIoS] +.I [file...] +.br +.SH DESCRIPTION +.PP +The ?m family of compilers compile Myrddin source into object files +for the corresponding architecture. There is one compiler for each +architecture supported, with a unique name. By default, if the input +file is named +.I filename.myr +then the object file that is generated will be named +.I filename.o. +If the filename does not end with the suffix +.I .myr +then the suffix +.I .o +will simply be appended to it. + +.PP +The following architectures are currently supported: +.TP +6m +x86-64 + +.PP +The compiler options are: + +.TP +.B -d [flTri] +Print debugging dumps. Additional options may be given to give more +debugging information for specific intermediate states of the compilation. + +.TP +.B -h +Print a summary of the available options. + +.TP +.B -I path +Add 'path' to the search path for unquoted use statements. This option +does not affect the search path for local usefiles, which are always +searched relative to the compiler's current working directory.
Without +any options, the search path defaults to /usr/include/myr. + +.TP +.B -o output-file +Specify that the generated code should be placed in 'output-file'. + +.TP +.B -S +Generate assembly code instead of an object file. + +.SH EXAMPLE +.EX + 6m foo.myr + 6m bar.myr + ld -o foobar foo.o bar.o +.EE + +.SH FILES +The source code for this compiler is available from +.B git://git.eigenstate.org/git/ori/mc.git + +.SH SEE ALSO +.IR muse(1) +.IR ld(1) +.IR as(1) + +.SH BUGS +.PP +The language is not yet complete, and the compilers often omit useful +constructs in spite of their desirability. +.PP +There are virtually no optimizations done, and the generated code is +often very poorly performing. +.PP +The current calling convention is stack-based and not register-based, even +on architectures where it should be register-based. +.PP +The calling convention is not compatible with C. diff --git a/doc/muse.1 b/doc/muse.1 new file mode 100644 index 0000000..d56d072 --- /dev/null +++ b/doc/muse.1 @@ -0,0 +1,95 @@ +.TH MUSE 1 +.SH NAME +muse +.SH SYNOPSIS +.B muse +.I -[dhImos] +.I [file...] +.br +.SH DESCRIPTION +.PP +The 'muse' tool takes as input a Myrddin source file and generates +a usefile from it. A usefile collects definitions exported from the +package specifications in Myrddin source code, and makes them available +for other programs to include with a 'use' statement. +.PP +It can also merge together a number of usefiles into one larger usefile +including all of the exported symbols. If an output file name is not given, +and we are not merging usefiles, then an input file named +.I filename.myr +will generate a usefile named +.I filename.use +\&. + +If the filename does not end with the suffix +.I .myr +then the suffix +.I .use +will simply be appended to it. + +.PP +The output of muse is architecture-independent. However, the format of the +generated file is not stable, and is not guaranteed to work across +different compiler versions.
+ +.PP +The muse options are: + +.TP +.B -d [flTri] +Print debugging dumps. Additional options may be given to give more +debugging information for specific intermediate states of the compilation. + +.TP +.B -h +Print a summary of the available options. + +.TP +.B -I path +Add 'path' to the search path for unquoted use statements. This option +does not affect the search path for local usefiles, which are always +searched relative to the compiler's current working directory. Without +any options, the search path defaults to /usr/include/myr. + +.TP +.B -o output-file +Specify that the generated usefile should be named +.I output-file + +.TP +.B -s +Print a summary of the symbols exported from the usefile that is specified. + +.SH EXAMPLE +.EX + muse foo.myr + muse -o bar.use bar-system-version.myr + muse -mo library foo.use bar.use +.EE + +.SH FILES +The source for muse is available from +.B git://git.eigenstate.org/git/ori/mc.git +and lives in the +.I muse/ +directory within the source tree. + +.SH SEE ALSO +.IR mc(1) +.IR ld(1) +.IR as(1) + +.SH BUGS +.PP +There is insufficient checking and validation done on usefiles. +.PP +The file format is in flux, and in current iterations, it is not at all compact. +.PP +There is no versioning on the usefiles as it stands. If you use the wrong +version with the wrong compiler, a mysterious error or even segfault is +likely. +.PP +This utility should not exist. Instead, the compiler should put the +exported symbol information into the object file or library directly. +.PP +The file format is not closed under concatenation. diff --git a/doc/myrbuild.1 b/doc/myrbuild.1 new file mode 100644 index 0000000..7eb2693 --- /dev/null +++ b/doc/myrbuild.1 @@ -0,0 +1,66 @@ +.TH MYRBUILD 1 +.SH NAME +myrbuild +.SH SYNOPSIS +.B myrbuild +.I -[hblI] +.I [file...]
+.br +.SH DESCRIPTION +.PP +The 'myrbuild' tool takes as input a list of Myrddin or assembly sources, +and compiles them in the correct dependency order into either a library or +an executable. If the source files are Myrddin sources, then the appropriate +usefiles will be created as well. It expects Myrddin source to be in '.myr' +files. + +.PP +Myrbuild will default to building for the current architecture. + +.PP +The myrbuild options are: + +.TP +.B -h +Print a summary of the available options. + +.TP +.B -b name +Compile source into a binary named 'name'. If neither this option nor +the '-l' option is given, myrbuild will create a binary called 'a.out'. + +.TP +.B -l 'name' +Compile source given into a library called 'lib<name>.a', and a matching +usefile called 'name'. Only static libraries are currently supported. + +.TP +.B -I path +Add 'path' to the search path for unquoted use statements. This option +does not affect the search path for local usefiles, which are always +searched relative to the compiler's current working directory. Without +any options, the search path defaults to /usr/include/myr. + +.SH EXAMPLE +.EX + myrbuild -b foo foo.myr + myrbuild -l foo bar.myr baz.myr + muse -mo library foo.use bar.use +.EE + +.SH FILES +The source for myrbuild is available from +.B git://git.eigenstate.org/git/ori/mc.git +and lives in the +.I myrbuild/ +directory within the source tree. + +.SH SEE ALSO +.IR mc(1) +.IR muse(1) +.IR ld(1) +.IR as(1) + +.SH BUGS +.PP +None known.
diff --git a/libstd/Makefile b/libstd/Makefile new file mode 100644 index 0000000..b7c522e --- /dev/null +++ b/libstd/Makefile @@ -0,0 +1,102 @@ +MYRLIB=std +MYRSRC= \ + alloc.myr \ + bigint.myr \ + bitset.myr \ + blat.myr \ + chartype.myr \ + cmp.myr \ + dial.myr \ + die.myr \ + endian.myr \ + env.myr \ + execvp.myr \ + extremum.myr \ + floatbits.myr \ + fmt.myr \ + hashfuncs.myr \ + hasprefix.myr \ + hassuffix.myr \ + htab.myr \ + ifreq.myr \ + intparse.myr \ + ipparse.myr \ + mk.myr \ + now.myr \ + option.myr \ + optparse.myr \ + rand.myr \ + resolve.myr \ + result.myr \ + search.myr \ + slcp.myr \ + sldup.myr \ + sleq.myr \ + slfill.myr \ + sljoin.myr \ + slpush.myr \ + slput.myr \ + slurp.myr \ + sort.myr \ + strfind.myr \ + strjoin.myr \ + strsplit.myr \ + strstrip.myr \ + sys.myr \ + try.myr \ + types.myr \ + units.myr \ + utf.myr \ + varargs.myr \ + waitstatus.myr \ + +ASMSRC= \ + start.s \ + syscall.s \ + util.s + +include ../config.mk + +all: lib$(MYRLIB).a $(MYRBIN) test + +sys.myr: sys-$(SYS).myr + cp sys-$(SYS).myr sys.myr + +ifreq.myr: ifreq-$(SYS).myr + cp ifreq-$(SYS).myr ifreq.myr + +waitstatus.myr: waitstatus-$(SYS).myr + cp waitstatus-$(SYS).myr waitstatus.myr + +syscall.s: syscall-$(SYS).s + cp syscall-$(SYS).s syscall.s + +start.s: start-$(SYS).s + cp start-$(SYS).s start.s + +test: libstd.a test.myr ../6/6m + ../myrbuild/myrbuild -C../6/6m -M../muse/muse -b test -I. 
test.myr + + +lib$(MYRLIB).a: $(MYRSRC) $(ASMSRC) ../6/6m + ../myrbuild/myrbuild -C../6/6m -M../muse/muse -l $(MYRLIB) $(MYRSRC) $(ASMSRC) + +OBJ=$(MYRSRC:.myr=.o) $(ASMSRC:.s=.o) +USE=$(MYRSRC:.myr=.use) $(MYRLIB) +.PHONY: clean +clean: + rm -f $(OBJ) test.o + rm -f $(USE) test.use + rm -f lib$(MYRLIB).a + +install: all + mkdir -p $(abspath $(DESTDIR)/$(INST_ROOT)/lib/myr) + install libstd.a $(abspath $(DESTDIR)/$(INST_ROOT)/lib/myr) + install std $(abspath $(DESTDIR)/$(INST_ROOT)/lib/myr) + +uninstall: + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/lib/myr/libstd.a) + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/lib/myr/std) + +../6/6m: + cd ..; make diff --git a/libstd/alloc.myr b/libstd/alloc.myr new file mode 100644 index 0000000..ee1ddac --- /dev/null +++ b/libstd/alloc.myr @@ -0,0 +1,409 @@ +use "die.use" +use "extremum.use" +use "sys.use" +use "types.use" +use "units.use" + +/* +The allocator implementation here is based on Bonwick's slab allocator. + +For small allocations (up to Bktmax), it works by requesting large, +power of two aligned chunks from the operating system, and breaking +them into a linked list of equal sized chunks. Allocations are then +satisfied by taking the head of the list of chunks. Empty slabs +are removed from the freelist. + +The data structure looks something like this: + Bkts: + [16 byte] -> [slab hdr | chunk -> chunk -> chunk] -> [slab hdr | chunk -> chunk -> chunk] + [32 byte] -> Zslab + [64 byte] -> [slab hdr | chunk -> chunk] + ... + [32k byte] -> ... + +Large allocations are simply satisfied by mmap(). 
+ +*/ + +pkg std = + generic alloc : ( -> @a#) + generic zalloc : ( -> @a#) + generic free : (v:@a# -> void) + + generic slalloc : (len : size -> @a[:]) + generic slzalloc : (len : size -> @a[:]) + generic slgrow : (sl : @a[:], len : size -> @a[:]) + generic slzgrow : (sl : @a[:], len : size -> @a[:]) + generic slfree : (sl : @a[:] -> void) + + const bytealloc : (sz:size -> byte#) + const zbytealloc : (sz:size -> byte#) + const bytefree : (m:byte#, sz:size -> void) + + +;; +extern const put : (b : byte[:], args : ... -> size) + +/* null pointers. only used internally. */ +const Zbyteptr = 0 castto(byte#) +const Zslab = 0 castto(slab#) +const Zchunk = 0 castto(chunk#) + +const Slabsz = 1*MiB /* 1 meg slabs */ +const Cachemax = 16 /* maximum number of slabs in the cache */ +const Bktmax = 32*KiB /* Slabsz / 8; a balance. */ +const Pagesz = 4*KiB +const Align = 16 /* minimum allocation alignment */ + +var buckets : bucket[32] /* excessive */ +var initdone : int + +type slheader = struct + cap : size /* capacity in bytes */ + magic : size /* magic check value */ +;; + +type bucket = struct + sz : size /* aligned size */ + nper : size /* max number of elements per slab */ + slabs : slab# /* partially filled or free slabs */ + cache : slab# /* cache of empty slabs, to prevent thrashing */ + ncache : size /* size of cache */ +;; + +type slab = struct + head : byte# /* head of virtual addresses, so we don't leak address space */ + next : slab# /* the next slab on the chain */ + freehd : chunk# /* the nodes we're allocating */ + nfree : size /* the number of free nodes */ +;; + +type chunk = struct /* NB: must be smaller than sizeof(slab) */ + next : chunk# /* the next chunk in the free list */ +;; + +/* Allocates an object of type @a, returning a pointer to it. 
*/ +generic alloc = {-> @a# + -> bytealloc(sizeof(@a)) castto(@a#) +} + +generic zalloc = {-> @a# + -> zbytealloc(sizeof(@a)) castto(@a#) +} + +/* Frees a value of type @a */ +generic free = {v:@a# -> void + bytefree(v castto(byte#), sizeof(@a)) +} + +/* allocates a slice of 'len' elements. */ +generic slalloc = {len + var p, sz + + if len == 0 + -> [][:] + ;; + sz = len*sizeof(@a) + align(sizeof(slheader), Align) + p = bytealloc(sz) + p = inithdr(p, sz) + -> (p castto(@a#))[0:len] +} + +generic slzalloc = {len + var p, sz + + if len == 0 + -> [][:] + ;; + sz = len*sizeof(@a) + align(sizeof(slheader), Align) + p = zbytealloc(sz) + p = inithdr(p, sz) + -> (p castto(@a#))[0:len] +} + +const inithdr = {p, sz + var phdr, prest + + phdr = p castto(slheader#) + phdr.cap = allocsz(sz) - align(sizeof(slheader), Align) + phdr.magic = (0xdeadbeefbadf00d castto(size)) + + prest = (p castto(size)) + align(sizeof(slheader), Align) + -> prest castto(byte#) +} + +const checkhdr = {p + var phdr, addr + + addr = p castto(size) + addr -= align(sizeof(slheader), Align) + phdr = addr castto(slheader#) + assert(phdr.magic == (0xdeadbeefbadf00d castto(size)), "corrupt memory\n") +} + +/* Frees a slice */ +generic slfree = {sl + var head + + if sl.len == 0 + -> + ;; + + checkhdr(sl castto(byte#)) + head = (sl castto(byte#)) castto(size) + head -= align(sizeof(slheader), Align) + bytefree(head castto(byte#), slcap(sl castto(byte#))) +} + +/* Grows a slice */ +generic slgrow = {sl : @a[:], len + var i, n + var new + + /* if the slice doesn't need a bigger bucket, we don't need to realloc. 
*/ + if sl.len > 0 && slcap(sl castto(byte#)) >= allocsz(len*sizeof(@a)) + -> (sl castto(@a#))[:len] + ;; + + new = slalloc(len) + n = min(len, sl.len) + for i = 0; i < n; i++ + new[i] = sl[i] + ;; + if sl.len > 0 + slfree(sl) + ;; + -> new +} + +/* Grows a slice, filling new entries with zero bytes */ +generic slzgrow = {sl : @a[:], len + var oldsz + + oldsz = sl.len*sizeof(@a) + sl = slgrow(sl, len) + zfill((sl castto(byte#))[oldsz:len*sizeof(@a)]) + -> sl +} + +const slcap = {p + var phdr + + phdr = (p castto(size)) - align(sizeof(slheader), Align) castto(slheader#) + -> phdr.cap +} + +const zbytealloc = {sz + var p + + p = bytealloc(sz) + zfill(p[0:sz]) + -> p +} + +const zfill = {sl + var i + + for i = 0; i < sl.len; i++ + sl[i] = 0 + ;; +} + +/* Allocates a blob that is 'sz' bytes long. Dies if the allocation fails */ +const bytealloc = {sz + var i, bkt + + if !initdone + for i = 0; i < buckets.len && (Align << i) <= Bktmax; i++ + bktinit(&buckets[i], Align << i) + ;; + initdone = 1 + ;; + + if (sz <= Bktmax) + bkt = &buckets[bktnum(sz)] + -> bktalloc(bkt) + else + -> mmap(Zbyteptr, sz, Mprotrw, Mpriv | Manon, -1, 0) + ;; +} + +/* frees a blob 'm' that is 'sz' bytes long. The size test must mirror the one in bytealloc: blobs of up to and including Bktmax bytes were carved out of a bucket slab, and only larger blobs were mmap()ed directly. */ +const bytefree = {m, sz + var bkt + + if (sz <= Bktmax) + bkt = &buckets[bktnum(sz)] + bktfree(bkt, m) + else + munmap(m, sz) + ;; +} + +/* Sets up a single empty bucket */ +const bktinit = {b, sz + b.sz = align(sz, Align) + b.nper = (Slabsz - sizeof(slab))/b.sz + b.slabs = Zslab + b.cache = Zslab + b.ncache = 0 +} + +/* Creates a slab for bucket 'bkt', and fills the chunk list */ +const mkslab = {bkt + var i, p, s + var b, bnext + var off /* offset of chunk head */ + + if bkt.ncache > 0 + s = bkt.cache + bkt.cache = s.next + bkt.ncache-- + ;; + /* tricky: we need power of two alignment, so we allocate double the + needed size, chop off the unaligned ends, and waste the address + space.
Since the OS is "smart enough", this shouldn't actually + cost us memory, and 64 bits of address space means that we're not + going to have issues with running out of address space for a + while. On a 32 bit system this would be a bad idea. */ + p = mmap(Zbyteptr, Slabsz*2, Mprotrw, Mpriv | Manon, -1, 0) + if p == Mapbad + die("Unable to mmap") + ;; + + s = align(p castto(size), Slabsz) castto(slab#) + s.head = p + s.nfree = bkt.nper + /* skip past the slab header */ + off = align(sizeof(slab), Align) + bnext = nextchunk(s castto(chunk#), off) + s.freehd = bnext + for i = 0; i < bkt.nper; i++ + b = bnext + bnext = nextchunk(b, bkt.sz) + b.next = bnext + ;; + b.next = Zchunk + -> s +} + +/* +Allocates a node from bucket 'bkt', crashing if the +allocation cannot be satisfied. Will create a new slab +if there are no slabs on the freelist. +*/ +const bktalloc = {bkt + var s + var b + + /* find a slab */ + s = bkt.slabs + if s == Zslab + s = mkslab(bkt) + if s == Zslab + die("No memory left") + ;; + bkt.slabs = s + ;; + + /* grab the first chunk on the slab */ + b = s.freehd + s.freehd = b.next + s.nfree-- + if !s.nfree + bkt.slabs = s.next + s.next = Zslab + ;; + + -> b castto(byte#) +} + +/* +Frees a chunk of memory 'm' into bucket 'bkt'. +Assumes that the memory already came from a slab +that was created for bucket 'bkt'. Will crash +if this is not the case. 
+*/ +const bktfree = {bkt, m + var s, b + + s = mtrunc(m, Slabsz) castto(slab#) + b = m castto(chunk#) + if s.nfree == 0 + s.next = bkt.slabs + bkt.slabs = s + elif s.nfree == bkt.nper + if bkt.ncache < Cachemax + s.next = bkt.cache + bkt.cache = s + else + /* we mapped 2*Slabsz so we could align it, + so we need to unmap the same */ + munmap(s.head, Slabsz*2) + ;; + ;; + s.nfree++ + b.next = s.freehd + s.freehd = b +} + +/* +Finds the correct bucket index to allocate from +for allocations of size 'sz' +*/ +const bktnum = {sz + var i, bktsz + + bktsz = Align + for i = 0; bktsz <= Bktmax; i++ + if bktsz >= sz + -> i + ;; + bktsz *= 2 + ;; + die("Size does not match any buckets") +} + +/* +returns the actual size we allocated for a given +size request +*/ +const allocsz = {sz + var i, bktsz + + if sz <= Bktmax + bktsz = Align + for i = 0; bktsz <= Bktmax; i++ + if bktsz >= sz + -> bktsz + ;; + bktsz *= 2 + ;; + else + -> align(sz, Pagesz) + ;; +} + +/* +aligns a size to a requested alignment. +'align' must be a power of two +*/ +const align = {v, align + -> (v + align - 1) & ~(align - 1) +} + +/* +chunks are variable sizes, so we can't just +index to get to the next one +*/ +const nextchunk = {b, sz : size + -> ((b castto(intptr)) + (sz castto(intptr))) castto(chunk#) +} + +/* +truncates a pointer to 'align'. 'align' must +be a power of two. +*/ +const mtrunc = {m, align + -> ((m castto(intptr)) & ~((align castto(intptr)) - 1)) castto(byte#) +} diff --git a/libstd/bigint.myr b/libstd/bigint.myr new file mode 100644 index 0000000..ab7cb8c --- /dev/null +++ b/libstd/bigint.myr @@ -0,0 +1,577 @@ +use "alloc.use" +use "cmp.use" +use "die.use" +use "extremum.use" +use "fmt.use" +use "hasprefix.use" +use "chartype.use" +use "option.use" +use "slcp.use" +use "sldup.use" +use "slfill.use" +use "slpush.use" +use "types.use" +use "utf.use" + +pkg std = + type bigint = struct + dig : uint32[:] /* little endian, no leading zeros. 
*/ + sign : int /* -1 for -ve, 0 for zero, 1 for +ve. */ + ;; + + /* administrivia */ + const mkbigint : (v : int32 -> bigint#) + const bigfree : (a : bigint# -> void) + const bigdup : (a : bigint# -> bigint#) + const bigassign : (d : bigint#, s : bigint# -> bigint#) + const bigparse : (s : byte[:] -> option(bigint#)) + const bigfmt : (b : byte[:], a : bigint# -> size) + + /* some useful predicates */ + const bigiszero : (a : bigint# -> bool) + const bigcmp : (a : bigint#, b : bigint# -> order) + + /* bigint*bigint -> bigint ops */ + const bigadd : (a : bigint#, b : bigint# -> bigint#) + const bigsub : (a : bigint#, b : bigint# -> bigint#) + const bigmul : (a : bigint#, b : bigint# -> bigint#) + const bigdiv : (a : bigint#, b : bigint# -> bigint#) + const bigmod : (a : bigint#, b : bigint# -> bigint#) + const bigdivmod : (a : bigint#, b : bigint# -> [bigint#, bigint#]) + const bigshl : (a : bigint#, b : bigint# -> bigint#) + const bigshr : (a : bigint#, b : bigint# -> bigint#) + + /* bigint*int -> bigint ops */ + const bigaddi : (a : bigint#, b : int64 -> bigint#) + const bigsubi : (a : bigint#, b : int64 -> bigint#) + const bigmuli : (a : bigint#, b : int64 -> bigint#) + const bigdivi : (a : bigint#, b : int64 -> bigint#) + const bigshli : (a : bigint#, b : uint64 -> bigint#) + const bigshri : (a : bigint#, b : uint64 -> bigint#) +;; + +const Base = 0x100000000ul + +const mkbigint = {v + var a + a = zalloc() + + a.dig = slalloc(1) + if v < 0 + a.sign = -1 + v = -v + elif v > 0 + a.sign = 1 + ;; + a.dig[0] = (v castto(uint32)) + -> trim(a) +} + +const bigfree = {a + slfree(a.dig) + free(a) +} + +const bigdup = {a + -> bigassign(zalloc(), a) +} + +const bigassign = {d, s + slfree(d.dig) + d.dig = sldup(s.dig) + d.sign = s.sign + -> d +} + + +/* for now, just dump out something for debugging... 
*/ +const bigfmt = {buf, val + const digitchars = ['0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'] + var v + var n, i + var tmp + var rem + + n = 0 + if val.sign == 0 + n += encode(buf, '0') + elif val.sign == -1 + n += encode(buf, '-') + ;; + + val = bigdup(val) + /* generate the digits in reverse order */ + while !bigiszero(val) + (v, rem) = bigdivmod(val, mkbigint(10)) + if rem.dig.len > 0 + n += bfmt(buf[n:], "%c", digitchars[rem.dig[0]]) + else + n += bfmt(buf[n:], "0") + ;; + bigfree(val) + bigfree(rem) + val = v + ;; + bigfree(val) + /* we only generated ascii digits, so this works. ugh. */ + for i = 0; i < n/2; i++ + tmp = buf[i] + buf[i] = buf[n - i - 1] + buf[n - i - 1] = tmp + ;; + -> n +} + +const bigparse = {str + var c, val, base + var v, b + var a + + if hasprefix(str, "0x") || hasprefix(str, "0X") + base = 16 + elif hasprefix(str, "0o") || hasprefix(str, "0O") + base = 8 + elif hasprefix(str, "0b") || hasprefix(str, "0B") + base = 2 + else + base = 10 + ;; + + a = mkbigint(0) + b = mkbigint(base castto(int32)) + /* + efficiency hack: to save allocations, + just mutate v[0]. The value will always + fit in one digit. 
+ */ + v = mkbigint(1) + while str.len != 0 + (c, str) = striter(str) + if c == '_' + continue + ;; + val = charval(c, base) + if val < 0 + bigfree(a) + -> `None + ;; + v.dig[0] = val + if val == 0 + v.sign = 0 + else + v.sign = 1 + ;; + bigmul(a, b) + bigadd(a, v) + + ;; + -> `Some a +} + +const bigiszero = {v + -> v.dig.len == 0 +} + +/* Orders 'a' relative to 'b': first by sign, then, for equal signs, by magnitude. Returns `Before, `After or `Equal. */ +const bigcmp = {a, b + var i + + if a.sign < b.sign + -> `Before + elif a.sign > b.sign + -> `After + else + /* the one with more digits has greater magnitude */ + if a.dig.len > b.dig.len + -> signedorder(a.sign) + elif a.dig.len < b.dig.len + -> signedorder(-a.sign) + ;; + /* otherwise, the one with the first larger digit is bigger. When 'a' has the smaller magnitude the result is flipped by negating the shared sign. */ + for i = a.dig.len; i > 0; i-- + if a.dig[i - 1] > b.dig[i - 1] + -> signedorder(a.sign) + elif b.dig[i - 1] > a.dig[i - 1] + -> signedorder(-a.sign) + ;; + ;; + ;; + -> `Equal +} + +const signedorder = {sign + if sign < 0 + -> `Before + else + -> `After + ;; +} + +/* a += b */ +const bigadd = {a, b + if a.sign == b.sign || a.sign == 0 + a.sign = b.sign + -> uadd(a, b) + elif b.sign == 0 + -> a + else + match bigcmp(a, b) + | `Before: /* a is negative */ + a.sign = b.sign + -> usub(b, a) + | `After: /* b is negative */ + -> usub(a, b) + | `Equal: + die("Impossible. Equal vals with different sign.") + ;; + ;; +} + +/* adds two unsigned values together.
*/ +const uadd = {a, b + var v, i + var carry + var n + + carry = 0 + n = max(a.dig.len, b.dig.len) + /* guaranteed to carry no more than one value */ + a.dig = slzgrow(a.dig, n + 1) + for i = 0; i < n; i++ + v = (a.dig[i] castto(uint64)) + (b.dig[i] castto(uint64)) + carry; + if v >= Base + carry = 1 + else + carry = 0 + ;; + a.dig[i] = v castto(uint32) + ;; + a.dig[i] += carry castto(uint32) + -> trim(a) +} + +/* a -= b */ +const bigsub = {a, b + /* 0 - x = -x */ + if a.sign == 0 + bigassign(a, b) + a.sign = -b.sign + -> a + /* x - 0 = x */ + elif b.sign == 0 + -> a + elif a.sign != b.sign + -> uadd(a, b) + else + match bigcmp(a, b) + | `Before: /* a is negative */ + a.sign = b.sign + -> usub(b, a) + | `After: /* b is negative */ + -> usub(a, b) + | `Equal: + die("Impossible. Equal vals with different sign.") + ;; + ;; + -> a +} + +/* subtracts two unsigned values, where 'a' is strictly greater than 'b' */ +const usub = {a, b + var carry + var v, i + + carry = 0 + for i = 0; i < a.dig.len; i++ + v = (a.dig[i] castto(int64)) - (b.dig[i] castto(int64)) - carry + if v < 0 + carry = 1 + else + carry = 0 + ;; + a.dig[i] = v castto(uint32) + ;; + -> trim(a) +} + +/* a *= b. Schoolbook multiplication into a zeroed scratch slice 'w' of a.dig.len + b.dig.len digits, which then replaces a.dig. Returns 'a'. */ +const bigmul = {a, b + var i, j + var ai, bj, wij + var carry, t + var w + + if a.sign == 0 || b.sign == 0 + a.sign = 0 + slfree(a.dig) + a.dig = [][:] + -> a + elif a.sign != b.sign + a.sign = -1 + else + a.sign = 1 + ;; + w = slzalloc(a.dig.len + b.dig.len) + for j = 0; j < b.dig.len; j++ + /* 'j' walks the digits of 'b': a zero digit there contributes nothing to this row, so only the row's top digit is cleared and the digits already accumulated in 'w' are left alone */ + if b.dig[j] == 0 + w[j + a.dig.len] = 0 + continue + ;; + carry = 0 + for i = 0; i < a.dig.len; i++ + ai = a.dig[i] castto(uint64) + bj = b.dig[j] castto(uint64) + wij = w[i+j] castto(uint64) + t = ai * bj + wij + carry + w[i + j] = (t castto(uint32)) + carry = t >> 32 + ;; + w[i+j] = carry castto(uint32) + ;; + slfree(a.dig) + a.dig = w + -> trim(a) +} + +const bigdiv = {a : bigint#, b : bigint# -> bigint# + var q, r + + (q, r) = bigdivmod(a, b) + bigfree(r) + slfree(a.dig) + a.dig = q.dig + free(q) + -> a +} +const
bigmod = {a : bigint#, b : bigint# -> bigint# + var q, r + + (q, r) = bigdivmod(a, b) + bigfree(q) + slfree(a.dig) + a.dig = r.dig + free(r) + -> a +} + +/* a /= b */ +const bigdivmod = {a : bigint#, b : bigint# -> [bigint#, bigint#] + /* + Implements bigint division using Algorithm D from + Knuth: Seminumerical algorithms, Section 4.3.1. + */ + var qhat, rhat, carry, shift + var x, y, z, w, p, t /* temporaries */ + var b0, aj + var u, v + var m : int64, n : int64 + var i, j : int64 + var q + + if bigiszero(b) + die("divide by zero\n") + ;; + /* if b > a, we trucate to 0, with remainder 'a' */ + if a.dig.len < b.dig.len + -> (mkbigint(0), bigdup(a)) + ;; + + q = zalloc() + q.dig = slzalloc(max(a.dig.len, b.dig.len) + 1) + if a.sign != b.sign + q.sign = -1 + else + q.sign = 1 + ;; + + /* handle single digit divisor separately: the knuth algorithm needs at least 2 digits. */ + if b.dig.len == 1 + carry = 0 + b0 = (b.dig[0] castto(uint64)) + for j = a.dig.len; j > 0; j-- + aj = (a.dig[j - 1] castto(uint64)) + q.dig[j - 1] = (((carry << 32) + aj)/b0) castto(uint32) + carry = (carry << 32) + aj - (q.dig[j-1] castto(uint64))*b0 + ;; + -> (trim(q), trim(mkbigint(carry castto(int32)))) + ;; + + u = bigdup(a) + v = bigdup(b) + m = u.dig.len + n = v.dig.len + + shift = nlz(v.dig[n - 1]) + bigshli(u, shift) + bigshli(v, shift) + for j = m - n; j >= 0; j-- + /* load a few temps */ + x = u.dig[j + n] castto(uint64) + y = u.dig[j + n - 1] castto(uint64) + z = v.dig[n - 1] castto(uint64) + w = v.dig[n - 2] castto(uint64) + t = u.dig[j + n - 2] castto(uint64) + + /* estimate qhat */ + qhat = (x*Base + y)/z + rhat = (x*Base + y) - (qhat * z) +:divagain + if qhat >= Base || (qhat * w) > (rhat*Base + t) + qhat-- + rhat += z + if rhat < Base + goto divagain + ;; + ;; + + /* multiply and subtract */ + carry = 0 + for i = 0; i < n; i++ + p = qhat * (v.dig[i] castto(uint64)) + t = (u.dig[i+j] castto(uint64)) - carry - (p % Base) + u.dig[i+j] = t castto(uint32) + carry = (((p 
castto(int64)) >> 32) - ((t castto(int64)) >> 32)) castto(uint64); + ;; + x = u.dig[j + n] castto(uint64) + t = x - carry + u.dig[j + n] = t castto(uint32) + q.dig[j] = qhat castto(uint32) + /* adjust */ + if x < carry + q.dig[j]-- + carry = 0 + for i = 0; i < n; i++ + t = (u.dig[i+j] castto(uint64)) + (v.dig[i] castto(uint64)) + carry + u.dig[i+j] = t castto(uint32) + carry = t >> 32 + ;; + u.dig[j+n] = u.dig[j+n] + (carry castto(uint32)); + ;; + + ;; + /* undo the biasing for remainder */ + bigshri(u, shift) + -> (trim(q), trim(u)) +} + +/* returns the number of leading zeros */ +const nlz = {a : uint32 + var n + + if a == 0 + -> 32 + ;; + n = 0 + if a <= 0x0000ffff + n += 16 + a <<= 16 + ;; + if a <= 0x00ffffff + n += 8 + a <<= 8 + ;; + if a <= 0x0fffffff + n += 4 + a <<= 4 + ;; + if a <= 0x3fffffff + n += 2 + a <<= 2 + ;; + if a <= 0x7fffffff + n += 1 + a <<= 1 + ;; + -> n +} + + +/* a <<= b */ +const bigshl = {a, b + match b.dig.len + | 0: -> a + | 1: -> bigshli(a, b.dig[0] castto(uint64)) + | n: die("shift by way too much\n") + ;; +} + +/* a >>= b, unsigned */ +const bigshr = {a, b + match b.dig.len + | 0: -> a + | 1: -> bigshri(a, b.dig[0] castto(uint64)) + | n: die("shift by way too much\n") + ;; +} + +/* + a << s, with integer arg. + logical left shift. any other type would be illogical. + */ +const bigshli = {a, s + var off, shift + var t, carry + var i + + off = s/32 + shift = s % 32 + + a.dig = slzgrow(a.dig, 1 + a.dig.len + off castto(size)) + /* blit over the base values */ + for i = a.dig.len; i > off; i-- + a.dig[i - 1] = a.dig[i - 1 - off] + ;; + for i = 0; i < off; i++ + a.dig[i] = 0 + ;; + /* and shift over by the remainder */ + carry = 0 + for i = 0; i < a.dig.len; i++ + t = (a.dig[i] castto(uint64)) << shift + a.dig[i] = (t | carry) castto(uint32) + carry = t >> 32 + ;; + -> trim(a) +} + +/* logical shift right, zero fills. sign remains untouched. 
*/ +const bigshri = {a, s + var off, shift + var t, carry + var i + + off = s/32 + shift = s % 32 + + /* blit over the base values */ + for i = 0; i < a.dig.len - off; i++ + a.dig[i] = a.dig[i + off] + ;; + /* the top 'off' digits were vacated by the blit: zero them, staying inside the slice */ + for i = a.dig.len - off; i < a.dig.len; i++ + a.dig[i] = 0 + ;; + /* and shift over by the remainder */ + carry = 0 + for i = a.dig.len; i > 0; i-- + t = (a.dig[i - 1] castto(uint64)) + a.dig[i - 1] = (carry | (t >> shift)) castto(uint32) + carry = t << (32 - shift) + ;; + -> trim(a) +} + +/* trims leading zeros */ +const trim = {a + var i + + for i = a.dig.len; i > 0; i-- + if a.dig[i - 1] != 0 + break + ;; + ;; + a.dig = slgrow(a.dig, i) + if i == 0 + a.sign = 0 + elif a.sign == 0 + a.sign = 1 + ;; + -> a +} + diff --git a/libstd/bitset.myr b/libstd/bitset.myr new file mode 100644 index 0000000..c6852f8 --- /dev/null +++ b/libstd/bitset.myr @@ -0,0 +1,142 @@ +use "alloc.use" +use "extremum.use" +use "mk.use" +use "slfill.use" +use "types.use" + +pkg std = + type bitset = struct + bits : size[:] + ;; + + const mkbs : (-> bitset#) + const bsdup : (bs : bitset# -> bitset#) + const bsfree : (bs : bitset# -> void) + + generic bsput : (bs : bitset#, v : @a::(integral,numeric) -> void) + generic bsdel : (bs : bitset#, v : @a::(integral,numeric) -> void) + generic bshas : (bs : bitset#, v : @a::(integral,numeric) -> bool) + + const bsdiff : (a : bitset#, b : bitset# -> void) + const bsintersect : (a : bitset#, b : bitset# -> void) + const bsunion : (a : bitset#, b : bitset# -> void) + const bseq : (a : bitset#, b : bitset# -> bool) + const bsissubset : (a : bitset#, b : bitset# -> bool) + + const bsclear : (bs : bitset# -> bitset#) + const bscount : (bs : bitset# -> bitset#) + const bsiter : (bs : bitset# -> bitset#) +;; + +const mkbs = { + -> zalloc() +} + +const bsdup = {bs + -> mk([.bits=bs.bits]) +} + +const bsfree = {bs + slfree(bs.bits) + free(bs) +} + +const bsclear = {bs + slfill(bs.bits, 0) + -> bs +} + +generic bsput = {bs, v + var idx + var off + + idx = (v
castto(size)) / (8*sizeof(size)) + off = (v castto(size)) % (8*sizeof(size)) + ensurespace(bs, idx) + bs.bits[idx] |= (1 << off) +} + +generic bsdel = {bs, v + var idx + var off + + idx = (v castto(size)) / (8*sizeof(size)) + off = (v castto(size)) % (8*sizeof(size)) + ensurespace(bs, idx) + bs.bits[idx] &= ~(1 << off) +} + +generic bshas = {bs, v + var idx + var off + + idx = (v castto(size)) / (8*sizeof(size)) + off = (v castto(size)) % (8*sizeof(size)) + ensurespace(bs, idx) + -> (bs.bits[idx] & (1 << off)) != 0 +} + +const bsunion = {a, b + var i + + eqsz(a, b) + for i = 0; i < b.bits.len; i++ + a.bits[i] |= b.bits[i] + ;; +} + +const bsintersect = {a, b + var i, n + + n = min(a.bits.len, b.bits.len) + for i = 0; i < n; i++ + a.bits[i] &= b.bits[i] + ;; +} + +const bsdiff = {a, b + var i + + ensurespace(b, a.bits.len) + for i = 0; i < a.bits.len; i++ + a.bits[i] &= ~b.bits[i] + ;; +} + +const bsissubset = {a, b + var i + + eqsz(a, b); + for i = 0; i < a.bits.len; i++ + if (b.bits[i] & a.bits[i]) != b.bits[i] + -> false + ;; + ;; + -> true +} + +const bseq = {a, b + var i + + eqsz(a, b) + for i = 0; i < a.bits.len; i++ + if a.bits[i] != b.bits[i] + -> false + ;; + ;; + -> true +} + +const ensurespace = {bs, v + if bs.bits.len <= v + bs.bits = slzgrow(bs.bits, v + 1) + ;; +} + +const eqsz = {a, b + var sz + + sz = max(a.bits.len, b.bits.len) + ensurespace(a, sz) + ensurespace(b, sz) +} diff --git a/libstd/blat.myr b/libstd/blat.myr new file mode 100644 index 0000000..ccd5ad0 --- /dev/null +++ b/libstd/blat.myr @@ -0,0 +1,28 @@ +use "sys.use" +use "fmt.use" + +pkg std = + const blat : (path : byte[:], buf : byte[:] -> bool) +;; + +const blat = {path, buf + var fd + var written + var n + + fd = openmode(path, Ocreat|Owronly, 0o777) + if fd < 0 + fatal(1, "Could not open file \"%s\"", path) + ;; + + n = 0 + while true + written = write(fd, buf[n:]) + if written <= 0 + goto done + ;; + n += written + ;; +:done + -> written == 0 +} diff --git a/libstd/chartype.myr 
b/libstd/chartype.myr new file mode 100644 index 0000000..c62fcf1 --- /dev/null +++ b/libstd/chartype.myr @@ -0,0 +1,1227 @@ +use "die.use" +use "sys.use" + +/* + Tables adapted from plan 9's runetype.c, + which lives in sys/src/libc/port/runetype.c +*/ + +pkg std = + /* predicates */ + const isalpha : (c : char -> bool) + const isnum : (c : char -> bool) + const isalnum : (c : char -> bool) + const isspace : (c : char -> bool) + const isblank : (c : char -> bool) + const islower : (c : char -> bool) + const isupper : (c : char -> bool) + const istitle : (c : char -> bool) + + /* transforms */ + const tolower : (c : char -> char) + const toupper : (c : char -> char) + const totitle : (c : char -> char) + + generic charval : (c : char, base : int -> @a::(integral,numeric)) +;; + +/* + * alpha ranges - + * only covers ranges not in lower||upper + */ +const ralpha2 = [ + 0x00d8, 0x00f6, /* Ø - ö */ + 0x00f8, 0x01f5, /* ø - ǵ */ + 0x0250, 0x02a8, /* ɐ - ʨ */ + 0x038e, 0x03a1, /* Ύ - Ρ */ + 0x03a3, 0x03ce, /* Σ - ώ */ + 0x03d0, 0x03d6, /* ϐ - ϖ */ + 0x03e2, 0x03f3, /* Ϣ - ϳ */ + 0x0490, 0x04c4, /* Ґ - ӄ */ + 0x0561, 0x0587, /* ա - և */ + 0x05d0, 0x05ea, /* א - ת */ + 0x05f0, 0x05f2, /* װ - ײ */ + 0x0621, 0x063a, /* ء - غ */ + 0x0640, 0x064a, /* ـ - ي */ + 0x0671, 0x06b7, /* ٱ - ڷ */ + 0x06ba, 0x06be, /* ں - ھ */ + 0x06c0, 0x06ce, /* ۀ - ێ */ + 0x06d0, 0x06d3, /* ې - ۓ */ + 0x0905, 0x0939, /* अ - ह */ + 0x0958, 0x0961, /* क़ - ॡ */ + 0x0985, 0x098c, /* অ - ঌ */ + 0x098f, 0x0990, /* এ - ঐ */ + 0x0993, 0x09a8, /* ও - ন */ + 0x09aa, 0x09b0, /* প - র */ + 0x09b6, 0x09b9, /* শ - হ */ + 0x09dc, 0x09dd, /* ড় - ঢ় */ + 0x09df, 0x09e1, /* য় - ৡ */ + 0x09f0, 0x09f1, /* ৰ - ৱ */ + 0x0a05, 0x0a0a, /* ਅ - ਊ */ + 0x0a0f, 0x0a10, /* ਏ - ਐ */ + 0x0a13, 0x0a28, /* ਓ - ਨ */ + 0x0a2a, 0x0a30, /* ਪ - ਰ */ + 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ + 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ + 0x0a38, 0x0a39, /* ਸ - ਹ */ + 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ + 0x0a85, 0x0a8b, /* અ - ઋ */ + 0x0a8f, 0x0a91, /* એ - ઑ */ + 
0x0a93, 0x0aa8, /* ઓ - ન */ + 0x0aaa, 0x0ab0, /* પ - ર */ + 0x0ab2, 0x0ab3, /* લ - ળ */ + 0x0ab5, 0x0ab9, /* વ - હ */ + 0x0b05, 0x0b0c, /* ଅ - ଌ */ + 0x0b0f, 0x0b10, /* ଏ - ଐ */ + 0x0b13, 0x0b28, /* ଓ - ନ */ + 0x0b2a, 0x0b30, /* ପ - ର */ + 0x0b32, 0x0b33, /* ଲ - ଳ */ + 0x0b36, 0x0b39, /* ଶ - ହ */ + 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ + 0x0b5f, 0x0b61, /* ୟ - ୡ */ + 0x0b85, 0x0b8a, /* அ - ஊ */ + 0x0b8e, 0x0b90, /* எ - ஐ */ + 0x0b92, 0x0b95, /* ஒ - க */ + 0x0b99, 0x0b9a, /* ங - ச */ + 0x0b9e, 0x0b9f, /* ஞ - ட */ + 0x0ba3, 0x0ba4, /* ண - த */ + 0x0ba8, 0x0baa, /* ந - ப */ + 0x0bae, 0x0bb5, /* ம - வ */ + 0x0bb7, 0x0bb9, /* ஷ - ஹ */ + 0x0c05, 0x0c0c, /* అ - ఌ */ + 0x0c0e, 0x0c10, /* ఎ - ఐ */ + 0x0c12, 0x0c28, /* ఒ - న */ + 0x0c2a, 0x0c33, /* ప - ళ */ + 0x0c35, 0x0c39, /* వ - హ */ + 0x0c60, 0x0c61, /* ౠ - ౡ */ + 0x0c85, 0x0c8c, /* ಅ - ಌ */ + 0x0c8e, 0x0c90, /* ಎ - ಐ */ + 0x0c92, 0x0ca8, /* ಒ - ನ */ + 0x0caa, 0x0cb3, /* ಪ - ಳ */ + 0x0cb5, 0x0cb9, /* ವ - ಹ */ + 0x0ce0, 0x0ce1, /* ೠ - ೡ */ + 0x0d05, 0x0d0c, /* അ - ഌ */ + 0x0d0e, 0x0d10, /* എ - ഐ */ + 0x0d12, 0x0d28, /* ഒ - ന */ + 0x0d2a, 0x0d39, /* പ - ഹ */ + 0x0d60, 0x0d61, /* ൠ - ൡ */ + 0x0e01, 0x0e30, /* ก - ะ */ + 0x0e32, 0x0e33, /* า - ำ */ + 0x0e40, 0x0e46, /* เ - ๆ */ + 0x0e5a, 0x0e5b, /* ๚ - ๛ */ + 0x0e81, 0x0e82, /* ກ - ຂ */ + 0x0e87, 0x0e88, /* ງ - ຈ */ + 0x0e94, 0x0e97, /* ດ - ທ */ + 0x0e99, 0x0e9f, /* ນ - ຟ */ + 0x0ea1, 0x0ea3, /* ມ - ຣ */ + 0x0eaa, 0x0eab, /* ສ - ຫ */ + 0x0ead, 0x0eae, /* ອ - ຮ */ + 0x0eb2, 0x0eb3, /* າ - ຳ */ + 0x0ec0, 0x0ec4, /* ເ - ໄ */ + 0x0edc, 0x0edd, /* ໜ - ໝ */ + 0x0f18, 0x0f19, /* ༘ - ༙ */ + 0x0f40, 0x0f47, /* ཀ - ཇ */ + 0x0f49, 0x0f69, /* ཉ - ཀྵ */ + 0x10d0, 0x10f6, /* ა - ჶ */ + 0x1100, 0x1159, /* ᄀ - ᅙ */ + 0x115f, 0x11a2, /* ᅟ - ᆢ */ + 0x11a8, 0x11f9, /* ᆨ - ᇹ */ + 0x1e00, 0x1e9b, /* Ḁ - ẛ */ + 0x1f50, 0x1f57, /* ὐ - ὗ */ + 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ + 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ + 0x1fc2, 0x1fc4, /* ῂ - ῄ */ + 0x1fc6, 0x1fcc, /* ῆ - ῌ */ + 0x1fd0, 0x1fd3, /* ῐ - ΐ */ + 0x1fd6, 0x1fdb, 
/* ῖ - Ί */ + 0x1fe0, 0x1fec, /* ῠ - Ῥ */ + 0x1ff2, 0x1ff4, /* ῲ - ῴ */ + 0x1ff6, 0x1ffc, /* ῶ - ῼ */ + 0x210a, 0x2113, /* ℊ - ℓ */ + 0x2115, 0x211d, /* ℕ - ℝ */ + 0x2120, 0x2122, /* ℠ - ™ */ + 0x212a, 0x2131, /* K - ℱ */ + 0x2133, 0x2138, /* ℳ - ℸ */ + 0x3041, 0x3094, /* ぁ - ゔ */ + 0x30a1, 0x30fa, /* ァ - ヺ */ + 0x3105, 0x312c, /* ㄅ - ㄬ */ + 0x3131, 0x318e, /* ㄱ - ㆎ */ + 0x3192, 0x319f, /* ㆒ - ㆟ */ + 0x3260, 0x327b, /* ㉠ - ㉻ */ + 0x328a, 0x32b0, /* ㊊ - ㊰ */ + 0x32d0, 0x32fe, /* ㋐ - ㋾ */ + 0x3300, 0x3357, /* ㌀ - ㍗ */ + 0x3371, 0x3376, /* ㍱ - ㍶ */ + 0x337b, 0x3394, /* ㍻ - ㎔ */ + 0x3399, 0x339e, /* ㎙ - ㎞ */ + 0x33a9, 0x33ad, /* ㎩ - ㎭ */ + 0x33b0, 0x33c1, /* ㎰ - ㏁ */ + 0x33c3, 0x33c5, /* ㏃ - ㏅ */ + 0x33c7, 0x33d7, /* ㏇ - ㏗ */ + 0x33d9, 0x33dd, /* ㏙ - ㏝ */ + 0x4e00, 0x9fff, /* 一 - 鿿 */ + 0xac00, 0xd7a3, /* 가 - 힣 */ + 0xf900, 0xfb06, /* 豈 - st */ + 0xfb13, 0xfb17, /* ﬓ - ﬗ */ + 0xfb1f, 0xfb28, /* ײַ - ﬨ */ + 0xfb2a, 0xfb36, /* שׁ - זּ */ + 0xfb38, 0xfb3c, /* טּ - לּ */ + 0xfb40, 0xfb41, /* נּ - סּ */ + 0xfb43, 0xfb44, /* ףּ - פּ */ + 0xfb46, 0xfbb1, /* צּ - ﮱ */ + 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ + 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ + 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ + 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ + 0xfe70, 0xfe72, /* ﹰ - ﹲ */ + 0xfe76, 0xfefc, /* ﹶ - ﻼ */ + 0xff66, 0xff6f, /* ヲ - ッ */ + 0xff71, 0xff9d, /* ア - ン */ + 0xffa0, 0xffbe, /* ᅠ - ᄒ */ + 0xffc2, 0xffc7, /* ᅡ - ᅦ */ + 0xffca, 0xffcf, /* ᅧ - ᅬ */ + 0xffd2, 0xffd7, /* ᅭ - ᅲ */ + 0xffda, 0xffdc /* ᅳ - ᅵ */ +] + +/* + * alpha singlets - + * only covers ranges not in lower||upper + */ +const ralpha1 = [ + 0x00aa, /* ª */ + 0x00b5, /* µ */ + 0x00ba, /* º */ + 0x03da, /* Ϛ */ + 0x03dc, /* Ϝ */ + 0x03de, /* Ϟ */ + 0x03e0, /* Ϡ */ + 0x06d5, /* ە */ + 0x09b2, /* ল */ + 0x0a5e, /* ਫ਼ */ + 0x0a8d, /* ઍ */ + 0x0ae0, /* ૠ */ + 0x0b9c, /* ஜ */ + 0x0cde, /* ೞ */ + 0x0e4f, /* ๏ */ + 0x0e84, /* ຄ */ + 0x0e8a, /* ຊ */ + 0x0e8d, /* ຍ */ + 0x0ea5, /* ລ */ + 0x0ea7, /* ວ */ + 0x0eb0, /* ະ */ + 0x0ebd, /* ຽ */ + 0x1fbe, /* ι */ + 0x207f, /* ⁿ */ + 
0x20a8, /* ₨ */ + 0x2102, /* ℂ */ + 0x2107, /* ℇ */ + 0x2124, /* ℤ */ + 0x2126, /* Ω */ + 0x2128, /* ℨ */ + 0xfb3e, /* מּ */ + 0xfe74 /* ﹴ */ +] + +/* + * space ranges + */ +const rspace2 = [ + 0x0009, 0x0009, /* tab */ + 0x0020, 0x0020, /* space */ + 0x0085, 0x0085, + 0x00a0, 0x00a0, /* */ + 0x1680, 0x1680, + 0x180e, 0x180e, + 0x2000, 0x200b, /* - */ + 0x2028, 0x2029, /*
-
*/ + 0x202f, 0x202f, + 0x205f, 0x205f, + 0x3000, 0x3000, /* */ + 0xfeff, 0xfeff /* */ +] + +/* + * lower case ranges + * 3rd col is conversion excess 500 + */ +const rtoupper2 = [ + 0x0061, 0x007a, 468, /* a-z A-Z */ + 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ + 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ + 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ + 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ + 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ + 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ + 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ + 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ + 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ + 0x0430, 0x044f, 468, /* а-я А-Я */ + 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ + 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ + 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ + 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ + 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ + 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ + 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ + 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ + 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ + 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ + 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ + 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ + 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ + 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ + 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ + 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ + 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ + 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ + 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ + 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ + 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ + 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ + 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ + 0xff41, 0xff5a, 468 /* a-z A-Z */ +] + +/* + * lower case singlets + * 2nd col is conversion excess 500 + */ +const rtoupper1 = [ + 0x00ff, 621, /* ÿ Ÿ */ + 0x0101, 499, /* ā Ā */ + 0x0103, 499, /* ă Ă */ + 0x0105, 499, /* ą Ą */ + 0x0107, 499, /* ć Ć */ + 0x0109, 499, /* ĉ Ĉ */ + 0x010b, 499, /* ċ Ċ */ + 0x010d, 499, /* č Č */ + 0x010f, 499, /* ď Ď */ + 0x0111, 499, /* đ Đ */ + 0x0113, 499, /* ē Ē */ + 0x0115, 499, /* ĕ Ĕ */ + 0x0117, 499, /* ė Ė */ + 0x0119, 499, /* ę Ę */ + 0x011b, 499, /* ě Ě */ + 0x011d, 499, /* ĝ Ĝ */ + 0x011f, 499, /* ğ Ğ */ + 
0x0121, 499, /* ġ Ġ */ + 0x0123, 499, /* ģ Ģ */ + 0x0125, 499, /* ĥ Ĥ */ + 0x0127, 499, /* ħ Ħ */ + 0x0129, 499, /* ĩ Ĩ */ + 0x012b, 499, /* ī Ī */ + 0x012d, 499, /* ĭ Ĭ */ + 0x012f, 499, /* į Į */ + 0x0131, 268, /* ı I */ + 0x0133, 499, /* ij IJ */ + 0x0135, 499, /* ĵ Ĵ */ + 0x0137, 499, /* ķ Ķ */ + 0x013a, 499, /* ĺ Ĺ */ + 0x013c, 499, /* ļ Ļ */ + 0x013e, 499, /* ľ Ľ */ + 0x0140, 499, /* ŀ Ŀ */ + 0x0142, 499, /* ł Ł */ + 0x0144, 499, /* ń Ń */ + 0x0146, 499, /* ņ Ņ */ + 0x0148, 499, /* ň Ň */ + 0x014b, 499, /* ŋ Ŋ */ + 0x014d, 499, /* ō Ō */ + 0x014f, 499, /* ŏ Ŏ */ + 0x0151, 499, /* ő Ő */ + 0x0153, 499, /* œ Œ */ + 0x0155, 499, /* ŕ Ŕ */ + 0x0157, 499, /* ŗ Ŗ */ + 0x0159, 499, /* ř Ř */ + 0x015b, 499, /* ś Ś */ + 0x015d, 499, /* ŝ Ŝ */ + 0x015f, 499, /* ş Ş */ + 0x0161, 499, /* š Š */ + 0x0163, 499, /* ţ Ţ */ + 0x0165, 499, /* ť Ť */ + 0x0167, 499, /* ŧ Ŧ */ + 0x0169, 499, /* ũ Ũ */ + 0x016b, 499, /* ū Ū */ + 0x016d, 499, /* ŭ Ŭ */ + 0x016f, 499, /* ů Ů */ + 0x0171, 499, /* ű Ű */ + 0x0173, 499, /* ų Ų */ + 0x0175, 499, /* ŵ Ŵ */ + 0x0177, 499, /* ŷ Ŷ */ + 0x017a, 499, /* ź Ź */ + 0x017c, 499, /* ż Ż */ + 0x017e, 499, /* ž Ž */ + 0x017f, 200, /* ſ S */ + 0x0183, 499, /* ƃ Ƃ */ + 0x0185, 499, /* ƅ Ƅ */ + 0x0188, 499, /* ƈ Ƈ */ + 0x018c, 499, /* ƌ Ƌ */ + 0x0192, 499, /* ƒ Ƒ */ + 0x0199, 499, /* ƙ Ƙ */ + 0x01a1, 499, /* ơ Ơ */ + 0x01a3, 499, /* ƣ Ƣ */ + 0x01a5, 499, /* ƥ Ƥ */ + 0x01a8, 499, /* ƨ Ƨ */ + 0x01ad, 499, /* ƭ Ƭ */ + 0x01b0, 499, /* ư Ư */ + 0x01b4, 499, /* ƴ Ƴ */ + 0x01b6, 499, /* ƶ Ƶ */ + 0x01b9, 499, /* ƹ Ƹ */ + 0x01bd, 499, /* ƽ Ƽ */ + 0x01c5, 499, /* Dž DŽ */ + 0x01c6, 498, /* dž DŽ */ + 0x01c8, 499, /* Lj LJ */ + 0x01c9, 498, /* lj LJ */ + 0x01cb, 499, /* Nj NJ */ + 0x01cc, 498, /* nj NJ */ + 0x01ce, 499, /* ǎ Ǎ */ + 0x01d0, 499, /* ǐ Ǐ */ + 0x01d2, 499, /* ǒ Ǒ */ + 0x01d4, 499, /* ǔ Ǔ */ + 0x01d6, 499, /* ǖ Ǖ */ + 0x01d8, 499, /* ǘ Ǘ */ + 0x01da, 499, /* ǚ Ǚ */ + 0x01dc, 499, /* ǜ Ǜ */ + 0x01df, 499, /* ǟ Ǟ */ + 0x01e1, 499, /* ǡ Ǡ */ + 0x01e3, 
499, /* ǣ Ǣ */ + 0x01e5, 499, /* ǥ Ǥ */ + 0x01e7, 499, /* ǧ Ǧ */ + 0x01e9, 499, /* ǩ Ǩ */ + 0x01eb, 499, /* ǫ Ǫ */ + 0x01ed, 499, /* ǭ Ǭ */ + 0x01ef, 499, /* ǯ Ǯ */ + 0x01f2, 499, /* Dz DZ */ + 0x01f3, 498, /* dz DZ */ + 0x01f5, 499, /* ǵ Ǵ */ + 0x01fb, 499, /* ǻ Ǻ */ + 0x01fd, 499, /* ǽ Ǽ */ + 0x01ff, 499, /* ǿ Ǿ */ + 0x0201, 499, /* ȁ Ȁ */ + 0x0203, 499, /* ȃ Ȃ */ + 0x0205, 499, /* ȅ Ȅ */ + 0x0207, 499, /* ȇ Ȇ */ + 0x0209, 499, /* ȉ Ȉ */ + 0x020b, 499, /* ȋ Ȋ */ + 0x020d, 499, /* ȍ Ȍ */ + 0x020f, 499, /* ȏ Ȏ */ + 0x0211, 499, /* ȑ Ȑ */ + 0x0213, 499, /* ȓ Ȓ */ + 0x0215, 499, /* ȕ Ȕ */ + 0x0217, 499, /* ȗ Ȗ */ + 0x0253, 290, /* ɓ Ɓ */ + 0x0254, 294, /* ɔ Ɔ */ + 0x025b, 297, /* ɛ Ɛ */ + 0x0260, 295, /* ɠ Ɠ */ + 0x0263, 293, /* ɣ Ɣ */ + 0x0268, 291, /* ɨ Ɨ */ + 0x0269, 289, /* ɩ Ɩ */ + 0x026f, 289, /* ɯ Ɯ */ + 0x0272, 287, /* ɲ Ɲ */ + 0x0283, 282, /* ʃ Ʃ */ + 0x0288, 282, /* ʈ Ʈ */ + 0x0292, 281, /* ʒ Ʒ */ + 0x03ac, 462, /* ά Ά */ + 0x03cc, 436, /* ό Ό */ + 0x03d0, 438, /* ϐ Β */ + 0x03d1, 443, /* ϑ Θ */ + 0x03d5, 453, /* ϕ Φ */ + 0x03d6, 446, /* ϖ Π */ + 0x03e3, 499, /* ϣ Ϣ */ + 0x03e5, 499, /* ϥ Ϥ */ + 0x03e7, 499, /* ϧ Ϧ */ + 0x03e9, 499, /* ϩ Ϩ */ + 0x03eb, 499, /* ϫ Ϫ */ + 0x03ed, 499, /* ϭ Ϭ */ + 0x03ef, 499, /* ϯ Ϯ */ + 0x03f0, 414, /* ϰ Κ */ + 0x03f1, 420, /* ϱ Ρ */ + 0x0461, 499, /* ѡ Ѡ */ + 0x0463, 499, /* ѣ Ѣ */ + 0x0465, 499, /* ѥ Ѥ */ + 0x0467, 499, /* ѧ Ѧ */ + 0x0469, 499, /* ѩ Ѩ */ + 0x046b, 499, /* ѫ Ѫ */ + 0x046d, 499, /* ѭ Ѭ */ + 0x046f, 499, /* ѯ Ѯ */ + 0x0471, 499, /* ѱ Ѱ */ + 0x0473, 499, /* ѳ Ѳ */ + 0x0475, 499, /* ѵ Ѵ */ + 0x0477, 499, /* ѷ Ѷ */ + 0x0479, 499, /* ѹ Ѹ */ + 0x047b, 499, /* ѻ Ѻ */ + 0x047d, 499, /* ѽ Ѽ */ + 0x047f, 499, /* ѿ Ѿ */ + 0x0481, 499, /* ҁ Ҁ */ + 0x0491, 499, /* ґ Ґ */ + 0x0493, 499, /* ғ Ғ */ + 0x0495, 499, /* ҕ Ҕ */ + 0x0497, 499, /* җ Җ */ + 0x0499, 499, /* ҙ Ҙ */ + 0x049b, 499, /* қ Қ */ + 0x049d, 499, /* ҝ Ҝ */ + 0x049f, 499, /* ҟ Ҟ */ + 0x04a1, 499, /* ҡ Ҡ */ + 0x04a3, 499, /* ң Ң */ + 0x04a5, 499, /* ҥ Ҥ */ + 
0x04a7, 499, /* ҧ Ҧ */ + 0x04a9, 499, /* ҩ Ҩ */ + 0x04ab, 499, /* ҫ Ҫ */ + 0x04ad, 499, /* ҭ Ҭ */ + 0x04af, 499, /* ү Ү */ + 0x04b1, 499, /* ұ Ұ */ + 0x04b3, 499, /* ҳ Ҳ */ + 0x04b5, 499, /* ҵ Ҵ */ + 0x04b7, 499, /* ҷ Ҷ */ + 0x04b9, 499, /* ҹ Ҹ */ + 0x04bb, 499, /* һ Һ */ + 0x04bd, 499, /* ҽ Ҽ */ + 0x04bf, 499, /* ҿ Ҿ */ + 0x04c2, 499, /* ӂ Ӂ */ + 0x04c4, 499, /* ӄ Ӄ */ + 0x04c8, 499, /* ӈ Ӈ */ + 0x04cc, 499, /* ӌ Ӌ */ + 0x04d1, 499, /* ӑ Ӑ */ + 0x04d3, 499, /* ӓ Ӓ */ + 0x04d5, 499, /* ӕ Ӕ */ + 0x04d7, 499, /* ӗ Ӗ */ + 0x04d9, 499, /* ә Ә */ + 0x04db, 499, /* ӛ Ӛ */ + 0x04dd, 499, /* ӝ Ӝ */ + 0x04df, 499, /* ӟ Ӟ */ + 0x04e1, 499, /* ӡ Ӡ */ + 0x04e3, 499, /* ӣ Ӣ */ + 0x04e5, 499, /* ӥ Ӥ */ + 0x04e7, 499, /* ӧ Ӧ */ + 0x04e9, 499, /* ө Ө */ + 0x04eb, 499, /* ӫ Ӫ */ + 0x04ef, 499, /* ӯ Ӯ */ + 0x04f1, 499, /* ӱ Ӱ */ + 0x04f3, 499, /* ӳ Ӳ */ + 0x04f5, 499, /* ӵ Ӵ */ + 0x04f9, 499, /* ӹ Ӹ */ + 0x1e01, 499, /* ḁ Ḁ */ + 0x1e03, 499, /* ḃ Ḃ */ + 0x1e05, 499, /* ḅ Ḅ */ + 0x1e07, 499, /* ḇ Ḇ */ + 0x1e09, 499, /* ḉ Ḉ */ + 0x1e0b, 499, /* ḋ Ḋ */ + 0x1e0d, 499, /* ḍ Ḍ */ + 0x1e0f, 499, /* ḏ Ḏ */ + 0x1e11, 499, /* ḑ Ḑ */ + 0x1e13, 499, /* ḓ Ḓ */ + 0x1e15, 499, /* ḕ Ḕ */ + 0x1e17, 499, /* ḗ Ḗ */ + 0x1e19, 499, /* ḙ Ḙ */ + 0x1e1b, 499, /* ḛ Ḛ */ + 0x1e1d, 499, /* ḝ Ḝ */ + 0x1e1f, 499, /* ḟ Ḟ */ + 0x1e21, 499, /* ḡ Ḡ */ + 0x1e23, 499, /* ḣ Ḣ */ + 0x1e25, 499, /* ḥ Ḥ */ + 0x1e27, 499, /* ḧ Ḧ */ + 0x1e29, 499, /* ḩ Ḩ */ + 0x1e2b, 499, /* ḫ Ḫ */ + 0x1e2d, 499, /* ḭ Ḭ */ + 0x1e2f, 499, /* ḯ Ḯ */ + 0x1e31, 499, /* ḱ Ḱ */ + 0x1e33, 499, /* ḳ Ḳ */ + 0x1e35, 499, /* ḵ Ḵ */ + 0x1e37, 499, /* ḷ Ḷ */ + 0x1e39, 499, /* ḹ Ḹ */ + 0x1e3b, 499, /* ḻ Ḻ */ + 0x1e3d, 499, /* ḽ Ḽ */ + 0x1e3f, 499, /* ḿ Ḿ */ + 0x1e41, 499, /* ṁ Ṁ */ + 0x1e43, 499, /* ṃ Ṃ */ + 0x1e45, 499, /* ṅ Ṅ */ + 0x1e47, 499, /* ṇ Ṇ */ + 0x1e49, 499, /* ṉ Ṉ */ + 0x1e4b, 499, /* ṋ Ṋ */ + 0x1e4d, 499, /* ṍ Ṍ */ + 0x1e4f, 499, /* ṏ Ṏ */ + 0x1e51, 499, /* ṑ Ṑ */ + 0x1e53, 499, /* ṓ Ṓ */ + 0x1e55, 499, /* ṕ Ṕ */ + 0x1e57, 499, /* ṗ Ṗ */ + 
0x1e59, 499, /* ṙ Ṙ */ + 0x1e5b, 499, /* ṛ Ṛ */ + 0x1e5d, 499, /* ṝ Ṝ */ + 0x1e5f, 499, /* ṟ Ṟ */ + 0x1e61, 499, /* ṡ Ṡ */ + 0x1e63, 499, /* ṣ Ṣ */ + 0x1e65, 499, /* ṥ Ṥ */ + 0x1e67, 499, /* ṧ Ṧ */ + 0x1e69, 499, /* ṩ Ṩ */ + 0x1e6b, 499, /* ṫ Ṫ */ + 0x1e6d, 499, /* ṭ Ṭ */ + 0x1e6f, 499, /* ṯ Ṯ */ + 0x1e71, 499, /* ṱ Ṱ */ + 0x1e73, 499, /* ṳ Ṳ */ + 0x1e75, 499, /* ṵ Ṵ */ + 0x1e77, 499, /* ṷ Ṷ */ + 0x1e79, 499, /* ṹ Ṹ */ + 0x1e7b, 499, /* ṻ Ṻ */ + 0x1e7d, 499, /* ṽ Ṽ */ + 0x1e7f, 499, /* ṿ Ṿ */ + 0x1e81, 499, /* ẁ Ẁ */ + 0x1e83, 499, /* ẃ Ẃ */ + 0x1e85, 499, /* ẅ Ẅ */ + 0x1e87, 499, /* ẇ Ẇ */ + 0x1e89, 499, /* ẉ Ẉ */ + 0x1e8b, 499, /* ẋ Ẋ */ + 0x1e8d, 499, /* ẍ Ẍ */ + 0x1e8f, 499, /* ẏ Ẏ */ + 0x1e91, 499, /* ẑ Ẑ */ + 0x1e93, 499, /* ẓ Ẓ */ + 0x1e95, 499, /* ẕ Ẕ */ + 0x1ea1, 499, /* ạ Ạ */ + 0x1ea3, 499, /* ả Ả */ + 0x1ea5, 499, /* ấ Ấ */ + 0x1ea7, 499, /* ầ Ầ */ + 0x1ea9, 499, /* ẩ Ẩ */ + 0x1eab, 499, /* ẫ Ẫ */ + 0x1ead, 499, /* ậ Ậ */ + 0x1eaf, 499, /* ắ Ắ */ + 0x1eb1, 499, /* ằ Ằ */ + 0x1eb3, 499, /* ẳ Ẳ */ + 0x1eb5, 499, /* ẵ Ẵ */ + 0x1eb7, 499, /* ặ Ặ */ + 0x1eb9, 499, /* ẹ Ẹ */ + 0x1ebb, 499, /* ẻ Ẻ */ + 0x1ebd, 499, /* ẽ Ẽ */ + 0x1ebf, 499, /* ế Ế */ + 0x1ec1, 499, /* ề Ề */ + 0x1ec3, 499, /* ể Ể */ + 0x1ec5, 499, /* ễ Ễ */ + 0x1ec7, 499, /* ệ Ệ */ + 0x1ec9, 499, /* ỉ Ỉ */ + 0x1ecb, 499, /* ị Ị */ + 0x1ecd, 499, /* ọ Ọ */ + 0x1ecf, 499, /* ỏ Ỏ */ + 0x1ed1, 499, /* ố Ố */ + 0x1ed3, 499, /* ồ Ồ */ + 0x1ed5, 499, /* ổ Ổ */ + 0x1ed7, 499, /* ỗ Ỗ */ + 0x1ed9, 499, /* ộ Ộ */ + 0x1edb, 499, /* ớ Ớ */ + 0x1edd, 499, /* ờ Ờ */ + 0x1edf, 499, /* ở Ở */ + 0x1ee1, 499, /* ỡ Ỡ */ + 0x1ee3, 499, /* ợ Ợ */ + 0x1ee5, 499, /* ụ Ụ */ + 0x1ee7, 499, /* ủ Ủ */ + 0x1ee9, 499, /* ứ Ứ */ + 0x1eeb, 499, /* ừ Ừ */ + 0x1eed, 499, /* ử Ử */ + 0x1eef, 499, /* ữ Ữ */ + 0x1ef1, 499, /* ự Ự */ + 0x1ef3, 499, /* ỳ Ỳ */ + 0x1ef5, 499, /* ỵ Ỵ */ + 0x1ef7, 499, /* ỷ Ỷ */ + 0x1ef9, 499, /* ỹ Ỹ */ + 0x1f51, 508, /* ὑ Ὑ */ + 0x1f53, 508, /* ὓ Ὓ */ + 0x1f55, 508, /* ὕ Ὕ */ + 0x1f57, 508, /* ὗ Ὗ */ + 
0x1fb3, 509, /* ᾳ ᾼ */ + 0x1fc3, 509, /* ῃ ῌ */ + 0x1fe5, 507, /* ῥ Ῥ */ + 0x1ff3, 509 /* ῳ ῼ */ +] + +const rnums = [ + 0x0030, 0x0039, + 0x0660, 0x0669, + 0x06f0, 0x06f9, + 0x07c0, 0x07c9, + 0x0966, 0x096f, + 0x09e6, 0x09ef, + 0x0a66, 0x0a6f, + 0x0ae6, 0x0aef, + 0x0b66, 0x0b6f, + 0x0be6, 0x0bef, + 0x0c66, 0x0c6f, + 0x0ce6, 0x0cef, + 0x0d66, 0x0d6f, + 0x0e50, 0x0e59, + 0x0ed0, 0x0ed9, + 0x0f20, 0x0f29, + 0x1040, 0x1049, + 0x17e0, 0x17e9, + 0x1810, 0x1819, + 0x1946, 0x194f, + 0x19d0, 0x19d9, + 0x1b50, 0x1b59, + 0xff10, 0xff19, + 0x104a0, 0x104a9, + 0x1d7ce, 0x1d7ff +] + +/* + * upper case ranges + * 3rd col is conversion excess 500 + */ +const rtolower2 = [ + 0x0041, 0x005a, 532, /* A-Z a-z */ + 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ + 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ + 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ + 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ + 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ + 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ + 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ + 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ + 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ + 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ + 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ + 0x0410, 0x042f, 532, /* А-Я а-я */ + 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ + 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ + 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ + 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ + 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ + 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ + 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ + 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ + 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ + 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ + 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ + 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ + 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ + 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ + 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ + 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ + 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ + 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ + 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ + 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ + 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ + 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ + 0xff21, 0xff3a, 532 /* A-Z a-z */ +] 
+ +/* + * upper case singlets + * 2nd col is conversion excess 500 + */ +const rtolower1 = [ + 0x0100, 501, /* Ā ā */ + 0x0102, 501, /* Ă ă */ + 0x0104, 501, /* Ą ą */ + 0x0106, 501, /* Ć ć */ + 0x0108, 501, /* Ĉ ĉ */ + 0x010a, 501, /* Ċ ċ */ + 0x010c, 501, /* Č č */ + 0x010e, 501, /* Ď ď */ + 0x0110, 501, /* Đ đ */ + 0x0112, 501, /* Ē ē */ + 0x0114, 501, /* Ĕ ĕ */ + 0x0116, 501, /* Ė ė */ + 0x0118, 501, /* Ę ę */ + 0x011a, 501, /* Ě ě */ + 0x011c, 501, /* Ĝ ĝ */ + 0x011e, 501, /* Ğ ğ */ + 0x0120, 501, /* Ġ ġ */ + 0x0122, 501, /* Ģ ģ */ + 0x0124, 501, /* Ĥ ĥ */ + 0x0126, 501, /* Ħ ħ */ + 0x0128, 501, /* Ĩ ĩ */ + 0x012a, 501, /* Ī ī */ + 0x012c, 501, /* Ĭ ĭ */ + 0x012e, 501, /* Į į */ + 0x0130, 301, /* İ i */ + 0x0132, 501, /* IJ ij */ + 0x0134, 501, /* Ĵ ĵ */ + 0x0136, 501, /* Ķ ķ */ + 0x0139, 501, /* Ĺ ĺ */ + 0x013b, 501, /* Ļ ļ */ + 0x013d, 501, /* Ľ ľ */ + 0x013f, 501, /* Ŀ ŀ */ + 0x0141, 501, /* Ł ł */ + 0x0143, 501, /* Ń ń */ + 0x0145, 501, /* Ņ ņ */ + 0x0147, 501, /* Ň ň */ + 0x014a, 501, /* Ŋ ŋ */ + 0x014c, 501, /* Ō ō */ + 0x014e, 501, /* Ŏ ŏ */ + 0x0150, 501, /* Ő ő */ + 0x0152, 501, /* Œ œ */ + 0x0154, 501, /* Ŕ ŕ */ + 0x0156, 501, /* Ŗ ŗ */ + 0x0158, 501, /* Ř ř */ + 0x015a, 501, /* Ś ś */ + 0x015c, 501, /* Ŝ ŝ */ + 0x015e, 501, /* Ş ş */ + 0x0160, 501, /* Š š */ + 0x0162, 501, /* Ţ ţ */ + 0x0164, 501, /* Ť ť */ + 0x0166, 501, /* Ŧ ŧ */ + 0x0168, 501, /* Ũ ũ */ + 0x016a, 501, /* Ū ū */ + 0x016c, 501, /* Ŭ ŭ */ + 0x016e, 501, /* Ů ů */ + 0x0170, 501, /* Ű ű */ + 0x0172, 501, /* Ų ų */ + 0x0174, 501, /* Ŵ ŵ */ + 0x0176, 501, /* Ŷ ŷ */ + 0x0178, 379, /* Ÿ ÿ */ + 0x0179, 501, /* Ź ź */ + 0x017b, 501, /* Ż ż */ + 0x017d, 501, /* Ž ž */ + 0x0181, 710, /* Ɓ ɓ */ + 0x0182, 501, /* Ƃ ƃ */ + 0x0184, 501, /* Ƅ ƅ */ + 0x0186, 706, /* Ɔ ɔ */ + 0x0187, 501, /* Ƈ ƈ */ + 0x018b, 501, /* Ƌ ƌ */ + 0x0190, 703, /* Ɛ ɛ */ + 0x0191, 501, /* Ƒ ƒ */ + 0x0193, 705, /* Ɠ ɠ */ + 0x0194, 707, /* Ɣ ɣ */ + 0x0196, 711, /* Ɩ ɩ */ + 0x0197, 709, /* Ɨ ɨ */ + 0x0198, 501, /* Ƙ ƙ */ + 
0x019c, 711, /* Ɯ ɯ */ + 0x019d, 713, /* Ɲ ɲ */ + 0x01a0, 501, /* Ơ ơ */ + 0x01a2, 501, /* Ƣ ƣ */ + 0x01a4, 501, /* Ƥ ƥ */ + 0x01a7, 501, /* Ƨ ƨ */ + 0x01a9, 718, /* Ʃ ʃ */ + 0x01ac, 501, /* Ƭ ƭ */ + 0x01ae, 718, /* Ʈ ʈ */ + 0x01af, 501, /* Ư ư */ + 0x01b3, 501, /* Ƴ ƴ */ + 0x01b5, 501, /* Ƶ ƶ */ + 0x01b7, 719, /* Ʒ ʒ */ + 0x01b8, 501, /* Ƹ ƹ */ + 0x01bc, 501, /* Ƽ ƽ */ + 0x01c4, 502, /* DŽ dž */ + 0x01c5, 501, /* Dž dž */ + 0x01c7, 502, /* LJ lj */ + 0x01c8, 501, /* Lj lj */ + 0x01ca, 502, /* NJ nj */ + 0x01cb, 501, /* Nj nj */ + 0x01cd, 501, /* Ǎ ǎ */ + 0x01cf, 501, /* Ǐ ǐ */ + 0x01d1, 501, /* Ǒ ǒ */ + 0x01d3, 501, /* Ǔ ǔ */ + 0x01d5, 501, /* Ǖ ǖ */ + 0x01d7, 501, /* Ǘ ǘ */ + 0x01d9, 501, /* Ǚ ǚ */ + 0x01db, 501, /* Ǜ ǜ */ + 0x01de, 501, /* Ǟ ǟ */ + 0x01e0, 501, /* Ǡ ǡ */ + 0x01e2, 501, /* Ǣ ǣ */ + 0x01e4, 501, /* Ǥ ǥ */ + 0x01e6, 501, /* Ǧ ǧ */ + 0x01e8, 501, /* Ǩ ǩ */ + 0x01ea, 501, /* Ǫ ǫ */ + 0x01ec, 501, /* Ǭ ǭ */ + 0x01ee, 501, /* Ǯ ǯ */ + 0x01f1, 502, /* DZ dz */ + 0x01f2, 501, /* Dz dz */ + 0x01f4, 501, /* Ǵ ǵ */ + 0x01fa, 501, /* Ǻ ǻ */ + 0x01fc, 501, /* Ǽ ǽ */ + 0x01fe, 501, /* Ǿ ǿ */ + 0x0200, 501, /* Ȁ ȁ */ + 0x0202, 501, /* Ȃ ȃ */ + 0x0204, 501, /* Ȅ ȅ */ + 0x0206, 501, /* Ȇ ȇ */ + 0x0208, 501, /* Ȉ ȉ */ + 0x020a, 501, /* Ȋ ȋ */ + 0x020c, 501, /* Ȍ ȍ */ + 0x020e, 501, /* Ȏ ȏ */ + 0x0210, 501, /* Ȑ ȑ */ + 0x0212, 501, /* Ȓ ȓ */ + 0x0214, 501, /* Ȕ ȕ */ + 0x0216, 501, /* Ȗ ȗ */ + 0x0386, 538, /* Ά ά */ + 0x038c, 564, /* Ό ό */ + 0x03e2, 501, /* Ϣ ϣ */ + 0x03e4, 501, /* Ϥ ϥ */ + 0x03e6, 501, /* Ϧ ϧ */ + 0x03e8, 501, /* Ϩ ϩ */ + 0x03ea, 501, /* Ϫ ϫ */ + 0x03ec, 501, /* Ϭ ϭ */ + 0x03ee, 501, /* Ϯ ϯ */ + 0x0460, 501, /* Ѡ ѡ */ + 0x0462, 501, /* Ѣ ѣ */ + 0x0464, 501, /* Ѥ ѥ */ + 0x0466, 501, /* Ѧ ѧ */ + 0x0468, 501, /* Ѩ ѩ */ + 0x046a, 501, /* Ѫ ѫ */ + 0x046c, 501, /* Ѭ ѭ */ + 0x046e, 501, /* Ѯ ѯ */ + 0x0470, 501, /* Ѱ ѱ */ + 0x0472, 501, /* Ѳ ѳ */ + 0x0474, 501, /* Ѵ ѵ */ + 0x0476, 501, /* Ѷ ѷ */ + 0x0478, 501, /* Ѹ ѹ */ + 0x047a, 501, /* Ѻ ѻ */ + 0x047c, 
501, /* Ѽ ѽ */ + 0x047e, 501, /* Ѿ ѿ */ + 0x0480, 501, /* Ҁ ҁ */ + 0x0490, 501, /* Ґ ґ */ + 0x0492, 501, /* Ғ ғ */ + 0x0494, 501, /* Ҕ ҕ */ + 0x0496, 501, /* Җ җ */ + 0x0498, 501, /* Ҙ ҙ */ + 0x049a, 501, /* Қ қ */ + 0x049c, 501, /* Ҝ ҝ */ + 0x049e, 501, /* Ҟ ҟ */ + 0x04a0, 501, /* Ҡ ҡ */ + 0x04a2, 501, /* Ң ң */ + 0x04a4, 501, /* Ҥ ҥ */ + 0x04a6, 501, /* Ҧ ҧ */ + 0x04a8, 501, /* Ҩ ҩ */ + 0x04aa, 501, /* Ҫ ҫ */ + 0x04ac, 501, /* Ҭ ҭ */ + 0x04ae, 501, /* Ү ү */ + 0x04b0, 501, /* Ұ ұ */ + 0x04b2, 501, /* Ҳ ҳ */ + 0x04b4, 501, /* Ҵ ҵ */ + 0x04b6, 501, /* Ҷ ҷ */ + 0x04b8, 501, /* Ҹ ҹ */ + 0x04ba, 501, /* Һ һ */ + 0x04bc, 501, /* Ҽ ҽ */ + 0x04be, 501, /* Ҿ ҿ */ + 0x04c1, 501, /* Ӂ ӂ */ + 0x04c3, 501, /* Ӄ ӄ */ + 0x04c7, 501, /* Ӈ ӈ */ + 0x04cb, 501, /* Ӌ ӌ */ + 0x04d0, 501, /* Ӑ ӑ */ + 0x04d2, 501, /* Ӓ ӓ */ + 0x04d4, 501, /* Ӕ ӕ */ + 0x04d6, 501, /* Ӗ ӗ */ + 0x04d8, 501, /* Ә ә */ + 0x04da, 501, /* Ӛ ӛ */ + 0x04dc, 501, /* Ӝ ӝ */ + 0x04de, 501, /* Ӟ ӟ */ + 0x04e0, 501, /* Ӡ ӡ */ + 0x04e2, 501, /* Ӣ ӣ */ + 0x04e4, 501, /* Ӥ ӥ */ + 0x04e6, 501, /* Ӧ ӧ */ + 0x04e8, 501, /* Ө ө */ + 0x04ea, 501, /* Ӫ ӫ */ + 0x04ee, 501, /* Ӯ ӯ */ + 0x04f0, 501, /* Ӱ ӱ */ + 0x04f2, 501, /* Ӳ ӳ */ + 0x04f4, 501, /* Ӵ ӵ */ + 0x04f8, 501, /* Ӹ ӹ */ + 0x1e00, 501, /* Ḁ ḁ */ + 0x1e02, 501, /* Ḃ ḃ */ + 0x1e04, 501, /* Ḅ ḅ */ + 0x1e06, 501, /* Ḇ ḇ */ + 0x1e08, 501, /* Ḉ ḉ */ + 0x1e0a, 501, /* Ḋ ḋ */ + 0x1e0c, 501, /* Ḍ ḍ */ + 0x1e0e, 501, /* Ḏ ḏ */ + 0x1e10, 501, /* Ḑ ḑ */ + 0x1e12, 501, /* Ḓ ḓ */ + 0x1e14, 501, /* Ḕ ḕ */ + 0x1e16, 501, /* Ḗ ḗ */ + 0x1e18, 501, /* Ḙ ḙ */ + 0x1e1a, 501, /* Ḛ ḛ */ + 0x1e1c, 501, /* Ḝ ḝ */ + 0x1e1e, 501, /* Ḟ ḟ */ + 0x1e20, 501, /* Ḡ ḡ */ + 0x1e22, 501, /* Ḣ ḣ */ + 0x1e24, 501, /* Ḥ ḥ */ + 0x1e26, 501, /* Ḧ ḧ */ + 0x1e28, 501, /* Ḩ ḩ */ + 0x1e2a, 501, /* Ḫ ḫ */ + 0x1e2c, 501, /* Ḭ ḭ */ + 0x1e2e, 501, /* Ḯ ḯ */ + 0x1e30, 501, /* Ḱ ḱ */ + 0x1e32, 501, /* Ḳ ḳ */ + 0x1e34, 501, /* Ḵ ḵ */ + 0x1e36, 501, /* Ḷ ḷ */ + 0x1e38, 501, /* Ḹ ḹ */ + 0x1e3a, 501, /* Ḻ ḻ */ + 0x1e3c, 
501, /* Ḽ ḽ */ + 0x1e3e, 501, /* Ḿ ḿ */ + 0x1e40, 501, /* Ṁ ṁ */ + 0x1e42, 501, /* Ṃ ṃ */ + 0x1e44, 501, /* Ṅ ṅ */ + 0x1e46, 501, /* Ṇ ṇ */ + 0x1e48, 501, /* Ṉ ṉ */ + 0x1e4a, 501, /* Ṋ ṋ */ + 0x1e4c, 501, /* Ṍ ṍ */ + 0x1e4e, 501, /* Ṏ ṏ */ + 0x1e50, 501, /* Ṑ ṑ */ + 0x1e52, 501, /* Ṓ ṓ */ + 0x1e54, 501, /* Ṕ ṕ */ + 0x1e56, 501, /* Ṗ ṗ */ + 0x1e58, 501, /* Ṙ ṙ */ + 0x1e5a, 501, /* Ṛ ṛ */ + 0x1e5c, 501, /* Ṝ ṝ */ + 0x1e5e, 501, /* Ṟ ṟ */ + 0x1e60, 501, /* Ṡ ṡ */ + 0x1e62, 501, /* Ṣ ṣ */ + 0x1e64, 501, /* Ṥ ṥ */ + 0x1e66, 501, /* Ṧ ṧ */ + 0x1e68, 501, /* Ṩ ṩ */ + 0x1e6a, 501, /* Ṫ ṫ */ + 0x1e6c, 501, /* Ṭ ṭ */ + 0x1e6e, 501, /* Ṯ ṯ */ + 0x1e70, 501, /* Ṱ ṱ */ + 0x1e72, 501, /* Ṳ ṳ */ + 0x1e74, 501, /* Ṵ ṵ */ + 0x1e76, 501, /* Ṷ ṷ */ + 0x1e78, 501, /* Ṹ ṹ */ + 0x1e7a, 501, /* Ṻ ṻ */ + 0x1e7c, 501, /* Ṽ ṽ */ + 0x1e7e, 501, /* Ṿ ṿ */ + 0x1e80, 501, /* Ẁ ẁ */ + 0x1e82, 501, /* Ẃ ẃ */ + 0x1e84, 501, /* Ẅ ẅ */ + 0x1e86, 501, /* Ẇ ẇ */ + 0x1e88, 501, /* Ẉ ẉ */ + 0x1e8a, 501, /* Ẋ ẋ */ + 0x1e8c, 501, /* Ẍ ẍ */ + 0x1e8e, 501, /* Ẏ ẏ */ + 0x1e90, 501, /* Ẑ ẑ */ + 0x1e92, 501, /* Ẓ ẓ */ + 0x1e94, 501, /* Ẕ ẕ */ + 0x1ea0, 501, /* Ạ ạ */ + 0x1ea2, 501, /* Ả ả */ + 0x1ea4, 501, /* Ấ ấ */ + 0x1ea6, 501, /* Ầ ầ */ + 0x1ea8, 501, /* Ẩ ẩ */ + 0x1eaa, 501, /* Ẫ ẫ */ + 0x1eac, 501, /* Ậ ậ */ + 0x1eae, 501, /* Ắ ắ */ + 0x1eb0, 501, /* Ằ ằ */ + 0x1eb2, 501, /* Ẳ ẳ */ + 0x1eb4, 501, /* Ẵ ẵ */ + 0x1eb6, 501, /* Ặ ặ */ + 0x1eb8, 501, /* Ẹ ẹ */ + 0x1eba, 501, /* Ẻ ẻ */ + 0x1ebc, 501, /* Ẽ ẽ */ + 0x1ebe, 501, /* Ế ế */ + 0x1ec0, 501, /* Ề ề */ + 0x1ec2, 501, /* Ể ể */ + 0x1ec4, 501, /* Ễ ễ */ + 0x1ec6, 501, /* Ệ ệ */ + 0x1ec8, 501, /* Ỉ ỉ */ + 0x1eca, 501, /* Ị ị */ + 0x1ecc, 501, /* Ọ ọ */ + 0x1ece, 501, /* Ỏ ỏ */ + 0x1ed0, 501, /* Ố ố */ + 0x1ed2, 501, /* Ồ ồ */ + 0x1ed4, 501, /* Ổ ổ */ + 0x1ed6, 501, /* Ỗ ỗ */ + 0x1ed8, 501, /* Ộ ộ */ + 0x1eda, 501, /* Ớ ớ */ + 0x1edc, 501, /* Ờ ờ */ + 0x1ede, 501, /* Ở ở */ + 0x1ee0, 501, /* Ỡ ỡ */ + 0x1ee2, 501, /* Ợ ợ */ + 0x1ee4, 501, /* Ụ ụ */ + 0x1ee6, 
501, /* Ủ ủ */
+	0x1ee8, 501,	/* Ứ ứ */
+	0x1eea, 501,	/* Ừ ừ */
+	0x1eec, 501,	/* Ử ử */
+	0x1eee, 501,	/* Ữ ữ */
+	0x1ef0, 501,	/* Ự ự */
+	0x1ef2, 501,	/* Ỳ ỳ */
+	0x1ef4, 501,	/* Ỵ ỵ */
+	0x1ef6, 501,	/* Ỷ ỷ */
+	0x1ef8, 501,	/* Ỹ ỹ */
+	0x1f59, 492,	/* Ὑ ὑ */
+	0x1f5b, 492,	/* Ὓ ὓ */
+	0x1f5d, 492,	/* Ὕ ὕ */
+	0x1f5f, 492,	/* Ὗ ὗ */
+	0x1fbc, 491,	/* ᾼ ᾳ */
+	0x1fcc, 491,	/* ῌ ῃ */
+	0x1fec, 493,	/* Ῥ ῥ */
+	0x1ffc, 491	/* ῼ ῳ */
+]
+
+/*
+ * title characters are those between
+ * upper and lower case. ie DZ Dz dz
+ */
+const rtotitle1 = [
+	0x01c4, 501,	/* DŽ Dž */
+	0x01c6, 499,	/* dž Dž */
+	0x01c7, 501,	/* LJ Lj */
+	0x01c9, 499,	/* lj Lj */
+	0x01ca, 501,	/* NJ Nj */
+	0x01cc, 499,	/* nj Nj */
+	0x01f1, 501,	/* DZ Dz */
+	0x01f3, 499	/* dz Dz */
+]
+
+const findc = {c, t, sz, nelt, ret
+	var l
+	var m
+
+	/* t holds chunks of nelt entries, ordered by their first entry. halve on
+	   whole chunks, keeping the odd one in the upper half, until one chunk is
+	   left. sz stays in the signature for the callers; t.len is what is used */
+	while t.len > nelt
+		m = (t.len/(2*nelt))*nelt
+		l = t[m:]
+		if c >= l[0]
+			t = l
+		else
+			t = t[0:m]
+		;;
+	;;
+
+	if t.len != 0 && c >= t[0]
+		ret# = t
+		-> true
+	else
+		-> false
+	;;
+}
+
+
+const isalpha = {c
+	var l
+
+	/* cased characters, then the caseless ranges, then the caseless singlets.
+	   each table is looked at on its own, so a range miss cannot hide a singlet */
+	if isupper(c) || islower(c)
+		-> true
+	;;
+	if findc(c, ralpha2[:], ralpha2.len/2, 2, &l) && c >= l[0] && c <= l[1]
+		-> true
+	;;
+	if findc(c, ralpha1[:], ralpha1.len, 1, &l) && c == l[0]
+		-> true
+	;;
+	-> false
+}
+
+const isnum = {c
+	var l
+
+	if findc(c, rnums[:], rnums.len/2, 2, &l)
+		if(c >= l[0] && c <= l[1])
+			-> true
+		;;
+	;;
+	-> false
+}
+
+const isalnum = {c
+	-> isalpha(c) || isnum(c)
+}
+
+const isblank = {c
+	var l
+	var sl
+	var len
+
+	l = rspace2[:]
+	sl = rspace2[:]
+	len = rspace2.len/2
+	if findc(c, sl, len, 2, &l)
+		if(c >= l[0] && c <= l[1])
+			-> true
+		;;
+	;;
+	-> false
+}
+
+const isspace = {c
+	-> c == '\n' || isblank(c)
+}
+
+const islower = {c
+	var l
+
+	/* a character is lower case when it has an entry in the to-upper tables.
+	   rtoupper2 rows are (lo, hi, delta) and rtoupper1 rows are (char, delta),
+	   so the chunk sizes are 3 and 2, the same ones toupper passes */
+	if findc(c, rtoupper2[:], rtoupper2.len/3, 3, &l) && c >= l[0] && c <= l[1]
+		-> true
+	;;
+	if findc(c, rtoupper1[:], rtoupper1.len/2, 2, &l) && c == l[0]
+		-> true
+	;;
+	-> false
+}
+
+const isupper = {c
+	var l
+
+	/* a character is upper case when it has an entry in the to-lower tables.
+	   rtolower2 rows are (lo, hi, delta) and rtolower1 rows are (char, delta),
+	   so the chunk sizes are 3 and 2, the same ones tolower passes */
+	if findc(c, rtolower2[:], rtolower2.len/3, 3, &l) && c >= l[0] && c <= l[1]
+		-> true
+	;;
+	if findc(c, rtolower1[:], rtolower1.len/2, 2, &l) && c == l[0]
+		-> true
+	;;
+	-> false
+}
+
+const istitle = {c
+	-> isupper(c) && islower(c)
+}
+
+const tolower = {c
+	var l
+
+	/* the last column of a row is the distance to the lower case form, biased
+	   by 500. the singlet table is tried whenever the range table has no
+	   matching row, not only when the range search finds no chunk at all */
+	if findc(c, rtolower2[:], rtolower2.len/3, 3, &l) && c >= l[0] && c <= l[1]
+		-> c + l[2] - 500;
+	;;
+	if findc(c, rtolower1[:], rtolower1.len/2, 2, &l) && c == l[0]
+		-> c + l[1] - 500;
+	;;
+	-> c
+}
+
+const toupper = {c
+	var l
+
+	/* the last column of a row is the distance to the upper case form, biased
+	   by 500. the singlet table is tried whenever the range table has no
+	   matching row, not only when the range search finds no chunk at all */
+	if findc(c, rtoupper2[:], rtoupper2.len/3, 3, &l) && c >= l[0] && c <= l[1]
+		-> c + l[2] - 500;
+	;;
+	if findc(c, rtoupper1[:], rtoupper1.len/2, 2, &l) && c == l[0]
+		-> c + l[1] - 500;
+	;;
+	-> c
+}
+
+const totitle = {c
+	var l
+
+	if findc(c, rtotitle1[:], rtotitle1.len/2, 2, &l);
+		if c == l[0]
+			-> c + l[1] - 500;
+		;;
+	;;
+	-> c
+}
+
+generic charval = {c, base -> @a::(numeric,integral)
+	var v = -1
+
+	if c >= '0' && c <= '9'
+		v = (c - '0') castto(@a::(integral,numeric))
+	elif c >= 'a' && c <= 'z'
+		v = (c - 'a' + 10) castto(@a::(integral,numeric))
+	elif c >= 'A' && c <= 'Z'
+		v = (c - 'A' + 10) castto(@a::(integral,numeric))
+	;;
+
+	if v < 0 || v >= (base castto(@a::(integral,numeric)))	/* valid digits are 0 .. base-1 */
+		-> -1
+	;;
+	-> v
+}
diff --git a/libstd/cmp.myr b/libstd/cmp.myr
new file mode 100644
index 0000000..edc5fe3
--- /dev/null
+++ b/libstd/cmp.myr
@@ -0,0 +1,54 @@
+use "extremum.use"
+use "types.use"
+
+pkg std =
+	type order = union
+		`Before
+		`Equal
+		`After
+	;;
+
+	generic numcmp	: (a : @a, b : @a -> order)
+	const strcmp	: (a : byte[:], b : byte[:] -> order)
+	const strncmp	: (a : byte[:], b : byte[:], n : size -> order)
+;;
+
+generic numcmp = {a, b
+	if a < b
+		-> `Before
+	elif a == b
+		-> `Equal
+	else
+		-> `After
+	;;
+}
+
+const strcmp = {a, b
+	var l
+	var i
+
+	l = min(a.len, b.len)
+	for i = 0; i < l; i++
+		if a[i] < b[i]
+			-> `Before
+		elif
a[i] > b[i] + -> `After + ;; + ;; + + if a.len < b.len + -> `Before + elif a.len > b.len + -> `After + else + -> `Equal + ;; + +} + +const strncmp = {a, b, n + a = a[:min(a.len, n)] + b = b[:min(b.len, n)] + -> strcmp(a, b) +} + diff --git a/libstd/dial.myr b/libstd/dial.myr new file mode 100644 index 0000000..095f2de --- /dev/null +++ b/libstd/dial.myr @@ -0,0 +1,131 @@ +use "alloc.use" +use "chartype.use" +use "die.use" +use "result.use" +use "sys.use" +use "sleq.use" +use "option.use" +use "ipparse.use" +use "resolve.use" +use "fmt.use" +use "endian.use" +use "intparse.use" +use "hasprefix.use" +use "utf.use" + +pkg std = + const dial : (dialstr : byte[:] -> result(fd, byte[:])) +;; + +/* + a map from service name to a list of (port,proto) + pairs in order of preference +*/ +/* FIXME: implement +var services : htab(byte[:], [int, byte[:]][:])# +var inited = false +*/ + +/* takes a plan 9 style dial string */ +const dial = {str + var proto, host, port + var socktype, portnum + var sa : sockaddr_in /* we only support inet sockets right now.. ugh. 
*/
+	var sock
+
+	(proto, str) = nameseg(str)
+	(host, str) = nameseg(str)
+	(port, str) = nameseg(str)
+
+	if proto.len == 0
+		-> `Fail "missing proto"
+	elif host.len == 0
+		-> `Fail "missing host"
+	elif port.len == 0
+		-> `Fail "missing port"
+	;;
+
+	if sleq(proto, "net")
+		-> `Fail "net wildcard proto not yet supported\n"
+	elif sleq(proto, "unix")
+		-> `Fail "net unix proto not yet supported\n"
+	elif sleq(proto, "tcp")
+		socktype = Sockstream
+	elif sleq(proto, "udp")
+		socktype = Sockdgram
+	else
+		/* without this, socktype would be used uninitialized below */
+		-> `Fail "unknown proto"
+	;;
+
+	match parseport(port)
+	| `Some n:	portnum = n
+	| `None:	-> `Fail "bad port"
+	;;
+
+	var ip
+	match getaddr(host)
+	| `Some a:	ip = a
+	| `None:	-> `Fail "unable to resolve host"
+	;;
+
+	match ip
+	| `Ipv4 bits:
+		sa.fam = Afinet
+		sa.addr = bits
+		sa.port = hosttonet(portnum)
+	| `Ipv6 bits:
+		-> `Fail "ipv6 not yet supported"
+	;;
+
+	sock = socket(sa.fam, socktype, 0)
+	if sock < 0
+		-> `Fail "failed to create socket"
+	;;
+	var err
+	err = connect(sock, (&sa) castto(sockaddr#), sizeof(sockaddr_in))
+	if err < 0
+		put("Errno %i\n", -err)
+		close(sock)
+		-> `Fail "failed to connect to socket"
+	;;
+
+	-> `Ok sock
+}
+
+/*
+  Converts a port string to a number: either a literal integer, or
+  one of a few hardcoded service names.
+*/
+const parseport = {port
+	match intparse(port)
+	| `Some n:	-> `Some n
+	| `None:
+		/* a small number of hardcoded ports */
+		if sleq(port, "http")
+			-> `Some 80
+		elif sleq(port, "https")
+			-> `Some 443
+		elif sleq(port, "ircd")
+			-> `Some 6667
+		elif sleq(port, "dns")
+			-> `Some 53
+		;;
+	;;
+	-> `None
+}
+
+/*
+  Turns `addr` into a network address: a literal ip is parsed directly,
+  anything else is resolved and the first result is used. Returns
+  `None when the name cannot be resolved, so that the caller never
+  sees an uninitialized address.
+*/
+const getaddr = {addr
+	var ip
+
+	match ipparse(addr)
+	| `Some a:
+		-> `Some a
+	| `None:
+	;;
+
+	match resolve(addr)
+	| `Ok hi:
+		if hi.len == 0
+			slfree(hi)
+			-> `None
+		;;
+		ip = hi[0].addr
+		slfree(hi)
+		-> `Some ip
+	| `Fail m:
+	;;
+	-> `None
+}
+
+const nameseg = {str
+	var len
+
+	for len = 0; len < str.len; len++
+		if str[len] == '!' 
castto(byte)
+			-> (str[:len], str[len+1:])
+		;;
+	;;
+	-> (str[:], str[len:])
+}
+
diff --git a/libstd/die.myr b/libstd/die.myr
new file mode 100644
index 0000000..ae7fe66
--- /dev/null
+++ b/libstd/die.myr
@@ -0,0 +1,19 @@
+use "sys.use"
+use "types.use"
+
+pkg std = 
+	const die	: (msg : byte[:] -> void)
+	const assert	: (cond : bool, msg : byte[:] -> void)
+;;
+
+const die = {msg
+	write(2, msg)
+	kill(getpid(), 6)
+}
+
+const assert = {cond, msg
+	if !cond
+		die(msg)
+	;;
+}
+
diff --git a/libstd/endian.myr b/libstd/endian.myr
new file mode 100644
index 0000000..8bac74f
--- /dev/null
+++ b/libstd/endian.myr
@@ -0,0 +1,32 @@
+pkg std =
+	generic hosttonet	: (v : @a -> @a)
+	generic nettohost	: (v : @a -> @a)
+;;
+
+/* FIXME: we only support little endian platforms right now,
+   so we assume a little endian machine. FIX THIS. */
+generic hosttonet = {v : @a::(integral,numeric)
+	var i
+	var ret
+
+	ret = 0
+	for i = 0; i < sizeof(@a); i++
+		ret <<= 8
+		ret |= v & 0xff 
+		v >>= 8
+	;;
+	-> ret
+}
+
+generic nettohost = {v : @a::(integral,numeric)
+	var i
+	var ret
+
+	ret = 0
+	for i = 0; i < sizeof(@a); i++
+		ret <<= 8
+		ret |= v & 0xff 
+		v >>= 8
+	;;
+	-> ret
+}
diff --git a/libstd/env.myr b/libstd/env.myr
new file mode 100644
index 0000000..85bced9
--- /dev/null
+++ b/libstd/env.myr
@@ -0,0 +1,21 @@
+use "extremum.use"
+use "option.use"
+use "sleq.use"
+
+pkg std =
+	extern var _environment	: byte[:][:]
+
+	const getenv :	(name : byte[:] -> std.option(byte[:]))
+;;
+
+/*
+  Looks up `name` in the environment. Returns the text following the
+  '=' of the first "name=value" entry whose name matches exactly, or
+  `None if there is no such entry.
+*/
+const getenv = {name
+	var n
+
+	n = name.len
+	for env in _environment
+		/* the entry must be long enough to hold the name and the '=' */
+		if env.len > n && sleq(name, env[:n]) && sleq(env[n:n+1], "=")
+			-> `Some env[n+1:]
+		;;
+	;;
+	-> `None
+}
+
diff --git a/libstd/execvp.myr b/libstd/execvp.myr
new file mode 100644
index 0000000..f12308a
--- /dev/null
+++ b/libstd/execvp.myr
@@ -0,0 +1,57 @@
+use "alloc.use"
+use "env.use"
+use "fmt.use"
+use "option.use"
+use "strfind.use"
+use "strsplit.use"
+use "sys.use"
+
+pkg std = 
+	const execvp	: (cmd : byte[:], args : 
byte[:][:] -> int64) + const execvpe : (cmd : byte[:], args : byte[:][:], env : byte[:][:] -> int64) +;; + +const execvp = {cmd, args + var paths, cmdlen + var buf : byte[512] + + match strfind(cmd, "/") + | `Some _: + -> execv(cmd, args) + | `None: + paths = getpaths() + for p in paths + cmdlen = bfmt(buf[:], "%s/%s", p, cmd) + execv(buf[:cmdlen], args) + ;; + slfree(paths) + ;; + -> -1 +} + +const execvpe = {cmd, args, env + var paths, cmdlen + var buf : byte[512] + + match strfind(cmd, "/") + | `Some _: + -> execve(cmd, args, env) + | `None: + paths = getpaths() + for p in paths + cmdlen = bfmt(buf[:], "%s/%s", p, cmd) + execve(buf[:cmdlen], args, env) + ;; + slfree(paths) + ;; + -> -1 +} + +const getpaths = { + var path + match getenv("PATH") + | `Some p: path = p + | `None: path = "/usr/local/bin:/bin:/usr/bin" + ;; + -> strsplit(path, ":") +} diff --git a/libstd/extremum.myr b/libstd/extremum.myr new file mode 100644 index 0000000..c4998da --- /dev/null +++ b/libstd/extremum.myr @@ -0,0 +1,20 @@ +pkg std = + generic min : (a : @a::numeric, b : @a::numeric -> @a::numeric) + generic max : (a : @a::numeric, b : @a::numeric -> @a::numeric) +;; + +generic min = {a, b + if a < b + -> a + else + -> b + ;; +} + +generic max = {a, b + if a > b + -> a + else + -> b + ;; +} diff --git a/libstd/floatbits.myr b/libstd/floatbits.myr new file mode 100644 index 0000000..50930a8 --- /dev/null +++ b/libstd/floatbits.myr @@ -0,0 +1,60 @@ +pkg std = + const float64bits : (flt : float64 -> uint64) + const float32bits : (flt : float32 -> uint32) + const float64frombits : (bits : uint64 -> float64) + const float32frombits : (bits : uint32 -> float32) + const float64explode : (flt : float64 -> [bool, uint64, int32]) + const float32explode : (flt : float64 -> [bool, uint64, int32]) +;; + +const float64bits = {flt; -> (&flt castto(uint64#))#} +const float32bits = {flt; -> (&flt castto(uint32#))#} +const float64frombits = {bits; -> (&bits castto(float64#))#} +const float32frombits = 
{bits; -> (&bits castto(float32#))#}
+
+/*
+  Splits a float64 into (isneg, mantissa, exponent) such that the
+  magnitude of the value is mantissa * 2^exponent. The implicit
+  leading bit is included in the mantissa for normal numbers.
+*/
+const float64explode = {flt
+	var bits, isneg, mant, exp
+
+	bits = float64bits(flt)
+	isneg = (bits >> 63) != 0 	/* msb is sign bit; set means negative */
+	exp = (bits >> 52) & 0x7ff 	/* exp is in bits [52..62] */
+	mant = bits & ((1ul << 52) - 1) /* mantissa is in bits [0..51] */
+
+	/* add back the implicit bit if this is not a denormal */
+	if exp != 0
+		mant |= 1ul << 52
+	else
+		exp = 1
+	;;
+	/*
+	   adjust for exponent bias. nb: because we are
+	   treating the mantissa as m.0 instead of 0.m,
+	   our exponent bias needs to be offset by the
+	   size of m (1023 + 52)
+	*/
+	-> (isneg, mant, (exp castto(int32)) - 1075)
+}
+
+/*
+  Same as float64explode, but decodes the 32 bit layout: 1 sign bit,
+  8 exponent bits and 23 mantissa bits.
+
+  NOTE(review): the argument is declared as float64 and the bits are
+  taken with float64bits, so the pattern decoded here is not that of a
+  float32 value. Confirm the intended input type against the callers.
+*/
+const float32explode = {flt
+	var bits, isneg, mant, exp
+
+	bits = float64bits(flt) castto(uint64)
+	isneg = (bits >> 31) != 0 	/* msb is sign bit; set means negative */
+	exp = (bits >> 23) & 0xff 	/* exp is in bits [23..30] */
+	mant = bits & ((1ul << 23) - 1) /* mantissa is in bits [0..22] */
+
+	/* add back the implicit bit if this is not a denormal */
+	if exp != 0
+		mant |= 1ul << 23
+	else
+		exp = 1
+	;;
+	/*
+	   adjust for exponent bias. nb: because we are
+	   treating the mantissa as m.0 instead of 0.m,
+	   our exponent bias needs to be offset by the
+	   size of m (127 + 23)
+	*/
+	-> (isneg, mant, (exp castto(int32)) - 150)
+}
+
diff --git a/libstd/fmt.myr b/libstd/fmt.myr
new file mode 100644
index 0000000..bc389c3
--- /dev/null
+++ b/libstd/fmt.myr
@@ -0,0 +1,282 @@
+use "alloc.use"
+use "die.use"
+use "sys.use"
+use "types.use"
+use "utf.use"
+use "varargs.use"
+use "extremum.use"
+
+/*
+  printf-like functions. These use a different syntax from the C printf,
+  as described below:
+
+	  %s	- A string, ie, a utf8 encoded byte slice.
+	  %t	- A boolean
+	  %b	- A byte.
+	  %w	- A 16 bit integer
+	  %i	- A 32 bit integer
+	  %l	- A 64 bit integer
+	  %z	- A size
+	  %p	- A pointer
+	  %c	- A char
+*/
+
+pkg std =
+	const put	: (fmt : byte[:], args : ... -> size)
+	const putv	: (fmt : byte[:], ap : valist -> size)
+	const fatal	: (status : int, fmt : byte[:], args : ... 
-> void) + const fatalv : (status : int, fmt : byte[:], ap : valist -> void) + const fmt : (fmt : byte[:], args : ... -> byte[:]) + const fmtv : (fmt : byte[:], ap : valist -> byte[:]) + const bfmt : (buf : byte[:], fmt : byte[:], args : ... -> size) + const bfmtv : (buf : byte[:], fmt : byte[:], ap : valist -> size) +;; + +/* Writes a string of text up to 2 kb in size to stdout */ +const put = {fmt, args + -> putv(fmt, vastart(&args)) +} + +/* Writes a string of text up to 2kb long to stdout, using a valist + as the source of the arguments */ +const putv = {fmt, ap + var buf : byte[2048] + var n + + n = bfmtv(buf[:], fmt, ap) + write(1, buf[:n]) + -> n +} + +/* same as 'put', but exits the program after printing */ +const fatal = {status, fmt, args + putv(fmt, vastart(&args)) + exit(status) +} + +/* same as 'putv', but exits the program after printing */ +const fatalv = {status, fmt, ap + putv(fmt, ap) + exit(status) +} + +/* formats a string, allocating the slice. FIXME: calculate the + size needed. */ +const fmt = {fmt, args + -> fmtv(fmt, vastart(&args)) +} + +/* formats a string, allocating the slice. FIXME: calculate the + size needed. Takes a valist as it's last argument. 
*/ +const fmtv = {fmt, ap + var buf + var sz + + buf = slalloc(2048) + sz = bfmtv(buf, fmt, ap) + -> buf[:sz] +} + +/* formats a string of text as specified by 'fmt' into 'buf' */ +const bfmt = {buf, fmt, args + -> bfmtv(buf, fmt, vastart(&args)) +} + +const digitchars = [ + '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f' +] +generic intfmt = {buf : byte[:], bits : @a::(integral,numeric), base, signed + var isneg + var val + var b : char[32] + var i + var j + var n + + n = 0 + i = 0 + if signed && bits < 0 + val = -bits castto(uint64) + isneg = true + else + val = bits castto(uint64) + val &= ~0 >> (8*(sizeof(uint64)-sizeof(@a))) + isneg = false + ;; + + if val == 0 + b[0] = '0' + i++ + ;; + while val != 0 + b[i] = digitchars[val % base] + val /= base + i++ + ;; + n = 0 + if isneg + n += encode(buf[n:], '-') + ;; + for j = i; j != 0; j-- + n += encode(buf[n:], b[j - 1]) + ;; + -> n +} + +/* formats a string of text as specified by 'fmt' into 'buf', + using a valist for the arguments */ +const bfmtv = {buf, fmt, ap + var c + var n + var base + var signed + var s_val : byte[:] + var t_val : bool + var b_val : int8, ub_val : uint8 + var w_val : int16, uw_val : uint16 + var i_val : int32, ui_val : uint32 + var l_val : int64, ul_val : uint64 + var z_val : size + var p_val : byte# + var c_val : char + var f_val : float64, F_val : float32 + + n = 0 + while fmt.len != 0 + (c, fmt) = striter(fmt) + if c == '%' + base = 10 + signed = true + (c, fmt) = striter(fmt) + /* modifiers */ + if fmt.len > 0 + match c + | 'x': + (c, fmt) = striter(fmt) + base = 16 + signed = false + + | 'u': + (c, fmt) = striter(fmt) + signed = false + ;; + ;; + /* format specifiers */ + match c + | 's': + (s_val, ap) = vanext(ap) + n += strfmt(buf[n:], s_val) + | 't': + (t_val, ap) = vanext(ap) + n += boolfmt(buf[n:], t_val) + | 'f': + (f_val, ap) = vanext(ap) + n += floatfmt(buf[n:], f_val, 0, 0) + /* FIXME: float casts are currently broken + | 'F': + (F_val, ap) = vanext(ap) + n += 
floatfmt(buf[n:], F_val castto(float64)) + */ + /* format integers */ + | 'b': + if signed + (b_val, ap) = vanext(ap) + n += intfmt(buf[n:], b_val, base, signed) + else + (ub_val, ap) = vanext(ap) + n += intfmt(buf[n:], ub_val, base, signed) + ;; + | 'w': + if signed + (w_val, ap) = vanext(ap) + n += intfmt(buf[n:], w_val, base, signed) + else + (uw_val, ap) = vanext(ap) + n += intfmt(buf[n:], uw_val, base, signed) + ;; + | 'i': + if signed + (i_val, ap) = vanext(ap) + n += intfmt(buf[n:], i_val, base, signed) + else + (ui_val, ap) = vanext(ap) + n += intfmt(buf[n:], ui_val, base, signed) + ;; + | 'l': + if signed + (l_val, ap) = vanext(ap) + n += intfmt(buf[n:], l_val, base, signed) + else + (ul_val, ap) = vanext(ap) + n += intfmt(buf[n:], ul_val, base, signed) + ;; + + | 'z': + (z_val, ap) = vanext(ap) + n += intfmt(buf[n:], z_val castto(int64), base, signed) + | 'p': + (p_val, ap) = vanext(ap) + n += intfmt(buf[n:], p_val castto(int64), 16, false) + | 'c': (c_val, ap) = vanext(ap) + n += encode(buf[n:], c_val) + | _: + die("Unknown format specifier\n") + ;; + else + n += encode(buf[n:], c) + ;; + ;; + -> n +} + +const strfmt = {buf, str + var i + + for i = 0; i < min(str.len, buf.len); i++ + buf[i] = str[i] + ;; + -> i +} + +const boolfmt = {buf, val + var s + + if val + s = "true" + else + s = "false" + ;; + -> strfmt(buf, s) +} + +/* + buf: the output buffer. + val: the value to format, in float64 format. + mode: the truncation mode. + 0 => print until precision exhausted. + 1 => print until precision exhausted or maxdigits produced. + 2 => print until maxdigits produced, paddding with zeros. 
+ */ + +const floatfmt = {buf, val, mode, maxdigits + var i + var n + /* + var isneg + */ + /*var b, e, f*/ + + /* handle 0 specially to avoid special cases */ + if val == 0.0 + n = strfmt(buf, "0.0") + if mode == 0 && maxdigits > 2 + for i = 1; i < maxdigits; i++ + n += strfmt(buf[n:], "0") + ;; + ;; + -> n + ;; + + -> strfmt(buf, "floats not implemented") +} + diff --git a/libstd/hashfuncs.myr b/libstd/hashfuncs.myr new file mode 100644 index 0000000..49aa0e5 --- /dev/null +++ b/libstd/hashfuncs.myr @@ -0,0 +1,89 @@ +use "sleq.use" +use "types.use" + +pkg std = + const strhash : (s : byte[:] -> uint32) + const streq : (a : byte[:], b : byte[:] -> bool) + + generic ptrhash : (p : @a# -> uint32) + generic ptreq : (a : @a#, b : @a# -> bool) + + generic inthash : (v : @a::(integral,numeric) -> uint32) + generic inteq : (a : @a::(integral,numeric), b : @a::(integral,numeric) -> bool) + + const murmurhash2 : (data : byte[:], seed : uint32 -> uint32) +;; + +const Seed = 1234 + +/* Supremely simple djb hash. 
*/
+const strhash = {s
+	-> murmurhash2(s, Seed)
+}
+
+const streq = {a, b
+	-> sleq(a, b)
+}
+
+/* Hashes the pointer value itself, not the object it points to. */
+generic ptrhash = {p : @a#
+	var x
+
+	x = &p castto(byte#)
+	/* x views the bytes of the pointer, so the length is the pointer's size */
+	-> murmurhash2(x[0:sizeof(@a#)], Seed)
+}
+
+generic ptreq = {a, b
+	-> a == b
+}
+
+generic inthash = {v : @a::(integral,numeric)
+	var p
+
+	p = &v castto(byte#)
+	-> murmurhash2(p[0:sizeof(@a)], Seed)
+}
+
+generic inteq = {a, b
+	-> a == b
+}
+
+/*
+  Hashes `data` four bytes at a time, read in little endian order,
+  then folds in the 1 to 3 trailing bytes and applies a final mix.
+*/
+const murmurhash2 = {data, seed
+	const m = 0x5bd1e995;
+	const r = 24
+	var h, k
+
+	h = seed ^ data.len
+	while data.len >= 4
+		k = (data[0] castto(uint32))
+		k |= (data[1] castto(uint32)) << 8
+		k |= (data[2] castto(uint32)) << 16
+		k |= (data[3] castto(uint32)) << 24
+
+		k *= m
+		k ^= k >> r
+		k *= m
+
+		h *= m
+		h ^= k
+		data = data[4:]
+	;;
+
+	match data.len
+	| 3:
+		h ^= (data[2] castto(uint32)) << 16
+		h ^= (data[1] castto(uint32)) <<8
+		h ^= (data[0] castto(uint32))
+	| 2:
+		h ^= (data[1] castto(uint32)) <<8
+		h ^= (data[0] castto(uint32))
+	| 1:
+		h ^= (data[0] castto(uint32))
+	| _:
+		/* no trailing bytes to fold in */
+	;;
+	h *= m
+
+	h ^= h >> 13
+	h *= m
+	h ^= h >> 15
+
+	-> h
+}
diff --git a/libstd/hasprefix.myr b/libstd/hasprefix.myr
new file mode 100644
index 0000000..c9749f6
--- /dev/null
+++ b/libstd/hasprefix.myr
@@ -0,0 +1,12 @@
+use "cmp.use"
+pkg std =
+	const hasprefix	: (s : byte[:], pre : byte[:] -> bool)
+;;
+
+const hasprefix = {s, pre
+	match strncmp(s, pre, pre.len)
+	| `Equal:	-> true
+	| _:		-> false
+	;;
+}
+
diff --git a/libstd/hassuffix.myr b/libstd/hassuffix.myr
new file mode 100644
index 0000000..e8db517
--- /dev/null
+++ b/libstd/hassuffix.myr
@@ -0,0 +1,17 @@
+use "cmp.use"
+pkg std =
+	const hassuffix	: (s : byte[:], suff : byte[:] -> bool)
+;;
+
+/*
+  Returns true if `s` ends with `suff`. A suffix longer than `s`
+  never matches; the empty suffix always does.
+*/
+const hassuffix = {s, suff
+	var tail
+
+	/* only look at the tail when the suffix fits inside s */
+	if suff.len <= s.len
+		tail = s[s.len - suff.len:]
+		match strncmp(tail, suff, suff.len)
+		| `Equal:	-> true
+		| _:		-> false
+		;;
+	;;
+	-> false
+}
+
diff --git a/libstd/htab.myr b/libstd/htab.myr
new file mode 100644
index 0000000..f4217f7
--- /dev/null
+++ b/libstd/htab.myr
@@ -0,0 +1,199 
@@
+use "alloc.use"
+use "die.use"
+use "extremum.use"
+use "fmt.use"
+use "option.use"
+use "types.use"
+
+pkg std =
+	type htab(@k, @v) = struct
+		hash	: (k : @k	-> uint32)
+		eq	: (a : @k, b : @k -> bool)
+
+		nelt	: size
+		keys	: @k[:]
+		vals	: @v[:]
+		hashes	: uint32[:]
+		dead	: bool[:]
+	;;
+
+	generic mkht	: (h : (k : @k -> uint32), eq : (a : @k, b : @k -> bool) -> htab(@k, @v)#)
+	generic htfree	: (ht : htab(@k, @v)# -> void)
+	generic htput	: (ht : htab(@k, @v)#, k : @k, v : @v -> void)
+	generic htdel	: (ht : htab(@k, @v)#, k : @k -> void)
+	generic htget	: (ht : htab(@k, @v)#, k : @k -> option(@v))
+	generic htgetv	: (ht : htab(@k, @v)#, k : @k, fallback : @v-> @v)
+	generic hthas	: (ht : htab(@k, @v)#, k : @k -> bool)
+	generic htkeys	: (ht : htab(@k, @v)# -> @k[:])
+;;
+
+const Initsz = 32
+
+/* hashes `k`, remapping 0 to 1, since a stored hash of 0 marks an unused slot */
+generic hash = {ht, k
+	var h
+
+	h = ht.hash(k)
+	if h == 0
+		-> 1
+	else
+		-> h
+	;;
+}
+
+/*
+  Rebuilds the table with only its live entries, dropping tombstones.
+  Called by htput when the table gets too full, and by htdel when it
+  gets sparse, so the new size is derived from the live element count
+  rather than from the old size.
+*/
+generic resize = {ht
+	var oldk
+	var oldv
+	var oldh
+	var oldd
+	var sz
+	var i
+
+	oldk = ht.keys
+	oldv = ht.vals
+	oldh = ht.hashes
+	oldd = ht.dead
+	/*
+	   smallest doubling of Initsz that leaves the table at most
+	   half full. The size must stay a power of two, since slots
+	   are selected by masking with len - 1.
+	*/
+	sz = Initsz
+	while sz < 2*ht.nelt
+		sz *= 2
+	;;
+	ht.keys = slalloc(sz)
+	ht.vals = slalloc(sz)
+	ht.hashes = slzalloc(sz)
+	ht.dead = slzalloc(sz)
+
+	ht.nelt = 0
+	for i = 0; i < oldk.len; i++
+		if oldh[i] != 0 && !oldd[i]
+			htput(ht, oldk[i], oldv[i])
+		;;
+	;;
+	slfree(oldk)
+	slfree(oldv)
+	slfree(oldh)
+	slfree(oldd)
+}
+
+/*
+  Finds the slot holding key `k`, probing linearly from its hash.
+  Tombstones are stepped over so that keys stored past them stay
+  reachable, and a slot with a matching hash but a different key no
+  longer stalls the probe. The walk ends at the first never-used
+  slot, or once every slot has been visited.
+*/
+generic idx = {ht, k
+	var i, di
+	var h
+	var n
+
+	di = 0
+	h = hash(ht, k)
+	i = h & (ht.keys.len - 1)
+	for n = 0; n < ht.keys.len; n++
+		if ht.hashes[i] == 0
+			-> `None
+		;;
+		if !ht.dead[i] && ht.hashes[i] == h && ht.eq(ht.keys[i], k)
+			-> `Some i
+		;;
+		di++
+		i = (h + di) & (ht.keys.len - 1)
+	;;
+	-> `None
+}
+
+generic mkht = {h, eq
+	var ht
+
+	ht = alloc()
+
+	ht.hash = h
+	ht.eq = eq
+
+	ht.nelt = 0
+	ht.keys = slalloc(Initsz)
+	ht.vals = slalloc(Initsz)
+	ht.hashes = slzalloc(Initsz)
+	ht.dead = slzalloc(Initsz)
+	-> ht
+}
+
+generic htfree = {ht
+	slfree(ht.keys)
+	slfree(ht.vals)
+	slfree(ht.hashes)
+	slfree(ht.dead)
+	free(ht)
+}
+
+/*
+  Associates `v` with `k`. An existing entry for `k` is overwritten in
+  place and does not change the element count; a new entry takes the
+  first unused or dead slot on its probe path.
+*/
+generic htput = {ht, k, v
+	var i, di
+	var h
+
+	/* second insertion just overwrites first. */
+	match idx(ht, k)
+	| `Some j:
+		ht.keys[j] = k
+		ht.vals[j] = v
+		->
+	| `None:
+	;;
+
+	di = 0
+	h = hash(ht, k)
+	i = h & (ht.keys.len - 1)
+	/* the table is kept at most half full, so a free slot always exists */
+	while ht.hashes[i] != 0 && !ht.dead[i]
+		di++
+		i = (h + di) & (ht.keys.len - 1)
+	;;
+	ht.hashes[i] = h
+	ht.keys[i] = k
+	ht.vals[i] = v
+	ht.dead[i] = false
+	ht.nelt++
+	if ht.keys.len < ht.nelt * 2
+		resize(ht)
+	;;
+}
+
+generic htdel = {ht, k
+	match idx(ht, k)
+	| `Some i:
+		ht.dead[i] = true
+		ht.nelt--
+		/* remove tombstones if we shrink enough */
+		if ht.keys.len > ht.nelt * 4
+			resize(ht)
+		;;
+	| _:	
+		/* do nothing */
+	;;
+}
+
+generic htget = {ht, k
+	match idx(ht, k)
+	| `Some i:	-> `Some ht.vals[i]
+	| `None:	-> `None
+	;;
+}
+
+generic htgetv = {ht, k, v
+	match idx(ht, k)
+	| `Some i:	-> ht.vals[i]
+	| `None:	-> v
+	;;
+}
+
+generic hthas = {ht, k
+	match idx(ht, k)
+	| `Some i:	-> true
+	| `None:	-> false
+	;;
+}
+
+generic htkeys = {ht
+	var keys
+	var i
+	var j
+
+	keys = slalloc(ht.nelt)
+	j = 0
+	for i = 0; i < ht.keys.len; i++
+		if ht.hashes[i] != 0 && !ht.dead[i]
+			keys[j++] = ht.keys[i]
+		;;
+	;;
+	-> keys
+}
+
diff --git a/libstd/ifreq-linux.myr b/libstd/ifreq-linux.myr
new file mode 100644
index 0000000..b9c5ee1
--- /dev/null
+++ b/libstd/ifreq-linux.myr
@@ -0,0 +1,67 @@
+use "sys.use"
+
+pkg std =
+	const Ifnamesz = 16
+
+	type ifreq_addr = struct
+		name	: byte[Ifnamesz]
+		addr	: sockaddr
+	;;
+
+	type ifreq_dstaddr = struct
+		name	: byte[Ifnamesz]
+		dstaddr	: sockaddr
+	;;
+
+	type ifreq_broadaddr = struct
+		name	: byte[Ifnamesz]
+		broadaddr	: sockaddr
+	;;
+
+	type ifreq_netmask = struct
+		name	: byte[Ifnamesz]
+		netmask	: sockaddr
+	;;
+
+
+	type ifreq_hwaddr = struct
+		name	: byte[Ifnamesz]
+		hwaddr	: sockaddr
+	;;
+
+	type ifreq_flags = struct
+		name	: 
byte[Ifnamesz] + flags : int16 + ;; + + type ifreq_ifindex = struct + name : byte[Ifnamesz] + index : int32 + ;; + + type ifreq_metric = struct + name : byte[Ifnamesz] + metric : int32 + ;; + + + type ifreq_mtu = struct + name : byte[Ifnamesz] + mtu : int32 + ;; + + type ifreq_slave = struct + name : byte[Ifnamesz] + slave : byte[Ifnamesz] + ;; + + type ifreq_newname = struct + name : byte[Ifnamesz] + newname : byte[Ifnamesz] + ;; + + type ifreq_data = struct + name : byte[Ifnamesz] + data : void# + ;; +;; diff --git a/libstd/ifreq-osx.myr b/libstd/ifreq-osx.myr new file mode 100644 index 0000000..93ec177 --- /dev/null +++ b/libstd/ifreq-osx.myr @@ -0,0 +1,77 @@ +use "sys.use" + +pkg std = + const Ifnamesz = 16 + + type ifreq_addr = struct + name : byte[Ifnamesz] + addr : sockaddr + ;; + + type ifreq_dstaddr = struct + name : byte[Ifnamesz] + dstaddr : sockaddr + ;; + + type ifreq_broadaddr = struct + name : byte[Ifnamesz] + broadaddr : sockaddr + ;; + + type ifreq_flags = struct + name : byte[Ifnamesz] + flags : int16 + ;; + + type ifreq_metric = struct + name : byte[Ifnamesz] + metric : int32 + ;; + + + type ifreq_phys = struct + name : byte[Ifnamesz] + phys : int32 + ;; + + type ifreq_media = struct + name : byte[Ifnamesz] + media : int32 + ;; + + type ifreq_data = struct + name : byte[Ifnamesz] + data : void# + ;; + + type ifreq_devmtu = struct + name : byte[Ifnamesz] + cur : uint32 + min : uint32 + max : uint32 + ;; + + type ifreq_kpi = struct + name : byte[Ifnamesz] + modid : uint32 + typeid : uint32 + ptr : void# + ;; + + type ifreq_wakeflg = struct + name : byte[Ifnamesz] + wakeflg : uint32 + ;; + + type ifreq_routerefs = struct + name : byte[Ifnamesz] + refs : uint32 + ;; + + type ifreq_icaps = struct + name : byte[Ifnamesz] + req : uint32 + cur : uint32 + ;; + +;; diff --git a/libstd/intparse.myr b/libstd/intparse.myr new file mode 100644 index 0000000..685c6cd --- /dev/null +++ b/libstd/intparse.myr @@ -0,0 +1,71 @@ +use "chartype.use" +use "die.use" 
+use "fmt.use" +use "hasprefix.use" +use "option.use" +use "types.use" +use "utf.use" + +pkg std = + generic intparsebase : (s : byte[:], base : int -> option(@a::(integral,numeric))) + generic intparse : (s : byte[:] -> option(@a::(integral,numeric))) +;; + +generic intparse = {s + var isneg + + isneg = false + if hasprefix(s, "-") + s = s[1:] + isneg = true + ;; + + if hasprefix(s, "0x") + -> doparse(s[2:], isneg, 16) + elif hasprefix(s, "0o") + -> doparse(s[2:], isneg, 8) + elif hasprefix(s, "0b") + -> doparse(s[2:], isneg, 2) + else + -> doparse(s, isneg, 10) + ;; +} + +generic intparsebase = {s, base + var isneg + + isneg = false + if hasprefix(s, "-") + s = s[1:] + isneg = true + ;; + + -> doparse(s, isneg, base) +} + +generic doparse = {s, isneg, base + var c + var v + var cv : int32 + + v = 0 + while s.len != 0 + (c, s) = striter(s) + if c == '_' + continue + ;; + cv = charval(c, base) + if cv >= 0 + v *= (base castto(@a::(integral,numeric))) + v += cv castto(@a::(integral,numeric)) + else + -> `None + ;; + ;; + + if isneg + -> `Some -v + else + -> `Some v + ;; +} diff --git a/libstd/ipparse.myr b/libstd/ipparse.myr new file mode 100644 index 0000000..7b5a345 --- /dev/null +++ b/libstd/ipparse.myr @@ -0,0 +1,74 @@ +use "die.use" +use "intparse.use" +use "option.use" +use "strfind.use" +use "types.use" +use "chartype.use" + + /* FIXME: needed for decls which should be pulled in as hidden */ +use "hasprefix.use" +use "utf.use" + +pkg std = + + type netaddr = union + `Ipv4 byte[4] + `Ipv6 byte[16] + ;; + + const ipparse : (ip : byte[:] -> option(netaddr)) + const ip4parse : (ip : byte[:] -> option(netaddr)) + const ip6parse : (ip : byte[:] -> option(netaddr)) +;; + +const ipparse = {ip + match strfind(ip, ":") + | `Some _: -> ip6parse(ip) + | `None: -> ip4parse(ip) + ;; +} + +const ip4parse = {ip + var addr + var last : size + var x : option(int32) + var val : int32 /* need int32 to check for overflow */ + var i + var j : size + + i = 0 + last = 0 + for j = 0; 
j < ip.len; j++
+		if ip[j] == '.' castto(byte)
+			/* at most three separators, and no empty octets */
+			if i >= 3 || j == last
+				-> `None
+			;;
+			match intparsebase(ip[last:j], 10)
+			| `Some v:
+				val = v
+				if val < 0 || val > 255
+					-> `None
+				;;
+				addr[i++] = val castto(byte)
+				last = j + 1
+			| `None:
+				-> `None
+			;;
+		;;
+	;;
+	/* exactly four octets are required, and the last must not be empty */
+	if i != 3 || j == last
+		-> `None
+	;;
+	match intparsebase(ip[last:j], 10)
+	| `Some v:
+		val = v
+		if val < 0 || val > 255
+			-> `None
+		;;
+		addr[i] = val castto(byte)
+	| `None:
+		-> `None
+	;;
+	-> `Some (`Ipv4 addr)
+}
+
+const ip6parse = {ip
+	-> `None
+}
diff --git a/libstd/mk.myr b/libstd/mk.myr
new file mode 100644
index 0000000..19b46d2
--- /dev/null
+++ b/libstd/mk.myr
@@ -0,0 +1,22 @@
+use "alloc.use"
+
+pkg std =
+	generic mk	: (val : @a -> @a#)
+;;
+
+/* Takes a value, and heapifies it.
+
+FIXME: This depends on inlining and copy propagation
+in order to be efficient. Neither of those are
+currently implemented. That means that this function
+is not efficient.
+
+It's still damn convenient, though, so it's in.
+*/
+generic mk = {val
+	var p
+
+	p = alloc()
+	p# = val
+	-> p
+}
diff --git a/libstd/now.myr b/libstd/now.myr
new file mode 100644
index 0000000..cf24781
--- /dev/null
+++ b/libstd/now.myr
@@ -0,0 +1,22 @@
+use "sys.use"
+use "types.use"
+use "fmt.use"
+
+pkg std =
+	const now	: (-> time)
+;;
+
+/* milliseconds since epoch, or -1 if the clock could not be read */
+const now = {
+	var tm
+	var sec
+	var nsec
+
+	/* tm only holds a time once clock_gettime has filled it in */
+	if clock_gettime(`Clockrealtime, &tm) == 0
+		sec = tm.sec
+		nsec = tm.nsec castto(uint64)
+		-> (sec*1000 + nsec/(1000*1000)) castto(time)
+	else
+		-> -1
+	;;
+}
diff --git a/libstd/option.myr b/libstd/option.myr
new file mode 100644
index 0000000..d037b0a
--- /dev/null
+++ b/libstd/option.myr
@@ -0,0 +1,11 @@
+use "types.use"
+use "fmt.use"
+use "varargs.use"
+
+pkg std =
+	type option(@a) = union
+		`Some @a
+		`None
+	;;
+;;
+
diff --git a/libstd/optparse.myr b/libstd/optparse.myr
new file mode 100644
index 0000000..0da8aae
--- /dev/null
+++ b/libstd/optparse.myr
@@ -0,0 +1,138 @@
+use "alloc.use"
+use "die.use"
+use "extremum.use"
+use "fmt.use"
+use 
"option.use" +use "slpush.use" +use "sys.use" +use "types.use" +use "utf.use" + +pkg std = + type optctx = struct + /* public variables */ + args : byte[:][:] + + /* data passed in */ + optstr : byte[:] + optargs : byte[:][:] + + /* state */ + optdone : bool /* if we've seen '--', everything's an arg */ + finished : bool /* if we've processed all the optargs */ + argidx : size + curarg : byte[:] + ;; + + const optinit : (optstr: byte[:], optargs : byte[:][:] -> optctx#) + const optnext : (ctx : optctx# -> [char, byte[:]]) + const optdone : (ctx : optctx# -> bool) + const optfin : (ctx : optctx# -> byte[:][:]) +;; + +const optinit = {optstr, optargs + var ctx + + ctx = alloc() + ctx.optstr= optstr + ctx.optargs =optargs + + ctx.optdone = false + ctx.finished = false + ctx.argidx = 0 + ctx.curarg = [][:] + + ctx.args = [][:] + + next(ctx) + -> ctx +} + +const optfin = {ctx + var a + + a = ctx.args + free(ctx) + -> a +} + +const optnext = {ctx + var c + var arg + + (c, ctx.curarg) = striter(ctx.curarg) + + match optinfo(ctx, c) + | `None: + fatal(1, "Unexpected argument %c\n", c) + | `Some (true, needed): + /* -arg => '-a' 'rg' */ + if ctx.curarg.len > 0 + arg = ctx.curarg + ctx.curarg = ctx.curarg[ctx.curarg.len:] + next(ctx) + /* '-a rg' => '-a' 'rg' */ + elif ctx.argidx < (ctx.optargs.len - 1) + arg = ctx.optargs[ctx.argidx + 1] + ctx.argidx++ + next(ctx) + elif needed + put("Expected argument for %c\n", c) + exit(1) + ;; + | `Some (false, _): + arg = "" + if !ctx.curarg.len + next(ctx) + ;; + ;; + + + -> (c, arg) +} + +const optdone = {ctx + -> !ctx.curarg.len && ctx.finished +} + +const optinfo = {ctx, arg + var s + var c + + s = ctx.optstr + while s.len != 0 + (c, s) = striter(s) + if c == arg + (c, s) = striter(s) + /* mandatory arg */ + if c == ':' + -> `Some (true, true) + /* optional arg */ + elif c == '?' 
+ -> `Some (true, false) + /* no arg */ + else + -> `Some (false, false) + ;; + ;; + ;; + -> `None +} + +const next = {ctx + var i + + for i = ctx.argidx + 1; i < ctx.optargs.len; i++ + if !ctx.optdone && decode(ctx.optargs[i]) == '-' + goto foundopt + else + ctx.args = slpush(ctx.args, ctx.optargs[i]) + ;; + ;; + ctx.finished = true + -> false +:foundopt + ctx.argidx = i + ctx.curarg = ctx.optargs[i][1:] + -> true +} diff --git a/libstd/rand.myr b/libstd/rand.myr new file mode 100644 index 0000000..eede48f --- /dev/null +++ b/libstd/rand.myr @@ -0,0 +1,161 @@ +use "die.use" +use "fmt.use" +use "types.use" +use "alloc.use" +/* + Translated from C by Ori Bernstein + */ + +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + */ + +pkg std = + type rng + + const mksrng : (seed : uint32 -> rng#) + generic rand : (rng : rng#, lo : @a::(numeric,integral), hi : @a::(numeric,integral) -> @a::(numeric,integral)) + generic randN : (rng : rng# -> @a::(numeric,integral)) + const rand32 : (rng : rng# -> uint32) +;; + +type rng = struct + state : uint32[624] + i : uint32 +;; + +/* allocates and initializes a random number generator */ +const mksrng = {seed + var rng + + rng = alloc() + init(rng, seed) + -> rng +} + +/* initializes a random number generator from the seed `seed`. */ +const init = {rng, seed + var i + + for i = 0; i < 624; i++ + rng.state[i] = seed + seed = 1812433253 * (seed ^ (seed >> 30)) + i + 1 + ;; + rng.i = i +} + +/* + Generates a random integer from `rng` in the range [lo, hi), + returning the value. 
The range [lo, hi) must be positive,
+   nonempty, and the difference between hi and lo must be
+   less than 2^(type_bits - 1)
+*/
+generic rand = {rng, lo, hi -> @a::(integral,numeric)
+	var span, lim
+	var maxrand
+	var val
+
+	assert(hi - lo > 0, "rand.myr: range for random values must be >= 1")
+
+	span = hi - lo
+	maxrand = ~0				/* all bits set: max for unsigned value */
+	if maxrand < 0 /* signed */
+		maxrand = (1 << (8*sizeof(@a)-1)) - 1	/* max for signed value */
+	;;
+
+	/*
+	   lim is the largest multiple of span that fits. Draws at or
+	   above it are rejected so that every residue is equally likely.
+	*/
+	lim = (maxrand/span)*span
+	val = (randN(rng) & maxrand)
+	while val >= lim
+		val = (randN(rng) & maxrand)
+	;;
+	-> val % span + lo
+}
+
+/*
+   Generates a random integer of any size from the
+   random number generator `rng`. The returned value
+   may be negative, if the type is signed. One 32 bit
+   draw is used for every 4 bytes of the result, and
+   at least one draw is always made so that types
+   narrower than 32 bits are filled too.
+*/
+generic randN = {rng -> @a::(integral,numeric)
+	var i, val
+
+	val = rand32(rng) castto(@a::(integral,numeric))
+	for i = 1; i < sizeof(@a)/4; i++
+		/* make room for the next 32 bit draw */
+		val <<= 32
+		val |= rand32(rng) castto(@a::(integral,numeric))
+	;;
+	-> val
+}
+
+/*
+   generates a 32 bit unsigned random number
+   from the random number generator `rng`.
+*/
+const rand32 = {rng
+	var x
+
+	if rng.i == 624
+		next(rng)
+	;;
+	x = rng.state[rng.i]
+	rng.i++
+
+	x ^= x >> 11
+	x ^= (x << 7) & 0x9D2C5680
+	x ^= (x << 15) & 0xEFC60000
+	-> x ^ (x >> 18)
+}
+
+
+/* updates random number generator state when we tick over. 
*/ +const next = {rng + var k + var y + + for k = 0; k < 227; k++ + y = (rng.state[k] & 0x80000000) | (rng.state[k + 1] & 0x7FFFFFFF) + rng.state[k] = rng.state[k + 397] ^ (y >> 1) ^ ((y & 1) * 0x9908B0DF) + ;; + for ; k < 623; k++ + y = (rng.state[k] & 0x80000000) | (rng.state[k + 1] & 0x7FFFFFFF) + rng.state[k] = rng.state[k - 227] ^ (y >> 1) ^ ((y & 1) * 0x9908B0DF); + ;; + y = (rng.state[623] & 0x80000000) | (rng.state[0] & 0x7FFFFFFF) + rng.state[623] = rng.state[396] ^ (y >> 1) ^ ((y & 1) * 0x9908B0DF); + rng.i = 0 +} diff --git a/libstd/resolve.myr b/libstd/resolve.myr new file mode 100644 index 0000000..94125a3 --- /dev/null +++ b/libstd/resolve.myr @@ -0,0 +1,433 @@ +use "alloc.use" +use "chartype.use" +use "die.use" +use "endian.use" +use "result.use" +use "extremum.use" +use "fmt.use" +use "hashfuncs.use" +use "htab.use" +use "ipparse.use" +use "option.use" +use "slcp.use" +use "sleq.use" +use "slpush.use" +use "slurp.use" +use "strfind.use" +use "strsplit.use" +use "strstrip.use" +use "sys.use" +use "types.use" +use "utf.use" + +pkg std = + type rectype = uint16 + + const DnsA : rectype = 1 /* host address */ + const DnsNS : rectype = 2 /* authoritative name server */ + const DnsMD : rectype = 3 /* mail destination (Obsolete - use MX) */ + const DnsMF : rectype = 4 /* mail forwarder (Obsolete - use MX) */ + const DnsCNAME : rectype = 5 /* canonical name for an alias */ + const DnsSOA : rectype = 6 /* marks the start of a zone of authority */ + const DnsMB : rectype = 7 /* mailbox domain name (EXPERIMENTAL) */ + const DnsMG : rectype = 8 /* mail group member (EXPERIMENTAL) */ + const DnsMR : rectype = 9 /* mail rename domain name (EXPERIMENTAL) */ + const DnsNULL : rectype = 10 /* null RR (EXPERIMENTAL) */ + const DnsWKS : rectype = 11 /* well known service description */ + const DnsPTR : rectype = 12 /* domain name pointer */ + const DnsHINFO : rectype = 13 /* host information */ + const DnsMINFO : rectype = 14 /* mailbox or mail list information */ + 
const DnsMX : rectype = 15 /* mail exchange */
+	const DnsTXT : rectype = 16 /* text strings */
+	const DnsAAAA : rectype = 28 /* ipv6 host address */
+
+
+	type resolveerr = union
+		`Badhost
+		`Badsrv
+		`Badquery
+		`Badresp
+	;;
+
+	type hostinfo = struct
+		fam : sockfam
+		stype : socktype
+		ttl : uint32
+		addr : netaddr
+	/*
+		flags : uint32
+		addr : sockaddr[:]
+		canon : byte[:]
+	*/
+	;;
+
+	const resolve : (host : byte[:] -> result(hostinfo[:], resolveerr))
+	const resolvemx : (host : byte[:] -> result(hostinfo[:], resolveerr))
+	const resolverec : (host : byte[:], t : rectype -> result(hostinfo[:], resolveerr))
+;;
+
+const Hostfile = "/etc/hosts"
+const Resolvfile = "/etc/resolv.conf"
+
+var hostmap : htab(byte[:], hostinfo)#
+var search : byte[:][:]
+var nameservers : netaddr[:]
+var inited : bool = false
+
+
+const resolve = {host
+	-> resolverec(host, DnsA)
+}
+
+const resolvemx = {host
+	-> resolverec(host, DnsMX)
+}
+
+/*
+  Resolves `host` to a list of host records. A name present in the
+  hosts file is answered from the local table; any other name is sent
+  to the configured nameservers as a query for record type `t`.
+*/
+const resolverec = {host, t
+	match hostfind(host)
+	| `Some hinf:
+		-> `Ok slpush([][:], hinf)
+	| `None:
+		/* pass the caller's record type through; resolvemx depends on it */
+		-> dnsresolve(host, t)
+	;;
+}
+
+/*
+  Looks `host` up in the table built from the hosts file. The first
+  call also loads the hosts file and the resolver configuration.
+*/
+const hostfind = {host
+	if !inited
+		hostmap = mkht(strhash, streq)
+		loadhosts()
+		loadresolv()
+		inited = true
+	;;
+	-> htget(hostmap, host)
+}
+
+/*
+  Parses the hosts file into hostmap: each line is an address followed
+  by one or more names, with `#` starting a comment. A missing file or
+  an unparseable address is skipped silently.
+
+  NOTE(review): the file contents in `h` are never freed. That looks
+  deliberate, since the names stored in hostmap appear to be slices of
+  it; confirm before adding an slfree(h).
+*/
+const loadhosts = {
+	var h
+	var lines
+
+	match slurp(Hostfile)
+	| `Ok d: h = d
+	| `Fail m: ->
+	;;
+
+	lines = strsplit(h, "\n")
+	for l in lines
+		/* trim comment */
+		match strfind(l, "#")
+		| `Some _idx: l = l[:_idx]
+		| `None:
+		;;
+
+		match word(l)
+		| `Some (ip, rest):
+			match ipparse(ip)
+			| `Some addr:
+				addhosts(addr, ip, rest)
+			| `None:
+			;;
+		| `None:
+		;;
+	;;
+	slfree(lines)
+}
+
+const addhosts = {addr, as, str
+	var hinf
+	var fam
+
+	match addr
+	| `Ipv4 _: fam = Afinet
+	| `Ipv6 _: fam = Afinet6
+	;;
+	while true
+		match word(str)
+		| `Some (name, rest):
+			if !hthas(hostmap, name)
+				hinf = [
+					.fam=fam,
+					.stype = 0,
+					.ttl = 0,
+					.addr = addr
+				]
+				htput(hostmap, name, hinf)
+			;;
+			str = rest
+		| `None:
+			->
+		;;
+	;;
+}
+
+const loadresolv = {
+	var h
+	var lines
+
+	match slurp(Resolvfile)
+	| `Ok d: h = d
+	| `Fail m: ->
+	;;
+
+	lines = strsplit(h, "\n")
+	for l in lines
+		match strfind(l, "#")
+		| `Some _idx: l = l[:_idx]
+		| `None:
+		;;
+
+		match word(l)
+		| `Some (cmd, rest):
+			if sleq(cmd, "nameserver")
+				addns(rest)
+			;;
+		;;
+	;;
+	slfree(lines)
+}
+
+const addns = {rest
+	match word(rest)
+	| `Some (name, _):
+		match ipparse(name)
+		| `Some addr:
+			nameservers = slpush(nameservers, addr)
+		| `None:
+		;;
+	;;
+}
+
+const word = {s
+	var c, len
+
+	len = 0
+	s = strfstrip(s)
+	for c = decode(s[len:]); c != Badchar && !isblank(c); c = decode(s[len:])
+		len += charlen(c)
+	;;
+	if len == 0
+		-> `None
+	else
+		-> `Some (s[:len], s[len:])
+	;;
+}
+
+
+const dnsresolve = {host, t
+	var nsrv
+
+	if !valid(host)
+		-> `Fail (`Badhost)
+	;;
+	for ns in nameservers
+		nsrv = dnsconnect(ns)
+		if nsrv >= 0
+			-> dnsquery(nsrv, host, t)
+		;;
+	;;
+	-> `Fail (`Badsrv)
+}
+
+const dnsconnect = {ns
+	match ns
+	| `Ipv4 addr: -> dnsconnectv4(addr)
+	| `Ipv6 addr: die("don't support ipv6 yet\n")
+	;;
+}
+
+const dnsconnectv4 = {addr
+	var sa : sockaddr_in
+	var s
+	var status
+
+	s = socket(Afinet, Sockdgram, 0)
+	if s < 0
+		-> -1
+	;;
+	sa.fam = Afinet
+	sa.port = hosttonet(53)
+	sa.addr = addr
+	status = connect(s, (&sa) castto(sockaddr#), sizeof(sockaddr_in))
+	if status < 0
+		-> -1
+	;;
+	-> s
+}
+
+const dnsquery = {srv, host, t
+	var id
+	var r
+
+	id = tquery(srv, host, t)
+	r = rquery(srv, id)
+	-> r
+}
+
+/*
+  Flag bits of the 16 bit DNS header flags word (RFC 1035, section
+  4.1.1). The RFC numbers bits from the most significant end, so its
+  "bit 0" (QR) is 1 << 15 here and its "bit 7" (RD) is 1 << 8.
+*/
+const Qr : uint16 = 1 << 15	/* message is a response */
+const Aa : uint16 = 1 << 10	/* authoritative answer */
+const Tc : uint16 = 1 << 9	/* message was truncated */
+const Rd : uint16 = 1 << 8	/* recursion desired */
+const Ra : uint16 = 1 << 7	/* recursion available */
+
+var nextid : uint16 = 42
+/*
+  Builds a single-question DNS query for record type `t` of `host`,
+  writes it to `srv`, and returns the id placed in the header.
+*/
+const tquery = {srv, host, t
+	var pkt : byte[512] /* big enough */
+	var off : size
+
+	/* header */
+	off = 0
+	off += pack16(pkt[:], off, nextid) /* id */
+	off += pack16(pkt[:], off, Rd) /* flags: ask the server to recurse */
+	off += pack16(pkt[:], off, 1) /* qdcount */
+	off += pack16(pkt[:], off, 0) /* ancount */
+	off += pack16(pkt[:], off, 0) /* nscount */
+	
off += pack16(pkt[:], off, 0) /* arcount */
+
+	/* query */
+	off += packname(pkt[:], off, host) /* host */
+	off += pack16(pkt[:], off, t castto(uint16)) /* qtype: a record */
+	off += pack16(pkt[:], off, 0x1) /* qclass: inet4 */
+
+	write(srv, pkt[:off])
+	-> nextid++
+}
+
+/*
+  Reads the reply to query `id` from `srv` and hands it to hosts().
+  The reply is rejected as `Badresp when the read failed, when it is
+  shorter than the 12 byte header that hosts() unpacks unconditionally,
+  or when the reported length could not have fit in the buffer.
+*/
+const rquery = {srv, id
+	var pktbuf : byte[1024]
+	var pkt
+	var n
+
+	n = read(srv, pktbuf[:])
+	/* the upper bound also catches an error code seen through an unsigned type */
+	if n < 12 || n > 1024
+		-> `Fail (`Badresp)
+	;;
+	pkt = pktbuf[:n]
+	-> hosts(pkt, id)
+}
+
+const hosts = {pkt, id : uint16
+	var off
+	var v, q, a
+	var i
+	var hinf : hostinfo[:]
+
+	off = 0
+	/* parse header */
+	(v, off) = unpack16(pkt, off) /* id */
+	if v != id
+		-> `Fail (`Badresp)
+	;;
+	(v, off) = unpack16(pkt, off) /* flags */
+	(q, off) = unpack16(pkt, off) /* qdcount */
+	(a, off) = unpack16(pkt, off) /* ancount */
+	(v, off) = unpack16(pkt, off) /* nscount */
+	(v, off) = unpack16(pkt, off) /* arcount */
+
+	/* skip past query records */
+	for i = 0; i < q; i++
+		off = skipname(pkt, off) /* name */
+		(v, off) = unpack16(pkt, off) /* type */
+		(v, off) = unpack16(pkt, off) /* class */
+	;;
+
+	/* parse answer records */
+	hinf = slalloc(a castto(size))
+	for i = 0; i < a; i++
+		off = skipname(pkt, off) /* name */
+		(v, off) = unpack16(pkt, off) /* type */
+		(v, off) = unpack16(pkt, off) /* class */
+		(hinf[i].ttl, off) = unpack32(pkt, off) /* ttl */
+		(v, off) = unpack16(pkt, off) /* rdatalen */
+		/* the thing we're interested in: our IP address */
+		hinf[i].addr = `Ipv4 [pkt[off], pkt[off+1], pkt[off+2], pkt[off+3]]
+		off += 4;
+	;;
+	-> `Ok hinf
+}
+
+
+const skipname = {pkt, off
+	var sz
+
+	for sz = pkt[off] castto(size); sz != 0; sz = pkt[off] castto(size)
+		/* ptr is 2 bytes */
+		if sz & 0xC0 == 0xC0
+			-> off + 2
+		else
+			off += sz + 1
+		;;
+	;;
+	-> off + 1
+}
+
+
+const pack16 = {buf, off, v
+	buf[off] = (v & 0xff00) >> 8 castto(byte)
+	buf[off+1] = (v & 0x00ff) castto(byte)
+	-> sizeof(uint16) /* we always write one uint16 */
+}
+
+const unpack16 = {buf, off
+	var v
+
+	v = (buf[off] castto(uint16)) << 8
+	v
|= (buf[off + 1] castto(uint16))
+	-> (v, off+sizeof(uint16))
+}
+
+/*
+  Decodes the big-endian 32 bit value stored in buf[off:off+4].
+  Returns the value together with the offset of the byte after it.
+*/
+const unpack32 = {buf, off
+	var v
+
+	v = (buf[off] castto(uint32)) << 24
+	v |= (buf[off+1] castto(uint32)) << 16	/* bits 16..23 */
+	v |= (buf[off+2] castto(uint32)) << 8
+	v |= (buf[off+3] castto(uint32))
+	-> (v, off+sizeof(uint32))
+}
+
+/*
+  Writes `host` into buf starting at `off` as a run of length-prefixed
+  labels, one per dot-separated component, followed by the empty label
+  that terminates a name. Returns the number of bytes written.
+*/
+const packname = {buf, off : size, host
+	var i
+	var start
+	var last
+
+	start = off
+	last = 0
+	for i = 0; i < host.len; i++
+		if host[i] == ('.' castto(byte))
+			off += addseg(buf, off, host[last:i])
+			last = i + 1
+		;;
+	;;
+	/*
+	  a trailing '.' already closed the final label inside the loop;
+	  an empty host has no final byte to look at and no label to add
+	*/
+	if host.len > 0 && host[host.len - 1] != ('.' castto(byte))
+		off += addseg(buf, off, host[last:])
+	;;
+	off += addseg(buf, off, "") /* null terminating segment */
+	-> off - start
+}
+
+const addseg = {buf, off, str
+	buf[off] = str.len castto(byte)
+	slcp(buf[off + 1 : off + str.len + 1], str)
+	-> str.len + 1
+}
+
+const valid = {host : byte[:]
+	var i
+	var seglen
+
+	/* maximum length: 255 chars */
+	if host.len > 255
+		-> false
+	;;
+
+	seglen = 0
+	for i = 0; i < host.len; i++
+		if host[i] == ('.' 
castto(byte))
+			seglen = 0
+		else
+			/* count the bytes of the current label so the limit below can trigger */
+			seglen++
+		;;
+		if seglen > 63
+			-> false
+		;;
+		if host[i] & 0x80 != 0
+			-> false
+		;;
+	;;
+
+	-> true
+}
diff --git a/libstd/result.myr b/libstd/result.myr
new file mode 100644
index 0000000..58ef45b
--- /dev/null
+++ b/libstd/result.myr
@@ -0,0 +1,9 @@
+use "die.use"
+
+pkg std =
+	type result(@a, @b) = union
+		`Ok @a
+		`Fail @b
+	;;
+;;
+
diff --git a/libstd/search.myr b/libstd/search.myr
new file mode 100644
index 0000000..54deb21
--- /dev/null
+++ b/libstd/search.myr
@@ -0,0 +1,42 @@
+use "cmp.use"
+use "option.use"
+use "fmt.use"
+
+pkg std =
+	generic lsearch : (sl : @t[:], val : @t, cmp : (a : @t, b : @t -> order) -> option(@idx::(integral,numeric)))
+	generic bsearch : (sl : @t[:], val : @t, cmp : (a : @t, b : @t -> order) -> option(@idx::(integral,numeric)))
+;;
+
+/* linear search over a list of values */
+generic lsearch = {sl, val, cmp
+	var i
+
+	for i = 0; i < sl.len; i++
+		match cmp(sl[i], val)
+		| `Equal:
+			-> `Some i
+		;;
+	;;
+	-> `None
+}
+
+/* binary search over a sorted list of values.
*/
+/*
+  Returns `Some idx for an element that compares `Equal to `val`, or
+  `None when there is none. The search runs over the half-open range
+  [lo, hi), so an empty slice and a value smaller than every element
+  both finish without an index ever stepping below zero. The probe
+  positions are the same as for the closed-range formulation.
+*/
+generic bsearch = {sl, val, cmp
+	var hi, lo, mid
+
+	lo = 0
+	hi = sl.len
+
+	while lo < hi
+		mid = lo + (hi - lo - 1) / 2
+		match cmp(val, sl[mid])
+		| `Before: hi = mid
+		| `After: lo = mid + 1
+		| `Equal:
+			-> `Some mid
+		;;
+	;;
+	-> `None
+}
+
+
diff --git a/libstd/slcp.myr b/libstd/slcp.myr
new file mode 100644
index 0000000..1035a29
--- /dev/null
+++ b/libstd/slcp.myr
@@ -0,0 +1,26 @@
+use "die.use"
+use "types.use"
+
+pkg std =
+	generic slcp : (a : @a[:], b : @a[:] -> void)
+;;
+
+generic slcp = {a : @a[:], b : @a[:]
+	var i
+	var addr_a, addr_b
+
+	assert(a.len == b.len, "arguments to slcp() must be of equal length")
+
+	addr_a = a castto(@a#) castto(intptr)
+	addr_b = b castto(@a#) castto(intptr)
+	if addr_a <= addr_b
+		for i = 0; i < a.len; i++
+			a[i] = b[i]
+		;;
+	else
+		for i = a.len; i > 0; i--
+			a[i - 1] = b[i - 1]
+		;;
+	;;
+
+}
diff --git a/libstd/sldup.myr b/libstd/sldup.myr
new file mode 100644
index 0000000..70cbee4
--- /dev/null
+++ b/libstd/sldup.myr
@@ -0,0 +1,15 @@
+use "alloc.use"
+use "die.use"
+use "slcp.use"
+
+pkg std =
+	generic sldup : (sl : @a[:] -> @a[:])
+;;
+
+generic sldup = {sl
+	var ret
+
+	ret = slalloc(sl.len)
+	slcp(ret, sl)
+	-> ret
+}
diff --git a/libstd/sleq.myr b/libstd/sleq.myr
new file mode 100644
index 0000000..cfc60b4
--- /dev/null
+++ b/libstd/sleq.myr
@@ -0,0 +1,18 @@
+pkg std =
+	generic sleq : (a : @a[:], b : @a[:] -> bool)
+;;
+
+generic sleq = {a, b
+	var i
+
+	if a.len != b.len
+		-> false
+	;;
+
+	for i = 0; i < a.len; i++
+		if a[i] != b[i]
+			-> false
+		;;
+	;;
+	-> true
+}
diff --git a/libstd/slfill.myr b/libstd/slfill.myr
new file mode 100644
index 0000000..d90e8f5
--- /dev/null
+++ b/libstd/slfill.myr
@@ -0,0 +1,12 @@
+pkg std =
+	generic slfill : (sl : @a[:], v : @a -> @a[:])
+;;
+
+generic slfill = {sl, v
+	var i
+
+	for i = 0; i < sl.len; i++
+		sl[i] = v
+	;;
+	-> sl
+}
diff --git a/libstd/sljoin.myr b/libstd/sljoin.myr
new file mode 100644
index 0000000..f1fe58a
--- /dev/null
+++ 
b/libstd/sljoin.myr
@@ -0,0 +1,15 @@
+use "alloc.use"
+use "slcp.use"
+
+pkg std =
+	generic sljoin : (dst : @a[:], src : @a[:] -> @a[:])
+;;
+
+generic sljoin = {dst, src
+	var len
+
+	len = dst.len
+	dst = slgrow(dst, len + src.len)
+	slcp(dst[len:], src)
+	-> dst
+}
diff --git a/libstd/slpush.myr b/libstd/slpush.myr
new file mode 100644
index 0000000..1f94285
--- /dev/null
+++ b/libstd/slpush.myr
@@ -0,0 +1,13 @@
+use "types.use"
+use "alloc.use"
+use "fmt.use"
+
+pkg std =
+	generic slpush : (sl : @a[:], elt : @a -> @a[:])
+;;
+
+generic slpush = {sl, elt
+	sl = slgrow(sl, sl.len + 1)
+	sl[sl.len - 1] = elt
+	-> sl
+}
diff --git a/libstd/slput.myr b/libstd/slput.myr
new file mode 100644
index 0000000..c1a7d33
--- /dev/null
+++ b/libstd/slput.myr
@@ -0,0 +1,21 @@
+use "types.use"
+use "alloc.use"
+use "die.use"
+use "fmt.use"
+
+pkg std =
+	generic slput : (sl : @a[:], idx : size, elt : @a -> @a[:])
+;;
+
+/*
+  Inserts `elt` at index `idx`, moving sl[idx:] up by one slot, and
+  returns the grown slice.
+*/
+generic slput = {sl, idx, elt
+	var i
+	var len
+
+	len = sl.len
+	sl = slgrow(sl, sl.len + 1)
+	/*
+	  walk down from the new last slot. Testing `i > idx` and reading
+	  sl[i - 1] keeps i from stepping below zero when idx is 0 or the
+	  slice started out empty.
+	*/
+	for i = len; i > idx; i--
+		sl[i] = sl[i - 1]
+	;;
+	sl[idx] = elt
+	-> sl
+}
diff --git a/libstd/slurp.myr b/libstd/slurp.myr
new file mode 100644
index 0000000..016d068
--- /dev/null
+++ b/libstd/slurp.myr
@@ -0,0 +1,38 @@
+use "alloc.use"
+use "die.use"
+use "result.use"
+use "extremum.use"
+use "fmt.use"
+use "sys.use"
+use "types.use"
+
+pkg std =
+	const slurp : (path : byte[:] -> result(byte[:], byte[:]))
+;;
+
+const Bufinc = 4096
+
+/*
+  Reads the whole file at `path` into a freshly allocated buffer.
+  Returns `Ok with the contents, or `Fail with a message when the file
+  cannot be opened or a read fails. The descriptor is closed on every
+  path once it has been opened.
+
+  NOTE(review): close() is assumed to be exported by sys.use next to
+  open() and read(); confirm.
+*/
+const slurp = {path
+	var fd
+	var n
+	var len
+	var buf
+
+	fd = open(path, Ordonly)
+	if fd < 0
+		-> `Fail "Could not open file"
+	;;
+
+	len = 0
+	buf = slalloc(Bufinc)
+	while true
+		n = read(fd, buf[len:])
+		/*
+		  buf[len:] always has Bufinc bytes of room, so a count outside
+		  [0, Bufinc] can only be an error code, whether the return type
+		  is signed or unsigned
+		*/
+		if n < 0 || n > Bufinc
+			close(fd)
+			slfree(buf)
+			-> `Fail "Could not read file"
+		;;
+		if n == 0
+			goto done
+		;;
+		len += n
+		buf = slgrow(buf, len + Bufinc)
+	;;
+:done
+	close(fd)
+	-> `Ok buf[:len]
+}
diff --git a/libstd/sort.myr b/libstd/sort.myr
new file mode 100644
index 0000000..0307e1c
--- /dev/null
+++ b/libstd/sort.myr
@@ -0,0 +1,59 @@
+use "cmp.use"
+
+pkg std =
+	generic sort : (sl:@a[:], 
cmp:(a:@a, b:@a -> order) -> @a[:])
+;;
+
+/*
+  Sorts `sl` in place with heapsort, ordering elements by `cmp`, and
+  returns the same slice. Slices of length 0 and 1 are returned as is.
+*/
+generic sort = {sl, cmp
+	var end
+	var tmp
+
+	heapify(sl, cmp)
+	/*
+	  sl[:end] is the remaining heap. Each round moves its largest
+	  element into the slot just past the shrunken heap and restores
+	  the heap property over what is left.
+	*/
+	end = sl.len
+	while end > 1
+		end--
+		tmp = sl[end]
+		sl[end] = sl[0]
+		sl[0] = tmp
+		siftdown(sl[:end], 0, cmp)
+	;;
+	-> sl
+}
+
+/* arranges sl into a max-heap by sifting down every node that has a child */
+generic heapify = {sl, cmp
+	var start
+
+	/* count down without ever evaluating 0 - 1 */
+	start = sl.len/2
+	while start > 0
+		start--
+		siftdown(sl, start, cmp)
+	;;
+}
+
+/*
+  Moves the element at `start` down the heap stored in `sl` until
+  neither of its children orders after it.
+*/
+generic siftdown = {sl, start, cmp
+	var r, c, s
+	var tmp
+
+	r = start
+	/* r has a left child only while that child's index lies inside sl */
+	while 2*r + 1 < sl.len
+		c = r*2 + 1
+		s = r
+		match cmp(sl[s], sl[c])
+		| `Before: s = c
+		| _:
+		;;
+		if c + 1 < sl.len
+			match cmp(sl[s], sl[c + 1])
+			| `Before: s = c + 1
+			| _:
+			;;
+		;;
+		if s != r
+			tmp = sl[r]
+			sl[r] = sl[s]
+			sl[s] = tmp
+			r = s
+		else
+			->
+		;;
+	;;
+}
+
diff --git a/libstd/start-linux.s b/libstd/start-linux.s
new file mode 100644
index 0000000..48d37d8
--- /dev/null
+++ b/libstd/start-linux.s
@@ -0,0 +1,121 @@
+.data
+/* std._environment : byte[:][:] */
+.globl std$_environment
+std$_environment:
+.envbase:
+.quad 0 /* env ptr */
+.envlen:
+.quad 0 /* env size */
+
+.globl std$__cenvp
+std$__cenvp:
+.quad 0
+
+.text
+/*
+ * counts the length of the string pointed to
+ * by %r8, returning len in %r9. Does not modify
+ * any registers outside of %r9
+ */
+cstrlen:
+	xorq %r9,%r9
+	jmp .lentest
+
+	.lenloop:
+	incq %r9
+	.lentest:
+	cmpb $0,(%r8,%r9)
+	jne .lenloop
+	ret
+
+
+/*
+ * Counts the size of the null terminated string vector
+ * pointed to by %rbx. Clobbers %r10,%r11
+ */
+count:
+	xorq %r9,%r9
+	movq %rbx,%r11
+.countloop:
+	movq (%r11),%r10
+	testq %r10,%r10
+	jz .countdone
+	addq $1,%r9
+	addq $8,%r11
+	jmp .countloop
+.countdone:
+	ret
+
+/*
+ * iterate over the strings for argc, and put
+ * them into the args array.
+ * + * argc in %rax, argv in %rbx, dest vector in %rcx + */ +cvt: + jmp .cvttest +.cvtloop: + subq $1,%rax + movq (%rbx),%r8 + call cstrlen + movq %r8, (%rcx) + movq %r9, 8(%rcx) + addq $8, %rbx + addq $16, %rcx +.cvttest: + testq %rax,%rax + jnz .cvtloop +.cvtdone: + ret + +/* + * The entry point for the whole program. + * This is called by the OS. In order, it: + * - Sets up all argc entries as slices + * - Sets up all envp entries as slices + * - Converts argc/argv to a slice + * - Stashes envp in std._environment + * - Stashes a raw envp copy in __cenvp (for syscalls to use) + * - Calls main() + */ +.globl _start +_start: + /* turn args into a slice */ + movq %rsp,%rbp + + /* stack allocate sizeof(byte[:])*(argc + len(envp)) */ + movq (%rbp),%rax + leaq 16(%rbp,%rax,8), %rbx /* argp = argv + 8*argc + 8 */ + call count + addq %r9,%rax + imulq $16,%rax + subq %rax,%rsp + movq %rsp, %rdx /* saved args[:] */ + + /* convert envp to byte[:][:] for std._environment */ + movq (%rbp),%rax + leaq 16(%rbp,%rax,8), %rbx /* envp = argv + 8*argc + 8 */ + /* store envp for some syscalls to use without converting */ + movq %rbx,std$__cenvp(%rip) + movq %r9,%rax + movq %rsp, %rcx + movq %r9,.envlen + movq %rdx,.envbase + call cvt + movq %rcx,%rdx + + /* convert argc, argv to byte[:][:] for args. 
*/ + movq (%rbp), %rax /* argc */ + leaq 8(%rbp), %rbx /* argv */ + movq (%rbp), %rsi /* saved argc */ + call cvt + pushq %rsi + pushq %rdx + + /* enter the main program */ + call main + /* exit(0) */ + xorq %rdi,%rdi + movq $60,%rax + syscall + diff --git a/libstd/start-osx.s b/libstd/start-osx.s new file mode 100644 index 0000000..27e3316 --- /dev/null +++ b/libstd/start-osx.s @@ -0,0 +1,119 @@ +.data +/* std._environment : byte[:][:] */ +.globl _std$_environment +_std$_environment: +.envbase: +.quad 0 /* env size */ +.envlen: +.quad 0 /* env ptr */ + +.globl _std$__cenvp +_std$__cenvp: +.quad 0 + +.text +/* + * counts the length of the string pointed to + * by %r8, returning len in %r9. Does not modify + * any registers outside of %r9 + */ +cstrlen: + xorq %r9,%r9 + jmp .lentest + + .lenloop: + incq %r9 + .lentest: + cmpb $0,(%r8,%r9) + jne .lenloop + ret + + +/* + * Counts the size of the null terminated string vector + * pointed to by %rbx. Clobbers %r10,%r11 + */ +count: + xorq %r9,%r9 + movq %rbx,%r11 +.countloop: + movq (%r11),%r10 + testq %r10,%r10 + jz .countdone + addq $1,%r9 + addq $8,%r11 + jmp .countloop +.countdone: + ret + +/* + * iterate over the strings for argc, and put + * them into the args array. + * + * argc in %rax, argv in %rbx, dest vector in %rcx + */ +cvt: + jmp .cvttest +.cvtloop: + subq $1,%rax + movq (%rbx),%r8 + call cstrlen + movq %r8, (%rcx) + movq %r9, 8(%rcx) + addq $8, %rbx + addq $16, %rcx +.cvttest: + testq %rax,%rax + jnz .cvtloop +.cvtdone: + ret + +/* + * The entry point for the whole program. + * This is called by the OS. 
In order, it:
+ * - Sets up all argc entries as slices
+ * - Sets up all envp entries as slices
+ * - Converts argc/argv to a slice
+ * - Stashes envp in std._environment
+ * - Stashes a raw envp copy in __cenvp (for syscalls to use)
+ * - Calls main()
+ */
+.globl start
+start:
+	/* turn args into a slice */
+	movq %rsp,%rbp
+	/* stack allocate sizeof(byte[:])*(argc + len(envp)) */
+	movq (%rbp),%rax
+	leaq 16(%rbp,%rax,8), %rbx /* argp = argv + 8*argc + 8 */
+	call count
+	addq %r9,%rax
+	imulq $16,%rax
+	subq %rax,%rsp
+	movq %rsp, %rdx /* saved args[:] */
+
+	/* convert envp to byte[:][:] for std._environment */
+	movq (%rbp),%rax
+	leaq 16(%rbp,%rax,8), %rbx /* envp = argv + 8*argc + 8 */
+	movq %rbx,_std$__cenvp(%rip)
+	movq %r9,%rax
+	movq %rsp, %rcx
+	movq %r9,.envlen(%rip)
+	movq %rdx,.envbase(%rip)
+	call cvt
+	movq %rcx,%rdx
+
+	/* convert argc, argv to byte[:][:] for args. */
+	movq (%rbp), %rax /* argc */
+	leaq 8(%rbp), %rbx /* argv */
+	movq (%rbp), %rsi /* saved argc */
+	call cvt
+	pushq %rsi
+	pushq %rdx
+
+	/* enter the main program */
+	call _main
+	/* exit */
+	xorq %rdi,%rdi
+	movq $0x2000001,%rax
+	syscall
+
diff --git a/libstd/strfind.myr b/libstd/strfind.myr
new file mode 100644
index 0000000..2ad25f7
--- /dev/null
+++ b/libstd/strfind.myr
@@ -0,0 +1,26 @@
+use "types.use"
+use "option.use"
+
+pkg std =
+	const strfind : (haystack : byte[:], needle : byte[:] -> option(size))
+;;
+
+/*
+  Returns `Some i for the first byte offset at which `needle` occurs
+  in `haystack`, or `None when it does not occur. An empty needle is
+  reported as `None: it has no first byte to compare against, and a
+  zero-length match would give callers that advance past each match
+  nothing to advance by.
+*/
+const strfind = {haystack, needle
+	var i, j
+
+	if needle.len == 0
+		-> `None
+	;;
+	for i = 0; i < haystack.len; i++
+		if i + needle.len > haystack.len
+			-> `None
+		;;
+		if haystack[i] == needle[0]
+			for j = 0; j < needle.len; j++
+				if haystack[i + j] != needle[j]
+					goto nextiter
+				;;
+			;;
+			-> `Some i
+		;;
+:nextiter
+	;;
+	-> `None
+}
diff --git a/libstd/strjoin.myr b/libstd/strjoin.myr
new file mode 100644
index 0000000..518a4dc
--- /dev/null
+++ b/libstd/strjoin.myr
@@ -0,0 +1,41 @@
+use "alloc.use"
+use "die.use"
+use "slcp.use"
+
+pkg std =
+	const strcat : (a : byte[:], b : byte[:] -> 
byte[:]) + const strjoin : (strings : byte[:][:], delim : byte[:] -> byte[:]) +;; + +const strcat = {a, b + -> strjoin([a, b][:], "") +} + +const strjoin = {strings, delim + var len, off + var i + var s + + len = 0 + for i = 0; i < strings.len; i++ + len += strings[i].len + ;; + if strings.len > 0 + len += (strings.len - 1)*delim.len + ;; + + s = slalloc(len) + off = 0 + for i = 0; i < strings.len; i++ + slcp(s[off:off + strings[i].len], strings[i]) + off += strings[i].len + /* we don't want to terminate the last string with delim */ + if i != strings.len - 1 + slcp(s[off:off + delim.len], delim) + off += delim.len + ;; + ;; + -> s +} + + diff --git a/libstd/strsplit.myr b/libstd/strsplit.myr new file mode 100644 index 0000000..8546520 --- /dev/null +++ b/libstd/strsplit.myr @@ -0,0 +1,34 @@ +use "alloc.use" +use "die.use" +use "extremum.use" +use "fmt.use" +use "option.use" +use "slpush.use" +use "strfind.use" +use "sys.use" +use "types.use" + +pkg std = + const strsplit : (s : byte[:], delim : byte[:] -> byte[:][:]) +;; + +const strsplit = {s, delim + var last + var sp + + sp = [][:] + last = 0 + while true + match strfind(s, delim) + | `Some i: + sp = slpush(sp, s[:i]) + s = s[i + delim.len:] + | `None: + goto donesplit + ;; + ;; +:donesplit + sp = slpush(sp, s[:]) + -> sp +} + diff --git a/libstd/strstrip.myr b/libstd/strstrip.myr new file mode 100644 index 0000000..d7b25b3 --- /dev/null +++ b/libstd/strstrip.myr @@ -0,0 +1,44 @@ +use "types.use" +use "utf.use" +use "chartype.use" + +pkg std = + const strstrip : (str : byte[:] -> byte[:]) + const strfstrip : (str : byte[:] -> byte[:]) + const strrstrip : (str : byte[:] -> byte[:]) +;; + +/* strip blanks from both head and tail of str */ +const strstrip = {str + -> strrstrip(strfstrip(str)) +} + +/* strip forward on str */ +const strfstrip = {str + var c + + for c = decode(str); isblank(c); c = decode(str) + str = str[charlen(c):] + ;; + -> str + +} + +/* strip reverse on str */ +const strrstrip = {str + var i + 
var end
+
+	/*
+	  scan backwards for start of utf8 char. The byte examined is
+	  str[i - 1], the one about to be decoded: continuation bytes
+	  (10xxxxxx) are stepped over, and each character start is
+	  decoded to decide whether it can still be trimmed.
+	*/
+	end = str.len
+	for i = str.len; i != 0; i--
+		if (str[i - 1] & 0xC0) != 0x80
+			if !isspace(decode(str[i-1:]))
+				goto donestrip
+			;;
+			end = i - 1
+		;;
+	;;
+:donestrip
+	-> str[:end]
+}
diff --git a/libstd/sys-linux.myr b/libstd/sys-linux.myr
new file mode 100644
index 0000000..9c487e3
--- /dev/null
+++ b/libstd/sys-linux.myr
@@ -0,0 +1,630 @@
+use "types.use"
+use "varargs.use"
+
+pkg std =
+	type pid = int64
+	type scno = int64 /* syscall */
+	type fdopt = int64 /* fd options */
+	type fd = int64 /* fd */
+	type mprot = int64 /* memory protection */
+	type mopt = int64 /* memory mapping options */
+	type socktype = int64 /* socket type */
+	type sockproto = int64 /* socket protocol */
+	type sockfam = uint16 /* socket family */
+	type whence = uint64
+
+	type clock = union
+		`Clockrealtime
+		`Clockmonotonic
+		`Clockproccpu
+		`Clockthreadcpu
+		`Clockmonotonicraw
+		`Clockrealtimecoarse
+		`Clockmonotoniccoarse
+		`Clockboottime
+		`Clockrealtimealarm
+		`Clockboottimealarm
+	;;
+
+	type timespec = struct
+		sec : uint64
+		nsec : uint64
+	;;
+
+	type timeval = struct
+		sec : uint64
+		usec : uint64
+	;;
+
+	type rusage = struct
+		utime : timeval /* user time */
+		stime : timeval /* system time */
+		_opaque : uint64[14] /* padding (darwin-specific data) */
+	;;
+
+	type statbuf = struct
+		dev : uint64
+		__inotrunc : uint32
+		__pad0 : uint32
+		mode : uint32
+		nlink : uint32
+		uid : uint32
+		gid : uint32
+		rdev : uint64
+		__pad1 : uint32
+		size : uint64
+		blksize : uint32
+		blocks : uint64
+		atime : uint64
+		atimens : uint64
+		mtime : uint64
+		mtimens : uint64
+		ctime : uint64
+		ctimens : uint64
+		ino : uint64
+	;;
+
+	type utsname = struct
+		system : byte[65]
+		node : byte[65]
+		release : byte[65]
+		version : byte[65]
+		machine : byte[65]
+		domain : byte[65]
+	;;
+
+	type sockaddr = struct
+		fam : sockfam
+		data : byte[14]
+	;;
+
+	type sockaddr_in = struct
+		fam : sockfam
+		port : uint16
+		addr : byte[4]
+		zero : byte[8]
+	
;; + + type sockaddr_storage = struct + fam : sockfam + __align : uint32 + __pad : byte[112] + ;; + + /* open options */ + const Ordonly : fdopt = 0x0 + const Owronly : fdopt = 0x1 + const Ordwr : fdopt = 0x2 + const Oappend : fdopt = 0x80 + const Ocreat : fdopt = 0x40 + const Onofollow : fdopt = 0x20000 + const Ondelay : fdopt = 0x800 + const Otrunc : fdopt = 0x200 + + /* mmap protection */ + const Mprotnone : mprot = 0x0 + const Mprotrd : mprot = 0x1 + const Mprotwr : mprot = 0x2 + const Mprotexec : mprot = 0x4 + const Mprotrw : mprot = 0x3 /* convenience */ + + /* mmap options */ + const Mshared : mopt = 0x1 + const Mpriv : mopt = 0x2 + const Mfixed : mopt = 0x10 + const Mfile : mopt = 0x0 + const Manon : mopt = 0x20 + const M32bit : mopt = 0x40 + + /* socket families. INCOMPLETE. */ + const Afunspec : sockfam = 0 + const Afunix : sockfam = 1 + const Afinet : sockfam = 2 + const Afinet6 : sockfam = 10 + + /* socket types. */ + const Sockstream : socktype = 1 /* sequenced, reliable byte stream */ + const Sockdgram : socktype = 2 /* datagrams */ + const Sockraw : socktype = 3 /* raw proto */ + const Sockrdm : socktype = 4 /* reliably delivered messages */ + const Sockseqpacket : socktype = 5 /* sequenced, reliable packets */ + const Sockdccp : socktype = 6 /* data congestion control protocol */ + const Sockpack : socktype = 10 /* linux specific packet */ + + const Seekset : whence = 0 + const Seekcur : whence = 1 + const Seekend : whence = 2 + + /* return value for a failed mapping */ + const Mapbad : byte# = -1 castto(byte#) + + /* syscalls */ + const Sysread : scno = 0 + const Syswrite : scno = 1 + const Sysopen : scno = 2 + const Sysclose : scno = 3 + const Sysstat : scno = 4 + const Sysfstat : scno = 5 + const Syslstat : scno = 6 + const Syspoll : scno = 7 + const Syslseek : scno = 8 + const Sysmmap : scno = 9 + const Sysmprotect : scno = 10 + const Sysmunmap : scno = 11 + const Sysbrk : scno = 12 + const Sysrt_sigaction : scno = 13 + const Sysrt_sigprocmask : 
scno = 14 + const Sysrt_sigreturn : scno = 15 + const Sysioctl : scno = 16 + const Syspread64 : scno = 17 + const Syspwrite64 : scno = 18 + const Sysreadv : scno = 19 + const Syswritev : scno = 20 + const Sysaccess : scno = 21 + const Syspipe : scno = 22 + const Sysselect : scno = 23 + const Syssched_yield : scno = 24 + const Sysmremap : scno = 25 + const Sysmsync : scno = 26 + const Sysmincore : scno = 27 + const Sysmadvise : scno = 28 + const Sysshmget : scno = 29 + const Sysshmat : scno = 30 + const Sysshmctl : scno = 31 + const Sysdup : scno = 32 + const Sysdup2 : scno = 33 + const Syspause : scno = 34 + const Sysnanosleep : scno = 35 + const Sysgetitimer : scno = 36 + const Sysalarm : scno = 37 + const Syssetitimer : scno = 38 + const Sysgetpid : scno = 39 + const Syssendfile : scno = 40 + const Syssocket : scno = 41 + const Sysconnect : scno = 42 + const Sysaccept : scno = 43 + const Syssendto : scno = 44 + const Sysrecvfrom : scno = 45 + const Syssendmsg : scno = 46 + const Sysrecvmsg : scno = 47 + const Sysshutdown : scno = 48 + const Sysbind : scno = 49 + const Syslisten : scno = 50 + const Sysgetsockname : scno = 51 + const Sysgetpeername : scno = 52 + const Syssocketpair : scno = 53 + const Syssetsockopt : scno = 54 + const Sysgetsockopt : scno = 55 + const Sysclone : scno = 56 + const Sysfork : scno = 57 + const Sysvfork : scno = 58 + const Sysexecve : scno = 59 + const Sysexit : scno = 60 + const Syswait4 : scno = 61 + const Syskill : scno = 62 + const Sysuname : scno = 63 + const Syssemget : scno = 64 + const Syssemop : scno = 65 + const Syssemctl : scno = 66 + const Sysshmdt : scno = 67 + const Sysmsgget : scno = 68 + const Sysmsgsnd : scno = 69 + const Sysmsgrcv : scno = 70 + const Sysmsgctl : scno = 71 + const Sysfcntl : scno = 72 + const Sysflock : scno = 73 + const Sysfsync : scno = 74 + const Sysfdatasync : scno = 75 + const Systruncate : scno = 76 + const Sysftruncate : scno = 77 + const Sysgetdents : scno = 78 + const Sysgetcwd : scno = 79 + 
const Syschdir : scno = 80 + const Sysfchdir : scno = 81 + const Sysrename : scno = 82 + const Sysmkdir : scno = 83 + const Sysrmdir : scno = 84 + const Syscreat : scno = 85 + const Syslink : scno = 86 + const Sysunlink : scno = 87 + const Syssymlink : scno = 88 + const Sysreadlink : scno = 89 + const Syschmod : scno = 90 + const Sysfchmod : scno = 91 + const Syschown : scno = 92 + const Sysfchown : scno = 93 + const Syslchown : scno = 94 + const Sysumask : scno = 95 + const Sysgettimeofday : scno = 96 + const Sysgetrlimit : scno = 97 + const Sysgetrusage : scno = 98 + const Syssysinfo : scno = 99 + const Systimes : scno = 100 + const Sysptrace : scno = 101 + const Sysgetuid : scno = 102 + const Syssyslog : scno = 103 + const Sysgetgid : scno = 104 + const Syssetuid : scno = 105 + const Syssetgid : scno = 106 + const Sysgeteuid : scno = 107 + const Sysgetegid : scno = 108 + const Syssetpgid : scno = 109 + const Sysgetppid : scno = 110 + const Sysgetpgrp : scno = 111 + const Syssetsid : scno = 112 + const Syssetreuid : scno = 113 + const Syssetregid : scno = 114 + const Sysgetgroups : scno = 115 + const Syssetgroups : scno = 116 + const Syssetresuid : scno = 117 + const Sysgetresuid : scno = 118 + const Syssetresgid : scno = 119 + const Sysgetresgid : scno = 120 + const Sysgetpgid : scno = 121 + const Syssetfsuid : scno = 122 + const Syssetfsgid : scno = 123 + const Sysgetsid : scno = 124 + const Syscapget : scno = 125 + const Syscapset : scno = 126 + const Sysrt_sigpending : scno = 127 + const Sysrt_sigtimedwait : scno = 128 + const Sysrt_sigqueueinfo : scno = 129 + const Sysrt_sigsuspend : scno = 130 + const Syssigaltstack : scno = 131 + const Sysutime : scno = 132 + const Sysmknod : scno = 133 + const Sysuselib : scno = 134 + const Syspersonality : scno = 135 + const Sysustat : scno = 136 + const Sysstatfs : scno = 137 + const Sysfstatfs : scno = 138 + const Syssysfs : scno = 139 + const Sysgetpriority : scno = 140 + const Syssetpriority : scno = 141 + const 
Syssched_setparam : scno = 142 + const Syssched_getparam : scno = 143 + const Syssched_setscheduler : scno = 144 + const Syssched_getscheduler : scno = 145 + const Syssched_get_priority_max : scno = 146 + const Syssched_get_priority_min : scno = 147 + const Syssched_rr_get_interval : scno = 148 + const Sysmlock : scno = 149 + const Sysmunlock : scno = 150 + const Sysmlockall : scno = 151 + const Sysmunlockall : scno = 152 + const Sysvhangup : scno = 153 + const Sysmodify_ldt : scno = 154 + const Syspivot_root : scno = 155 + const Sys_sysctl : scno = 156 + const Sysprctl : scno = 157 + const Sysarch_prctl : scno = 158 + const Sysadjtimex : scno = 159 + const Syssetrlimit : scno = 160 + const Syschroot : scno = 161 + const Syssync : scno = 162 + const Sysacct : scno = 163 + const Syssettimeofday : scno = 164 + const Sysmount : scno = 165 + const Sysumount2 : scno = 166 + const Sysswapon : scno = 167 + const Sysswapoff : scno = 168 + const Sysreboot : scno = 169 + const Syssethostname : scno = 170 + const Syssetdomainname : scno = 171 + const Sysiopl : scno = 172 + const Sysioperm : scno = 173 + const Syscreate_module : scno = 174 + const Sysinit_module : scno = 175 + const Sysdelete_module : scno = 176 + const Sysget_kernel_syms : scno = 177 + const Sysquery_module : scno = 178 + const Sysquotactl : scno = 179 + const Sysnfsservctl : scno = 180 + const Sysgetpmsg : scno = 181 + const Sysputpmsg : scno = 182 + const Sysafs_syscall : scno = 183 + const Systuxcall : scno = 184 + const Syssecurity : scno = 185 + const Sysgettid : scno = 186 + const Sysreadahead : scno = 187 + const Syssetxattr : scno = 188 + const Syslsetxattr : scno = 189 + const Sysfsetxattr : scno = 190 + const Sysgetxattr : scno = 191 + const Syslgetxattr : scno = 192 + const Sysfgetxattr : scno = 193 + const Syslistxattr : scno = 194 + const Sysllistxattr : scno = 195 + const Sysflistxattr : scno = 196 + const Sysremovexattr : scno = 197 + const Syslremovexattr : scno = 198 + const Sysfremovexattr : 
scno = 199 + const Systkill : scno = 200 + const Systime : scno = 201 + const Sysfutex : scno = 202 + const Syssched_setaffinity : scno = 203 + const Syssched_getaffinity : scno = 204 + const Sysset_thread_area : scno = 205 + const Sysio_setup : scno = 206 + const Sysio_destroy : scno = 207 + const Sysio_getevents : scno = 208 + const Sysio_submit : scno = 209 + const Sysio_cancel : scno = 210 + const Sysget_thread_area : scno = 211 + const Syslookup_dcookie : scno = 212 + const Sysepoll_create : scno = 213 + const Sysepoll_ctl_old : scno = 214 + const Sysepoll_wait_old : scno = 215 + const Sysremap_file_pages : scno = 216 + const Sysgetdents64 : scno = 217 + const Sysset_tid_address : scno = 218 + const Sysrestart_syscall : scno = 219 + const Syssemtimedop : scno = 220 + const Sysfadvise64 : scno = 221 + const Systimer_create : scno = 222 + const Systimer_settime : scno = 223 + const Systimer_gettime : scno = 224 + const Systimer_getoverrun : scno = 225 + const Systimer_delete : scno = 226 + const Sysclock_settime : scno = 227 + const Sysclock_gettime : scno = 228 + const Sysclock_getres : scno = 229 + const Sysclock_nanosleep : scno = 230 + const Sysexit_group : scno = 231 + const Sysepoll_wait : scno = 232 + const Sysepoll_ctl : scno = 233 + const Systgkill : scno = 234 + const Sysutimes : scno = 235 + const Sysvserver : scno = 236 + const Sysmbind : scno = 237 + const Sysset_mempolicy : scno = 238 + const Sysget_mempolicy : scno = 239 + const Sysmq_open : scno = 240 + const Sysmq_unlink : scno = 241 + const Sysmq_timedsend : scno = 242 + const Sysmq_timedreceive : scno = 243 + const Sysmq_notify : scno = 244 + const Sysmq_getsetattr : scno = 245 + const Syskexec_load : scno = 246 + const Syswaitid : scno = 247 + const Sysadd_key : scno = 248 + const Sysrequest_key : scno = 249 + const Syskeyctl : scno = 250 + const Sysioprio_set : scno = 251 + const Sysioprio_get : scno = 252 + const Sysinotify_init : scno = 253 + const Sysinotify_add_watch : scno = 254 + const 
Sysinotify_rm_watch : scno = 255 + const Sysmigrate_pages : scno = 256 + const Sysopenat : scno = 257 + const Sysmkdirat : scno = 258 + const Sysmknodat : scno = 259 + const Sysfchownat : scno = 260 + const Sysfutimesat : scno = 261 + const Sysnewfstatat : scno = 262 + const Sysunlinkat : scno = 263 + const Sysrenameat : scno = 264 + const Syslinkat : scno = 265 + const Syssymlinkat : scno = 266 + const Sysreadlinkat : scno = 267 + const Sysfchmodat : scno = 268 + const Sysfaccessat : scno = 269 + const Syspselect6 : scno = 270 + const Sysppoll : scno = 271 + const Sysunshare : scno = 272 + const Sysset_robust_list : scno = 273 + const Sysget_robust_list : scno = 274 + const Syssplice : scno = 275 + const Systee : scno = 276 + const Syssync_file_range : scno = 277 + const Sysvmsplice : scno = 278 + const Sysmove_pages : scno = 279 + const Sysutimensat : scno = 280 + const Sysepoll_pwait : scno = 281 + const Syssignalfd : scno = 282 + const Systimerfd_create : scno = 283 + const Syseventfd : scno = 284 + const Sysfallocate : scno = 285 + const Systimerfd_settime : scno = 286 + const Systimerfd_gettime : scno = 287 + const Sysaccept4 : scno = 288 + const Syssignalfd4 : scno = 289 + const Syseventfd2 : scno = 290 + const Sysepoll_create1 : scno = 291 + const Sysdup3 : scno = 292 + const Syspipe2 : scno = 293 + const Sysinotify_init1 : scno = 294 + const Syspreadv : scno = 295 + const Syspwritev : scno = 296 + const Sysrt_tgsigqueueinfo : scno = 297 + const Sysperf_event_open : scno = 298 + const Sysrecvmmsg : scno = 299 + const Sysfanotify_init : scno = 300 + const Sysfanotify_mark : scno = 301 + const Sysprlimit64 : scno = 302 + const Sysname_to_handle_at : scno = 303 + const Sysopen_by_handle_at : scno = 304 + const Sysclock_adjtime : scno = 305 + const Syssyncfs : scno = 306 + const Syssendmmsg : scno = 307 + const Syssetns : scno = 308 + const Sysgetcpu : scno = 309 + const Sysprocess_vm_readv : scno = 310 + const Sysprocess_vm_writev : scno = 311 + + /* network 
protocols */ + const Ipproto_ip : sockproto = 0 + const Ipproto_icmp : sockproto = 1 + const Ipproto_tcp : sockproto = 6 + const Ipproto_udp : sockproto = 17 + + /* getting to the os */ + extern const syscall : (sc:scno, args:... -> int64) + + /* process management */ + const exit : (status:int -> void) + const getpid : ( -> int64) + const kill : (pid:int64, sig:int64 -> int64) + const fork : (-> int64) + const wait4 : (pid:int64, loc:int32#, opt : int64, usage:rusage# -> int64) + const waitpid : (pid:int64, loc:int32#, opt : int64 -> int64) + const execv : (cmd : byte[:], args : byte[:][:] -> int64) + const execve : (cmd : byte[:], args : byte[:][:], env : byte[:][:] -> int64) + + + /* fd manipulation */ + const open : (path:byte[:], opts:fdopt -> fd) + const openmode : (path:byte[:], opts:fdopt, mode:int64 -> fd) + const close : (fd:fd -> int64) + const creat : (path:byte[:], mode:int64 -> fd) + const read : (fd:fd, buf:byte[:] -> size) + const write : (fd:fd, buf:byte[:] -> size) + const lseek : (fd:fd, off:uint64, whence:int64 -> int64) + const stat : (path:byte[:], sb:statbuf# -> int64) + const fstat : (fd:fd, sb:statbuf# -> int64) + const mkdir : (path : byte[:], mode : int64 -> int64) + const ioctl : (fd:fd, req : int64, args:... 
-> int64) + + /* networking */ + const socket : (dom : sockfam, stype : socktype, proto : sockproto -> fd) + const connect : (sock : fd, addr : sockaddr#, len : size -> int) + const accept : (sock : fd, addr : sockaddr#, len : size# -> fd) + const listen : (sock : fd, backlog : int -> int) + const bind : (sock : fd, addr : sockaddr#, len : size -> int) + + /* memory mapping */ + const munmap : (addr:byte#, len:size -> int64) + const mmap : (addr:byte#, len:size, prot:mprot, flags:mopt, fd:fd, off:off -> byte#) + + /* time */ + const clock_getres : (clk : clock, ts : timespec# -> int32) + const clock_gettime : (clk : clock, ts : timespec# -> int32) + const clock_settime : (clk : clock, ts : timespec# -> int32) + const sleep : (time : uint64 -> int32) + const nanosleep : (req : timespec#, rem : timespec# -> int32) + + /* system information */ + const uname : (buf : utsname# -> int) +;; + +extern const cstring : (str : byte[:] -> byte#) +extern const alloca : (sz : size -> byte#) +extern const __cenvp : byte## + +/* process management */ +const exit = {status; syscall(Sysexit, status castto(int64))} +const getpid = {; -> syscall(Sysgetpid, 1)} +const kill = {pid, sig; -> syscall(Syskill, pid, sig)} +const fork = {; -> syscall(Sysfork)} +const wait4 = {pid, loc, opt, usage; -> syscall(Syswait4, pid, loc, opt, usage)} +const waitpid = {pid, loc, opt; + var rusage + -> wait4(pid, loc, opt, &rusage) +} + +const execv = {cmd, args + var p, cargs, i + + /* of course we fucking have to duplicate this code everywhere, + * since we want to stack allocate... 
*/ + p = alloca((args.len + 1)*sizeof(byte#)) + /* the slice covers args.len + 1 slots so the terminator store below stays in bounds */ + cargs = (p castto(byte##))[:args.len + 1] + for i = 0; i < args.len; i++ + cargs[i] = cstring(args[i]) + ;; + cargs[args.len] = 0 castto(byte#) + -> syscall(Sysexecve, cstring(cmd), p, __cenvp) +} + +/* + replaces the current process image with cmd, passing args as argv + and env as the environment. both arrays are converted to + null-terminated arrays of C strings allocated with alloca. + returns the raw syscall result. + */ +const execve = {cmd, args, env + var cargs, cenv, i + var ap, ep + + /* copy the args; one extra slot holds the terminating null pointer */ + ap = alloca((args.len + 1)*sizeof(byte#)) + cargs = (ap castto(byte##))[:args.len + 1] + for i = 0; i < args.len; i++ + cargs[i] = cstring(args[i]) + ;; + cargs[args.len] = 0 castto(byte#) + + /* + copy the env. + this uses its own allocation (ep) so that the argv pointer (ap) + is not overwritten before the syscall is made. + */ + ep = alloca((env.len + 1)*sizeof(byte#)) + cenv = (ep castto(byte##))[:env.len + 1] + for i = 0; i < env.len; i++ + cenv[i] = cstring(env[i]) + ;; + cenv[env.len] = 0 castto(byte#) + + /* hand over the raw array pointers: argv first, then envp */ + -> syscall(Sysexecve, cstring(cmd), ap, ep) +} + +/* fd manipulation */ +const open = {path, opts; -> syscall(Sysopen, cstring(path), opts, 0o777) castto(fd)} +const openmode = {path, opts, mode; -> syscall(Sysopen, cstring(path), opts, mode) castto(fd)} +const close = {fd; -> syscall(Sysclose, fd)} +const creat = {path, mode; -> syscall(Syscreat, cstring(path), mode) castto(fd)} +const read = {fd, buf; -> syscall(Sysread, fd, buf castto(byte#), buf.len castto(size)) castto(size)} +const write = {fd, buf; -> syscall(Syswrite, fd, buf castto(byte#), buf.len castto(size)) castto(size)} +const lseek = {fd, off, whence; -> syscall(Syslseek, fd, off, whence)} +const stat = {path, sb; -> syscall(Sysstat, cstring(path), sb)} +const fstat = {fd, sb; -> syscall(Sysfstat, fd, sb)} +const mkdir = {path, mode; -> syscall(Sysmkdir, cstring(path), mode) castto(int64)} +const ioctl = {fd, req, args + var arg : byte# + var ap + + /* only the first variadic argument is forwarded to the kernel */ + ap = vastart(&args) + (arg, ap) = vanext(ap) + -> syscall(Sysioctl, fd, req, arg) castto(int64) +} + +/* networking */ +const socket = {dom, stype, proto; -> syscall(Syssocket, dom castto(int64), stype, proto) castto(fd)} +const connect = {sock, 
addr, len; -> syscall(Sysconnect, sock, addr, len) castto(int)} +const bind = {sock, addr, len; -> syscall(Sysbind, sock, addr, len) castto(int)} +const listen = {sock, backlog; -> syscall(Syslisten, sock, backlog castto(int64)) castto(int)} +const accept = {sock, addr, lenp; -> syscall(Sysaccept, sock, addr, lenp) castto(fd)} + +/* memory mapping */ +const munmap = {addr, len; -> syscall(Sysmunmap, addr, len)} +const mmap = {addr, len, prot, flags, fd, off; -> syscall(Sysmmap, addr, len, prot, flags, fd, off) castto(byte#)} + +/* time */ +const clock_getres = {clk, ts; -> syscall(Sysclock_getres, clockid(clk), ts) castto(int32)} +const clock_gettime = {clk, ts; -> syscall(Sysclock_gettime, clockid(clk), ts) castto(int32)} +const clock_settime = {clk, ts; -> syscall(Sysclock_settime, clockid(clk), ts) castto(int32)} + +const sleep = {time + var req, rem + req = [.sec = time, .nsec = 0] + -> nanosleep(&req, &rem) +} + +const nanosleep = {req, rem; + -> syscall(Sysnanosleep, req, rem) castto(int32) +} + +/* system information */ +const uname = {buf; -> syscall(Sysuname, buf) castto(int)} + +const clockid = {clk + match clk + | `Clockrealtime: -> 0 + | `Clockmonotonic: -> 1 + | `Clockproccpu: -> 2 + | `Clockthreadcpu: -> 3 + | `Clockmonotonicraw: -> 4 + | `Clockrealtimecoarse: -> 5 + | `Clockmonotoniccoarse:-> 6 + | `Clockboottime: -> 7 + | `Clockrealtimealarm: -> 8 + | `Clockboottimealarm: -> 9 + ;; + -> -1 +} + diff --git a/libstd/sys-osx.myr b/libstd/sys-osx.myr new file mode 100644 index 0000000..b797b40 --- /dev/null +++ b/libstd/sys-osx.myr @@ -0,0 +1,750 @@ +use "types.use" +use "varargs.use" + +pkg std = + type scno = int64 /* syscall */ + type fdopt = int64 /* fd options */ + type fd = int64 /* fd */ + type mprot = int64 /* memory protection */ + type mopt = int64 /* memory mapping options */ + type socktype = int64 /* socket type */ + type sockproto = int64 /* socket protocol */ + type sockfam = uint8 /* socket family */ + + type timespec = struct + sec : 
uint64 + nsec : uint32 + ;; + + type timeval = struct + sec : uint64 + usec : uint32 + ;; + + type timezone = struct + minwest : int32 /* of greenwich */ + dsttime : int32 /* nonzero if DST applies */ + ;; + + type clock = union + `Clockrealtime + `Clockmonotonic + ;; + + type statbuf = struct + dev : int32 + mode : uint16 + nlink : uint32 + ino : uint64 /* 32/64? which do I use? */ + uid : uint32 + gid : uint32 + rdev : int32 + atime : timespec + atimens : timespec + mtime : timespec + mtimens : timespec + ctime : timespec + ctimens : timespec + btime : timespec + btimens : timespec + size : off + blocks : uint + blocksz : uint + flags : uint32 + gen : uint32 + lspare : int32 + qspare0 : int64 + qspare1 : int64 + ;; + + type rusage = struct + utime : timeval /* user time */ + stime : timeval /* system time */ + _opaque : uint64[14] /* padding (darwin-specific data) */ + ;; + + type utsname = struct + system : byte[256] + node : byte[256] + release : byte[256] + version : byte[256] + machine : byte[256] + ;; + + type sockaddr = struct + len : byte + fam : sockfam + data : byte[14] /* what is the *actual* length? 
*/ + ;; + + type sockaddr_in = struct + len : byte + fam : sockfam + port : uint16 + addr : byte[4] + zero : byte[8] + ;; + + type sockaddr_storage = struct + len : byte + fam : sockfam + __pad1 : byte[6] + __align : uint64 + __pad2 : byte[112] + ;; + + type dirent = struct + ino : uint64 + seekoff : uint64 /* seek offset (optional, used by servers) */ + reclen : uint16 /* length of this record */ + namlen : uint16 /* length of string in d_name */ + typeid : uint8 /* file type, see below */ + name : byte[1024] + ;; + + /* open options */ + const Ordonly : fdopt = 0x0 + const Owronly : fdopt = 0x1 + const Ordwr : fdopt = 0x2 + const Ondelay : fdopt = 0x4 + const Oappend : fdopt = 0x8 + const Ocreat : fdopt = 0x200 + const Onofollow : fdopt = 0x100 + const Otrunc : fdopt = 0x400 + const Odir : fdopt = 0x100000 + + /* mmap protection */ + const Mprotnone : mprot = 0x0 + const Mprotrd : mprot = 0x1 + const Mprotwr : mprot = 0x2 + const Mprotexec : mprot = 0x4 + const Mprotrw : mprot = 0x3 + + /* mmap options */ + const Mshared : mopt = 0x1 + const Mpriv : mopt = 0x2 + const Mfixed : mopt = 0x10 + const Mfile : mopt = 0x0 + const Manon : mopt = 0x1000 + /* Only on Linux + const M32bit : mopt = 0x40 + */ + + /* socket families. INCOMPLETE. */ + const Afunspec : sockfam = 0 + const Afunix : sockfam = 1 + const Afinet : sockfam = 2 + const Afinet6 : sockfam = 30 + + /* socket types. */ + const Sockstream : socktype = 1 + const Sockdgram : socktype = 2 + const Sockraw : socktype = 3 + const Sockrdm : socktype = 4 + const Sockseqpacket : socktype = 5 + + + /* return value for a failed mapping */ + const Mapbad : byte# = -1 castto(byte#) + + /* syscalls. 
+ note, creat() implemented as open(path, Creat|Trunc|Wronly) */ + const Syssyscall : scno = 0x2000000 + const Sysexit : scno = 0x2000001 + const Sysfork : scno = 0x2000002 + const Sysread : scno = 0x2000003 + const Syswrite : scno = 0x2000004 + const Sysopen : scno = 0x2000005 + const Sysclose : scno = 0x2000006 + const Syswait4 : scno = 0x2000007 + const Syslink : scno = 0x2000009 + const Sysunlink : scno = 0x200000a + const Syschdir : scno = 0x200000c + const Sysfchdir : scno = 0x200000d + const Sysmknod : scno = 0x200000e + const Syschmod : scno = 0x200000f + const Syschown : scno = 0x2000010 + const Sysgetfsstat : scno = 0x2000012 + const Sysgetpid : scno = 0x2000014 + const Syssetuid : scno = 0x2000017 + const Sysgetuid : scno = 0x2000018 + const Sysgeteuid : scno = 0x2000019 + const Sysptrace : scno = 0x200001a + const Sysrecvmsg : scno = 0x200001b + const Syssendmsg : scno = 0x200001c + const Sysrecvfrom : scno = 0x200001d + const Sysaccept : scno = 0x200001e + const Sysgetpeername : scno = 0x200001f + const Sysgetsockname : scno = 0x2000020 + const Sysaccess : scno = 0x2000021 + const Syschflags : scno = 0x2000022 + const Sysfchflags : scno = 0x2000023 + const Syssync : scno = 0x2000024 + const Syskill : scno = 0x2000025 + const Sysgetppid : scno = 0x2000027 + const Sysdup : scno = 0x2000029 + const Syspipe : scno = 0x200002a + const Sysgetegid : scno = 0x200002b + const Sysprofil : scno = 0x200002c + const Syssigaction : scno = 0x200002e + const Sysgetgid : scno = 0x200002f + const Syssigprocmask : scno = 0x2000030 + const Sysgetlogin : scno = 0x2000031 + const Syssetlogin : scno = 0x2000032 + const Sysacct : scno = 0x2000033 + const Syssigpending : scno = 0x2000034 + const Syssigaltstack : scno = 0x2000035 + const Sysioctl : scno = 0x2000036 + const Sysreboot : scno = 0x2000037 + const Sysrevoke : scno = 0x2000038 + const Syssymlink : scno = 0x2000039 + const Sysreadlink : scno = 0x200003a + const Sysexecve : scno = 0x200003b + const Sysumask : scno = 
0x200003c + const Syschroot : scno = 0x200003d + const Sysmsync : scno = 0x2000041 + const Sysvfork : scno = 0x2000042 + const Sysmunmap : scno = 0x2000049 + const Sysmprotect : scno = 0x200004a + const Sysmadvise : scno = 0x200004b + const Sysmincore : scno = 0x200004e + const Sysgetgroups : scno = 0x200004f + const Syssetgroups : scno = 0x2000050 + const Sysgetpgrp : scno = 0x2000051 + const Syssetpgid : scno = 0x2000052 + const Syssetitimer : scno = 0x2000053 + const Sysswapon : scno = 0x2000055 + const Sysgetitimer : scno = 0x2000056 + const Sysgetdtablesize : scno = 0x2000059 + const Sysdup2 : scno = 0x200005a + const Sysfcntl : scno = 0x200005c + const Sysselect : scno = 0x200005d + const Sysfsync : scno = 0x200005f + const Syssetpriority : scno = 0x2000060 + const Syssocket : scno = 0x2000061 + const Sysconnect : scno = 0x2000062 + const Sysgetpriority : scno = 0x2000064 + const Sysbind : scno = 0x2000068 + const Syssetsockopt : scno = 0x2000069 + const Syslisten : scno = 0x200006a + const Syssigsuspend : scno = 0x200006f + const Sysgettimeofday : scno = 0x2000074 + const Sysgetrusage : scno = 0x2000075 + const Sysgetsockopt : scno = 0x2000076 + const Sysreadv : scno = 0x2000078 + const Syswritev : scno = 0x2000079 + const Syssettimeofday : scno = 0x200007a + const Sysfchown : scno = 0x200007b + const Sysfchmod : scno = 0x200007c + const Syssetreuid : scno = 0x200007e + const Syssetregid : scno = 0x200007f + const Sysrename : scno = 0x2000080 + const Sysflock : scno = 0x2000083 + const Sysmkfifo : scno = 0x2000084 + const Syssendto : scno = 0x2000085 + const Sysshutdown : scno = 0x2000086 + const Syssocketpair : scno = 0x2000087 + const Sysmkdir : scno = 0x2000088 + const Sysrmdir : scno = 0x2000089 + const Sysutimes : scno = 0x200008a + const Sysfutimes : scno = 0x200008b + const Sysadjtime : scno = 0x200008c + const Sysgethostuuid : scno = 0x200008e + const Syssetsid : scno = 0x2000093 + const Sysgetpgid : scno = 0x2000097 + const Syssetprivexec : scno = 
0x2000098 + const Syspread : scno = 0x2000099 + const Syspwrite : scno = 0x200009a + const Sysnfssvc : scno = 0x200009b + const Sysstatfs : scno = 0x200009d + const Sysfstatfs : scno = 0x200009e + const Sysunmount : scno = 0x200009f + const Sysgetfh : scno = 0x20000a1 + const Sysquotactl : scno = 0x20000a5 + const Sysmount : scno = 0x20000a7 + const Syscsops : scno = 0x20000a9 + const Syswaitid : scno = 0x20000ad + const Sysadd_profil : scno = 0x20000b0 + const Syskdebug_trace : scno = 0x20000b4 + const Syssetgid : scno = 0x20000b5 + const Syssetegid : scno = 0x20000b6 + const Sysseteuid : scno = 0x20000b7 + const Syssigreturn : scno = 0x20000b8 + const Syschud : scno = 0x20000b9 + const Sysfdatasync : scno = 0x20000bb + const Sysstat : scno = 0x20000bc + const Sysfstat : scno = 0x20000bd + const Syslstat : scno = 0x20000be + const Syspathconf : scno = 0x20000bf + const Sysfpathconf : scno = 0x20000c0 + const Sysgetrlimit : scno = 0x20000c2 + const Syssetrlimit : scno = 0x20000c3 + const Sysgetdirentries : scno = 0x20000c4 + const Sysmmap : scno = 0x20000c5 + const Syslseek : scno = 0x20000c7 + const Systruncate : scno = 0x20000c8 + const Sysftruncate : scno = 0x20000c9 + const Sys__sysctl : scno = 0x20000ca + const Sysmlock : scno = 0x20000cb + const Sysmunlock : scno = 0x20000cc + const Sysundelete : scno = 0x20000cd + const SysATsocket : scno = 0x20000ce + const SysATgetmsg : scno = 0x20000cf + const SysATputmsg : scno = 0x20000d0 + const SysATPsndreq : scno = 0x20000d1 + const SysATPsndrsp : scno = 0x20000d2 + const SysATPgetreq : scno = 0x20000d3 + const SysATPgetrsp : scno = 0x20000d4 + const Sysmkcomplex : scno = 0x20000d8 + const Sysstatv : scno = 0x20000d9 + const Syslstatv : scno = 0x20000da + const Sysfstatv : scno = 0x20000db + const Sysgetattrlist : scno = 0x20000dc + const Syssetattrlist : scno = 0x20000dd + const Sysgetdirentriesattr : scno = 0x20000de + const Sysexchangedata : scno = 0x20000df + const Syssearchfs : scno = 0x20000e1 + const Sysdelete 
: scno = 0x20000e2 + const Syscopyfile : scno = 0x20000e3 + const Sysfgetattrlist : scno = 0x20000e4 + const Sysfsetattrlist : scno = 0x20000e5 + const Syspoll : scno = 0x20000e6 + const Syswatchevent : scno = 0x20000e7 + const Syswaitevent : scno = 0x20000e8 + const Sysmodwatch : scno = 0x20000e9 + const Sysgetxattr : scno = 0x20000ea + const Sysfgetxattr : scno = 0x20000eb + const Syssetxattr : scno = 0x20000ec + const Sysfsetxattr : scno = 0x20000ed + const Sysremovexattr : scno = 0x20000ee + const Sysfremovexattr : scno = 0x20000ef + const Syslistxattr : scno = 0x20000f0 + const Sysflistxattr : scno = 0x20000f1 + const Sysfsctl : scno = 0x20000f2 + const Sysinitgroups : scno = 0x20000f3 + const Sysposix_spawn : scno = 0x20000f4 + const Sysffsctl : scno = 0x20000f5 + const Sysnfsclnt : scno = 0x20000f7 + const Sysfhopen : scno = 0x20000f8 + const Sysminherit : scno = 0x20000fa + const Syssemsys : scno = 0x20000fb + const Sysmsgsys : scno = 0x20000fc + const Sysshmsys : scno = 0x20000fd + const Syssemctl : scno = 0x20000fe + const Syssemget : scno = 0x20000ff + const Syssemop : scno = 0x2000100 + const Sysmsgctl : scno = 0x2000102 + const Sysmsgget : scno = 0x2000103 + const Sysmsgsnd : scno = 0x2000104 + const Sysmsgrcv : scno = 0x2000105 + const Sysshmat : scno = 0x2000106 + const Sysshmctl : scno = 0x2000107 + const Sysshmdt : scno = 0x2000108 + const Sysshmget : scno = 0x2000109 + const Sysshm_open : scno = 0x200010a + const Sysshm_unlink : scno = 0x200010b + const Syssem_open : scno = 0x200010c + const Syssem_close : scno = 0x200010d + const Syssem_unlink : scno = 0x200010e + const Syssem_wait : scno = 0x200010f + const Syssem_trywait : scno = 0x2000110 + const Syssem_post : scno = 0x2000111 + const Syssem_getvalue : scno = 0x2000112 + const Syssem_init : scno = 0x2000113 + const Syssem_destroy : scno = 0x2000114 + const Sysopen_extended : scno = 0x2000115 + const Sysumask_extended : scno = 0x2000116 + const Sysstat_extended : scno = 0x2000117 + const 
Syslstat_extended : scno = 0x2000118 + const Sysfstat_extended : scno = 0x2000119 + const Syschmod_extended : scno = 0x200011a + const Sysfchmod_extended : scno = 0x200011b + const Sysaccess_extended : scno = 0x200011c + const Syssettid : scno = 0x200011d + const Sysgettid : scno = 0x200011e + const Syssetsgroups : scno = 0x200011f + const Sysgetsgroups : scno = 0x2000120 + const Syssetwgroups : scno = 0x2000121 + const Sysgetwgroups : scno = 0x2000122 + const Sysmkfifo_extended : scno = 0x2000123 + const Sysmkdir_extended : scno = 0x2000124 + const Sysidentitysvc : scno = 0x2000125 + const Sysshared_region_check_np : scno = 0x2000126 + const Sysshared_region_map_np : scno = 0x2000127 + const Sysvm_pressure_monitor : scno = 0x2000128 + const Syspsynch_rw_longrdlock : scno = 0x2000129 + const Syspsynch_rw_yieldwrlock : scno = 0x200012a + const Syspsynch_rw_downgrade : scno = 0x200012b + const Syspsynch_rw_upgrade : scno = 0x200012c + const Syspsynch_mutexwait : scno = 0x200012d + const Syspsynch_mutexdrop : scno = 0x200012e + const Syspsynch_cvbroad : scno = 0x200012f + const Syspsynch_cvsignal : scno = 0x2000130 + const Syspsynch_cvwait : scno = 0x2000131 + const Syspsynch_rw_rdlock : scno = 0x2000132 + const Syspsynch_rw_wrlock : scno = 0x2000133 + const Syspsynch_rw_unlock : scno = 0x2000134 + const Syspsynch_rw_unlock2 : scno = 0x2000135 + const Sysgetsid : scno = 0x2000136 + const Syssettid_with_pid : scno = 0x2000137 + const Sysaio_fsync : scno = 0x2000139 + const Sysaio_return : scno = 0x200013a + const Sysaio_suspend : scno = 0x200013b + const Sysaio_cancel : scno = 0x200013c + const Sysaio_error : scno = 0x200013d + const Sysaio_read : scno = 0x200013e + const Sysaio_write : scno = 0x200013f + const Syslio_listio : scno = 0x2000140 + const Sysiopolicysys : scno = 0x2000142 + const Sysmlockall : scno = 0x2000144 + const Sysmunlockall : scno = 0x2000145 + const Sysissetugid : scno = 0x2000147 + const Sys__pthread_kill : scno = 0x2000148 + const 
Sys__pthread_sigmask : scno = 0x2000149 + const Sys__sigwait : scno = 0x200014a + const Sys__disable_threadsignal : scno = 0x200014b + const Sys__pthread_markcancel : scno = 0x200014c + const Sys__pthread_canceled : scno = 0x200014d + const Sys__semwait_signal : scno = 0x200014e + const Sysproc_info : scno = 0x2000150 + const Syssendfile : scno = 0x2000151 + const Sysstat64 : scno = 0x2000152 + const Sysfstat64 : scno = 0x2000153 + const Syslstat64 : scno = 0x2000154 + const Sysstat64_extended : scno = 0x2000155 + const Syslstat64_extended : scno = 0x2000156 + const Sysfstat64_extended : scno = 0x2000157 + const Sysgetdirentries64 : scno = 0x2000158 + const Sysstatfs64 : scno = 0x2000159 + const Sysfstatfs64 : scno = 0x200015a + const Sysgetfsstat64 : scno = 0x200015b + const Sys__pthread_chdir : scno = 0x200015c + const Sys__pthread_fchdir : scno = 0x200015d + const Sysaudit : scno = 0x200015e + const Sysauditon : scno = 0x200015f + const Sysgetauid : scno = 0x2000161 + const Syssetauid : scno = 0x2000162 + const Sysgetaudit : scno = 0x2000163 + const Syssetaudit : scno = 0x2000164 + const Sysgetaudit_addr : scno = 0x2000165 + const Syssetaudit_addr : scno = 0x2000166 + const Sysauditctl : scno = 0x2000167 + const Sysbsdthread_create : scno = 0x2000168 + const Sysbsdthread_terminate : scno = 0x2000169 + const Syskqueue : scno = 0x200016a + const Syskevent : scno = 0x200016b + const Syslchown : scno = 0x200016c + const Sysstack_snapshot : scno = 0x200016d + const Sysbsdthread_register : scno = 0x200016e + const Sysworkq_open : scno = 0x200016f + const Sysworkq_kernreturn : scno = 0x2000170 + const Syskevent64 : scno = 0x2000171 + const Sys__old_semwait_signal : scno = 0x2000172 + const Sys__old_semwait_signal_nocancel : scno = 0x2000173 + const Systhread_selfid : scno = 0x2000174 + const Sys__mac_execve : scno = 0x200017c + const Sys__mac_syscall : scno = 0x200017d + const Sys__mac_get_file : scno = 0x200017e + const Sys__mac_set_file : scno = 0x200017f + const 
Sys__mac_get_link : scno = 0x2000180 + const Sys__mac_set_link : scno = 0x2000181 + const Sys__mac_get_proc : scno = 0x2000182 + const Sys__mac_set_proc : scno = 0x2000183 + const Sys__mac_get_fd : scno = 0x2000184 + const Sys__mac_set_fd : scno = 0x2000185 + const Sys__mac_get_pid : scno = 0x2000186 + const Sys__mac_get_lcid : scno = 0x2000187 + const Sys__mac_get_lctx : scno = 0x2000188 + const Sys__mac_set_lctx : scno = 0x2000189 + const Syssetlcid : scno = 0x200018a + const Sysgetlcid : scno = 0x200018b + const Sysread_nocancel : scno = 0x200018c + const Syswrite_nocancel : scno = 0x200018d + const Sysopen_nocancel : scno = 0x200018e + const Sysclose_nocancel : scno = 0x200018f + const Syswait4_nocancel : scno = 0x2000190 + const Sysrecvmsg_nocancel : scno = 0x2000191 + const Syssendmsg_nocancel : scno = 0x2000192 + const Sysrecvfrom_nocancel : scno = 0x2000193 + const Sysaccept_nocancel : scno = 0x2000194 + const Sysmsync_nocancel : scno = 0x2000195 + const Sysfcntl_nocancel : scno = 0x2000196 + const Sysselect_nocancel : scno = 0x2000197 + const Sysfsync_nocancel : scno = 0x2000198 + const Sysconnect_nocancel : scno = 0x2000199 + const Syssigsuspend_nocancel : scno = 0x200019a + const Sysreadv_nocancel : scno = 0x200019b + const Syswritev_nocancel : scno = 0x200019c + const Syssendto_nocancel : scno = 0x200019d + const Syspread_nocancel : scno = 0x200019e + const Syspwrite_nocancel : scno = 0x200019f + const Syswaitid_nocancel : scno = 0x20001a0 + const Syspoll_nocancel : scno = 0x20001a1 + const Sysmsgsnd_nocancel : scno = 0x20001a2 + const Sysmsgrcv_nocancel : scno = 0x20001a3 + const Syssem_wait_nocancel : scno = 0x20001a4 + const Sysaio_suspend_nocancel : scno = 0x20001a5 + const Sys__sigwait_nocancel : scno = 0x20001a6 + const Sys__semwait_signal_nocancel : scno = 0x20001a7 + const Sys__mac_mount : scno = 0x20001a8 + const Sys__mac_get_mount : scno = 0x20001a9 + const Sys__mac_getfsstat : scno = 0x20001aa + const Sysfsgetpath : scno = 0x20001ab + const 
Sysaudit_session_self : scno = 0x20001ac + const Sysaudit_session_join : scno = 0x20001ad + const Syspid_suspend : scno = 0x20001ae + const Syspid_resume : scno = 0x20001af + const Sysfileport_makeport : scno = 0x20001b0 + const Sysfileport_makefd : scno = 0x20001b1 + + extern const syscall : (sc:scno, args:... -> int64) + + /* process control */ + const exit : (status:int -> void) + const getpid : ( -> int64) + const kill : (pid:int64, sig:int64 -> int64) + const fork : (-> int64) + const wait4 : (pid:int64, loc:int32#, opt : int64, usage:rusage# -> int64) + const waitpid : (pid:int64, loc:int32#, opt : int64 -> int64) + const execv : (cmd : byte[:], args : byte[:][:] -> int64) + const execve : (cmd : byte[:], args : byte[:][:], env : byte[:][:] -> int64) + + /* fd manipulation */ + const open : (path:byte[:], opts:fdopt -> fd) + const openmode : (path:byte[:], opts:fdopt, mode:int64 -> fd) + const close : (fd:fd -> int64) + const creat : (path:byte[:], mode:int64 -> fd) + const read : (fd:fd, buf:byte[:] -> size) + const write : (fd:fd, buf:byte[:] -> size) + const lseek : (fd:fd, off:uint64, whence:int64 -> int64) + const stat : (path:byte[:], sb:statbuf# -> int64) + const fstat : (fd:fd, sb:statbuf# -> int64) + const mkdir : (path : byte[:], mode : int64 -> int64) + const ioctl : (fd:fd, req : int64, args:... 
-> int64) + const getdirentries64 : (fd : fd, buf : byte[:], basep : uint64# -> int64) + + /* networking */ + const socket : (dom : sockfam, stype : socktype, proto : sockproto -> fd) + const connect : (sock : fd, addr : sockaddr#, len : size -> int) + const accept : (sock : fd, addr : sockaddr#, len : size# -> fd) + const listen : (sock : fd, backlog : int -> int) + const bind : (sock : fd, addr : sockaddr#, len : size -> int) + + /* memory mapping */ + const munmap : (addr:byte#, len:size -> int64) + const mmap : (addr:byte#, len:size, prot:mprot, flags:mopt, fd:fd, off:off -> byte#) + + /* time */ + const gettimeofday : (tv : timeval#, tz : timezone# -> int) + const settimeofday : (tv : timeval#, tz : timezone# -> int) + /* faked with gettimeofday */ + const clock_getres : (clk : clock, ts : timespec# -> int) + const clock_gettime : (clk : clock, ts : timespec# -> int) + const clock_settime : (clk : clock, ts : timespec# -> int) + + /* system information */ + const uname : (buf : utsname# -> int) + const sysctl : (mib : int[:], old : byte[:]#, new : byte[:] -> int) +;; + +extern const __osx_fork : (->int64) +extern const cstring : (str : byte[:] -> byte#) +extern const alloca : (sz : size -> byte#) +extern const __cenvp : byte## + +/* process control */ +const exit = {status; syscall(Sysexit, status castto(int64))} +const getpid = {; -> syscall(Sysgetpid, 1)} +const kill = {pid, sig; -> syscall(Syskill, pid, sig)} +const fork = {; -> __osx_fork()} +const wait4 = {pid, loc, opt, usage; -> syscall(Syswait4, pid, loc, opt, usage)} +const waitpid = {pid, loc, opt; + -> wait4(pid, loc, opt, 0 castto(rusage#)) +} + +const execv = {cmd, args + var p, cargs, i + + /* doesn't just call execve() for efficiency's sake. 
/*
 * Replaces the current process image with `cmd`, passing `args` as the
 * argument vector and `env` as the environment.
 *
 * Both vectors are converted to NULL-terminated arrays of C strings.
 * The arrays and strings are obtained from alloca() and cstring(), so
 * nothing here is heap allocated.
 *
 * Returns the raw result of the execve system call.
 */
const execve = {cmd, args, env
	var cargs, cenv, i
	var argp, envp

	/*
	 copy the args. The slice covers len + 1 entries so that the
	 terminating NULL pointer is written inside its bounds.
	*/
	argp = alloca((args.len + 1)*sizeof(byte#))
	cargs = (argp castto(byte##))[:args.len + 1]
	for i = 0; i < args.len; i++
		cargs[i] = cstring(args[i])
	;;
	cargs[args.len] = 0 castto(byte#)

	/*
	 copy the env. This uses its own pointer: the argument vector
	 above must still be reachable when the system call is made.
	*/
	envp = alloca((env.len + 1)*sizeof(byte#))
	cenv = (envp castto(byte##))[:env.len + 1]
	for i = 0; i < env.len; i++
		cenv[i] = cstring(env[i])
	;;
	cenv[env.len] = 0 castto(byte#)

	/* pass raw pointers for both vectors, as execv() does */
	-> syscall(Sysexecve, cstring(cmd), argp, envp)
}
networking */ +const socket = {dom, stype, proto; -> syscall(Syssocket, dom castto(int64), stype, proto) castto(fd) } +const connect = {sock, addr, len; -> syscall(Sysconnect, sock, addr, len) castto(int)} +const accept = {sock, addr, len; -> syscall(Sysaccept, sock, addr, len) castto(fd)} +const listen = {sock, backlog; -> syscall(Syslisten, sock, backlog castto(int64)) castto(int)} +const bind = {sock, addr, len; -> syscall(Sysbind, sock, addr, len) castto(int)} + +/* memory management */ +const munmap = {addr, len; -> syscall(Sysmunmap, addr, len)} +const mmap = {addr, len, prot, flags, fd, off; -> syscall(Sysmmap, addr, len, prot, flags, fd, off) castto(byte#)} + +/* time */ +const gettimeofday = {tv, tz; -> syscall(Sysgettimeofday, tv, tz) castto(int)} +const settimeofday = {tv, tz; -> syscall(Syssettimeofday, tv, tz) castto(int)} + +/* faked with gettimeofday */ +const clock_getres = {clk, ts + ts.sec = 0 + ts.nsec = 1000*10 /* 10ms is reasonable resolution */ + -> 0 +} + +const clock_gettime = {clk, ts + var tv + var ret + + ret = gettimeofday(&tv, 0 castto(timezone#)) + ts.sec = tv.sec + ts.nsec = tv.usec * 1000 + -> ret +} + +const clock_settime = {clk, ts + var tv + + tv.sec = ts.sec + tv.usec = ts.nsec / 1000 + -> settimeofday(&tv, 0 castto(timezone#)) +} + +/* system information */ +const uname = {buf; + buf.system[0] = 'D' castto(byte) + buf.system[1] = 'a' castto(byte) + buf.system[2] = 'r' castto(byte) + buf.system[3] = 'w' castto(byte) + buf.system[4] = 'i' castto(byte) + buf.system[5] = 'n' castto(byte) + buf.system[6] = 0 + -> 0 + /* + FIXME: THIS IS BROKEN. Miscompiled? DEBUG IT. 
/*
 * Thin wrapper over the __sysctl system call.
 *
 *   mib: the name of the value to query or set, as a slice of ints.
 *   old: pointer to the buffer slice that receives the current value.
 *        On return it is resliced to the length reported by the kernel.
 *   new: the value to set; pass an empty slice to only query.
 *
 * Returns the system call result cast to int.
 */
const sysctl = {mib, old, new
	var mibp
	var mibsz
	var o
	var oldp
	var oldsz
	var newp
	var newsz
	var ret

	mibp = mib castto(byte#)
	mibsz = mib.len castto(uint64)
	o = old#
	oldp = o castto(byte#)
	oldsz = o.len castto(uint64)
	newp = new castto(byte#)
	/* the length of the new value, not the slice itself */
	newsz = new.len castto(uint64)

	ret = syscall(Sys__sysctl, mibp, mibsz, oldp, &oldsz, newp, newsz) castto(int)

	/* oldsz was passed by address, so it now holds the kernel's length */
	old# = o[:oldsz]
	-> ret
}
+ */ + movq 80 (%rsp),%rax + movq 88 (%rsp),%rdi + movq 96 (%rsp),%rsi + movq 104(%rsp),%rdx + movq 112(%rsp),%r10 + movq 120(%rsp),%r8 + movq 128(%rsp),%r9 + + syscall + + popq %r11 + popq %rcx + popq %r9 + popq %r8 + popq %r10 + popq %rdx + popq %rsi + popq %rdi + popq %rbp + ret + diff --git a/libstd/syscall-osx.s b/libstd/syscall-osx.s new file mode 100644 index 0000000..33aa160 --- /dev/null +++ b/libstd/syscall-osx.s @@ -0,0 +1,85 @@ +.globl _std$syscall +_std$syscall: + pushq %rbp + pushq %rdi + pushq %rsi + pushq %rdx + pushq %r10 + pushq %r8 + pushq %r9 + pushq %rcx + pushq %r11 + /* + hack: We load 6 args regardless of + how many we actually have. This may + load junk values, but if the syscall + doesn't use them, it's going to be + harmless. + */ + movq 80 (%rsp),%rax + movq 88 (%rsp),%rdi + movq 96 (%rsp),%rsi + movq 104(%rsp),%rdx + movq 112(%rsp),%r10 + movq 120(%rsp),%r8 + movq 128(%rsp),%r9 + + syscall + jae success + negq %rax + +success: + popq %r11 + popq %rcx + popq %r9 + popq %r8 + popq %r10 + popq %rdx + popq %rsi + popq %rdi + popq %rbp + ret + +/* + * OSX is dumb about fork, and needs an assembly wrapper. + * The fork() syscall, when called directly, returns the pid in both + * processes, which means that both parent and child think they're + * the parent. + * + * checking this involves peeking in %edx, so we need to do this in asm. 
/*
 * Returns the prefix of `s` that precedes the first nul byte, or all of
 * `s` if it contains no nul byte.
 */
const ntstr = {s
	var n

	n = 0
	/* check the bound first, so s[n] is never read past the end */
	while n < s.len && s[n] != 0
		n++
	;;
	-> s[:n]
}
*/ + for sz = 1; sz < 65536; sz *= 2 + for i = 0; i < 1024; i++ + x[i] = std.bytealloc(sz) + ;; + for i = 0; i < 1024; i++ + std.bytefree(x[i], sz) + ;; + ;; + + /* make sure the generic allocator works */ + for i = 0; i < 1024; i++ + x[i] = std.alloc() + ;; + for i = 0; i < 1024; i++ + std.free(x[i]) + ;; + std.write(1, "Hello, 世界\n") + chartypes() + testrng() + std.put("format output %i %i %s %s\n", 123, 321, "asdf", "מִלָּה") + std.put("format with no args\n") +} + +const chartypes = { + var s + var c + var buf : byte[32] + + s = " 1世界 äa\n" + while s.len != 0 + (c, s) = std.striter(s) + if std.isspace(c) + std.write(1, "Space\n") + elif std.isalpha(c) + std.write(1, "Alpha\n") + elif std.isnum(c) + std.write(1, "Num\n") + else + std.write(1, "Dunno\n") + ;; + if !std.encode(buf[:std.charlen(c)], c) + std.write(1, "couldn't encode\n") + ;; + std.write(1, buf[:std.charlen(c)]) + std.write(1, "\n") + ;; + if !std.encode(buf[0:3], -1) + std.write(1, "couldn't encode\n") + ;; +} + +const testrng = { + var r + var i + + r = std.mksrng(10) + for i = 0; i < 300; i++ + std.put("r[%i] = %l\n", i, std.rand(r, 5, 10) castto(int64)) + ;; + std.put("\n"); +} + +const usage = { + std.put("Pokes a bit at the standard library.\n") + std.put("Option string is asdf:g?h\n") + std.exit(0) +} + +const printenv = {name + match std.getenv(name) + | `std.Some env: + std.put("Value of %s is %s\n", name, env) + | `std.None: + std.put("No env var %s is set\n", name) + ;; +} diff --git a/libstd/try.myr b/libstd/try.myr new file mode 100644 index 0000000..1539a98 --- /dev/null +++ b/libstd/try.myr @@ -0,0 +1,13 @@ +use "option.use" +use "fmt.use" + +pkg std = + generic try : (v : option(@a) -> @a) +;; + +generic try = {v + match v + | `Some x: -> x + | `None: fatal(1, "expected `Some @a, got `None\n") + ;; +} diff --git a/libstd/types.myr b/libstd/types.myr new file mode 100644 index 0000000..c11cbd5 --- /dev/null +++ b/libstd/types.myr @@ -0,0 +1,7 @@ +pkg std = + type size = uint64 /* 
spans entire address space */ + type ssize = int64 /* signed size */ + type off = uint64 /* file offsets */ + type intptr = uint64 /* can hold any pointer losslessly */ + type time = int64 /* milliseconds since epoch */ +;; diff --git a/libstd/units.myr b/libstd/units.myr new file mode 100644 index 0000000..f1a99e9 --- /dev/null +++ b/libstd/units.myr @@ -0,0 +1,11 @@ +pkg std = + /* JEDEC 100B.1 memory sizes */ + generic KiB : @a::(integral,numeric) = 1024 + generic MiB : @a::(integral,numeric) = KiB*1024 + generic GiB : @a::(integral,numeric) = MiB*1024 + generic TiB : @a::(integral,numeric) = GiB*1024 + generic PiB : @a::(integral,numeric) = TiB*1024 + generic EiB : @a::(integral,numeric) = PiB*1024 + generic ZiB : @a::(integral,numeric) = EiB*1024 + generic YiB : @a::(integral,numeric) = ZiB*1024 +;; diff --git a/libstd/utf.myr b/libstd/utf.myr new file mode 100644 index 0000000..672ca05 --- /dev/null +++ b/libstd/utf.myr @@ -0,0 +1,104 @@ +use "die.use" +use "sys.use" +use "types.use" + +pkg std = + const Badchar : char = -1 castto(char) + const Maxcharlen : size = 4 + const Maxcharval : char = 0x10FFFF + + const charlen : (chr : char -> size) + const encode : (buf : byte[:], chr : char -> size) + const decode : (buf : byte[:] -> char) + const striter : (str : byte[:] -> [char, byte[:]]) +;; + +const charlen = {c + if c < 0x80 + -> 1 + elif c < 0x800 + -> 2 + elif c < 0x10000 + -> 3 + elif c < 0x200000 + -> 4 + else + -> -1 + ;; +} + +const encode = {buf, c + var len + var mark + var i + + len = charlen(c) + if len < 0 || buf.len < len + -> -1 + ;; + + if (len == 1) + mark = 0 + else + mark = (((1 << (8 - len)) - 1) ^ 0xff) castto(char) + ;; + + for i = len - 1; i > 0; i-- + buf[i] = (c & 0x3f | 0x80) castto(byte) + c >>= 6 + ;; + + buf[0] = (c | mark) castto(byte) + -> len +} + +const decode = {buf + var c + var b + + (c, b) = striter(buf) + -> c +} + +const striter = {str + var len + var mask + var chr + var i + var c + var tmp + + if !str.len + /* empty 
string: no resync needed */ + -> (Badchar, str) + ;; + c = str[0] + len = 0 + if c & 0x80 == 0 /* 0b0xxx_xxxx */ + len = 1 + elif c & 0xe0 == 0xc0 /* 0b110x_xxxx */ + len = 2 + elif c & 0xf0 == 0xe0 /* 0b1110_xxxx */ + len = 3 + elif c & 0xf8 == 0xf0 /* 0b1111_0xxx */ + len = 4 + else + /* skip one char forward so we can try + resyncing the character stream */ + -> (Badchar, str[1:]) + ;; + + if len == 0 || len > str.len + /* again, we want to try to resync */ + -> (Badchar, str[1:]) + ;; + + mask = (1 << (8 - len)) - 1 + chr = (c castto(uint32)) & mask + for i = 1; i < len; i++ + tmp = str[i] castto(uint32) + chr = (chr << 6) | (tmp & 0x3f) + ;; + + -> (chr castto(char), str[len:]) +} diff --git a/libstd/util.s b/libstd/util.s new file mode 100644 index 0000000..630cff8 --- /dev/null +++ b/libstd/util.s @@ -0,0 +1,47 @@ +/* + * Allocates a C string on the stack, for + * use within system calls, which is the only + * place the Myrddin stack should need nul-terminated + * strings. + * + * This is in assembly, because for efficiency we + * allocate the C strings on the stack, and don't adjust + * %rsp when returning. 
+ */ +.globl std$cstring +.globl _std$cstring +_std$cstring: +std$cstring: + movq (%rsp),%r15 /* ret addr */ + movq 8(%rsp),%rsi /* src */ + movq 16(%rsp),%rcx /* len */ + + subq %rcx,%rsp /* get stack */ + movq %rsp,%rdi /* dest */ + movq %rsp,%rax /* ret val */ + subq $16,%rsp /* "unpop" the args */ + subq $1,%rsp /* nul */ + andq $(~15),%rsp /* align */ + + cld + rep movsb + movb $0,(%rdi) /* terminate */ + + pushq %r15 /* ret addr */ + ret + +.globl std$alloca +.globl _std$alloca +_std$alloca: +std$alloca: + movq (%rsp),%r15 /* ret addr */ + movq 8(%rsp),%rbx /* len */ + + /* get stack space */ + subq %rbx,%rsp /* get stack space */ + movq %rsp,%rax /* top of stack (return value) */ + subq $16,%rsp /* "unpop" the args for return */ + andq $(~15),%rsp /* align */ + + pushq %r15 /* ret addr */ + ret diff --git a/libstd/varargs.myr b/libstd/varargs.myr new file mode 100644 index 0000000..f7b7ffb --- /dev/null +++ b/libstd/varargs.myr @@ -0,0 +1,52 @@ +use "types.use" + +pkg std = + type valist + + const vastart : (args : ...# -> valist) + generic vanext : (ap : valist -> [@a, valist]) +;; + +type valist = byte# + +/* + * a valist is really just a pointer to the varargs. + * we assume that these sit on the stack nicely, + * and don't need special handling to get to. + * + * This will be a problem when we switch to a + * register based convention. We might want to + * force varargs onto the stack regardless. + */ +const vastart = {args + -> args castto(valist) +} + +generic vanext = {ap -> [@a, valist] + var v : @a + var align + var p + + /* + Assumptions about the ABI: + * all types smaller than a word are + * aligned to their own size. Larger + * types are aligned to word size. 
+ */ + if sizeof(@a) > 8 + align = 8 + else + align = sizeof(@a) + ;; + + /* apply the alignment to the arg pointer */ + p = ap castto(intptr) + p = (p + align - 1) & ~(align - 1) + ap = p castto(valist) + + v = (ap castto(@a#))# + + /* only move on after we read through the value */ + ap = ((p castto(intptr)) + sizeof(@a)) castto(valist) + -> (v, ap) +} diff --git a/libstd/waitstatus-linux.myr b/libstd/waitstatus-linux.myr new file mode 100644 index 0000000..4068395 --- /dev/null +++ b/libstd/waitstatus-linux.myr @@ -0,0 +1,22 @@ +use "die.use" +pkg std = + type waitstatus = union + `Waitexit int32 + `Waitsig int32 + `Waitstop int32 + ;; + + const waitstatus : (st : int32 -> waitstatus) +;; + +const waitstatus = {st + if st & 0x7f == 0 /* if exited */ + -> `Waitexit ((st & 0xff00) >> 8) + elif ((st & 0xffff)-1) < 0xff /* if signaled */ + -> `Waitsig ((st) & 0x7f) + elif (((st & 0xffff)*0x10001)>>8) > 0x7f00 + -> `Waitstop ((st & 0xff00) >> 8) + ;; + die("unreachable") +} + diff --git a/libstd/waitstatus-osx.myr b/libstd/waitstatus-osx.myr new file mode 100644 index 0000000..15b8da7 --- /dev/null +++ b/libstd/waitstatus-osx.myr @@ -0,0 +1,19 @@ +use "die.use" +pkg std = + type waitstatus = union + `Waitexit int32 + `Waitsig int32 + `Waitstop int32 + ;; + + const waitstatus : (st : int32 -> waitstatus) +;; + +const waitstatus = {st + match st & 0o177 + | 0: -> `Waitexit (st >> 8) + | 0o177:-> `Waitstop (st >> 8) + | sig: -> `Waitsig sig + ;; + die("unreachable") +} diff --git a/mi/Makefile b/mi/Makefile new file mode 100644 index 0000000..b0ac3a6 --- /dev/null +++ b/mi/Makefile @@ -0,0 +1,8 @@ +LIB=libmi.a +OBJ=cfg.o \ + fold.o \ + df.o \ + +DEPS=../parse/libparse.a + +include ../mk/c.mk diff --git a/mi/cfg.c b/mi/cfg.c new file mode 100644 index 0000000..1e1497e --- /dev/null +++ b/mi/cfg.c @@ -0,0 +1,196 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include 
<sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" +#include "opt.h" + + +static Bb *mkbb(Cfg *cfg) +{ + Bb *bb; + + bb = zalloc(sizeof(Bb)); + bb->id = cfg->nextbbid++; + bb->pred = mkbs(); + bb->succ = mkbs(); + lappend(&cfg->bb, &cfg->nbb, bb); + return bb; +} + +static char *lblstr(Node *n) +{ + assert(exprop(n) == Olit); + assert(n->expr.args[0]->type == Nlit); + assert(n->expr.args[0]->lit.littype == Llbl); + return n->expr.args[0]->lit.lblval; +} + +static void label(Cfg *cfg, Node *lbl, Bb *bb) +{ + htput(cfg->lblmap, lblstr(lbl), bb); + lappend(&bb->lbls, &bb->nlbls, lblstr(lbl)); +} + +static int addnode(Cfg *cfg, Bb *bb, Node *n) +{ + switch (exprop(n)) { + case Ojmp: + case Ocjmp: + lappend(&bb->nl, &bb->nnl, n); + lappend(&cfg->fixjmp, &cfg->nfixjmp, n); + lappend(&cfg->fixblk, &cfg->nfixblk, bb); + return 1; + break; + default: + lappend(&bb->nl, &bb->nnl, n); + break; + } + return 0; +} + +static int islabel(Node *n) +{ + Node *l; + if (n->type != Nexpr) + return 0; + if (exprop(n) != Olit) + return 0; + l = n->expr.args[0]; + if (l->type != Nlit) + return 0; + if (l->lit.littype != Llbl) + return 0; + return 1; +} + +static Bb *addlabel(Cfg *cfg, Bb *bb, Node **nl, size_t i) +{ + /* if the current block assumes fall-through, insert an explicit jump */ + if (i > 0 && nl[i - 1]->type == Nexpr) { + if (exprop(nl[i - 1]) != Ocjmp && exprop(nl[i - 1]) != Ojmp) + addnode(cfg, bb, mkexpr(-1, Ojmp, mklbl(-1, lblstr(nl[i])), NULL)); + } + if (bb->nnl) + bb = mkbb(cfg); + label(cfg, nl[i], bb); + return bb; +} + +Cfg *mkcfg(Node **nl, size_t nn) +{ + Cfg *cfg; + Bb *pre, *post; + Bb *bb, *targ; + Node *a, *b; + size_t i; + + cfg = zalloc(sizeof(Cfg)); + cfg->lblmap = mkht(strhash, streq); + pre = mkbb(cfg); + bb = mkbb(cfg); + for (i = 0; i < nn; i++) { + switch (nl[i]->type) { + case Nexpr: + if (islabel(nl[i])) + bb = addlabel(cfg, bb, nl, i); + else if (addnode(cfg, bb, nl[i])) + bb = mkbb(cfg); + break; + 
break; + case Ndecl: + break; + default: + die("Invalid node type %s in mkcfg", nodestr(nl[i]->type)); + } + } + post = mkbb(cfg); + bsput(pre->succ, cfg->bb[1]->id); + bsput(cfg->bb[1]->pred, pre->id); + bsput(cfg->bb[cfg->nbb - 2]->succ, post->id); + bsput(post->pred, cfg->bb[cfg->nbb - 2]->id); + for (i = 0; i < cfg->nfixjmp; i++) { + bb = cfg->fixblk[i]; + switch (exprop(cfg->fixjmp[i])) { + case Ojmp: + a = cfg->fixjmp[i]->expr.args[0]; + b = NULL; + break; + case Ocjmp: + a = cfg->fixjmp[i]->expr.args[1]; + b = cfg->fixjmp[i]->expr.args[2]; + break; + default: + die("Bad jump fix thingy"); + break; + } + if (a) { + targ = htget(cfg->lblmap, lblstr(a)); + if (!targ) + die("No bb with label \"%s\"", lblstr(a)); + bsput(bb->succ, targ->id); + bsput(targ->pred, bb->id); + } + if (b) { + targ = htget(cfg->lblmap, lblstr(b)); + if (!targ) + die("No bb with label \"%s\"", lblstr(b)); + bsput(bb->succ, targ->id); + bsput(targ->pred, bb->id); + } + } + return cfg; +} + +void dumpcfg(Cfg *cfg, FILE *fd) +{ + size_t i, j; + Bb *bb; + char *sep; + + for (j = 0; j < cfg->nbb; j++) { + bb = cfg->bb[j]; + fprintf(fd, "\n"); + fprintf(fd, "Bb: %d labels=(", bb->id); + sep = ""; + for (i = 0; i < bb->nlbls; i++) {; + fprintf(fd, "%s%s", bb->lbls[i], sep); + sep = ","; + } + fprintf(fd, ")\n"); + + /* in edges */ + fprintf(fd, "Pred: "); + sep = ""; + for (i = 0; i < bsmax(bb->pred); i++) { + if (bshas(bb->pred, i)) { + fprintf(fd, "%s%zd", sep, i); + sep = ","; + } + } + fprintf(fd, "\n"); + + /* out edges */ + fprintf(fd, "Succ: "); + sep = ""; + for (i = 0; i < bsmax(bb->succ); i++) { + if (bshas(bb->succ, i)) { + fprintf(fd, "%s%zd", sep, i); + sep = ","; + } + } + fprintf(fd, "\n"); + + for (i = 0; i < bb->nnl; i++) + dump(bb->nl[i], fd); + fprintf(fd, "\n"); + } +} @@ -0,0 +1,40 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> 
+#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" +#include "opt.h" + +/* +static void nodeuse(Node *n, Bitset *bs) +{ +} + +static void nodedef(Node *n, Bitset *bs) +{ +} + +static void bbuse(Bb *bb, Bitset *bs) +{ +} + +static void bbdef(Bb *bb, Bitset *bs) +{ +} +*/ + +void flow(Cfg *cfg) +{ +} + +void checkret(Cfg *cfg) +{ +} diff --git a/mi/fold.c b/mi/fold.c new file mode 100644 index 0000000..05739e0 --- /dev/null +++ b/mi/fold.c @@ -0,0 +1,212 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdarg.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" +#include "opt.h" + +static int islit(Node *n, vlong *v) +{ + Node *l; + + if (exprop(n) != Olit) + return 0; + l = n->expr.args[0]; + if (l->lit.littype != Lint) + return 0; + *v = l->lit.intval; + return 1; +} + +static int isval(Node *n, vlong val) +{ + vlong v; + + if (!islit(n, &v)) + return 0; + return v == val; +} + +static Node *val(int line, vlong val, Type *t) +{ + Node *n; + + n = mkint(line, val); + n = mkexpr(line, Olit, n, NULL); + n->expr.type = t; + return n; +} + +static int issmallconst(Node *dcl) +{ + Type *t; + + if (!dcl->decl.isconst) + return 0; + if (!dcl->decl.init) + return 0; + t = tybase(exprtype(dcl->decl.init)); + if (t->type <= Tyfloat64) + return 1; + return 0; +} + +static Node *foldcast(Node *n) +{ + Type *to, *from; + Node *sub; + + sub = n->expr.args[0]; + to = exprtype(n); + from = exprtype(sub); + + switch (tybase(to)->type) { + case Tybool: + case Tyint8: case Tyint16: case Tyint32: case Tyint64: + case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64: + case Tyint: case Tyuint: case Tylong: case Tyulong: + case Tychar: case Tybyte: + case Typtr: + switch (tybase(from)->type) { + case Tybool: + case Tyint8: case Tyint16: case Tyint32: case Tyint64: + case Tyuint8: case Tyuint16: case Tyuint32: case Tyuint64: + 
case Tyint: case Tyuint: case Tylong: case Tyulong: + case Tychar: case Tybyte: + case Typtr: + if (exprop(sub) == Olit || tybase(from)->type == tybase(to)->type) { + sub->expr.type = to; + return sub; + } else { + return n; + } + default: + return n; + } + default: + return n; + } + return n; +} + + + +Node *fold(Node *n, int foldvar) +{ + Node **args, *r; + Type *t; + vlong a, b; + size_t i; + + if (!n) + return NULL; + if (n->type != Nexpr) + return n; + + r = NULL; + args = n->expr.args; + for (i = 0; i < n->expr.nargs; i++) + args[i] = fold(args[i], foldvar); + switch (exprop(n)) { + case Ovar: + if (foldvar && issmallconst(decls[n->expr.did])) + r = fold(decls[n->expr.did]->decl.init, foldvar); + break; + case Oadd: + /* x + 0 = 0 */ + if (isval(args[0], 0)) + r = args[1]; + if (isval(args[1], 0)) + r = args[0]; + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a + b, exprtype(n)); + break; + case Osub: + /* x - 0 = 0 */ + if (isval(args[1], 0)) + r = args[0]; + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a - b, exprtype(n)); + break; + case Omul: + /* 1 * x = x */ + if (isval(args[0], 1)) + r = args[1]; + if (isval(args[1], 1)) + r = args[0]; + /* 0 * x = 0 */ + if (isval(args[0], 0)) + r = args[0]; + if (isval(args[1], 0)) + r = args[1]; + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a * b, exprtype(n)); + break; + case Odiv: + /* x/1 = x */ + if (isval(args[1], 1)) + r = args[0]; + /* 0/x = 0 */ + if (isval(args[1], 0)) + r = args[1]; + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a / b, exprtype(n)); + break; + case Omod: + /* x%1 = x */ + if (isval(args[1], 0)) + r = args[0]; + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a % b, exprtype(n)); + break; + case Oneg: + if (islit(args[0], &a)) + r = val(n->line, -a, exprtype(n)); + break; + case Obsl: + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a << b, exprtype(n)); + break; + case Obsr: + 
if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a >> b, exprtype(n)); + break; + case Obor: + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a | b, exprtype(n)); + break; + case Oband: + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a & b, exprtype(n)); + break; + case Obxor: + if (islit(args[0], &a) && islit(args[1], &b)) + r = val(n->line, a ^ b, exprtype(n)); + break; + case Omemb: + t = tybase(exprtype(args[0])); + /* we only fold lengths right now */ + if (t->type == Tyarray && !strcmp(namestr(args[1]), "len")) + r = t->asize; + break; + case Ocast: + r = foldcast(n); + break; + default: + break; + } + + if (r) + return r; + else + return n; +} + diff --git a/mi/opt.h b/mi/opt.h new file mode 100644 index 0000000..7179cb4 --- /dev/null +++ b/mi/opt.h @@ -0,0 +1,34 @@ +typedef struct Cfg Cfg; +typedef struct Bb Bb; + +struct Cfg { + Bb **bb; + Bb *start; + Bb *end; + size_t nbb; + + /* for building bb */ + int nextbbid; + Htab *lblmap; /* label => Bb mapping */ + Node **fixjmp; + size_t nfixjmp; + Bb **fixblk; + size_t nfixblk; +}; + +struct Bb { + int id; + char **lbls; + size_t nlbls; + Node **nl; + size_t nnl; + Bitset *pred; + Bitset *succ; +}; + +/* expression folding */ +Node *fold(Node *n, int foldvar); +/* Takes a reduced block, and returns a flow graph. 
*/ +Cfg *mkcfg(Node **nl, size_t nn); +void dumpcfg(Cfg *c, FILE *fd); +void flow(Cfg *cfg); @@ -0,0 +1,113 @@ +.DEFAULT_GOAL=all +_DEPSDIR = .deps +_DEPS=$(addprefix $(_DEPSDIR)/, $(OBJ:.o=.d)) + +_LIBSRCHPATHS=$(addprefix -L, $(dir $(DEPS))) +_LIBINCPATHS=$(addprefix -I, $(dir $(DEPS))) +_LIBPATHS=$(addprefix -l, $(patsubst lib%.a,%,$(notdir $(DEPS)))) + +CFLAGS += -Wall -Werror -Wextra -Wno-unused-parameter -Wno-missing-field-initializers +CFLAGS += -g +CFLAGS += -MMD -MP -MF ${_DEPSDIR}/$(subst /,-,$*).d + +.PHONY: clean clean-gen clean-bin clean-obj clean-misc clean-backups +.PHONY: all + +all: subdirs $(BIN) $(LIB) $(EXTRA) + +$(LIB): $(OBJ) $(DEPS) + $(AR) -rcs $@ $(OBJ) + +$(BIN): $(OBJ) $(EXTRADEP) $(DEPS) + $(CC) -o $@ $(OBJ) $(_LIBSRCHPATHS) $(_LIBPATHS) + +$(DEPS): + @cd $(dir $@) && $(MAKE) + +subdirs: + @for i in $(SUB); do (\ + cd $$i && \ + $(MAKE) || \ + exit 1 \ + ) || exit 1; done + +subdirs-clean: + @for i in $(SUB); do (\ + cd $$i && \ + $(MAKE) clean|| \ + exit 1 \ + ); done + +subdirs-install: + @for i in $(SUB); do (\ + cd $$i && \ + $(MAKE) install|| \ + exit 1 \ + ); done + + +clean: subdirs-clean + rm -f ${BIN} ${OBJ} ${CLEAN} + + +install: subdirs-install $(INSTBIN) $(INSTLIB) $(INSTHDR) $(INSTPKG) + @if [ ! -z "$(INSTBIN)" ]; then \ + echo install $(abspath $(INSTBIN) $(DESTDIR)/$(INST_ROOT)/bin); \ + mkdir -p $(abspath $(DESTDIR)/$(INST_ROOT)/bin); \ + install $(INSTBIN) $(abspath $(DESTDIR)/$(INST_ROOT)/bin); \ + fi + @if [ ! -z "$(INSTLIB)" ]; then \ + echo install -m 644 $(INSTLIB) $(abspath $(DESTDIR)/$(INST_ROOT)/lib); \ + mkdir -p $(abspath $(DESTDIR)/$(INST_ROOT)/lib); \ + install -m 644 $(INSTLIB) $(abspath $(DESTDIR)/$(INST_ROOT)/lib); \ + fi + @if [ ! -z "$(INSTHDR)" ]; then \ + echo install $(INSTHDR) $(abspath $(DESTDIR)/$(INST_ROOT)/include); \ + mkdir -p $(abspath $(DESTDIR)/$(INST_ROOT)/include); \ + install $(INSTHDR) $(abspath $(DESTDIR)/$(INST_ROOT)/include); \ + fi + @if [ ! 
-z "$(INSTPKG)" ]; then \ + echo install $(abspath $(INSTPKG) $(DESTDIR)/$(INST_ROOT)/lib/pkgconfig); \ + mkdir -p $(abspath $(DESTDIR)/$(INST_ROOT)/lib/pkgconfig); \ + install $(abspath $(INSTPKG) $(DESTDIR)/$(INST_ROOT)/lib/pkgconfig); \ + fi + +subdirs-uninstall: + @for i in $(SUB); do (\ + cd $$i && \ + $(MAKE) uninstall|| \ + exit 1 \ + ); done + +uninstall: subdirs-uninstall + @for i in $(INSTBIN); do \ + echo rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/bin/$$i); \ + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/bin/$$i); \ + done + @for i in $(INSTLIB); do \ + echo rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/lib/$$i); \ + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/lib/$$i); \ + done + @for i in $(INSTHDR); do \ + echo rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/include/$$i); \ + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/include/$$i); \ + done + @for i in $(INSTPKG); do \ + echo rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/lib/pkgconfig/$$i); \ + rm -f $(abspath $(DESTDIR)/$(INST_ROOT)/lib/pkgconfig/$$i); \ + done + +clean-backups: + find ./ -name .*.sw* -exec rm -f {} \; + find ./ -name *.bak -exec rm -f {} \; + +%.o: %.c $(GENHDR) .deps + $(CC) -c $(CFLAGS) $(_LIBINCPATHS) $< + +.deps: + mkdir -p $(_DEPSDIR) + +config.mk: configure + ./configure + +-include $(_DEPS) diff --git a/mk/lexyacc.mk b/mk/lexyacc.mk new file mode 100644 index 0000000..ebd7b19 --- /dev/null +++ b/mk/lexyacc.mk @@ -0,0 +1,7 @@ +NECFLAGS = $(subst -Werror,,$(subst -Wall,,$(CFLAGS))) + +%.h %.c: %.y + yacc -d -o$*.c $< + +%.c: %.l + flex -o$*.c $< diff --git a/muse/Makefile b/muse/Makefile new file mode 100644 index 0000000..7cc6da5 --- /dev/null +++ b/muse/Makefile @@ -0,0 +1,7 @@ +INSTBIN=muse +BIN=muse +OBJ=muse.o + +DEPS=../parse/libparse.a + +include ../mk/c.mk diff --git a/muse/muse.c b/muse/muse.c new file mode 100644 index 0000000..ca96593 --- /dev/null +++ b/muse/muse.c @@ -0,0 +1,150 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <string.h> +#include 
<assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" + +#include "../config.h" + +/* FIXME: move into one place...? */ +Node *file; +char *outfile; +int merge; +int debug; +char debugopt[128]; +char **incpaths; +size_t nincpaths; + +static void usage(char *prog) +{ + printf("%s [-hIdos] [-o outfile] [-m] inputs\n", prog); + printf("\t-h\tprint this help\n"); + printf("\t-m\ttreat the inputs as usefiles and merge them into outfile\n"); + printf("\t\tThe outfile must be the same name as each package merged.\n"); + printf("\t-I path\tAdd 'path' to use search path\n"); + printf("\t-d\tPrint debug dumps\n"); + printf("\t-o out\tOutput to outfile\n"); + printf("\t-s\tShow the contents of usefiles `inputs`\n"); +} + +/* Loads the usefile at 'path' into the current file's globals and + * dumps the resulting symbol table to stdout. Dies if the usefile + * cannot be opened. */ +static void dumpuse(char *path) +{ + Stab *globls; + FILE *f; + + globls = file->file.globls; + f = fopen(path, "r"); + if (!f) + die("Couldn't open %s\n", path); + loaduse(f, globls); + fclose(f); + dumpstab(globls, stdout); +} + +/* Parses and infers the source file at 'path', then writes its + * usefile to 'outfile' when one was given, or otherwise to the name + * swapsuffix() derives from 'path' (.myr -> .use). Dies if the + * output file cannot be opened. */ +static void genuse(char *path) +{ + Stab *globls; + char *p; + FILE *f; + char buf[1024]; + + globls = file->file.globls; + tyinit(globls); + tokinit(path); + yyparse(); + + infer(file); + if (outfile) { + p = outfile; + } else { + swapsuffix(buf, sizeof buf, path, ".myr", ".use"); + p = buf; + } + f = fopen(p, "w"); + if (!f) + die("Couldn't open %s\n", p); + writeuse(f, file); + fclose(f); +} + +static void mergeuse(char *path) +{ + FILE *f; + Stab *st; + + st = file->file.exports; + f = fopen(path, "r"); + if (!f) + die("Couldn't open %s\n", path); + loaduse(f, st); + fclose(f); +} + +int main(int argc, char **argv) +{ + FILE *f; + int opt; + int i; + + while ((opt = getopt(argc, argv, "d::hmo:I:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + exit(0); + break; + case 'm': + merge = 1; + break; + case 'o': + outfile = optarg; + break; + case 'd': + debug = 1; + while (optarg && *optarg) + debugopt[*optarg++ & 0x7f] = 1; + break; + case 'I': + lappend(&incpaths, &nincpaths, optarg); + break; + default: + usage(argv[0]); + exit(0); +
break; + } + } + + lappend(&incpaths, &nincpaths, Instroot "/lib/myr"); + if (merge) { + if (!outfile) { + fprintf(stderr, "Output file needed when merging usefiles."); + exit(1); + } + + file = mkfile("internal"); + file->file.exports = mkstab(); + file->file.globls = mkstab(); + updatens(file->file.exports, outfile); + tyinit(file->file.globls); + for (i = optind; i < argc; i++) + mergeuse(argv[i]); + infer(file); + f = fopen(outfile, "w"); + writeuse(f, file); + fclose(f); + } else { + for (i = optind; i < argc; i++) { + file = mkfile(argv[i]); + file->file.exports = mkstab(); + file->file.globls = mkstab(); + if (debugopt['s']) + dumpuse(argv[i]); + else + genuse(argv[i]); + } + } + + return 0; +} diff --git a/myrbuild/Makefile b/myrbuild/Makefile new file mode 100644 index 0000000..d631b0d --- /dev/null +++ b/myrbuild/Makefile @@ -0,0 +1,8 @@ +INSTBIN=myrbuild +BIN=myrbuild +OBJ=myrbuild.o + +DEPS=../parse/libparse.a + +include ../config.mk +include ../mk/c.mk diff --git a/myrbuild/myrbuild.c b/myrbuild/myrbuild.c new file mode 100644 index 0000000..a25978a --- /dev/null +++ b/myrbuild/myrbuild.c @@ -0,0 +1,495 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/utsname.h> +#include <ctype.h> +#include <fcntl.h> +#include <unistd.h> +#include <regex.h> +#include <err.h> + +#include "parse.h" + +#include "../config.h" + +/* make libparse happy */ +Node *file; +char *filename; + +/* options to pass along to the compiler */ +int genasm = 0; + +/* binaries we call out to */ +char *mc = "6m"; +char *as = "as"; +char *ar = "ar"; +char *ld = "ld"; +char *muse = "muse"; +/* the name of the output file */ +char *libname; +char *binname; +/* additional paths to search for packages */ +char **incpaths; +size_t nincpaths; +/* libraries to link against, and their deps */ +Htab *libgraph; /* string -> null terminated string list */ +/* 
the linker script to use */ +char *ldscript; + +char *sysname; + +regex_t usepat; +Htab *compiled; /* used as string set */ +Htab *loopdetect; /* used as string set */ + +static void usage(char *prog) +{ + printf("%s [-h] [-I path] [-l lib] [-b bin] inputs...\n", prog); + printf("\t-h\tprint this help\n"); + printf("\t-b bin\tBuild a binary called 'bin'\n"); + printf("\t-l lib\tBuild a library called 'name'\n"); + printf("\t-s script\tUse the linker script 'script' when linking\n"); + printf("\t-I path\tAdd 'path' to use search path\n"); + printf("\t-S\tGenerate assembly files for all compiled code\n"); +} + +int hassuffix(char *path, char *suffix) +{ + int pathlen; + int sufflen; + + pathlen = strlen(path); + sufflen = strlen(suffix); + + if (sufflen > pathlen) + return 0; + return !strcmp(&path[pathlen-sufflen], suffix); +} + +int isquoted(char *path) +{ + return path[0] == '"' && path[strlen(path) - 1] == '"'; +} + +char *usetomyr(char *path) +{ + char buf[1024]; + /* skip initial quote */ + path++; + if (!hassuffix(path, ".use\"")) { + fprintf(stderr, "\"%s, should end with \".use\"\n", path); + exit(1); + } + swapsuffix(buf, 1024, path, ".use\"", ".myr"); + return strdup(buf); +} + +void printl(char **lst) +{ + printf("\t"); + printf("%s\t", *lst++); + while (*lst) + printf("%s ", *lst++); + printf("\n"); +} + +void gencmd(char ***cmd, size_t *ncmd, char *bin, char *file, char **extra, size_t nextra) +{ + size_t i; + + *cmd = NULL; + *ncmd = 0; + lappend(cmd, ncmd, bin); + for (i = 0; i < nincpaths; i++) { + lappend(cmd, ncmd, "-I"); + lappend(cmd, ncmd, incpaths[i]); + } + for (i = 0; i < nextra; i++) + lappend(cmd, ncmd, extra[i]); + lappend(cmd, ncmd, file); + lappend(cmd, ncmd, NULL); +} + +void run(char **cmd) +{ + pid_t pid; + int status; + + printl(cmd); + pid = fork(); + status = 0; + if (pid == -1) { + err(1, "Could not fork"); + } else if (pid == 0) { + if (execvp(cmd[0], cmd) == -1) + err(1, "Failed to exec %s", cmd[0]); + } else { + waitpid(pid, 
&status, 0); + } + if (WIFEXITED(status) && WEXITSTATUS(status) != 0) + exit(WEXITSTATUS(status)); + else if (WIFSIGNALED(status)) + die("%s: exited with signal %d\n", cmd[0], WTERMSIG(status)); +} + +int isfresh(char *from, char *to) +{ + struct stat from_sb, to_sb; + + if (stat(from, &from_sb)) + err(1, "Could not find %s", from); + if (stat(to, &to_sb) == -1) + return 0; + + return from_sb.st_mtime <= to_sb.st_mtime; +} + +int inlist(char **list, size_t sz, char *str) +{ + size_t i; + + for (i = 0; i < sz; i++) + if (!strcmp(list[i], str)) + return 1; + return 0; +} + +void getdeps(char *file, char **deps, size_t depsz, size_t *ndeps) +{ + char buf[2048]; + + regmatch_t m[2]; + size_t i; + FILE *f; + char *dep; + + f = fopen(file, "r"); + if (!f) + err(1, "Could not open file \"%s\"", file); + + i = 0; + while (fgets(buf, sizeof buf, f)) { + if (regexec(&usepat, buf, 2, m, 0) == REG_NOMATCH) + continue; + if (i == depsz) + die("Too many deps for file %s", file); + dep = strdupn(&buf[m[1].rm_so], m[1].rm_eo - m[1].rm_so); + if (!inlist(deps, i, dep)) + deps[i++] = dep; + else + free(dep); + } + fclose(f); + *ndeps = i; +} + +FILE *openlib(char *lib) +{ + FILE *f; + char buf[1024]; + size_t i; + + snprintf(buf, sizeof buf, "%s/%s/%s", Instroot, "/lib/myr", lib); + f = fopen(buf, "r"); + if (f) + return f; + for (i = 0; i < nincpaths; i++) { + snprintf(buf, sizeof buf, "%s/%s", incpaths[i], lib); + f = fopen(buf, "r"); + if (f) + return f; + } + err(1, "could not open library file %s\n", lib); +} + +void scrapelib(Htab *g, char *lib) +{ + char **deps; + size_t ndeps; + FILE *use; + char *l; + + if (hthas(libgraph, lib)) + return; + deps = NULL; + ndeps = 0; + use = openlib(lib); + if (fgetc(use) != 'U') + err(1, "library \"%s\" is not a usefile.", lib); + /* we don't care about the usefile's name */ + free(rdstr(use)); + while (fgetc(use) == 'L') { + l = rdstr(use); + lappend(&deps, &ndeps, l); + scrapelib(g, l); + } + lappend(&deps, &ndeps, NULL); + htput(g, lib, 
deps); +} + +void compile(char *file) +{ + size_t i, ndeps; + char **cmd; + size_t ncmd; + char *s; + char *localdep; + char *deps[512]; + char use[1024]; + char obj[1024]; + char *extra[32]; + size_t nextra = 0; + + if (hthas(compiled, file)) + return; + if (hthas(loopdetect, file)) + die("Cycle in dependency graph, involving %s\n", file); + htput(loopdetect, file, file); + if (hassuffix(file, ".myr")) { + swapsuffix(use, sizeof use, file, ".myr", ".use"); + swapsuffix(obj, sizeof obj, file, ".myr", ".o"); + getdeps(file, deps, 512, &ndeps); + for (i = 0; i < ndeps; i++) { + if (isquoted(deps[i])) { + localdep = usetomyr(deps[i]); + compile(localdep); + free(localdep); + } else { + scrapelib(libgraph, deps[i]); + } + } + if (isfresh(file, use)) + goto done; + if (isfresh(file, obj)) + goto done; + if (genasm) + extra[nextra++] = "-S"; + gencmd(&cmd, &ncmd, mc, file, extra, nextra); + run(cmd); + } else if (hassuffix(file, ".s")) { + swapsuffix(obj, sizeof obj, file, ".s", ".o"); + if (isfresh(file, obj)) + goto done; + extra[nextra++] = "-g"; + extra[nextra++] = "-o"; + extra[nextra++] = obj; + gencmd(&cmd, &ncmd, as, file, extra, nextra); + run(cmd); + } +done: + s = strdup(file); + htput(compiled, s, s); + htdel(loopdetect, file); +} + +void mergeuse(char **files, size_t nfiles) +{ + char **args; + size_t i, nargs; + char buf[1024]; + + args = NULL; + nargs = 0; + lappend(&args, &nargs, strdup(muse)); + lappend(&args, &nargs, strdup("-mo")); + lappend(&args, &nargs, strdup(libname)); + for (i = 0; i < nfiles; i++) { + if (hassuffix(files[i], ".myr")) { + swapsuffix(buf, sizeof buf, files[i], ".myr", ".use"); + lappend(&args, &nargs, strdup(buf)); + } else if (!hassuffix(files[i], ".s")) { + die("Unknown file type %s", files[i]); + } + } + lappend(&args, &nargs, NULL); + + run(args); + + for (i = 0; i < nargs; i++) + free(args[i]); + lfree(&args, &nargs); +} + +void archive(char **files, size_t nfiles) +{ + char **args; + size_t i, nargs; + char buf[1024]; + + 
args = NULL; + nargs = 0; + snprintf(buf, sizeof buf, "lib%s.a", libname); + lappend(&args, &nargs, strdup(ar)); + lappend(&args, &nargs, strdup("-rcs")); + lappend(&args, &nargs, strdup(buf)); + for (i = 0; i < nfiles; i++) { + if (hassuffix(files[i], ".myr")) + swapsuffix(buf, sizeof buf, files[i], ".myr", ".o"); + else if (hassuffix(files[i], ".s")) + swapsuffix(buf, sizeof buf, files[i], ".s", ".o"); + else + die("Unknown file type %s", files[i]); + lappend(&args, &nargs, strdup(buf)); + } + lappend(&args, &nargs, NULL); + + run(args); + + for (i = 0; i < nargs; i++) + free(args[i]); + lfree(&args, &nargs); +} + +void visit(char ***args, size_t *nargs, size_t head, Htab *g, char *n, Htab *looped, Htab *marked) +{ + char **deps; + char buf[1024]; + + if (hthas(looped, n)) + err(1, "cycle in library dependency graph involving %s\n", n); + if (hthas(marked, n)) + return; + htput(looped, n, n); + for (deps = htget(g, n); *deps; deps++) + visit(args, nargs, head, g, *deps, looped, marked); + htdel(looped, n); + htput(marked, n, n); + snprintf(buf, sizeof buf, "-l%s", n); + linsert(args, nargs, head, strdup(buf)); +} + +/* topologically sorts the dependency graph of the libraries. 
*/ +void addlibs(char ***args, size_t *nargs, Htab *g) +{ + void **libs; + size_t nlibs; + size_t i; + size_t head; + Htab *looped; + Htab *marked; + + libs = htkeys(g, &nlibs); + looped = mkht(strhash, streq); + marked = mkht(strhash, streq); + head = *nargs; + for (i = 0; i < nlibs; i++) + visit(args, nargs, head, g, libs[i], looped, marked); + /* the key array returned by htkeys() is released by the caller */ + free(libs); +} + +/* Builds the linker command line for the given input files (each + * .myr or .s name is mapped to its .o), appends the library search + * paths and the libraries from libgraph, and runs the linker. The + * output is 'binname', defaulting to a.out. */ +void linkobj(char **files, size_t nfiles) +{ + char **args; + size_t i, nargs; + char buf[1024]; + + if (!binname) + binname = "a.out"; + + args = NULL; + nargs = 0; + + /* ld -T ldscript -o outfile */ + lappend(&args, &nargs, strdup(ld)); + lappend(&args, &nargs, strdup("-o")); + lappend(&args, &nargs, strdup(binname)); + + /* ld -T ldscript */ + if (ldscript) { + snprintf(buf, sizeof buf, "-T%s", ldscript); + lappend(&args, &nargs, strdup(buf)); + } + + /* ld -T ldscript -o outfile foo.o bar.o baz.o */ + for (i = 0; i < nfiles; i++) { + if (hassuffix(files[i], ".myr")) + swapsuffix(buf, sizeof buf, files[i], ".myr", ".o"); + else if (hassuffix(files[i], ".s")) + swapsuffix(buf, sizeof buf, files[i], ".s", ".o"); + else + die("Unknown file type %s", files[i]); + lappend(&args, &nargs, strdup(buf)); + } + + /* ld -T ldscript -o outfile foo.o bar.o baz.o -L/path1 -L/path2 */ + for (i = 0; i < nincpaths; i++) { + snprintf(buf, sizeof buf, "-L%s", incpaths[i]); + lappend(&args, &nargs, strdup(buf)); + } + snprintf(buf, sizeof buf, "-L%s%s", Instroot, "/lib/myr"); + lappend(&args, &nargs, strdup(buf)); + + /* ld -T ldscript -o outfile foo.o bar.o baz.o -L/path1 -L/path2 -llib1 -llib2*/ + addlibs(&args, &nargs, libgraph); + + /* OSX wants a minimum version specified to prevent warnings. + * sysname is only set when uname() succeeds, so it may be NULL. */ + if (sysname && !strcmp(sysname, "Darwin")) { + lappend(&args, &nargs, strdup("-macosx_version_min")); + lappend(&args, &nargs, strdup("10.6")); + } + + /* the null terminator for exec() */ + lappend(&args, &nargs, NULL); + + run(args); + + for (i = 0; i < nargs; i++) + free(args[i]); + lfree(&args, &nargs); +} + +int main(int argc, char **argv) +{ +
int opt; + int i; + struct utsname name; + + if (uname(&name) == 0) + sysname = strdup(name.sysname); + while ((opt = getopt(argc, argv, "hb:l:s:SI:C:A:M:L:R:")) != -1) { + switch (opt) { + case 'b': binname = optarg; break; + case 'l': libname = optarg; break; + case 's': ldscript = optarg; break; + case 'S': genasm = 1; break; + case 'C': mc = optarg; break; + case 'A': as = optarg; break; + case 'M': muse = optarg; break; + case 'L': ld = optarg; break; + case 'R': ar = optarg; break; + case 'I': + lappend(&incpaths, &nincpaths, optarg); + break; + case 'h': + usage(argv[0]); + exit(0); + break; + default: + usage(argv[0]); + exit(0); + break; + } + } + + if (libname && binname) + die("Can't specify both library and binary names"); + + libgraph = mkht(strhash, streq); + compiled = mkht(strhash, streq); + loopdetect = mkht(strhash, streq); + regcomp(&usepat, "^[[:space:]]*use[[:space:]]+([^[:space:]]+)", REG_EXTENDED); + for (i = optind; i < argc; i++) + compile(argv[i]); + if (libname) { + mergeuse(&argv[optind], argc - optind); + archive(&argv[optind], argc - optind); + } else { + linkobj(&argv[optind], argc - optind); + } + + return 0; +} diff --git a/myrtypes/Makefile b/myrtypes/Makefile new file mode 100644 index 0000000..e23ed22 --- /dev/null +++ b/myrtypes/Makefile @@ -0,0 +1,7 @@ +INSTBIN=myrtypes +BIN=myrtypes +OBJ=myrtypes.o + +DEPS=../parse/libparse.a + +include ../mk/c.mk diff --git a/myrtypes/myrtypes.c b/myrtypes/myrtypes.c new file mode 100644 index 0000000..a23c1ab --- /dev/null +++ b/myrtypes/myrtypes.c @@ -0,0 +1,222 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" + +#include "../config.h" + +/* FIXME: move into one place...? 
*/ +Node *file; +int fromuse; +int debug; +char debugopt[128]; +char **incpaths; +size_t nincpaths; + +static void printindent(int n) +{ + int i; + for (i = 0; i < n; i++) + printf(" "); +} + +static void dumptypes(Node *n, int indent) +{ + size_t i; + char *ty; + + if (!n) + return; + switch (n->type) { + case Nfile: + for (i = 0; i < n->file.nuses; i++) + dumptypes(n->file.uses[i], indent); + for (i = 0; i < n->file.nstmts; i++) + dumptypes(n->file.stmts[i], indent); + break; + case Ndecl: + printindent(indent); + if (n->decl.isconst) + printf("const "); + else + printf("var "); + ty = tystr(n->decl.type); + printf("%s : %s\n", namestr(n->decl.name), ty); + free(ty); + dumptypes(n->decl.init, indent + 1); + break; + case Nblock: + for (i = 0; i < n->block.nstmts; i++) + dumptypes(n->block.stmts[i], indent); + break; + case Nifstmt: + dumptypes(n->ifstmt.cond, indent); + dumptypes(n->ifstmt.iftrue, indent); + dumptypes(n->ifstmt.iffalse, indent); + break; + case Nloopstmt: + dumptypes(n->loopstmt.init, indent); + dumptypes(n->loopstmt.cond, indent); + dumptypes(n->loopstmt.step, indent); + dumptypes(n->loopstmt.body, indent); + break; + case Niterstmt: + dumptypes(n->iterstmt.elt, indent); + dumptypes(n->iterstmt.seq, indent); + dumptypes(n->iterstmt.body, indent); + break; + case Nmatchstmt: + dumptypes(n->matchstmt.val, indent); + for (i = 0; i < n->matchstmt.nmatches; i++) + dumptypes(n->matchstmt.matches[i], indent); + break; + case Nmatch: + dumptypes(n->match.pat, indent); + dumptypes(n->match.block, indent); + break; + case Nuse: + printindent(indent); + if (n->use.islocal) + printf("Use \"%s\"\n", n->use.name); + else + printf("Use %s\n", n->use.name); + break; + case Nexpr: + dumptypes(n->expr.idx, indent); + for (i = 0; i < n->expr.nargs; i++) + dumptypes(n->expr.args[i], indent); + break; + case Nlit: + switch (n->lit.littype) { + case Lfunc: dumptypes(n->lit.fnval, indent); break; + default: break; + } + break; + case Nfunc: + printindent(indent); + 
printf("Args:\n"); + for (i = 0; i < n->func.nargs; i++) + dumptypes(n->func.args[i], indent+1); + printindent(indent); + printf("Body:\n"); + dumptypes(n->func.body, indent + 1); + break; + case Nimpl: + die("Ntrait/Nimpl not yet supported!"); + break; + case Nname: + break; + case Nnone: + die("Nnone not a real node type!"); + break; + } +} + +void dumpucon(Ucon *uc, int indent) +{ + printindent(indent); + printf("`"); + if (uc->name->name.ns) + printf("%s.", uc->name->name.ns); + printf("%s\n", uc->name->name.name); +} + +/* Prints the contents of the symbol table 'st' at the given indent: + * its declarations, then its union constructors, then each + * sub-namespace, recursing into the namespace's own table. */ +void dumpsyms(Stab *st, int indent) +{ + size_t i, n; + void **k; + + /* decls */ + k = htkeys(st->dcl, &n); + for (i = 0; i < n; i++) { + dumptypes(getdcl(st, k[i]), indent); + } + free(k); + + /* union constructors */ + k = htkeys(st->uc, &n); + for (i = 0; i < n; i++) + dumpucon(getucon(st, k[i]), indent + 1); + /* release this key array before k is reused below */ + free(k); + + + /* sub-namespaces */ + k = htkeys(st->ns, &n); + for (i = 0; i < n; i++) { + printindent(indent + 1); + printf("namespace %s:\n", (char*)k[i]); + dumpsyms(getns_str(st, k[i]), indent + 2); + } + + free(k); +} + +static void usage(char *prog) +{ + printf("%s [-hu] [-d opt][-I path] inputs\n", prog); + printf("\t-h\tprint this help\n"); + printf("\t-I path\tAdd 'path' to use search path\n"); + printf("\t-d\tPrint debug dumps\n"); + printf("\t-u\tLoad the symbols to dump from a use file\n"); +} + +int main(int argc, char **argv) +{ + FILE *f; + int opt; + int i; + + while ((opt = getopt(argc, argv, "hud:I:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + exit(0); + break; + case 'u': + fromuse = 1; + break; + case 'd': + debug = 1; + while (optarg && *optarg) + debugopt[*optarg++ & 0x7f] = 1; + break; + case 'I': + lappend(&incpaths, &nincpaths, optarg); + break; + default: + usage(argv[0]); + exit(0); + break; + } + } + + for (i = optind; i < argc; i++) { + lappend(&incpaths, &nincpaths, Instroot "/lib/myr"); + file = mkfile(argv[i]); + file->file.exports = mkstab(); + file->file.globls = mkstab(); +
tyinit(file->file.globls); + printf("%s:\n", argv[i]); + if (fromuse) { + f = fopen(argv[i], "r"); + if (!f) + die("Unable to open usefile %s\n", argv[i]); + loaduse(f, file->file.globls); + dumpsyms(file->file.globls, 1); + } else { + tokinit(argv[i]); + yyparse(); + infer(file); + dumpsyms(file->file.globls, 1); + } + } + + return 0; +} diff --git a/parse/Makefile b/parse/Makefile new file mode 100644 index 0000000..36d9fc7 --- /dev/null +++ b/parse/Makefile @@ -0,0 +1,20 @@ +LIB=libparse.a +OBJ=bitset.o \ + dump.o \ + gram.o \ + htab.o \ + infer.o \ + names.o \ + node.o \ + specialize.o \ + stab.o \ + tok.o \ + type.o \ + use.o \ + util.o + +GENHDR=gram.h +CLEAN=gram.c gram.h + +include ../mk/lexyacc.mk +include ../mk/c.mk diff --git a/parse/bitset.c b/parse/bitset.c new file mode 100644 index 0000000..84f2402 --- /dev/null +++ b/parse/bitset.c @@ -0,0 +1,210 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <assert.h> +#include <limits.h> +#include <string.h> + +#include "parse.h" + +#define Sizetbits (CHAR_BIT*sizeof(size_t)) /* used in graph reprs */ + +/* Equalizes the size of a and b by + * growing the smaller to the size of the + * larger, zeroing out the new elements. + * This allows the code to simply iterate + * over both without keeping track of the + * minimum size. + */ +static void eqsz(Bitset *a, Bitset *b) +{ + size_t sz; + size_t i; + size_t *p; + + if (a->nchunks > b->nchunks) + sz = a->nchunks; + else + sz = b->nchunks; + + p = zalloc(sz * sizeof(size_t)); + for (i = 0; i < a->nchunks; i++) + p[i] = a->chunks[i]; + free(a->chunks); + a->chunks = p; + a->nchunks = sz; + + p = zalloc(sz * sizeof(size_t)); + for (i = 0; i < b->nchunks; i++) + p[i] = b->chunks[i]; + free(b->chunks); + b->chunks = p; + b->nchunks = sz; +} + +/* Creates a new all-zero bit set */ +Bitset *mkbs() +{ + Bitset *bs; + + bs = xalloc(sizeof(Bitset)); + bs->nchunks = 1; + bs->chunks = zalloc(1*sizeof(size_t)); + return bs; +} + +/* Frees a bitset. 
Safe to call on NULL. */ +void bsfree(Bitset *bs) +{ + if (!bs) + return; + free(bs->chunks); + free(bs); +} + +/* Duplicates a bitset. NULL is duplicated to NULL. */ +Bitset *bsdup(Bitset *a) +{ + Bitset *bs; + + if (!a) + return NULL; + bs = xalloc(sizeof(Bitset)); + bs->nchunks = a->nchunks; + bs->chunks = xalloc(a->nchunks*sizeof(size_t)); + memcpy(bs->chunks, a->chunks, a->nchunks*sizeof(size_t)); + return bs; +} + +/* Zeroes all values in a bit set */ +Bitset *bsclear(Bitset *bs) +{ + size_t i; + + if (!bs) + return mkbs(); + for (i = 0; i < bs->nchunks; i++) + bs->chunks[i] = 0; + return bs; +} + +/* Counts the number of values held in a bit set */ +size_t bscount(Bitset *bs) +{ + size_t i, j, n; + + n = 0; + for (i = 0; i < bs->nchunks; i++) + for (j = 0; j < sizeof(size_t)*CHAR_BIT; j++) + if (bs->chunks[i] & 1ULL << j) + n++; + return n; +} + +/* A slightly tricky function to iterate over the contents + * of a bitset. It returns true immediately if 'elt' is in + * the bitset, otherwise it seeks forward to the next value + * held in the bitset and stores it in elt. If there are no + * more values, it returns false to stop iteration. Note, + * this means that you need to increment elt every time you + * pass through. + * + * Typical usage of this function: + * + * for (i = 0; bsiter(set, &i); i++) + * use(i); + * + * The increment of 'i' in the for loop is needed in order + * to prevent the function from returning the same value + * repeatedly. + */ +int bsiter(Bitset *bs, size_t *elt) +{ + size_t i; + + for (i = *elt; i < bsmax(bs); i++) { + while (i < bsmax(bs) && !bs->chunks[i/Sizetbits]) + i = (i + Sizetbits) & ~(Sizetbits - 1); + if (bshas(bs, i)) { + *elt = i; + return 1; + } + } + return 0; +} + +/* Returns the largest value that the bitset can possibly + * hold. It's conservative, but scanning the entire bitset + * is a bit slow. This is mostly an aid to iterate over it. 
*/ +size_t bsmax(Bitset *bs) +{ + return bs->nchunks*Sizetbits; +} + +void bsput(Bitset *bs, size_t elt) +{ + size_t sz; + if (elt >= bs->nchunks*Sizetbits) { + sz = (elt/Sizetbits)+1; + bs->chunks = zrealloc(bs->chunks, bs->nchunks*sizeof(size_t), sz*sizeof(size_t)); + bs->nchunks = sz; + } + bs->chunks[elt/Sizetbits] |= 1ULL << (elt % Sizetbits); +} + +void bsdel(Bitset *bs, size_t elt) +{ + if (elt < bs->nchunks*Sizetbits) + bs->chunks[elt/Sizetbits] &= ~(1ULL << (elt % Sizetbits)); +} + + +void bsunion(Bitset *a, Bitset *b) +{ + size_t i; + + eqsz(a, b); + for (i = 0; i < a->nchunks; i++) + a->chunks[i] |= b->chunks[i]; +} + +void bsintersect(Bitset *a, Bitset *b) +{ + size_t i; + + eqsz(a, b); + for (i = 0; i < a->nchunks; i++) + a->chunks[i] &= b->chunks[i]; +} + +void bsdiff(Bitset *a, Bitset *b) +{ + size_t i; + + eqsz(a, b); + for (i = 0; i < a->nchunks; i++) + a->chunks[i] &= ~b->chunks[i]; +} + +int bseq(Bitset *a, Bitset *b) +{ + size_t i; + + eqsz(a, b); + for (i = 0; i < a->nchunks; i++) { + if (a->chunks[i] != b->chunks[i]) + return 0; + } + return 1; +} + +int bsissubset(Bitset *set, Bitset *sub) +{ + size_t i; + + eqsz(set, sub); + for (i = 0; i < set->nchunks; i++) + if ((sub->chunks[i] & set->chunks[i]) != set->chunks[i]) + return 0; + return 1; +} diff --git a/parse/dump.c b/parse/dump.c new file mode 100644 index 0000000..e921b1c --- /dev/null +++ b/parse/dump.c @@ -0,0 +1,251 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" + +static void indent(FILE *fd, int depth) +{ + int i; + for (i = 0; i < depth; i++) + fprintf(fd, " "); +} + +/* outputs a fully qualified name */ +static void outname(Node *n, FILE *fd) +{ + if (n->name.ns) + fprintf(fd, "%s.", n->name.ns); + fprintf(fd, "%s", n->name.name); +} + +/* outputs a sym in a one-line short form (ie, + * the 
initializer is not printed, and the node is not + * expressed in indented tree. */ +static void outsym(Node *s, FILE *fd, int depth) +{ + char buf[1024]; + + indent(fd, depth); + if (s->decl.isconst) + fprintf(fd, "const "); + else + fprintf(fd, "var "); + outname(s->decl.name, fd); + fprintf(fd, " : %s\n", tyfmt(buf, 1024, s->decl.type)); +} + +void dumpsym(Node *s, FILE *fd) +{ + outsym(s, fd, 0); +} + +/* Outputs a symbol table, and its sub-tables + * recursively, with a sigil describing the symbol + * type, as follows: + * T type + * S symbol + * N namespace + * + * A NULL table is printed as "Stab (nil)". + * Does not print captured variables. + */ +static void outstab(Stab *st, FILE *fd, int depth) +{ + size_t i, n; + void **k; + char *ty; + Type *t; + + indent(fd, depth); + /* check before touching st: the header line below dereferences it */ + if (!st) { + fprintf(fd, "Stab (nil)\n"); + return; + } + fprintf(fd, "Stab %p (super = %p, name=\"%s\")\n", st, st->super, namestr(st->name)); + + /* print types */ + k = htkeys(st->ty, &n); + for (i = 0; i < n; i++) { + indent(fd, depth + 1); + fprintf(fd, "T "); + /* already indented */ + outname(k[i], fd); + t = gettype(st, k[i]); + ty = tystr(t); + fprintf(fd, " = %s [tid=%d]\n", ty, t->tid); + free(ty); + } + free(k); + + /* dump declarations */ + k = htkeys(st->dcl, &n); + for (i = 0; i < n; i++) { + indent(fd, depth + 1); + fprintf(fd, "S "); + /* already indented */ + outsym(getdcl(st, k[i]), fd, 0); + } + free(k); + + /* dump sub-namespaces */ + k = htkeys(st->ns, &n); + for (i = 0; i < n; i++) { + indent(fd, depth + 1); + fprintf(fd, "N %s\n", (char*)k[i]); + outstab(getns_str(st, k[i]), fd, depth + 1); + } + free(k); +} + +void dumpstab(Stab *st, FILE *fd) +{ + outstab(st, fd, 0); +} + +/* Outputs a node in indented tree form. This is + * not a full serialization, but mainly an aid for + * understanding and debugging.
*/ +static void outnode(Node *n, FILE *fd, int depth) +{ + size_t i; + char *ty; + char *tr; + int tid; + char buf[1024]; + + indent(fd, depth); + if (!n) { + fprintf(fd, "Nil\n"); + return; + } + fprintf(fd, "%s", nodestr(n->type)); + switch(n->type) { + case Nfile: + fprintf(fd, "(name = %s)\n", n->file.name); + indent(fd, depth + 1); + fprintf(fd, "Globls:\n"); + outstab(n->file.globls, fd, depth + 2); + indent(fd, depth + 1); + fprintf(fd, "Exports:\n"); + outstab(n->file.exports, fd, depth + 2); + for (i = 0; i < n->file.nuses; i++) + outnode(n->file.uses[i], fd, depth + 1); + for (i = 0; i < n->file.nstmts; i++) + outnode(n->file.stmts[i], fd, depth + 1); + break; + case Ndecl: + tr = ""; + if (n->decl.trait) + tr = namestr(n->decl.trait->name); + fprintf(fd, "(did = %zd, trait=%s, isconst = %d, isgeneric = %d, isextern = %d, vis = %d)\n", + n->decl.did, tr, n->decl.isconst, n->decl.isgeneric, n->decl.isextern, n->decl.vis); + outsym(n, fd, depth + 1); + outnode(n->decl.init, fd, depth + 1); + break; + case Nblock: + fprintf(fd, "\n"); + outstab(n->block.scope, fd, depth + 1); + for (i = 0; i < n->block.nstmts; i++) + outnode(n->block.stmts[i], fd, depth+1); + break; + case Nifstmt: + fprintf(fd, "\n"); + outnode(n->ifstmt.cond, fd, depth+1); + outnode(n->ifstmt.iftrue, fd, depth+1); + outnode(n->ifstmt.iffalse, fd, depth+1); + break; + case Nloopstmt: + fprintf(fd, "\n"); + outnode(n->loopstmt.init, fd, depth+1); + outnode(n->loopstmt.cond, fd, depth+1); + outnode(n->loopstmt.step, fd, depth+1); + outnode(n->loopstmt.body, fd, depth+1); + break; + case Niterstmt: + fprintf(fd, "\n"); + outnode(n->iterstmt.elt, fd, depth+1); + outnode(n->iterstmt.seq, fd, depth+1); + outnode(n->iterstmt.body, fd, depth+1); + break; + case Nmatchstmt: + fprintf(fd, "\n"); + outnode(n->matchstmt.val, fd, depth+1); + for (i = 0; i < n->matchstmt.nmatches; i++) + outnode(n->matchstmt.matches[i], fd, depth+1); + break; + case Nmatch: + fprintf(fd, "\n"); + outnode(n->match.pat, 
fd, depth+1); + outnode(n->match.block, fd, depth+1); + break; + case Nuse: + fprintf(fd, " (name = %s, islocal = %d)\n", n->use.name, n->use.islocal); + break; + case Nexpr: + if (exprop(n) == Ovar) + assert(decls[n->expr.did]->decl.did == n->expr.did); + ty = tystr(n->expr.type); + if (n->expr.type) + tid = n->expr.type->tid; + else + tid = -1; + fprintf(fd, " (type = %s [tid %d], op = %s, isconst = %d, did=%zd)\n", + ty, tid, opstr(n->expr.op), n->expr.isconst, n->expr.did); + free(ty); + outnode(n->expr.idx, fd, depth + 1); + for (i = 0; i < n->expr.nargs; i++) + outnode(n->expr.args[i], fd, depth+1); + break; + case Nlit: + switch (n->lit.littype) { + case Lchr: fprintf(fd, " Lchr %c\n", n->lit.chrval); break; + case Lbool: fprintf(fd, " Lbool %s\n", n->lit.boolval ? "true" : "false"); break; + case Lint: fprintf(fd, " Lint %llu\n", n->lit.intval); break; + case Lflt: fprintf(fd, " Lflt %lf\n", n->lit.fltval); break; + case Lstr: fprintf(fd, " Lstr %s\n", n->lit.strval); break; + case Llbl: fprintf(fd, " Llbl %s\n", n->lit.lblval); break; + case Lfunc: + fprintf(fd, " Lfunc\n"); + outnode(n->lit.fnval, fd, depth+1); + break; + } + break; + case Nfunc: + fprintf(fd, " (args =\n"); + for (i = 0; i < n->func.nargs; i++) + outnode(n->func.args[i], fd, depth+1); + indent(fd, depth); + fprintf(fd, ")\n"); + outstab(n->func.scope, fd, depth + 1); + outnode(n->func.body, fd, depth+1); + break; + case Nname: + fprintf(fd, "("); + if (n->name.ns) + fprintf(fd, "%s.", n->name.ns); + fprintf(fd, "%s", n->name.name); + fprintf(fd, ")\n"); + break; + case Nimpl: + fprintf(fd, "(name = %s, type = %s)\n", namestr(n->impl.traitname), tyfmt(buf, sizeof buf, n->impl.type)); + indent(fd, depth); + outnode(n->impl.traitname, fd, depth + 1); + for (i = 0; i < n->impl.ndecls; i++) + outnode(n->impl.decls[i], fd, depth+1); + break; + case Nnone: + fprintf(stderr, "Nnone not a real node type!"); + fprintf(fd, "Nnone\n"); + break; + } +} + +void dump(Node *n, FILE *fd) +{ + outnode(n, 
fd, 0); +} diff --git a/parse/gram.y b/parse/gram.y new file mode 100644 index 0000000..6a6cd1b --- /dev/null +++ b/parse/gram.y @@ -0,0 +1,970 @@ +%{ +#define YYERROR_VERBOSE +#define YYDEBUG 1 + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "parse.h" + +Stab *curscope; + +void yyerror(const char *s); +int yylex(void); + +static Op binop(int toktype); +static Node *mkpseudodecl(Type *t); +static void installucons(Stab *st, Type *t); +static void addtrait(Type *t, char *str); + +%} + +%token<tok> Terror +%token<tok> Tplus /* + */ +%token<tok> Tminus /* - */ +%token<tok> Tmul /* * */ +%token<tok> Tdiv /* / */ +%token<tok> Tinc /* ++ */ +%token<tok> Tdec /* -- */ +%token<tok> Tmod /* % */ +%token<tok> Tasn /* = */ +%token<tok> Taddeq /* += */ +%token<tok> Tsubeq /* -= */ +%token<tok> Tmuleq /* *= */ +%token<tok> Tdiveq /* /= */ +%token<tok> Tmodeq /* %= */ +%token<tok> Tboreq /* |= */ +%token<tok> Tbxoreq /* ^= */ +%token<tok> Tbandeq /* &= */ +%token<tok> Tbsleq /* <<= */ +%token<tok> Tbsreq /* >>= */ + +%token<tok> Tbor /* | */ +%token<tok> Tbxor /* ^ */ +%token<tok> Tband /* & */ +%token<tok> Tbsl /* << */ +%token<tok> Tbsr /* >> */ +%token<tok> Tbnot /* ~ */ + +%token<tok> Teq /* == */ +%token<tok> Tgt /* > */ +%token<tok> Tlt /* < */ +%token<tok> Tge /* >= */ +%token<tok> Tle /* <= */ +%token<tok> Tne /* != */ + +%token<tok> Tlor /* || */ +%token<tok> Tland /* && */ +%token<tok> Tlnot /* ! 
*/ + +%token<tok> Tobrace /* { */ +%token<tok> Tcbrace /* } */ +%token<tok> Toparen /* ( */ +%token<tok> Tcparen /* ) */ +%token<tok> Tosqbrac /* [ */ +%token<tok> Tcsqbrac /* ] */ +%token<tok> Tat /* @ */ +%token<tok> Ttick /* ` */ +%token<tok> Tderef /* # */ + +%token<tok> Ttype /* type */ +%token<tok> Tfor /* for */ +%token<tok> Tin /* in */ +%token<tok> Twhile /* while */ +%token<tok> Tif /* if */ +%token<tok> Telse /* else */ +%token<tok> Telif /* else */ +%token<tok> Tmatch /* match */ +%token<tok> Tgoto /* goto */ +%token<tok> Tbreak /* break */ +%token<tok> Tcontinue /* continue */ + +%token<tok> Tintlit +%token<tok> Tstrlit +%token<tok> Tfloatlit +%token<tok> Tchrlit +%token<tok> Tboollit + +%token<tok> Ttrait /* trait */ +%token<tok> Timpl /* trait */ +%token<tok> Tstruct /* struct */ +%token<tok> Tunion /* union */ +%token<tok> Ttyparam /* @typename */ + +%token<tok> Tconst /* const */ +%token<tok> Tvar /* var */ +%token<tok> Tgeneric /* var */ +%token<tok> Textern /* extern */ +%token<tok> Tcast /* castto */ + +%token<tok> Texport /* export */ +%token<tok> Tprotect /* protect */ + +%token<tok> Tellipsis/* ... */ +%token<tok> Tendln /* ; or \n */ +%token<tok> Tendblk /* ;; */ +%token<tok> Tcolon /* : */ +%token<tok> Twith /* :: */ +%token<tok> Tdot /* . 
*/ +%token<tok> Tcomma /* , */ +%token<tok> Tret /* -> */ +%token<tok> Tuse /* use */ +%token<tok> Tpkg /* pkg */ +%token<tok> Tsizeof /* sizeof */ + +%token<tok> Tident +%token<tok> Teof + +%start file + +%type <ty> type structdef uniondef tupledef compoundtype functype funcsig +%type <ty> generictype +%type <tylist> typelist typarams +%type <nodelist> typaramlist + +%type <tok> asnop cmpop addop mulop shiftop optident + +%type <tydef> tydef typeid +%type <trait> traitdef + +%type <node> exprln retexpr goto continue break expr atomicexpr +%type <node> littok literal asnexpr lorexpr landexpr borexpr +%type <node> bandexpr cmpexpr unionexpr addexpr mulexpr shiftexpr prefixexpr postfixexpr +%type <node> funclit seqlit tuplit name block stmt label use +%type <node> declbody declcore structent arrayelt structelt tuphead +%type <node> ifstmt forstmt whilestmt matchstmt elifs optexprln optexpr +%type <node> match +%type <node> castexpr +%type <ucon> unionelt +%type <node> blkbody +%type <node> implstmt + +%type <nodelist> arglist argdefs params matches +%type <nodelist> structbody structelts arrayelts +%type <nodelist> tupbody tuprest +%type <nodelist> decl decllist +%type <nodelist> traitbody implbody + +%type <uconlist> unionbody + +%union { + struct { + int line; + Node **nl; + size_t nn; + } nodelist; + struct { + int line; + Ucon **ucl; + size_t nucl; + } uconlist; + struct { + int line; + Type **types; + size_t ntypes; + } tylist; + struct { /* FIXME: unused */ + int line; + char *name; + Type *type; + Type **params; + size_t nparams; + } tydef; + Trait *trait; + Node *node; + Tok *tok; + Type *ty; + Ucon *ucon; +} + +%% + +file : toplev + | file Tendln toplev + ; + +toplev : package + | use {lappend(&file->file.uses, &file->file.nuses, $1);} + | implstmt { + lappend(&file->file.stmts, &file->file.nstmts, $1); + putimpl(file->file.globls, $1); + } + | traitdef { + size_t i; + puttrait(file->file.globls, $1->name, $1); + for (i = 0; i < $1->nfuncs; i++) + 
putdcl(file->file.exports, $1->funcs[i]); + } + | tydef { + puttype(file->file.globls, mkname($1.line, $1.name), $1.type); + installucons(file->file.globls, $1.type); + } + | decl { + size_t i; + for (i = 0; i < $1.nn; i++) { + lappend(&file->file.stmts, &file->file.nstmts, $1.nl[i]); + $1.nl[i]->decl.isglobl = 1; + putdcl(file->file.globls, $1.nl[i]); + } + } + | /* empty */ + ; + +decl : Tvar decllist {$$ = $2;} + | Tconst decllist { + size_t i; + for (i = 0; i < $2.nn; i++) + $2.nl[i]->decl.isconst = 1; + $$ = $2; + } + | Tgeneric decllist { + size_t i; + for (i = 0; i < $2.nn; i++) { + $2.nl[i]->decl.isconst = 1; + $2.nl[i]->decl.isgeneric = 1; + } + $$ = $2;} + | Textern Tvar decllist { + size_t i; + for (i = 0; i < $3.nn; i++) + $3.nl[i]->decl.isextern = 1; + $$ = $3; + } + | Textern Tconst decllist { + size_t i; + for (i = 0; i < $3.nn; i++) { + $3.nl[i]->decl.isconst = 1; + $3.nl[i]->decl.isextern = 1; + } + $$ = $3; + } + ; + +decllist: declbody { + $$.nl = NULL; $$.nn = 0; + lappend(&$$.nl, &$$.nn, $1); + } + | declbody Tcomma decllist { + linsert(&$3.nl, &$3.nn, 0, $1); + $$=$3; + } + ; + +use : Tuse Tident {$$ = mkuse($1->line, $2->str, 0);} + | Tuse Tstrlit {$$ = mkuse($1->line, $2->str, 1);} + ; + +optident: Tident {$$ = $1;} + | /* empty */ {$$ = NULL;} + ; + +package : Tpkg optident Tasn pkgbody Tendblk { + if (file->file.exports->name) + fatal($1->line, "Package already declared\n"); + if ($2) { + updatens(file->file.exports, $2->str); + updatens(file->file.globls, $2->str); + } + } + ; + +pkgbody : pkgitem + | pkgbody Tendln pkgitem + ; + +pkgitem : decl { + size_t i; + for (i = 0; i < $1.nn; i++) { + putdcl(file->file.exports, $1.nl[i]); + if ($1.nl[i]->decl.init) + lappend(&file->file.stmts, &file->file.nstmts, $1.nl[i]); + } + } + | tydef { + puttype(file->file.exports, mkname($1.line, $1.name), $1.type); + installucons(file->file.exports, $1.type); + } + | traitdef { + size_t i; + $1->vis = Visexport; + puttrait(file->file.exports, $1->name, 
$1); + for (i = 0; i < $1->nfuncs; i++) + putdcl(file->file.exports, $1->funcs[i]); + } + | implstmt { + $1->impl.vis = Visexport; + putimpl(file->file.exports, $1); + } + | visdef {die("Unimplemented visdef");} + | /* empty */ + ; + +visdef : Texport Tcolon + | Tprotect Tcolon + ; + +declbody: declcore Tasn expr {$$ = $1; $1->decl.init = $3;} + | declcore + ; + +declcore: name {$$ = mkdecl($1->line, $1, mktyvar($1->line));} + | name Tcolon type {$$ = mkdecl($1->line, $1, $3);} + ; + +name : Tident {$$ = mkname($1->line, $1->str);} + | Tident Tdot name {$$ = $3; setns($3, $1->str);} + ; + +implstmt: Timpl name type { + $$ = mkimplstmt($1->line, $2, $3, NULL, 0); + $$->impl.isproto = 1; + } + | Timpl name type Tasn Tendln implbody Tendblk { + $$ = mkimplstmt($1->line, $2, $3, $6.nl, $6.nn); + } + ; + +implbody + : optendlns {$$.nl = NULL; $$.nn = 0;} + | implbody Tident Tasn exprln optendlns { + Node *d; + $$ = $1; + d = mkdecl($2->line, mkname($2->line, $2->str), mktyvar($2->line)); + d->decl.init = $4; + d->decl.isconst = 1; + lappend(&$$.nl, &$$.nn, d); + } + ; + +traitdef: Ttrait Tident generictype /* trait prototype */ { + $$ = mktrait($1->line, mkname($2->line, $2->str), $3, NULL, 0, NULL, 0, 1); + } + | Ttrait Tident generictype Tasn traitbody Tendblk /* trait definition */ { + size_t i; + $$ = mktrait($1->line, mkname($2->line, $2->str), $3, NULL, 0, $5.nl, $5.nn, 0); + for (i = 0; i < $5.nn; i++) { + $5.nl[i]->decl.trait = $$; + $5.nl[i]->decl.isgeneric = 1; + } + } + ; + +traitbody + : optendlns {$$.nl = NULL; $$.nn = 0;} + | traitbody Tident Tcolon type optendlns { + Node *d; + $$ = $1; + d = mkdecl($2->line, mkname($2->line, $2->str), $4); + d->decl.isgeneric = 1; + lappend(&$$.nl, &$$.nn, d); + } + ; + + +tydef : Ttype typeid {$$ = $2;} + | Ttype typeid Tasn type { + $$ = $2; + $$.type = mktyname($2.line, mkname($2.line, $2.name), $2.params, $2.nparams, $4); + } + ; + +typeid : Tident { + $$.line = $1->line; + $$.name = $1->str; + $$.params = NULL; + 
$$.type = NULL; + } + | Tident Toparen typarams Tcparen { + $$.line = $1->line; + $$.name = $1->str; + $$.params = $3.types; + $$.nparams = $3.ntypes; + $$.type = NULL; + } + ; + +typarams: generictype { + $$.types = NULL; $$.ntypes = 0; + lappend(&$$.types, &$$.ntypes, $1); + } + | typarams Tcomma generictype {lappend(&$$.types, &$$.ntypes, $3);} + ; + +type : structdef + | tupledef + | uniondef + | compoundtype + | generictype + | Tellipsis {$$ = mktype($1->line, Tyvalist);} + ; + +generictype + : Ttyparam {$$ = mktyparam($1->line, $1->str);} + | Ttyparam Twith name { + $$ = mktyparam($1->line, $1->str); + addtrait($$, $3->name.name); + } + | Ttyparam Twith Toparen typaramlist Tcparen { + size_t i; + $$ = mktyparam($1->line, $1->str); + for (i = 0; i < $4.nn; i++) + addtrait($$, $4.nl[i]->name.name); + } + ; + +typaramlist + : name { + $$.nl = NULL; $$.nn = 0; + lappend(&$$.nl, &$$.nn, $1); + } + | typaramlist Tcomma name {lappend(&$$.nl, &$$.nn, $3);} + ; + +compoundtype + : functype {$$ = $1;} + | type Tosqbrac Tcolon Tcsqbrac {$$ = mktyslice($2->line, $1);} + | type Tosqbrac expr Tcsqbrac {$$ = mktyarray($2->line, $1, $3);} + | type Tderef {$$ = mktyptr($2->line, $1);} + | Tat Tident {$$ = mktyparam($1->line, $2->str);} + | name {$$ = mktyunres($1->line, $1, NULL, 0);} + | name Toparen typelist Tcparen {$$ = mktyunres($1->line, $1, $3.types, $3.ntypes);} + ; + +functype: Toparen funcsig Tcparen {$$ = $2;} + ; + +funcsig : argdefs + {$$ = mktyfunc($1.line, $1.nl, $1.nn, mktyvar($1.line));} + | argdefs Tret type + {$$ = mktyfunc($1.line, $1.nl, $1.nn, $3);} + ; + +argdefs : declcore { + $$.line = $1->line; + $$.nl = NULL; + $$.nn = 0; lappend(&$$.nl, &$$.nn, $1); + } + | argdefs Tcomma declcore {lappend(&$$.nl, &$$.nn, $3);} + | /* empty */ { + $$.line = line; + $$.nl = NULL; + $$.nn = 0; + } + ; + +tupledef: Tosqbrac typelist Tcsqbrac + {$$ = mktytuple($1->line, $2.types, $2.ntypes);} + ; + +typelist: type { + $$.types = NULL; $$.ntypes = 0; + 
lappend(&$$.types, &$$.ntypes, $1); + } + | typelist Tcomma type + {lappend(&$$.types, &$$.ntypes, $3);} + ; + +structdef + : Tstruct structbody Tendblk + {$$ = mktystruct($1->line, $2.nl, $2.nn);} + ; + +structbody + : structent { + if ($1) { + $$.nl = NULL; + $$.nn = 0; + lappend(&$$.nl, &$$.nn, $1); + } + } + | structbody structent { + if ($2) + lappend(&$$.nl, &$$.nn, $2); + } + ; + +structent + : declcore Tendln {$$ = $1;} + | visdef Tendln {$$ = NULL;} + | Tendln {$$ = NULL;} + ; + +uniondef + : Tunion unionbody Tendblk + {$$ = mktyunion($1->line, $2.ucl, $2.nucl);} + ; + +unionbody + : unionelt { + $$.ucl = NULL; + $$.nucl = 0; + if ($1) + lappend(&$$.ucl, &$$.nucl, $1); + } + | unionbody unionelt { + if ($2) + lappend(&$$.ucl, &$$.nucl, $2); + } + ; + +unionelt /* nb: the ucon union type gets filled in when we have context */ + : Ttick name type Tendln {$$ = mkucon($2->line, $2, NULL, $3);} + | Ttick name Tendln {$$ = mkucon($2->line, $2, NULL, NULL);} + | visdef Tendln {$$ = NULL;} + | Tendln {$$ = NULL;} + ; + +goto : Tgoto Tident {$$ = mkexpr($1->line, Ojmp, mklbl($2->line, $2->str), NULL);} + ; + +retexpr : Tret expr {$$ = mkexpr($1->line, Oret, $2, NULL);} + | Tret {$$ = mkexpr($1->line, Oret, NULL);} + | expr + ; + +optexpr : expr {$$ = $1;} + | /* empty */ {$$ = NULL;} + ; + +optexprln: exprln {$$ = $1;} + | Tendln {$$ = NULL;} + ; + +exprln : expr Tendln + ; + +expr : asnexpr + ; + +asnexpr : lorexpr asnop asnexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | lorexpr + ; + +asnop : Tasn + | Taddeq /* += */ + | Tsubeq /* -= */ + | Tmuleq /* *= */ + | Tdiveq /* /= */ + | Tmodeq /* %= */ + | Tboreq /* |= */ + | Tbxoreq /* ^= */ + | Tbandeq /* &= */ + | Tbsleq /* <<= */ + | Tbsreq /* >>= */ + ; + +lorexpr : lorexpr Tlor landexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | landexpr + ; + +landexpr: landexpr Tland cmpexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | cmpexpr + ; + +cmpexpr : cmpexpr cmpop 
castexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | castexpr + ; + + +cmpop : Teq | Tgt | Tlt | Tge | Tle | Tne ; + +castexpr: castexpr Tcast Toparen type Tcparen { + $$ = mkexpr($1->line, Ocast, $1, NULL); + $$->expr.type = $4; + } + | unionexpr + ; + +unionexpr + : Ttick name unionexpr {$$ = mkexpr($1->line, Oucon, $2, $3, NULL);} + | Ttick name {$$ = mkexpr($1->line, Oucon, $2, NULL);} + | borexpr + ; + + +borexpr : borexpr Tbor bandexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | borexpr Tbxor bandexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | bandexpr + ; + +bandexpr: bandexpr Tband addexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | addexpr + ; + +addexpr : addexpr addop mulexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | mulexpr + ; + +addop : Tplus | Tminus ; + +mulexpr : mulexpr mulop shiftexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | shiftexpr + ; + +mulop : Tmul | Tdiv | Tmod + ; + +shiftexpr + : shiftexpr shiftop prefixexpr + {$$ = mkexpr($1->line, binop($2->type), $1, $3, NULL);} + | prefixexpr + ; + +shiftop : Tbsl | Tbsr; + +prefixexpr + : Tinc prefixexpr {$$ = mkexpr($1->line, Opreinc, $2, NULL);} + | Tdec prefixexpr {$$ = mkexpr($1->line, Opredec, $2, NULL);} + | Tband prefixexpr {$$ = mkexpr($1->line, Oaddr, $2, NULL);} + | Tlnot prefixexpr {$$ = mkexpr($1->line, Olnot, $2, NULL);} + | Tbnot prefixexpr {$$ = mkexpr($1->line, Obnot, $2, NULL);} + | Tminus prefixexpr {$$ = mkexpr($1->line, Oneg, $2, NULL);} + | Tplus prefixexpr {$$ = $2;} /* positive is a nop */ + | postfixexpr + ; + +postfixexpr + : postfixexpr Tdot Tident + {$$ = mkexpr($1->line, Omemb, $1, mkname($3->line, $3->str), NULL);} + | postfixexpr Tinc + {$$ = mkexpr($1->line, Opostinc, $1, NULL);} + | postfixexpr Tdec + {$$ = mkexpr($1->line, Opostdec, $1, NULL);} + | postfixexpr Tosqbrac expr Tcsqbrac + {$$ = mkexpr($1->line, Oidx, $1, $3, NULL);} + | postfixexpr Tosqbrac 
optexpr Tcolon optexpr Tcsqbrac + {$$ = mksliceexpr($1->line, $1, $3, $5);} + | postfixexpr Tderef + {$$ = mkexpr($1->line, Oderef, $1, NULL);} + | postfixexpr Toparen arglist Tcparen + {$$ = mkcall($1->line, $1, $3.nl, $3.nn);} + | atomicexpr + ; + +arglist : asnexpr + {$$.nl = NULL; $$.nn = 0; lappend(&$$.nl, &$$.nn, $1);} + | arglist Tcomma asnexpr + {lappend(&$$.nl, &$$.nn, $3);} + | /* empty */ + {$$.nl = NULL; $$.nn = 0;} + ; + +atomicexpr + : Tident + {$$ = mkexpr($1->line, Ovar, mkname($1->line, $1->str), NULL);} + | literal + | Toparen expr Tcparen + {$$ = $2;} + | Tsizeof Toparen type Tcparen + {$$ = mkexpr($1->line, Osize, mkpseudodecl($3), NULL);} + ; + +tupbody : tuphead tuprest + {$$ = $2; + linsert(&$$.nl, &$$.nn, 0, $1);} + ; + +tuphead : expr Tcomma {$$ = $1;} + ; + +tuprest : /*empty */ + {$$.nl = NULL; $$.nn = 0;} + | expr { + $$.nl = NULL; $$.nn = 0; + lappend(&$$.nl, &$$.nn, $1); + } + | tuprest Tcomma expr {lappend(&$$.nl, &$$.nn, $3);} + ; + +literal : funclit {$$ = mkexpr($1->line, Olit, $1, NULL);} + | littok {$$ = mkexpr($1->line, Olit, $1, NULL);} + | seqlit {$$ = $1;} + | tuplit {$$ = $1;} + ; + +tuplit : Toparen tupbody Tcparen + {$$ = mkexprl($1->line, Otup, $2.nl, $2.nn);} + +littok : Tstrlit {$$ = mkstr($1->line, $1->str);} + | Tchrlit {$$ = mkchar($1->line, $1->chrval);} + | Tfloatlit {$$ = mkfloat($1->line, $1->fltval);} + | Tboollit {$$ = mkbool($1->line, !strcmp($1->str, "true"));} + | Tintlit { + $$ = mkint($1->line, $1->intval); + if ($1->inttype) + $$->lit.type = mktype($1->line, $1->inttype); + } + ; + +funclit : Tobrace params Tendln blkbody Tcbrace + {$$ = mkfunc($1->line, $2.nl, $2.nn, mktyvar($3->line), $4);} + | Tobrace params Tret type Tendln blkbody Tcbrace + {$$ = mkfunc($1->line, $2.nl, $2.nn, $4, $6);} + ; + +params : declcore { + $$.nl = NULL; + $$.nn = 0; + lappend(&$$.nl, &$$.nn, $1); + } + | params Tcomma declcore {lappend(&$$.nl, &$$.nn, $3);} + | /* empty */ {$$.nl = NULL; $$.nn = 0;} + ; + +seqlit : Tosqbrac 
arrayelts Tcsqbrac + {$$ = mkexprl($1->line, Oarr, $2.nl, $2.nn);} + | Tosqbrac structelts Tcsqbrac + {$$ = mkexprl($1->line, Ostruct, $2.nl, $2.nn);} + | Tosqbrac Tcsqbrac /* [] is the empty array. */ + {$$ = mkexprl($1->line, Oarr, NULL, 0);} + ; + +arrayelts + : optendlns arrayelt { + $$.nl = NULL; + $$.nn = 0; + lappend(&$$.nl, &$$.nn, mkidxinit($2->line, mkint($2->line, 0), $2)); + } + | arrayelts Tcomma optendlns arrayelt + {lappend(&$$.nl, &$$.nn, mkidxinit($4->line, mkint($4->line, $$.nn), $4));} + | arrayelts Tcomma optendlns + ; + +arrayelt: expr optendlns {$$ = $1;} + ; + +structelts + : structelt { + $$.nl = NULL; + $$.nn = 0; + lappend(&$$.nl, &$$.nn, $1); + } + | structelts Tcomma structelt + {lappend(&$$.nl, &$$.nn, $3);} + ; + +structelt: optendlns Tdot Tident Tasn expr optendlns + {$$ = mkidxinit($2->line, mkname($3->line, $3->str), $5);} + ; + +optendlns : /* none */ + | optendlns Tendln + ; + +stmt : goto + | break + | continue + | retexpr + | label + | ifstmt + | forstmt + | whilestmt + | matchstmt + | /* empty */ {$$ = NULL;} + ; + +break : Tbreak + {$$ = mkexpr($1->line, Obreak, NULL);} + ; + +continue : Tcontinue + {$$ = mkexpr($1->line, Ocontinue, NULL);} + ; + +forstmt : Tfor optexprln optexprln optexprln block + {$$ = mkloopstmt($1->line, $2, $3, $4, $5);} + | Tfor expr Tin exprln block + {$$ = mkiterstmt($1->line, $2, $4, $5);} + /* FIXME: allow decls in for loops + | Tfor decl Tendln optexprln optexprln block + {$$ = mkloopstmt($1->line, $2, $4, $5, $6);} + */ + ; + +whilestmt + : Twhile exprln block + {$$ = mkloopstmt($1->line, NULL, $2, NULL, $3);} + ; + +ifstmt : Tif exprln blkbody elifs + {$$ = mkifstmt($1->line, $2, $3, $4);} + ; + +elifs : Telif exprln blkbody elifs + {$$ = mkifstmt($1->line, $2, $3, $4);} + | Telse block + {$$ = $2;} + | Tendblk + {$$ = NULL;} + ; + +matchstmt: Tmatch exprln optendlns Tbor matches Tendblk + {$$ = mkmatchstmt($1->line, $2, $5.nl, $5.nn);} + ; + +matches : match { + $$.nl = NULL; + $$.nn = 0; + if 
($1) + lappend(&$$.nl, &$$.nn, $1); + } + | matches Tbor match { + if ($2) + lappend(&$$.nl, &$$.nn, $3); + } + ; + +match : expr Tcolon blkbody Tendln {$$ = mkmatch($1->line, $1, $3);} + ; + +block : blkbody Tendblk + ; + +blkbody : decl { + size_t i; + $$ = mkblock(line, mkstab()); + for (i = 0; i < $1.nn; i++) { + putdcl($$->block.scope, $1.nl[i]); + lappend(&$$->block.stmts, &$$->block.nstmts, $1.nl[i]); + } + } + | stmt { + $$ = mkblock(line, mkstab()); + if ($1) + lappend(&$$->block.stmts, &$$->block.nstmts, $1); + } + | blkbody Tendln stmt { + if ($3) + lappend(&$1->block.stmts, &$1->block.nstmts, $3); + $$ = $1; + } + | blkbody Tendln decl { + size_t i; + for (i = 0; i < $3.nn; i++){ + putdcl($$->block.scope, $3.nl[i]); + lappend(&$1->block.stmts, &$1->block.nstmts, $3.nl[i]); + } + } + ; + +label : Tcolon Tident + {$$ = mklbl($2->line, $2->str);} + ; + +%% + +static void addtrait(Type *t, char *str) +{ + size_t i; + + for (i = 0; i < ntraittab; i++) { + if (!strcmp(namestr(traittab[i]->name), str)) { + settrait(t, traittab[i]); + return; + } + } + fatal(t->line, "Constraint %s does not exist", str); +} + +static Node *mkpseudodecl(Type *t) +{ + static int nextpseudoid; + char buf[128]; + + snprintf(buf, 128, ".pdecl%d", nextpseudoid++); + return mkdecl(-1, mkname(-1, buf), t); +} + +static void installucons(Stab *st, Type *t) +{ + Type *b; + size_t i; + + if (!t) + return; + b = tybase(t); + switch (b->type) { + case Tystruct: + for (i = 0; i < b->nmemb; i++) + installucons(st, b->sdecls[i]->decl.type); + break; + case Tyunion: + for (i = 0; i < b->nmemb; i++) { + b->udecls[i]->utype = t; + b->udecls[i]->id = i; + putucon(st, b->udecls[i]); + } + break; + default: + break; + } +} + +void yyerror(const char *s) +{ + fprintf(stderr, "%s:%d: %s", filename, line, s); + if (curtok->str) + fprintf(stderr, " near \"%s\"", curtok->str); + fprintf(stderr, "\n"); + exit(1); +} + +static Op binop(int tt) +{ + Op o; + + o = Obad; + switch (tt) { + case Tplus: o = 
Oadd; break; + case Tminus: o = Osub; break; + case Tmul: o = Omul; break; + case Tdiv: o = Odiv; break; + case Tmod: o = Omod; break; + case Tasn: o = Oasn; break; + case Taddeq: o = Oaddeq; break; + case Tsubeq: o = Osubeq; break; + case Tmuleq: o = Omuleq; break; + case Tdiveq: o = Odiveq; break; + case Tmodeq: o = Omodeq; break; + case Tboreq: o = Oboreq; break; + case Tbxoreq: o = Obxoreq; break; + case Tbandeq: o = Obandeq; break; + case Tbsleq: o = Obsleq; break; + case Tbsreq: o = Obsreq; break; + case Tbor: o = Obor; break; + case Tbxor: o = Obxor; break; + case Tband: o = Oband; break; + case Tbsl: o = Obsl; break; + case Tbsr: o = Obsr; break; + case Teq: o = Oeq; break; + case Tgt: o = Ogt; break; + case Tlt: o = Olt; break; + case Tge: o = Oge; break; + case Tle: o = Ole; break; + case Tne: o = One; break; + case Tlor: o = Olor; break; + case Tland: o = Oland; break; + default: + die("Unimplemented binop\n"); + break; + } + return o; +} + diff --git a/parse/htab.c b/parse/htab.c new file mode 100644 index 0000000..88c2232 --- /dev/null +++ b/parse/htab.c @@ -0,0 +1,256 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <assert.h> +#include <limits.h> +#include <string.h> + +#include "parse.h" + +#define Initsz 16 + +/* Creates a new empty hash table, using 'hash' as the + * hash funciton, and 'cmp' to verify that there are no + * hash collisions. */ +Htab *mkht(ulong (*hash)(void *key), int (*cmp)(void *k1, void *k2)) +{ + Htab *ht; + + ht = xalloc(sizeof(Htab)); + ht->nelt = 0; + ht->sz = Initsz; + ht->hash = hash; + ht->cmp = cmp; + ht->keys = zalloc(Initsz*sizeof(void*)); + ht->vals = zalloc(Initsz*sizeof(void*)); + ht->hashes = zalloc(Initsz*sizeof(void*)); + ht->dead = zalloc(Initsz*sizeof(char)); + + return ht; +} + +/* Frees a hash table. Passing this function + * NULL is a no-op. 
*/ +void htfree(Htab *ht) +{ + if (!ht) + return; + free(ht->keys); + free(ht->vals); + free(ht->hashes); + free(ht->dead); + free(ht); +} + +/* Offsets the hash so that '0' can be + * used as a 'no valid value */ +static ulong hash(Htab *ht, void *k) +{ + ulong h; + h = ht->hash(k); + if (h == 0) + return 1; + else + return h; +} + +/* Resizes the hash table by copying all + * the old keys into the right slots in a + * new table. */ +static void grow(Htab *ht, int sz) +{ + void **oldk; + void **oldv; + ulong *oldh; + char *oldd; + int oldsz; + int i; + + oldk = ht->keys; + oldv = ht->vals; + oldh = ht->hashes; + oldd = ht->dead; + oldsz = ht->sz; + + ht->nelt = 0; + ht->sz = sz; + ht->keys = zalloc(sz*sizeof(void*)); + ht->vals = zalloc(sz*sizeof(void*)); + ht->hashes = zalloc(sz*sizeof(void*)); + ht->dead = zalloc(sz*sizeof(void*)); + + for (i = 0; i < oldsz; i++) + if (oldh[i] && !oldd[i]) + htput(ht, oldk[i], oldv[i]); + free(oldh); + free(oldk); + free(oldv); + free(oldd); +} + +/* Inserts 'k' into the hash table, possibly + * killing any previous key that compares + * as equal. */ +int htput(Htab *ht, void *k, void *v) +{ + int i; + ulong h; + int di; + + di = 0; + h = hash(ht, k); + i = h & (ht->sz - 1); + while (ht->hashes[i] && !ht->dead[i]) { + /* second insertion overwrites first. 
nb, we shouldn't touch the + * keys for dead values */ + if (ht->hashes[i] == h) { + if (ht->dead[i]) + break; + else if (ht->cmp(ht->keys[i], k)) + goto conflicted; + } + di++; + i = (h + di) & (ht->sz - 1); + } + ht->nelt++; +conflicted: + ht->hashes[i] = h; + ht->keys[i] = k; + ht->vals[i] = v; + ht->dead[i] = 0; + if (ht->sz < ht->nelt*2) + grow(ht, ht->sz*2); + return 1; +} + +/* Finds the index that we would insert + * the key into */ +static ssize_t htidx(Htab *ht, void *k) +{ + ssize_t i; + ulong h; + int di; + + di = 0; + h = hash(ht, k); + i = h & (ht->sz - 1); + while (ht->hashes[i] && !ht->dead[i] && ht->hashes[i] != h) { +searchmore: + di++; + i = (h + di) & (ht->sz - 1); + } + if (!ht->hashes[i] || ht->dead[i]) + return -1; + if (!ht->cmp(ht->keys[i], k)) + goto searchmore; /* collision */ + return i; +} + +/* Looks up a key, returning NULL if + * the value is not present. Note, + * if NULL is a valid value, you need + * to check with hthas() to see if it's + * not there */ +void *htget(Htab *ht, void *k) +{ + ssize_t i; + + i = htidx(ht, k); + if (i < 0) + return NULL; + else + return ht->vals[i]; +} + +void htdel(Htab *ht, void *k) +{ + ssize_t i; + + i = htidx(ht, k); + if (i < 0) + return; + ht->dead[i] = 1; + ht->nelt--; +} + + +/* Tests for 'k's presence in 'ht' */ +int hthas(Htab *ht, void *k) +{ + return htidx(ht, k) >= 0; +} + +/* Returns a list of all keys in the hash + * table, storing the size of the returned + * array in 'nkeys'. 
NB: the value returned + * is allocated on the heap, and it is the + * job of the caller to free it */ +void **htkeys(Htab *ht, size_t *nkeys) +{ + void **k; + size_t i, j; + + j = 0; + k = xalloc(sizeof(void*)*ht->nelt); + for (i = 0; i < ht->sz; i++) + if (ht->hashes[i] && !ht->dead[i]) + k[j++] = ht->keys[i]; + *nkeys = ht->nelt; + return k; +} + +ulong strhash(void *_s) +{ + char *s; + ulong h; + ulong g; + + s = _s; + h = 0; + while (s && *s) { + h = ((h << 4) + *s++); + + if ((g = (h & 0xF0000000))) + h ^= (g >> 24); + + h &= ~g; + } + return h; +} + +int streq(void *a, void *b) +{ + if (a == b) + return 1; + if (a == NULL || b == NULL) + return 0; + return !strcmp(a, b); +} + +ulong ptrhash(void *key) +{ + return inthash((intptr_t)key); +} + +ulong inthash(uint64_t key) +{ + intptr_t h; + + h = (intptr_t) key; + h *= 357913941; + h ^= h << 24; + h += ~357913941; + h ^= h >> 31; + h ^= h << 31; + return h; +} + +int inteq(uint64_t a, uint64_t b) +{ + return a == b; +} + +int ptreq(void *a, void *b) +{ + return a == b; +} diff --git a/parse/infer.c b/parse/infer.c new file mode 100644 index 0000000..a52de7b --- /dev/null +++ b/parse/infer.c @@ -0,0 +1,2109 @@ +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <inttypes.h> +#include <ctype.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <assert.h> + +#include "parse.h" + +typedef struct Inferstate Inferstate; +struct Inferstate { + int ingeneric; + int sawret; + Type *ret; + + /* bound by patterns turn into decls in the action block */ + Node **binds; + size_t nbinds; + /* nodes that need post-inference checking/unification */ + Node **postcheck; + size_t npostcheck; + Stab **postcheckscope; + size_t npostcheckscope; + /* the type params bound at the current point */ + Htab **tybindings; + size_t ntybindings; + /* generic declarations to be specialized */ + Node **genericdecls; + size_t 
ngenericdecls; + /* delayed unification -- we fall back to these types in a post pass if we + * haven't unifed to something more specific */ + Htab *delayed; + /* the nodes that we've specialized them to, and the scopes they + * appear in */ + Node **specializations; + size_t nspecializations; + Stab **specializationscope; + size_t nspecializationscope; +}; + +static void infernode(Inferstate *st, Node *n, Type *ret, int *sawret); +static void inferexpr(Inferstate *st, Node *n, Type *ret, int *sawret); +static void inferdecl(Inferstate *st, Node *n); +static void typesub(Inferstate *st, Node *n); +static void tybind(Inferstate *st, Type *t); +static void bind(Inferstate *st, Node *n); +static void tyunbind(Inferstate *st, Type *t); +static void unbind(Inferstate *st, Node *n); +static Type *unify(Inferstate *st, Node *ctx, Type *a, Type *b); +static Type *tf(Inferstate *st, Type *t); + +/* Tries to give a good string describing the context + * for the sake of error messages. */ +static char *ctxstr(Inferstate *st, Node *n) +{ + char *s; + char *t; + char *u; + char *idx; + char buf[512]; + + idx = NULL; + switch (n->type) { + default: + s = strdup(nodestr(n->type)); + break; + case Ndecl: + u = declname(n); + t = tystr(tf(st, decltype(n))); + snprintf(buf, sizeof buf, "%s:%s", u, t); + s = strdup(buf); + free(t); + break; + case Nname: + s = strdup(namestr(n)); + break; + case Nexpr: + if (n->expr.idx) + idx = ctxstr(st, n->expr.idx); + if (exprop(n) == Ovar) + u = namestr(n->expr.args[0]); + else + u = opstr(exprop(n)); + if (exprtype(n)) + t = tystr(tf(st, exprtype(n))); + else + t = strdup("unknown"); + if (idx) + snprintf(buf, sizeof buf, ".%s=%s:%s", idx, u, t); + else + snprintf(buf, sizeof buf, "%s:%s", u, t); + free(idx); + free(t); + s = strdup(buf); + break; + } + return s; +} + +static void delayedcheck(Inferstate *st, Node *n, Stab *s) +{ + lappend(&st->postcheck, &st->npostcheck, n); + lappend(&st->postcheckscope, &st->npostcheckscope, s); +} + +static 
void typeerror(Inferstate *st, Type *a, Type *b, Node *ctx, char *msg) +{ /* Reports that types a and b are incompatible via fatal() at ctx->line. + * ctx is dereferenced, so it must not be NULL; msg, when non-NULL, is + * appended to the report. */ + char *t1, *t2, *c; + + t1 = tystr(a); + t2 = tystr(b); + c = ctxstr(st, ctx); + if (msg) + fatal(ctx->line, "Type \"%s\" incompatible with \"%s\" near %s: %s", t1, t2, c, msg); + else + fatal(ctx->line, "Type \"%s\" incompatible with \"%s\" near %s", t1, t2, c); + free(t1); /* NOTE(review): these frees only run if fatal() returns -- confirm */ + free(t2); + free(c); +} + + +/* Set a scope's enclosing scope up correctly. + * We don't do this in the parser for some reason. */ +static void setsuper(Stab *st, Stab *super) +{ + Stab *s; + + /* verify that we don't accidentally create loops */ + for (s = super; s; s = s->super) + assert(s->super != st); + st->super = super; +} + +/* If the current environment binds a type, + * we return true. A type counts as bound only when one + * of the binding tables maps t->pname to this very + * Type object (pointer equality). */ +static int isbound(Inferstate *st, Type *t) +{ + ssize_t i; + Type *p; + + for (i = st->ntybindings - 1; i >= 0; i--) { /* most recently pushed table first */ + p = htget(st->tybindings[i], t->pname); + if (p == t) + return 1; + } + return 0; +} + +/* Checks if a type directly contains itself. + * Recursive types that contain themselves through + * pointers or slices are fine, but any other self-inclusion + * would lead to a value of infinite size. + * Callers pass sub == NULL to start the walk at t itself. */ +static int tyinfinite(Inferstate *st, Type *t, Type *sub) +{ + size_t i; + + assert(t != NULL); + if (t == sub) /* FIXME: is this actually right? */ + return 1; + /* if we're on the first iteration, the subtype is the type + * itself. The assignment must come after the equality check + * for obvious reasons. 
*/ + if (!sub) + sub = t; + + switch (sub->type) { + case Tystruct: /* every struct member is embedded by value */ + for (i = 0; i < sub->nmemb; i++) + if (tyinfinite(st, t, decltype(sub->sdecls[i]))) + return 1; + break; + case Tyunion: /* only constructors that carry a payload (etype) are walked */ + for (i = 0; i < sub->nmemb; i++) { + if (sub->udecls[i]->etype && tyinfinite(st, t, sub->udecls[i]->etype)) + return 1; + } + break; + + case Typtr: + case Tyslice: + return 0; /* self-reference through a pointer or slice is allowed */ + default: + for (i = 0; i < sub->nsub; i++) + if (tyinfinite(st, t, sub->sub[i])) + return 1; + break; + } + return 0; +} + +/* Returns 1 if t is a Typaram or a Tyname for which isgeneric() is + * true, or if any struct member, union payload (etype) or subtype of + * t recursively is; returns 0 otherwise. */ +static int needfreshen(Inferstate *st, Type *t) +{ + size_t i; + + switch (t->type) { + case Typaram: return 1; + case Tyname: return isgeneric(t); + case Tystruct: + for (i = 0; i < t->nmemb; i++) + if (needfreshen(st, decltype(t->sdecls[i]))) + return 1; + break; + case Tyunion: + for (i = 0; i < t->nmemb; i++) + if (t->udecls[i]->etype && needfreshen(st, t->udecls[i]->etype)) + return 1; + break; + default: + for (i = 0; i < t->nsub; i++) + if (needfreshen(st, t->sub[i])) + return 1; + break; + } + return 0; +} + +/* Freshens the type of a declaration. 
*/ +static Type *tyfreshen(Inferstate *st, Type *t) +{ + Htab *ht; + + if (!needfreshen(st, t)) { + if (debugopt['u']) + printf("%s isn't generic: skipping freshen\n", tystr(t)); + return t; + } + + if (debugopt['u']) + printf("Freshen %s => ", tystr(t)); + tybind(st, t); + ht = mkht(tyhash, tyeq); + t = tyspecialize(t, ht); + htfree(ht); + tyunbind(st, t); + if (debugopt['u']) + printf("%s\n", tystr(t)); + + return t; +} + + +/* Resolves a type and all it's subtypes recursively.*/ +static void tyresolve(Inferstate *st, Type *t) +{ + size_t i; + Type *base; + + if (t->resolved) + return; + /* type resolution should never throw errors about non-generics + * showing up within a generic type, so we push and pop a generic + * around resolution */ + st->ingeneric++; + t->resolved = 1; + /* Walk through aggregate type members */ + if (t->type == Tystruct) { + for (i = 0; i < t->nmemb; i++) + infernode(st, t->sdecls[i], NULL, NULL); + } else if (t->type == Tyunion) { + for (i = 0; i < t->nmemb; i++) { + t->udecls[i]->utype = t; + t->udecls[i]->utype = tf(st, t->udecls[i]->utype); + if (t->udecls[i]->etype) { + tyresolve(st, t->udecls[i]->etype); + t->udecls[i]->etype = tf(st, t->udecls[i]->etype); + } + } + } else if (t->type == Tyarray) { + infernode(st, t->asize, NULL, NULL); + } + + for (i = 0; i < t->nsub; i++) + t->sub[i] = tf(st, t->sub[i]); + base = tybase(t); + /* no-ops if base == t */ + if (t->traits) + bsunion(t->traits, base->traits); + else + t->traits = bsdup(base->traits); + if (tyinfinite(st, t, NULL)) + fatal(t->line, "Type %s includes itself", tystr(t)); + st->ingeneric--; +} + +/* Look up the best type to date in the unification table, returning it */ +static Type *tysearch(Inferstate *st, Type *t) +{ + Type *lu; + Stab *ns; + + assert(t != NULL); + lu = NULL; + while (1) { + if (!tytab[t->tid] && t->type == Tyunres) { + ns = curstab(); + if (t->name->name.ns) { + ns = getns_str(ns, t->name->name.ns); + } + if (!ns) + fatal(t->name->line, "Could not 
resolve namespace \"%s\"", t->name->name.ns); + if (!(lu = gettype(ns, t->name))) + fatal(t->name->line, "Could not resolve type %s", namestr(t->name)); + tytab[t->tid] = lu; /* remember the lookup so later searches follow the link */ + } + + if (!tytab[t->tid]) + break; + /* compress paths: shift the link up one level */ + if (tytab[tytab[t->tid]->tid]) + tytab[t->tid] = tytab[tytab[t->tid]->tid]; + t = tytab[t->tid]; + } + return t; +} + +/* Finds the most accurate type mapping we have (ie, + * the end of the unification chain), resolving the + * result with tyresolve() before returning it. */ +static Type *tf(Inferstate *st, Type *orig) +{ + Type *t; + size_t i; + int is; + + t = tysearch(st, orig); + is = isgeneric(orig); + st->ingeneric += isgeneric(orig); + tyresolve(st, t); + /* If this is an instantiation of a generic type, we want the params to + * match the instantiation */ + if (orig->type == Tyunres && isgeneric(t)) { + t = tyfreshen(st, t); + for (i = 0; i < t->narg; i++) { + unify(st, NULL, t->arg[i], orig->arg[i]); /* NOTE(review): ctx is NULL here and typeerror() reads ctx->line on a mismatch -- confirm this cannot fail */ + } + } + assert(is == isgeneric(orig)); /* the += above and the -= below must cancel out */ + st->ingeneric -= isgeneric(orig); + return t; +} + +/* set the type of any typable node; t is first run through tf() */ +static void settype(Inferstate *st, Node *n, Type *t) +{ + t = tf(st, t); + switch (n->type) { + case Nexpr: n->expr.type = t; break; + case Ndecl: n->decl.type = t; break; + case Nlit: n->lit.type = t; break; + case Nfunc: n->func.type = t; break; + default: + die("untypable node %s", nodestr(n->type)); + break; + } +} + +/* Gets the type of a literal value. A type already stored on the + * literal wins; otherwise one is built from n->lit.littype. */ +static Type *littype(Node *n) +{ + if (n->lit.type) + return n->lit.type; + switch (n->lit.littype) { + case Lchr: return mktype(n->line, Tychar); break; + case Lbool: return mktype(n->line, Tybool); break; + case Lint: return mktylike(n->line, Tyint); break; + case Lflt: return mktylike(n->line, Tyfloat64); break; + case Lstr: return mktyslice(n->line, mktype(n->line, Tybyte)); break; + case Llbl: return mktyptr(n->line, mktype(n->line, Tyvoid)); break; + case Lfunc: return n->lit.fnval->func.type; break; + }; + die("Bad lit type %d", n->lit.littype); + return NULL; +} + 
+/* Returns fallback unchanged unless it is a Tyunion. For a union, a new + * type is made with mktylike() and recorded in st->delayed as + * new type -> fallback; the new type is returned. */ static Type *delayeducon(Inferstate *st, Type *fallback) +{ + Type *t; + + if (fallback->type != Tyunion) + return fallback; + t = mktylike(fallback->line, fallback->type); + htput(st->delayed, t, fallback); + return t; +} + +/* Finds the type of any typable node; the result is passed through tf() */ +static Type *type(Inferstate *st, Node *n) +{ + Type *t; + + switch (n->type) { + case Nlit: t = littype(n); break; + case Nexpr: t = n->expr.type; break; + case Ndecl: t = decltype(n); break; + case Nfunc: t = n->func.type; break; + default: + t = NULL; + die("untypeable node %s", nodestr(n->type)); + break; + }; + return tf(st, t); +} +/* Looks up the union constructor named by n->expr.args[0], in the + * current scope or in the namespace given by args[0]->name.ns. + * Reports a fatal error when the namespace or constructor is missing, + * or when the argument count does not match whether the constructor + * has a payload type (etype). */ +static Ucon *uconresolve(Inferstate *st, Node *n) +{ + Ucon *uc; + Node **args; + Stab *ns; + + args = n->expr.args; + ns = curstab(); + if (args[0]->name.ns) + ns = getns_str(ns, args[0]->name.ns); + if (!ns) + fatal(n->line, "No namespace %s\n", args[0]->name.ns); + uc = getucon(ns, args[0]); + if (!uc) + fatal(n->line, "no union constructor `%s", ctxstr(st, args[0])); + if (!uc->etype && n->expr.nargs > 1) + fatal(n->line, "nullary union constructor `%s passed arg ", ctxstr(st, args[0])); + else if (uc->etype && n->expr.nargs != 2) /* args[0] is the constructor name, args[1] the payload */ + fatal(n->line, "union constructor `%s needs arg ", ctxstr(st, args[0])); + return uc; +} + +/* Binds the type parameters present in the + * current type into the type environment. + * Only Typaram types are recorded; anything else + * (including NULL) is ignored. */ +static void putbindings(Inferstate *st, Htab *bt, Type *t) +{ + size_t i; + char *s; + + if (!t) + return; + if (t->type != Typaram) + return; + + if (debugopt['u']) { + s = tystr(t); + printf("\tBind %s", s); + free(s); + } + if (hthas(bt, t->pname)) /* same name already in this table: unify the two params */ + unify(st, NULL, htget(bt, t->pname), t); + else if (isbound(st, t)) /* already bound in some pushed table: nothing to add */ + return; + + htput(bt, t->pname, t); + for (i = 0; i < t->narg; i++) + putbindings(st, bt, t->arg[i]); +} +/* Pushes a new binding table onto st->tybindings and fills it from t + * with putbindings(). Does nothing unless t is a Tyname or + * isgeneric(t) is true. */ +static void tybind(Inferstate *st, Type *t) +{ + Htab *bt; + char *s; + + if (t->type != Tyname && !isgeneric(t)) + return; + if (debugopt['u']) { + s = tystr(t); + printf("Binding %s", s); + free(s); + } + bt = mkht(strhash, 
streq); + lappend(&st->tybindings, &st->ntybindings, bt); + putbindings(st, bt, t); +} + +/* Binds the type parameters in the + * declaration into the type environment */ +static void bind(Inferstate *st, Node *n) +{ + Htab *bt; + + assert(n->type == Ndecl); + if (!n->decl.isgeneric) + return; + if (!n->decl.init) + fatal(n->line, "generic %s has no initializer", n->decl); + + st->ingeneric++; + bt = mkht(strhash, streq); + lappend(&st->tybindings, &st->ntybindings, bt); + + putbindings(st, bt, n->decl.type); + putbindings(st, bt, n->decl.init->expr.type); +} + +/* Rolls back the binding of type parameters in + * the type environment */ +static void unbind(Inferstate *st, Node *n) +{ + if (!n->decl.isgeneric) + return; + htfree(st->tybindings[st->ntybindings - 1]); + lpop(&st->tybindings, &st->ntybindings); + st->ingeneric--; +} + +static void tyunbind(Inferstate *st, Type *t) +{ + if (t->type != Tyname && !isgeneric(t)) + return; + htfree(st->tybindings[st->ntybindings - 1]); + lpop(&st->tybindings, &st->ntybindings); +} + +/* Constrains a type to implement the required constraints. On + * type variables, the constraint is added to the required + * constraint list. Otherwise, the type is checked to see + * if it has the required constraint */ +static void constrain(Inferstate *st, Node *ctx, Type *a, Trait *c) +{ + if (a->type == Tyvar) { + if (!a->traits) + a->traits = mkbs(); + settrait(a, c); + } else if (!a->traits || !bshas(a->traits, c->uid)) { + fatal(ctx->line, "%s needs %s near %s", tystr(a), namestr(c->name), ctxstr(st, ctx)); + } +} + +/* does b satisfy all the constraints of a? */ +static int checktraits(Type *a, Type *b) +{ + /* a has no traits to satisfy */ + if (!a->traits) + return 1; + /* b satisfies no traits; only valid if a requires none */ + if (!b->traits) + return bscount(a->traits) == 0; + /* if a->traits is a subset of b->traits, all of + * a's constraints are satisfied by b. 
*/ + return bsissubset(a->traits, b->traits); +} + +/* Merges the constraints on types */ +static void mergetraits(Inferstate *st, Node *ctx, Type *a, Type *b) +{ + size_t i, n; + char *sep; + char traitbuf[1024], abuf[1024], bbuf[1024]; + + if (b->type == Tyvar) { + /* make sure that if a = b, both have same traits */ + if (a->traits && b->traits) + bsunion(b->traits, a->traits); + else if (a->traits) + b->traits = bsdup(a->traits); + else if (b->traits) + a->traits = bsdup(b->traits); + } else { + if (!checktraits(a, b)) { + sep = ""; + n = 0; + for (i = 0; bsiter(a->traits, &i); i++) { + if (!b->traits || !bshas(b->traits, i)) + n += snprintf(traitbuf + n, sizeof(traitbuf) - n, "%s%s", sep, namestr(traittab[i]->name)); + sep = ","; + } + tyfmt(abuf, sizeof abuf, a); + tyfmt(bbuf, sizeof bbuf, b); + fatal(ctx->line, "%s missing traits %s for %s near %s", bbuf, traitbuf, abuf, ctxstr(st, ctx)); + } + } +} + +/* Tells us if we have an index hack on the type */ +static int idxhacked(Type *a, Type *b) +{ + return (a->type == Tyvar && a->nsub > 0) || a->type == Tyarray || a->type == Tyslice; +} + +/* prevents types that contain themselves in the unification; + * eg @a U (@a -> foo) */ +static int occurs(Type *a, Type *b) +{ + size_t i; + + if (a == b) + return 1; + for (i = 0; i < b->nsub; i++) + if (occurs(a, b->sub[i])) + return 1; + return 0; +} + +/* Computes the 'rank' of the type; ie, in which + * direction should we unify. A lower ranked type + * should be mapped to the higher ranked (ie, more + * specific) type. 
*/ +static int tyrank(Type *t) +{ + /* plain tyvar */ + if (t->type == Tyvar && t->nsub == 0) + return 0; + /* parameterized tyvar */ + if (t->type == Tyvar && t->nsub > 0) + return 1; + /* concrete type */ + return 2; +} + +/* True for a Tyname that carries type arguments (narg > 0). */ +static int hasparam(Type *t) +{ + return t->type == Tyname && t->narg > 0; +} + +/* Unifies the members of two distinct unions, or of two distinct + * structs, pairwise. u and v are first replaced by their delayed + * fallback (if st->delayed has one) and then by tybase(). Any other + * pairing of types is left alone. */ +static void membunify(Inferstate *st, Node *ctx, Type *u, Type *v) { + size_t i; + + if (hthas(st->delayed, u)) + u = htget(st->delayed, u); + u = tybase(u); + if (hthas(st->delayed, v)) + v = htget(st->delayed, v); + v = tybase(v); + if (u->type == Tyunion && v->type == Tyunion && u != v) { + /* compare, don't assign: the loop below indexes both member lists */ + assert(u->nmemb == v->nmemb); + for (i = 0; i < v->nmemb; i++) { + if (u->udecls[i]->etype) + unify(st, ctx, u->udecls[i]->etype, v->udecls[i]->etype); + } + } else if (u->type == Tystruct && v->type == Tystruct && u != v) { + /* compare, don't assign: the loop below indexes both member lists */ + assert(u->nmemb == v->nmemb); + for (i = 0; i < v->nmemb; i++) { + assert(!strcmp(namestr(u->sdecls[i]->decl.name), namestr(v->sdecls[i]->decl.name))); + unify(st, u->sdecls[i], type(st, u->sdecls[i]), type(st, v->sdecls[i])); + } + } +} + +/* Unifies two types, or errors if the types are not unifiable. */ +static Type *unify(Inferstate *st, Node *ctx, Type *u, Type *v) +{ + Type *t, *r; + Type *a, *b; + char *from, *to; + char buf[256]; + size_t i; + + /* a ==> b */ + a = tf(st, u); + b = tf(st, v); + if (a == b) + return a; + + /* we unify from lower to higher ranked types */ + if (tyrank(b) < tyrank(a)) { + t = a; + a = b; + b = t; + } + + if (debugopt['u']) { + from = tystr(a); + to = tystr(b); + printf("Unify %s => %s\n", from, to); + free(from); + free(to); + } + + r = NULL; + if (a->type == Tyvar) { + tytab[a->tid] = b; + r = b; + } + + /* Disallow recursive types */ + if (a->type == Tyvar && b->type != Tyvar) { + if (occurs(a, b)) + typeerror(st, a, b, ctx, "Infinite type\n"); + } + + /* if the tyrank of a is 0 (ie, a raw tyvar), just unify. + * Otherwise, match up subtypes. 
*/ + if ((a->type == b->type || idxhacked(a, b)) && tyrank(a) != 0) { + if (a->type == Tyname && !nameeq(a->name, b->name)) + typeerror(st, a, b, ctx, NULL); + if (a->nsub != b->nsub) { + snprintf(buf, sizeof buf, "Wrong subtype count - Got %zu, expected %zu", a->nsub, b->nsub); + typeerror(st, a, b, ctx, buf); + } + for (i = 0; i < b->nsub; i++) + unify(st, ctx, a->sub[i], b->sub[i]); + r = b; + } else if (hasparam(a) && hasparam(b)) { + /* Only Tygeneric and Tyname should be able to unify. And they + * should have the same names for this to be true. + * NOTE(review): hasparam() requires Tyname, so when it holds for + * both types the first branch above already matches (same type, + * rank 2); this branch looks unreachable -- confirm. */ + if (!nameeq(a->name, b->name)) + typeerror(st, a, b, ctx, NULL); + if (a->narg != b->narg) + typeerror(st, a, b, ctx, "Incompatible parameter lists"); + for (i = 0; i < a->narg; i++) + unify(st, ctx, a->arg[i], b->arg[i]); + } else if (a->type != Tyvar) { + typeerror(st, a, b, ctx, NULL); + } + mergetraits(st, ctx, a, b); + membunify(st, ctx, a, b); + + /* if we have delayed types for a tyvar, transfer it over. */ + if (a->type == Tyvar && b->type == Tyvar) { + if (hthas(st->delayed, a) && !hthas(st->delayed, b)) + htput(st->delayed, b, htget(st->delayed, a)); + else if (hthas(st->delayed, b) && !hthas(st->delayed, a)) + htput(st->delayed, a, htget(st->delayed, b)); + } else if (hthas(st->delayed, a)) { + unify(st, ctx, htget(st->delayed, a), tybase(b)); + } + + return r; /* b when a was a type variable or the subtype branch ran, else NULL */ +} + +/* Applies unifications to function calls. + * Function application requires a slightly + * different approach to unification. 
*/ +static void unifycall(Inferstate *st, Node *n) +{ + size_t i; + Type *ft; + + ft = type(st, n->expr.args[0]); + if (ft->type == Tyvar) { + /* the first arg is the function itself, so it shouldn't be counted */ + ft = mktyfunc(n->line, &n->expr.args[1], n->expr.nargs - 1, mktyvar(n->line)); + unify(st, n, ft, type(st, n->expr.args[0])); + } + for (i = 1; i < n->expr.nargs; i++) { + if (i == ft->nsub) + fatal(n->line, "%s arity mismatch (expected %zd args, got %zd)", + ctxstr(st, n->expr.args[0]), ft->nsub - 1, n->expr.nargs - 1); + + if (ft->sub[i]->type == Tyvalist) + break; + inferexpr(st, n->expr.args[i], NULL, NULL); + unify(st, n->expr.args[0], ft->sub[i], type(st, n->expr.args[i])); + } + if (i < ft->nsub && ft->sub[i]->type != Tyvalist) + fatal(n->line, "%s arity mismatch (expected %zd args, got %zd)", + ctxstr(st, n->expr.args[0]), ft->nsub - 1, i - 1); + settype(st, n, ft->sub[0]); +} + +static void unifyparams(Inferstate *st, Node *ctx, Type *a, Type *b) +{ + size_t i; + + /* The only types with unifiable params are Tyunres and Tyname. + * Tygeneric should always be freshened, and no other types have + * parameters attached. + * + * FIXME: Is it possible to have parameterized typarams? */ + if (a->type != Tyunres && a->type != Tyname) + return; + if (b->type != Tyunres && b->type != Tyname) + return; + + if (a->narg != b->narg) + fatal(ctx->line, "Mismatched parameter list sizes: %s with %s near %s", tystr(a), tystr(b), ctxstr(st, ctx)); + for (i = 0; i < a->narg; i++) + unify(st, ctx, a->arg[i], b->arg[i]); +} + +static void loaduses(Node *n) +{ + size_t i; + + /* uses only allowed at top level. Do we want to keep it this way? 
*/ + for (i = 0; i < n->file.nuses; i++) + readuse(n->file.uses[i], n->file.globls); +} +/* Re-inserts every impl of s: each node is looked up by its old key, + * removed from s->impl, given tf(st, type) as its impl.type and then + * put back with putimpl(). The key array from htkeys() is freed here. */ +static void fiximpls(Inferstate *st, Stab *s) +{ + Node *n; + void **k; + size_t nk, i; + + k = htkeys(s->impl, &nk); + for (i = 0; i < nk; i++) { + n = getimpl(s, k[i]); + htdel(s->impl, k[i]); + n->impl.type = tf(st, n->impl.type); + putimpl(s, n); + } + free(k); +} + +/* The exports in package declarations + * need to be merged with the declarations + * at the global scope. Declarations in + * one may set the type of the other, + * so this should be done early in the + * process */ +static void mergeexports(Inferstate *st, Node *file) +{ + Stab *exports, *globls; + size_t i, nk; + void **k; + /* export, global version */ + Node *nx, *ng; + Type *tx, *tg; + Trait *trx, *trg; + Ucon *ux, *ug; + + exports = file->file.exports; + globls = file->file.globls; + + /* export the types */ + pushstab(globls); + k = htkeys(exports->ty, &nk); + for (i = 0; i < nk; i++) { + tx = gettype(exports, k[i]); + nx = k[i]; + if (tx) { /* the export carries the type itself: it must not also be in globls */ + tg = gettype(globls, nx); + if (!tg) + puttype(globls, nx, tx); + else + fatal(nx->line, "Exported type %s already declared on line %d", namestr(nx), tg->line); + } else { /* the export only names the type: take it from globls */ + tg = gettype(globls, nx); + if (tg) + updatetype(exports, nx, tf(st, tg)); + else + fatal(nx->line, "Exported type %s not declared", namestr(nx)); + } + } + free(k); + + /* export the traits */ + k = htkeys(exports->tr, &nk); + for (i = 0; i < nk; i++) { + trx = gettrait(exports, k[i]); + nx = k[i]; + if (!trx->isproto) { + trg = gettrait(globls, nx); + if (!trg) + puttrait(globls, nx, trx); + else + fatal(nx->line, "Exported trait %s already declared on line %d", namestr(nx), trg->name->line); + } else { + trg = gettrait(globls, nx); + if (trg && !trg->isproto) { + *trx = *trg; /* fill the exported prototype in from the global definition */ + } else { + fatal(nx->line, "Exported trait %s not declared", namestr(nx)); + } + } + trx->vis = Visexport; + } + free(k); + + /* + * if we neglect to fix the types for impls before + * lookups, getimpl() on the 
global with the key from + * the export table will fail. + */ + fiximpls(st, exports); + fiximpls(st, globls); + + /* export the impls */ + k = htkeys(exports->impl, &nk); + for (i = 0; i < nk; i++) { + nx = getimpl(exports, k[i]); + ng = getimpl(globls, k[i]); + + if (nx->impl.isproto) { + if (!ng) + fatal(nx->line, "Missing trait impl body for %s %s\n", namestr(nx->impl.traitname), tystr(nx->impl.type)); + htdel(exports->impl, k[i]); + putimpl(exports, ng); + ng->impl.vis = Visexport; + } else { + if (!ng) { + putimpl(globls, nx); + } else { + fatal(nx->line, "Double trait impl body for %s %s on line %d\n", + namestr(nx->impl.traitname), tystr(nx->impl.type), ng->line); + } + } + lappend(&exportimpls, &nexportimpls, ng); + } + free(k); + + /* export the declarations */ + k = htkeys(exports->dcl, &nk); + for (i = 0; i < nk; i++) { + nx = getdcl(exports, k[i]); + ng = getdcl(globls, k[i]); + /* if an export has an initializer, it shouldn't be declared in the + * body */ + if (nx->decl.init && ng) + fatal(nx->line, "Export %s double-defined on line %d", ctxstr(st, nx), ng->line); + if (ng && nx->decl.isgeneric != ng->decl.isgeneric) + fatal(nx->line, "Export %s defined with different genericness on line %d", ctxstr(st, nx), ng->line); + if (!ng) + putdcl(globls, nx); + else + unify(st, nx, type(st, ng), type(st, nx)); + } + free(k); + + + + /* export the union constructors */ + k = htkeys(exports->uc, &nk); + for (i = 0; i < nk; i++) { + ux = getucon(exports, k[i]); + ug = getucon(globls, k[i]); + /* if an export has an initializer, it shouldn't be declared in the + * body */ + if (ux && ug) + fatal(ux->line, "Union constructor double defined on %d", ux->line); + else if (!ug) + putucon(globls, ux); + else + putucon(exports, ug); + } + free(k); + + popstab(); +} + +static Type *initvar(Inferstate *st, Node *n, Node *s) +{ + Type *t; + + if (s->decl.ishidden) + fatal(n->line, "attempting to refer to hidden decl %s", ctxstr(st, n)); + if (s->decl.isgeneric) + t = 
tyfreshen(st, tf(st, s->decl.type)); + else + t = s->decl.type; + settype(st, n, t); + n->expr.did = s->decl.did; + n->expr.isconst = s->decl.isconst; + if (s->decl.isgeneric && !st->ingeneric) { /* a generic used from non-generic code: record it on the specialization lists */ + lappend(&st->specializationscope, &st->nspecializationscope, curstab()); + lappend(&st->specializations, &st->nspecializations, n); + lappend(&st->genericdecls, &st->ngenericdecls, s); + } + return t; +} + +/* Finds out if the member reference is actually + * referring to a namespaced name, instead of a struct + * member. If it is, it transforms it into the variable + * reference we should have, instead of the Omemb expr + * that we do have. *ret is only written when the + * substitution happens. */ +static void checkns(Inferstate *st, Node *n, Node **ret) +{ + Node *var, *name, *nsname; + Node **args; + Stab *stab; + Node *s; + + /* check that this is a namespaced declaration */ + if (n->type != Nexpr) + return; + if (!n->expr.nargs) + return; + args = n->expr.args; + if (args[0]->type != Nexpr || exprop(args[0]) != Ovar) + return; + name = args[0]->expr.args[0]; + stab = getns(curstab(), name); + if (!stab) + return; + + /* substitute the namespaced name */ + nsname = mknsname(n->line, namestr(name), namestr(args[1])); /* NOTE(review): args[1] is read after only checking nargs != 0 -- presumably n always has two args here; confirm */ + s = getdcl(stab, args[1]); + if (!s) + fatal(n->line, "Undeclared var %s.%s", nsname->name.ns, nsname->name.name); + var = mkexpr(n->line, Ovar, nsname, NULL); + initvar(st, var, s); + *ret = var; +} +/* Infers every argument of the struct literal n, gives n a fresh type + * variable and queues n for the delayed check. *isconst is set to 1 + * only if every argument is constant. */ +static void inferstruct(Inferstate *st, Node *n, int *isconst) +{ + size_t i; + + *isconst = 1; + for (i = 0; i < n->expr.nargs; i++) { + infernode(st, n->expr.args[i], NULL, NULL); + if (!n->expr.args[i]->expr.isconst) + *isconst = 0; + } + settype(st, n, mktyvar(n->line)); + delayedcheck(st, n, curstab()); +} +/* Types the array literal n as an array of a fresh type variable whose + * length literal is the argument count, unifying each element's type + * with the element type. *isconst is set to 1 only if every element + * is constant. */ +static void inferarray(Inferstate *st, Node *n, int *isconst) +{ + size_t i; + Type *t; + Node *len; + + *isconst = 1; + len = mkintlit(n->line, n->expr.nargs); + t = mktyarray(n->line, mktyvar(n->line), len); + for (i = 0; i < n->expr.nargs; i++) { + infernode(st, n->expr.args[i], NULL, NULL); + 
unify(st, n, t->sub[0], type(st, n->expr.args[i])); + if (!n->expr.args[i]->expr.isconst) + *isconst = 0; + } + settype(st, n, t); +} +/* Infers each element of the tuple literal n and types n as a tuple of + * the element types. + * NOTE(review): constness is accumulated in n->expr.isconst, starting + * from whatever value it already holds, and then copied to *isconst; + * the initial *isconst = 1 is overwritten -- confirm this is intended. */ +static void infertuple(Inferstate *st, Node *n, int *isconst) +{ + Type **types; + size_t i; + + *isconst = 1; + types = xalloc(sizeof(Type *)*n->expr.nargs); + for (i = 0; i < n->expr.nargs; i++) { + infernode(st, n->expr.args[i], NULL, NULL); + n->expr.isconst = n->expr.isconst && n->expr.args[i]->expr.isconst; + types[i] = type(st, n->expr.args[i]); + } + *isconst = n->expr.isconst; + settype(st, n, mktytuple(n->line, types, n->expr.nargs)); +} +/* Types a union constructor expression: resolves the constructor, + * freshens its union type, unifies the payload argument (args[1]) with + * the constructor's etype when it has one, and sets n's type through + * delayeducon(). */ +static void inferucon(Inferstate *st, Node *n, int *isconst) +{ + Ucon *uc; + Type *t; + + uc = uconresolve(st, n); + t = tyfreshen(st, tf(st, uc->utype)); + uc = tybase(t)->udecls[uc->id]; /* switch to the same constructor inside the freshened union */ + if (uc->etype) { + inferexpr(st, n->expr.args[1], NULL, NULL); + unify(st, n, uc->etype, type(st, n->expr.args[1])); + } + *isconst = n->expr.args[0]->expr.isconst; /* NOTE(review): uconresolve() reads args[0] through its .name fields, yet .expr.isconst is read here -- confirm */ + settype(st, n, delayeducon(st, t)); +} +/* Infers the types in the match pattern n. Arguments that are + * expressions are visited first. An Ovar that names no visible + * declaration becomes a new binding: a decl with a fresh type + * variable and val as its initializer is appended to *bind. */ +static void inferpat(Inferstate *st, Node *n, Node *val, Node ***bind, size_t *nbind) +{ + size_t i; + Node **args; + Node *s; + Type *t; + + args = n->expr.args; + for (i = 0; i < n->expr.nargs; i++) + if (args[i]->type == Nexpr) + inferpat(st, args[i], val, bind, nbind); + switch (exprop(n)) { + case Otup: + case Ostruct: + case Oarr: + case Olit: + case Omemb: + infernode(st, n, NULL, NULL); break; + /* arithmetic expressions just need to be constant */ + case Oneg: + case Oadd: + case Osub: + case Omul: + case Odiv: + case Obsl: + case Obsr: + case Oband: + case Obor: + case Obxor: + case Obnot: + infernode(st, n, NULL, NULL); + if (!n->expr.isconst) + fatal(n->line, "matching against non-constant expression"); + break; + case Oucon: inferucon(st, n, &n->expr.isconst); break; + case Ovar: + s = getdcl(curstab(), args[0]); + if (s && !s->decl.ishidden) { /* the name refers to an existing, visible declaration */ + if (s->decl.isgeneric) + t = tyfreshen(st, s->decl.type); + else if (s->decl.isconst) + t = s->decl.type; + else + fatal(n->line, 
"Can't match against non-constant variables near %s", ctxstr(st, n)); + } else { /* unknown or hidden name: the pattern introduces a new variable */ + t = mktyvar(n->line); + s = mkdecl(n->line, n->expr.args[0], t); + s->decl.init = val; + settype(st, n, t); + lappend(bind, nbind, s); + } + settype(st, n, t); + n->expr.did = s->decl.did; + break; + default: + fatal(n->line, "invalid pattern"); + break; + } +} +/* Declares each of the nbind bindings in n->block.scope and inserts + * its decl at the front of n->block.stmts. st is not used here. */ +void addbindings(Inferstate *st, Node *n, Node **bind, size_t nbind) +{ + size_t i; + + /* order of binding shouldn't matter, so push them into the block + * in reverse order. */ + for (i = 0; i < nbind; i++) { + putdcl(n->block.scope, bind[i]); + linsert(&n->block.stmts, &n->block.nstmts, 0, bind[i]); + } +} + +static void infersub(Inferstate *st, Node *n, Type *ret, int *sawret, int *exprconst) +{ + Node **args; + size_t i, nargs; + int isconst; + + args = n->expr.args; + nargs = n->expr.nargs; + isconst = 1; + for (i = 0; i < nargs; i++) { + /* Nlit, Nvar, etc should not be inferred as exprs */ + if (args[i]->type == Nexpr) { + /* Omemb can sometimes resolve to a namespace. We have to check + * this. Icky. */ + inferexpr(st, args[i], ret, sawret); + isconst = isconst && 