diff -Nru /sys/src/9k/386/ahci.h /sys/src/9k/386/ahci.h
--- /sys/src/9k/386/ahci.h Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/386/ahci.h Wed Dec 9 00:00:00 2015
@@ -0,0 +1,293 @@
+/*
+ * advanced host controller interface (sata)
+ * © 2007 coraid, inc
+ */
+
+/* ata errors */
+enum {
+	Emed	= 1<<0,		/* media error */
+	Enm	= 1<<1,		/* no media */
+	Eabrt	= 1<<2,		/* abort */
+	Emcr	= 1<<3,		/* media change request */
+	Eidnf	= 1<<4,		/* no user-accessible address */
+	Emc	= 1<<5,		/* media change */
+	Eunc	= 1<<6,		/* data error */
+	Ewp	= 1<<6,		/* write protect (same bit as Eunc) */
+	Eicrc	= 1<<7,		/* interface crc error */
+
+	Efatal	= Eidnf|Eicrc,	/* must sw reset */
+};
+
+/* ata status */
+enum {
+	ASerr	= 1<<0,		/* error */
+	ASdrq	= 1<<3,		/* request */
+	ASdf	= 1<<5,		/* fault */
+	ASdrdy	= 1<<6,		/* ready */
+	ASbsy	= 1<<7,		/* busy */
+
+	ASobs	= 1<<1|1<<2|1<<4,	/* the remaining status bits (1, 2 and 4) */
+};
+
+/* pci configuration */
+enum {
+	Abar	= 5,
+};
+
+/*
+ * ahci memory configuration
+ *
+ * 0000-0023	generic host control
+ * 0024-009f	reserved
+ * 00a0-00ff	vendor specific.
+ * 0100-017f	port 0
+ * ...
+ * 1080-1100	port 31
+ */
+
+/* cap bits: supported features */
+enum {
+	Hs64a	= 1<<31,	/* 64-bit addressing */
+	Hsncq	= 1<<30,	/* ncq */
+	Hssntf	= 1<<29,	/* snotification reg. */
+	Hsmps	= 1<<28,	/* mech pres switch */
+	Hsss	= 1<<27,	/* staggered spinup */
+	Hsalp	= 1<<26,	/* aggressive link pm */
+	Hsal	= 1<<25,	/* activity led */
+	Hsclo	= 1<<24,	/* command-list override */
+	Hiss	= 1<<20,	/* for interface speed */
+//	Hsnzo	= 1<<19,
+	Hsam	= 1<<18,	/* ahci-mode only */
+	Hspm	= 1<<17,	/* port multiplier */
+//	Hfbss	= 1<<16,
+	Hpmb	= 1<<15,	/* multiple-block pio */
+	Hssc	= 1<<14,	/* slumber state */
+	Hpsc	= 1<<13,	/* partial-slumber state */
+	Hncs	= 1<<8,		/* n command slots */
+	Hcccs	= 1<<7,		/* coal */
+	Hems	= 1<<6,		/* enclosure mgmt. */
+	Hsxs	= 1<<5,		/* external sata */
+	Hnp	= 1<<0,		/* n ports */
+};
+
+/* ghc bits */
+enum {
+	Hae	= 1<<31,	/* enable ahci */
+	Hie	= 1<<1,		/* enable interrupts */
+	Hhr	= 1<<0,		/* hba reset */
+};
+
+typedef struct {
+	ulong	cap;
+	ulong	ghc;
+	ulong	isr;
+	ulong	pi;		/* ports implemented */
+	ulong	ver;
+	ulong	ccc;		/* coalescing control */
+	ulong	cccports;
+	ulong	emloc;
+	ulong	emctl;
+} Ahba;
+
+enum {				/* NOTE(review): presumably Aport isr/ie bits - confirm against the driver */
+	Acpds	= 1<<31,	/* cold port detect status */
+	Atfes	= 1<<30,	/* task file error status */
+	Ahbfs	= 1<<29,	/* hba fatal */
+	Ahbds	= 1<<28,	/* hba error (parity error) */
+	Aifs	= 1<<27,	/* interface fatal §6.1.2 */
+	Ainfs	= 1<<26,	/* interface error (recovered) */
+	Aofs	= 1<<24,	/* too many bytes from disk */
+	Aipms	= 1<<23,	/* incorrect prt mul status */
+	Aprcs	= 1<<22,	/* PhyRdy change status Pxserr.diag.n */
+	Adpms	= 1<<7,		/* mechanical presence status */
+	Apcs	= 1<<6,		/* port connect diag.x */
+	Adps	= 1<<5,		/* descriptor processed */
+	Aufs	= 1<<4,		/* unknown fis diag.f */
+	Asdbs	= 1<<3,		/* set device bits fis received w/ i bit set */
+	Adss	= 1<<2,		/* dma setup */
+	Apio	= 1<<1,		/* pio setup fis */
+	Adhrs	= 1<<0,		/* device to host register fis */
+
+	IEM	= Acpds|Atfes|Ahbds|Ahbfs|Ahbds|Aifs|Ainfs|Aprcs|Apcs|Adps|
+			Aufs|Asdbs|Adss|Adhrs,	/* Ahbds is listed twice above; harmless */
+	Ifatal	= Atfes|Ahbfs|Ahbds|Aifs,
+};
+
+/* serror bits */
+enum {
+	SerrX	= 1<<26,	/* exchanged */
+	SerrF	= 1<<25,	/* unknown fis */
+	SerrT	= 1<<24,	/* transition error */
+	SerrS	= 1<<23,	/* link sequence */
+	SerrH	= 1<<22,	/* handshake */
+	SerrC	= 1<<21,	/* crc */
+	SerrD	= 1<<20,	/* not used by ahci */
+	SerrB	= 1<<19,	/* 10b-to-8b decode */
+	SerrW	= 1<<18,	/* comm wake */
+	SerrI	= 1<<17,	/* phy internal */
+	SerrN	= 1<<16,	/* phyrdy change */
+
+	ErrE	= 1<<11,	/* internal */
+	ErrP	= 1<<10,	/* ata protocol violation */
+	ErrC	= 1<<9,		/* communication */
+	ErrT	= 1<<8,		/* transient */
+	ErrM	= 1<<1,		/* recovered comm */
+	ErrI	= 1<<0,		/* recovered data integrity */
+
+	ErrAll	= ErrE|ErrP|ErrC|ErrT|ErrM|ErrI,
+	SerrAll	= SerrX|SerrF|SerrT|SerrS|SerrH|SerrC|SerrD|SerrB|SerrW|
+			SerrI|SerrN|ErrAll,
+	SerrBad	= 0x7f<<19,	/* bits 19-25: SerrB through SerrF */
+};
+
+/* cmd register bits */
+enum {
+	Aicc	= 1<<28,	/* interface communications control. 4 bits */
+	Aasp	= 1<<27,	/* aggressive slumber & partial sleep */
+	Aalpe	= 1<<26,	/* aggressive link pm enable */
+	Adlae	= 1<<25,	/* drive led on atapi */
+	Aatapi	= 1<<24,	/* device is atapi */
+	Aesp	= 1<<21,	/* external sata port */
+	Acpd	= 1<<20,	/* cold presence detect */
+	Ampsp	= 1<<19,	/* mechanical pres. */
+	Ahpcp	= 1<<18,	/* hot plug capable */
+	Apma	= 1<<17,	/* pm attached */
+	Acps	= 1<<16,	/* cold presence state */
+	Acr	= 1<<15,	/* cmdlist running */
+	Afr	= 1<<14,	/* fis running */
+	Ampss	= 1<<13,	/* mechanical presence switch state */
+	Accs	= 1<<8,		/* current command slot 12:08 */
+	Afre	= 1<<4,		/* fis enable receive */
+	Aclo	= 1<<3,		/* command list override */
+	Apod	= 1<<2,		/* power on dev (requires cold-pres. detect) */
+	Asud	= 1<<1,		/* spin-up device; requires ss capability */
+	Ast	= 1<<0,		/* start */
+
+	Arun	= Ast|Acr|Afre|Afr,
+};
+
+/* ctl register bits */
+enum {
+	Aipm	= 1<<8,		/* interface power mgmt. 3=off */
+	Aspd	= 1<<4,
+	Adet	= 1<<0,		/* device detection */
+};
+
+#define	sstatus	scr0
+#define	sctl	scr2
+#define	serror	scr1
+#define	sactive	scr3
+
+typedef struct {
+	ulong	list;		/* PxCLB must be 1kb aligned. */
+	ulong	listhi;
+	ulong	fis;		/* 256-byte aligned */
+	ulong	fishi;
+	ulong	isr;
+	ulong	ie;		/* interrupt enable */
+	ulong	cmd;
+	ulong	res1;
+	ulong	task;
+	ulong	sig;
+	ulong	scr0;		/* sstatus */
+	ulong	scr2;		/* sctl; note scr2 is declared before scr1 */
+	ulong	scr1;		/* serror */
+	ulong	scr3;		/* sactive */
+	ulong	ci;		/* command issue */
+	ulong	ntf;
+	uchar	res2[8];
+	ulong	vendor;
+} Aport;
+
+enum {
+	/*
+	 * Aport sstatus bits (actually states):
+	 * 11-8 interface power management
+	 *  7-4 current interface speed (generation #)
+	 *  3-0 device detection
+	 */
+	Intslumber	= 0x600,
+	Intpartpwr	= 0x200,
+	Intactive	= 0x100,
+	Intpm		= 0xf00,
+
+	Devphyoffline	= 4,
+	Devphycomm	= 2,		/* phy communication established */
+	Devpresent	= 1,
+	Devdet		= Devpresent | Devphycomm | Devphyoffline,
+};
+
+/* in host's memory; not memory mapped */
+typedef struct {
+	uchar	*base;
+	uchar	*d;
+	uchar	*p;
+	uchar	*r;
+	uchar	*u;
+	ulong	*devicebits;
+} Afis;
+
+enum {				/* NOTE(review): presumably bits of Alist.flags - confirm */
+	Lprdtl	= 1<<16,	/* physical region descriptor table len */
+	Lpmp	= 1<<12,	/* port multiplier port */
+	Lclear	= 1<<10,	/* clear busy on R_OK */
+	Lbist	= 1<<9,
+	Lreset	= 1<<8,
+	Lpref	= 1<<7,		/* prefetchable */
+	Lwrite	= 1<<6,
+	Latapi	= 1<<5,
+	Lcfl	= 1<<0,		/* command fis length in double words */
+};
+
+/* in host's memory; memory mapped */
+typedef struct {
+	ulong	flags;
+	ulong	len;
+	ulong	ctab;
+	ulong	ctabhi;
+	uchar	reserved[16];
+} Alist;
+
+typedef struct {
+	ulong	dba;
+	ulong	dbahi;
+	ulong	pad;
+	ulong	count;
+} Aprdt;
+
+typedef struct {
+	uchar	cfis[0x40];
+	uchar	atapi[0x10];
+	uchar	pad[0x30];
+	Aprdt	prdt;
+} Actab;
+
+enum {
+	Ferror	= 1,
+	Fdone	= 2,
+};
+
+enum {
+	Dllba	= 1,
+	Dsmart	= 1<<1,
+	Dpower	= 1<<2,
+	Dnop	= 1<<3,
+	Datapi	= 1<<4,
+	Datapi16= 1<<5,
+};
+
+typedef struct {
+	QLock;
+	Rendez;
+	uchar	flag;
+	uchar	feat;
+	uchar	smart;
+	Afis	fis;
+	Alist	*list;
+	Actab	*ctab;
+} Aportm;
+
+typedef struct {
+	Aport	*p;
+	Aportm	*pm;
+} Aportc;
diff -Nru /sys/src/9k/386/devether.c /sys/src/9k/386/devether.c
--- /sys/src/9k/386/devether.c Thu Jan 1 00:00:00 1970
+++ 
/sys/src/9k/386/devether.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,586 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "io.h"
+
+#include "../port/netif.h"
+
+#include "etherif.h"
+
+static Ether *etherxx[MaxEther];
+
+/* attach to controller number spec (default 0); calls the driver's attach hook if it has one */
+Chan*
+etherattach(char* spec)
+{
+	int ctlrno;
+	char *p;
+	Chan *chan;
+
+	ctlrno = 0;
+	if(spec && *spec){
+		ctlrno = strtoul(spec, &p, 0);
+		if((ctlrno == 0 && p == spec) || *p != 0)
+			error(Ebadarg);
+		if(ctlrno < 0 || ctlrno >= MaxEther)
+			error(Ebadarg);
+	}
+	if(etherxx[ctlrno] == 0)
+		error(Enodev);
+
+	chan = devattach('l', spec);
+	if(waserror()){
+		chanfree(chan);
+		nexterror();
+	}
+	chan->devno = ctlrno;
+	if(etherxx[ctlrno]->attach)
+		etherxx[ctlrno]->attach(etherxx[ctlrno]);
+	poperror();
+	return chan;
+}
+
+static Walkqid*
+etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
+{
+	return netifwalk(etherxx[chan->devno], chan, nchan, name, nname);
+}
+
+static long
+etherstat(Chan* chan, uchar* dp, long n)
+{
+	return netifstat(etherxx[chan->devno], chan, dp, n);
+}
+
+static Chan*
+etheropen(Chan* chan, int omode)
+{
+	return netifopen(etherxx[chan->devno], chan, omode);
+}
+
+static void
+ethercreate(Chan*, char*, int, int)
+{
+}
+
+static void
+etherclose(Chan* chan)
+{
+	netifclose(etherxx[chan->devno], chan);
+}
+
+static long
+etherread(Chan* chan, void* buf, long n, vlong off)
+{
+	Ether *ether;
+	ulong offset = off;
+
+	ether = etherxx[chan->devno];
+	if((chan->qid.type & QTDIR) == 0 && ether->ifstat){
+		/*
+		 * With some controllers it is necessary to reach
+		 * into the chip to extract statistics.
+		 */
+		if(NETTYPE(chan->qid.path) == Nifstatqid)
+			return ether->ifstat(ether, buf, n, offset);
+		else if(NETTYPE(chan->qid.path) == Nstatqid)
+			ether->ifstat(ether, buf, 0, offset);
+	}
+
+	return netifread(ether, chan, buf, n, offset);
+}
+
+static Block*
+etherbread(Chan* chan, long n, vlong offset)
+{
+	return netifbread(etherxx[chan->devno], chan, n, offset);
+}
+
+static long
+etherwstat(Chan* chan, uchar* dp, long n)
+{
+	return netifwstat(etherxx[chan->devno], chan, dp, n);
+}
+
+/* queue a 64-byte record on f: up to 58 bytes from pkt->d, 2-byte length, 4-byte TK2MS(sys->ticks) */
+static void
+etherrtrace(Netfile* f, Etherpkt* pkt, int len)
+{
+	int i, n;
+	Block *bp;
+
+	if(qwindow(f->iq) <= 0)
+		return;
+	if(len > 58)
+		n = 58;
+	else
+		n = len;
+	bp = iallocb(64);
+	if(bp == nil)
+		return;
+	memmove(bp->wp, pkt->d, n);
+	i = TK2MS(sys->ticks);
+	bp->wp[58] = len>>8;
+	bp->wp[59] = len;
+	bp->wp[60] = i>>24;
+	bp->wp[61] = i>>16;
+	bp->wp[62] = i>>8;
+	bp->wp[63] = i;
+	bp->wp += 64;
+	qpass(f->iq, bp);
+}
+
+/*
+ * Deliver an input packet to every open connection that wants it.
+ * Returns 0 if bp was consumed (queued or freed), otherwise bp.
+ */
+Block*
+etheriq(Ether* ether, Block* bp, int fromwire)
+{
+	Etherpkt *pkt;
+	ushort type;
+	int len, multi, tome, fromme;
+	Netfile **ep, *f, **fp, *fx;
+	Block *xbp;
+
+	ether->inpackets++;
+
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	type = (pkt->type[0]<<8)|pkt->type[1];
+	fx = 0;
+	ep = &ether->f[Ntypes];	/* one past the last connection slot */
+
+	multi = pkt->d[0] & 1;
+	/* check for valid multicast addresses */
+	if(multi && memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) != 0 && ether->prom == 0){
+		if(!activemulti(ether, pkt->d, sizeof(pkt->d))){
+			if(fromwire){
+				freeb(bp);
+				bp = 0;
+			}
+			return bp;
+		}
+	}
+
+	/* is it for me? */
+	tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
+
+	/*
+	 * Multiplex the packet to all the connections which want it.
+	 * If the packet is not to be used subsequently (fromwire != 0),
+	 * attempt to simply pass it into one of the connections, thereby
+	 * saving a copy of the data (usual case hopefully).
+	 */
+	for(fp = ether->f; fp < ep; fp++){
+		if(f = *fp)
+		if(f->type == type || f->type < 0)
+		if(tome || multi || f->prom){
+			/* Don't want to hear bridged packets */
+			if(f->bridge && !fromwire && !fromme)
+				continue;
+			if(!f->headersonly){
+				if(fromwire && fx == 0)
+					fx = f;
+				else if(xbp = iallocb(len)){
+					memmove(xbp->wp, pkt, len);
+					xbp->wp += len;
+					if(qpass(f->iq, xbp) < 0)
+						ether->inoverflows++;
+				}
+				else
+					ether->inoverflows++;
+			}
+			else
+				etherrtrace(f, pkt, len);
+		}
+	}
+
+	if(fx){
+		if(qpass(fx->iq, bp) < 0)
+			ether->inoverflows++;
+		return 0;
+	}
+	if(fromwire){
+		freeb(bp);
+		return 0;
+	}
+
+	return bp;
+}
+
+/* send bp, first feeding it back to local readers when appropriate; returns the packet length */
+static int
+etheroq(Ether* ether, Block* bp)
+{
+	int len, loopback, s;
+	Etherpkt *pkt;
+
+	ether->outpackets++;
+
+	/*
+	 * Check if the packet has to be placed back onto the input queue,
+	 * i.e. if it's a loopback or broadcast packet or the interface is
+	 * in promiscuous mode.
+	 * If it's a loopback packet indicate to etheriq that the data isn't
+	 * needed and return, etheriq will pass-on or free the block.
+	 * To enable bridging to work, only packets that were originated
+	 * by this interface are fed back.
+	 */
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	if(loopback || memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) == 0 || ether->prom){
+		s = splhi();
+		etheriq(ether, bp, 0);
+		splx(s);
+	}
+
+	if(!loopback){
+		qbwrite(ether->oq, bp);
+		if(ether->transmit != nil)
+			ether->transmit(ether);
+	} else
+		freeb(bp);
+
+	return len;
+}
+
+/* write to a control file (netif, "nonblocking", or driver ctl) or send one packet on the data file */
+static long
+etherwrite(Chan* chan, void* buf, long n, vlong)
+{
+	Ether *ether;
+	Block *bp;
+	int nn, onoff;
+	Cmdbuf *cb;
+
+	ether = etherxx[chan->devno];
+	if(NETTYPE(chan->qid.path) != Ndataqid) {
+		nn = netifwrite(ether, chan, buf, n);
+		if(nn >= 0)
+			return nn;
+		cb = parsecmd(buf, n);
+		if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){
+			if(cb->nf <= 1)
+				onoff = 1;
+			else
+				onoff = atoi(cb->f[1]);
+			qnoblock(ether->oq, onoff);
+			free(cb);
+			return n;
+		}
+		free(cb);
+		if(ether->ctl!=nil)
+			return ether->ctl(ether,buf,n);
+
+		error(Ebadctl);
+	}
+
+	if(n > ether->maxmtu)
+		error(Etoobig);
+	if(n < ether->minmtu)
+		error(Etoosmall);
+
+	bp = allocb(n);
+	if(waserror()){
+		freeb(bp);
+		nexterror();
+	}
+	memmove(bp->rp, buf, n);
+	memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);	/* force our own source address */
+	poperror();
+	bp->wp += n;
+
+	return etheroq(ether, bp);
+}
+
+/* Block flavour of etherwrite; bp is freed or handed to etheroq on every path */
+static long
+etherbwrite(Chan* chan, Block* bp, vlong)
+{
+	Ether *ether;
+	long n;
+
+	n = BLEN(bp);
+	if(NETTYPE(chan->qid.path) != Ndataqid){
+		if(waserror()) {
+			freeb(bp);
+			nexterror();
+		}
+		n = etherwrite(chan, bp->rp, n, 0);
+		poperror();
+		freeb(bp);
+		return n;
+	}
+	ether = etherxx[chan->devno];
+
+	if(n > ether->maxmtu){
+		freeb(bp);
+		error(Etoobig);
+	}
+	if(n < ether->minmtu){
+		freeb(bp);
+		error(Etoosmall);
+	}
+
+	return etheroq(ether, bp);
+}
+
+Ether*
+etherc2e(Chan *chan)
+{
+	return etherxx[chan->devno];
+}
+
+static struct {
+	char*	type;
+	int	(*reset)(Ether*);
+} cards[MaxEther+1];
+
+/* register a driver: t is the card type name, r the reset routine etherprobe will call */
+void
+addethercard(char* t, int (*r)(Ether*))
+{
+	static int ncard;
+
+	if(ncard == MaxEther)
+		panic("too many ether cards");
+	cards[ncard].type = t;
+	cards[ncard].reset = r;
+	ncard++;
+}
+
+/* parse Eaddrlen bytes, two hex digits each with optional ':' separators; -1 if from ends early */
+int
+parseether(uchar *to, char *from)
+{
+	char nip[4];
+	char *p;
+	int i;
+
+	p = from;
+	for(i = 0; i < Eaddrlen; i++){
+		if(*p == 0)
+			return -1;
+		nip[0] = *p++;
+		if(*p == 0)
+			return -1;
+		nip[1] = *p++;
+		nip[2] = 0;
+		to[i] = strtoul(nip, 0, 16);
+		if(*p == ':')
+			p++;
+	}
+	return 0;
+}
+
+/*
+ * Try to configure controller ctlrno.  With cardno < 0 the card type
+ * comes from isaconfig; otherwise cards[cardno] is probed directly.
+ * Returns nil if nothing usable was found or memory ran out.
+ */
+static Ether*
+etherprobe(int cardno, int ctlrno)
+{
+	int i;
+	Ether *ether;
+	char buf[128], name[32];
+
+	ether = malloc(sizeof(Ether));
+	if(ether == nil)	/* callers already treat nil as "no controller" */
+		return nil;
+	memset(ether, 0, sizeof(Ether));
+	ether->ctlrno = ctlrno;
+	ether->tbdf = -1;
+	ether->mbps = 10;
+	ether->minmtu = ETHERMINTU;
+	ether->maxmtu = ETHERMAXTU;
+
+	if(cardno < 0){
+		if(isaconfig("ether", ctlrno, ether) == 0){
+			free(ether);
+			return nil;
+		}
+		for(cardno = 0; cards[cardno].type; cardno++){
+			if(cistrcmp(cards[cardno].type, ether->type))
+				continue;
+			for(i = 0; i < ether->nopt; i++){
+				if(strncmp(ether->opt[i], "ea=", 3))
+					continue;
+				if(parseether(ether->ea, &ether->opt[i][3]))
+					memset(ether->ea, 0, Eaddrlen);
+			}
+			break;
+		}
+	}
+
+	if(cardno >= MaxEther || cards[cardno].type == nil){
+		free(ether);
+		return nil;
+	}
+	if(cards[cardno].reset(ether) < 0){
+		free(ether);
+		return nil;
+	}
+
+	/*
+	 * IRQ2 doesn't really exist, it's used to gang the interrupt
+	 * controllers together. A device set to IRQ2 will appear on
+	 * the second interrupt controller as IRQ9.
+	 */
+	if(ether->irq == 2)
+		ether->irq = 9;
+	snprint(name, sizeof(name), "ether%d", ctlrno);
+
+	/*
+	 * If ether->irq is <0, it is a hack to indicate no interrupt
+	 * used by ethersink.
+	 * Or perhaps the driver has some other way to configure
+	 * interrupts for itself, e.g. HyperTransport MSI.
+	 */
+	if(ether->irq >= 0)
+		ether->vector = intrenable(ether->irq, ether->interrupt, ether, ether->tbdf, name);
+
+	i = sprint(buf, "#l%d: %s: %dMbps port %#p irq %d",
+		ctlrno, cards[cardno].type, ether->mbps, ether->port, ether->irq);
+	if(ether->mem)
+		i += sprint(buf+i, " addr %#p", ether->mem);
+	if(ether->size)
+		i += sprint(buf+i, " size %ld", ether->size);
+	i += sprint(buf+i, ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+		ether->ea[0], ether->ea[1], ether->ea[2],
+		ether->ea[3], ether->ea[4], ether->ea[5]);
+	sprint(buf+i, "\n");
+	print(buf);
+
+	if (ether->mbps >= 1000) {
+		netifinit(ether, name, Ntypes, 512*1024);
+		if(ether->oq == 0)
+			ether->oq = qopen(512*1024, Qmsg, 0, 0);
+	} else if(ether->mbps >= 100){
+		netifinit(ether, name, Ntypes, 256*1024);
+		if(ether->oq == 0)
+			ether->oq = qopen(256*1024, Qmsg, 0, 0);
+	}
+	else{
+		netifinit(ether, name, Ntypes, 128*1024);
+		if(ether->oq == 0)
+			ether->oq = qopen(128*1024, Qmsg, 0, 0);
+	}
+	if(ether->oq == 0)
+		panic("etherreset %s", name);
+	ether->alen = Eaddrlen;
+	memmove(ether->addr, ether->ea, Eaddrlen);
+	memset(ether->bcast, 0xFF, Eaddrlen);
+
+	return ether;
+}
+
+/* configure isaconfig-described controllers first, then (unless *noetherprobe is set) probe each card type */
+static void
+etherreset(void)
+{
+	Ether *ether;
+	int cardno, ctlrno;
+
+	for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+		if((ether = etherprobe(-1, ctlrno)) == nil)
+			continue;
+		etherxx[ctlrno] = ether;
+	}
+
+	if(getconf("*noetherprobe"))
+		return;
+
+	cardno = ctlrno = 0;
+	while(cards[cardno].type != nil && ctlrno < MaxEther){
+		if(etherxx[ctlrno] != nil){
+			ctlrno++;
+			continue;
+		}
+		if((ether = etherprobe(cardno, ctlrno)) == nil){
+			cardno++;
+			continue;
+		}
+		etherxx[ctlrno] = ether;
+		ctlrno++;
+	}
+}
+
+/* disable the interrupt of every configured controller and call its shutdown routine if it has one */
+static void
+ethershutdown(void)
+{
+	Ether *ether;
+	int i;
+
+	for(i = 0; i < MaxEther; i++){
+		ether = etherxx[i];
+		if(ether == nil)
+			continue;
+		if(ether->irq >= 0)
+			intrdisable(ether->vector);
+		if(ether->shutdown == nil) {
+			print("#l%d: no shutdown function\n", i);
+			continue;
+		}
+		(*ether->shutdown)(ether);
+	}
+}
+
+int
+ethercfgmatch(Ether *e, Pcidev *p, uintmem port)
+{
+	if((e->port == 0 || e->port == port) &&
+	   (e->tbdf == BUSUNKNOWN || p == nil || e->tbdf == p->tbdf))
+		return 0;
+	return -1;
+}
+
+
+#define POLY 0xedb88320
+
+/* really slow 32 bit crc for ethers */
+ulong
+ethercrc(uchar *p, int len)
+{
+	int i, j;
+	ulong crc, b;
+
+	crc = 0xffffffff;
+	for(i = 0; i < len; i++){
+		b = *p++;
+		for(j = 0; j < 8; j++){
+			crc = (crc>>1) ^ (((crc^b) & 1) ? POLY : 0);
+			b >>= 1;
+		}
+	}
+	return crc;
+}
+
+Dev etherdevtab = {
+	'l',
+	"ether",
+
+	etherreset,
+	devinit,
+	ethershutdown,
+	etherattach,
+	etherwalk,
+	etherstat,
+	etheropen,
+	ethercreate,
+	etherclose,
+	etherread,
+	etherbread,
+	etherwrite,
+	etherbwrite,
+	devremove,
+	etherwstat,
+};
diff -Nru /sys/src/9k/386/devpmc.c /sys/src/9k/386/devpmc.c
--- /sys/src/9k/386/devpmc.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/386/devpmc.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,398 @@
+/*
+ * Performance counters
+ */
+
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "pmc.h"
+
+
+enum{
+	Qdir	= 0,
+	Qdesc,
+	Qcore,
+	PmcCtlRdStr = 4*1024,
+};
+
+#define PMCTYPE(x)	(((unsigned)x)&0xffful)
+#define PMCID(x)	(((unsigned)x)>>12)
+#define PMCQID(i, t)	((((unsigned)i)<<12)|(t))
+
+static Dirtab *toptab;
+static Lock toptablck;
+static int ntoptab;
+int pmcdebug;
+
+/* allocate toptab (1+MACHMAX entries) and fill in entry 0, "ctrdesc" */
+static void
+topdirinit(void)
+{
+	Dirtab *d;
+	int nent;
+
+	nent = 1 + MACHMAX;
+	toptab = mallocz(nent * sizeof(Dirtab), 1);
+	if (toptab == nil)
+		return;
+	d = toptab;
+	strncpy(d->name, "ctrdesc", KNAMELEN);
+	mkqid(&d->qid, Qdesc, 0, 0);
+	d->perm = 0440;
+
+}
+
+/*
+ * (Re)build the coreNNNN entries of toptab for the cores that are
+ * online now.  Returns the number of valid entries (ctrdesc included),
+ * or 0 if topdirinit could not allocate the table.
+ */
+static int
+corefilesinit(void)
+{
+	int i, nc, newn;
+	Dirtab *d;
+	Mach *mp;
+
+	if(toptab == nil)
+		return 0;
+	nc = 0;
+	lock(&toptablck);
+	for(i = 0; i < MACHMAX; i++) {
+		if((mp = sys->machptr[i]) != nil && mp->online != 0){
+			d = &toptab[nc + 1];
+			/* if you take them out, be careful in pmcgen too */
+			if(d->name[0] != '\0'){
+				if(PMCQID(i, Qcore) == d->qid.path){
+					nc++;
+					continue;
+				}else{
+					/* a new one appeared, make space, should almost never happen */
+					/* shift what follows d within the 1+MACHMAX entry table; the last entry drops off */
+					memmove(d + 1, d, (MACHMAX - nc - 1)*sizeof(*d));
+					memset(d, 0, sizeof(*d));
+				}
+			}
+			snprint(d->name, KNAMELEN, "core%4.4ud", i);
+			mkqid(&d->qid, PMCQID(i, Qcore), 0, 0);
+			d->perm = 0660;
+			nc++;
+		}
+	}
+	newn = 1 + nc;
+	ntoptab = newn;
+	unlock(&toptablck);
+	return newn;
+
+}
+
+
+static void
+pmcinit(void)
+{
+	pmcconfigure();
+	topdirinit();
+	corefilesinit();
+}
+
+static Chan *
+pmcattach(char *spec)
+{
+	corefilesinit();
+	return devattach(L'ε', spec);
+}
+
+int
+pmcgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp)
+{
+	int ntab;
+	Dirtab *d;
+
+	ntab = corefilesinit();
+	if(s == DEVDOTDOT){
+		devdir(c, (Qid){Qdir, 0, QTDIR}, "#ε", 0, eve, 0555, dp);
+		c->aux = nil;
+		return 1;
+	}
+	/* first, for directories, generate children */
+	switch((int)PMCTYPE(c->qid.path)){
+	case Qdir:
+	case Qcore:
+		if(s >= ntab)
+			return -1;
+		d = &toptab[s];
+		devdir(c, d->qid, d->name, d->length, eve, d->perm, dp);
+		return 1;
+	default:
+		return -1;
+	}
+}
+
+static Walkqid*
+pmcwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	if(PMCTYPE(c->qid.path) == Qcore)
+		c->aux = (void *)PMCID(c->qid.path);		/* core no */
+	return devwalk(c, nc, name, nname, nil, 0, pmcgen);
+}
+
+static long
+pmcstat(Chan *c, uchar *dp, long n)
+{
+	return devstat(c, dp, n, nil, 0, pmcgen);
+}
+
+static Chan*
+pmcopen(Chan *c, int omode)
+{
+	if (!iseve())
+		error(Eperm);
+	return devopen(c, omode, nil, 0, pmcgen);
+}
+
+static void
+pmcclose(Chan *)
+{
+}
+
+/* format counter value v and the enab/user/os/desc settings of p into str; returns the snprint total */
+static int
+pmcctlstr(char *str, int nstr, PmcCtl *p, vlong v)
+{
+	int ns;
+
+	ns = 0;
+	ns += snprint(str + ns, nstr - ns, "%#ullx ", v);
+	if (p->enab && p->enab != PmcCtlNullval)
+		ns += snprint(str + ns, nstr - ns, "on ");
+	else
+		ns += snprint(str + ns, nstr - ns, "off ");
+
+	if (p->user && p->user != PmcCtlNullval)
+		ns += snprint(str + ns, nstr - ns, "user ");
+	else
+		ns += snprint(str + ns, nstr - ns, "nouser ");
+
+	if (p->os && p->os != PmcCtlNullval)	/* was testing p->user against the null value */
+		ns += snprint(str + ns, nstr - ns, "os ");
+	else
+		ns += snprint(str + ns, nstr - ns, "noos ");
+
+	/* TODO, inverse pmctrans? */
+	if(!p->nodesc)
+		ns += snprint(str + ns, nstr - ns, "%s", p->descstr);
+	else
+		ns += snprint(str + ns, nstr - ns, "no desc");
+	ns += snprint(str + ns, nstr - ns, "\n");
+	return ns;
+}
+
+
+/* this should be safe to use even if there is no core anymore */
+static long
+pmcread(Chan *c, void *a, long n, vlong offset)
+{
+	ulong type;
+	PmcCtl p;
+	char *s;
+	u64int v;
+	u64int coreno;
+	int nr, i, ns, nn;
+
+	type = PMCTYPE(c->qid.path);
+	coreno = PMCID(c->qid.path);
+
+	if(type == Qdir)
+		return devdirread(c, a, n, nil, 0, pmcgen);
+	s = malloc(PmcCtlRdStr);
+	if(s == nil)
+		error("pmcread: no memory");
+	s[0] = '\0';	/* readstr below needs a string even if no counter is enabled */
+	if(waserror()){
+		free(s);
+		nexterror();
+	}
+
+	p.coreno = coreno;
+	nr = pmcnregs();
+	switch(type){
+	case Qcore:
+		ns = 0;
+		for(i = 0; i < nr; i ++){
+			if (pmcgetctl(coreno, &p, i) < 0)
+				error("bad ctr");
+			if(! p.enab)
+				continue;
+			v = pmcgetctr(coreno, i);
+			ns += snprint(s + ns, PmcCtlRdStr - ns, "%2.2ud ", i);
+			nn = pmcctlstr(s + ns, PmcCtlRdStr - ns, &p, v);
+			if (nn < 0)	/* check the formatter's result, not the caller's byte count */
+				error("bad pmc");
+			ns += nn;
+		}
+		break;
+	case Qdesc:
+		if (pmcdescstr(s, PmcCtlRdStr) < 0)
+			error("bad pmc");
+		break;
+	default:
+		error(Eperm);
+	}
+	n = readstr(offset, a, n, s);
+	free(s);
+	poperror();
+	return n;
+}
+
+static int
+isset(char *str)
+{
+	return strncmp(str, "-", 2) != 0;
+}
+
+static int
+pickregno(int coreno)
+{
+	PmcCtl p;
+	int nr, i;
+
+	nr = pmcnregs();
+	for(i = 0; i < nr; i++){
+		if (pmcgetctl(coreno, &p, i) || p.enab)
+			continue;
+		return i;
+	}
+
+	return -1;
+}
+
+static int
+fillctl(PmcCtl *p, Cmdbuf *cb, int start, int end)
+{
+	int i;
+
+	if(end > cb->nf -1)
+		end = cb->nf -1;
+	for(i = start; i <= end; i++){
+		if(pmcdebug != 0)
+			print("setting field %d to %s\n", i, cb->f[i]);
+		if(!isset(cb->f[i]))
+			continue;
+		else if(strcmp("on", cb->f[i]) == 0)
+			p->enab = 1;
+		else if(strcmp("off", cb->f[i]) == 0)
+			p->enab = 0;
+		else if(strcmp("user", cb->f[i]) == 0)
+			p->user = 1;
+		else if(strcmp("os", cb->f[i]) == 0)
+			p->os = 1;
+		else if(strcmp("nouser", cb->f[i]) == 0)
+			p->user = 0;
+		else if(strcmp("noos", cb->f[i]) == 0)
+			p->os = 0;
+		else
+			error("bad ctl");
+	}
+	return 0;
+}
+
+/*
+ * ctl message: regno value flag flag flag desc...
+ * "-" in a field leaves it unset; "-" as regno picks a free counter.
+ * The single word "debug" toggles pmcdebug.
+ */
+/* this should be safe to use even if there is no core anymore */
+static long
+pmcwrite(Chan *c, void *a, long n, vlong)
+{
+	Cmdbuf *cb;
+	u64int coreno;
+	int regno, i, ns;
+	PmcCtl p;
+	char *s;
+
+	if (c->qid.type == QTDIR)
+		error(Eperm);
+	if (c->qid.path == Qdesc)
+		error(Eperm);
+
+	coreno = PMCID(c->qid.path);
+	p.coreno = coreno;
+
+	/* TODO, multiple lines? */
+	cb = parsecmd(a, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	if(cb->nf < 1)
+		error("short ctl");
+	if(strcmp("debug", cb->f[0]) == 0)
+		pmcdebug = ~pmcdebug;
+	else{
+		if(cb->nf < 2)
+			error("short ctl");
+		if(!isset(cb->f[0])){
+			/* racy, it does not reserve the core */
+			regno = pickregno(coreno);
+			if(regno < 0)
+				error("no free regno");
+			if(pmcdebug != 0)
+				print("picked regno %d\n", regno);
+		}else{
+			regno = strtoull(cb->f[0], 0, 0);
+			if(regno < 0 || regno >= pmcnregs())	/* valid counters are 0..pmcnregs()-1 */
+				error("ctr number too big");
+			if(pmcdebug != 0)
+				print("setting regno %d\n", regno);
+		}
+		if(isset(cb->f[1]))
+			pmcsetctr(coreno, strtoull(cb->f[1], 0, 0), regno);
+
+		pmcinitctl(&p);
+		fillctl(&p, cb, 2, 4);
+		ns = 0;
+		s = p.descstr;
+		s[0] = '\0';
+		for(i = 5; i < cb->nf; i++){
+			if(!isset(cb->f[i]))
+				continue;
+			ns += snprint(s + ns, KNAMELEN - ns, "%s ", cb->f[i]);
+			p.nodesc = 0;
+		}
+		if(pmcdebug != 0)
+			print("setting desc to %s\n", p.descstr);
+		pmcsetctl(coreno, &p, regno);
+	}
+	free(cb);
+	poperror();
+
+
+	return n;
+}
+
+
+Dev pmcdevtab = {
+	L'ε',
+	"pmc",
+
+	pmcinit,
+	devinit,
+	devshutdown,
+	pmcattach,
+	pmcwalk,
+	pmcstat,
+	pmcopen,
+	devcreate,
+	pmcclose,
+	pmcread,
+	devbread,
+	pmcwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
diff -Nru /sys/src/9k/386/devrtc.c /sys/src/9k/386/devrtc.c
--- 
/sys/src/9k/386/devrtc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/devrtc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,451 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * real time clock and non-volatile ram + */ + +enum { + Addr= 0x70, /* address port */ + Data= 0x71, /* data port */ + + Seconds= 0x00, + Minutes= 0x02, + Hours= 0x04, + Mday= 0x07, + Month= 0x08, + Year= 0x09, + Status= 0x0A, + + Nvoff= 128, /* where usable nvram lives */ + Nvsize= 256, + + Nbcd= 6, +}; + +typedef struct Rtc Rtc; +struct Rtc +{ + int sec; + int min; + int hour; + int mday; + int mon; + int year; +}; + + +enum{ + Qdir = 0, + Qrtc, + Qnvram, +}; + +Dirtab rtcdir[]={ + ".", {Qdir, 0, QTDIR}, 0, 0555, + "nvram", {Qnvram, 0}, Nvsize, 0600, + "rtc", {Qrtc, 0}, 0, 0664, +}; + +static ulong rtc2sec(Rtc*); +static void sec2rtc(ulong, Rtc*); + +void +rtcinit(void) +{ + if(ioalloc(Addr, 2, 0, "rtc/nvr") < 0) + panic("rtcinit: ioalloc failed"); +} + +static Chan* +rtcattach(char* spec) +{ + return devattach('r', spec); +} + +static Walkqid* +rtcwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, rtcdir, nelem(rtcdir), devgen); +} + +static long +rtcstat(Chan* c, uchar* dp, long n) +{ + return devstat(c, dp, n, rtcdir, nelem(rtcdir), devgen); +} + +static Chan* +rtcopen(Chan* c, int omode) +{ + return devopen(c, openmode(omode), rtcdir, nelem(rtcdir), devgen); +} + +static void +rtcclose(Chan*) +{ +} + +#define GETBCD(o) ((bcdclock[o]&0xf) + 10*(bcdclock[o]>>4)) + +static long +rtcextract(void) +{ + uchar bcdclock[Nbcd]; + Rtc rtc; + int i; + + /* don't do the read until the clock is no longer busy */ + for(i = 0; i < 10000; i++){ + outb(Addr, Status); + if(inb(Data) & 0x80) + continue; + + /* read clock values */ + outb(Addr, Seconds); bcdclock[0] = inb(Data); + outb(Addr, Minutes); bcdclock[1] = inb(Data); + outb(Addr, Hours); bcdclock[2] = inb(Data); + outb(Addr, Mday); 
bcdclock[3] = inb(Data); + outb(Addr, Month); bcdclock[4] = inb(Data); + outb(Addr, Year); bcdclock[5] = inb(Data); + + outb(Addr, Status); + if((inb(Data) & 0x80) == 0) + break; + } + + /* + * convert from BCD + */ + rtc.sec = GETBCD(0); + rtc.min = GETBCD(1); + rtc.hour = GETBCD(2); + rtc.mday = GETBCD(3); + rtc.mon = GETBCD(4); + rtc.year = GETBCD(5); + + /* + * the world starts jan 1 1970 + */ + if(rtc.year < 70) + rtc.year += 2000; + else + rtc.year += 1900; + return rtc2sec(&rtc); +} + +static Lock nvrtlock; + +long +rtctime(void) +{ + int i; + long t, ot; + + ilock(&nvrtlock); + + /* loop till we get two reads in a row the same */ + t = rtcextract(); + for(i = 0; i < 100; i++){ + ot = t; + t = rtcextract(); + if(t == ot) + break; + } + iunlock(&nvrtlock); + + if(i == 100) print("we are boofheads\n"); + + return t; +} + +static long +rtcread(Chan* c, void* buf, long n, vlong off) +{ + ulong t; + char *a, *start; + ulong offset = off; + + if(c->qid.type & QTDIR) + return devdirread(c, buf, n, rtcdir, nelem(rtcdir), devgen); + + switch((ulong)c->qid.path){ + case Qrtc: + t = rtctime(); + n = readnum(offset, buf, n, t, 12); + return n; + case Qnvram: + if(n == 0) + return 0; + if(n > Nvsize) + n = Nvsize; + a = start = smalloc(n); + + ilock(&nvrtlock); + for(t = offset; t < offset + n; t++){ + if(t >= Nvsize) + break; + outb(Addr, Nvoff+t); + *a++ = inb(Data); + } + iunlock(&nvrtlock); + + if(waserror()){ + free(start); + nexterror(); + } + memmove(buf, start, t - offset); + poperror(); + + free(start); + return t - offset; + } + error(Ebadarg); + return 0; +} + +#define PUTBCD(n,o) bcdclock[o] = (n % 10) | (((n / 10) % 10)<<4) + +static long +rtcwrite(Chan* c, void* buf, long n, vlong off) +{ + int t; + char *a, *start; + Rtc rtc; + ulong secs; + uchar bcdclock[Nbcd]; + char *cp, *ep; + ulong offset = off; + + if(offset!=0) + error(Ebadarg); + + + switch((ulong)c->qid.path){ + case Qrtc: + /* + * read the time + */ + cp = ep = buf; + ep += n; + while(cp < ep){ 
+ if(*cp>='0' && *cp<='9') + break; + cp++; + } + secs = strtoul(cp, 0, 0); + + /* + * convert to bcd + */ + sec2rtc(secs, &rtc); + PUTBCD(rtc.sec, 0); + PUTBCD(rtc.min, 1); + PUTBCD(rtc.hour, 2); + PUTBCD(rtc.mday, 3); + PUTBCD(rtc.mon, 4); + PUTBCD(rtc.year, 5); + + /* + * write the clock + */ + ilock(&nvrtlock); + outb(Addr, Seconds); outb(Data, bcdclock[0]); + outb(Addr, Minutes); outb(Data, bcdclock[1]); + outb(Addr, Hours); outb(Data, bcdclock[2]); + outb(Addr, Mday); outb(Data, bcdclock[3]); + outb(Addr, Month); outb(Data, bcdclock[4]); + outb(Addr, Year); outb(Data, bcdclock[5]); + iunlock(&nvrtlock); + return n; + case Qnvram: + if(n == 0) + return 0; + if(n > Nvsize) + n = Nvsize; + + start = a = smalloc(n); + if(waserror()){ + free(start); + nexterror(); + } + memmove(a, buf, n); + poperror(); + + ilock(&nvrtlock); + for(t = offset; t < offset + n; t++){ + if(t >= Nvsize) + break; + outb(Addr, Nvoff+t); + outb(Data, *a++); + } + iunlock(&nvrtlock); + + free(start); + return t - offset; + } + error(Ebadarg); + return 0; +} + +Dev rtcdevtab = { + 'r', + "rtc", + + devreset, + rtcinit, + devshutdown, + rtcattach, + rtcwalk, + rtcstat, + rtcopen, + devcreate, + rtcclose, + rtcread, + devbread, + rtcwrite, + devbwrite, + devremove, + devwstat, +}; + +#define SEC2MIN 60L +#define SEC2HOUR (60L*SEC2MIN) +#define SEC2DAY (24L*SEC2HOUR) + +/* + * days per month plus days/year + */ +static int dmsize[] = +{ + 365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; +static int ldmsize[] = +{ + 366, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * return the days/month for the given year + */ +static int* +yrsize(int y) +{ + if((y%4) == 0 && ((y%100) != 0 || (y%400) == 0)) + return ldmsize; + else + return dmsize; +} + +/* + * compute seconds since Jan 1 1970 + */ +static ulong +rtc2sec(Rtc *rtc) +{ + ulong secs; + int i; + int *d2m; + + secs = 0; + + /* + * seconds per year + */ + for(i = 1970; i < rtc->year; i++){ + d2m = yrsize(i); + secs += d2m[0] * 
SEC2DAY; + } + + /* + * seconds per month + */ + d2m = yrsize(rtc->year); + for(i = 1; i < rtc->mon; i++) + secs += d2m[i] * SEC2DAY; + + secs += (rtc->mday-1) * SEC2DAY; + secs += rtc->hour * SEC2HOUR; + secs += rtc->min * SEC2MIN; + secs += rtc->sec; + + return secs; +} + +/* + * compute rtc from seconds since Jan 1 1970 + */ +static void +sec2rtc(ulong secs, Rtc *rtc) +{ + int d; + long hms, day; + int *d2m; + + /* + * break initial number into days + */ + hms = secs % SEC2DAY; + day = secs / SEC2DAY; + if(hms < 0) { + hms += SEC2DAY; + day -= 1; + } + + /* + * generate hours:minutes:seconds + */ + rtc->sec = hms % 60; + d = hms / 60; + rtc->min = d % 60; + d /= 60; + rtc->hour = d; + + /* + * year number + */ + if(day >= 0) + for(d = 1970; day >= *yrsize(d); d++) + day -= *yrsize(d); + else + for (d = 1970; day < 0; d--) + day += *yrsize(d-1); + rtc->year = d; + + /* + * generate month + */ + d2m = yrsize(rtc->year); + for(d = 1; day >= d2m[d]; d++) + day -= d2m[d]; + rtc->mday = day + 1; + rtc->mon = d; + + return; +} + +uchar +nvramread(int addr) +{ + uchar data; + + ilock(&nvrtlock); + outb(Addr, addr); + data = inb(Data); + iunlock(&nvrtlock); + + return data; +} + +void +nvramwrite(int addr, uchar data) +{ + ilock(&nvrtlock); + outb(Addr, addr); + outb(Data, data); + iunlock(&nvrtlock); +} diff -Nru /sys/src/9k/386/ether8139.c /sys/src/9k/386/ether8139.c --- /sys/src/9k/386/ether8139.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/ether8139.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,880 @@ +/* + * Realtek 8139 (but not the 8129). + * Error recovery for the various over/under -flow conditions + * may need work. + * The device can support only 32-bit physical addresses. 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" + +#include "etherif.h" + +enum { /* registers (offsets added to Ctlr.port by the csr macros) */ + Idr0 = 0x0000, /* MAC address */ + Mar0 = 0x0008, /* Multicast address */ + Tsd0 = 0x0010, /* Transmit Status Descriptor0 */ + Tsad0 = 0x0020, /* Transmit Start Address Descriptor0 */ + Rbstart = 0x0030, /* Receive Buffer Start Address */ + Erbcr = 0x0034, /* Early Receive Byte Count */ + Ersr = 0x0036, /* Early Receive Status */ + Cr = 0x0037, /* Command Register */ + Capr = 0x0038, /* Current Address of Packet Read */ + Cbr = 0x003A, /* Current Buffer Address */ + Imr = 0x003C, /* Interrupt Mask */ + Isr = 0x003E, /* Interrupt Status */ + Tcr = 0x0040, /* Transmit Configuration */ + Rcr = 0x0044, /* Receive Configuration */ + Tctr = 0x0048, /* Timer Count */ + Mpc = 0x004C, /* Missed Packet Counter */ + Cr9346 = 0x0050, /* 9346 Command Register */ + Config0 = 0x0051, /* Configuration Register 0 */ + Config1 = 0x0052, /* Configuration Register 1 */ + TimerInt = 0x0054, /* Timer Interrupt */ + Msr = 0x0058, /* Media Status */ + Config3 = 0x0059, /* Configuration Register 3 */ + Config4 = 0x005A, /* Configuration Register 4 */ + Mulint = 0x005C, /* Multiple Interrupt Select */ + RerID = 0x005E, /* PCI Revision ID */ + Tsad = 0x0060, /* Transmit Status of all Descriptors */ + + Bmcr = 0x0062, /* Basic Mode Control */ + Bmsr = 0x0064, /* Basic Mode Status */ + Anar = 0x0066, /* Auto-Negotiation Advertisement */ + Anlpar = 0x0068, /* Auto-Negotiation Link Partner */ + Aner = 0x006A, /* Auto-Negotiation Expansion */ + Dis = 0x006C, /* Disconnect Counter */ + Fcsc = 0x006E, /* False Carrier Sense Counter */ + Nwaytr = 0x0070, /* N-way Test */ + Rec = 0x0072, /* RX_ER Counter */ + Cscr = 0x0074, /* CS Configuration */ + Phy1parm = 0x0078, /* PHY Parameter 1 */ + Twparm = 0x007C, /* Twister Parameter */ + Phy2parm = 0x0080, /* PHY Parameter 2 */ +}; + 
+enum { /* Cr */ + Bufe = 0x01, /* Rx Buffer Empty */ + Te = 0x04, /* Transmitter Enable */ + Re = 0x08, /* Receiver Enable */ + Rst = 0x10, /* Software Reset */ +}; + +enum { /* Imr/Isr */ + Rok = 0x0001, /* Receive OK */ + Rer = 0x0002, /* Receive Error */ + Tok = 0x0004, /* Transmit OK */ + Ter = 0x0008, /* Transmit Error */ + Rxovw = 0x0010, /* Receive Buffer Overflow */ + PunLc = 0x0020, /* Packet Underrun or Link Change */ + Fovw = 0x0040, /* Receive FIFO Overflow */ + Clc = 0x2000, /* Cable Length Change */ + Timerbit = 0x4000, /* Timer */ + Serr = 0x8000, /* System Error */ +}; + +enum { /* Tcr */ + Clrabt = 0x00000001, /* Clear Abort */ + TxrrSHIFT = 4, /* Transmit Retry Count */ + TxrrMASK = 0x000000F0, + MtxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MtxdmaMASK = 0x00000700, + Mtxdma2048 = 0x00000700, + Acrc = 0x00010000, /* Append CRC (not) */ + LbkSHIFT = 17, /* Loopback Test */ + LbkMASK = 0x00060000, + Rtl8139ArevG = 0x00800000, /* RTL8139A Rev. G ID */ + IfgSHIFT = 24, /* Interframe Gap */ + IfgMASK = 0x03000000, + HwveridSHIFT = 26, /* Hardware Version ID */ + HwveridMASK = 0x7C000000, +}; + +enum { /* Rcr */ + Aap = 0x00000001, /* Accept All Packets */ + Apm = 0x00000002, /* Accept Physical Match */ + Am = 0x00000004, /* Accept Multicast */ + Ab = 0x00000008, /* Accept Broadcast */ + Ar = 0x00000010, /* Accept Runt */ + Aer = 0x00000020, /* Accept Error */ + Sel9356 = 0x00000040, /* 9356 EEPROM used */ + Wrap = 0x00000080, /* Rx Buffer Wrap Control */ + MrxdmaSHIFT = 8, /* Max. 
DMA Burst Size */ + MrxdmaMASK = 0x00000700, + Mrxdmaunlimited = 0x00000700, + RblenSHIFT = 11, /* Receive Buffer Length */ + RblenMASK = 0x00001800, + Rblen8K = 0x00000000, /* 8KB+16 */ + Rblen16K = 0x00000800, /* 16KB+16 */ + Rblen32K = 0x00001000, /* 32KB+16 */ + Rblen64K = 0x00001800, /* 64KB+16 */ + RxfthSHIFT = 13, /* Receive FIFO Threshold */ + RxfthMASK = 0x0000E000, + Rxfth256 = 0x00008000, + Rxfthnone = 0x0000E000, + Rer8 = 0x00010000, /* Accept Error Packets > 8 bytes */ + MulERINT = 0x00020000, /* Multiple Early Interrupt Select */ + ErxthSHIFT = 24, /* Early Rx Threshold */ + ErxthMASK = 0x0F000000, + Erxthnone = 0x00000000, +}; + +enum { /* Received Packet Status */ + Rcok = 0x0001, /* Receive Completed OK */ + Fae = 0x0002, /* Frame Alignment Error */ + Crc = 0x0004, /* CRC Error */ + Long = 0x0008, /* Long Packet */ + Runt = 0x0010, /* Runt Packet Received */ + Ise = 0x0020, /* Invalid Symbol Error */ + Bar = 0x2000, /* Broadcast Address Received */ + Pam = 0x4000, /* Physical Address Matched */ + Mar = 0x8000, /* Multicast Address Received */ +}; + +enum { /* Media Status Register */ + Rxpf = 0x01, /* Receive Pause Flag */ + Txpf = 0x02, /* Transmit Pause Flag */ + Linkb = 0x04, /* Inverse of Link Status */ + Speed10 = 0x08, /* 10Mbps */ + Auxstatus = 0x10, /* Aux. 
Power Present Status */ + Rxfce = 0x40, /* Receive Flow Control Enable */ + Txfce = 0x80, /* Transmit Flow Control Enable */ +}; + +typedef struct Td Td; +struct Td { /* Soft Transmit Descriptor */ + int tsd; /* register offset of this slot's status: Tsd0+i*4 */ + int tsad; /* register offset of this slot's start address: Tsad0+i*4 */ + uchar* data; /* bounce buffer, used when the packet is not 4-byte aligned */ + Block* bp; /* block being sent in place; nil when the bounce buffer is used */ +}; + +enum { /* Tsd0 */ + SizeSHIFT = 0, /* Descriptor Size */ + SizeMASK = 0x00001FFF, + Own = 0x00002000, + Tun = 0x00004000, /* Transmit FIFO Underrun */ + Tcok = 0x00008000, /* Transmit Completed OK */ + EtxthSHIFT = 16, /* Early Tx Threshold */ + EtxthMASK = 0x001F0000, + NccSHIFT = 24, /* Number of Collisions Count */ + NccMASK = 0x0F000000, + Cdh = 0x10000000, /* CD Heartbeat */ + Owc = 0x20000000, /* Out of Window Collision */ + Tabt = 0x40000000, /* Transmit Abort */ + Crs = 0x80000000, /* Carrier Sense Lost */ +}; + +enum { + Rblen = Rblen64K, /* Receive Buffer Length */ + Ntd = 4, /* Number of Transmit Descriptors */ + Tdbsz = ROUNDUP(sizeof(Etherpkt), 4), /* size of each transmit bounce buffer */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + int port; /* I/O port base used by the csr macros */ + Pcidev* pcidev; + Ctlr* next; /* next controller on the ctlrhead list */ + int active; /* set once claimed by rtl8139match */ + int id; /* (did<<16)|vid, recorded by rtl8139pnp */ + + QLock alock; /* attach */ + Lock ilock; /* init */ + void* alloc; /* base of per-Ctlr allocated data */ + + int pcie; /* flag: pci-express device? 
*/ + + uvlong mchash; /* multicast hash */ + + int rcr; /* receive configuration register */ + uchar* rbstart; /* receive buffer */ + int rblen; /* receive buffer length */ + int ierrs; /* receive errors */ + + Lock tlock; /* transmit */ + Td td[Ntd]; + int ntd; /* descriptors active */ + int tdh; /* host index into td */ + int tdi; /* interface index into td */ + int etxth; /* early transmit threshold */ + int taligned; /* packet required no alignment */ + int tunaligned; /* packet required alignment */ + + int dis; /* disconnect counter */ + int fcsc; /* false carrier sense counter */ + int rec; /* RX_ER counter */ + uint mcast; +} Ctlr; + +static Ctlr* ctlrhead; +static Ctlr* ctlrtail; + +#define csr8r(c, r) (inb((c)->port+(r))) +#define csr16r(c, r) (ins((c)->port+(r))) +#define csr32r(c, r) (inl((c)->port+(r))) +#define csr8w(c, r, b) (outb((c)->port+(r), (int)(b))) +#define csr16w(c, r, w) (outs((c)->port+(r), (ushort)(w))) +#define csr32w(c, r, l) (outl((c)->port+(r), (ulong)(l))) + +static void +rtl8139promiscuous(void* arg, int on) +{ + Ether *edev; + Ctlr * ctlr; + + edev = arg; + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + if(on) + ctlr->rcr |= Aap; + else + ctlr->rcr &= ~Aap; + csr32w(ctlr, Rcr, ctlr->rcr); + iunlock(&ctlr->ilock); +} + +enum { + /* everyone else uses 0x04c11db7, but they both produce the same crc */ + Etherpolybe = 0x04c11db6, + Bytemask = (1<<8) - 1, +}; + +static ulong +ethercrcbe(uchar *addr, long len) +{ + int i, j; + ulong c, crc, carry; + + crc = ~0UL; + for (i = 0; i < len; i++) { + c = addr[i]; + for (j = 0; j < 8; j++) { + carry = ((crc & (1UL << 31))? 
1: 0) ^ (c & 1); + crc <<= 1; + c >>= 1; + if (carry) + crc = (crc ^ Etherpolybe) | carry; + } + } + return crc; +} + +static ulong +swabl(ulong l) +{ + return l>>24 | (l>>8) & (Bytemask<<8) | + (l<<8) & (Bytemask<<16) | l<<24; +} + +static void +rtl8139multicast(void* ether, uchar *eaddr, int add) +{ + Ether *edev; + Ctlr *ctlr; + + if (!add) + return; /* ok to keep receiving on old mcast addrs */ + + edev = ether; + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + ctlr->mchash |= 1ULL << (ethercrcbe(eaddr, Eaddrlen) >> 26); + + ctlr->rcr |= Am; + csr32w(ctlr, Rcr, ctlr->rcr); + + /* pci-e variants reverse the order of the hash byte registers */ + if (0 && ctlr->pcie) { + csr32w(ctlr, Mar0, swabl(ctlr->mchash>>32)); + csr32w(ctlr, Mar0+4, swabl(ctlr->mchash)); + } else { + csr32w(ctlr, Mar0, ctlr->mchash); + csr32w(ctlr, Mar0+4, ctlr->mchash>>32); + } + + iunlock(&ctlr->ilock); +} + +static long +rtl8139ifstat(Ether* edev, void* a, long n, ulong offset) +{ + int l; + char *p; + Ctlr *ctlr; + + ctlr = edev->ctlr; + p = malloc(READSTR); + if(p == nil) + error(Enomem); + l = snprint(p, READSTR, "rcr %#8.8ux\n", ctlr->rcr); + l += snprint(p+l, READSTR-l, "multicast %ud\n", ctlr->mcast); + l += snprint(p+l, READSTR-l, "ierrs %d\n", ctlr->ierrs); + l += snprint(p+l, READSTR-l, "etxth %d\n", ctlr->etxth); + l += snprint(p+l, READSTR-l, "taligned %d\n", ctlr->taligned); + l += snprint(p+l, READSTR-l, "tunaligned %d\n", ctlr->tunaligned); + ctlr->dis += csr16r(ctlr, Dis); + l += snprint(p+l, READSTR-l, "dis %d\n", ctlr->dis); + ctlr->fcsc += csr16r(ctlr, Fcsc); + l += snprint(p+l, READSTR-l, "fcscnt %d\n", ctlr->fcsc); + ctlr->rec += csr16r(ctlr, Rec); + l += snprint(p+l, READSTR-l, "rec %d\n", ctlr->rec); + + l += snprint(p+l, READSTR-l, "Tcr %#8.8lux\n", csr32r(ctlr, Tcr)); + l += snprint(p+l, READSTR-l, "Config0 %#2.2ux\n", csr8r(ctlr, Config0)); + l += snprint(p+l, READSTR-l, "Config1 %#2.2ux\n", csr8r(ctlr, Config1)); + l += snprint(p+l, READSTR-l, "Msr %#2.2ux\n", 
csr8r(ctlr, Msr)); + l += snprint(p+l, READSTR-l, "Config3 %#2.2ux\n", csr8r(ctlr, Config3)); + l += snprint(p+l, READSTR-l, "Config4 %#2.2ux\n", csr8r(ctlr, Config4)); + + l += snprint(p+l, READSTR-l, "Bmcr %#4.4ux\n", csr16r(ctlr, Bmcr)); + l += snprint(p+l, READSTR-l, "Bmsr %#4.4ux\n", csr16r(ctlr, Bmsr)); + l += snprint(p+l, READSTR-l, "Anar %#4.4ux\n", csr16r(ctlr, Anar)); + l += snprint(p+l, READSTR-l, "Anlpar %#4.4ux\n", csr16r(ctlr, Anlpar)); + l += snprint(p+l, READSTR-l, "Aner %#4.4ux\n", csr16r(ctlr, Aner)); + l += snprint(p+l, READSTR-l, "Nwaytr %#4.4ux\n", csr16r(ctlr, Nwaytr)); + snprint(p+l, READSTR-l, "Cscr %#4.4ux\n", csr16r(ctlr, Cscr)); + n = readstr(offset, a, n, p); + free(p); + + return n; +} + +static int +rtl8139reset(Ctlr* ctlr) +{ + int timeo; + + /* stop interrupts */ + csr16w(ctlr, Imr, 0); + csr16w(ctlr, Isr, ~0); + csr32w(ctlr, TimerInt, 0); + + /* + * Soft reset the controller. + */ + csr8w(ctlr, Cr, Rst); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr8r(ctlr, Cr) & Rst)) + return 0; + delay(1); + } + + return -1; +} + +static void +rtl8139halt(Ctlr* ctlr) +{ + int i; + + csr8w(ctlr, Cr, 0); + csr16w(ctlr, Imr, 0); + csr16w(ctlr, Isr, ~0); + csr32w(ctlr, TimerInt, 0); + + for(i = 0; i < Ntd; i++){ + if(ctlr->td[i].bp == nil) + continue; + freeb(ctlr->td[i].bp); + ctlr->td[i].bp = nil; + } +} + +static void +rtl8139shutdown(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + rtl8139halt(ctlr); + rtl8139reset(ctlr); + iunlock(&ctlr->ilock); +} + +static void +rtl8139init(Ether* edev) +{ + int i; + ulong r; + Ctlr *ctlr; + uchar *alloc; + + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + rtl8139halt(ctlr); + + /* + * MAC Address. 
+ */ + r = (edev->ea[3]<<24)|(edev->ea[2]<<16)|(edev->ea[1]<<8)|edev->ea[0]; + csr32w(ctlr, Idr0, r); + r = (edev->ea[5]<<8)|edev->ea[4]; + csr32w(ctlr, Idr0+4, r); + + /* + * Receiver + */ + alloc = (uchar*)ROUNDUP((uintptr)ctlr->alloc, 32); + ctlr->rbstart = alloc; + alloc += ctlr->rblen+16; + memset(ctlr->rbstart, 0, ctlr->rblen+16); + csr32w(ctlr, Rbstart, PCIWADDR32(ctlr->rbstart)); + ctlr->rcr = Rxfth256|Rblen|Mrxdmaunlimited|Ab|Am|Apm; + + /* + * Transmitter. + */ + for(i = 0; i < Ntd; i++){ + ctlr->td[i].tsd = Tsd0+i*4; + ctlr->td[i].tsad = Tsad0+i*4; + ctlr->td[i].data = alloc; + alloc += Tdbsz; + ctlr->td[i].bp = nil; + } + ctlr->ntd = ctlr->tdh = ctlr->tdi = 0; + ctlr->etxth = 128/32; + + /* + * Enable receiver/transmitter. + * Need to enable before writing the Rcr or it won't take. + */ + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Mtxdma2048); + csr32w(ctlr, Rcr, ctlr->rcr); + csr32w(ctlr, Mar0, 0); + csr32w(ctlr, Mar0+4, 0); + ctlr->mchash = 0; + + /* + * Interrupts. + */ + csr32w(ctlr, TimerInt, 0); + csr16w(ctlr, Imr, Serr|Timerbit|Fovw|PunLc|Rxovw|Ter|Tok|Rer|Rok); + csr32w(ctlr, Mpc, 0); + + iunlock(&ctlr->ilock); +} + +static void +rtl8139attach(Ether* edev) +{ + Ctlr *ctlr; + + if(edev == nil) { + print("rtl8139attach: nil edev\n"); + return; + } + ctlr = edev->ctlr; + if(ctlr == nil) { + print("rtl8139attach: nil ctlr for Ether %#p\n", edev); + return; + } + qlock(&ctlr->alock); + if(ctlr->alloc == nil){ + ctlr->rblen = 1<<((Rblen>>RblenSHIFT)+13); + ctlr->alloc = malloc(ctlr->rblen+16 + Ntd*Tdbsz + 32); + if(ctlr->alloc == nil) { + qunlock(&ctlr->alock); + error(Enomem); + } + rtl8139init(edev); + } + qunlock(&ctlr->alock); +} + +static void +rtl8139txstart(Ether* edev) +{ + Td *td; + int size; + Block *bp; + Ctlr *ctlr; + + ctlr = edev->ctlr; + while(ctlr->ntd < Ntd){ + bp = qget(edev->oq); + if(bp == nil) + break; + size = BLEN(bp); + + td = &ctlr->td[ctlr->tdh]; + if(((uintptr)bp->rp) & 0x03){ + memmove(td->data, bp->rp, size); + freeb(bp); 
+ csr32w(ctlr, td->tsad, PCIWADDR32(td->data)); + ctlr->tunaligned++; + } + else{ + td->bp = bp; + csr32w(ctlr, td->tsad, PCIWADDR32(bp->rp)); + ctlr->taligned++; + } + csr32w(ctlr, td->tsd, (ctlr->etxth<ntd++; + ctlr->tdh = NEXT(ctlr->tdh, Ntd); + } +} + +static void +rtl8139transmit(Ether* edev) +{ + Ctlr *ctlr; + + ctlr = edev->ctlr; + ilock(&ctlr->tlock); + rtl8139txstart(edev); + iunlock(&ctlr->tlock); +} + +static void +rtl8139receive(Ether* edev) +{ + Block *bp; + Ctlr *ctlr; + ushort capr; + uchar cr, *p; + int l, length, status; + + ctlr = edev->ctlr; + + /* + * Capr is where the host is reading from, + * Cbr is where the NIC is currently writing. + */ + if(ctlr->rblen == 0) + return; /* not attached yet (shouldn't happen) */ + capr = (csr16r(ctlr, Capr)+16) % ctlr->rblen; + while(!(csr8r(ctlr, Cr) & Bufe)){ + p = ctlr->rbstart+capr; + + /* + * Apparently the packet length may be 0xFFF0 if + * the NIC is still copying the packet into memory. + */ + length = (*(p+3)<<8)|*(p+2); + if(length == 0xFFF0) + break; + status = (*(p+1)<<8)|*p; + + if(!(status & Rcok)){ + if(status & (Ise|Fae)) + edev->frames++; + if(status & Crc) + edev->crcs++; + if(status & (Runt|Long)) + edev->buffs++; + + /* + * Reset the receiver. + * Also may have to restore the multicast list + * here too if it ever gets used. + */ + cr = csr8r(ctlr, Cr); + csr8w(ctlr, Cr, cr & ~Re); + csr32w(ctlr, Rbstart, PCIWADDR32(ctlr->rbstart)); + csr8w(ctlr, Cr, cr); + csr32w(ctlr, Rcr, ctlr->rcr); + + continue; + } + + /* + * Receive Completed OK. + * Very simplistic; there are ways this could be done + * without copying, but the juice probably isn't worth + * the squeeze. + * The packet length includes a 4 byte CRC on the end. 
+ */ + capr = (capr+4) % ctlr->rblen; + p = ctlr->rbstart+capr; + capr = (capr+length) % ctlr->rblen; + if(status & Mar) + ctlr->mcast++; + + if((bp = iallocb(length)) != nil){ + if(p+length >= ctlr->rbstart+ctlr->rblen){ + l = ctlr->rbstart+ctlr->rblen - p; + memmove(bp->wp, p, l); + bp->wp += l; + length -= l; + p = ctlr->rbstart; + } + if(length > 0){ + memmove(bp->wp, p, length); + bp->wp += length; + } + bp->wp -= 4; + etheriq(edev, bp, 1); + } + + capr = ROUNDUP(capr, 4); + csr16w(ctlr, Capr, capr-16); + } +} + +static void +rtl8139interrupt(Ureg*, void* arg) +{ + Td *td; + Ctlr *ctlr; + Ether *edev; + int isr, msr, tsd; + + edev = arg; + ctlr = edev->ctlr; + if(ctlr == nil) { /* not attached yet? (shouldn't happen) */ + print("rtl8139interrupt: interrupt for unattached Ether %#p\n", + edev); + return; + } + + while((isr = csr16r(ctlr, Isr)) != 0){ + csr16w(ctlr, Isr, isr); + if(ctlr->alloc == nil) { + print("rtl8139interrupt: interrupt for unattached Ctlr " + "%#p port %#p\n", ctlr, (void *)ctlr->port); + return; /* not attached yet (shouldn't happen) */ + } + if(isr & (Fovw|PunLc|Rxovw|Rer|Rok)){ + rtl8139receive(edev); + if(!(isr & Rok)) + ctlr->ierrs++; + isr &= ~(Fovw|Rxovw|Rer|Rok); + } + + if(isr & (Ter|Tok)){ + ilock(&ctlr->tlock); + while(ctlr->ntd){ + td = &ctlr->td[ctlr->tdi]; + tsd = csr32r(ctlr, td->tsd); + if(!(tsd & (Tabt|Tun|Tcok))) + break; + + if(!(tsd & Tcok)){ + if(tsd & Tun){ + if(ctlr->etxth < ETHERMAXTU/32) + ctlr->etxth++; + } + edev->oerrs++; + } + + if(td->bp != nil){ + freeb(td->bp); + td->bp = nil; + } + + ctlr->ntd--; + ctlr->tdi = NEXT(ctlr->tdi, Ntd); + } + rtl8139txstart(edev); + iunlock(&ctlr->tlock); + isr &= ~(Ter|Tok); + } + + if(isr & PunLc){ + /* + * Maybe the link changed - do we care very much? 
+ */ + msr = csr8r(ctlr, Msr); + if(!(msr & Linkb)){ + if(!(msr & Speed10) && edev->mbps != 100){ + edev->mbps = 100; + qsetlimit(edev->oq, 256*1024); + } + else if((msr & Speed10) && edev->mbps != 10){ + edev->mbps = 10; + qsetlimit(edev->oq, 65*1024); + } + } + isr &= ~(Clc|PunLc); + } + + /* + * Only Serr|Timerbit should be left by now. + * Should anything be done to tidy up? TimerInt isn't + * used so that can be cleared. A PCI bus error is indicated + * by Serr, that's pretty serious; is there anything to do + * other than try to reinitialise the chip? + */ + if((isr & (Serr|Timerbit)) != 0){ + iprint("rtl8139interrupt: imr %#4.4ux isr %#4.4ux\n", + csr16r(ctlr, Imr), isr); + if(isr & Timerbit) + csr32w(ctlr, TimerInt, 0); + if(isr & Serr) + rtl8139init(edev); + } + } +} + +/* + * Walk the ctlrhead list for the first controller that is not yet + * active and whose (did<<16)|vid equals id. The I/O port is + * claimed with ioalloc and the chip soft-reset; on success the + * controller is marked active and returned. Returns nil if no + * controller qualifies. + */ +static Ctlr* +rtl8139match(Ether* edev, int id) +{ + Pcidev *p; + Ctlr *ctlr; + int i, port; + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. + */ + for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + p = ctlr->pcidev; + if(((p->did<<16)|p->vid) != id) + continue; + port = p->mem[0].bar & ~0x01; + if(edev->port != 0 && edev->port != port) + continue; + + if(ioalloc(port, p->mem[0].size, 0, "rtl8139") < 0){ + print("rtl8139: port %#ux in use\n", port); + continue; + } + + /* + * NOTE(review): presumably a non-zero power-management state + * loses the config registers, hence the rewrite from the + * values saved in Pcidev - confirm against pcigetpms/pcisetpms. + */ + if(pcigetpms(p) > 0){ + pcisetpms(p, 0); + + for(i = 0; i < 6; i++) + pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar); + pcicfgw8(p, PciINTL, p->intl); + pcicfgw8(p, PciLTR, p->ltr); + pcicfgw8(p, PciCLS, p->cls); + pcicfgw16(p, PciPCR, p->pcr); + } + + ctlr->port = port; + if(rtl8139reset(ctlr)) { /* reset timed out: release the port, try the next */ + iofree(port); + continue; + } + pcisetbme(p); + + ctlr->active = 1; + return ctlr; + } + return nil; +} + +static struct { + char* name; + int id; +} rtl8139pci[] = { + { "rtl8139", (0x8139<<16)|0x10EC, }, /* generic */ + { "smc1211", (0x1211<<16)|0x1113, }, /* SMC EZ-Card */ + { "dfe-538tx", (0x1300<<16)|0x1186, }, /* D-Link DFE-538TX */ + { 
"dfe-560txd", (0x1340<<16)|0x1186, }, /* D-Link DFE-560TXD */ + { nil }, +}; + +static int +rtl8139pnp(Ether* edev) +{ + int i, id; + Pcidev *p; + Ctlr *ctlr; + uchar ea[Eaddrlen]; + + /* + * Make a list of all ethernet controllers + * if not already done. + */ + if(ctlrhead == nil){ + p = nil; + while(p = pcimatch(p, 0, 0)){ + if(p->ccrb != 0x02 || p->ccru != 0) + continue; + ctlr = malloc(sizeof(Ctlr)); + if(ctlr == nil) + error(Enomem); + ctlr->pcidev = p; + ctlr->id = (p->did<<16)|p->vid; + + if(ctlrhead != nil) + ctlrtail->next = ctlr; + else + ctlrhead = ctlr; + ctlrtail = ctlr; + } + } + + /* + * Is it an RTL8139 under a different name? + * Normally a search is made through all the found controllers + * for one which matches any of the known vid+did pairs. + * If a vid+did pair is specified a search is made for that + * specific controller only. + */ + id = 0; + for(i = 0; i < edev->nopt; i++){ + if(cistrncmp(edev->opt[i], "id=", 3) == 0) + id = strtol(&edev->opt[i][3], nil, 0); + } + + ctlr = nil; + if(id != 0) + ctlr = rtl8139match(edev, id); + else for(i = 0; rtl8139pci[i].name; i++){ + if((ctlr = rtl8139match(edev, rtl8139pci[i].id)) != nil) + break; + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + + /* + * Check if the adapter's station address is to be overridden. + * If not, read it from the device and set in edev->ea. 
+ */ + memset(ea, 0, Eaddrlen); + if(memcmp(ea, edev->ea, Eaddrlen) == 0){ + i = csr32r(ctlr, Idr0); + edev->ea[0] = i; + edev->ea[1] = i>>8; + edev->ea[2] = i>>16; + edev->ea[3] = i>>24; + i = csr32r(ctlr, Idr0+4); + edev->ea[4] = i; + edev->ea[5] = i>>8; + } + + edev->arg = edev; + edev->attach = rtl8139attach; + edev->transmit = rtl8139transmit; + edev->interrupt = rtl8139interrupt; + edev->ifstat = rtl8139ifstat; + + edev->promiscuous = rtl8139promiscuous; + edev->multicast = rtl8139multicast; + edev->shutdown = rtl8139shutdown; + + /* + * This should be much more dynamic but will do for now. + */ + if((csr8r(ctlr, Msr) & (Speed10|Linkb)) == 0) + edev->mbps = 100; + + return 0; +} + +void +ether8139link(void) +{ + addethercard("rtl8139", rtl8139pnp); +} diff -Nru /sys/src/9k/386/ether8169.c /sys/src/9k/386/ether8169.c --- /sys/src/9k/386/ether8169.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/ether8169.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1238 @@ +/* + * Realtek RTL8110S/8169S Gigabit Ethernet Controllers. + * Mostly there. There are some magic register values used + * which are not described in any datasheet or driver but seem + * to be necessary. + * No tuning has been done. Only tested on an RTL8110S, there + * are slight differences between the chips in the series so some + * tweaks may be needed. 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +#include "../port/ethermii.h" +#include "../port/netif.h" + +#include "etherif.h" + +enum { /* registers (offsets added to Ctlr.port by the csr macros) */ + Idr0 = 0x00, /* MAC address */ + Mar0 = 0x08, /* Multicast address */ + Dtccr = 0x10, /* Dump Tally Counter Command */ + Tnpds = 0x20, /* Transmit Normal Priority Descriptors */ + Thpds = 0x28, /* Transmit High Priority Descriptors */ + Flash = 0x30, /* Flash Memory Read/Write */ + Erbcr = 0x34, /* Early Receive Byte Count */ + Ersr = 0x36, /* Early Receive Status */ + Cr = 0x37, /* Command Register */ + Tppoll = 0x38, /* Transmit Priority Polling */ + Imr = 0x3C, /* Interrupt Mask */ + Isr = 0x3E, /* Interrupt Status */ + Tcr = 0x40, /* Transmit Configuration */ + Rcr = 0x44, /* Receive Configuration */ + Tctr = 0x48, /* Timer Count */ + Mpc = 0x4C, /* Missed Packet Counter */ + Cr9346 = 0x50, /* 9346 Command Register */ + Config0 = 0x51, /* Configuration Register 0 */ + Config1 = 0x52, /* Configuration Register 1 */ + Config2 = 0x53, /* Configuration Register 2 */ + Config3 = 0x54, /* Configuration Register 3 */ + Config4 = 0x55, /* Configuration Register 4 */ + Config5 = 0x56, /* Configuration Register 5 */ + Timerint = 0x58, /* Timer Interrupt */ + Mulint = 0x5C, /* Multiple Interrupt Select */ + Phyar = 0x60, /* PHY Access */ + Tbicsr0 = 0x64, /* TBI Control and Status */ + Tbianar = 0x68, /* TBI Auto-Negotiation Advertisement */ + Tbilpar = 0x6A, /* TBI Auto-Negotiation Link Partner */ + Phystatus = 0x6C, /* PHY Status */ + + Rms = 0xDA, /* Receive Packet Maximum Size */ + Cplusc = 0xE0, /* C+ Command */ + Coal = 0xE2, /* Interrupt Mitigation (Coalesce) */ + Rdsar = 0xE4, /* Receive Descriptor Start Address */ + Etx = 0xEC, /* Early Transmit Threshold */ +}; + +enum { /* Dtccr */ + Cmd = 0x00000008, /* Command */ +}; + +enum { /* Cr */ + Te = 0x04, /* Transmitter Enable */ + Re = 0x08, /* Receiver Enable 
*/ + Rst = 0x10, /* Software Reset */ +}; + +enum { /* Tppoll */ + Fswint = 0x01, /* Forced Software Interrupt */ + Npq = 0x40, /* Normal Priority Queue polling */ + Hpq = 0x80, /* High Priority Queue polling */ +}; + +enum { /* Imr/Isr */ + Rok = 0x0001, /* Receive OK */ + Rer = 0x0002, /* Receive Error */ + Tok = 0x0004, /* Transmit OK */ + Ter = 0x0008, /* Transmit Error */ + Rdu = 0x0010, /* Receive Descriptor Unavailable */ + Punlc = 0x0020, /* Packet Underrun or Link Change */ + Fovw = 0x0040, /* Receive FIFO Overflow */ + Tdu = 0x0080, /* Transmit Descriptor Unavailable */ + Swint = 0x0100, /* Software Interrupt */ + Timeout = 0x4000, /* Timer */ + Serr = 0x8000, /* System Error */ +}; + +enum { /* Tcr */ + MtxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MtxdmaMASK = 0x00000700, + Mtxdmaunlimited = 0x00000700, + Acrc = 0x00010000, /* Append CRC (not) */ + Lbk0 = 0x00020000, /* Loopback Test 0 */ + Lbk1 = 0x00040000, /* Loopback Test 1 */ + Ifg2 = 0x00080000, /* Interframe Gap 2 */ + HwveridSHIFT = 23, /* Hardware Version ID */ + HwveridMASK = 0x7C800000, + Macv01 = 0x00000000, /* RTL8169 */ + Macv02 = 0x00800000, /* RTL8169S/8110S */ + Macv03 = 0x04000000, /* RTL8169S/8110S */ + Macv04 = 0x10000000, /* RTL8169SB/8110SB */ + Macv05 = 0x18000000, /* RTL8169SC/8110SC */ + Macv07 = 0x24800000, /* RTL8102e */ + Macv07a = 0x34800000, /* RTL8102e */ + Macv11 = 0x30000000, /* RTL8168B/8111B */ + Macv12 = 0x38000000, /* RTL8169B/8111B */ + Macv12a = 0x3c000000, /* RTL8169C/8111C/8111C-GR (Macv19) */ + Macv13 = 0x34000000, /* RTL8101E */ + Macv14 = 0x30800000, /* RTL8100E */ + Macv15 = 0x38800000, /* RTL8100E */ + Macv25 = 0x28000000, /* RTL8168D */ + Macv2c = 0x2c000000, /* RTL8168E */ + Macv2ca = 0x2c800000, /* RTL8111E */ + Ifg0 = 0x01000000, /* Interframe Gap 0 */ + Ifg1 = 0x02000000, /* Interframe Gap 1 */ +}; + +enum { /* Rcr */ + Aap = 0x00000001, /* Accept All Packets */ + Apm = 0x00000002, /* Accept Physical Match */ + Am = 0x00000004, /* Accept Multicast */ + 
Ab = 0x00000008, /* Accept Broadcast */ + Ar = 0x00000010, /* Accept Runt */ + Aer = 0x00000020, /* Accept Error */ + Sel9356 = 0x00000040, /* 9356 EEPROM used */ + MrxdmaSHIFT = 8, /* Max. DMA Burst Size */ + MrxdmaMASK = 0x00000700, + Mrxdmaunlimited = 0x00000700, + RxfthSHIFT = 13, /* Receive FIFO Threshold */ + RxfthMASK = 0x0000E000, + Rxfth256 = 0x00008000, + Rxfthnone = 0x0000E000, + Rer8 = 0x00010000, /* Accept Error Packets > 8 bytes */ + MulERINT = 0x01000000, /* Multiple Early Interrupt Select */ +}; + +enum { /* Cr9346 */ + Eedo = 0x01, /* EEPROM data out pin */ + Eedi = 0x02, /* EEPROM data in pin */ + Eesk = 0x04, /* EEPROM serial clock pin */ + Eecs = 0x08, /* EEPROM chip select pin */ + Eem0 = 0x40, /* Operating Mode */ + Eem1 = 0x80, +}; + +enum { /* Phyar */ + DataMASK = 0x0000FFFF, /* 16-bit GMII/MII Register Data */ + DataSHIFT = 0, + RegaddrMASK = 0x001F0000, /* 5-bit GMII/MII Register Address */ + RegaddrSHIFT = 16, + Flag = 0x80000000, /* on a read, polled until set before Data is taken (rtl8169miimir) */ +}; + +enum { /* Phystatus */ + Fd = 0x01, /* Full Duplex */ + Linksts = 0x02, /* Link Status */ + Speed10 = 0x04, /* 10Mbps */ + Speed100 = 0x08, /* 100Mbps */ + Speed1000 = 0x10, /* 1000Mbps */ + Rxflow = 0x20, /* Receive Flow Control */ + Txflow = 0x40, /* Transmit Flow Control */ + Entbi = 0x80, /* TBI Enable */ +}; + +enum { /* Cplusc */ + Mulrw = 0x0008, /* PCI Multiple R/W Enable */ + Dac = 0x0010, /* PCI Dual Address Cycle Enable */ + Rxchksum = 0x0020, /* Receive Checksum Offload Enable */ + Rxvlan = 0x0040, /* Receive VLAN De-tagging Enable */ + Endian = 0x0200, /* Endian Mode */ +}; + +typedef struct D D; /* Transmit/Receive Descriptor */ +struct D { + u32int control; + u32int vlan; + u32int addrlo; + u32int addrhi; +}; + +enum { /* Transmit Descriptor control */ + TxflMASK = 0x0000FFFF, /* Transmit Frame Length */ + TxflSHIFT = 0, + Tcps = 0x00010000, /* TCP Checksum Offload */ + Udpcs = 0x00020000, /* UDP Checksum Offload */ + Ipcs = 0x00040000, /* IP Checksum Offload */ + Lgsen = 0x08000000, /* Large Send */ +}; + +enum { /* Receive Descriptor control */ + RxflMASK = 0x00001FFF, /* Receive Frame Length */ + RxflSHIFT = 0, + Tcpf = 0x00004000, /* TCP 
Checksum Failure */ + Udpf = 0x00008000, /* UDP Checksum Failure */ + Ipf = 0x00010000, /* IP Checksum Failure */ + Pid0 = 0x00020000, /* Protocol ID0 */ + Pid1 = 0x00040000, /* Protocol ID1 */ + Crce = 0x00080000, /* CRC Error */ + Runt = 0x00100000, /* Runt Packet */ + Res = 0x00200000, /* Receive Error Summary */ + Rwt = 0x00400000, /* Receive Watchdog Timer Expired */ + Fovf = 0x00800000, /* FIFO Overflow */ + Bovf = 0x01000000, /* Buffer Overflow */ + Bar = 0x02000000, /* Broadcast Address Received */ + Pam = 0x04000000, /* Physical Address Matched */ + Mar = 0x08000000, /* Multicast Address Received */ +}; + +enum { /* General Descriptor control */ + Ls = 0x10000000, /* Last Segment Descriptor */ + Fs = 0x20000000, /* First Segment Descriptor */ + Eor = 0x40000000, /* End of Descriptor Ring */ + Own = 0x80000000, /* Ownership */ +}; + +/* + */ +enum { /* Ring sizes (<= 1024) */ + Ntd = 32, /* Transmit Ring */ + Nrd = 128, /* Receive Ring */ + + Mtu = ETHERMAXTU, + Mps = ROUNDUP(ETHERMAXTU+4, 128), +// Mps = Mtu + 8 + 14, /* if(mtu>ETHERMAXTU) */ +}; + +typedef struct Dtcc Dtcc; +struct Dtcc { + u64int txok; + u64int rxok; + u64int txer; + u32int rxer; + u16int misspkt; + u16int fae; + u32int tx1col; + u32int txmcol; + u64int rxokph; + u64int rxokbrd; + u32int rxokmu; + u16int txabt; + u16int txundrn; +}; + +enum { /* Variants */ + Rtl8100e = (0x8136<<16)|0x10EC, /* RTL810[01]E: pci -e */ + Rtl8169c = (0x0116<<16)|0x16EC, /* RTL8169C+ (USR997902) */ + Rtl8169sc = (0x8167<<16)|0x10EC, /* RTL8169SC */ + Rtl8168b = (0x8168<<16)|0x10EC, /* RTL8168B: pci-e */ + Rtl8169 = (0x8169<<16)|0x10EC, /* RTL8169 */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + int port; + Pcidev* pcidev; + Ctlr* next; + int active; + + QLock alock; /* attach */ + Lock ilock; /* init */ + int init; /* */ + + int pciv; /* */ + int macv; /* MAC version */ + int phyv; /* PHY version */ + int pcie; /* flag: pci-express device? 
*/ + + uvlong mchash; /* multicast hash */ + + Mii* mii; + + Lock tlock; /* transmit */ + D* td; /* descriptor ring */ + Block** tb; /* transmit buffers */ + int ntd; + + int tdh; /* head - producer index (host) */ + int tdt; /* tail - consumer index (NIC) */ + int ntdfree; + int ntq; + + Lock rlock; /* receive */ + D* rd; /* descriptor ring */ + Block** rb; /* receive buffers */ + int nrd; + + int rdh; /* head - producer index (NIC) */ + int rdt; /* tail - consumer index (host) */ + int nrdfree; + + int tcr; /* transmit configuration register */ + int rcr; /* receive configuration register */ + int imr; + + QLock slock; /* statistics */ + Dtcc* dtcc; + uint txdu; + uint tcpf; + uint udpf; + uint ipf; + uint fovf; + uint ierrs; + uint rer; + uint rdu; + uint punlc; + uint fovw; + uint mcast; + uint frag; /* partial packets; rb was too small */ +} Ctlr; + +static Ctlr* rtl8169ctlrhead; +static Ctlr* rtl8169ctlrtail; + +#define csr8r(c, r) (inb((c)->port+(r))) +#define csr16r(c, r) (ins((c)->port+(r))) +#define csr32r(c, r) (inl((c)->port+(r))) +#define csr8w(c, r, b) (outb((c)->port+(r), (u8int)(b))) +#define csr16w(c, r, w) (outs((c)->port+(r), (u16int)(w))) +#define csr32w(c, r, l) (outl((c)->port+(r), (u32int)(l))) + +/* + * Read PHY register ra through Phyar. Only PHY address 1 is + * accepted. The register address is written with Flag clear and + * Phyar polled (up to 2000 times, 100us apart) until Flag is set. + * Returns the 16-bit register value, or -1 on a bad address or + * timeout. + */ +static int +rtl8169miimir(Ctlr* ctlr, int pa, int ra) +{ + uint r; + int timeo; + + if(pa != 1) + return -1; + + r = (ra<<16) & RegaddrMASK; + csr32w(ctlr, Phyar, r); + delay(1); + for(timeo = 0; timeo < 2000; timeo++){ + if((r = csr32r(ctlr, Phyar)) & Flag) + break; + microdelay(100); + } + if(!(r & Flag)) + return -1; + + return (r & DataMASK)>>DataSHIFT; +} + +/* + * Write data to PHY register ra through Phyar. Only PHY address 1 + * is accepted. The command is written with Flag set and Phyar + * polled (up to 2000 times, 100us apart) until Flag clears. + * Returns 0 on success, -1 on a bad address or timeout. + */ +static int +rtl8169miimiw(Ctlr* ctlr, int pa, int ra, int data) +{ + uint r; + int timeo; + + if(pa != 1) + return -1; + + r = Flag|((ra<<16) & RegaddrMASK)|((data << DataSHIFT) & DataMASK); + csr32w(ctlr, Phyar, r); + delay(1); + for(timeo = 0; timeo < 2000; timeo++){ + if(!((r = csr32r(ctlr, Phyar)) & Flag)) + break; + microdelay(100); + } + if(r & Flag) + return -1; + + return 0; +} + +/* + * Combined read/write hook handed to miiattach: a non-zero write + * stores data in register ra, otherwise the register is read. + */ +static int +rtl8169miirw(Mii* mii, int write, int pa, int ra, int data) +{ + if(write) + return rtl8169miimiw(mii->ctlr, pa, ra, data); + + return rtl8169miimir(mii->ctlr, pa, ra); +} + +static Mii* +rtl8169mii(Ctlr* ctlr) +{ + Mii* mii; + MiiPhy *phy; + + /* + * Link management. + * + * Get rev number out of Phyidr2 so can config properly. 
+ * There's probably more special stuff for Macv0[234] needed here. + */ + ctlr->phyv = rtl8169miimir(ctlr, 1, Phyidr2) & 0x0F; + if(ctlr->macv == Macv02){ + csr8w(ctlr, 0x82, 1); /* magic */ + rtl8169miimiw(ctlr, 1, 0x0B, 0x0000); /* magic */ + } + if((mii = miiattach(ctlr, (1<<1), rtl8169miirw)) == nil) + return nil; + + phy = mii->curphy; + print("oui %#ux phyno %d, macv = %#8.8ux phyv = %#4.4ux\n", + phy->oui, phy->phyno, ctlr->macv, ctlr->phyv); + + if(miistatus(mii) < 0){ + miireset(mii); + miiane(mii, ~0, ~0, ~0); + } + + return mii; +} + +static void +rtl8169promiscuous(void* arg, int on) +{ + Ether *edev; + Ctlr * ctlr; + + edev = arg; + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + if(on) + ctlr->rcr |= Aap; + else + ctlr->rcr &= ~Aap; + csr32w(ctlr, Rcr, ctlr->rcr); + iunlock(&ctlr->ilock); +} + +enum { + /* everyone else uses 0x04c11db7, but they both produce the same crc */ + Etherpolybe = 0x04c11db6, + Bytemask = (1<<8) - 1, +}; + +static ulong +ethercrcbe(uchar *addr, long len) +{ + int i, j; + ulong c, crc, carry; + + crc = ~0UL; + for (i = 0; i < len; i++) { + c = addr[i]; + for (j = 0; j < 8; j++) { + carry = ((crc & (1UL << 31))? 
1: 0) ^ (c & 1); + crc <<= 1; + c >>= 1; + if (carry) + crc = (crc ^ Etherpolybe) | carry; + } + } + return crc; +} + +static ulong +swabl(ulong l) +{ + return l>>24 | (l>>8) & (Bytemask<<8) | + (l<<8) & (Bytemask<<16) | l<<24; +} + +static void +rtl8169multicast(void* ether, uchar *eaddr, int add) +{ + Ether *edev; + Ctlr *ctlr; + + if (!add) + return; /* ok to keep receiving on old mcast addrs */ + + edev = ether; + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + ctlr->mchash |= 1ULL << (ethercrcbe(eaddr, Eaddrlen) >> 26); + + ctlr->rcr |= Am; + csr32w(ctlr, Rcr, ctlr->rcr); + + /* pci-e variants reverse the order of the hash byte registers */ + if (ctlr->pcie) { + csr32w(ctlr, Mar0, swabl(ctlr->mchash>>32)); + csr32w(ctlr, Mar0+4, swabl(ctlr->mchash)); + } else { + csr32w(ctlr, Mar0, ctlr->mchash); + csr32w(ctlr, Mar0+4, ctlr->mchash>>32); + } + + iunlock(&ctlr->ilock); +} + +static long +rtl8169ifstat(Ether* edev, void* a, long n, ulong offset) +{ + Ctlr *ctlr; + Dtcc *dtcc; + int timeo; + char *alloc, *e, *p; + + ctlr = edev->ctlr; + qlock(&ctlr->slock); + + alloc = nil; + if(waserror()){ + qunlock(&ctlr->slock); + free(alloc); + nexterror(); + } + + csr32w(ctlr, Dtccr+4, PCIWADDRH(ctlr->dtcc)); + csr32w(ctlr, Dtccr, PCIWADDRL(ctlr->dtcc)|Cmd); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Dtccr) & Cmd)) + break; + delay(1); + } + if(csr32r(ctlr, Dtccr) & Cmd) + error(Eio); + dtcc = ctlr->dtcc; + + edev->oerrs = dtcc->txer; + edev->crcs = dtcc->rxer; + edev->frames = dtcc->fae; + edev->buffs = dtcc->misspkt; + edev->overflows = ctlr->txdu+ctlr->rdu; + + if(n == 0){ + qunlock(&ctlr->slock); + poperror(); + return 0; + } + + if((alloc = malloc(READSTR)) == nil) + error(Enomem); + e = alloc+READSTR; + + p = seprint(alloc, e, "TxOk: %llud\n", dtcc->txok); + p = seprint(p, e, "RxOk: %llud\n", dtcc->rxok); + p = seprint(p, e, "TxEr: %llud\n", dtcc->txer); + p = seprint(p, e, "RxEr: %ud\n", dtcc->rxer); + p = seprint(p, e, "MissPkt: %ud\n", 
dtcc->misspkt); + p = seprint(p, e, "FAE: %ud\n", dtcc->fae); + p = seprint(p, e, "Tx1Col: %ud\n", dtcc->tx1col); + p = seprint(p, e, "TxMCol: %ud\n", dtcc->txmcol); + p = seprint(p, e, "RxOkPh: %llud\n", dtcc->rxokph); + p = seprint(p, e, "RxOkBrd: %llud\n", dtcc->rxokbrd); + p = seprint(p, e, "RxOkMu: %ud\n", dtcc->rxokmu); + p = seprint(p, e, "TxAbt: %ud\n", dtcc->txabt); + p = seprint(p, e, "TxUndrn: %ud\n", dtcc->txundrn); + + p = seprint(p, e, "txdu: %ud\n", ctlr->txdu); + p = seprint(p, e, "tcpf: %ud\n", ctlr->tcpf); + p = seprint(p, e, "udpf: %ud\n", ctlr->udpf); + p = seprint(p, e, "ipf: %ud\n", ctlr->ipf); + p = seprint(p, e, "fovf: %ud\n", ctlr->fovf); + p = seprint(p, e, "ierrs: %ud\n", ctlr->ierrs); + p = seprint(p, e, "rer: %ud\n", ctlr->rer); + p = seprint(p, e, "rdu: %ud\n", ctlr->rdu); + p = seprint(p, e, "punlc: %ud\n", ctlr->punlc); + p = seprint(p, e, "fovw: %ud\n", ctlr->fovw); + + p = seprint(p, e, "tcr: %#8.8ux\n", ctlr->tcr); + p = seprint(p, e, "rcr: %#8.8ux\n", ctlr->rcr); + p = seprint(p, e, "multicast: %ud\n", ctlr->mcast); + + if(ctlr->mii != nil && ctlr->mii->curphy != nil) + miidumpphy(ctlr->mii, p, e); + + n = readstr(offset, a, n, alloc); + + qunlock(&ctlr->slock); + poperror(); + free(alloc); + + return n; +} + +static void +rtl8169halt(Ctlr* ctlr) +{ + csr32w(ctlr, Timerint, 0); + csr8w(ctlr, Cr, 0); + csr16w(ctlr, Imr, 0); + csr16w(ctlr, Isr, ~0); +} + +static int +rtl8169reset(Ctlr* ctlr) +{ + u32int r; + int timeo; + + /* + * Soft reset the controller. 
+	 */
+	csr8w(ctlr, Cr, Rst);
+	for(r = timeo = 0; timeo < 1000; timeo++){
+		r = csr8r(ctlr, Cr);
+		if(!(r & Rst))
+			break;
+		delay(1);
+	}
+	rtl8169halt(ctlr);
+
+	if(r & Rst)
+		return -1;
+	return 0;
+}
+
+/*
+ * Shutdown hook: soft-reset the controller via rtl8169reset.
+ * The reset result is ignored.
+ */
+static void
+rtl8169shutdown(Ether *ether)
+{
+	rtl8169reset(ether->ctlr);
+}
+
+/*
+ * Hand receive descriptors to the NIC.
+ *
+ * Starting at ctlr->rdt, walk the receive ring until the next
+ * slot would be ctlr->rdh.  A slot with no Block gets a fresh
+ * Mps-byte one from iallocb (the walk stops early if none is
+ * available); a slot that still holds a Block is reported as an
+ * overrun and its Block is reused.  Every slot visited has
+ * Own|Mps or'ed into its control word and is counted in nrdfree.
+ * ctlr->rdt is updated to where the walk stopped.
+ */
+static void
+rtl8169replenish(Ctlr* ctlr)
+{
+	D *d;
+	int rdt;
+	Block *bp;
+
+	rdt = ctlr->rdt;
+	while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){
+		d = &ctlr->rd[rdt];
+		if(ctlr->rb[rdt] == nil){
+			/*
+			 * Simple allocation for now.
+			 * This better be aligned on 8.
+			 */
+			bp = iallocb(Mps);
+			if(bp == nil){
+				iprint("no available buffers\n");
+				break;
+			}
+			ctlr->rb[rdt] = bp;
+			d->addrlo = PCIWADDRL(bp->rp);
+			d->addrhi = PCIWADDRH(bp->rp);
+			coherence();
+		}
+		else
+			iprint("i8169: rx overrun\n");
+		d->control |= Own|Mps;
+		rdt = NEXT(rdt, ctlr->nrd);
+		ctlr->nrdfree++;
+	}
+	ctlr->rdt = rdt;
+}
+
+/*
+ * Reset the chip and program it for operation: clear and re-arm
+ * the transmit and receive descriptor rings, apply the per-macv
+ * and per-pciv register settings, set the interrupt mask and load
+ * the descriptor ring base addresses.
+ * Runs with ctlr->ilock held from entry until just before the
+ * return; always returns 0.
+ */
+static int
+rtl8169init(Ether* edev)
+{
+	u32int r;
+	Ctlr *ctlr;
+	/*
+	 * Cplusc is read with csr16r and written with csr16w, and
+	 * bit 14 is set below for Macv02/Macv03, so this must be a
+	 * 16-bit variable; an 8-bit one silently drops bits 8-15.
+	 */
+	u16int cplusc;
+
+	ctlr = edev->ctlr;
+	ilock(&ctlr->ilock);
+
+	rtl8169reset(ctlr);
+
+	/*
+	 * MAC Address is not settable on some (all?) chips.
+	 * Must put chip into config register write enable mode.
+	 */
+	csr8w(ctlr, Cr9346, Eem1|Eem0);
+
+	/*
+	 * Transmitter.
+	 */
+	memset(ctlr->td, 0, sizeof(D)*ctlr->ntd);
+	ctlr->tdh = ctlr->tdt = 0;
+	ctlr->td[ctlr->ntd-1].control = Eor;
+
+	/*
+	 * Receiver.
+	 * Need to do something here about the multicast filter.
+	 */
+	memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd);
+	ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0;
+	ctlr->rd[ctlr->nrd-1].control = Eor;
+
+	rtl8169replenish(ctlr);
+	ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm;
+
+	/*
+	 * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst
+	 * settings in Tcr/Rcr; the (1<<14) is magic.
+	 */
+	cplusc = csr16r(ctlr, Cplusc) & ~(1<<14);
+	cplusc |= /*Rxchksum|*/Mulrw;
+	switch(ctlr->macv){
+	default:
+		/*
+		 * If it isn't recognised, assume it behaves
+		 * like all the recent chips.
+		 */
+		print("rtl8169: unrecognised macv %#ux\n", ctlr->macv);
+		break;
+	case Macv01:
+		break;
+	case Macv02:
+	case Macv03:
+		cplusc |= (1<<14);			/* magic */
+		break;
+	case Macv05:
+		/*
+		 * This is interpreted from clearly bogus code
+		 * in the manufacturer-supplied driver, it could
+		 * be wrong. Untested.
+		 */
+		r = csr8r(ctlr, Config2) & 0x07;
+		if(r == 0x01)				/* 66MHz PCI */
+			csr32w(ctlr, 0x7C, 0x0007FFFF);	/* magic */
+		else
+			csr32w(ctlr, 0x7C, 0x0007FF00);	/* magic */
+		pciclrmwi(ctlr->pcidev);
+		break;
+	case Macv13:
+		/*
+		 * This is interpreted from clearly bogus code
+		 * in the manufacturer-supplied driver, it could
+		 * be wrong. Untested.
+		 */
+		pcicfgw8(ctlr->pcidev, 0x68, 0x00);	/* magic */
+		pcicfgw8(ctlr->pcidev, 0x69, 0x08);	/* magic */
+		break;
+	case Macv04:
+	case Macv07:
+	case Macv07a:
+	case Macv11:
+	case Macv12:
+	case Macv12a:
+	case Macv14:
+	case Macv15:
+	case Macv25:
+	case Macv2c:
+	case Macv2ca:
+		break;
+	}
+
+	/*
+	 * Enable receiver/transmitter.
+	 * Need to do this first or some of the settings below
+	 * won't take.
+	 */
+	switch(ctlr->pciv){
+	default:
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		csr32w(ctlr, Mar0, 0);
+		csr32w(ctlr, Mar0+4, 0);
+		ctlr->mchash = 0;
+	case Rtl8169sc:
+	case Rtl8168b:
+		break;
+	}
+
+	/*
+	 * Interrupts.
+	 * Disable Tdu|Tok for now, the transmit routine will tidy.
+	 * Tdu means the NIC ran out of descriptors to send, so it
+	 * doesn't really need to ever be on.
+	 */
+	csr32w(ctlr, Timerint, 0);
+	ctlr->imr = Serr|Timeout|Fovw|Punlc|Rdu|Ter|Rer|Rok;
+	csr16w(ctlr, Imr, ctlr->imr);
+
+	/*
+	 * Clear missed-packet counter;
+	 * clear early transmit threshold value;
+	 * set the descriptor ring base addresses;
+	 * set the maximum receive packet size;
+	 * no early-receive interrupts.
+	 *
+	 * note: the maximum rx size is a filter. the size of the buffer
+	 * in the descriptor ring is still honored. we will toss >Mtu
+	 * packets because they've been fragmented into multiple
+	 * rx buffers.
+	 */
+	csr32w(ctlr, Mpc, 0);
+	csr8w(ctlr, Etx, 0x3f);				/* magic */
+	csr32w(ctlr, Tnpds+4, PCIWADDRH(ctlr->td));
+	csr32w(ctlr, Tnpds, PCIWADDRL(ctlr->td));
+	csr32w(ctlr, Rdsar+4, PCIWADDRH(ctlr->rd));
+	csr32w(ctlr, Rdsar, PCIWADDRL(ctlr->rd));
+	csr16w(ctlr, Rms, 16383);			/* was Mps; see above comment */
+	r = csr16r(ctlr, Mulint) & 0xF000;		/* no early rx interrupts */
+	csr16w(ctlr, Mulint, r);
+	csr16w(ctlr, Cplusc, cplusc);
+	csr16w(ctlr, Coal, 0);
+
+	/*
+	 * Set configuration.
+	 */
+	switch(ctlr->pciv){
+	default:
+		break;
+	case Rtl8169sc:
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		break;
+	case Rtl8168b:
+	case Rtl8169c:
+		csr16w(ctlr, Cplusc, 0x2000);		/* magic */
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		break;
+	}
+	ctlr->tcr = csr32r(ctlr, Tcr);
+	csr8w(ctlr, Cr9346, 0);
+
+	iunlock(&ctlr->ilock);
+
+//	rtl8169mii(ctlr);
+
+	return 0;
+}
+
+static void
+rtl8169attach(Ether* edev)
+{
+	int timeo;
+	Ctlr *ctlr;
+
+	ctlr = edev->ctlr;
+	qlock(&ctlr->alock);
+	if(ctlr->init == 0){
+		ctlr->td = mallocalign(sizeof(D)*Ntd, 256, 0, 0);
+		ctlr->tb = malloc(Ntd*sizeof(Block*));
+		ctlr->ntd = Ntd;
+		ctlr->rd = mallocalign(sizeof(D)*Nrd, 256, 0, 0);
+		ctlr->rb = malloc(Nrd*sizeof(Block*));
+		ctlr->nrd = Nrd;
+		ctlr->dtcc = mallocalign(sizeof(Dtcc), 64, 0, 0);
+		if(ctlr->td == nil || ctlr->tb == nil || ctlr->rd == nil ||
+		   ctlr->rb == nil || ctlr->dtcc == nil) {
+			free(ctlr->td);
+			free(ctlr->tb);
+			free(ctlr->rd);
+			free(ctlr->rb);
+			free(ctlr->dtcc);
+			qunlock(&ctlr->alock);
+			error(Enomem);
+		}
+		memset(ctlr->dtcc, 0, sizeof(Dtcc));	/* paranoia */
+		rtl8169init(edev);
+		ctlr->init = 1;
+	}
+	qunlock(&ctlr->alock);
+
+	/*
+	 * Wait for link to be ready.
+ */ + for(timeo = 0; timeo < 350; timeo++){ + if(miistatus(ctlr->mii) == 0) + break; + tsleep(&up->sleep, return0, 0, 10); + } +} + +static void +rtl8169link(Ether* edev) +{ + int limit; + Ctlr *ctlr; + MiiPhy *phy; + + ctlr = edev->ctlr; + + /* + * Maybe the link changed - do we care very much? + * Could stall transmits if no link, maybe? + */ + if(ctlr->mii == nil || ctlr->mii->curphy == nil) + return; + + phy = ctlr->mii->curphy; + if(miistatus(ctlr->mii) < 0){ + iprint("%slink n: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + edev->link = 0; + return; + } + edev->link = 1; + + limit = 256*1024; + if(phy->speed == 10){ + edev->mbps = 10; + limit = 65*1024; + } + else if(phy->speed == 100) + edev->mbps = 100; + else if(phy->speed == 1000) + edev->mbps = 1000; + iprint("%slink y: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + + if(edev->oq != nil) + qsetlimit(edev->oq, limit); +} + +static void +rtl8169transmit(Ether* edev) +{ + D *d; + Block *bp; + Ctlr *ctlr; + int control, x; + + ctlr = edev->ctlr; + + ilock(&ctlr->tlock); + for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT(x, ctlr->ntd)){ + d = &ctlr->td[x]; + if((control = d->control) & Own) + break; + + /* + * Check errors and log here. + */ + USED(control); + + /* + * Free it up. + * Need to clean the descriptor here? Not really. + * Simple freeb for now (no chain and freeblist). + * Use ntq count for now. 
+ */ + freeb(ctlr->tb[x]); + ctlr->tb[x] = nil; + d->control &= Eor; + + ctlr->ntq--; + } + ctlr->tdh = x; + + x = ctlr->tdt; + while(ctlr->ntq < (ctlr->ntd-1)){ + if((bp = qget(edev->oq)) == nil) + break; + + d = &ctlr->td[x]; + d->addrlo = PCIWADDRL(bp->rp); + d->addrhi = PCIWADDRH(bp->rp); + ctlr->tb[x] = bp; + coherence(); + d->control |= Own|Fs|Ls|((BLEN(bp)<ntd); + ctlr->ntq++; + } + if(x != ctlr->tdt){ + ctlr->tdt = x; + csr8w(ctlr, Tppoll, Npq); + } + else if(ctlr->ntq >= (ctlr->ntd-1)) + ctlr->txdu++; + + iunlock(&ctlr->tlock); +} + +static void +rtl8169receive(Ether* edev) +{ + D *d; + int rdh; + Block *bp; + Ctlr *ctlr; + u32int control; + + ctlr = edev->ctlr; + + rdh = ctlr->rdh; + for(;;){ + d = &ctlr->rd[rdh]; + + if(d->control & Own) + break; + + control = d->control; + if((control & (Fs|Ls|Res)) == (Fs|Ls)){ + bp = ctlr->rb[rdh]; + ctlr->rb[rdh] = nil; + bp->wp = bp->rp + ((control & RxflMASK)>>RxflSHIFT)-4; + + if(control & Fovf) + ctlr->fovf++; + if(control & Mar) + ctlr->mcast++; + + switch(control & (Pid1|Pid0)){ + default: + break; + case Pid0: + if(control & Tcpf){ + ctlr->tcpf++; + break; + } + bp->flag |= Btcpck; + break; + case Pid1: + if(control & Udpf){ + ctlr->udpf++; + break; + } + bp->flag |= Budpck; + break; + case Pid1|Pid0: + if(control & Ipf){ + ctlr->ipf++; + break; + } + bp->flag |= Bipck; + break; + } + etheriq(edev, bp, 1); + } + else{ + if(!(control & Res)) + ctlr->frag++; + /* iprint("i8169: control %#.8ux\n", control); */ + freeb(ctlr->rb[rdh]); + } + ctlr->rb[rdh] = nil; + d->control &= Eor; + ctlr->nrdfree--; + rdh = NEXT(rdh, ctlr->nrd); + + if(ctlr->nrdfree < ctlr->nrd/2) + rtl8169replenish(ctlr); + } + ctlr->rdh = rdh; +} + +static void +rtl8169interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + u32int isr; + + edev = arg; + ctlr = edev->ctlr; + + while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){ + csr16w(ctlr, Isr, isr); + if((isr & ctlr->imr) == 0) + break; + if(isr & (Fovw|Punlc|Rdu|Rer|Rok)){ + 
rtl8169receive(edev);
+			if(!(isr & (Punlc|Rok)))
+				ctlr->ierrs++;
+			if(isr & Rer)
+				ctlr->rer++;
+			if(isr & Rdu)
+				ctlr->rdu++;
+			if(isr & Punlc)
+				ctlr->punlc++;
+			if(isr & Fovw)
+				ctlr->fovw++;
+			isr &= ~(Fovw|Rdu|Rer|Rok);
+		}
+
+		if(isr & (Tdu|Ter|Tok)){
+			rtl8169transmit(edev);
+			isr &= ~(Tdu|Ter|Tok);
+		}
+
+		if(isr & Punlc){
+			rtl8169link(edev);
+			isr &= ~Punlc;
+		}
+
+		/*
+		 * Some of the reserved bits get set sometimes...
+		 */
+		if(isr & (Serr|Timeout|Tdu|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok))
+			panic("rtl8169interrupt: imr %#4.4ux isr %#4.4ux",
+				csr16r(ctlr, Imr), isr);
+	}
+}
+
+/*
+ * Find RTL8169-family controllers on the PCI bus.
+ *
+ * For every device with ccrb 0x02 and ccru 0 whose did/vid is one
+ * of the ids in the switch below: claim its I/O port range,
+ * allocate a Ctlr, rewrite the BAR/INTL/LTR/CLS/PCR config
+ * registers when pcigetpms reports a value above 0, soft-reset
+ * the chip, record the hardware version from Tcr, attach the MII
+ * and append the Ctlr to the rtl8169ctlrhead/rtl8169ctlrtail list.
+ * A device that fails any of these steps has its port range (and
+ * Ctlr, if allocated) released and is skipped.
+ */
+static void
+rtl8169pci(void)
+{
+	Pcidev *p;
+	Ctlr *ctlr;
+	int i, port, pcie;
+
+	p = nil;
+	while(p = pcimatch(p, 0, 0)){
+		if(p->ccrb != 0x02 || p->ccru != 0)
+			continue;
+
+		pcie = 0;
+		switch(i = ((p->did<<16)|p->vid)){
+		default:
+			continue;
+		case Rtl8100e:			/* RTL810[01]E ? */
+		case Rtl8168b:			/* RTL8168B */
+			pcie = 1;
+			break;
+		case Rtl8169c:			/* RTL8169C */
+		case Rtl8169sc:			/* RTL8169SC */
+		case Rtl8169:			/* RTL8169 */
+			break;
+		case (0xC107<<16)|0x1259:	/* Corega CG-LAPCIGT */
+			i = Rtl8169;
+			break;
+		}
+
+		port = p->mem[0].bar & ~0x01;
+		if(ioalloc(port, p->mem[0].size, 0, "rtl8169") < 0){
+			print("rtl8169: port %#ux in use\n", port);
+			continue;
+		}
+
+		ctlr = malloc(sizeof(Ctlr));
+		if(ctlr == nil){
+			/* out of memory: give the port range back and skip this device */
+			iofree(port);
+			print("rtl8169: can't allocate memory\n");
+			continue;
+		}
+		ctlr->port = port;
+		ctlr->pcidev = p;
+		ctlr->pciv = i;		/* saved before i is reused as a loop index below */
+		ctlr->pcie = pcie;
+
+		if(pcigetpms(p) > 0){
+			pcisetpms(p, 0);
+
+			for(i = 0; i < 6; i++)
+				pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar);
+			pcicfgw8(p, PciINTL, p->intl);
+			pcicfgw8(p, PciLTR, p->ltr);
+			pcicfgw8(p, PciCLS, p->cls);
+			pcicfgw16(p, PciPCR, p->pcr);
+		}
+
+		if(rtl8169reset(ctlr)){
+			iofree(port);
+			free(ctlr);
+			continue;
+		}
+
+		/*
+		 * Extract the chip hardware version,
+		 * needed to configure each properly.
+		 */
+		ctlr->macv = csr32r(ctlr, Tcr) & HwveridMASK;
+
+		/*
+		 * rtl8169mii is called exactly once per controller and
+		 * its result kept in ctlr->mii; calling it again and
+		 * discarding the result would attach a second Mii that
+		 * nothing references.
+		 */
+		if((ctlr->mii = rtl8169mii(ctlr)) == nil){
+			iofree(port);
+			free(ctlr);
+			continue;
+		}
+
+		pcisetbme(p);
+
+		if(rtl8169ctlrhead != nil)
+			rtl8169ctlrtail->next = ctlr;
+		else
+			rtl8169ctlrhead = ctlr;
+		rtl8169ctlrtail = ctlr;
+	}
+}
+
+static int
+rtl8169pnp(Ether* edev)
+{
+	u32int r;
+	Ctlr *ctlr;
+	uchar ea[Eaddrlen];
+
+	if(rtl8169ctlrhead == nil)
+		rtl8169pci();
+
+	/*
+	 * Any adapter matches if no edev->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	for(ctlr = rtl8169ctlrhead; ctlr != nil; ctlr = ctlr->next){
+		if(ctlr->active)
+			continue;
+		if(edev->port == 0 || edev->port == ctlr->port){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	if(ctlr == nil)
+		return -1;
+
+	edev->ctlr = ctlr;
+	edev->port = ctlr->port;
+	edev->irq = ctlr->pcidev->intl;
+	edev->tbdf = ctlr->pcidev->tbdf;
+	edev->mbps = 1000;
+	edev->maxmtu = Mtu;
+
+	/*
+	 * Check if the adapter's station address is to be overridden.
+	 * If not, read it from the device and set in edev->ea.
+ */ + memset(ea, 0, Eaddrlen); + if(memcmp(ea, edev->ea, Eaddrlen) == 0){ + r = csr32r(ctlr, Idr0); + edev->ea[0] = r; + edev->ea[1] = r>>8; + edev->ea[2] = r>>16; + edev->ea[3] = r>>24; + r = csr32r(ctlr, Idr0+4); + edev->ea[4] = r; + edev->ea[5] = r>>8; + } + + edev->attach = rtl8169attach; + edev->transmit = rtl8169transmit; + edev->interrupt = rtl8169interrupt; + edev->ifstat = rtl8169ifstat; + + edev->arg = edev; + edev->promiscuous = rtl8169promiscuous; + edev->multicast = rtl8169multicast; + edev->shutdown = rtl8169shutdown; + + rtl8169link(edev); + + return 0; +} + +void +ether8169link(void) +{ + addethercard("rtl8169", rtl8169pnp); +} diff -Nru /sys/src/9k/386/ether82557.c /sys/src/9k/386/ether82557.c --- /sys/src/9k/386/ether82557.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/ether82557.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1349 @@ +/* + * Intel 82557 Fast Ethernet PCI Bus LAN Controller + * as found on the Intel EtherExpress PRO/100B. This chip is full + * of smarts, unfortunately they're not all in the right place. + * To do: + * the PCI scanning code could be made common to other adapters; + * auto-negotiation, full-duplex; + * optionally use memory-mapped registers; + * detach for PCI reset problems (also towards loadable drivers). 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" + +#include "etherif.h" +#include "io.h" + +enum { + Nrfd = 64, /* receive frame area */ + Ncb = 64, /* maximum control blocks queued */ + + NullPointer = 0xFFFFFFFF, /* 82557 NULL pointer */ +}; + +enum { /* CSR */ + Status = 0x00, /* byte or word (word includes Ack) */ + Ack = 0x01, /* byte */ + CommandR = 0x02, /* byte or word (word includes Interrupt) */ + Interrupt = 0x03, /* byte */ + General = 0x04, /* dword */ + Port = 0x08, /* dword */ + Fcr = 0x0C, /* Flash control register */ + Ecr = 0x0E, /* EEPROM control register */ + Mcr = 0x10, /* MDI control register */ + Gstatus = 0x1D, /* General status register */ +}; + +enum { /* Status */ + RUidle = 0x0000, + RUsuspended = 0x0004, + RUnoresources = 0x0008, + RUready = 0x0010, + RUrbd = 0x0020, /* bit */ + RUstatus = 0x003F, /* mask */ + + CUidle = 0x0000, + CUsuspended = 0x0040, + CUactive = 0x0080, + CUstatus = 0x00C0, /* mask */ + + StatSWI = 0x0400, /* SoftWare generated Interrupt */ + StatMDI = 0x0800, /* MDI r/w done */ + StatRNR = 0x1000, /* Receive unit Not Ready */ + StatCNA = 0x2000, /* Command unit Not Active (Active->Idle) */ + StatFR = 0x4000, /* Finished Receiving */ + StatCX = 0x8000, /* Command eXecuted */ + StatTNO = 0x8000, /* Transmit NOT OK */ +}; + +enum { /* Command (byte) */ + CUnop = 0x00, + CUstart = 0x10, + CUresume = 0x20, + LoadDCA = 0x40, /* Load Dump Counters Address */ + DumpSC = 0x50, /* Dump Statistical Counters */ + LoadCUB = 0x60, /* Load CU Base */ + ResetSA = 0x70, /* Dump and Reset Statistical Counters */ + + RUstart = 0x01, + RUresume = 0x02, + RUabort = 0x04, + LoadHDS = 0x05, /* Load Header Data Size */ + LoadRUB = 0x06, /* Load RU Base */ + RBDresume = 0x07, /* Resume frame reception */ +}; + +enum { /* Interrupt (byte) */ + InterruptM = 0x01, /* interrupt Mask */ + InterruptSI = 0x02, /* Software generated 
Interrupt */ +}; + +enum { /* Ecr */ + EEsk = 0x01, /* serial clock */ + EEcs = 0x02, /* chip select */ + EEdi = 0x04, /* serial data in */ + EEdo = 0x08, /* serial data out */ + + EEstart = 0x04, /* start bit */ + EEread = 0x02, /* read opcode */ +}; + +enum { /* Mcr */ + MDIread = 0x08000000, /* read opcode */ + MDIwrite = 0x04000000, /* write opcode */ + MDIready = 0x10000000, /* ready bit */ + MDIie = 0x20000000, /* interrupt enable */ +}; + +typedef struct Rfd { + int field; + ulong link; + ulong rbd; + ushort count; + ushort size; + + uchar data[1700]; +} Rfd; + +enum { /* field */ + RfdCollision = 0x00000001, + RfdIA = 0x00000002, /* IA match */ + RfdRxerr = 0x00000010, /* PHY character error */ + RfdType = 0x00000020, /* Type frame */ + RfdRunt = 0x00000080, + RfdOverrun = 0x00000100, + RfdBuffer = 0x00000200, + RfdAlignment = 0x00000400, + RfdCRC = 0x00000800, + + RfdOK = 0x00002000, /* frame received OK */ + RfdC = 0x00008000, /* reception Complete */ + RfdSF = 0x00080000, /* Simplified or Flexible (1) Rfd */ + RfdH = 0x00100000, /* Header RFD */ + + RfdI = 0x20000000, /* Interrupt after completion */ + RfdS = 0x40000000, /* Suspend after completion */ + RfdEL = 0x80000000, /* End of List */ +}; + +enum { /* count */ + RfdF = 0x4000, + RfdEOF = 0x8000, +}; + +typedef struct Cb Cb; +typedef struct Cb { + ushort status; + ushort command; + ulong link; + union { + uchar data[24]; /* CbIAS + CbConfigure */ + struct { + ulong tbd; + ushort count; + uchar threshold; + uchar number; + + ulong tba; + ushort tbasz; + ushort pad; + }; + }; + + Block* bp; + Cb* next; +} Cb; + +enum { /* action command */ + CbU = 0x1000, /* transmit underrun */ + CbOK = 0x2000, /* DMA completed OK */ + CbC = 0x8000, /* execution Complete */ + + CbNOP = 0x0000, + CbIAS = 0x0001, /* Individual Address Setup */ + CbConfigure = 0x0002, + CbMAS = 0x0003, /* Multicast Address Setup */ + CbTransmit = 0x0004, + CbDump = 0x0006, + CbDiagnose = 0x0007, + CbCommand = 0x0007, /* mask */ + + CbSF 
= 0x0008, /* Flexible-mode CbTransmit */ + + CbI = 0x2000, /* Interrupt after completion */ + CbS = 0x4000, /* Suspend after completion */ + CbEL = 0x8000, /* End of List */ +}; + +enum { /* CbTransmit count */ + CbEOF = 0x8000, +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + Lock slock; /* attach */ + int state; + + int port; + Pcidev* pcidev; + Ctlr* next; + int active; + + int eepromsz; /* address size in bits */ + ushort* eeprom; + + Lock miilock; + + int tick; + + Lock rlock; /* registers */ + int command; /* last command issued */ + + Block* rfdhead; /* receive side */ + Block* rfdtail; + int nrfd; + + Lock cblock; /* transmit side */ + int action; + int nop; + uchar configdata[24]; + int threshold; + int ncb; + Cb* cbr; + Cb* cbhead; + Cb* cbtail; + int cbq; + int cbqmax; + int cbqmaxhw; + + Lock dlock; /* dump statistical counters */ + ulong dump[17]; +} Ctlr; + +static Ctlr* ctlrhead; +static Ctlr* ctlrtail; + +static uchar configdata[24] = { + 0x16, /* byte count */ + 0x08, /* Rx/Tx FIFO limit */ + 0x00, /* adaptive IFS */ + 0x00, + 0x00, /* Rx DMA maximum byte count */ +// 0x80, /* Tx DMA maximum byte count */ + 0x00, /* Tx DMA maximum byte count */ + 0x32, /* !late SCB, CNA interrupts */ + 0x03, /* discard short Rx frames */ + 0x00, /* 503/MII */ + + 0x00, + 0x2E, /* normal operation, NSAI */ + 0x00, /* linear priority */ + 0x60, /* inter-frame spacing */ + 0x00, + 0xF2, + 0xC8, /* 503, promiscuous mode off */ + 0x00, + 0x40, + 0xF3, /* transmit padding enable */ + 0x80, /* full duplex pin enable */ + 0x3F, /* no Multi IA */ + 0x05, /* no Multi Cast ALL */ +}; + +#define csr8r(c, r) (inb((c)->port+(r))) +#define csr16r(c, r) (ins((c)->port+(r))) +#define csr32r(c, r) (inl((c)->port+(r))) +#define csr8w(c, r, b) (outb((c)->port+(r), (int)(b))) +#define csr16w(c, r, w) (outs((c)->port+(r), (ushort)(w))) +#define csr32w(c, r, l) (outl((c)->port+(r), (ulong)(l))) + +static void +command(Ctlr* ctlr, int c, int v) +{ + int timeo; + + 
ilock(&ctlr->rlock); + + /* + * Only back-to-back CUresume can be done + * without waiting for any previous command to complete. + * This should be the common case. + * Unfortunately there's a chip errata where back-to-back + * CUresumes can be lost, the fix is to always wait. + if(c == CUresume && ctlr->command == CUresume){ + csr8w(ctlr, CommandR, c); + iunlock(&ctlr->rlock); + return; + } + */ + + for(timeo = 0; timeo < 100; timeo++){ + if(!csr8r(ctlr, CommandR)) + break; + microdelay(1); + } + if(timeo >= 100){ + ctlr->command = -1; + iunlock(&ctlr->rlock); + iprint("i82557: command %#ux %#ux timeout\n", c, v); + return; + } + + switch(c){ + + case CUstart: + case LoadDCA: + case LoadCUB: + case RUstart: + case LoadHDS: + case LoadRUB: + csr32w(ctlr, General, v); + break; + + /* + case CUnop: + case CUresume: + case DumpSC: + case ResetSA: + case RUresume: + case RUabort: + */ + default: + break; + } + csr8w(ctlr, CommandR, c); + ctlr->command = c; + + iunlock(&ctlr->rlock); +} + +static Block* +rfdalloc(ulong link) +{ + Block *bp; + Rfd *rfd; + + if(bp = iallocb(sizeof(Rfd))){ + rfd = (Rfd*)bp->rp; + rfd->field = 0; + rfd->link = link; + rfd->rbd = NullPointer; + rfd->count = 0; + rfd->size = sizeof(Etherpkt); + } + + return bp; +} + +static void +ethwatchdog(void* arg) +{ + Ether *ether; + Ctlr *ctlr; + static void txstart(Ether*); + + ether = arg; + for(;;){ + tsleep(&up->sleep, return0, 0, 4000); + + /* + * Hmmm. This doesn't seem right. Currently + * the device can't be disabled but it may be in + * the future. 
+ */ + ctlr = ether->ctlr; + if(ctlr == nil || ctlr->state == 0){ + print("%s: exiting\n", up->text); + pexit("disabled", 0); + } + + ilock(&ctlr->cblock); + if(ctlr->tick++){ + ctlr->action = CbMAS; + txstart(ether); + } + iunlock(&ctlr->cblock); + } +} + +static void +attach(Ether* ether) +{ + Ctlr *ctlr; + char name[KNAMELEN]; + + ctlr = ether->ctlr; + lock(&ctlr->slock); + if(ctlr->state == 0){ + ilock(&ctlr->rlock); + csr8w(ctlr, Interrupt, 0); + iunlock(&ctlr->rlock); + command(ctlr, RUstart, PADDR(ctlr->rfdhead->rp)); + ctlr->state = 1; + + /* + * Start the watchdog timer for the receive lockup errata + * unless the EEPROM compatibility word indicates it may be + * omitted. + */ + if((ctlr->eeprom[0x03] & 0x0003) != 0x0003){ + snprint(name, KNAMELEN, "#l%dwatchdog", ether->ctlrno); + kproc(name, ethwatchdog, ether); + } + } + unlock(&ctlr->slock); +} + +static long +ifstat(Ether* ether, void* a, long n, ulong offset) +{ + char *alloc, *e, *p; + int i, phyaddr; + Ctlr *ctlr; + ulong dump[17]; + + ctlr = ether->ctlr; + lock(&ctlr->dlock); + + /* + * Start the command then + * wait for completion status, + * should be 0xA005. 
+ */ + ctlr->dump[16] = 0; + command(ctlr, DumpSC, 0); + while(ctlr->dump[16] == 0) + ; + + ether->oerrs = ctlr->dump[1]+ctlr->dump[2]+ctlr->dump[3]; + ether->crcs = ctlr->dump[10]; + ether->frames = ctlr->dump[11]; + ether->buffs = ctlr->dump[12]+ctlr->dump[15]; + ether->overflows = ctlr->dump[13]; + + if(n == 0){ + unlock(&ctlr->dlock); + return 0; + } + + memmove(dump, ctlr->dump, sizeof(dump)); + unlock(&ctlr->dlock); + + if((alloc = malloc(READSTR)) == nil) + error(Enomem); + p = alloc; + e = p + READSTR; + + p = seprint(p, e, "transmit good frames: %lud\n", dump[0]); + p = seprint(p, e, "transmit maximum collisions errors: %lud\n", dump[1]); + p = seprint(p, e, "transmit late collisions errors: %lud\n", dump[2]); + p = seprint(p, e, "transmit underrun errors: %lud\n", dump[3]); + p = seprint(p, e, "transmit lost carrier sense: %lud\n", dump[4]); + p = seprint(p, e, "transmit deferred: %lud\n", dump[5]); + p = seprint(p, e, "transmit single collisions: %lud\n", dump[6]); + p = seprint(p, e, "transmit multiple collisions: %lud\n", dump[7]); + p = seprint(p, e, "transmit total collisions: %lud\n", dump[8]); + p = seprint(p, e, "receive good frames: %lud\n", dump[9]); + p = seprint(p, e, "receive CRC errors: %lud\n", dump[10]); + p = seprint(p, e, "receive alignment errors: %lud\n", dump[11]); + p = seprint(p, e, "receive resource errors: %lud\n", dump[12]); + p = seprint(p, e, "receive overrun errors: %lud\n", dump[13]); + p = seprint(p, e, "receive collision detect errors: %lud\n", dump[14]); + p = seprint(p, e, "receive short frame errors: %lud\n", dump[15]); + p = seprint(p, e, "nop: %d\n", ctlr->nop); + if(ctlr->cbqmax > ctlr->cbqmaxhw) + ctlr->cbqmaxhw = ctlr->cbqmax; + p = seprint(p, e, "cbqmax: %d\n", ctlr->cbqmax); + ctlr->cbqmax = 0; + p = seprint(p, e, "threshold: %d\n", ctlr->threshold); + + p = seprint(p, e, "eeprom:"); + for(i = 0; i < (1<eepromsz); i++){ + if(i && ((i & 0x07) == 0)) + p = seprint(p, e, "\n "); + p = seprint(p, e, " %4.4ux", 
ctlr->eeprom[i]); + } + + if((ctlr->eeprom[6] & 0x1F00) && !(ctlr->eeprom[6] & 0x8000)){ + phyaddr = ctlr->eeprom[6] & 0x00FF; + p = seprint(p, e, "\nphy %2d:", phyaddr); + for(i = 0; i < 6; i++){ + static int miir(Ctlr*, int, int); + + p = seprint(p, e, " %4.4ux", miir(ctlr, phyaddr, i)); + } + } + seprint(p, e, "\n"); + + n = readstr(offset, a, n, alloc); + free(alloc); + + return n; +} + +static void +txstart(Ether* ether) +{ + Ctlr *ctlr; + Block *bp; + Cb *cb; + + ctlr = ether->ctlr; + while(ctlr->cbq < (ctlr->ncb-1)){ + cb = ctlr->cbhead->next; + if(ctlr->action == 0){ + bp = qget(ether->oq); + if(bp == nil) + break; + + cb->command = CbS|CbSF|CbTransmit; + cb->tbd = PADDR(&cb->tba); + cb->count = 0; + cb->threshold = ctlr->threshold; + cb->number = 1; + cb->tba = PADDR(bp->rp); + cb->bp = bp; + cb->tbasz = BLEN(bp); + } + else if(ctlr->action == CbConfigure){ + cb->command = CbS|CbConfigure; + memmove(cb->data, ctlr->configdata, sizeof(ctlr->configdata)); + ctlr->action = 0; + } + else if(ctlr->action == CbIAS){ + cb->command = CbS|CbIAS; + memmove(cb->data, ether->ea, Eaddrlen); + ctlr->action = 0; + } + else if(ctlr->action == CbMAS){ + cb->command = CbS|CbMAS; + memset(cb->data, 0, sizeof(cb->data)); + ctlr->action = 0; + } + else{ + print("#l%d: action %#ux\n", ether->ctlrno, ctlr->action); + ctlr->action = 0; + break; + } + cb->status = 0; + + coherence(); + ctlr->cbhead->command &= ~CbS; + ctlr->cbhead = cb; + ctlr->cbq++; + } + + /* + * Workaround for some broken HUB chips + * when connected at 10Mb/s half-duplex. 
+ */ + if(ctlr->nop){ + command(ctlr, CUnop, 0); + microdelay(1); + } + command(ctlr, CUresume, 0); + + if(ctlr->cbq > ctlr->cbqmax) + ctlr->cbqmax = ctlr->cbq; +} + +static void +configure(Ether* ether, int promiscuous) +{ + Ctlr *ctlr; + + ctlr = ether->ctlr; + ilock(&ctlr->cblock); + if(promiscuous){ + ctlr->configdata[6] |= 0x80; /* Save Bad Frames */ + //ctlr->configdata[6] &= ~0x40; /* !Discard Overrun Rx Frames */ + ctlr->configdata[7] &= ~0x01; /* !Discard Short Rx Frames */ + ctlr->configdata[15] |= 0x01; /* Promiscuous mode */ + ctlr->configdata[18] &= ~0x01; /* (!Padding enable?), !stripping enable */ + ctlr->configdata[21] |= 0x08; /* Multi Cast ALL */ + } + else{ + ctlr->configdata[6] &= ~0x80; + //ctlr->configdata[6] |= 0x40; + ctlr->configdata[7] |= 0x01; + ctlr->configdata[15] &= ~0x01; + ctlr->configdata[18] |= 0x01; /* 0x03? */ + ctlr->configdata[21] &= ~0x08; + } + ctlr->action = CbConfigure; + txstart(ether); + iunlock(&ctlr->cblock); +} + +static void +promiscuous(void* arg, int on) +{ + configure(arg, on); +} + +static void +multicast(void* ether, uchar *addr, int add) +{ + USED(addr); + /* + * TODO: if (add) add addr to list of mcast addrs in controller + * else remove addr from list of mcast addrs in controller + * enable multicast input (see CbMAS) instead of promiscuous mode. 
+	 */
+	if (add)
+		configure(ether, 1);
+}
+
+/*
+ * Transmit hook: run txstart with ctlr->cblock held.
+ */
+static void
+transmit(Ether* ether)
+{
+	Ctlr *ctlr;
+
+	ctlr = ether->ctlr;
+	ilock(&ctlr->cblock);
+	txstart(ether);
+	iunlock(&ctlr->cblock);
+}
+
+/*
+ * Drain completed receive frame descriptors, starting at
+ * ctlr->rfdhead and stopping at the first one without RfdC set.
+ * Good frames (RfdOK) are passed to etheriq; every processed
+ * buffer (or its replacement) is linked back in behind the tail.
+ */
+static void
+receive(Ether* ether)
+{
+	Rfd *rfd;
+	Ctlr *ctlr;
+	int count;
+	Block *bp, *pbp, *xbp;
+
+	ctlr = ether->ctlr;
+	bp = ctlr->rfdhead;
+	for(rfd = (Rfd*)bp->rp; rfd->field & RfdC; rfd = (Rfd*)bp->rp){
+		/*
+		 * If it's an OK receive frame
+		 * 1) save the count
+		 * 2) if it's small, try to allocate a block and copy
+		 *    the data, then adjust the necessary fields for reuse;
+		 * 3) if it's big, try to allocate a new Rfd and if
+		 *    successful
+		 *	adjust the received buffer pointers for the
+		 *	  actual data received;
+		 *	initialise the replacement buffer to point to
+		 *	  the next in the ring;
+		 *	initialise bp to point to the replacement;
+		 * 4) if there's a good packet, pass it on for disposal.
+		 */
+		if(rfd->field & RfdOK){
+			pbp = nil;
+			count = rfd->count & 0x3FFF;
+			if((count < ETHERMAXTU/4) && (pbp = iallocb(count))){
+				memmove(pbp->rp, bp->rp+offsetof(Rfd, data[0]), count);
+				pbp->wp = pbp->rp + count;
+
+				rfd->count = 0;
+				rfd->field = 0;
+			}
+			else if(xbp = rfdalloc(rfd->link)){
+				bp->rp += offsetof(Rfd, data[0]);
+				bp->wp = bp->rp + count;
+
+				xbp->next = bp->next;
+				bp->next = 0;
+
+				pbp = bp;
+				bp = xbp;
+			}
+			if(pbp != nil)
+				etheriq(ether, pbp, 1);
+		}
+		else{
+			rfd->count = 0;
+			rfd->field = 0;
+		}
+
+		/*
+		 * The ring tail pointer follows the head with one
+		 * unused buffer in between to defeat hardware prefetch;
+		 * once the tail pointer has been bumped on to the next
+		 * and the new tail has the Suspend bit set, it can be
+		 * removed from the old tail buffer.
+		 * As a replacement for the current head buffer may have
+		 * been allocated above, ensure that the new tail points
+		 * to it (next and link).
+		 */
+		rfd = (Rfd*)ctlr->rfdtail->rp;
+		ctlr->rfdtail = ctlr->rfdtail->next;
+		ctlr->rfdtail->next = bp;
+		((Rfd*)ctlr->rfdtail->rp)->link = PADDR(bp->rp);
+		((Rfd*)ctlr->rfdtail->rp)->field |= RfdS;
+		coherence();
+		rfd->field &= ~RfdS;
+
+		/*
+		 * Finally done with the current (possibly replaced)
+		 * head, move on to the next and maintain the sentinel
+		 * between tail and head.
+		 */
+		ctlr->rfdhead = bp->next;
+		bp = ctlr->rfdhead;
+	}
+}
+
+/*
+ * Interrupt handler: repeatedly read Status, acknowledge its high
+ * byte through Ack, and service receive (StatFR), receiver-not-ready
+ * (StatRNR) and command-unit (StatCNA) events until none of the
+ * tested status bits is set.  Any tested bit left unhandled is
+ * printed and ends the loop.
+ */
+static void
+interrupt(Ureg*, void* arg)
+{
+	Cb* cb;
+	Ctlr *ctlr;
+	Ether *ether;
+	int status;
+
+	ether = arg;
+	ctlr = ether->ctlr;
+
+	for(;;){
+		ilock(&ctlr->rlock);
+		status = csr16r(ctlr, Status);
+		csr8w(ctlr, Ack, (status>>8) & 0xFF);
+		iunlock(&ctlr->rlock);
+
+		if(!(status & (StatCX|StatFR|StatCNA|StatRNR|StatMDI|StatSWI)))
+			break;
+
+		/*
+		 * If the watchdog timer for the receiver lockup errata is running,
+		 * let it know the receiver is active.
+		 */
+		if(status & (StatFR|StatRNR)){
+			ilock(&ctlr->cblock);
+			ctlr->tick = 0;
+			iunlock(&ctlr->cblock);
+		}
+
+		if(status & StatFR){
+			receive(ether);
+			status &= ~StatFR;
+		}
+
+		if(status & StatRNR){
+			command(ctlr, RUresume, 0);
+			status &= ~StatRNR;
+		}
+
+		if(status & StatCNA){
+			ilock(&ctlr->cblock);
+
+			/* reclaim completed control blocks from the tail */
+			cb = ctlr->cbtail;
+			while(ctlr->cbq){
+				if(!(cb->status & CbC))
+					break;
+				if(cb->bp){
+					freeb(cb->bp);
+					cb->bp = nil;
+				}
+				if((cb->status & CbU) && ctlr->threshold < 0xE0)
+					ctlr->threshold++;
+
+				ctlr->cbq--;
+				cb = cb->next;
+			}
+			ctlr->cbtail = cb;
+
+			txstart(ether);
+			iunlock(&ctlr->cblock);
+
+			status &= ~StatCNA;
+		}
+
+		if(status & (StatCX|StatFR|StatCNA|StatRNR|StatMDI|StatSWI)){
+			iprint("#l%d: status %#ux\n", ether->ctlrno, status);
+			break;
+		}
+	}
+}
+
+/*
+ * Build the controller's receive and transmit rings.
+ *
+ * Receive: Nrfd blocks from rfdalloc, chained through Block.next
+ * and Rfd.link, closed into a ring with RfdS set on the tail and
+ * the head advanced one buffer past it.
+ * Transmit: ctlr->ncb control blocks, each linked to the next,
+ * with head and tail at the first entry and an empty queue.
+ * Also copies the default configdata and sets threshold and tick.
+ *
+ * Raises Enomem (via error) if either ring cannot be allocated.
+ */
+static void
+ctlrinit(Ctlr* ctlr)
+{
+	int i;
+	Block *bp;
+	Rfd *rfd;
+	ulong link;
+
+	/*
+	 * Create the Receive Frame Area (RFA) as a ring of allocated
+	 * buffers.
+	 * A sentinel buffer is maintained between the last buffer in
+	 * the ring (marked with RfdS) and the head buffer to defeat the
+	 * hardware prefetch of the next RFD and allow dynamic buffer
+	 * allocation.
+	 */
+	link = NullPointer;
+	for(i = 0; i < Nrfd; i++){
+		bp = rfdalloc(link);
+		if(bp == nil){
+			/*
+			 * rfdalloc returns nil when iallocb fails.
+			 * The ring is only closed after this loop, so
+			 * the chain built so far ends in nil (given
+			 * rfdhead was nil on entry, which the tail
+			 * test below already relies on).  Release it
+			 * and raise the error instead of dereferencing
+			 * a nil Block.
+			 */
+			while((bp = ctlr->rfdhead) != nil){
+				ctlr->rfdhead = bp->next;
+				bp->next = nil;
+				freeb(bp);
+			}
+			ctlr->rfdtail = nil;
+			error(Enomem);
+		}
+		if(ctlr->rfdhead == nil)
+			ctlr->rfdtail = bp;
+		bp->next = ctlr->rfdhead;
+		ctlr->rfdhead = bp;
+		link = PADDR(bp->rp);
+	}
+	ctlr->rfdtail->next = ctlr->rfdhead;
+	rfd = (Rfd*)ctlr->rfdtail->rp;
+	rfd->link = PADDR(ctlr->rfdhead->rp);
+	rfd->field |= RfdS;
+	ctlr->rfdhead = ctlr->rfdhead->next;
+
+	/*
+	 * Create a ring of control blocks for the
+	 * transmit side.
+	 */
+	ilock(&ctlr->cblock);
+	ctlr->cbr = malloc(ctlr->ncb*sizeof(Cb));
+	if(ctlr->cbr == nil) {
+		iunlock(&ctlr->cblock);
+		error(Enomem);
+	}
+	for(i = 0; i < ctlr->ncb; i++){
+		ctlr->cbr[i].status = CbC|CbOK;
+		ctlr->cbr[i].command = CbS|CbNOP;
+		ctlr->cbr[i].link = PADDR(&ctlr->cbr[NEXT(i, ctlr->ncb)].status);
+		ctlr->cbr[i].next = &ctlr->cbr[NEXT(i, ctlr->ncb)];
+	}
+	ctlr->cbhead = ctlr->cbr;
+	ctlr->cbtail = ctlr->cbr;
+	ctlr->cbq = 0;
+
+	memmove(ctlr->configdata, configdata, sizeof(configdata));
+	ctlr->threshold = 80;
+	ctlr->tick = 0;
+
+	iunlock(&ctlr->cblock);
+}
+
+/*
+ * Read MII register regadd of PHY phyadd through the Mcr register.
+ * Polls up to 64 times, 1 microsecond apart, for MDIready.
+ * Returns the low 16 bits of Mcr on success, -1 on timeout.
+ */
+static int
+miir(Ctlr* ctlr, int phyadd, int regadd)
+{
+	int mcr, timo;
+
+	lock(&ctlr->miilock);
+	csr32w(ctlr, Mcr, MDIread|(phyadd<<21)|(regadd<<16));
+	mcr = 0;
+	for(timo = 64; timo; timo--){
+		mcr = csr32r(ctlr, Mcr);
+		if(mcr & MDIready)
+			break;
+		microdelay(1);
+	}
+	unlock(&ctlr->miilock);
+
+	if(mcr & MDIready)
+		return mcr & 0xFFFF;
+
+	return -1;
+}
+
+/*
+ * Write the low 16 bits of data to MII register regadd of PHY
+ * phyadd through the Mcr register, polling as miir does.
+ * Returns 0 on success, -1 on timeout.
+ */
+static int
+miiw(Ctlr* ctlr, int phyadd, int regadd, int data)
+{
+	int mcr, timo;
+
+	lock(&ctlr->miilock);
+	csr32w(ctlr, Mcr, MDIwrite|(phyadd<<21)|(regadd<<16)|(data & 0xFFFF));
+	mcr = 0;
+	for(timo = 64; timo; timo--){
+		mcr = csr32r(ctlr, Mcr);
+		if(mcr & MDIready)
+			break;
+		microdelay(1);
+	}
+	unlock(&ctlr->miilock);
+
+	if(mcr & MDIready)
+		return 0;
+
+	return -1;
+}
+
+static int +hy93c46r(Ctlr* ctlr, int r) +{ + int data, i, op, size; + + /* + * Hyundai HY93C46 or equivalent serial EEPROM. + * This sequence for reading a 16-bit register 'r' + * in the EEPROM is taken straight from Section + * 3.3.4.2 of the Intel 82557 User's Guide. + */ +reread: + csr16w(ctlr, Ecr, EEcs); + op = EEstart|EEread; + for(i = 2; i >= 0; i--){ + data = (((op>>i) & 0x01)<<2)|EEcs; + csr16w(ctlr, Ecr, data); + csr16w(ctlr, Ecr, data|EEsk); + microdelay(1); + csr16w(ctlr, Ecr, data); + microdelay(1); + } + + /* + * First time through must work out the EEPROM size. + */ + if((size = ctlr->eepromsz) == 0) + size = 8; + + for(size = size-1; size >= 0; size--){ + data = (((r>>size) & 0x01)<<2)|EEcs; + csr16w(ctlr, Ecr, data); + csr16w(ctlr, Ecr, data|EEsk); + delay(1); + csr16w(ctlr, Ecr, data); + microdelay(1); + if(!(csr16r(ctlr, Ecr) & EEdo)) + break; + } + + data = 0; + for(i = 15; i >= 0; i--){ + csr16w(ctlr, Ecr, EEcs|EEsk); + microdelay(1); + if(csr16r(ctlr, Ecr) & EEdo) + data |= (1<eepromsz == 0){ + ctlr->eepromsz = 8-size; + ctlr->eeprom = malloc((1<eepromsz)*sizeof(ushort)); + if(ctlr->eeprom == nil) + error(Enomem); + goto reread; + } + + return data; +} + +static void +i82557pci(void) +{ + Pcidev *p; + Ctlr *ctlr; + int i, nop, port; + + p = nil; + nop = 0; + while(p = pcimatch(p, 0x8086, 0)){ + switch(p->did){ + default: + continue; + case 0x1031: /* Intel 82562EM */ + case 0x103B: /* Intel 82562EM */ + case 0x103C: /* Intel 82562EM */ + case 0x1050: /* Intel 82562EZ */ + case 0x1039: /* Intel 82801BD PRO/100 VE */ + case 0x103A: /* Intel 82562 PRO/100 VE */ + case 0x103D: /* Intel 82562 PRO/100 VE */ + case 0x1064: /* Intel 82562 PRO/100 VE */ + case 0x2449: /* Intel 82562ET */ + case 0x27DC: /* Intel 82801G PRO/100 VE */ + nop = 1; + /*FALLTHROUGH*/ + case 0x1209: /* Intel 82559ER */ + case 0x1229: /* Intel 8255[789] */ + case 0x1030: /* Intel 82559 InBusiness 10/100 */ + break; + } + + if(pcigetpms(p) > 0){ + pcisetpms(p, 0); + + for(i = 
0; i < 6; i++) + pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar); + pcicfgw8(p, PciINTL, p->intl); + pcicfgw8(p, PciLTR, p->ltr); + pcicfgw8(p, PciCLS, p->cls); + pcicfgw16(p, PciPCR, p->pcr); + } + + /* + * bar[0] is the memory-mapped register address (4KB), + * bar[1] is the I/O port register address (32 bytes) and + * bar[2] is for the flash ROM (1MB). + */ + port = p->mem[1].bar & ~0x01; + if(ioalloc(port, p->mem[1].size, 0, "i82557") < 0){ + print("i82557: port %#ux in use\n", port); + continue; + } + + ctlr = malloc(sizeof(Ctlr)); + if(ctlr == nil) + error(Enomem); + ctlr->port = port; + ctlr->pcidev = p; + ctlr->nop = nop; + + if(ctlrhead != nil) + ctlrtail->next = ctlr; + else + ctlrhead = ctlr; + ctlrtail = ctlr; + + pcisetbme(p); + } +} + +static char* mediatable[9] = { + "10BASE-T", /* TP */ + "10BASE-2", /* BNC */ + "10BASE-5", /* AUI */ + "100BASE-TX", + "10BASE-TFD", + "100BASE-TXFD", + "100BASE-T4", + "100BASE-FX", + "100BASE-FXFD", +}; + +static int +scanphy(Ctlr* ctlr) +{ + int i, oui, x; + + for(i = 0; i < 32; i++){ + if((oui = miir(ctlr, i, 2)) == -1 || oui == 0 || oui == 0xFFFF) + continue; + oui <<= 6; + x = miir(ctlr, i, 3); + oui |= x>>10; + //print("phy%d: oui %#ux reg1 %#ux\n", i, oui, miir(ctlr, i, 1)); + + ctlr->eeprom[6] = i; + if(oui == 0xAA00) + ctlr->eeprom[6] |= 0x07<<8; + else if(oui == 0x80017){ + if(x & 0x01) + ctlr->eeprom[6] |= 0x0A<<8; + else + ctlr->eeprom[6] |= 0x04<<8; + } + return i; + } + return -1; +} + +static void +shutdown(Ether* ether) +{ + Ctlr *ctlr = ether->ctlr; + + csr32w(ctlr, Port, 0); + delay(1); + csr8w(ctlr, Interrupt, InterruptM); +} + + +static int +reset(Ether* ether) +{ + int anar, anlpar, bmcr, bmsr, i, k, medium, phyaddr, x; + unsigned short sum; + uchar ea[Eaddrlen]; + Ctlr *ctlr; + + if(ctlrhead == nil) + i82557pci(); + + /* + * Any adapter matches if no ether->port is supplied, + * otherwise the ports must match. 
+ */ + for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(ether->port == 0 || ether->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + /* + * Initialise the Ctlr structure. + * Perform a software reset after which should ensure busmastering + * is still enabled. The EtherExpress PRO/100B appears to leave + * the PCI configuration alone (see the 'To do' list above) so punt + * for now. + * Load the RUB and CUB registers for linear addressing (0). + */ + ether->ctlr = ctlr; + ether->port = ctlr->port; + ether->irq = ctlr->pcidev->intl; + ether->tbdf = ctlr->pcidev->tbdf; + + ilock(&ctlr->rlock); + csr32w(ctlr, Port, 0); + delay(1); + csr8w(ctlr, Interrupt, InterruptM); + iunlock(&ctlr->rlock); + + command(ctlr, LoadRUB, 0); + command(ctlr, LoadCUB, 0); + command(ctlr, LoadDCA, PADDR(ctlr->dump)); + + /* + * Initialise the receive frame, transmit ring and configuration areas. + */ + ctlr->ncb = Ncb; + ctlrinit(ctlr); + + /* + * Read the EEPROM. + * Do a dummy read first to get the size + * and allocate ctlr->eeprom. + */ + hy93c46r(ctlr, 0); + sum = 0; + for(i = 0; i < (1<eepromsz); i++){ + x = hy93c46r(ctlr, i); + ctlr->eeprom[i] = x; + sum += x; + } + if(sum != 0xBABA) + print("#l%d: EEPROM checksum - %#4.4ux\n", ether->ctlrno, sum); + + /* + * Eeprom[6] indicates whether there is a PHY and whether + * it's not 10Mb-only, in which case use the given PHY address + * to set any PHY specific options and determine the speed. + * Unfortunately, sometimes the EEPROM is blank except for + * the ether address and checksum; in this case look at the + * controller type and if it's am 82558 or 82559 it has an + * embedded PHY so scan for that. + * If no PHY, assume 82503 (serial) operation. 
+ */ + if((ctlr->eeprom[6] & 0x1F00) && !(ctlr->eeprom[6] & 0x8000)) + phyaddr = ctlr->eeprom[6] & 0x00FF; + else + switch(ctlr->pcidev->rid){ + case 0x01: /* 82557 A-step */ + case 0x02: /* 82557 B-step */ + case 0x03: /* 82557 C-step */ + default: + phyaddr = -1; + break; + case 0x04: /* 82558 A-step */ + case 0x05: /* 82558 B-step */ + case 0x06: /* 82559 A-step */ + case 0x07: /* 82559 B-step */ + case 0x08: /* 82559 C-step */ + case 0x09: /* 82559ER A-step */ + phyaddr = scanphy(ctlr); + break; + } + if(phyaddr >= 0){ + /* + * Resolve the highest common ability of the two + * link partners. In descending order: + * 0x0100 100BASE-TX Full Duplex + * 0x0200 100BASE-T4 + * 0x0080 100BASE-TX + * 0x0040 10BASE-T Full Duplex + * 0x0020 10BASE-T + */ + anar = miir(ctlr, phyaddr, 0x04); + anlpar = miir(ctlr, phyaddr, 0x05) & 0x03E0; + anar &= anlpar; + bmcr = 0; + if(anar & 0x380) + bmcr = 0x2000; + if(anar & 0x0140) + bmcr |= 0x0100; + + switch((ctlr->eeprom[6]>>8) & 0x001F){ + + case 0x04: /* DP83840 */ + case 0x0A: /* DP83840A */ + /* + * The DP83840[A] requires some tweaking for + * reliable operation. + * The manual says bit 10 should be unconditionally + * set although it supposedly only affects full-duplex + * operation (an & 0x0140). + */ + x = miir(ctlr, phyaddr, 0x17) & ~0x0520; + x |= 0x0420; + for(i = 0; i < ether->nopt; i++){ + if(cistrcmp(ether->opt[i], "congestioncontrol")) + continue; + x |= 0x0100; + break; + } + miiw(ctlr, phyaddr, 0x17, x); + + /* + * If the link partner can't autonegotiate, determine + * the speed from elsewhere. + */ + if(anlpar == 0){ + miir(ctlr, phyaddr, 0x01); + bmsr = miir(ctlr, phyaddr, 0x01); + x = miir(ctlr, phyaddr, 0x19); + if((bmsr & 0x0004) && !(x & 0x0040)) + bmcr = 0x2000; + } + break; + + case 0x07: /* Intel 82555 */ + /* + * Auto-negotiation may fail if the other end is + * a DP83840A and the cable is short. 
+ */ + miir(ctlr, phyaddr, 0x01); + bmsr = miir(ctlr, phyaddr, 0x01); + if((miir(ctlr, phyaddr, 0) & 0x1000) && !(bmsr & 0x0020)){ + miiw(ctlr, phyaddr, 0x1A, 0x2010); + x = miir(ctlr, phyaddr, 0); + miiw(ctlr, phyaddr, 0, 0x0200|x); + for(i = 0; i < 3000; i++){ + delay(1); + if(miir(ctlr, phyaddr, 0x01) & 0x0020) + break; + } + miiw(ctlr, phyaddr, 0x1A, 0x2000); + + anar = miir(ctlr, phyaddr, 0x04); + anlpar = miir(ctlr, phyaddr, 0x05) & 0x03E0; + anar &= anlpar; + bmcr = 0; + if(anar & 0x380) + bmcr = 0x2000; + if(anar & 0x0140) + bmcr |= 0x0100; + } + break; + } + + /* + * Force speed and duplex if no auto-negotiation. + */ + if(anlpar == 0){ + medium = -1; + for(i = 0; i < ether->nopt; i++){ + for(k = 0; k < nelem(mediatable); k++){ + if(cistrcmp(mediatable[k], ether->opt[i])) + continue; + medium = k; + break; + } + + switch(medium){ + default: + break; + + case 0x00: /* 10BASE-T */ + case 0x01: /* 10BASE-2 */ + case 0x02: /* 10BASE-5 */ + bmcr &= ~(0x2000|0x0100); + ctlr->configdata[19] &= ~0x40; + break; + + case 0x03: /* 100BASE-TX */ + case 0x06: /* 100BASE-T4 */ + case 0x07: /* 100BASE-FX */ + ctlr->configdata[19] &= ~0x40; + bmcr |= 0x2000; + break; + + case 0x04: /* 10BASE-TFD */ + bmcr = (bmcr & ~0x2000)|0x0100; + ctlr->configdata[19] |= 0x40; + break; + + case 0x05: /* 100BASE-TXFD */ + case 0x08: /* 100BASE-FXFD */ + bmcr |= 0x2000|0x0100; + ctlr->configdata[19] |= 0x40; + break; + } + } + if(medium != -1) + miiw(ctlr, phyaddr, 0x00, bmcr); + } + + if(bmcr & 0x2000) + ether->mbps = 100; + + ctlr->configdata[8] = 1; + ctlr->configdata[15] &= ~0x80; + } + else{ + ctlr->configdata[8] = 0; + ctlr->configdata[15] |= 0x80; + } + + /* + * Workaround for some broken HUB chips when connected at 10Mb/s + * half-duplex. + * This is a band-aid, but as there's no dynamic auto-negotiation + * code at the moment, only deactivate the workaround code in txstart + * if the link is 100Mb/s. 
+ */ + if(ether->mbps != 10) + ctlr->nop = 0; + + /* + * Load the chip configuration and start it off. + */ + if(ether->oq == 0) + ether->oq = qopen(256*1024, Qmsg, 0, 0); + configure(ether, 0); + command(ctlr, CUstart, PADDR(&ctlr->cbr->status)); + + /* + * Check if the adapter's station address is to be overridden. + * If not, read it from the EEPROM and set in ether->ea prior to loading + * the station address with the Individual Address Setup command. + */ + memset(ea, 0, Eaddrlen); + if(memcmp(ea, ether->ea, Eaddrlen) == 0){ + for(i = 0; i < Eaddrlen/2; i++){ + x = ctlr->eeprom[i]; + ether->ea[2*i] = x; + ether->ea[2*i+1] = x>>8; + } + } + + ilock(&ctlr->cblock); + ctlr->action = CbIAS; + txstart(ether); + iunlock(&ctlr->cblock); + + /* + * Linkage to the generic ethernet driver. + */ + ether->attach = attach; + ether->transmit = transmit; + ether->interrupt = interrupt; + ether->ifstat = ifstat; + ether->shutdown = shutdown; + + ether->promiscuous = promiscuous; + ether->multicast = multicast; + ether->arg = ether; + + return 0; +} + +void +ether82557link(void) +{ + addethercard("i82557", reset); +} diff -Nru /sys/src/9k/386/ether82563.c /sys/src/9k/386/ether82563.c --- /sys/src/9k/386/ether82563.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/ether82563.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2123 @@ +/* + * Intel 8256[367], 8257[1-9], 8258[03], i21[01], i350 + * Gigabit Ethernet PCI-Express Controllers + * Coraid EtherDrive® hba + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" + +#include "etherif.h" + +/* + * note: the 82575, 82576 and 82580 are operated using registers aliased + * to the 82563-style architecture. many features seen in the 82598 + * are also seen in the 82575 part. 
+ */ + +enum { + /* General */ + + Ctrl = 0x0000, /* Device Control */ + Status = 0x0008, /* Device Status */ + Eec = 0x0010, /* EEPROM/Flash Control/Data */ + Eerd = 0x0014, /* EEPROM Read */ + Ctrlext = 0x0018, /* Extended Device Control */ + Fla = 0x001c, /* Flash Access */ + Mdic = 0x0020, /* MDI Control */ + Fcal = 0x0028, /* Flow Control Address Low */ + Fcah = 0x002C, /* Flow Control Address High */ + Fct = 0x0030, /* Flow Control Type */ + Kumctrlsta = 0x0034, /* Kumeran Control and Status Register */ + Connsw = 0x0034, /* copper / fiber switch control; 82575/82576 */ + Vet = 0x0038, /* VLAN EtherType */ + Fcttv = 0x0170, /* Flow Control Transmit Timer Value */ + Txcw = 0x0178, /* Transmit Configuration Word */ + Rxcw = 0x0180, /* Receive Configuration Word */ + Ledctl = 0x0E00, /* LED control */ + Pba = 0x1000, /* Packet Buffer Allocation */ + Pbs = 0x1008, /* Packet Buffer Size */ + + /* Interrupt */ + + Icr = 0x00C0, /* Interrupt Cause Read */ + Itr = 0x00c4, /* Interrupt Throttling Rate */ + Ics = 0x00C8, /* Interrupt Cause Set */ + Ims = 0x00D0, /* Interrupt Mask Set/Read */ + Imc = 0x00D8, /* Interrupt mask Clear */ + Iam = 0x00E0, /* Interrupt acknowledge Auto Mask */ + Eitr = 0x1680, /* Extended itr; 82575/6 80 only */ + + /* Receive */ + + Rctl = 0x0100, /* Control */ + Ert = 0x2008, /* Early Receive Threshold (573[EVL], 82578 only) */ + Fcrtl = 0x2160, /* Flow Control RX Threshold Low */ + Fcrth = 0x2168, /* Flow Control Rx Threshold High */ + Psrctl = 0x2170, /* Packet Split Receive Control */ + Drxmxod = 0x2540, /* dma max outstanding bytes (82575) */ + Rdbal = 0x2800, /* Rdesc Base Address Low Queue 0 */ + Rdbah = 0x2804, /* Rdesc Base Address High Queue 0 */ + Rdlen = 0x2808, /* Descriptor Length Queue 0 */ + Srrctl = 0x280c, /* split and replication rx control (82575) */ + Rdh = 0x2810, /* Descriptor Head Queue 0 */ + Rdt = 0x2818, /* Descriptor Tail Queue 0 */ + Rdtr = 0x2820, /* Descriptor Timer Ring */ + Rxdctl = 0x2828, /* Descriptor 
Control */ + Radv = 0x282C, /* Interrupt Absolute Delay Timer */ + Rsrpd = 0x2c00, /* Small Packet Detect */ + Raid = 0x2c08, /* ACK interrupt delay */ + Cpuvec = 0x2c10, /* CPU Vector */ + Rxcsum = 0x5000, /* Checksum Control */ + Rmpl = 0x5004, /* rx maximum packet length (82575) */ + Rfctl = 0x5008, /* Filter Control */ + Mta = 0x5200, /* Multicast Table Array */ + Ral = 0x5400, /* Receive Address Low */ + Rah = 0x5404, /* Receive Address High */ + Vfta = 0x5600, /* VLAN Filter Table Array */ + Mrqc = 0x5818, /* Multiple Receive Queues Command */ + + /* Transmit */ + + Tctl = 0x0400, /* Transmit Control */ + Tipg = 0x0410, /* Transmit IPG */ + Tkabgtxd = 0x3004, /* glci afe band gap transmit ref data, or something */ + Tdbal = 0x3800, /* Tdesc Base Address Low */ + Tdbah = 0x3804, /* Tdesc Base Address High */ + Tdlen = 0x3808, /* Descriptor Length */ + Tdh = 0x3810, /* Descriptor Head */ + Tdt = 0x3818, /* Descriptor Tail */ + Tidv = 0x3820, /* Interrupt Delay Value */ + Txdctl = 0x3828, /* Descriptor Control */ + Tadv = 0x382C, /* Interrupt Absolute Delay Timer */ + Tarc0 = 0x3840, /* Arbitration Counter Queue 0 */ + + /* Statistics */ + + Statistics = 0x4000, /* Start of Statistics Area */ + Gorcl = 0x88/4, /* Good Octets Received Count */ + Gotcl = 0x90/4, /* Good Octets Transmitted Count */ + Torl = 0xC0/4, /* Total Octets Received */ + Totl = 0xC8/4, /* Total Octets Transmitted */ + Nstatistics = 0x124/4, +}; + +enum { /* Ctrl */ + Lrst = 1<<3, /* link reset */ + Slu = 1<<6, /* Set Link Up */ + Devrst = 1<<26, /* Device Reset */ + Rfce = 1<<27, /* Receive Flow Control Enable */ + Tfce = 1<<28, /* Transmit Flow Control Enable */ + Phyrst = 1<<31, /* Phy Reset */ +}; + +enum { /* Status */ + Lu = 1<<1, /* Link Up */ + Lanid = 3<<2, /* mask for Lan ID. 
*/ + Txoff = 1<<4, /* Transmission Paused */ + Tbimode = 1<<5, /* TBI Mode Indication */ + Phyra = 1<<10, /* PHY Reset Asserted */ + GIOme = 1<<19, /* GIO Master Enable Status */ +}; + +enum { + /* Eec */ + Nvpres = 1<<8, /* nvram present */ + Autord = 1<<9, /* autoread complete */ + Sec1val = 1<<22, /* sector 1 valid (!sec0) */ +}; + +enum { /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ +}; + +enum { /* Ctrlext */ + Eerst = 1<<13, /* EEPROM Reset */ + Linkmode = 3<<22, /* linkmode */ + Internalphy = 0<<22, /* " internal phy (copper) */ + Sgmii = 2<<22, /* " sgmii */ + Serdes = 3<<22, /* " serdes */ +}; + +enum { + /* Connsw */ + Enrgirq = 1<<2, /* interrupt on power detect (enrgsrc) */ +}; + +enum { /* EEPROM content offsets */ + Ea = 0x00, /* Ethernet Address */ +}; + +enum { /* Mdic */ + MDIdMASK = 0x0000FFFF, /* Data */ + MDIdSHIFT = 0, + MDIrMASK = 0x001F0000, /* PHY Register Address */ + MDIrSHIFT = 16, + MDIpMASK = 0x03E00000, /* PHY Address */ + MDIpSHIFT = 21, + MDIwop = 0x04000000, /* Write Operation */ + MDIrop = 0x08000000, /* Read Operation */ + MDIready = 0x10000000, /* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* phy interface */ + Phyctl = 0, /* phy ctl register */ + Phyisr = 19, /* 82563 phy interrupt status register */ + Phylhr = 19, /* 8257[12] link health register */ + Physsr = 17, /* phy secondary status register */ + Phyprst = 193<<8 | 17, /* 8256[34] phy port reset */ + Phyier = 18, /* 82573 phy interrupt enable register */ + Phypage = 22, /* 8256[34] page register */ + Phystat = 26, /* 82580 phy status */ + Phyapage = 29, + Phy79page = 31, /* 82579 phy page register (all pages) */ + + Rtlink = 1<<10, /* realtime link status */ + Phyan = 1<<11, /* phy has autonegotiated */ + + /* Phyctl bits */ + Ran = 1<<9, /* restart auto negotiation */ + Ean = 1<<12, /* enable auto negotiation */ + + /* Phyprst bits */ + Prst = 1<<0, /* reset the port 
*/ + + /* 82573 Phyier bits */ + Lscie = 1<<10, /* link status changed ie */ + Ancie = 1<<11, /* auto negotiation complete ie */ + Spdie = 1<<14, /* speed changed ie */ + Panie = 1<<15, /* phy auto negotiation error ie */ + + /* Phylhr/Phyisr bits */ + Anf = 1<<6, /* lhr: auto negotiation fault */ + Ane = 1<<15, /* isr: auto negotiation error */ + + /* 82580 Phystat bits */ + Ans = 1<<14 | 1<<15, /* 82580 autoneg. status */ + Link = 1<<6, /* 82580 Link */ + + /* Rxcw builtin serdes */ + Anc = 1<<31, + Rxsynch = 1<<30, + Rxcfg = 1<<29, + Rxcfgch = 1<<28, + Rxcfgbad = 1<<27, + Rxnc = 1<<26, + + /* Txcw */ + Txane = 1<<31, + Txcfg = 1<<30, +}; + +enum { /* fiber (pcs) interface */ + Pcsctl = 0x4208, /* pcs control */ + Pcsstat = 0x420c, /* pcs status */ + + /* Pcsctl bits */ + Pan = 1<<16, /* autonegotiate */ + Prestart = 1<<17, /* restart an (self clearing) */ + + /* Pcsstat bits */ + Linkok = 1<<0, /* link is okay */ + Andone = 1<<16, /* an phase is done see below for success */ + Anbad = 1<<19 | 1<<20, /* Anerror | Anremfault */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt; !82575/6/80 only */ + Rxdw = 0x00000080, /* Rdesc write back; 82575/6/80 only */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfgset = 0x00000400, /* Receiving /C/ ordered sets */ + Ack = 0x00020000, /* Receive ACK frame */ + Omed = 1<<20, /* media change; pcs interface */ +}; + +enum { /* Txcw */ + TxcwFd = 0x00000020, /* Full Duplex */ + TxcwHd = 0x00000040, /* Half Duplex */ + TxcwPauseMASK = 0x00000180, /* Pause */ + TxcwPauseSHIFT = 7, + TxcwPs = 1<nic+((r)/4))) +#define csr32w(c, r, v) (*((c)->nic+((r)/4)) = (v)) + +static Ctlr 
*i82563ctlr; +static Rbpool rbtab[Npool]; + +static char *statistics[Nstatistics] = { + "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-mtu Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + "Management Packets Rx", + "Management Packets Drop", + "Management Packets Tx", + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-mtu Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", + "Interrupt Assertion", + "Interrupt Rx Pkt Timer", + "Interrupt Rx Abs Timer", + "Interrupt Tx Pkt Timer", + "Interrupt Tx Abs Timer", + "Interrupt Tx Queue Empty", + "Interrupt Tx Desc Low", + "Interrupt Rx Min", + "Interrupt Rx Overrun", +}; + +static char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} 
+ +static long +i82563ifstat(Ether *edev, void *a, long n, usize offset) +{ + char *s, *p, *e, *stat; + int i, r; + uvlong tuvl, ruvl; + Ctlr *ctlr; + Rbpool *b; + + ctlr = edev->ctlr; + qlock(&ctlr->slock); + p = s = malloc(READSTR); + e = p + READSTR; + + for(i = 0; i < Nstatistics; i++){ + r = csr32r(ctlr, Statistics + i*4); + if((stat = statistics[i]) == nil) + continue; + switch(i){ + case Gorcl: + case Gotcl: + case Torl: + case Totl: + ruvl = r; + ruvl += (uvlong)csr32r(ctlr, Statistics+(i+1)*4) << 32; + tuvl = ruvl; + tuvl += ctlr->statistics[i]; + tuvl += (uvlong)ctlr->statistics[i+1] << 32; + if(tuvl == 0) + continue; + ctlr->statistics[i] = tuvl; + ctlr->statistics[i+1] = tuvl >> 32; + p = seprint(p, e, "%s: %llud %llud\n", stat, tuvl, ruvl); + i++; + break; + + default: + ctlr->statistics[i] += r; + if(ctlr->statistics[i] == 0) + continue; + p = seprint(p, e, "%s: %ud %ud\n", stat, + ctlr->statistics[i], r); + break; + } + } + + p = seprint(p, e, "lintr: %ud %ud\n", ctlr->lintr, ctlr->lsleep); + p = seprint(p, e, "rintr: %ud %ud\n", ctlr->rintr, ctlr->rsleep); + p = seprint(p, e, "tintr: %ud %ud\n", ctlr->tintr, ctlr->txdw); + p = seprint(p, e, "ixcs: %ud %ud %ud\n", ctlr->ixsm, ctlr->ipcs, ctlr->tcpcs); + p = seprint(p, e, "rdtr: %ud\n", ctlr->rdtr); + p = seprint(p, e, "radv: %ud\n", ctlr->radv); + p = seprint(p, e, "ctrl: %.8ux\n", csr32r(ctlr, Ctrl)); + p = seprint(p, e, "ctrlext: %.8ux\n", csr32r(ctlr, Ctrlext)); + p = seprint(p, e, "status: %.8ux\n", csr32r(ctlr, Status)); + p = seprint(p, e, "txcw: %.8ux\n", csr32r(ctlr, Txcw)); + p = seprint(p, e, "txdctl: %.8ux\n", csr32r(ctlr, Txdctl)); + p = seprint(p, e, "pba: %.8ux\n", ctlr->pba); + + b = rbtab + ctlr->pool; + p = seprint(p, e, "pool: fast %ud slow %ud nstarve %ud nwakey %ud starve %ud\n", + b->nfast, b->nslow, b->nstarve, b->nwakey, b->starve); + p = seprint(p, e, "speeds: 10:%ud 100:%ud 1000:%ud ?:%ud\n", + ctlr->speeds[0], ctlr->speeds[1], ctlr->speeds[2], ctlr->speeds[3]); + p = 
seprint(p, e, "type: %s\n", cname(ctlr)); + + USED(p); + n = readstr(offset, a, n, s); + free(s); + qunlock(&ctlr->slock); + + return n; +} + +static void +i82563promiscuous(void *arg, int on) +{ + int rctl; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + rctl = csr32r(ctlr, Rctl); + rctl &= ~MoMASK; + if(on) + rctl |= Upe|Mpe; + else + rctl &= ~(Upe|Mpe); + csr32w(ctlr, Rctl, rctl); +} + +static void +i82563multicast(void *arg, uchar *addr, int on) +{ + int bit, x; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + x = addr[5]>>1; + if(ctlr->type == i82566) + x &= 31; + if(ctlr->type == i210 || ctlr->type == i217) + x &= 15; + bit = ((addr[5] & 1)<<4)|(addr[4]>>4); + /* + * multiple ether addresses can hash to the same filter bit, + * so it's never safe to clear a filter bit. + * if we want to clear filter bits, we need to keep track of + * all the multicast addresses in use, clear all the filter bits, + * then set the ones corresponding to in-use addresses. 
+ */ + if(on) + ctlr->mta[x] |= 1<mta[x] &= ~(1<mta[x]); +} + +static int +icansleep(void *v) +{ + Rbpool *p; + int r; + + p = v; + ilock(p); + r = p->starve == 0; + iunlock(p); + + return r; +} + +static Block* +i82563rballoc(Rbpool *p) +{ + Block *b; + + for(;;){ + if((b = p->x) != nil){ + p->nfast++; + p->x = b->next; + b->next = nil; + return b; + } + + ilock(p); + b = p->b; + p->b = nil; + if(b == nil){ + p->nstarve++; + iunlock(p); + return nil; + } + p->nslow++; + iunlock(p); + p->x = b; + } +} + +static void +rbfree(Block *b, int t) +{ + Rbpool *p; + + p = rbtab + t; + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, Rbalign); + b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck); + + ilock(p); + b->next = p->b; + p->b = b; + if(p->starve){ + if(0) + iprint("wakey %d; %d %d\n", t, p->nstarve, p->nwakey); + p->nwakey++; + p->starve = 0; + iunlock(p); + wakeup(p); + }else + iunlock(p); +} + +static void +rbfree0(Block *b) +{ + rbfree(b, 0); +} + +static void +rbfree1(Block *b) +{ + rbfree(b, 1); +} + +static void +rbfree2(Block *b) +{ + rbfree(b, 2); +} + +static void +rbfree3(Block *b) +{ + rbfree(b, 3); +} + +static void +rbfree4(Block *b) +{ + rbfree(b, 4); +} + +static void +rbfree5(Block *b) +{ + rbfree(b, 5); +} + +static void +rbfree6(Block *b) +{ + rbfree(b, 6); +} + +static void +rbfree7(Block *b) +{ + rbfree(b, 7); +} + +static void +rbfree8(Block *b) +{ + rbfree(b, 8); +} + +static void +rbfree9(Block *b) +{ + rbfree(b, 9); +} + +static Freefn freetab[Npool] = { + rbfree0, + rbfree1, + rbfree2, + rbfree3, + rbfree4, + rbfree5, + rbfree6, + rbfree7, + rbfree8, + rbfree9, +}; + +static int +newpool(void) +{ + static int seq; + + if(seq == nelem(freetab)) + return -1; + if(freetab[seq] == nil){ + print("82563: bad freetab\n"); + return -1; + } + return seq++; +} + +static void +i82563im(Ctlr *ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static void +i82563txinit(Ctlr *ctlr) +{ + 
int i; + u32int r; + Block *b; + + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Tctl, 0x0F<tdba)); + csr32w(ctlr, Tdbah, PCIWADDRH(ctlr->tdba)); + csr32w(ctlr, Tdlen, ctlr->ntd * sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + for(i = 0; i < ctlr->ntd; i++){ + if((b = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(b); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + csr32w(ctlr, Tidv, 128); + csr32w(ctlr, Tadv, 64); + csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) | Ten); + r = csr32r(ctlr, Txdctl) & ~WthreshMASK; + r |= 4<type].flag & F75) + r |= Enable; + csr32w(ctlr, Txdctl, r); +} + +#define Next(x, m) (((x)+1) & (m)) + +static int +i82563cleanup(Ether *e) +{ + Block *b; + Ctlr *c; + int tdh, m, n; + + c = e->ctlr; + tdh = c->tdh; + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status & Tdd){ + tdh = n; + if((b = c->tb[tdh]) != nil){ + c->tb[tdh] = nil; + freeb(b); + }else + iprint("#l%d: %s tx underrun! %d\n", e->ctlrno, cname(c), n); + c->tdba[tdh].status = 0; + } + + return c->tdh = tdh; +} + +static int +notrim(void *v) +{ + Ctlr *c; + + c = v; + return (c->im & Txdw) == 0; +} + +static void +i82563tproc(void *v) +{ + Td *td; + Block *bp; + Ether *edev; + Ctlr *ctlr; + int tdh, tdt, m; + + edev = v; + ctlr = edev->ctlr; + tdt = ctlr->tdt; + m = ctlr->ntd-1; + + i82563txinit(ctlr); + + for(;;){ + tdh = i82563cleanup(edev); + + if(Next(tdt, m) == tdh){ + ctlr->txdw++; + i82563im(ctlr, Txdw); + sleep(&ctlr->trendez, notrim, ctlr); + continue; + } + bp = qbread(edev->oq, 100000); + td = &ctlr->tdba[tdt]; + td->addr[0] = PCIWADDRL(bp->rp); + td->addr[1] = PCIWADDRH(bp->rp); + td->control = Ide|Rs|Ifcs|Teop|BLEN(bp); + ctlr->tb[tdt] = bp; + tdt = Next(tdt, m); + sfence(); + csr32w(ctlr, Tdt, tdt); + } +} + +static int +i82563replenish(Ctlr *ctlr, int maysleep) +{ + uint rdt, m, i; + Block *bp; + Rbpool *p; + Rd *rd; + + rdt = ctlr->rdt; + m = ctlr->nrd-1; + p = rbtab + ctlr->pool; + i = 0; + 
for(; Next(rdt, m) != ctlr->rdh; rdt = Next(rdt, m)){ + rd = &ctlr->rdba[rdt]; + if(ctlr->rb[rdt] != nil){ + /* the slot still owns a buffer; this is the receive ring, so report it as rx */ + iprint("%s: rx overrun\n", cname(ctlr)); + break; + } + redux: + bp = i82563rballoc(p); + if(bp == nil){ + /* + * count the descriptors between head and tail modulo the ring size. + * a plain unsigned subtraction wraps to a huge value once rdt has + * wrapped below rdh, which made a nearly empty ring look well stocked. + */ + if(((rdt - ctlr->rdh) & m) >= 16) + break; + print("%s: pool %d: no rx buffers\n", cname(ctlr), ctlr->pool); + if(maysleep == 0) + return -1; + /* mark the pool starved so rbfree wakes us when a block comes back */ + ilock(p); + p->starve = 1; + iunlock(p); + sleep(p, icansleep, p); + goto redux; + } + i++; + ctlr->rb[rdt] = bp; + rd->addr[0] = PCIWADDRL(bp->rp); + rd->addr[1] = PCIWADDRH(bp->rp); + rd->status = 0; + ctlr->rdfree++; + } + /* only touch the tail register when at least one descriptor was filled */ + if(i != 0){ + ctlr->rdt = rdt; + sfence(); + csr32w(ctlr, Rdt, rdt); + } + return 0; +} + +static void +i82563rxinit(Ctlr *ctlr) +{ + int i; + Block *bp; + + if(ctlr->rbsz <= 2048) + csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF); + else{ + i = ctlr->rbsz / 1024; + if(ctlr->rbsz % 1024) + i++; + if(cttab[ctlr->type].flag & F75){ + csr32w(ctlr, Rctl, Lpe|Dpf|Bsize2048|Bam|RdtmsHALF|Secrc); + if(ctlr->type != i82575) + i |= (ctlr->nrd/2>>4)<<20; /* RdmsHalf */ + csr32w(ctlr, Srrctl, i | Dropen); + csr32w(ctlr, Rmpl, ctlr->rbsz); +// csr32w(ctlr, Drxmxod, 0x7ff); + }else + csr32w(ctlr, Rctl, Lpe|Dpf|BsizeFlex*i|Bam|RdtmsHALF|Secrc); + } + + if(cttab[ctlr->type].flag & Fert) + csr32w(ctlr, Ert, 1024/8); + + if(ctlr->type == i82566) + csr32w(ctlr, Pbs, 16); + + csr32w(ctlr, Rdbal, PCIWADDRL(ctlr->rdba)); + csr32w(ctlr, Rdbah, PCIWADDRH(ctlr->rdba)); + csr32w(ctlr, Rdlen, ctlr->nrd * sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + ctlr->rdtr = 0; //25; + ctlr->radv = 0; //500; + csr32w(ctlr, Rdtr, ctlr->rdtr); + csr32w(ctlr, Radv, ctlr->radv); + + for(i = 0; i < ctlr->nrd; i++) + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Rxdctl, 1<rim != 0; +} + +static void +i82563rproc(void *arg) +{ + uint m, rdh, rim, im; + Block *bp; + Ctlr *ctlr; + Ether *edev; + Rd *rd; + + edev = arg; + 
ctlr = edev->ctlr; + + i82563rxinit(ctlr); + csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren); + if(cttab[ctlr->type].flag & F75){ + csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Enable); + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + }else + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + m = ctlr->nrd-1; + + for(;;){ + i82563im(ctlr, im); + ctlr->rsleep++; + i82563replenish(ctlr, 1); + sleep(&ctlr->rrendez, i82563rim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + rim = ctlr->rim; + ctlr->rim = 0; + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + bp = ctlr->rb[rdh]; + if((rd->status & Reop) && rd->errors == 0){ + bp->wp += rd->length; + bp->lim = bp->wp; /* lie like a dog. avoid packblock. */ + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flag |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). 
+ */ + ctlr->tcpcs++; + bp->flag |= Btcpck|Budpck; + } + bp->checksum = rd->checksum; + bp->flag |= Bpktck; + } + etheriq(edev, bp, 1); + } else + freeb(bp); + ctlr->rb[rdh] = nil; + rd->status = 0; + ctlr->rdfree--; + ctlr->rdh = rdh = Next(rdh, m); + if(ctlr->nrd-ctlr->rdfree >= 32 || (rim & Rxdmt0)) + if(i82563replenish(ctlr, 0) == -1) + break; + } + } +} + +static int +i82563lim(void *v) +{ + return ((Ctlr*)v)->lim != 0; +} + +static int speedtab[] = { + 10, 100, 1000, 0 +}; + +static uint phywrite0(Ctlr*, int, int, ushort); + +static uint +setpage(Ctlr *c, uint phyno, uint p, uint r) +{ + uint pr; + + switch(c->type){ + case i82563: + if(r >= 16 && r <= 28 && r != 22) + pr = Phypage; + else if(r == 30 || r == 31) + pr = Phyapage; + else + return 0; + return phywrite0(c, phyno, pr, p); + case i82576: + case i82577: + case i82578: + return phywrite0(c, phyno, Phy79page, p); /* unverified */ + case i82579: + return phywrite0(c, phyno, Phy79page, p<<5); + default: + if(p == 0) + return 0; + return ~0; + } +} + +static uint +phyread0(Ctlr *c, int phyno, int reg) +{ + uint phy, i; + + csr32w(c, Mdic, MDIrop | phyno<type].name, phyno, phy); + return ~0; + } + return phy & 0xffff; +} + +static uint +phyread(Ctlr *c, uint phyno, uint reg) +{ + if(setpage(c, phyno, reg>>8, reg & 0xff) == ~0){ + print("%s: phyread: bad phy page %d\n", cname(c), reg>>8); + return ~0; + } + return phyread0(c, phyno, reg & 0xff); +} + +static uint +phywrite0(Ctlr *c, int phyno, int reg, ushort val) +{ + uint phy, i; + + csr32w(c, Mdic, MDIwop | phyno<>8, reg & 0xff) == ~0) + panic("%s: bad phy reg %.4ux", cname(c), reg); + return phywrite0(c, phyno, reg & 0xff, v); +} + +/* + * phyerrata: link workaround, called from phylproc for the i82563 after + * the speed has been recorded in e->mbps. while no speed is reported, + * issue a single phy port reset; c->phyerrata remembers that the reset + * was tried and is cleared again once a speed is reported. + */ +static void +phyerrata(Ether *e, Ctlr *c, uint phyno) +{ + /* + * the outer braces are required: without them the else below binds + * to the inner if, and the flag toggles on every pass with no link. + */ + if(e->mbps == 0){ + if(c->phyerrata == 0){ + c->phyerrata++; + phywrite(c, phyno, Phyprst, Prst); /* try a port reset */ + print("ether%d: %s: phy port reset\n", e->ctlrno, cname(c)); + } + }else + c->phyerrata = 0; +} + +static void +phyl79proc(void *v) +{ + uint a, 
i, r, phy, phyno; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + + phyno = cttab[c->type].phyno; + for(;;){ + phy = phyread(c, phyno, Phystat); + if(phy == ~0){ + phy = 0; + i = 3; + goto next; + } + i = (phy>>8) & 3; + a = phy & Ans; + if(a){ + r = phyread(c, phyno, Phyctl); + phywrite(c, phyno, Phyctl, r | Ran | Ean); + } +next: + e->link = i != 3 && (phy & Link) != 0; + if(e->link == 0) + i = 3; + c->speeds[i]++; + e->mbps = speedtab[i]; + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +static void +phylproc(void *v) +{ + uint a, i, phy, phyno; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + phyno = cttab[c->type].phyno; + + if(c->type == i82573 && (phy = phyread(c, 1, Phyier)) != ~0) + phywrite(c, phyno, Phyier, phy | Lscie | Ancie | Spdie | Panie); + for(;;){ + phy = phyread(c, phyno, Physsr); + if(phy == ~0){ + phy = 0; + i = 3; + goto next; + } + i = (phy>>14) & 3; + switch(c->type){ + default: + a = 0; + break; + case i82563: + case i82578: + case i82578m: + case i82583: + a = phyread(c, phyno, Phyisr) & Ane; + break; + case i82571: + case i82572: + case i82575: + case i82576: + a = phyread(c, phyno, Phylhr) & Anf; + i = (i-1) & 3; + break; + } + if(a) + phywrite(c, phyno, Phyctl, phyread(c, phyno, Phyctl) | Ran | Ean); +next: + e->link = (phy & Rtlink) != 0; + if(e->link == 0) + i = 3; + c->speeds[i]++; + e->mbps = speedtab[i]; + if(c->type == i82563) + phyerrata(e, c, phyno); + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +static void +pcslproc(void *v) +{ + uint i, phy; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + + if(c->type == i82575 || c->type == i82576) + csr32w(c, Connsw, Enrgirq); + for(;;){ + phy = csr32r(c, Pcsstat); + e->link = phy & Linkok; + i = 3; + if(e->link) + i = (phy & 6) >> 1; + else if(phy & Anbad) + csr32w(c, Pcsctl, csr32r(c, Pcsctl) | Pan | Prestart); + c->speeds[i]++; + e->mbps = speedtab[i]; + c->lim = 0; + i82563im(c, Lsc | 
Omed); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +static void +serdeslproc(void *v) +{ + uint i, tx, rx; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + + for(;;){ + rx = csr32r(c, Rxcw); + tx = csr32r(c, Txcw); + USED(tx); + e->link = (rx & 1<<31) != 0; +// e->link = (csr32r(c, Status) & Lu) != 0; + i = 3; + if(e->link) + i = 2; + c->speeds[i]++; + e->mbps = speedtab[i]; + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +static void +i82563attach(Ether *edev) +{ + char name[KNAMELEN]; + int i; + Block *bp; + Ctlr *ctlr; + + ctlr = edev->ctlr; + qlock(&ctlr->alock); + if(ctlr->alloc != nil){ + qunlock(&ctlr->alock); + return; + } + + ctlr->nrd = Nrd; + ctlr->ntd = Ntd; + ctlr->alloc = malloc(ctlr->nrd*sizeof(Rd)+ctlr->ntd*sizeof(Td) + 255); + if(ctlr->alloc == nil){ + qunlock(&ctlr->alock); + error(Enomem); + } + ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 256); + ctlr->tdba = (Td*)(ctlr->rdba + ctlr->nrd); + + ctlr->rb = malloc(ctlr->nrd * sizeof(Block*)); + ctlr->tb = malloc(ctlr->ntd * sizeof(Block*)); + + if(waserror()){ + while(bp = i82563rballoc(rbtab + ctlr->pool)){ + bp->free = nil; + freeb(bp); + } + free(ctlr->tb); + ctlr->tb = nil; + free(ctlr->rb); + ctlr->rb = nil; + free(ctlr->alloc); + ctlr->alloc = nil; + qunlock(&ctlr->alock); + nexterror(); + } + + for(i = 0; i < Nrb; i++){ + bp = allocb(ctlr->rbsz + Rbalign); + bp->free = freetab[ctlr->pool]; + freeb(bp); + } + + snprint(name, sizeof name, "#l%dl", edev->ctlrno); + if(csr32r(ctlr, Status) & Tbimode) + kproc(name, serdeslproc, edev); /* mac based serdes */ + else if((csr32r(ctlr, Ctrlext) & Linkmode) == Serdes) + kproc(name, pcslproc, edev); /* phy based serdes */ + else if(cttab[ctlr->type].flag & F79phy) + kproc(name, phyl79proc, edev); + else + kproc(name, phylproc, edev); + + snprint(name, sizeof name, "#l%dr", edev->ctlrno); + kproc(name, i82563rproc, edev); + + snprint(name, sizeof name, "#l%dt", edev->ctlrno); + 
kproc(name, i82563tproc, edev); + + qunlock(&ctlr->alock); + poperror(); +} + +static void +i82563interrupt(Ureg*, void *arg) +{ + Ctlr *ctlr; + Ether *edev; + u32int icr, im; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + + while(icr = csr32r(ctlr, Icr) & ctlr->im){ + if(icr & (Lsc | Omed)){ + im &= ~(Lsc | Omed); + ctlr->lim = icr & (Lsc | Omed); + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack)){ + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + ctlr->tintr++; + wakeup(&ctlr->trendez); + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); +} + +static int +i82563detach(Ctlr *ctlr) +{ + int r, timeo; + + /* balance rx/tx packet buffer; survives reset */ + if(ctlr->rbsz > 8192 && cttab[ctlr->type].flag & Fpba){ + ctlr->pba = csr32r(ctlr, Pba); + r = ctlr->pba >> 16; + r += ctlr->pba & 0xffff; + r >>= 1; + csr32w(ctlr, Pba, r); + }else if(ctlr->type == i82573 && ctlr->rbsz > 1514) + csr32w(ctlr, Pba, 14); + ctlr->pba = csr32r(ctlr, Pba); + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal registers. 
+ */ + csr32w(ctlr, Imc, ~0); + csr32w(ctlr, Rctl, 0); + csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) & ~Ten); + + delay(10); + + r = csr32r(ctlr, Ctrl); + if(ctlr->type == i82566 || ctlr->type == i82579) + r |= Phyrst; + csr32w(ctlr, Ctrl, Devrst | r); + delay(1); + for(timeo = 0;; timeo++){ + if((csr32r(ctlr, Ctrl) & (Devrst|Phyrst)) == 0) + break; + if(timeo >= 1000) + return -1; + delay(1); + } + + r = csr32r(ctlr, Ctrl); + csr32w(ctlr, Ctrl, Slu|r); + + r = csr32r(ctlr, Ctrlext); + csr32w(ctlr, Ctrlext, r|Eerst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrlext) & Eerst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrlext) & Eerst) + return -1; + + csr32w(ctlr, Imc, ~0); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if((csr32r(ctlr, Icr) & ~Rxcfg) == 0) + break; + delay(1); + } + if(csr32r(ctlr, Icr) & ~Rxcfg) + return -1; + + return 0; +} + +static void +i82563shutdown(Ether *edev) +{ + i82563detach(edev->ctlr); +} + +static ushort +eeread(Ctlr *ctlr, int adr) +{ + csr32w(ctlr, Eerd, EEstart | adr << 2); + while ((csr32r(ctlr, Eerd) & EEdone) == 0) + ; + return csr32r(ctlr, Eerd) >> 16; +} + +static int +eeload(Ctlr *ctlr) +{ + u16int sum; + int data, adr; + + sum = 0; + for (adr = 0; adr < 0x40; adr++) { + data = eeread(ctlr, adr); + ctlr->eeprom[adr] = data; + sum += data; + } + return sum; +} + +static int +fcycle(Ctlr*, Flash *f) +{ + u16int s, i; + + s = f->reg[Fsts]; + if((s&Fvalid) == 0) + return -1; + f->reg[Fsts] |= Fcerr | Ael; + for(i = 0; i < 10; i++){ + if((s&Scip) == 0) + return 0; + delay(1); + s = f->reg[Fsts]; + } + return -1; +} + +static int +fread(Ctlr *c, Flash *f, int ladr) +{ + u16int s; + + delay(1); + if(fcycle(c, f) == -1) + return -1; + f->reg[Fsts] |= Fdone; + f->reg32[Faddr] = ladr; + + /* setup flash control register */ + s = f->reg[Fctl] & ~0x3ff; + f->reg[Fctl] = s | 1<<8 | Fgo; /* 2 byte read */ + + while((f->reg[Fsts] & Fdone) == 0) + ; + if(f->reg[Fsts] & (Fcerr|Ael)) + return -1; + return 
f->reg32[Fdata] & 0xffff; +} + +/* + * fload: map the flash register window (pci bar 1), read 0x40 16-bit + * words through fread into c->eeprom and return their 16-bit sum. + * returns -1 if the window cannot be mapped or a word cannot be read. + * the mapping is released on every path once it has been established. + */ +static int +fload(Ctlr *c) +{ + uint data, r, adr; + u16int sum; + uintmem io; + Flash f; + + io = c->pcidev->mem[1].bar & ~(uintmem)0xf; + f.reg = vmap(io, c->pcidev->mem[1].size); + if(f.reg == nil) + return -1; + f.reg32 = (u32int*)f.reg; + f.base = f.reg32[Bfpr] & 0x1fff; + f.lim = f.reg32[Bfpr]>>16 & 0x1fff; + /* when Sec1val is set in Eec, start halfway between base and lim */ + if(csr32r(c, Eec) & Sec1val) + f.base += f.lim+1 - f.base >> 1; + r = f.base << 12; + sum = 0; + for(adr = 0; adr < 0x40; adr++) { + data = fread(c, &f, r + adr*2); + if(data == -1){ + /* do not leak the mapping when a read fails */ + vunmap(f.reg, c->pcidev->mem[1].size); + return -1; + } + c->eeprom[adr] = data; + sum += data; + } + vunmap(f.reg, c->pcidev->mem[1].size); + return sum; +} + +static void +defaultea(Ctlr *ctlr, uchar *ra) +{ + uint i, r; + uvlong u; + static uchar nilea[Eaddrlen]; + + if(memcmp(ra, nilea, Eaddrlen) != 0) + return; + if(cttab[ctlr->type].flag & Fflashea){ + /* intel mb bug */ + u = (uvlong)csr32r(ctlr, Rah)<<32u | (uint)csr32r(ctlr, Ral); + for(i = 0; i < Eaddrlen; i++) + ra[i] = u >> 8*i; + } + if(memcmp(ra, nilea, Eaddrlen) != 0) + return; + for(i = 0; i < Eaddrlen/2; i++){ + ra[2*i] = ctlr->eeprom[Ea+i]; + ra[2*i+1] = ctlr->eeprom[Ea+i] >> 8; + } + r = (csr32r(ctlr, Status) & Lanid) >> 2; + ra[5] += r; /* ea ctlr[n] = ea ctlr[0]+n */ +} + +static int +i82563reset(Ctlr *ctlr) +{ + uchar *ra; + int i, r; + + if(i82563detach(ctlr)) + return -1; + if(cttab[ctlr->type].flag & Fload) + r = fload(ctlr); + else + r = eeload(ctlr); + if(r != 0 && r != 0xbaba){ + print("%s: bad eeprom checksum - %#.4ux\n", + cname(ctlr), r); + return -1; + } + + ra = ctlr->ra; + defaultea(ctlr, ra); + csr32w(ctlr, Ral, ra[3]<<24 | ra[2]<<16 | ra[1]<<8 | ra[0]); + csr32w(ctlr, Rah, 1<<31 | ra[5]<<8 | ra[4]); + for(i = 1; i < 16; i++){ + csr32w(ctlr, Ral+i*8, 0); + csr32w(ctlr, Rah+i*8, 0); + } + memset(ctlr->mta, 0, sizeof(ctlr->mta)); + for(i = 0; i < 128; i++) + csr32w(ctlr, Mta + i*4, 0); + csr32w(ctlr, Fcal, 0x00C28001); + csr32w(ctlr, Fcah, 0x0100); + if((cttab[ctlr->type].flag & Fnofct) == 0) + 
csr32w(ctlr, Fct, 0x8808); + csr32w(ctlr, Fcttv, 0x0100); + csr32w(ctlr, Fcrtl, ctlr->fcrtl); + csr32w(ctlr, Fcrth, ctlr->fcrth); + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Eitr, 128<<2); /* 128 ¼ microsecond intervals */ + return 0; +} + +enum { + CMrdtr, + CMradv, + CMpause, + CMan, +}; + +static Cmdtab i82563ctlmsg[] = { + CMrdtr, "rdtr", 2, + CMradv, "radv", 2, + CMpause, "pause", 1, + CMan, "an", 1, +}; + +static long +i82563ctl(Ether *edev, void *buf, long n) +{ + char *p; + u32int v; + Ctlr *ctlr; + Cmdbuf *cb; + Cmdtab *ct; + + if((ctlr = edev->ctlr) == nil) + error(Enonexist); + + cb = parsecmd(buf, n); + if(waserror()){ + free(cb); + nexterror(); + } + + ct = lookupcmd(cb, i82563ctlmsg, nelem(i82563ctlmsg)); + switch(ct->index){ + case CMrdtr: + v = strtoul(cb->f[1], &p, 0); + if(*p || v > 0xffff) + error(Ebadarg); + ctlr->rdtr = v; + csr32w(ctlr, Rdtr, v); + break; + case CMradv: + v = strtoul(cb->f[1], &p, 0); + if(*p || v > 0xffff) + error(Ebadarg); + ctlr->radv = v; + csr32w(ctlr, Radv, v); + break; + case CMpause: + csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) ^ (Rfce | Tfce)); + break; + case CMan: + csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) | Lrst | Phyrst); + break; + } + free(cb); + poperror(); + + return n; +} + +static int +didtype(int d) +{ + switch(d){ + case 0x1096: + case 0x10ba: /* “gilgal” */ + case 0x1098: /* serdes; not seen */ + case 0x10bb: /* serdes */ + return i82563; + case 0x1049: /* mm */ + case 0x104a: /* dm */ + case 0x104b: /* dc */ + case 0x104d: /* v “ninevah” */ + case 0x10bd: /* dm-2 */ + case 0x294c: /* ich 9 */ + return i82566; + case 0x10de: /* lm ich10d */ + case 0x10df: /* lf ich10 */ + case 0x10e5: /* lm ich9 */ + case 0x10f5: /* lm ich9m; “boazman” */ + return i82567; + case 0x10bf: /* lf ich9m */ + case 0x10cb: /* v ich9m */ + case 0x10cd: /* lf ich10 */ + case 0x10ce: /* v ich10 */ + case 0x10cc: /* lm ich10 */ + return i82567m; + case 0x105e: /* eb */ + case 0x105f: /* eb */ + case 0x1060: /* eb */ + case 0x10a4: /* 
eb */ + case 0x10a5: /* eb fiber */ + case 0x10bc: /* eb */ + case 0x10d9: /* eb serdes */ + case 0x10da: /* eb serdes “ophir” */ + return i82571; + case 0x107d: /* eb copper */ + case 0x107e: /* ei fiber */ + case 0x107f: /* ei */ + case 0x10b9: /* ei “rimon” */ + return i82572; + case 0x108b: /* e “vidalia” */ + case 0x108c: /* e (iamt) */ + case 0x109a: /* l “tekoa” */ + return i82573; + case 0x10d3: /* l or it; “hartwell” */ + return i82574; + case 0x10a7: + case 0x10a9: /* fiber/serdes */ + return i82575; + case 0x10c9: /* copper */ + case 0x10e6: /* fiber */ + case 0x10e7: /* serdes; “kawela” */ + case 0x150d: /* backplane */ + return i82576; + case 0x10ea: /* lc “calpella”; aka pch lan */ + return i82577; + case 0x10eb: /* lm “calpella” */ + return i82577m; + case 0x10ef: /* dc “piketon” */ + return i82578; + case 0x1502: /* lm */ + case 0x1503: /* v “lewisville” */ + return i82579; + case 0x10f0: /* dm “king's creek” */ + return i82578m; + case 0x150e: /* “barton hills” */ + case 0x150f: /* fiber */ + case 0x1510: /* backplane */ + case 0x1511: /* sfp */ + case 0x1516: + return i82580; + case 0x1506: /* v */ + return i82583; + case 0x1533: /* i210-t1 */ + case 0x1534: + case 0x1536: /* fiber */ + case 0x1537: /* backplane */ + case 0x1538: + case 0x1539: /* i211 */ + return i210; + case 0x153a: /* i217-lm */ + case 0x153b: /* i217-v */ + case 0x15a0: /* i218-lm */ + case 0x15a1: /* i218-v */ + case 0x15a2: /* i218-lm */ + case 0x15a3: /* i218-v */ + return i217; + case 0x151f: /* “powerville” eeprom-less */ + case 0x1521: /* copper */ + case 0x1522: /* fiber */ + case 0x1523: /* serdes */ + case 0x1524: /* sgmii */ + return i350; + } + return -1; +} + +static void +hbafixup(Pcidev *p) +{ + uint i; + + i = pcicfgr32(p, PciSVID); + if((i & 0xffff) == 0x1b52 && p->did == 1) + p->did = i>>16; +} + +static void +i82563pci(void) +{ + int type; + Ctlr *c, **cc; + Pcidev *p; + + cc = &i82563ctlr; + for(p = nil; p = pcimatch(p, 0x8086, 0);){ + hbafixup(p); + 
if((type = didtype(p->did)) == -1) + continue; + c = malloc(sizeof *c); + if(c == nil){ + /* out of memory: skip this device instead of dereferencing nil */ + print("82563: no memory for ctlr\n"); + continue; + } + c->type = type; + c->pcidev = p; + c->rbsz = cttab[type].mtu; + c->port = p->mem[0].bar & ~(uintmem)0xf; + /* append to the list headed by i82563ctlr */ + *cc = c; + cc = &c->next; + } +} + +static int +setup(Ctlr *ctlr) +{ + Pcidev *p; + + if((ctlr->pool = newpool()) == -1){ + print("%s: no pool\n", cname(ctlr)); + return -1; + } + p = ctlr->pcidev; + ctlr->nic = vmap(ctlr->port, p->mem[0].size); + if(ctlr->nic == nil){ + print("%s: can't map %#P\n", cname(ctlr), ctlr->port); + return -1; + } + if(i82563reset(ctlr)){ + vunmap(ctlr->nic, p->mem[0].size); + return -1; + } + pcisetbme(ctlr->pcidev); + return 0; +} + +static int +pnp(Ether *edev, int type) +{ + Ctlr *ctlr; + static int done; + + if(!done) { + i82563pci(); + done = 1; + } + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. + */ + for(ctlr = i82563ctlr; ; ctlr = ctlr->next){ + if(ctlr == nil) + return -1; + if(ctlr->active) + continue; + if(type != -1 && ctlr->type != type) + continue; + if(ethercfgmatch(edev, ctlr->pcidev, ctlr->port) == 0){ + ctlr->active = 1; + memmove(ctlr->ra, edev->ea, Eaddrlen); + if(setup(ctlr) == 0) + break; + } + } + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->maxmtu = ctlr->rbsz; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. 
+ */ + edev->attach = i82563attach; + edev->interrupt = i82563interrupt; + edev->ifstat = i82563ifstat; + edev->ctl = i82563ctl; + + edev->arg = edev; + edev->promiscuous = i82563promiscuous; + edev->shutdown = i82563shutdown; + edev->multicast = i82563multicast; + + return 0; +} + +static int +anypnp(Ether *e) +{ + return pnp(e, -1); +} + +void +ether82563link(void) +{ + addethercard("i82563", anypnp); +} diff -Nru /sys/src/9k/386/ether82598.c /sys/src/9k/386/ether82598.c --- /sys/src/9k/386/ether82598.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/ether82598.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1199 @@ +/* + * intel 10gbe pcie driver + * copyright © 2007—2012, coraid, inc. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" +#include "etherif.h" + +enum{ + /* general */ + Ctrl = 0x00000/4, /* Device Control */ + Status = 0x00008/4, /* Device Status */ + Ctrlext = 0x00018/4, /* Extended Device Control */ + Esdp = 0x00020/4, /* extended sdp control */ + Esodp = 0x00028/4, /* extended od sdp control */ + Ledctl = 0x00200/4, /* led control */ + Tcptimer = 0x0004c/4, /* tcp timer */ + Ecc = 0x110b0/4, /* errata ecc control magic */ + + /* nvm */ + Eec = 0x10010/4, /* eeprom/flash control */ + Eerd = 0x10014/4, /* eeprom read */ + Fla = 0x1001c/4, /* flash access */ + Flop = 0x1013c/4, /* flash opcode */ + Grc = 0x10200/4, /* general rx control */ + + /* interrupt */ + Icr = 0x00800/4, /* interrupt cause read */ + Ics = 0x00808/4, /* " set */ + Ims = 0x00880/4, /* " mask read/set */ + Imc = 0x00888/4, /* " mask clear */ + Iac = 0x00810/4, /* " auto clear */ + Iam = 0x00890/4, /* " auto mask enable */ + Itr = 0x00820/4, /* " throttling rate (0-19) */ + Ivar = 0x00900/4, /* " vector allocation regs. 
*/ + /*msi interrupt */ + Msixt = 0x0000/4, /* msix table (bar3) */ + Msipba = 0x2000/4, /* msix pending bit array (bar3) */ + Pbacl = 0x11068/4, /* pba clear */ + Gpie = 0x00898/4, /* general purpose int enable */ + + /* flow control */ + Pfctop = 0x03008/4, /* priority flow ctl type opcode */ + Fcttv = 0x03200/4, /* " transmit timer value (0-3) */ + Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */ + Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */ + Rcrtv = 0x032a0/4, /* " refresh value threshold */ + Tfcs = 0x0ce00/4, /* " tx status */ + + /* rx dma */ + Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */ + Rbah = 0x01004/4, /* " high */ + Rdlen = 0x01008/4, /* " length */ + Rdh = 0x01010/4, /* " head */ + Rdt = 0x01018/4, /* " tail */ + Rxdctl = 0x01028/4, /* " control */ + + Srrctl = 0x02100/4, /* split and replication rx ctl. */ + Dcarxctl = 0x02200/4, /* rx dca control */ + Rdrxctl = 0x02f00/4, /* rx dma control */ + Rxpbsize = 0x03c00/4, /* rx packet buffer size */ + Rxctl = 0x03000/4, /* rx control */ + Dropen = 0x03d04/4, /* drop enable control */ + + /* rx */ + Rxcsum = 0x05000/4, /* rx checksum control */ + Rfctl = 0x04008/4, /* rx filter control */ + Mta = 0x05200/4, /* multicast table array (0-127) */ + Ral = 0x05400/4, /* rx address low */ + Rah = 0x05404/4, + Psrtype = 0x05480/4, /* packet split rx type. */ + Vfta = 0x0a000/4, /* vlan filter table array. 
*/ + Fctrl = 0x05080/4, /* filter control */ + Vlnctrl = 0x05088/4, /* vlan control */ + Msctctrl = 0x05090/4, /* multicast control */ + Mrqc = 0x05818/4, /* multiple rx queues cmd */ + Vmdctl = 0x0581c/4, /* vmdq control */ + Imir = 0x05a80/4, /* immediate irq rx (0-7) */ + Imirext = 0x05aa0/4, /* immediate irq rx ext */ + Imirvp = 0x05ac0/4, /* immediate irq vlan priority */ + Reta = 0x05c00/4, /* redirection table */ + Rssrk = 0x05c80/4, /* rss random key */ + + /* tx */ + Tdbal = 0x06000/4, /* tx desc base low +0x40n */ + Tdbah = 0x06004/4, /* " high */ + Tdlen = 0x06008/4, /* " len */ + Tdh = 0x06010/4, /* " head */ + Tdt = 0x06018/4, /* " tail */ + Txdctl = 0x06028/4, /* " control */ + Tdwbal = 0x06038/4, /* " write-back address low */ + Tdwbah = 0x0603c/4, + + Dtxctl = 0x04a80/4, /* tx dma control !82598 */ + Tdcatxctrl = 0x07200/4, /* tx dca register (0-15) */ + Tipg = 0x0cb00/4, /* tx inter-packet gap */ + Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */ + + /* mac */ + Hlreg0 = 0x04240/4, /* highlander control reg 0 */ + Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */ + Msca = 0x0425c/4, /* mdi signal cmd & addr */ + Msrwd = 0x04260/4, /* mdi single rw data */ + Mhadd = 0x04268/4, /* mac addr high & max frame */ + Pcss1 = 0x04288/4, /* xgxs status 1 */ + Pcss2 = 0x0428c/4, + Xpcss = 0x04290/4, /* 10gb-x pcs status */ + Serdesc = 0x04298/4, /* serdes control */ + Macs = 0x0429c/4, /* fifo control & report */ + Autoc = 0x042a0/4, /* autodetect control & status */ + Links = 0x042a4/4, /* link status */ + Autoc2 = 0x042a8/4, +}; + +enum{ + /* Ctrl */ + Rst = 1<<26, /* full nic reset */ + + /* Txdctl */ + Ten = 1<<25, + + /* Dtxctl */ + Den = 1<<0, + + /* Fctrl */ + Rfce = 1<<15, /* rcv flow control enable */ + Dpf = 1<<13, /* discard pause frames */ + Bam = 1<<10, /* broadcast accept mode */ + Upe = 1<<9, /* unicast promiscuous */ + Mpe = 1<<8, /* multicast promiscuous */ + + /* Rxdctl */ + Pthresh = 0, /* prefresh threshold shift in bits */ + 
Hthresh = 8, /* host buffer minimum threshold " */ + Wthresh = 16, /* writeback threshold */ + Renable = 1<<25, + + /* Rxctl */ + Rxen = 1<<0, + Dmbyps = 1<<1, + + /* Rdrxctl */ + Rdmt½ = 0, + Rdmt¼ = 1, + Rdmt⅛ = 2, + + /* Rxcsum */ + Ippcse = 1<<12, /* ip payload checksum enable */ + + /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ + + /* interrupts */ + Irx0 = 1<<0, /* driver defined */ + Itx0 = 1<<1, /* driver defined */ + Lsc = 1<<20, /* link status change */ + Ioc = 1<<31, /* other cause */ + + /* Links */ + Lnkup = 1<<30, + Lnkspd8 = 1<<29, + Lnkspd9 = 3<<28, + + /* Hlreg0 */ + Txcrcen = 1<<0, + Jumboen = 1<<2, + + /* Ivar */ + Ivtx = 1|1<<7, /* transmit interrupt */ + Ivrx = 0|1<<7, /* receive interrupt */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlrtype Ctlrtype; +typedef struct Rd Rd; +typedef struct Rbpool Rbpool; +typedef struct Stat Stat; +typedef struct Td Td; + +enum { + i82598, + i82599, + x540, + Nctlrtype, +}; + +struct Ctlrtype { + int type; + int mtu; + int flag; + char *name; +}; + +enum { + Fphyoc = 1<<0, /* phy link needs other cause interrupt */ + Fsplitivar = 1<<1, /* tx and rx use different ivar entries */ + Fphyspd = 1<<2, /* phy speed useful (part supports <10gbe) */ + Ftxctl = 1<<3, /* part has txctl register */ +}; + +/* real mtu is 12k. 
use standard 9k to save memory */ +static Ctlrtype cttab[Nctlrtype] = { + i82598, 9*1024, Fsplitivar|Fphyoc, "i82598", + i82599, 9*1024, Fphyspd|Ftxctl, "i82599", + x540, 9*1024, Fphyspd|Ftxctl, "x540", +}; + +/* one hardware statistics counter: register byte offset and display name */ +struct Stat { + uint reg; + char *name; +}; + +/* counters accumulated by readstats; the order matters, statmask indexes this table */ +Stat stattab[] = { + 0x4000, "crc error", + 0x4004, "illegal byte", + 0x4008, "short packet", + 0x3fa0, "missed pkt0", + 0x4034, "mac local flt", + 0x4038, "mac rmt flt", + 0x4040, "rx length err", + 0x3f60, "xon tx", + 0xcf60, "xon rx", + 0x3f68, "xoff tx", + 0xcf68, "xoff rx", + 0x405c, "rx 040", + 0x4060, "rx 07f", + 0x4064, "rx 100", + 0x4068, "rx 200", + 0x406c, "rx 3ff", + 0x4070, "rx big", + 0x4074, "rx ok", + 0x4078, "rx bcast", + 0x3fc0, "rx no buf0", + 0x40a4, "rx runt", + 0x40a8, "rx frag", + 0x40ac, "rx ovrsz", + 0x40b0, "rx jab", + 0x40d0, "rx pkt", + + 0x40d4, "tx pkt", + 0x40d8, "tx 040", + 0x40dc, "tx 07f", + 0x40e0, "tx 100", + 0x40e4, "tx 200", + 0x40e8, "tx 3ff", + 0x40ec, "tx big", + 0x40f4, "tx bcast", + 0x4120, "xsum err", +}; + +/* + * nonzero entries are skipped by readstats for that controller type: + * stattab entries 7-10 (xon tx, xon rx, xoff tx, xoff rx) on the i82599. + */ +uchar statmask[Nctlrtype][nelem(stattab)] = { +[i82599][7] = 1, +[i82599][8] = 1, +[i82599][9] = 1, +[i82599][10] = 1, +}; + +/* status */ +enum{ + Pif = 1<<7, /* past exact filter (sic) */ + Ipcs = 1<<6, /* ip checksum calculated */ + L4cs = 1<<5, /* presumably layer 4 checksum calculated; was "layer 2" - TODO confirm */ + Tcpcs = 1<<4, /* tcp checksum calculated */ + Vp = 1<<3, /* 802.1q packet matched vet */ + Ixsm = 1<<2, /* ignore checksum */ + Reop = 1<<1, /* end of packet */ + Rdd = 1<<0, /* descriptor done */ +}; + +/* receive descriptor */ +struct Rd { + u32int addr[2]; + u16int length; + u16int cksum; + uchar status; + uchar errors; + u16int vlan; +}; + +enum{ + /* Td cmd */ + Rs = 1<<3, + Ic = 1<<2, + Ifcs = 1<<1, + Teop = 1<<0, + + /* Td status */ + Tdd = 1<<0, +}; + +/* transmit descriptor */ +struct Td { + u32int addr[2]; + u16int length; + uchar cso; + uchar cmd; + uchar status; + uchar css; + u16int vlan; +}; + +enum{ + Factive = 1<<0, + Fstarted = 1<<1, +}; + +typedef void (*Freefn)(Block*); + +struct Ctlr { + Pcidev *p; + uintmem port; + u32int *reg; + uchar 
flag; + uint poolno; + Rbpool *pool; + int nrd, ntd, nrb, rbsz; + QLock slock, alock, tlock; + Rendez lrendez, trendez, rrendez; + uint im, lim, rim, tim; + Lock imlock; + char *alloc; + Rd *rdba; + Block **rb; + uint rdt, rdfree; + Td *tdba; + uint tdh, tdt; + Block **tb; + uchar ra[Eaddrlen]; + uchar mta[128]; + uvlong stats[nelem(stattab)]; + int type; + uint speeds[4]; + uint nobufs; +}; + +struct Rbpool { + union { + struct { + Lock; + Block *b; + uint nstarve; + uint nwakey; + uint starve; + Rendez; + }; + uchar pad[64]; /* cacheline */ + }; + union { + struct { + Block *x; + uint nfast; + uint nslow; + }; + uchar pad[64]; /* cacheline */ + }; +}; + +/* tweakable parameters */ +enum{ + Nrd = 256, + Ntd = 256, + Nrb = 2048, + Nctlr = 8, + Rbalign = 8, /* ideally, 4k */ +}; + +static Ctlr *ctlrtab[Nctlr]; +static Lock rblock[Nctlr]; +static Rbpool rbtab[Nctlr]; +static int nctlr; + +char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} + +static void +readstats(Ctlr *c) +{ + int i; + + qlock(&c->slock); + for(i = 0; i < nelem(c->stats); i++) + if(statmask[c->type][i] == 0) + c->stats[i] += c->reg[stattab[i].reg>>2]; + qunlock(&c->slock); +} + +static int speedtab[] = { + 0, + 100, + 1000, + 10000, +}; + +static long +ifstat(Ether *e, void *a, long n, usize offset) +{ + Ctlr *c; + char *s, *p, *q; + uint i, *t; + + c = e->ctlr; + p = s = malloc(READSTR); + q = p+READSTR; + + readstats(c); + for(i = 0; istats[i]>0) + p = seprint(p, q, "%.10s %llud\n", stattab[i].name, c->stats[i]); + t = c->speeds; + p = seprint(p, q, "type: %s\n", cttab[c->type].name); + p = seprint(p, q, "speeds: 0:%d 100:%d 1000:%d 10000:%d\n", t[0], t[1], t[2], t[3]); + p = seprint(p, q, "rdfree: %d rdh %d rdt %d\n", c->rdfree, c->reg[Rdt], c->reg[Rdh]); + seprint(p, q, "nobufs: %ud\n", c->nobufs); + n = readstr(offset, a, n, s); + free(s); + + return n; +} + +static void +im(Ctlr *c, int i) +{ + ilock(&c->imlock); + c->im |= i; + c->reg[Ims] = c->im; + iunlock(&c->imlock); +} + +static 
int +lim(void *v) +{ + return ((Ctlr*)v)->lim != 0; +} + +static void +lproc(void *v) +{ + Ether *e; + Ctlr *c; + int r, i; + + e = v; + c = e->ctlr; +loop: + r = c->reg[Links]; + e->link = (r&Lnkup) != 0; + i = 0; + if(e->link){ + if(cttab[c->type].flag & Fphyspd) + i = (r&Lnkspd9)>>28; + else + i = 2+((r&Lnkspd8) != 0); + } + c->speeds[i]++; + e->mbps = speedtab[i]; + if(cttab[c->type].flag & Fphyoc) + im(c, Lsc|Ioc); + else + im(c, Lsc); + sleep(&c->lrendez, lim, c); + c->lim = 0; + goto loop; +} + +static long +ctl(Ether *, void *, long) +{ + error(Ebadarg); + return -1; +} + +static int +icansleep(void *v) +{ + Rbpool *p; + int r; + + p = v; + ilock(p); + r = p->starve == 0; + iunlock(p); + + return r; +} + +static Block* +rballoc(Rbpool *p) +{ + Block *b; + + for(;;){ + if((b = p->x) != nil){ + p->nfast++; + p->x = b->next; + b->next = nil; + return b; + } + + ilock(p); + b = p->b; + p->b = nil; + if(b == nil){ + p->starve = 1; + p->nstarve++; + iunlock(p); + return nil; + } + p->nslow++; + iunlock(p); + p->x = b; + } +} + +static void +rbfree(Block *b, int t) +{ + Rbpool *p; + + p = rbtab + t; + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, Rbalign); + b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck); + + ilock(p); + b->next = p->b; + p->b = b; + if(p->starve){ + if(1) + iprint("wakey %d; %d %d\n", t, p->nstarve, p->nwakey); + p->nwakey++; + p->starve = 0; + iunlock(p); + wakeup(p); + }else + iunlock(p); +} + +static void +rbfree0(Block *b) +{ + rbfree(b, 0); +} + +static void +rbfree1(Block *b) +{ + rbfree(b, 1); +} + +static void +rbfree2(Block *b) +{ + rbfree(b, 2); +} + +static void +rbfree3(Block *b) +{ + rbfree(b, 3); +} + +static void +rbfree4(Block *b) +{ + rbfree(b, 4); +} + +static void +rbfree5(Block *b) +{ + rbfree(b, 5); +} + +static void +rbfree6(Block *b) +{ + rbfree(b, 6); +} + +static void +rbfree7(Block *b) +{ + rbfree(b, 7); +} + +static Freefn freetab[Nctlr] = { + rbfree0, + rbfree1, + rbfree2, + rbfree3, + rbfree4, + rbfree5, + 
rbfree6, + rbfree7, +}; + +#define Next(x, m) (((x)+1) & (m)) +static int +cleanup(Ctlr *c, int tdh) +{ + Block *b; + uint m, n; + + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status&Tdd){ + tdh = n; + b = c->tb[tdh]; + c->tb[tdh] = 0; + freeb(b); + c->tdba[tdh].status = 0; + } + return tdh; +} + +static void +transmit(Ether *e) +{ + uint i, m, tdt, tdh; + Ctlr *c; + Block *b; + Td *t; + + c = e->ctlr; +// qlock(&c->tlock); + if(!canqlock(&c->tlock)){ + im(c, Itx0); + return; + } + tdh = c->tdh = cleanup(c, c->tdh); + tdt = c->tdt; + m = c->ntd-1; + for(i = 0; i<8; i++){ + if(Next(tdt, m) == tdh){ + im(c, Itx0); + break; + } + if(!(b = qget(e->oq))) + break; + t = c->tdba+tdt; + t->addr[0] = PCIWADDRL(b->rp); + t->addr[1] = PCIWADDRH(b->rp); + t->length = BLEN(b); + t->cmd = Rs|Ifcs|Teop; + c->tb[tdt] = b; + tdt = Next(tdt, m); + } + if(i){ + c->tdt = tdt; + coherence(); + c->reg[Tdt] = tdt; + } + qunlock(&c->tlock); +} + +static int +tim(void *c) +{ + return ((Ctlr*)c)->tim != 0; +} + +static void +tproc(void *v) +{ + Ether *e; + Ctlr *c; + + e = v; + c = e->ctlr; +loop: + sleep(&c->trendez, tim, c); /* transmit kicks us */ + c->tim = 0; + transmit(e); + goto loop; +} + +static void +rxinit(Ctlr *c) +{ + Block *b; + int i; + + c->reg[Rxctl] &= ~Rxen; + for(i = 0; inrd; i++){ + b = c->rb[i]; + c->rb[i] = 0; + if(b) + freeb(b); + } + c->rdfree = 0; + + c->reg[Fctrl] |= Bam|Rfce|Dpf; + c->reg[Rxcsum] |= Ipcs; + c->reg[Srrctl] = (c->rbsz+1023)/1024; + c->reg[Mhadd] = c->rbsz<<16; + c->reg[Hlreg0] |= Txcrcen|Jumboen; + + c->reg[Rbal] = PCIWADDRL(c->rdba); + c->reg[Rbah] = PCIWADDRH(c->rdba); + c->reg[Rdlen] = c->nrd*sizeof(Rd); + c->reg[Rdh] = 0; + c->reg[Rdt] = c->rdt = 0; + + c->reg[Rdrxctl] = Rdmt¼; + c->reg[Rxdctl] = 8<reg[Rxctl] |= Rxen|Dmbyps; +} + +static int +replenish(Ether *e, Ctlr *c, uint rdh, int maysleep) +{ + int rdt, m, i; + Block *b; + Rd *r; + Rbpool *p; + + m = c->nrd-1; + i = 0; + p = c->pool; + for(rdt = c->rdt; Next(rdt, m) != rdh; rdt = 
Next(rdt, m)){ + r = c->rdba+rdt; + while((b = rballoc(c->pool)) == nil){ + c->nobufs++; + if(maysleep == 0) + goto nobufs; + if(1){ + iprint("%s:%d: starve %d\n", cname(c), c->poolno, qlen(e->oq)); + for(int j = 0; j < Ntypes; j++){ + if(e->f[j] == nil) + continue; + print(" %.4ux %d\n", e->f[j]->type, qlen(e->f[j]->iq)); + } + } + sleep(p, icansleep, p); + } + c->rb[rdt] = b; + r->addr[0] = PCIWADDRL(b->rp); + r->addr[1] = PCIWADDRH(b->rp); + r->status = 0; + c->rdfree++; + i++; + } +nobufs: + if(i){ + coherence(); + c->reg[Rdt] = c->rdt = rdt; + } + if(rdt == rdh) + return -1; + return 0; +} + +static int +rim(void *v) +{ + return ((Ctlr*)v)->rim != 0; +} + +static void +rproc(void *v) +{ + Ether *e; + Ctlr *c; + Block *b; + Rd *r; + uint m, rdh; + + e = v; + c = e->ctlr; + m = c->nrd-1; + rdh = 0; +loop: + replenish(e, c, rdh, 1); + im(c, Irx0); + sleep(&c->rrendez, rim, c); +loop1: + c->rim = 0; + if(c->nrd-c->rdfree >= 16) + if(replenish(e, c, rdh, 0) == -1) + goto loop; + r = c->rdba+rdh; + if(!(r->status&Rdd)) + goto loop; + b = c->rb[rdh]; + c->rb[rdh] = 0; + b->wp += r->length; + b->lim = b->wp; /* lie like a dog */ + if(!(r->status&Ixsm)){ + if(r->status&Ipcs) + b->flag |= Bipck; + if(r->status&Tcpcs) + b->flag |= Btcpck|Budpck; + b->checksum = r->cksum; + } + r->status = 0; + etheriq(e, b, 1); + c->rdfree--; + rdh = Next(rdh, m); + goto loop1; +} + +static void +promiscuous(void *a, int on) +{ + Ether *e; + Ctlr *c; + + e = a; + c = e->ctlr; + if(on) + c->reg[Fctrl] |= Upe|Mpe; + else + c->reg[Fctrl] &= ~(Upe|Mpe); +} + +static void +multicast(void *a, uchar *ea, int on) +{ + Ether *e; + Ctlr *c; + int b, i; + + e = a; + c = e->ctlr; + + i = ea[5]>>1; + b = (ea[5]&1)<<4|ea[4]>>4; + b = 1<mta[i] |= b; + else + c->mta[i] &= ~b; + c->reg[Mta+i] = c->mta[i]; +} + +static int +detach(Ctlr *c) +{ + int i; + + c->reg[Imc] = ~0; + c->reg[Ctrl] |= Rst; + for(i = 0; i < 100; i++){ + delay(1); + if((c->reg[Ctrl]&Rst) == 0) + goto good; + } + return -1; +good: + /* 
errata */ + delay(50); + c->reg[Ecc] &= ~(1<<21|1<<18|1<<9|1<<6); + + /* not cleared by reset; kill it manually. */ + for(i = 1; i<16; i++) + c->reg[Rah] &= ~(1<<31); + for(i = 0; i<128; i++) + c->reg[Mta+i] = 0; + for(i = 1; i<640; i++) + c->reg[Vfta+i] = 0; + return 0; +} + +static void +shutdown(Ether *e) +{ + detach(e->ctlr); +} + +/* ≤ 20ms */ +static ushort +eeread(Ctlr *c, int i) +{ + c->reg[Eerd] = EEstart|i<<2; + while((c->reg[Eerd]&EEdone) == 0) + ; + return c->reg[Eerd]>>16; +} + +static int +eeload(Ctlr *c) +{ + ushort u, v, p, l, i, j; + + if((eeread(c, 0)&0xc0) != 0x40) + return -1; + u = 0; + for(i = 0; i < 0x40; i++) + u += eeread(c, i); + for(i = 3; i < 0xf; i++){ + if(c->type == x540 && (i == 4 || i == 5)) + continue; + p = eeread(c, i); + l = eeread(c, p++); + if((int)p+l+1 > 0xffff) + continue; + for(j = p; j < p+l; j++) + u += eeread(c, j); + } + if(u != 0xbaba) + return -1; + if(c->reg[Status]&1<<3) + u = eeread(c, 10); + else + u = eeread(c, 9); + u++; + for(i = 0; ira[i++] = v; + c->ra[i++] = v>>8; + } + c->ra[5] += (c->reg[Status]&0xc)>>2; + return 0; +} + +static int +reset(Ctlr *c) +{ + uchar *p; + int i; + + if(detach(c)){ + print("%s: reset timeout\n", cname(c)); + return -1; + } + if(eeload(c)){ + print("%s: eeprom failure\n", cname(c)); + return -1; + } + p = c->ra; + c->reg[Ral] = p[3]<<24|p[2]<<16|p[1]<<8|p[0]; + c->reg[Rah] = p[5]<<8|p[4]|1<<31; + + readstats(c); + for(i = 0; istats); i++) + c->stats[i] = 0; + + c->reg[Ctrlext] |= 1<<16; + /* make some guesses for flow control */ + c->reg[Fcrtl] = 0x10000|1<<31; + c->reg[Fcrth] = 0x40000|1<<31; + c->reg[Rcrtv] = 0x6000; + + /* configure interrupt mapping (don't ask) */ + if(cttab[c->type].flag & Fsplitivar){ + c->reg[Ivar+0] = Ivrx; + c->reg[Ivar+64/4] = Ivtx; +// c->reg[Ivar+97/4] = (2|1<<7)<<8*(97%4); + }else + c->reg[Ivar+0] = Ivtx<<8 | Ivrx; + + /* interrupt throttling goes here. 
*/ + for(i = Itr; ireg[i] = 128; /* ¼µs intervals */ + c->reg[Itr+Itx0] = 256; + return 0; +} + +static void +txinit(Ctlr *c) +{ + Block *b; + int i; + + c->reg[Txdctl] = 16<ntd; i++){ + b = c->tb[i]; + c->tb[i] = 0; + if(b) + freeb(b); + } + memset(c->tdba, 0, c->ntd*sizeof(Td)); + c->reg[Tdbal] = PCIWADDRL(c->tdba); + c->reg[Tdbah] = PCIWADDRH(c->tdba); + c->reg[Tdlen] = c->ntd*sizeof(Td); + c->reg[Tdh] = 0; + c->reg[Tdt] = 0; + c->tdh = c->ntd-1; + c->tdt = 0; + if(cttab[c->type].flag & Ftxctl) + c->reg[Dtxctl] |= Den; + c->reg[Txdctl] |= Ten; +} + +static void +attach(Ether *e) +{ + Block *b; + Ctlr *c; + int t; + char buf[KNAMELEN]; + + c = e->ctlr; + qlock(&c->alock); + if(c->alloc){ + qunlock(&c->alock); + return; + } + + c->nrd = Nrd; + c->ntd = Ntd; + t = c->nrd*sizeof *c->rdba+255; + t += c->ntd*sizeof *c->tdba+255; + t += (c->ntd+c->nrd)*sizeof(Block*); + c->alloc = malloc(t); + qunlock(&c->alock); + if(c->alloc == 0) + error(Enomem); + + c->rdba = (Rd*)ROUNDUP((uintptr)c->alloc, 256); + c->tdba = (Td*)ROUNDUP((uintptr)(c->rdba+c->nrd), 256); + c->rb = (Block**)(c->tdba+c->ntd); + c->tb = (Block**)(c->rb+c->nrd); + + if(waserror()){ + while(b = rballoc(c->pool)){ + b->free = nil; + freeb(b); + } + free(c->alloc); + c->alloc = 0; + nexterror(); + } + for(c->nrb = 0; c->nrb < Nrb; c->nrb++){ + if(!(b = allocb(c->rbsz+Rbalign))) + error(Enomem); + b->free = freetab[c->poolno]; + freeb(b); + } + poperror(); + + rxinit(c); + txinit(c); + + sprint(buf, "#l%dl", e->ctlrno); + kproc(buf, lproc, e); + sprint(buf, "#l%dr", e->ctlrno); + kproc(buf, rproc, e); + sprint(buf, "#l%dt", e->ctlrno); + kproc(buf, tproc, e); +} + +static void +interrupt(Ureg*, void *v) +{ + Ether *e; + Ctlr *c; + int icr, im; + + e = v; + c = e->ctlr; + ilock(&c->imlock); + c->reg[Imc] = ~0; + im = c->im; + while(icr = c->reg[Icr]&c->im){ + if(icr&Lsc){ + im &= ~Lsc; + c->lim = icr&Lsc; + wakeup(&c->lrendez); + } + if(icr&Irx0){ + im &= ~Irx0; + c->rim = icr&Irx0; + wakeup(&c->rrendez); + 
} + if(icr&Itx0){ + im &= ~Itx0; + c->tim = icr&Itx0; + wakeup(&c->trendez); + } + } + c->reg[Ims] = c->im = im; + iunlock(&c->imlock); +} + +static void +hbafixup(Pcidev *p) +{ + uint i; + + i = pcicfgr32(p, PciSVID); + if((i & 0xffff) == 0x1b52 && p->did == 1) + p->did = i>>16; +} + +static void +scan(void) +{ + char *name; + uintmem io; + int type; + void *mem; + Ctlr *c; + Pcidev *p; + + p = 0; + while(p = pcimatch(p, 0x8086, 0)){ + hbafixup(p); + switch(p->did){ + case 0x10c6: /* 82598 af dual port */ + case 0x10c7: /* 82598 af single port */ + case 0x10b6: /* 82598 backplane */ + case 0x10dd: /* 82598 at cx4 */ + case 0x10ec: /* 82598 at cx4 */ + type = i82598; + break; + case 0x10f7: /* 82599 kx/kx4 */ + case 0x10f8: /* 82599 backplane */ + case 0x10f9: /* 82599 cx4 */ + case 0x10fb: /* 82599 sfi/sfp+ */ + case 0x10fc: /* 82599 xaui */ + case 0x151c: /* 82599 base t kx/kx4 “niantic” */ + type = i82599; + break; + case 0x1528: /* x540-at2 “twinville” */ + type = x540; + break; + default: + continue; + } + name = cttab[type].name; + if(nctlr == nelem(ctlrtab)){ + print("%s: %T: too many controllers\n", name, p->tbdf); + return; + } + io = p->mem[0].bar&~(uintmem)0xf; + mem = vmap(io, p->mem[0].size); + if(mem == 0){ + print("%s: %T: cant map bar\n", name, p->tbdf); + continue; + } + c = malloc(sizeof *c); + c->p = p; + c->port = io; + c->reg = (u32int*)mem; + c->rbsz = cttab[type].mtu; + c->type = type; + if(reset(c)){ + print("%s: %T: cant reset\n", name, p->tbdf); + free(c); + vunmap(mem, p->mem[0].size); + continue; + } + pcisetbme(p); + c->poolno = nctlr; + c->pool = rbtab + c->poolno; + ctlrtab[nctlr++] = c; + } +} + +static int +pnp(Ether *e) +{ + Ctlr *c; + int i; + + if(nctlr == 0) + scan(); + for(i = 0; iflag&Factive) + continue; + if(ethercfgmatch(e, c->p, c->port) == 0) + goto found; + } + return -1; +found: + c->flag |= Factive; + e->ctlr = c; + e->port = (uintptr)c->reg; + e->irq = c->p->intl; + e->tbdf = c->p->tbdf; + e->mbps = 10000; + e->maxmtu 
= c->rbsz; + memmove(e->ea, c->ra, Eaddrlen); + e->arg = e; + e->attach = attach; + e->ctl = ctl; + e->ifstat = ifstat; + e->interrupt = interrupt; + e->multicast = multicast; + e->promiscuous = promiscuous; + e->shutdown = shutdown; + e->transmit = transmit; + + return 0; +} + +void +ether82598link(void) +{ + addethercard("i82598", pnp); +} diff -Nru /sys/src/9k/386/etherbcm.c /sys/src/9k/386/etherbcm.c --- /sys/src/9k/386/etherbcm.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/etherbcm.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,886 @@ +/* + * Broadcom BCM57xx + * Not implemented: + * proper fatal error handling + * multiple rings + * checksum offloading + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" + +#include "etherif.h" +#include "../port/ethermii.h" + +#define dprint(...) do{ if(debug)print(__VA_ARGS__); }while(0) +#define Rbsz ROUNDUP(1514+4, 4) + +typedef struct Ctlr Ctlr; +struct Ctlr { + Lock txlock, imlock; + Ether *ether; + Ctlr *next; + Pcidev *pdev; + u32int *nic, *status; + + u32int *recvret, *recvprod, *sendr; + uintptr port; + uint recvreti, recvprodi, sendri, sendcleani; + Block **sends; + Block **rxs; + int active, duplex; + int type; + + uint nobuf; + uint partial; + uint rxerr; + uint qfull; + uint dmaerr; +}; + +enum { + /* configurable constants */ + RxRetRingLen = 0x200, + RxProdRingLen = 0x200, + SendRingLen = 0x200, + + Reset = 1<<0, + Enable = 1<<1, + Attn = 1<<2, + + Pwrctlstat = 0x4C, + + MiscHostCtl = 0x68, + TaggedStatus = 1<<9, + IndirAccessEn = 1<<7, + EnableClockCtl = 1<<5, + PCIStateRegEn = 1<<4, + WordSwap = 1<<3, + ByteSwap = 1<<2, + MaskPCIInt = 1<<1, + ClearIntA = 1<<0, + + Fwmbox = 0x0b50, /* magic value exchange */ + Fwmagic = 0x4b657654, + + Dmarwctl = 0x6C, + DMAWaterMask = ~(7<<19), + DMAWaterValue = 3<<19, + + Memwind = 0x7C, + MemwindData = 0x84, + + SendRCB = 0x100, + RxRetRCB = 0x200, + + 
InterruptMailbox = 0x204, + + RxProdBDRingIdx = 0x26c, + RxBDRetRingIdx = 0x284, + SendBDRingHostIdx = 0x304, + + MACMode = 0x400, + MACPortMask = ~(1<<3 | 1<<2), + MACPortGMII = 1<<3, + MACPortMII = 1<<2, + MACEnable = 1<<23 | 1<<22 | 1<<21 | 1 << 15 | 1 << 14 | 1<<12 | 1<<11, + MACHalfDuplex = 1<<1, + + MACEventStatus = 0x404, + MACEventEnable = 0x408, + MACAddress = 0x410, + RandomBackoff = 0x438, + RxMTU = 0x43C, + MIComm = 0x44C, + MIStatus = 0x450, + MIMode = 0x454, + RxMACMode = 0x468, + TxMACMode = 0x45C, + TxMACLengths = 0x464, + MACHash = 0x470, + RxRules = 0x480, + + RxRulesConf = 0x500, + LowWaterMax = 0x504, + LowWaterMaxMask = ~0xFFFF, + LowWaterMaxValue = 2, + + SendDataInitiatorMode = 0xC00, + SendInitiatorConf = 0x0C08, + SendStats = 1<<0, + SendInitiatorMask = 0x0C0C, + + SendDataCompletionMode = 0x1000, + SendBDSelectorMode = 0x1400, + SendBDInitiatorMode = 0x1800, + SendBDCompletionMode = 0x1C00, + + RxListPlacementMode = 0x2000, + RxListPlacement = 0x2010, + RxListPlacementConf = 0x2014, + RxStats = 1<<0, + RxListPlacementMask = 0x2018, + + RxDataBDInitiatorMode = 0x2400, + RxBDHostAddr = 0x2450, + RxBDFlags = 0x2458, + RxBDNIC = 0x245C, + RxDataCompletionMode = 0x2800, + RxBDInitiatorMode = 0x2C00, + RxBDRepl = 0x2C18, + + RxBDCompletionMode = 0x3000, + HostCoalMode = 0x3C00, + HostCoalRxTicks = 0x3C08, + HostCoalSendTicks = 0x3C0C, + RxMaxCoalFrames = 0x3C10, + SendMaxCoalFrames = 0x3C14, + RxMaxCoalFramesInt = 0x3C20, + SendMaxCoalFramesInt = 0x3C24, + StatusBlockHostAddr = 0x3C38, + FlowAttention = 0x3C48, + + MemArbiterMode = 0x4000, + + BufferManMode = 0x4400, + + MBUFLowWater = 0x4414, + MBUFHighWater = 0x4418, + + ReadDMAMode = 0x4800, + ReadDMAStatus = 0x4804, + WriteDMAMode = 0x4C00, + WriteDMAStatus = 0x4C04, + + RISCState = 0x5004, + FTQReset = 0x5C00, + MSIMode = 0x6000, + + ModeControl = 0x6800, + ByteWordSwap = 1<<4 | 1<<5 | 1<<2, // | 1<<1, + HostStackUp = 1<<16, + HostSendBDs = 1<<17, + InterruptOnMAC = 1<<26, + + MiscConf = 
0x6804, + CoreClockBlocksReset = 1<<0, + GPHYPwrdnOverride = 1<<26, + DisableGRCRstOnPpcie = 1<<29, + TimerMask = ~0xFF, + TimerValue = 65<<1, + MiscLocalControl = 0x6808, + InterruptOnAttn = 1<<3, + AutoSEEPROM = 1<<24, + + SwArbitration = 0x7020, + SwArbitSet1 = 1<<1, + SwArbitWon1 = 1<<9, + Pcitlplpl = 0x7C00, /* "lower 1k of the pcie pl regs" ?? */ + + PhyAuxControl = 0x18, + PhyIntStatus = 0x1A, + PhyIntMask = 0x1B, + + Updated = 1<<0, + LinkStateChange = 1<<1, + Error = 1<<2, + + PacketEnd = 1<<2, + FrameError = 1<<10, +}; + +enum { + b5722, + b5751, + b5754, + b5755, + b5756, + b5782, + b5787, + b5906, + Nctlrtype, +}; + +typedef struct Ctlrtype Ctlrtype; +struct Ctlrtype { + int mtu; + int flag; + char *name; +}; + +static Ctlrtype cttab[Nctlrtype] = { +[b5722] 1514, 0, "b5722", +[b5751] 1514, 0, "b5751", +[b5754] 1514, 0, "b5754", +[b5755] 1514, 0, "b5755", +[b5756] 1514, 0, "b5756", +[b5782] 1514, 0, "b5782", +[b5787] 1514, 0, "b5787", +[b5906] 1514, 0, "b5906", +}; + +#define csr32(c, r) ((c)->nic[(r)/4]) + +static Ctlr *bcmhead; +static int debug; + +static char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} + +static long +bcmifstat(Ether *edev, void *a, long n, usize offset) +{ + char *s, *p, *e; + Ctlr *c; + + c = edev->ctlr; + p = s = malloc(READSTR); + e = p + READSTR; + + p = seprint(p, e, "nobuf %ud\n", c->nobuf); + p = seprint(p, e, "partial %ud\n", c->partial); + p = seprint(p, e, "rxerr %ud\n", c->rxerr); + p = seprint(p, e, "qfull %ud\n", c->qfull); + p = seprint(p, e, "dmaerr %ud\n", c->dmaerr); + p = seprint(p, e, "type: %s\n", cname(c)); + + USED(p); + n = readstr(offset, a, n, s); + free(s); + + return n; +} + +enum { + Phybusy = 1<<29, + Phyrdfail = 1<<28, + Phyrd = 1<<27, + Phywr = 1<<26, +}; +Lock miilock; + +static uint +miiwait(Ctlr *ctlr) +{ + uint i, v; + + for(i = 0; i < 100; i += 5){ + microdelay(10); + v = csr32(ctlr, MIComm); + if((v & Phybusy) == 0){ + microdelay(5); + return csr32(ctlr, MIComm); + } + microdelay(5); + } 
+ print("#l%d: bcm: miiwait: timeout\n", ctlr->ether->ctlrno); + return ~0; +} + +static int +miir(Ctlr *ctlr, int r) +{ + uint v, phyno; + + phyno = 1; + lock(&miilock); + csr32(ctlr, MIComm) = r<<16 | phyno<<21 | Phyrd | Phybusy; + v = miiwait(ctlr); + unlock(&miilock); + if(v == ~0) + return -1; + if(v & Phyrdfail){ + print("#l%d: bcm: miir: fail\n", ctlr->ether->ctlrno); + return -1; + } + return v & 0xffff; +} + +static int +miiw(Ctlr *ctlr, int r, int v) +{ + uint phyno, w; + + phyno = 1; + lock(&miilock); + csr32(ctlr, MIComm) = r<<16 | v&0xffff | phyno<<21 | Phywr | Phybusy; + w = miiwait(ctlr); + unlock(&miilock); + if(w == ~0) + return -1; + return 0; +} + +static void +checklink(Ether *edev) +{ + uint i; + Ctlr *ctlr; + + ctlr = edev->ctlr; + miir(ctlr, Bmsr); /* read twice for current status as per 802.3 */ + if(!(miir(ctlr, Bmsr) & BmsrLs)) { + edev->link = 0; + edev->mbps = 1000; + ctlr->duplex = 1; + dprint("bcm: no link\n"); + goto out; + } + edev->link = 1; + while((miir(ctlr, Bmsr) & BmsrAnc) == 0) + ; + i = miir(ctlr, Mssr); + if(i & (Mssr1000THD | Mssr1000TFD)) { + edev->mbps = 1000; + ctlr->duplex = (i & Mssr1000TFD) != 0; + } else if(i = miir(ctlr, Anlpar), i & (AnaTXHD | AnaTXFD)) { + edev->mbps = 100; + ctlr->duplex = (i & AnaTXFD) != 0; + } else if(i & (Ana10HD | Ana10FD)) { + edev->mbps = 10; + ctlr->duplex = (i & Ana10FD) != 0; + } else { + edev->link = 0; + edev->mbps = 1000; + ctlr->duplex = 1; + dprint("bcm: link partner supports neither 10/100/1000 Mbps\n"); + goto out; + } + dprint("bcm: %d Mbps link, %s duplex\n", edev->mbps, ctlr->duplex ? 
"full" : "half"); +out: + if(ctlr->duplex) + csr32(ctlr, MACMode) &= ~MACHalfDuplex; + else + csr32(ctlr, MACMode) |= MACHalfDuplex; + if(edev->mbps >= 1000) + csr32(ctlr, MACMode) = (csr32(ctlr, MACMode) & MACPortMask) | MACPortGMII; + else + csr32(ctlr, MACMode) = (csr32(ctlr, MACMode) & MACPortMask) | MACPortMII; + csr32(ctlr, MACEventStatus) |= (1<<4) | (1<<3); /* undocumented bits (sync and config changed) */ +} + +static uint* +currentrecvret(Ctlr *ctlr) +{ + if(ctlr->recvreti == (ctlr->status[4] & 0xFFFF)) + return 0; + return ctlr->recvret + ctlr->recvreti * 8; +} + +static void +consumerecvret(Ctlr *ctlr) +{ + ctlr->recvreti = ctlr->recvreti+1 & RxRetRingLen-1; + csr32(ctlr, RxBDRetRingIdx) = ctlr->recvreti; +} + +static int +replenish(Ctlr *ctlr) +{ + uint incr; + u32int *next; + Block *bp; + + incr = (ctlr->recvprodi + 1) & (RxProdRingLen - 1); + if(incr == (ctlr->status[2] >> 16)) + return -1; + bp = iallocb(Rbsz); + if(bp == nil) { + /* iallocb never fails. this code is unnecessary */ + dprint("bcm: out of memory for receive buffers\n"); + ctlr->nobuf++; + return -1; + } + next = ctlr->recvprod + ctlr->recvprodi * 8; + memset(next, 0, 32); + next[0] = PCIWADDRH(bp->rp); + next[1] = PCIWADDRL(bp->rp); + next[2] = Rbsz; + next[7] = ctlr->recvprodi; + ctlr->rxs[ctlr->recvprodi] = bp; + coherence(); + csr32(ctlr, RxProdBDRingIdx) = ctlr->recvprodi = incr; + return 0; +} + +static void +bcmreceive(Ether *edev) +{ + uint len; + u32int *pkt; + Ctlr *ctlr; + Block *bp; + + ctlr = edev->ctlr; + for(; pkt = currentrecvret(ctlr); replenish(ctlr), consumerecvret(ctlr)) { + bp = ctlr->rxs[pkt[7]]; + len = pkt[2] & 0xFFFF; + bp->wp = bp->rp + len; + if((pkt[3] & PacketEnd) == 0){ + dprint("bcm: partial frame received -- shouldn't happen\n"); + ctlr->partial++; + freeb(bp); + continue; + } + if(pkt[3] & FrameError){ + ctlr->rxerr++; + freeb(bp); + continue; + } + etheriq(edev, bp, 1); + } +} + +static void +bcmtransclean(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = 
edev->ctlr; + ilock(&ctlr->txlock); + while(ctlr->sendcleani != (ctlr->status[4] >> 16)) { + freeb(ctlr->sends[ctlr->sendcleani]); + ctlr->sends[ctlr->sendcleani] = nil; + ctlr->sendcleani = (ctlr->sendcleani + 1) & (SendRingLen - 1); + } + iunlock(&ctlr->txlock); +} + +static void +bcmtransmit(Ether *edev) +{ + uint incr; + u32int *next; + Ctlr *ctlr; + Block *bp; + + ctlr = edev->ctlr; + ilock(&ctlr->txlock); + for(;;){ + incr = (ctlr->sendri + 1) & (SendRingLen - 1); + if(incr == ctlr->sendcleani) { + dprint("bcm: send queue full\n"); + ctlr->qfull++; + break; + } + bp = qget(edev->oq); + if(bp == nil) + break; + next = ctlr->sendr + ctlr->sendri * 4; + next[0] = PCIWADDRH(bp->rp); + next[1] = PCIWADDRL(bp->rp); + next[2] = (BLEN(bp) << 16) | PacketEnd; + next[3] = 0; + ctlr->sends[ctlr->sendri] = bp; + coherence(); + csr32(ctlr, SendBDRingHostIdx) = ctlr->sendri = incr; + } + iunlock(&ctlr->txlock); +} + +static void +bcmerror(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = edev->ctlr; + if(csr32(ctlr, FlowAttention)) { + if(csr32(ctlr, FlowAttention) & 0xf8ff8080) + print("bcm: fatal error %#.8ux", csr32(ctlr, FlowAttention)); + csr32(ctlr, FlowAttention) = 0; + } + csr32(ctlr, MACEventStatus) = 0; /* worth ignoring */ + if(csr32(ctlr, ReadDMAStatus) || csr32(ctlr, WriteDMAStatus)) { + dprint("bcm: DMA error\n"); + ctlr->dmaerr++; + csr32(ctlr, ReadDMAStatus) = 0; + csr32(ctlr, WriteDMAStatus) = 0; + } + if(csr32(ctlr, RISCState)) { + if(csr32(ctlr, RISCState) & 0x78000403) + print("bcm: RISC halted %#.8ux", csr32(ctlr, RISCState)); + csr32(ctlr, RISCState) = 0; + } +} + +static void +bcminterrupt(Ureg*, void *arg) +{ + u32int status, tag, dummy; + Ether *edev; + Ctlr *ctlr; + + edev = arg; + ctlr = edev->ctlr; + ilock(&ctlr->imlock); + dummy = csr32(ctlr, InterruptMailbox); + USED(dummy); + csr32(ctlr, InterruptMailbox) = 1; + status = ctlr->status[0]; + tag = ctlr->status[1]; + ctlr->status[0] = 0; + if(status & Error) + bcmerror(edev); + if(status & 
LinkStateChange) + checklink(edev); + if(0) + iprint("bcm: interrupt %.8ux %.8ux\n", ctlr->status[2], ctlr->status[4]); + bcmreceive(edev); + bcmtransclean(edev); + bcmtransmit(edev); + csr32(ctlr, InterruptMailbox) = tag << 24; + iunlock(&ctlr->imlock); +} + +static void +mem32w(Ctlr *c, uint r, uint v) +{ + pcicfgw32(c->pdev, Memwind, r); + pcicfgw32(c->pdev, MemwindData, v); +} + +static u32int +mem32r(Ctlr *c, uint r) +{ + u32int v; + + pcicfgw32(c->pdev, Memwind, r); + v = pcicfgr32(c->pdev, MemwindData); + pcicfgw32(c->pdev, Memwind, 0); + return v; +} + +static int +bcmµwait(Ctlr *ctlr, uint to, uint r, uint m, uint v) +{ + int i; + + for(i = 0;; i += 100){ + if((csr32(ctlr, r) & m) == v) + return 0; + if(i == to /* µs */) + return -1; + microdelay(100); + } +} + +static int +bcminit(Ether *edev) +{ + uint i; + u32int j; + Ctlr *ctlr; + + ctlr = edev->ctlr; + dprint("bcm: reset\n"); + /* initialization procedure according to the datasheet */ + csr32(ctlr, MiscHostCtl) |= MaskPCIInt | ClearIntA | WordSwap | IndirAccessEn; + csr32(ctlr, SwArbitration) |= SwArbitSet1; + if(bcmµwait(ctlr, 2000, SwArbitration, SwArbitWon1, SwArbitWon1) == -1){ + print("bcm: arbiter failed to respond\n"); + return -1; + } + csr32(ctlr, MemArbiterMode) |= Enable; + csr32(ctlr, MiscHostCtl) = WordSwap | IndirAccessEn | PCIStateRegEn | EnableClockCtl + | MaskPCIInt | ClearIntA; + csr32(ctlr, Memwind) = 0; + mem32w(ctlr, Fwmbox, Fwmagic); + csr32(ctlr, MiscConf) |= GPHYPwrdnOverride | DisableGRCRstOnPpcie | CoreClockBlocksReset; + delay(100); + pcicfgw32(ctlr->pdev, PciPCR, ctlr->pdev->pcr); /* restore pci bits lost */ + csr32(ctlr, MiscHostCtl) |= MaskPCIInt | ClearIntA; + csr32(ctlr, MemArbiterMode) |= Enable; + csr32(ctlr, MiscHostCtl) |= WordSwap | IndirAccessEn | PCIStateRegEn | EnableClockCtl | TaggedStatus; + csr32(ctlr, ModeControl) |= ByteWordSwap; + csr32(ctlr, MACMode) = (csr32(ctlr, MACMode) & MACPortMask) | MACPortGMII; + delay(40); + for(i = 0;; i += 100){ + 
if(mem32r(ctlr, Fwmbox) == ~Fwmagic) + break; + if(i == 20*10000 /* µs */){ + print("bcm: fw failed to respond %#.8ux\n", mem32r(ctlr, Fwmbox)); + break; //return -1; + } + microdelay(100); + } + /* + * there appears to be no justification for setting these bits in any driver + * i can find. nor to i have a datasheet that recommends this. - quanstro + * csr32(ctlr, Pcitlplpl) |= 1<<25 | 1<<29; + */ + memset(ctlr->status, 0, 20); + csr32(ctlr, Dmarwctl) = (csr32(ctlr, Dmarwctl) & DMAWaterMask) | DMAWaterValue; + csr32(ctlr, ModeControl) |= HostSendBDs | HostStackUp | InterruptOnMAC; + csr32(ctlr, MiscConf) = (csr32(ctlr, MiscConf) & TimerMask) | TimerValue; + csr32(ctlr, MBUFLowWater) = 0x20; + csr32(ctlr, MBUFHighWater) = 0x60; + csr32(ctlr, LowWaterMax) = (csr32(ctlr, LowWaterMax) & LowWaterMaxMask) | LowWaterMaxValue; + csr32(ctlr, BufferManMode) |= Enable | Attn; + if(bcmµwait(ctlr, 2000, BufferManMode, Enable, Enable) == -1){ + print("bcm: failed to enable buffers\n"); + return -1; + } + csr32(ctlr, FTQReset) = ~0; + csr32(ctlr, FTQReset) = 0; + if(bcmµwait(ctlr, 2000, FTQReset, ~0, 0) == -1){ + print("bcm: failed to bring ftq out of reset\n"); + return -1; + } + csr32(ctlr, RxBDHostAddr) = PCIWADDRH(ctlr->recvprod); + csr32(ctlr, RxBDHostAddr + 4) = PCIWADDRL(ctlr->recvprod); + csr32(ctlr, RxBDFlags) = RxProdRingLen << 16; + csr32(ctlr, RxBDNIC) = 0x6000; + csr32(ctlr, RxBDRepl) = 25; + csr32(ctlr, SendBDRingHostIdx) = 0; + csr32(ctlr, SendBDRingHostIdx+4) = 0; + mem32w(ctlr, SendRCB, PCIWADDRH(ctlr->sendr)); + mem32w(ctlr, SendRCB + 4, PCIWADDRL(ctlr->sendr)); + mem32w(ctlr, SendRCB + 8, SendRingLen << 16); + mem32w(ctlr, SendRCB + 12, 0x4000); + for(i=1; i<4; i++) + mem32w(ctlr, RxRetRCB + i * 0x10 + 8, 2); + mem32w(ctlr, RxRetRCB, PCIWADDRH(ctlr->recvret)); + mem32w(ctlr, RxRetRCB + 4, PCIWADDRL(ctlr->recvret)); + mem32w(ctlr, RxRetRCB + 8, RxRetRingLen << 16); + csr32(ctlr, RxProdBDRingIdx) = 0; + csr32(ctlr, RxProdBDRingIdx+4) = 0; + /* this delay is not 
in the datasheet, but necessary */ + delay(1); + i = csr32(ctlr, MACAddress); + j = edev->ea[0] = i >> 8; + j += edev->ea[1] = i; + i = csr32(ctlr, MACAddress + 4); + j += edev->ea[2] = i >> 24; + j += edev->ea[3] = i >> 16; + j += edev->ea[4] = i >> 8; + j += edev->ea[5] = i; + csr32(ctlr, RandomBackoff) = j & 0x3FF; + csr32(ctlr, RxMTU) = Rbsz; + csr32(ctlr, TxMACLengths) = 0x2620; + csr32(ctlr, RxListPlacement) = 1<<3; /* one list */ + csr32(ctlr, RxListPlacementMask) = 0xFFFFFF; + csr32(ctlr, RxListPlacementConf) |= RxStats; + csr32(ctlr, SendInitiatorMask) = 0xFFFFFF; + csr32(ctlr, SendInitiatorConf) |= SendStats; + csr32(ctlr, HostCoalMode) = 0; + if(bcmµwait(ctlr, 2000, HostCoalMode, ~0, 0) == -1){ + print("bcm: failed to unset coalescing\n"); + return -1; + } + csr32(ctlr, HostCoalRxTicks) = 150; + csr32(ctlr, HostCoalSendTicks) = 150; + csr32(ctlr, RxMaxCoalFrames) = 10; + csr32(ctlr, SendMaxCoalFrames) = 10; + csr32(ctlr, RxMaxCoalFramesInt) = 0; + csr32(ctlr, SendMaxCoalFramesInt) = 0; + csr32(ctlr, StatusBlockHostAddr) = PCIWADDRH(ctlr->status); + csr32(ctlr, StatusBlockHostAddr + 4) = PCIWADDRL(ctlr->status); + csr32(ctlr, HostCoalMode) |= Enable; + csr32(ctlr, RxBDCompletionMode) |= Enable | Attn; + csr32(ctlr, RxListPlacementMode) |= Enable; + csr32(ctlr, MACMode) |= MACEnable; + csr32(ctlr, MiscLocalControl) |= InterruptOnAttn | AutoSEEPROM; + csr32(ctlr, InterruptMailbox) = 0; + csr32(ctlr, WriteDMAMode) |= 0x200003fe; /* pulled out of my nose */ + csr32(ctlr, ReadDMAMode) |= 0x3fe; + csr32(ctlr, RxDataCompletionMode) |= Enable | Attn; + csr32(ctlr, SendDataCompletionMode) |= Enable; + csr32(ctlr, SendBDCompletionMode) |= Enable | Attn; + csr32(ctlr, RxBDInitiatorMode) |= Enable | Attn; + csr32(ctlr, RxDataBDInitiatorMode) |= Enable | (1<<4); + csr32(ctlr, SendDataInitiatorMode) |= Enable; + csr32(ctlr, SendBDInitiatorMode) |= Enable | Attn; + csr32(ctlr, SendBDSelectorMode) |= Enable | Attn; + ctlr->recvprodi = 0; + while(replenish(ctlr) >= 0) + ; 
+ csr32(ctlr, TxMACMode) |= Enable; + csr32(ctlr, RxMACMode) |= Enable; + csr32(ctlr, Pwrctlstat) &= ~3; + csr32(ctlr, MIStatus) |= 1<<0; + csr32(ctlr, MACEventEnable) = 0; + csr32(ctlr, MACEventStatus) |= (1<<12); + csr32(ctlr, MIMode) = 0xC0000; /* set base mii clock */ + microdelay(40); + + if(0){ + /* bug (ours): can't reset phy without dropping into 100mbit mode */ + miiw(ctlr, Bmcr, BmcrR); + for(i = 0;; i += 100){ + if((miir(ctlr, Bmcr) & BmcrR) == 0) + break; + if(i == 10000 /* µs */){ + print("bcm: phy reset failure\n"); + return -1; + } + microdelay(100); + } + } + miiw(ctlr, Bmcr, BmcrAne | BmcrRan); + + miiw(ctlr, PhyAuxControl, 2); + miir(ctlr, PhyIntStatus); + miir(ctlr, PhyIntStatus); + miiw(ctlr, PhyIntMask, ~(1<<1)); + csr32(ctlr, MACEventEnable) |= 1<<12; + for(i = 0; i < 4; i++) + csr32(ctlr, MACHash + 4*i) = ~0; + for(i = 0; i < 8; i++) + csr32(ctlr, RxRules + 8 * i) = 0; + csr32(ctlr, RxRulesConf) = 1 << 3; + csr32(ctlr, MSIMode) |= Enable; + csr32(ctlr, MiscHostCtl) &= ~(MaskPCIInt | ClearIntA); + dprint("bcm: reset: fin\n"); + return 0; +} + +static int +didtype(Pcidev *p) +{ + if(p->vid != 0x14e4) + return -1; + + switch(p->did){ + default: + return -1; + case 0x165a: /* 5722 gbe */ + return b5722; + case 0x1670: /* ?? 
*/ + return b5751; + case 0x1672: /* 5754m */ + return b5754; + case 0x1673: /* 5755m gbe */ + return b5755; + case 0x1674: /* 5756me gbe */ + return b5756; + case 0x1677: /* 5751 gbe */ + return b5751; + case 0x167a: /* 5754 gbe */ + return b5754; + case 0x167b: /* 5755 gbe */ + return b5755; + case 0x1693: /* 5787m gbe */ + return b5787; + case 0x1696: /* 5782 gbe; steve */ + return b5782; + case 0x169b: /* 5787 gbe */ + return b5787; + case 0x1712: /* 5906 fast */ + case 0x1713: /* 5906m fast */ + return b5906; + case 0x167d: /* 5751m gbe */ + case 0x167e: /* 5751f fast */ + return b5751; + } +} + +static void +bcmpci(void) /* walk the pci devices, and for each one didtype recognizes: map its registers, allocate its rings and append a Ctlr to the bcmhead list */ +{ + int type; + void *mem; + Ctlr *ctlr, **xx; + Pcidev *p; + + xx = &bcmhead; + for(p = nil; p = pcimatch(p, 0, 0); ) { + if(p->ccrb != 2 || p->ccru != 0 || (type = didtype(p)) == -1) + continue; + pcisetbme(p); + pcisetpms(p, 0); + ctlr = malloc(sizeof(Ctlr)); + if(ctlr == nil) + continue; + ctlr->type = type; + ctlr->port = p->mem[0].bar & ~(uintmem)0xf; + mem = vmap(ctlr->port, p->mem[0].size); + if(mem == nil) { + print("bcm: can't map %#p\n", (uvlong)ctlr->port); + free(ctlr); + continue; + } + ctlr->pdev = p; + ctlr->nic = mem; + ctlr->status = mallocalign(20, 16, 0, 0); + ctlr->recvprod = mallocalign(32 * RxProdRingLen, 16, 0, 0); + ctlr->recvret = mallocalign(32 * RxRetRingLen, 16, 0, 0); + ctlr->sendr = mallocalign(16 * SendRingLen, 16, 0, 0); + ctlr->sends = malloc(sizeof *ctlr->sends * SendRingLen); + ctlr->rxs = malloc(sizeof *ctlr->rxs * RxProdRingLen); /* one Block* per receive producer ring entry, not per send ring entry */ + *xx = ctlr; /* append, keeping discovery order */ + xx = &ctlr->next; + } +} + +static void +bcmpromiscuous(void* arg, int on) +{ + Ctlr *ctlr; + + ctlr = ((Ether*)arg)->ctlr; + if(on) + csr32(ctlr, RxMACMode) |= 1<<8; + else + csr32(ctlr, RxMACMode) &= ~(1<<8); +} + +static void +bcmmulticast(void*, uchar*, int) +{ +} + +static int +bcmpnp(Ether* edev) +{ + Ctlr *ctlr; + static int done; + + if(done == 0){ + bcmpci(); + done = 1; + } + +redux: + for(ctlr = bcmhead; ; ctlr = ctlr->next) { + if(ctlr == nil) +
return -1; + if(ctlr->active) + continue; + if(ethercfgmatch(edev, ctlr->pdev, ctlr->port) == 0){ + ctlr->active = 1; + break; + } + } + + ctlr->ether = edev; + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pdev->intl; + edev->tbdf = ctlr->pdev->tbdf; + edev->interrupt = bcminterrupt; + edev->ifstat = bcmifstat; + edev->transmit = bcmtransmit; + edev->multicast = bcmmulticast; + edev->promiscuous = bcmpromiscuous; + edev->arg = edev; + edev->mbps = 1000; + + if(bcminit(edev) == -1) + goto redux; + return 0; +} + +void +etherbcmlink(void) +{ + addethercard("bcm57xx", bcmpnp); +} diff -Nru /sys/src/9k/386/etherigbe.c /sys/src/9k/386/etherigbe.c --- /sys/src/9k/386/etherigbe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/etherigbe.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2049 @@ +/* + * Intel 8254[0-7]NN Gigabit Ethernet PCI Controllers + * as found on the Intel PRO/1000 series of adapters: + * 82543GC Intel PRO/1000 T + * 82544EI Intel PRO/1000 XT + * 82540EM Intel PRO/1000 MT + * 82541[GP]I + * 82547GI + * 82546GB + * 82546EB + * To Do: + * finish autonegotiation code; + * integrate fiber stuff back in (this ONLY handles + * the CAT5 cards at the moment); + * add checksum-offload; + * add tuning control via ctl file; + * this driver is little-endian specific. 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/ethermii.h" +#include "../port/netif.h" + +#include "etherif.h" +#include "io.h" + +enum { + i82542 = (0x1000<<16)|0x8086, + i82543gc = (0x1004<<16)|0x8086, + i82544ei = (0x1008<<16)|0x8086, + i82544eif = (0x1009<<16)|0x8086, + i82544gc = (0x100d<<16)|0x8086, + i82540em = (0x100E<<16)|0x8086, + i82540eplp = (0x101E<<16)|0x8086, + i82545em = (0x100F<<16)|0x8086, + i82545gmc = (0x1026<<16)|0x8086, + i82547ei = (0x1019<<16)|0x8086, + i82547gi = (0x1075<<16)|0x8086, + i82541ei = (0x1013<<16)|0x8086, + i82541gi = (0x1076<<16)|0x8086, + i82541gi2 = (0x1077<<16)|0x8086, + i82541pi = (0x107c<<16)|0x8086, + i82546gb = (0x1079<<16)|0x8086, + i82546eb = (0x1010<<16)|0x8086, +}; + +enum { + Ctrl = 0x00000000, /* Device Control */ + Ctrldup = 0x00000004, /* Device Control Duplicate */ + Status = 0x00000008, /* Device Status */ + Eecd = 0x00000010, /* EEPROM/Flash Control/Data */ + Ctrlext = 0x00000018, /* Extended Device Control */ + Mdic = 0x00000020, /* MDI Control */ + Fcal = 0x00000028, /* Flow Control Address Low */ + Fcah = 0x0000002C, /* Flow Control Address High */ + Fct = 0x00000030, /* Flow Control Type */ + Icr = 0x000000C0, /* Interrupt Cause Read */ + Ics = 0x000000C8, /* Interrupt Cause Set */ + Ims = 0x000000D0, /* Interrupt Mask Set/Read */ + Imc = 0x000000D8, /* Interrupt mask Clear */ + Rctl = 0x00000100, /* Receive Control */ + Fcttv = 0x00000170, /* Flow Control Transmit Timer Value */ + Txcw = 0x00000178, /* Transmit Configuration Word */ + Rxcw = 0x00000180, /* Receive Configuration Word */ + /* on the oldest cards (8254[23]), the Mta register is at 0x200 */ + Tctl = 0x00000400, /* Transmit Control */ + Tipg = 0x00000410, /* Transmit IPG */ + Tbt = 0x00000448, /* Transmit Burst Timer */ + Ait = 0x00000458, /* Adaptive IFS Throttle */ + Fcrtl = 0x00002160, /* Flow Control RX Threshold Low */ + Fcrth = 
0x00002168, /* Flow Control Rx Threshold High */ + Rdfh = 0x00002410, /* Receive data fifo head */ + Rdft = 0x00002418, /* Receive data fifo tail */ + Rdfhs = 0x00002420, /* Receive data fifo head saved */ + Rdfts = 0x00002428, /* Receive data fifo tail saved */ + Rdfpc = 0x00002430, /* Receive data fifo packet count */ + Rdbal = 0x00002800, /* Rd Base Address Low */ + Rdbah = 0x00002804, /* Rd Base Address High */ + Rdlen = 0x00002808, /* Receive Descriptor Length */ + Rdh = 0x00002810, /* Receive Descriptor Head */ + Rdt = 0x00002818, /* Receive Descriptor Tail */ + Rdtr = 0x00002820, /* Receive Descriptor Timer Ring */ + Rxdctl = 0x00002828, /* Receive Descriptor Control */ + Radv = 0x0000282C, /* Receive Interrupt Absolute Delay Timer */ + Txdmac = 0x00003000, /* Transfer DMA Control */ + Ett = 0x00003008, /* Early Transmit Control */ + Tdfh = 0x00003410, /* Transmit data fifo head */ + Tdft = 0x00003418, /* Transmit data fifo tail */ + Tdfhs = 0x00003420, /* Transmit data Fifo Head saved */ + Tdfts = 0x00003428, /* Transmit data fifo tail saved */ + Tdfpc = 0x00003430, /* Trasnmit data Fifo packet count */ + Tdbal = 0x00003800, /* Td Base Address Low */ + Tdbah = 0x00003804, /* Td Base Address High */ + Tdlen = 0x00003808, /* Transmit Descriptor Length */ + Tdh = 0x00003810, /* Transmit Descriptor Head */ + Tdt = 0x00003818, /* Transmit Descriptor Tail */ + Tidv = 0x00003820, /* Transmit Interrupt Delay Value */ + Txdctl = 0x00003828, /* Transmit Descriptor Control */ + Tadv = 0x0000382C, /* Transmit Interrupt Absolute Delay Timer */ + + Statistics = 0x00004000, /* Start of Statistics Area */ + Gorcl = 0x88/4, /* Good Octets Received Count */ + Gotcl = 0x90/4, /* Good Octets Transmitted Count */ + Torl = 0xC0/4, /* Total Octets Received */ + Totl = 0xC8/4, /* Total Octets Transmitted */ + Nstatistics = 64, + + Rxcsum = 0x00005000, /* Receive Checksum Control */ + Mta = 0x00005200, /* Multicast Table Array */ + Ral = 0x00005400, /* Receive Address Low */ + Rah 
= 0x00005404, /* Receive Address High */ + Manc = 0x00005820, /* Management Control */ +}; + +enum { /* Ctrl */ + Bem = 0x00000002, /* Big Endian Mode */ + Prior = 0x00000004, /* Priority on the PCI bus */ + Lrst = 0x00000008, /* Link Reset */ + Asde = 0x00000020, /* Auto-Speed Detection Enable */ + Slu = 0x00000040, /* Set Link Up */ + Ilos = 0x00000080, /* Invert Loss of Signal (LOS) */ + SspeedMASK = 0x00000300, /* Speed Selection */ + SspeedSHIFT = 8, + Sspeed10 = 0x00000000, /* 10Mb/s */ + Sspeed100 = 0x00000100, /* 100Mb/s */ + Sspeed1000 = 0x00000200, /* 1000Mb/s */ + Frcspd = 0x00000800, /* Force Speed */ + Frcdplx = 0x00001000, /* Force Duplex */ + SwdpinsloMASK = 0x003C0000, /* Software Defined Pins - lo nibble */ + SwdpinsloSHIFT = 18, + SwdpioloMASK = 0x03C00000, /* Software Defined Pins - I or O */ + SwdpioloSHIFT = 22, + Devrst = 0x04000000, /* Device Reset */ + Rfce = 0x08000000, /* Receive Flow Control Enable */ + Tfce = 0x10000000, /* Transmit Flow Control Enable */ + Vme = 0x40000000, /* VLAN Mode Enable */ +}; + +/* + * can't find Tckok nor Rbcok in any Intel docs, + * but even 82543gc docs define Lanid. + */ +enum { /* Status */ + Lu = 0x00000002, /* Link Up */ + Lanid = 0x0000000C, /* mask for Lan ID. 
(function id) */ +// Tckok = 0x00000004, /* Transmit clock is running */ +// Rbcok = 0x00000008, /* Receive clock is running */ + Txoff = 0x00000010, /* Transmission Paused */ + Tbimode = 0x00000020, /* TBI Mode Indication */ + LspeedMASK = 0x000000C0, /* Link Speed Setting */ + LspeedSHIFT = 6, + Lspeed10 = 0x00000000, /* 10Mb/s */ + Lspeed100 = 0x00000040, /* 100Mb/s */ + Lspeed1000 = 0x00000080, /* 1000Mb/s */ + Mtxckok = 0x00000400, /* MTX clock is running */ + Pci66 = 0x00000800, /* PCI Bus speed indication */ + Bus64 = 0x00001000, /* PCI Bus width indication */ + Pcixmode = 0x00002000, /* PCI-X mode */ + PcixspeedMASK = 0x0000C000, /* PCI-X bus speed */ + PcixspeedSHIFT = 14, + Pcix66 = 0x00000000, /* 50-66MHz */ + Pcix100 = 0x00004000, /* 66-100MHz */ + Pcix133 = 0x00008000, /* 100-133MHz */ +}; + +enum { /* Ctrl and Status */ + Fd = 0x00000001, /* Full-Duplex */ + AsdvMASK = 0x00000300, + AsdvSHIFT = 8, + Asdv10 = 0x00000000, /* 10Mb/s */ + Asdv100 = 0x00000100, /* 100Mb/s */ + Asdv1000 = 0x00000200, /* 1000Mb/s */ +}; + +enum { /* Eecd */ + Sk = 0x00000001, /* Clock input to the EEPROM */ + Cs = 0x00000002, /* Chip Select */ + Di = 0x00000004, /* Data Input to the EEPROM */ + Do = 0x00000008, /* Data Output from the EEPROM */ + Areq = 0x00000040, /* EEPROM Access Request */ + Agnt = 0x00000080, /* EEPROM Access Grant */ + Eepresent = 0x00000100, /* EEPROM Present */ + Eesz256 = 0x00000200, /* EEPROM is 256 words not 64 */ + Eeszaddr = 0x00000400, /* EEPROM size for 8254[17] */ + Spi = 0x00002000, /* EEPROM is SPI not Microwire */ +}; + +enum { /* Ctrlext */ + Gpien = 0x0000000F, /* General Purpose Interrupt Enables */ + SwdpinshiMASK = 0x000000F0, /* Software Defined Pins - hi nibble */ + SwdpinshiSHIFT = 4, + SwdpiohiMASK = 0x00000F00, /* Software Defined Pins - I or O */ + SwdpiohiSHIFT = 8, + Asdchk = 0x00001000, /* ASD Check */ + Eerst = 0x00002000, /* EEPROM Reset */ + Ips = 0x00004000, /* Invert Power State */ + Spdbyps = 0x00008000, /* Speed Select 
Bypass */ +}; + +enum { /* EEPROM content offsets */ + Ea = 0x00, /* Ethernet Address */ + Cf = 0x03, /* Compatibility Field */ + Pba = 0x08, /* Printed Board Assembly number */ + Icw1 = 0x0A, /* Initialization Control Word 1 */ + Sid = 0x0B, /* Subsystem ID */ + Svid = 0x0C, /* Subsystem Vendor ID */ + Did = 0x0D, /* Device ID */ + Vid = 0x0E, /* Vendor ID */ + Icw2 = 0x0F, /* Initialization Control Word 2 */ +}; + +enum { /* Mdic */ + MDIdMASK = 0x0000FFFF, /* Data */ + MDIdSHIFT = 0, + MDIrMASK = 0x001F0000, /* PHY Register Address */ + MDIrSHIFT = 16, + MDIpMASK = 0x03E00000, /* PHY Address */ + MDIpSHIFT = 21, + MDIwop = 0x04000000, /* Write Operation */ + MDIrop = 0x08000000, /* Read Operation */ + MDIready = 0x10000000, /* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rd Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfg = 0x00000400, /* Receiving /C/ ordered sets */ + Gpi0 = 0x00000800, /* General Purpose Interrupts */ + Gpi1 = 0x00001000, + Gpi2 = 0x00002000, + Gpi3 = 0x00004000, +}; + +/* + * The Mdic register isn't implemented on the 82543GC, + * the software defined pins are used instead. + * These definitions work for the Intel PRO/1000 T Server Adapter. + * The direction pin bits are read from the EEPROM. 
+ */ +enum { + Mdd = ((1<<2)<nic+((r)/4))) +#define csr32w(c, r, v) (*((c)->nic+((r)/4)) = (v)) + +static Ctlr* igbectlrhead; +static Ctlr* igbectlrtail; + +static Lock igberblock; /* free receive Blocks */ +static Block* igberbpool; /* receive Blocks for all igbe controllers */ + +static char* statistics[Nstatistics] = { + "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-1522 Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + nil, + nil, + nil, + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-1522 Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", +}; + +static long +igbeifstat(Ether* edev, void* a, long n, ulong offset) +{ + Ctlr *ctlr; + int i, r; + uvlong tuvl, ruvl; + char 
*alloc, *e, *p, *s; + + if((alloc = malloc(READSTR)) == nil) + error(Enomem); + + ctlr = edev->ctlr; + qlock(&ctlr->slock); + + p = alloc; + e = p + READSTR; + for(i = 0; i < Nstatistics; i++){ + r = csr32r(ctlr, Statistics+i*4); + if((s = statistics[i]) == nil) + continue; + switch(i){ + case Gorcl: + case Gotcl: + case Torl: + case Totl: + ruvl = r; + ruvl += ((uvlong)csr32r(ctlr, Statistics+(i+1)*4))<<32; + tuvl = ruvl; + tuvl += ctlr->statistics[i]; + tuvl += ((uvlong)ctlr->statistics[i+1])<<32; + if(tuvl == 0) + continue; + ctlr->statistics[i] = tuvl; + ctlr->statistics[i+1] = tuvl>>32; + p = seprint(p, e, "%s: %llud %llud\n", s, tuvl, ruvl); + i++; + break; + + default: + ctlr->statistics[i] += r; + if(ctlr->statistics[i] == 0) + continue; + p = seprint(p, e, "%s: %ud %ud\n", + s, ctlr->statistics[i], r); + break; + } + } + + p = seprint(p, e, "lintr: %ud %ud\n", ctlr->lintr, ctlr->lsleep); + p = seprint(p, e, "rintr: %ud %ud\n", ctlr->rintr, ctlr->rsleep); + p = seprint(p, e, "tintr: %ud %ud\n", ctlr->tintr, ctlr->txdw); + p = seprint(p, e, "ixcs: %ud %ud %ud\n", + ctlr->ixsm, ctlr->ipcs, ctlr->tcpcs); + p = seprint(p, e, "rdtr: %ud\n", ctlr->rdtr); + p = seprint(p, e, "Ctrlext: %08x\n", csr32r(ctlr, Ctrlext)); + + p = seprint(p, e, "eeprom:"); + for(i = 0; i < 0x40; i++){ + if(i && ((i & 0x07) == 0)) + p = seprint(p, e, "\n "); + p = seprint(p, e, " %4.4ux", ctlr->eeprom[i]); + } + p = seprint(p, e, "\n"); + + if(ctlr->mii != nil && ctlr->mii->curphy != nil) + miidumpphy(ctlr->mii, p, e); + + n = readstr(offset, a, n, alloc); + + qunlock(&ctlr->slock); + free(alloc); + + return n; +} + +enum { + CMrdtr, +}; + +static Cmdtab igbectlmsg[] = { + CMrdtr, "rdtr", 2, +}; + +static long +igbectl(Ether* edev, void* buf, long n) +{ + int v; + char *p; + Ctlr *ctlr; + Cmdbuf *cb; + Cmdtab *ct; + + if((ctlr = edev->ctlr) == nil) + error(Enonexist); + + cb = parsecmd(buf, n); + if(waserror()){ + free(cb); + nexterror(); + } + + ct = lookupcmd(cb, igbectlmsg, 
nelem(igbectlmsg)); + switch(ct->index){ + case CMrdtr: + v = strtol(cb->f[1], &p, 0); + if(v < 0 || p == cb->f[1] || v > 0xFFFF) + error(Ebadarg); + ctlr->rdtr = v; + csr32w(ctlr, Rdtr, Fpd|v); + break; + } + free(cb); + poperror(); + + return n; +} + +static void +igbepromiscuous(void* arg, int on) +{ + int rctl; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + rctl = csr32r(ctlr, Rctl); + rctl &= ~MoMASK; + rctl |= Mo47b36; + if(on) + rctl |= Upe|Mpe; + else + rctl &= ~(Upe|Mpe); + csr32w(ctlr, Rctl, rctl|Mpe); /* temporarily keep Mpe on */ +} + +static void +igbemulticast(void* arg, uchar* addr, int add) +{ + int bit, x; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + x = addr[5]>>1; + bit = ((addr[5] & 1)<<4)|(addr[4]>>4); + /* + * multiple ether addresses can hash to the same filter bit, + * so it's never safe to clear a filter bit. + * if we want to clear filter bits, we need to keep track of + * all the multicast addresses in use, clear all the filter bits, + * then set the ones corresponding to in-use addresses. 
+ */ + if(add) + ctlr->mta[x] |= 1<mta[x] &= ~(1<mta[x]); +} + +static Block* +igberballoc(void) +{ + Block *bp; + + ilock(&igberblock); + if((bp = igberbpool) != nil){ + igberbpool = bp->next; + bp->next = nil; +// ainc(&bp->ref); /* prevent bp from being freed */ + } + iunlock(&igberblock); + + return bp; +} + +static void +igberbfree(Block* bp) +{ + bp->rp = bp->lim - Rbsz; + bp->wp = bp->rp; + bp->flag &= ~(Bpktck|Btcpck|Budpck|Bipck); + + ilock(&igberblock); + bp->next = igberbpool; + igberbpool = bp; + iunlock(&igberblock); +} + +static void +igbeim(Ctlr* ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static int +igbelim(void* ctlr) +{ + return ((Ctlr*)ctlr)->lim != 0; +} + +static void +igbelproc(void* arg) +{ + Ctlr *ctlr; + Ether *edev; + MiiPhy *phy; + int ctrl, r; + + edev = arg; + ctlr = edev->ctlr; + for(;;){ + if(ctlr->mii == nil || ctlr->mii->curphy == nil) { + sched(); + continue; + } + + /* + * To do: + * logic to manage status change, + * this is incomplete but should work + * one time to set up the hardware. + * + * MiiPhy.speed, etc. should be in Mii. + */ + if(miistatus(ctlr->mii) < 0) + //continue; + goto enable; + + phy = ctlr->mii->curphy; + ctrl = csr32r(ctlr, Ctrl); + + switch(ctlr->id){ + case i82543gc: + case i82544ei: + case i82544eif: + default: + if(!(ctrl & Asde)){ + ctrl &= ~(SspeedMASK|Ilos|Fd); + ctrl |= Frcdplx|Frcspd; + if(phy->speed == 1000) + ctrl |= Sspeed1000; + else if(phy->speed == 100) + ctrl |= Sspeed100; + if(phy->fd) + ctrl |= Fd; + } + break; + + case i82540em: + case i82540eplp: + case i82547gi: + case i82541gi: + case i82541gi2: + case i82541pi: + break; + } + + /* + * Collision Distance. 
+ */ + r = csr32r(ctlr, Tctl); + r &= ~ColdMASK; + if(phy->fd) + r |= 64<rfc) + ctrl |= Rfce; + if(phy->tfc) + ctrl |= Tfce; + csr32w(ctlr, Ctrl, ctrl); + +enable: + ctlr->lim = 0; + igbeim(ctlr, Lsc); + + ctlr->lsleep++; + sleep(&ctlr->lrendez, igbelim, ctlr); + } +} + +static void +igbetxinit(Ctlr* ctlr) +{ + int i, r; + Block *bp; + + csr32w(ctlr, Tctl, (0x0F<id){ + default: + r = 6; + break; + case i82543gc: + case i82544ei: + case i82544eif: + case i82544gc: + case i82540em: + case i82540eplp: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82547ei: + case i82547gi: + r = 8; + break; + } + csr32w(ctlr, Tipg, (6<<20)|(8<<10)|r); + csr32w(ctlr, Ait, 0); + csr32w(ctlr, Txdmac, 0); + + csr32w(ctlr, Tdbal, PCIWADDRL(ctlr->tdba)); + csr32w(ctlr, Tdbah, PCIWADDRH(ctlr->tdba)); + csr32w(ctlr, Tdlen, ctlr->ntd*sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + + for(i = 0; i < ctlr->ntd; i++){ + if((bp = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(bp); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + ctlr->tdfree = ctlr->ntd; + + csr32w(ctlr, Tidv, 128); + r = (4<id){ + default: + break; + case i82540em: + case i82540eplp: + case i82547gi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82541gi: + case i82541gi2: + case i82541pi: + r = csr32r(ctlr, Txdctl); + r &= ~WthreshMASK; + r |= Gran|(4<ctlr; + + ilock(&ctlr->tlock); + + /* + * Free any completed packets + */ + tdh = ctlr->tdh; + while(NEXT(tdh, ctlr->ntd) != csr32r(ctlr, Tdh)){ + if((bp = ctlr->tb[tdh]) != nil){ + ctlr->tb[tdh] = nil; + freeb(bp); + } + memset(&ctlr->tdba[tdh], 0, sizeof(Td)); + tdh = NEXT(tdh, ctlr->ntd); + } + ctlr->tdh = tdh; + + /* + * Try to fill the ring back up. 
+ */ + tdt = ctlr->tdt; + while(NEXT(tdt, ctlr->ntd) != tdh){ + if((bp = qget(edev->oq)) == nil) + break; + td = &ctlr->tdba[tdt]; + td->addr[0] = PCIWADDRL(bp->rp); + td->addr[1] = PCIWADDRH(bp->rp); + td->control = ((BLEN(bp) & LenMASK)<control |= Dext|Ifcs|Teop|DtypeDD; + ctlr->tb[tdt] = bp; + tdt = NEXT(tdt, ctlr->ntd); + if(NEXT(tdt, ctlr->ntd) == tdh){ + td->control |= Rs; + ctlr->txdw++; + ctlr->tdt = tdt; + csr32w(ctlr, Tdt, tdt); + igbeim(ctlr, Txdw); + break; + } + ctlr->tdt = tdt; + csr32w(ctlr, Tdt, tdt); + } + + iunlock(&ctlr->tlock); +} + +static void +igbereplenish(Ctlr* ctlr) +{ + Rd *rd; + int rdt; + Block *bp; + + rdt = ctlr->rdt; + while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){ + rd = &ctlr->rdba[rdt]; + if(ctlr->rb[rdt] == nil){ + bp = igberballoc(); + if(bp == nil){ + iprint("#l%d: igbereplenish: no available buffers\n", + ctlr->edev->ctlrno); + break; + } + ctlr->rb[rdt] = bp; + rd->addr[0] = PCIWADDRL(bp->rp); + rd->addr[1] = PCIWADDRH(bp->rp); + } + coherence(); + rd->status = 0; + rdt = NEXT(rdt, ctlr->nrd); + ctlr->rdfree++; + } + ctlr->rdt = rdt; + csr32w(ctlr, Rdt, rdt); +} + +static void +igberxinit(Ctlr* ctlr) +{ + int i; + Block *bp; + + /* temporarily keep Mpe on */ + csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF|Mpe); + + csr32w(ctlr, Rdbal, PCIWADDRL(ctlr->rdba)); + csr32w(ctlr, Rdbah, PCIWADDRH(ctlr->rdba)); + csr32w(ctlr, Rdlen, ctlr->nrd*sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + ctlr->rdtr = 0; + csr32w(ctlr, Rdtr, Fpd|0); + + for(i = 0; i < ctlr->nrd; i++){ + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + } + igbereplenish(ctlr); + + switch(ctlr->id){ + case i82540em: + case i82540eplp: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82547gi: + csr32w(ctlr, Radv, 64); + break; + } + csr32w(ctlr, Rxdctl, (8<rim != 0; +} + +static void +igberproc(void* arg) +{ + Rd *rd; + 
Block *bp; + Ctlr *ctlr; + int r, rdh; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + igberxinit(ctlr); + r = csr32r(ctlr, Rctl); + r |= Ren; + csr32w(ctlr, Rctl, r); + + for(;;){ + ctlr->rim = 0; + igbeim(ctlr, Rxt0|Rxo|Rxdmt0|Rxseq); + ctlr->rsleep++; + sleep(&ctlr->rrendez, igberim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + if((rd->status & Reop) && rd->errors == 0){ + bp = ctlr->rb[rdh]; + ctlr->rb[rdh] = nil; + bp->wp += rd->length; + bp->next = nil; + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flag |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). + */ + ctlr->tcpcs++; + bp->flag |= Btcpck|Budpck; + } + bp->checksum = rd->checksum; + bp->flag |= Bpktck; + } + etheriq(edev, bp, 1); + } + else if(ctlr->rb[rdh] != nil){ + freeb(ctlr->rb[rdh]); + ctlr->rb[rdh] = nil; + } + + memset(rd, 0, sizeof(Rd)); + coherence(); + ctlr->rdfree--; + rdh = NEXT(rdh, ctlr->nrd); + } + ctlr->rdh = rdh; + + if(ctlr->rdfree < ctlr->nrd/2 || (ctlr->rim & Rxdmt0)) + igbereplenish(ctlr); + } +} + +static void +igbeattach(Ether* edev) +{ + Block *bp; + Ctlr *ctlr; + char name[KNAMELEN]; + + ctlr = edev->ctlr; + ctlr->edev = edev; /* point back to Ether* */ + qlock(&ctlr->alock); + if(ctlr->alloc != nil){ /* already allocated? 
*/ + qunlock(&ctlr->alock); + return; + } + + ctlr->tb = nil; + ctlr->rb = nil; + ctlr->alloc = nil; + ctlr->nrb = 0; + if(waserror()){ + while(ctlr->nrb > 0){ + bp = igberballoc(); + bp->free = nil; + freeb(bp); + ctlr->nrb--; + } + free(ctlr->tb); + ctlr->tb = nil; + free(ctlr->rb); + ctlr->rb = nil; + free(ctlr->alloc); + ctlr->alloc = nil; + qunlock(&ctlr->alock); + nexterror(); + } + + ctlr->nrd = ROUNDUP(Nrd, 8); + ctlr->ntd = ROUNDUP(Ntd, 8); + ctlr->alloc = malloc(ctlr->nrd*sizeof(Rd)+ctlr->ntd*sizeof(Td) + 127); + if(ctlr->alloc == nil) { + print("igbe: can't allocate ctlr->alloc\n"); + error(Enomem); + } + ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 128); + ctlr->tdba = (Td*)(ctlr->rdba+ctlr->nrd); + + ctlr->rb = malloc(ctlr->nrd*sizeof(Block*)); + ctlr->tb = malloc(ctlr->ntd*sizeof(Block*)); + if (ctlr->rb == nil || ctlr->tb == nil) { + print("igbe: can't allocate ctlr->rb or ctlr->tb\n"); + error(Enomem); + } + + for(ctlr->nrb = 0; ctlr->nrb < Nrb; ctlr->nrb++){ + if((bp = allocb(Rbsz)) == nil) + break; + bp->free = igberbfree; + freeb(bp); + } + + snprint(name, KNAMELEN, "#l%dlproc", edev->ctlrno); + kproc(name, igbelproc, edev); + + snprint(name, KNAMELEN, "#l%drproc", edev->ctlrno); + kproc(name, igberproc, edev); + + igbetxinit(ctlr); + + qunlock(&ctlr->alock); + poperror(); +} + +static void +igbeinterrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + int icr, im, txdw; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + txdw = 0; + + while((icr = csr32r(ctlr, Icr) & ctlr->im) != 0){ + if(icr & Lsc){ + im &= ~Lsc; + ctlr->lim = icr & Lsc; + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq)){ + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq); + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + txdw++; + ctlr->tintr++; + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); + + 
if(txdw) + igbetransmit(edev); +} + +static int +i82543mdior(Ctlr* ctlr, int n) +{ + int ctrl, data, i, r; + + /* + * Read n bits from the Management Data I/O Interface. + */ + ctrl = csr32r(ctlr, Ctrl); + r = (ctrl & ~Mddo)|Mdco; + data = 0; + for(i = n-1; i >= 0; i--){ + if(csr32r(ctlr, Ctrl) & Mdd) + data |= (1<= 0; i--){ + if(bits & (1<ctlr; + + /* + * MII Management Interface Read. + * + * Preamble; + * ST+OP+PHYAD+REGAD; + * TA + 16 data bits. + */ + i82543mdiow(ctlr, 0xFFFFFFFF, 32); + i82543mdiow(ctlr, 0x1800|(pa<<5)|ra, 14); + data = i82543mdior(ctlr, 18); + + if(data & 0x10000) + return -1; + + return data & 0xFFFF; +} + +static int +i82543miimiw(Mii* mii, int pa, int ra, int data) +{ + Ctlr *ctlr; + + ctlr = mii->ctlr; + + /* + * MII Management Interface Write. + * + * Preamble; + * ST+OP+PHYAD+REGAD+TA + 16 data bits; + * Z. + */ + i82543mdiow(ctlr, 0xFFFFFFFF, 32); + data &= 0xFFFF; + data |= (0x05<<(5+5+2+16))|(pa<<(5+2+16))|(ra<<(2+16))|(0x02<<16); + i82543mdiow(ctlr, data, 32); + + return 0; +} + +static int +i82543miirw(Mii* mii, int write, int pa, int ra, int data) +{ + if(write) + return i82543miimiw(mii, pa, ra, data); + + return i82543miimir(mii, pa, ra); +} + +static int +igbemiimir(Mii* mii, int pa, int ra) +{ + Ctlr *ctlr; + int mdic, timo; + + ctlr = mii->ctlr; + + csr32w(ctlr, Mdic, MDIrop|(pa<ctlr; + + data &= MDIdMASK; + csr32w(ctlr, Mdic, MDIwop|(pa<id){ + case i82543gc: + ctrl |= Frcdplx|Frcspd; + csr32w(ctlr, Ctrl, ctrl); + + /* + * The reset pin direction (Mdro) should already + * be set from the EEPROM load. + * If it's not set this configuration is unexpected + * so bail. 
+ */ + r = csr32r(ctlr, Ctrlext); + if(!(r & Mdro)){ + print("igbe: 82543gc Mdro not set\n"); + return nil; + } + csr32w(ctlr, Ctrlext, r); + delay(20); + r = csr32r(ctlr, Ctrlext); + r &= ~Mdr; + csr32w(ctlr, Ctrlext, r); + delay(20); + r = csr32r(ctlr, Ctrlext); + r |= Mdr; + csr32w(ctlr, Ctrlext, r); + delay(20); + + rw = i82543miirw; + break; + case i82544ei: + case i82544eif: + case i82544gc: + case i82540em: + case i82540eplp: + case i82547ei: + case i82547gi: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + ctrl &= ~(Frcdplx|Frcspd); + csr32w(ctlr, Ctrl, ctrl); + rw = igbemiirw; + break; + default: + return nil; + } + + if((mii = miiattach(ctlr, ~0, rw)) == nil) + return nil; + + /* + * 8254X-specific PHY registers not in 802.3: + * 0x10 PHY specific control + * 0x14 extended PHY specific control + * Set appropriate values then reset the PHY to have + * changes noted. + */ + switch(ctlr->id){ + case i82547gi: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + break; + default: + r = miimir(mii, 16); + r |= 0x0800; /* assert CRS on Tx */ + r |= 0x0060; /* auto-crossover all speeds */ + r |= 0x0002; /* polarity reversal enabled */ + miimiw(mii, 16, r); + + r = miimir(mii, 20); + r |= 0x0070; /* +25MHz clock */ + r &= ~0x0F00; + r |= 0x0100; /* 1x downshift */ + miimiw(mii, 20, r); + + miireset(mii); + p = 0; + if(ctlr->txcw & TxcwPs) + p |= AnaP; + if(ctlr->txcw & TxcwAs) + p |= AnaAP; + miiane(mii, ~0, p, ~0); + break; + } + + return mii; +} + +static int +at93c46io(Ctlr* ctlr, char* op, int data) +{ + char *lp, *p; + int i, loop, eecd, r; + + eecd = csr32r(ctlr, Eecd); + + r = 0; + loop = -1; + lp = nil; + for(p = op; *p != '\0'; p++){ + switch(*p){ + default: + return -1; + case ' ': + continue; + case ':': /* start of loop */ + loop = strtol(p+1, &lp, 0)-1; + lp--; + if(p == lp) + loop = 7; + p = 
lp; + continue; + case ';': /* end of loop */ + if(lp == nil) + return -1; + loop--; + if(loop >= 0) + p = lp; + else + lp = nil; + continue; + case 'C': /* assert clock */ + eecd |= Sk; + break; + case 'c': /* deassert clock */ + eecd &= ~Sk; + break; + case 'D': /* next bit in 'data' byte */ + if(loop < 0) + return -1; + if(data & (1<= 0) + r |= (i<= 0) + return -1; + return r; +} + +static int +at93c46r(Ctlr* ctlr) +{ + ushort sum; + char rop[20]; + int addr, areq, bits, data, eecd, i; + + eecd = csr32r(ctlr, Eecd); + if(eecd & Spi){ + print("igbe: SPI EEPROM access not implemented\n"); + return 0; + } + if(eecd & (Eeszaddr|Eesz256)) + bits = 8; + else + bits = 6; + + sum = 0; + + switch(ctlr->id){ + default: + areq = 0; + break; + case i82540em: + case i82540eplp: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82547ei: + case i82547gi: + areq = 1; + csr32w(ctlr, Eecd, eecd|Areq); + for(i = 0; i < 1000; i++){ + if((eecd = csr32r(ctlr, Eecd)) & Agnt) + break; + microdelay(5); + } + if(!(eecd & Agnt)){ + print("igbe: not granted EEPROM access\n"); + goto release; + } + break; + } + snprint(rop, sizeof(rop), "S :%dDCc;", bits+3); + + for(addr = 0; addr < 0x40; addr++){ + /* + * Read a word at address 'addr' from the Atmel AT93C46 + * 3-Wire Serial EEPROM or compatible. The EEPROM access is + * controlled by 4 bits in Eecd. See the AT93C46 datasheet + * for protocol details. + */ + if(at93c46io(ctlr, rop, (0x06<eeprom[addr] = data; + sum += data; + } + +release: + if(areq) + csr32w(ctlr, Eecd, eecd & ~Areq); + return sum; +} + +static int +igbedetach(Ctlr* ctlr) +{ + int r, timeo; + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal registers. 
+ */ + csr32w(ctlr, Imc, ~0); + csr32w(ctlr, Rctl, 0); + csr32w(ctlr, Tctl, 0); + + delay(10); + + csr32w(ctlr, Ctrl, Devrst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrl) & Devrst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrl) & Devrst) + return -1; + r = csr32r(ctlr, Ctrlext); + csr32w(ctlr, Ctrlext, r|Eerst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrlext) & Eerst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrlext) & Eerst) + return -1; + + switch(ctlr->id){ + default: + break; + case i82540em: + case i82540eplp: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82547gi: + case i82546gb: + case i82546eb: + r = csr32r(ctlr, Manc); + r &= ~Arpen; + csr32w(ctlr, Manc, r); + break; + } + + csr32w(ctlr, Imc, ~0); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!csr32r(ctlr, Icr)) + break; + delay(1); + } + if(csr32r(ctlr, Icr)) + return -1; + + return 0; +} + +static void +igbeshutdown(Ether* ether) +{ + igbedetach(ether->ctlr); +} + +static int +igbereset(Ctlr* ctlr) +{ + int ctrl, i, pause, r, swdpio, txcw; + + if(igbedetach(ctlr)) + return -1; + + /* + * Read the EEPROM, validate the checksum + * then get the device back to a power-on state. + */ + if((r = at93c46r(ctlr)) != 0xBABA){ + print("igbe: bad EEPROM checksum - %#4.4ux\n", r); + return -1; + } + + /* + * Snarf and set up the receive addresses. + * There are 16 addresses. The first should be the MAC address. + * The others are cleared and not marked valid (MS bit of Rah). 
+ */ + if ((ctlr->id == i82546gb || ctlr->id == i82546eb) && + BUSFNO(ctlr->pcidev->tbdf) == 1) + ctlr->eeprom[Ea+2] += 0x100; /* second interface */ + for(i = Ea; i < Eaddrlen/2; i++){ + ctlr->ra[2*i] = ctlr->eeprom[i]; + ctlr->ra[2*i+1] = ctlr->eeprom[i]>>8; + } + /* lan id seems to vary on 82543gc; don't use it */ + if (ctlr->id != i82543gc) { + r = (csr32r(ctlr, Status) & Lanid) >> 2; + ctlr->ra[5] += r; /* ea ctlr[1] = ea ctlr[0]+1 */ + } + + r = (ctlr->ra[3]<<24)|(ctlr->ra[2]<<16)|(ctlr->ra[1]<<8)|ctlr->ra[0]; + csr32w(ctlr, Ral, r); + r = 0x80000000|(ctlr->ra[5]<<8)|ctlr->ra[4]; + csr32w(ctlr, Rah, r); + for(i = 1; i < 16; i++){ + csr32w(ctlr, Ral+i*8, 0); + csr32w(ctlr, Rah+i*8, 0); + } + + /* + * Clear the Multicast Table Array. + * It's a 4096 bit vector accessed as 128 32-bit registers. + */ + memset(ctlr->mta, 0, sizeof(ctlr->mta)); + for(i = 0; i < 128; i++) + csr32w(ctlr, Mta+i*4, 0); + + /* + * Just in case the Eerst didn't load the defaults + * (doesn't appear to fully on the 82543GC), do it manually. 
+ */ + if (ctlr->id == i82543gc) { + txcw = csr32r(ctlr, Txcw); + txcw &= ~(TxcwAne|TxcwPauseMASK|TxcwFd); + ctrl = csr32r(ctlr, Ctrl); + ctrl &= ~(SwdpioloMASK|Frcspd|Ilos|Lrst|Fd); + + if(ctlr->eeprom[Icw1] & 0x0400){ + ctrl |= Fd; + txcw |= TxcwFd; + } + if(ctlr->eeprom[Icw1] & 0x0200) + ctrl |= Lrst; + if(ctlr->eeprom[Icw1] & 0x0010) + ctrl |= Ilos; + if(ctlr->eeprom[Icw1] & 0x0800) + ctrl |= Frcspd; + swdpio = (ctlr->eeprom[Icw1] & 0x01E0)>>5; + ctrl |= swdpio<eeprom[Icw2] & 0x00F0)>>4; + if(ctlr->eeprom[Icw1] & 0x1000) + ctrl |= Ips; + ctrl |= swdpio<eeprom[Icw2] & 0x0800) + txcw |= TxcwAne; + pause = (ctlr->eeprom[Icw2] & 0x3000)>>12; + txcw |= pause<fcrtl = 0x00002000; + ctlr->fcrth = 0x00004000; + txcw |= TxcwAs|TxcwPs; + break; + case 0: + ctlr->fcrtl = 0x00002000; + ctlr->fcrth = 0x00004000; + break; + case 2: + ctlr->fcrtl = 0; + ctlr->fcrth = 0; + txcw |= TxcwAs; + break; + } + ctlr->txcw = txcw; + csr32w(ctlr, Txcw, txcw); + } + + + /* + * Flow control - values from the datasheet. 
/*
 * Walk the PCI bus looking for supported Intel gigabit controllers.
 * Each match has its register BAR mapped, its cache line size checked,
 * a Ctlr allocated and reset, and is then appended to the list kept in
 * igbectlrhead/igbectlrtail.  Devices that cannot be mapped or fail
 * igbereset are skipped; a failed Ctlr allocation raises error(Enomem).
 */
static void
igbepci(void)
{
	Pcidev *p;
	Ctlr *ctlr;
	void *mem;

	p = nil;
	/* presumably vid/did of 0 make pcimatch return every device in turn -- TODO confirm */
	while(p = pcimatch(p, 0, 0)){
		/* NOTE(review): looks like the network/ethernet class code filter -- confirm */
		if(p->ccrb != 0x02 || p->ccru != 0)
			continue;

		/* the id is (did<<16)|vid; anything not listed is ignored */
		switch((p->did<<16)|p->vid){
		default:
			continue;
		case i82543gc:
		case i82544ei:
		case i82544eif:
		case i82544gc:
		case i82547ei:
		case i82547gi:
		case i82540em:
		case i82540eplp:
		case i82541ei:
		case i82541gi:
		case i82541gi2:
		case i82541pi:
		case i82545em:
		case i82545gmc:
		case i82546gb:
		case i82546eb:
			break;
		}

		/* map BAR 0 with its low four flag bits masked off */
		mem = vmap(p->mem[0].bar & ~0x0F, p->mem[0].size);
		if(mem == nil){
			print("igbe: can't map %#8.8lux\n", p->mem[0].bar);
			continue;
		}
		/* only cache line sizes 0x08 and 0x10 are accepted; anything else is forced to 0x10 */
		switch(p->cls){
		default:
			print("igbe: p->cls %#ux, setting to 0x10\n", p->cls);
			p->cls = 0x10;
			pcicfgw8(p, PciCLS, p->cls);
			break;
		case 0x08:
		case 0x10:
			break;
		}
		ctlr = malloc(sizeof(Ctlr));
		if(ctlr == nil) {
			/* undo the mapping before raising the error */
			vunmap(mem, p->mem[0].size);
			error(Enomem);
		}
		ctlr->port = p->mem[0].bar & ~0x0F;
		ctlr->pcidev = p;
		ctlr->id = (p->did<<16)|p->vid;
		/* presumably converts the cache line size from 32-bit words to bytes -- TODO confirm */
		ctlr->cls = p->cls*4;
		ctlr->nic = mem;

		/* a controller that will not reset is dropped and its resources released */
		if(igbereset(ctlr)){
			free(ctlr);
			vunmap(mem, p->mem[0].size);
			continue;
		}
		/* bus mastering is enabled only once the reset has succeeded */
		pcisetbme(p);

		/* append to the tail of the controller list */
		if(igbectlrhead != nil)
			igbectlrtail->next = ctlr;
		else
			igbectlrhead = ctlr;
		igbectlrtail = ctlr;
	}
}
+ */ + for(ctlr = igbectlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. + */ + edev->attach = igbeattach; + edev->transmit = igbetransmit; + edev->interrupt = igbeinterrupt; + edev->ifstat = igbeifstat; + edev->ctl = igbectl; + + edev->arg = edev; + edev->promiscuous = igbepromiscuous; + edev->shutdown = igbeshutdown; + edev->multicast = igbemulticast; + + return 0; +} + +void +etherigbelink(void) +{ + addethercard("i82543", igbepnp); + addethercard("igbe", igbepnp); +} diff -Nru /sys/src/9k/386/etherm10g.c /sys/src/9k/386/etherm10g.c --- /sys/src/9k/386/etherm10g.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/etherm10g.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1602 @@ +/* + * myricom 10 Gb ethernet driver + * © 2007 erik quanstrom, coraid + * + * the card is big endian. + * we use u64int rather than uintptr to hold addresses so that + * we don't get "warning: stupid shift" on 32-bit architectures. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" + +#include "etherif.h" +#include "io.h" + +#ifndef KiB +#define KiB 1024u /* Kibi 0x0000000000000400 */ +#define MiB 1048576u /* Mebi 0x0000000000100000 */ +#endif /* KiB */ + +#define dprint(...) if(debug) print(__VA_ARGS__) +#define pcicapdbg(...) 
/*
 * One transmit descriptor.  m10gtransmit fills these in in host memory
 * (tx.host) and submittx copies them into the card's ring (tx.lanai).
 * The multi-byte fields that m10gtransmit writes are stored through
 * pbit32/pbit16, i.e. most significant byte first.
 */
typedef struct {
	u32int	high;		/* upper 32 bits of the segment's bus address */
	u32int	low;		/* lower 32 bits of the segment's bus address */
	u16int	hdroff;		/* not written by m10gtransmit */
	u16int	len;		/* length of this segment in bytes */
	uchar	pad;		/* not written by m10gtransmit */
	uchar	nrdma;		/* segment count on a packet's first descriptor, 1 on the others */
	uchar	chkoff;		/* not written by m10gtransmit */
	uchar	flags;		/* SF* bits; SFfirst is set on the first descriptor only */
} Send;
/*
 * Receive completion ring.  nextblock and rxcansleep read each slot as
 * a Slotparts and treat a nonzero len as a completed receive.
 * NOTE(review): presumably the card writes the slots by dma -- confirm.
 */
typedef struct {
	Slot	*entry;		/* the ring itself; n slots allocated by setmem */
	u64int	busaddr;	/* PCIWADDR64(entry); handed to the card with CSintrqdma */
	uint	m;		/* index mask, n-1 */
	uint	n;		/* number of slots; setmem uses Maxslots */
	uint	i;		/* running count of slots consumed; masked with m to index entry */
} Done;
*/ + + uchar ra[Eaddrlen]; + + int ramsz; + uchar *ram; + + u32int *irqack; + u32int *irqdeass; + u32int *coal; + + char eprom[Epromsz]; + ulong serial; /* unit serial number */ + + QLock cmdl; + Cmd *cmd; /* address of command return */ + u64int cprt; /* bus address of command */ + + u64int boot; /* boot address */ + + Done done; + Tx tx; + Rx sm; + Rx bg; + Stats *stats; + u64int statsprt; + + Rendez rxrendez; + Rendez txrendez; + + int msi; + u32int linkstat; + u32int nrdma; +} Ctlr; + +static Ctlr *ctlrs; + +enum { + PcieAERC = 1, + PcieVC, + PcieSNC, + PciePBC, +}; + +enum { + AercCCR = 0x18, /* control register */ +}; + +enum { + PcieCTL = 8, + PcieLCR = 12, + PcieMRD = 0x7000, /* maximum read size */ +}; + +/* + * this function doesn't work because pcicgr32 doesn't have access + * to the pcie extended configuration space. + */ +static int +pciecap(Pcidev *p, int cap) +{ + uint off, i; + + off = 0x100; + while(((i = pcicfgr32(p, off))&0xffff) != cap){ + off = i >> 20; + print("pciecap offset = %ud\n", off); + if(off < 0x100 || off >= 4*KiB - 1) + return 0; + } + print("pciecap found = %ud\n", off); + return off; +} + +static int +setpcie(Pcidev *p) +{ + int off; + + /* set 4k writes */ + off = pcicap(p, PciCapPCIe); + if(off < 64) + return -1; + off += PcieCTL; + pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12); + return 0; +} + +static int +whichfw(Pcidev *p) +{ + char *s; + int i, off, lanes, ecrc; + u32int cap; + + /* check the number of configured lanes. */ + off = pcicap(p, PciCapPCIe); + if(off < 64) + return -1; + off += PcieLCR; + cap = pcicfgr16(p, off); + lanes = (cap>>4) & 0x3f; + + /* check AERC register. we need it on. */ + off = pciecap(p, PcieAERC); + print("%d offset\n", off); + cap = 0; + if(off != 0){ + off += AercCCR; + cap = pcicfgr32(p, off); + print("%ud cap\n", cap); + } + ecrc = (cap>>4) & 0xf; + /* if we don't like the aerc, kick it here. 
*/ + + print("m10g %d lanes; ecrc=%d; ", lanes, ecrc); + if(s = getconf("myriforce")){ + i = atoi(s); + if(i != 4*KiB || i != 2*KiB) + i = 2*KiB; + print("fw=%d [forced]\n", i); + return i; + } + if(lanes <= 4){ + print("fw = 4096 [lanes]\n"); + return 4*KiB; + } + if(ecrc & 10){ + print("fw = 4096 [ecrc set]\n"); + return 4*KiB; + } + print("fw = 4096 [default]\n"); + return 4*KiB; +} + +static int +parseeprom(Ctlr *c) +{ + int i, j, k, l, bits; + char *s; + + dprint("m10g eprom:\n"); + s = c->eprom; + bits = 3; + for(i = 0; s[i] && i < Epromsz; i++){ + l = strlen(s+i); + dprint("\t%s\n", s+i); + if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){ + bits ^= 1; + j = i + 4; + for(k = 0; k < 6; k++) + c->ra[k] = strtoul(s+j+3*k, 0, 16); + }else if(strncmp(s+i, "SN=", 3) == 0){ + bits ^= 2; + c->serial = atoi(s+i+3); + } + i += l; + } + if(bits) + return -1; + return 0; +} + +static u16int +pbit16(u16int i) +{ + u16int j; + uchar *p; + + p = (uchar*)&j; + p[1] = i; + p[0] = i>>8; + return j; +} + +static u16int +gbit16(uchar i[2]) +{ + u16int j; + + j = i[1]; + j |= i[0]<<8; + return j; +} + +static u32int +pbit32(u32int i) +{ + u32int j; + uchar *p; + + p = (uchar*)&j; + p[3] = i; + p[2] = i>>8; + p[1] = i>>16; + p[0] = i>>24; + return j; +} + +static u32int +gbit32(uchar i[4]) +{ + u32int j; + + j = i[3]; + j |= i[2]<<8; + j |= i[1]<<16; + j |= i[0]<<24; + return j; +} + +static void +prepcmd(uint *cmd, int i) +{ + while(i-- > 0) + cmd[i] = pbit32(cmd[i]); +} + +/* + * the command looks like this (int 32bit integers) + * cmd type + * addr (low) + * addr (high) + * pad (used for dma testing) + * response (high) + * response (low) + * 40 byte = 5 int pad. 
+ */ + +u32int +cmd(Ctlr *c, int type, u64int data) +{ + u32int buf[16], i; + Cmd *cmd; + + qlock(&c->cmdl); + cmd = c->cmd; + cmd->i[1] = Noconf; + memset(buf, 0, sizeof buf); + buf[0] = type; + buf[1] = data; + buf[2] = data >> 32; + buf[4] = c->cprt >> 32; + buf[5] = c->cprt; + prepcmd(buf, 6); + coherence(); + memmove(c->ram + Cmdoff, buf, sizeof buf); + + if(waserror()) + nexterror(); + for(i = 0; i < 15; i++){ + if(cmd->i[1] != Noconf){ + poperror(); + i = gbit32(cmd->c); + qunlock(&c->cmdl); + if(cmd->i[1] != 0) + dprint("[%ux]", i); + return i; + } + tsleep(&up->sleep, return0, 0, 1); + } + qunlock(&c->cmdl); + iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n", + cmd->i[0], cmd->i[1], type); + error(Etimeout); + return ~0; /* silence! */ +} + +u32int +maccmd(Ctlr *c, int type, uchar *m) +{ + u32int buf[16], i; + Cmd *cmd; + + qlock(&c->cmdl); + cmd = c->cmd; + cmd->i[1] = Noconf; + memset(buf, 0, sizeof buf); + buf[0] = type; + buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3]; + buf[2] = m[4]<< 8 | m[5]; + buf[4] = c->cprt >> 32; + buf[5] = c->cprt; + prepcmd(buf, 6); + coherence(); + memmove(c->ram + Cmdoff, buf, sizeof buf); + + if(waserror()) + nexterror(); + for(i = 0; i < 15; i++){ + if(cmd->i[1] != Noconf){ + poperror(); + i = gbit32(cmd->c); + qunlock(&c->cmdl); + if(cmd->i[1] != 0) + dprint("[%ux]", i); + return i; + } + tsleep(&up->sleep, return0, 0, 1); + } + qunlock(&c->cmdl); + iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n", + cmd->i[0], cmd->i[1], type); + error(Etimeout); + return ~0; /* silence! 
*/ +} + +/* remove this garbage after testing */ +enum { + DMAread = 0x10000, + DMAwrite= 0x1, +}; + +u32int +dmatestcmd(Ctlr *c, int type, u64int addr, int len) +{ + u32int buf[16], i; + + memset(buf, 0, sizeof buf); + memset(c->cmd, Noconf, sizeof *c->cmd); + buf[0] = Cdmatest; + buf[1] = addr; + buf[2] = addr >> 32; + buf[3] = len * type; + buf[4] = c->cprt >> 32; + buf[5] = c->cprt; + prepcmd(buf, 6); + coherence(); + memmove(c->ram + Cmdoff, buf, sizeof buf); + + if(waserror()) + nexterror(); + for(i = 0; i < 15; i++){ + if(c->cmd->i[1] != Noconf){ + i = gbit32(c->cmd->c); + if(i == 0) + error(Eio); + poperror(); + return i; + } + tsleep(&up->sleep, return0, 0, 5); + } + error(Etimeout); + return ~0; /* silence! */ +} + +u32int +rdmacmd(Ctlr *c, int on) +{ + u32int buf[16], i; + + memset(buf, 0, sizeof buf); + c->cmd->i[0] = 0; + coherence(); + buf[0] = c->cprt >> 32; + buf[1] = c->cprt; + buf[2] = Noconf; + buf[3] = c->cprt >> 32; + buf[4] = c->cprt; + buf[5] = on; + prepcmd(buf, 6); + memmove(c->ram + Rdmaoff, buf, sizeof buf); + + if(waserror()) + nexterror(); + for(i = 0; i < 20; i++){ + if(c->cmd->i[0] == Noconf){ + poperror(); + return gbit32(c->cmd->c); + } + tsleep(&up->sleep, return0, 0, 1); + } + error(Etimeout); + iprint("m10g: rdmacmd timeout\n"); + return ~0; /* silence! */ +} + +static int +loadfw(Ctlr *c, int *align) +{ + uint *f, *s, sz; + int i; + + if((*align = whichfw(c->pcidev)) == 4*KiB){ + f = (u32int*)fw4k; + sz = sizeof fw4k; + }else{ + f = (u32int*)fw2k; + sz = sizeof fw2k; + } + + s = (u32int*)(c->ram + Fwoffset); + for(i = 0; i < sz / 4; i++) + s[i] = f[i]; + return sz & ~3; +} + +static int +bootfw(Ctlr *c) +{ + int i, sz, align; + uint buf[16]; + Cmd* cmd; + + if((sz = loadfw(c, &align)) == 0) + return 0; + dprint("bootfw %d bytes ... 
", sz); + cmd = c->cmd; + + memset(buf, 0, sizeof buf); + c->cmd->i[0] = 0; + coherence(); + buf[0] = c->cprt >> 32; /* upper dma target address */ + buf[1] = c->cprt; /* lower */ + buf[2] = Noconf; /* writeback */ + buf[3] = Fwoffset + 8, + buf[4] = sz - 8; + buf[5] = 8; + buf[6] = 0; + prepcmd(buf, 7); + coherence(); + memmove(c->ram + Fwsubmt, buf, sizeof buf); + + for(i = 0; i < 20; i++){ + if(cmd->i[0] == Noconf) + break; + delay(1); + } + dprint("[%ux %ux]", gbit32(cmd->c), gbit32(cmd->c+4)); + if(i == 20){ + print("m10g: cannot load fw\n"); + return -1; + } + dprint("\n"); + c->tx.segsz = align; + return 0; +} + +static int +kickthebaby(Pcidev *p, Ctlr *c) +{ + /* don't kick the baby! */ + u32int code; + + pcicfgw8(p, 0x10 + c->boot, 0x3); + pcicfgw32(p, 0x18 + c->boot, 0xfffffff0); + code = pcicfgr32(p, 0x14 + c->boot); + + dprint("reboot status = %ux\n", code); + if(code != 0xfffffff0) + return -1; + return 0; +} + +typedef struct { + uchar len[4]; + uchar type[4]; + char version[128]; + uchar globals[4]; + uchar ramsz[4]; + uchar specs[4]; + uchar specssz[4]; +} Fwhdr; + +enum { + Tmx = 0x4d582020, + Tpcie = 0x70636965, + Teth = 0x45544820, + Tmcp0 = 0x4d435030, +}; + +static char * +fwtype(u32int type) +{ + switch(type){ + case Tmx: + return "mx"; + case Tpcie: + return "PCIe"; + case Teth: + return "eth"; + case Tmcp0: + return "mcp0"; + } + return "*GOK*"; +} + +static int +chkfw(Ctlr *c) +{ + uintptr off; + Fwhdr *h; + u32int type; + + off = gbit32(c->ram+0x3c); + dprint("firmware %llux\n", (u64int)off); + if((off&3) || off + sizeof *h > c->ramsz){ + print("!m10g: bad firmware %llux\n", (u64int)off); + return -1; + } + h = (Fwhdr*)(c->ram + off); + type = gbit32(h->type); + dprint("\t" "type %s\n", fwtype(type)); + dprint("\t" "vers %s\n", h->version); + dprint("\t" "ramsz %ux\n", gbit32(h->ramsz)); + if(type != Teth){ + print("!m10g: bad card type %s\n", fwtype(type)); + return -1; + } + + return bootfw(c) || rdmacmd(c, 0); +} + +static int 
+reset(Ether *e, Ctlr *c) +{ + u32int i, sz; + + if(waserror()){ + print("m10g: reset error\n"); + nexterror(); + return -1; + } + + chkfw(c); + cmd(c, Creset, 0); + + cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry); + cmd(c, CSintrqdma, c->done.busaddr); + c->irqack = (u32int*)(c->ram + cmd(c, CGirqackoff, 0)); + /* required only if we're not doing msi? */ + c->irqdeass = (u32int*)(c->ram + cmd(c, CGirqdeassoff, 0)); + /* this is the driver default, why fiddle with this? */ + c->coal = (u32int*)(c->ram + cmd(c, CGcoaloff, 0)); + *c->coal = pbit32(25); + + dprint("dma stats:\n"); + rdmacmd(c, 1); + sz = c->tx.segsz; + i = dmatestcmd(c, DMAread, c->done.busaddr, sz); + print("\t" "read: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff)); + i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz); + print("\t" "write: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff)); + i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz); + print("\t" "r/w: %ud MB/s\n", ((i>>16)*sz*2*2)/(i&0xffff)); + memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry); + + maccmd(c, CSmac, c->ra); +// cmd(c, Cnopromisc, 0); + cmd(c, Cenablefc, 0); + e->maxmtu = Maxmtu; + cmd(c, CSmtu, e->maxmtu); + dprint("CSmtu %d...\n", e->maxmtu); + + poperror(); + return 0; +} + +static void +ctlrfree(Ctlr *c) +{ + /* free up all the Block*s, too */ + free(c->tx.host); + free(c->sm.host); + free(c->bg.host); + free(c->cmd); + free(c->done.entry); + free(c->stats); + free(c); +} + +static int +setmem(Pcidev *p, Ctlr *c) +{ + u32int i; + u64int raddr; + Done *d; + void *mem; + + c->tx.segsz = 2048; + c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100; + if(c->ramsz > p->mem[0].size) + return -1; + + raddr = p->mem[0].bar & ~0x0F; + mem = vmap(raddr, p->mem[0].size); + if(mem == nil){ + print("m10g: can't map %8.8lux\n", p->mem[0].bar); + return -1; + } + dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size); + c->port = raddr; + c->ram = mem; + c->cmd = malign(sizeof *c->cmd); + c->cprt = PCIWADDR64(c->cmd); + + d 
= &c->done; + d->n = Maxslots; + d->m = d->n - 1; + i = d->n * sizeof *d->entry; + d->entry = malign(i); + memset(d->entry, 0, i); + d->busaddr = PCIWADDR64(d->entry); + + c->stats = malign(sizeof *c->stats); + memset(c->stats, 0, sizeof *c->stats); + c->statsprt = PCIWADDR64(c->stats); + + memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2); + return setpcie(p) || parseeprom(c); +} + +static Rx* +whichrx(Ctlr *c, int sz) +{ + if(sz <= smpool.size) + return &c->sm; + return &c->bg; +} + +static Block* +balloc(Rx* rx) +{ + Block *b; + + ilock(rx->pool); + if((b = rx->pool->head) != nil){ + rx->pool->head = b->next; + b->next = nil; + rx->pool->n--; + } + iunlock(rx->pool); + return b; +} + +static void +smbfree(Block *b) +{ + Bpool *p; + + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, 4*KiB); + b->flag &= ~(Bpktck|Btcpck|Budpck|Bipck); + + p = &smpool; + ilock(p); + b->next = p->head; + p->head = b; + p->n++; + p->cnt++; + iunlock(p); +} + +static void +bgbfree(Block *b) +{ + Bpool *p; + + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, 4*KiB); + b->flag &= ~(Bpktck|Btcpck|Budpck|Bipck); + + p = &bgpool; + ilock(p); + b->next = p->head; + p->head = b; + p->n++; + p->cnt++; + iunlock(p); +} + +static void +replenish(Rx *rx) +{ + u32int buf[16], i, idx, e; + Bpool *p; + Block *b; + + p = rx->pool; + if(p->n < 8) + return; + memset(buf, 0, sizeof buf); + e = (rx->i - rx->cnt) & ~7; + e += rx->n; + while(p->n >= 8 && e){ + idx = rx->cnt & rx->m; + for(i = 0; i < 8; i++){ + b = balloc(rx); + buf[i*2] = pbit32(PCIWADDRH(b->wp)); + buf[i*2+1] = pbit32(PCIWADDRL(b->wp)); + rx->host[idx+i] = b; + assert(b); + } + memmove(rx->lanai + 2*idx, buf, sizeof buf); + coherence(); + rx->cnt += 8; + e -= 8; + } + if(e && p->n > 7+1) + print("should panic? 
/*
 * Return the smallest power of two that is >= j.
 * Values of j below 2 yield 1.  The result is capped at 1<<30, the
 * largest power of two an int can hold: without the cap the shift
 * overflowed for j > 1<<30.
 */
static int
nextpow(int j)
{
	int i;

	for(i = 0; i < 30 && j > (1 << i); i++)
		;
	return 1 << i;
}
/*
 * Condition function passed to sleep() by m10rx; v is the Ctlr.
 * Returns nonzero (so the sleeper proceeds) when the next slot of the
 * completion ring has a nonzero length.  Otherwise it writes
 * pbit32(3) to irqack[0] and returns 0.
 */
static int
rxcansleep(void *v)
{
	Ctlr *c;
	Slot *s;
	Slotparts *sp;
	Done *d;

	c = v;
	d = &c->done;
	s = c->done.entry;
	/* next slot to be consumed, viewed as its cksum/len halves */
	sp = (Slotparts *)(s + (d->i & d->m));
	if(sp->len != 0)
		return -1;
	/* NOTE(review): presumably re-arms the receive interrupt; meaning of 3 not visible here -- confirm */
	c->irqack[0] = pbit32(3);
	return 0;
}
+ */ + for(;; tx->cnt++){ + j = tx->cnt & tx->m; + if(b = tx->bring[j]){ + tx->bring[j] = 0; + tx->nbytes += BLEN(b); + freeb(b); + if(++tx->npkt == n) + return; + } + if(tx->cnt == tx->i) + return; + if(l++ == m){ + iprint("tx ovrun: %ud %uld\n", n, tx->npkt); + return; + } + } +} + +static int +txcansleep(void *v) +{ + Ctlr *c; + + c = v; + if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt)) + return -1; + return 0; +} + +static void +txproc(void *v) +{ + Ether *e; + Ctlr *c; + Tx *tx; + + e = v; + c = e->ctlr; + tx = &c->tx; + for(;;){ + sleep(&c->txrendez, txcansleep, c); + txcleanup(tx, gbit32(c->stats->txcnt)); + } +} + +static void +submittx(Tx *tx, int n) +{ + Send *l, *h; + int i0, i, m; + + m = tx->m; + i0 = tx->i & m; + l = tx->lanai; + h = tx->host; + for(i = n-1; i >= 0; i--) + memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h); + tx->i += n; +// coherence(); +} + +static int +nsegments(Block *b, uint segsz) +{ + uintmem bus, end, slen, len; + int i; + + bus = PCIWADDR64(b->rp); + i = 0; + for(len = BLEN(b); len; len -= slen){ + end = bus + segsz & ~(uintmem)(segsz-1); + slen = end - bus; + if(slen > len) + slen = len; + bus += slen; + i++; + } + return i; +} + +static void +m10gtransmit(Ether *e) +{ + u16int slen; + u32int i, cnt, rdma, nseg, count, len; + u64int bus, end, segsz; + uchar flags; + Block *b; + Ctlr *c; + Send *s, *s0, *s0m8; + Tx *tx; + + c = e->ctlr; + tx = &c->tx; + segsz = tx->segsz; + + qlock(tx); + count = 0; + s = tx->host + (tx->i & tx->m); + cnt = tx->cnt; + s0 = tx->host + (cnt & tx->m); + s0m8 = tx->host + ((cnt - 8) & tx->m); + i = tx->i; + for(; s >= s0 || s < s0m8; i += nseg){ + if((b = qget(e->oq)) == nil) + break; + flags = SFfirst|SFnotso; + if((len = BLEN(b)) < 1520) + flags |= SFsmall; + rdma = nseg = nsegments(b, segsz); + bus = PCIWADDR64(b->rp); + for(; len; len -= slen){ + end = bus + segsz & ~(segsz-1); + slen = end - bus; + if(slen > len) + slen = len; + s->high = pbit32(bus>>32); + s->low = 
pbit32(bus); + s->len = pbit16(slen); + s->nrdma = rdma; + s->flags = flags; + + bus += slen; + if(++s == tx->host + tx->n) + s = tx->host; + count++; + flags &= ~SFfirst; + rdma = 1; + } + tx->bring[i + nseg - 1 & tx->m] = b; + if(1 || count > 0){ + submittx(tx, count); + count = 0; + cnt = tx->cnt; + s0 = tx->host + (cnt & tx->m); + s0m8 = tx->host + ((cnt - 8) & tx->m); + } + } + qunlock(tx); +} + +static void +checkstats(Ether *e, Ctlr *c, Stats *s) +{ + u32int i; + + if(s->updated == 0) + return; + + i = gbit32(s->linkstat); + if(c->linkstat != i){ + e->link = i; + if(c->linkstat = i) + dprint("m10g: link up\n"); + else + dprint("m10g: link down\n"); + } + i = gbit32(s->nrdma); + if(i != c->nrdma){ + dprint("m10g: rdma timeout %d\n", i); + c->nrdma = i; + } +} + +static void +waitintx(Ctlr *c) +{ + int i; + + for(i = 0; i < 1024*1024; i++){ + if(c->stats->valid == 0) + break; + coherence(); + } +} + +static void +m10ginterrupt(Ureg *, void *v) +{ + Ether *e; + Ctlr *c; + + e = v; + c = e->ctlr; + + if(c->state != Runed || c->stats->valid == 0) /* not ready for us? 
*/ + return; + + if(c->stats->valid & 1) + wakeup(&c->rxrendez); + if(gbit32(c->stats->txcnt) != c->tx.npkt) + wakeup(&c->txrendez); + if(c->msi == 0) + *c->irqdeass = 0; + else + c->stats->valid = 0; + waitintx(c); + checkstats(e, c, c->stats); + c->irqack[1] = pbit32(3); +} + +static void +m10gattach(Ether *e) +{ + Ctlr *c; + char name[12]; + + dprint("m10gattach\n"); + + qlock(e->ctlr); + c = e->ctlr; + if(c->state != Detached){ + qunlock(c); + return; + } + if(waserror()){ + c->state = Detached; + qunlock(c); + nexterror(); + } + reset(e, c); + c->state = Attached; + open0(e, c); + if(c->kprocs == 0){ + c->kprocs++; + snprint(name, sizeof name, "#l%drxproc", e->ctlrno); + kproc(name, m10rx, e); + snprint(name, sizeof name, "#l%dtxproc", e->ctlrno); + kproc(name, txproc, e); + } + c->state = Runed; + qunlock(c); + poperror(); +} + +static int +m10gdetach(Ctlr *c) +{ + dprint("m10gdetach\n"); +// reset(e->ctlr); + vunmap(c->ram, c->pcidev->mem[0].size); + ctlrfree(c); + return -1; +} + +static int +lstcount(Block *b) +{ + int i; + + i = 0; + for(; b; b = b->next) + i++; + return i; +} + +static long +m10gifstat(Ether *e, void *v, long n, ulong off) +{ + int l, lim; + char *p; + Ctlr *c; + Stats s; + + c = e->ctlr; + lim = 2*READSTR-1; + p = malloc(lim+1); + l = 0; + /* no point in locking this because this is done via dma. 
*/ + memmove(&s, c->stats, sizeof s); + + // l += + snprint(p+l, lim, + "txcnt = %ud\n" "linkstat = %ud\n" "dlink = %ud\n" + "derror = %ud\n" "drunt = %ud\n" "doverrun = %ud\n" + "dnosm = %ud\n" "dnobg = %ud\n" "nrdma = %ud\n" + "txstopped = %ud\n" "down = %ud\n" "updated = %ud\n" + "valid = %ud\n\n" + "tx pkt = %uld\n" "tx bytes = %lld\n" + "tx cnt = %ud\n" "tx n = %ud\n" "tx i = %ud\n" + "sm cnt = %ud\n" "sm i = %ud\n" "sm n = %ud\n" + "sm lst = %ud\n" + "bg cnt = %ud\n" "bg i = %ud\n" "bg n = %ud\n" + "bg lst = %ud\n" + "segsz = %ud\n" "coal = %d\n", + gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlink), + gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun), + gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma), + s.txstopped, s.down, s.updated, s.valid, + c->tx.npkt, c->tx.nbytes, + c->tx.cnt, c->tx.n, c->tx.i, + c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head), + c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head), + c->tx.segsz, gbit32((uchar*)c->coal)); + + n = readstr(off, v, n, p); + free(p); + return n; +} + +//static void +//summary(Ether *e) +//{ +// char *buf; +// int n, i, j; +// +// if(e == 0) +// return; +// buf = malloc(n=250); +// if(buf == 0) +// return; +// +// snprint(buf, n, "oq\n"); +// qsummary(e->oq, buf+3, n-3-1); +// iprint("%s", buf); +// +// if(e->f) for(i = 0; e->f[i]; i++){ +// j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type); +// qsummary(e->f[i]->in, buf+j, n-j-1); +// print("%s", buf); +// } +// +// free(buf); +//} + +static void +rxring(Ctlr *c) +{ + Done *d; + Slot *s; + Slotparts *sp; + int i; + + d = &c->done; + s = d->entry; + for(i = 0; i < d->n; i++) { + sp = (Slotparts *)(s + i); + if(sp->len) + iprint("s[%d] = %d\n", i, sp->len); + } +} + +enum { + CMdebug, + CMcoal, + CMwakeup, + CMtxwakeup, + CMqsummary, + CMrxring, +}; + +static Cmdtab ctab[] = { + CMdebug, "debug", 2, + CMcoal, "coal", 2, + CMwakeup, "wakeup", 1, + CMtxwakeup, "txwakeup", 1, +// CMqsummary, "q", 1, + CMrxring, "rxring", 1, +}; 
+ +static long +m10gctl(Ether *e, void *v, long n) +{ + int i; + Cmdbuf *c; + Cmdtab *t; + + dprint("m10gctl\n"); + if(e->ctlr == nil) + error(Enonexist); + + c = parsecmd(v, n); + if(waserror()){ + free(c); + nexterror(); + } + t = lookupcmd(c, ctab, nelem(ctab)); + switch(t->index){ + case CMdebug: + debug = (strcmp(c->f[1], "on") == 0); + break; + case CMcoal: + i = atoi(c->f[1]); + if(i < 0 || i > 1000) + error(Ebadarg); + *((Ctlr*)e->ctlr)->coal = pbit32(i); + break; + case CMwakeup: + wakeup(&((Ctlr*)e->ctlr)->rxrendez); /* you're kidding, right? */ + break; + case CMtxwakeup: + wakeup(&((Ctlr*)e->ctlr)->txrendez); /* you're kidding, right? */ + break; +// case CMqsummary: +// summary(e); +// break; + case CMrxring: + rxring(e->ctlr); + break; + default: + error(Ebadarg); + } + free(c); + poperror(); + return n; +} + +static void +m10gshutdown(Ether *e) +{ + dprint("m10gshutdown\n"); + m10gdetach(e->ctlr); +} + +static void +m10gpromiscuous(void *v, int on) +{ + Ether *e; + int i; + + dprint("m10gpromiscuous\n"); + e = v; + if(on) + i = Cpromisc; + else + i = Cnopromisc; + cmd(e->ctlr, i, 0); +} + +static int mcctab[] = { CSleavemc, CSjoinmc }; +static char *mcntab[] = { "leave", "join" }; + +static void +m10gmulticast(void *v, uchar *ea, int on) +{ + Ether *e; + int i; + + dprint("m10gmulticast\n"); + e = v; + if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0) + print("m10g: can't %s %E: %d\n", mcntab[on], ea, i); +} + +static void +m10gpci(void) +{ + Pcidev *p; + Ctlr *t, *c; + + t = 0; + for(p = 0; p = pcimatch(p, 0x14c1, 0x0008); ){ + c = malloc(sizeof *c); + if(c == nil) + continue; + memset(c, 0, sizeof *c); + c->pcidev = p; + c->id = p->did<<16 | p->vid; +// c->boot = pcicap(p, PciCapVND); +// if(c->boot != ~(uintmem)0){ +// kickthebaby(p, c); +// } + pcisetbme(p); + if(setmem(p, c) == -1){ + print("m10g failed\n"); + free(c); + /* cleanup */ + continue; + } + if(t) + t->next = c; + else + ctlrs = c; + t = c; + } +} + +static int +m10gpnp(Ether *e) +{ + 
Ctlr *c; + + if(ctlrs == nil) + m10gpci(); + + for(c = ctlrs; c != nil; c = c->next) + if(c->active) + continue; + else if(e->port == 0 || e->port == c->port) + break; + if(c == nil) + return -1; + c->active = 1; + + e->ctlr = c; + e->port = c->port; + e->irq = c->pcidev->intl; + e->tbdf = c->pcidev->tbdf; + e->mbps = 10000; + memmove(e->ea, c->ra, Eaddrlen); + + e->attach = m10gattach; + e->detach = m10gshutdown; + e->transmit = m10gtransmit; + e->interrupt = m10ginterrupt; + e->ifstat = m10gifstat; + e->ctl = m10gctl; +// e->power = m10gpower; + e->shutdown = m10gshutdown; + + e->arg = e; + e->promiscuous = m10gpromiscuous; + e->multicast = m10gmulticast; + + return 0; +} + +void +etherm10glink(void) +{ + addethercard("m10g", m10gpnp); +} diff -Nru /sys/src/9k/386/fis.c /sys/src/9k/386/fis.c --- /sys/src/9k/386/fis.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/fis.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,545 @@ +/* + * sata fises and sas frames + * copyright © 2009-2010 erik quanstrom + */ +#include "u.h" +#include "../port/lib.h" +#include "fis.h" + +static char *flagname[9] = { + "lba", + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", + "ata8", + "sct", +}; + +/* + * ata8 standard (llba) cmd layout + * + * feature 16 bits + * count 16 bits + * lba 48 bits + * device 8 bits + * command 8 bits + * + * response: + * + * status 8 bits + * error 8 bits + * reason 8 bits + * count 8 bits + * sstatus 8 bits + * sactive 8 bits +*/ + +/* + * sata fis layout for fistype 0x27: host-to-device: + * + * 0 fistype + * 1 fis flags + * 2 ata command + * 3 features + * 4 sector lba low 7:0 + * 5 cyl low lba mid 15:8 + * 6 cyl hi lba hi 23:16 + * 7 device / head + * 8 sec exp lba 31:24 + * 9 cy low e lba 39:32 + * 10 cy hi e lba 48:40 + * 11 features (exp) + * 12 sector count + * 13 sector count (exp) + * 14 r + * 15 control + */ + +void +setfissig(Sfis *x, uint sig) +{ + x->sig = sig; +} + +void +skelfis(uchar *c) +{ + memset(c, 0, Fissize); + c[Ftype] = H2dev; + 
c[Fflags] = Fiscmd; + c[Fdev] = Ataobs; +} + +int +nopfis(Sfis*, uchar *c, int srst) +{ + skelfis(c); + if(srst){ + c[Fflags] &= ~Fiscmd; + c[Fcontrol] = 1<<2; + return Preset|P28; + } + return Pnd|P28; +} + +int +txmodefis(Sfis *f, uchar *c, uchar d) +{ + int m; + + /* hack */ + if((f->sig >> 16) == 0xeb14) + return -1; + m = 0x40; + if(d == 0xff){ + d = 0; + m = 0; + } + skelfis(c); + c[Fcmd] = 0xef; + c[Ffeat] = 3; /* set transfer mode */ + c[Fsc] = m | d; /* sector count */ + return Pnd|P28; +} + +int +featfis(Sfis*, uchar *c, uchar f) +{ + skelfis(c); + c[Fcmd] = 0xef; + c[Ffeat] = f; + return Pnd|P28; +} + +int +identifyfis(Sfis *f, uchar *c) +{ + static uchar tab[] = { 0xec, 0xa1, }; + + skelfis(c); + c[Fcmd] = tab[f->sig>>16 == 0xeb14]; + return Pin|Ppio|P28|P512; +} + +int +flushcachefis(Sfis *f, uchar *c) +{ + static uchar tab[2] = {0xe7, 0xea}; + static uchar ptab[2] = {Pnd|P28, Pnd|P48}; + int llba; + + llba = (f->feat & Dllba) != 0; + skelfis(c); + c[Fcmd] = tab[llba]; + return ptab[llba]; +} + +static ushort +gbit16(void *a) +{ + ushort j; + uchar *i; + + i = a; + j = i[1] << 8; + j |= i[0]; + return j; +} + +static uint +gbit32(void *a) +{ + uint j; + uchar *i; + + i = a; + j = i[3] << 24; + j |= i[2] << 16; + j |= i[1] << 8; + j |= i[0]; + return j; +} + +static uvlong +gbit64(void *a) +{ + uchar *i; + + i = a; + return (uvlong)gbit32(i+4) << 32 | gbit32(a); +} + +ushort +id16(ushort *id, int i) +{ + return gbit16(id+i); +} + +uint +id32(ushort *id, int i) +{ + return gbit32(id+i); +} + +uvlong +id64(ushort *id, int i) +{ + return gbit64(id+i); +} + +/* acs-2 §7.18.7.4 */ +static ushort puistab[] = { + 0x37c8, Pspinup, + 0x738c, Pspinup | Pidready, + 0x8c73, 0, + 0xc837, Pidready, +}; + +int +idpuis(ushort *id) +{ + ushort u, i; + + u = gbit16(id + 2); + for(i = 0; i < nelem(puistab); i += 2) + if(u == puistab[i]) + return puistab[i + 1]; + return Pidready; /* annoying cdroms */ +} + +static ushort +onesc(ushort *id) +{ + ushort u; + + u = 
gbit16(id); + if(u == 0xffff) + u = 0; + return u; +} + +enum{ + Idmasp = 1<<8, + Ilbasp = 1<<9, + Illba = 1<<10, +}; + +vlong +idfeat(Sfis *f, ushort *id) +{ + int i, j; + vlong s; + + f->feat = 0; + if(f->sig>>16 == 0xeb14) + f->feat |= Datapi; + i = gbit16(id + 49); + if((i & Ilbasp) == 0){ + if(gbit16(id + 53) & 1){ + f->c = gbit16(id + 1); + f->h = gbit16(id + 3); + f->s = gbit16(id + 6); + }else{ + f->c = gbit16(id + 54); + f->h = gbit16(id + 55); + f->s = gbit16(id + 56); + } + s = f->c*f->h*f->s; + }else{ + f->c = f->h = f->s = 0; + f->feat |= Dlba; + j = gbit16(id + 83) | gbit16(id + 86); + if(j & Illba){ + f->feat |= Dllba; + s = gbit64(id + 100); + }else + s = gbit32(id + 60); + } + f->udma = 0xff; + if(i & Idmasp) + if(gbit16(id + 53) & 4) + for(i = gbit16(id + 88) & 0x7f; i; i >>= 1) + f->udma++; + + if(f->feat & Datapi){ + i = gbit16(id + 0); + if(i & 1) + f->feat |= Datapi16; + } + + i = gbit16(id+83); + if((i>>14) == 1){ + if(i & (1<<3)) + f->feat |= Dpower; + i = gbit16(id + 82); + if(i & 1) + f->feat |= Dsmart; + if(i & (1<<14)) + f->feat |= Dnop; + } + i = onesc(id + 80); + if(i & 1<<8){ + f->feat |= Data8; + i = onesc(id + 222); /* sata? */ + j = onesc(id + 76); + if(i != 0 && i >> 12 == 1 && j != 0){ + j >>= 1; + f->speeds = j & 7; + i = gbit16(id + 78) & gbit16(id + 79); + /* + * not acceptable for comreset to + * wipe out device configuration. + * reject drive. 
+ */ + if((i & 1<<6) == 0) + return -1; + } + } + if(gbit16(id + 206) & 1) + f->feat |= Dsct; + idss(f, id); + return s; +} + +int +idss(Sfis *f, ushort *id) +{ + uint sw, i; + + if(f->sig>>16 == 0xeb14) /* same signature idfeat marks as Datapi: no sector size */ + return 0; + f->lsectsz = 512; /* defaults, kept unless word 106 says otherwise */ + f->physshift = 0; + i = gbit16(id + 106); + if(i >> 14 != 1) /* top two bits of word 106 must be 01 */ + return f->lsectsz; + if((sw = gbit32(id + 117)) >= 256) + f->lsectsz = sw * 2; + if(i & 1<<13) + f->physshift = i & 7; + return f->lsectsz * (1<<f->physshift); /* logical size scaled by the shift just recorded */ +} + +uvlong +idwwn(Sfis*, ushort *id) +{ + uvlong u; + + u = 0; + if(id[108]>>12 == 5){ /* only when the top nibble of word 108 is 5; otherwise 0 */ + u |= (uvlong)gbit16(id + 108) << 48; + u |= (uvlong)gbit16(id + 109) << 32; + u |= (uvlong)gbit16(id + 110) << 16; /* widen first: an int shift sign-extends when bit 15 is set */ + u |= (uvlong)gbit16(id + 111) << 0; + } + return u; +} + +void +idmove(char *p, ushort *u, int n) +{ + int i; + char *op, *e, *s; + + op = p; + s = (char*)u; + for(i = 0; i < n; i += 2){ + *p++ = s[i + 1]; + *p++ = s[i + 0]; + } + *p = 0; + while(p > op && *--p == ' ') + *p = 0; + e = p; + p = op; + while(*p == ' ') + p++; + memmove(op, p, n - (e - p)); +} + +char* +pflag(char *s, char *e, Sfis *f) +{ + ushort i, u; + + u = f->feat; + for(i = 0; i < Dnflag; i++) + if(u & (1 << i)) + s = seprint(s, e, "%s ", flagname[i]); + return seprint(s, e, "\n"); +} + +int +atapirwfis(Sfis *f, uchar *c, uchar *cdb, int cdblen, int ndata) +{ + int fill, len; + + fill = f->feat&Datapi16? 
16: 12; + if((len = cdblen) > fill) + len = fill; + memmove(c + 0x40, cdb, len); + memset(c + 0x40 + len, 0, fill - len); + + c[Ftype] = H2dev; + c[Fflags] = Fiscmd; + c[Fcmd] = Ataobs; + if(ndata != 0) + c[Ffeat] = 1; /* dma */ + else + c[Ffeat] = 0; /* features (exp); */ + c[Flba0] = 0; + c[Flba8] = ndata; + c[Flba16] = ndata >> 8; + c[Fdev] = Ataobs; + memset(c + 8, 0, Fissize - 8); + return P28|Ppkt; +} + +int +rwfis(Sfis *f, uchar *c, int rw, int nsect, uvlong lba) +{ + uchar acmd, llba, udma; + static uchar tab[2][2][2] = { 0x20, 0x24, 0x30, 0x34, 0xc8, 0x25, 0xca, 0x35, }; + static uchar ptab[2][2][2] = { + Pin|Ppio|P28, Pin|Ppio|P48, + Pout|Ppio|P28, Pout|Ppio|P48, + Pin|Pdma|P28, Pin|Pdma|P48, + Pout|Pdma|P28, Pout|Pdma|P48, + }; + + nsect >>= f->physshift; + lba >>= f->physshift; + + udma = f->udma != 0xff; /* 0xff: idfeat found no udma mode, so use the pio rows */ + llba = (f->feat & Dllba) != 0; + acmd = tab[udma][rw][llba]; /* rw: 0 selects the Pin rows, 1 the Pout rows */ + + c[Ftype] = 0x27; + c[Fflags] = 0x80; + c[Fcmd] = acmd; + c[Ffeat] = 0; + + c[Flba0] = lba; + c[Flba8] = lba >> 8; + c[Flba16] = lba >> 16; + c[Fdev] = Ataobs | Atalba; + if(llba == 0) + c[Fdev] |= (lba>>24) & 0xf; + + c[Flba24] = lba >> 24; + c[Flba32] = lba >> 32; + c[Flba40] = lba >> 40; /* bits 47:40, the byte fisrw reads back at that position */ + c[Ffeat8] = 0; + + c[Fsc] = nsect; + c[Fsc8] = nsect >> 8; + c[Ficc] = 0; + c[Fcontrol] = 0; + + memset(c + 16, 0, Fissize - 16); + return ptab[udma][rw][llba]; /* protocol flags for the command chosen above */ +} + +uvlong +fisrw(Sfis *f, uchar *c, int *n) +{ + uvlong lba; + + lba = c[Flba0]; + lba |= c[Flba8] << 8; + lba |= c[Flba16] << 16; + lba |= (uvlong)c[Flba24] << 24; /* widen first: an int shift sign-extends when the byte is >= 0x80 */ + lba |= (uvlong)(c[Flba32] | c[Flba40]<<8) << 32; + + *n = c[Fsc]; /* sector count comes back through n */ + *n |= c[Fsc8] << 8; + + *n >>= f->physshift; + lba >>= f->physshift; + + return lba; +} + +void +sigtofis(Sfis *f, uchar *c) +{ + uint u; + + u = f->sig; + memset(c, 0, Fissize); + c[Ftype] = 0x34; + c[Fflags] = 0x00; + c[Fcmd] = 0x50; + c[Ffeat] = 0x01; + c[Flba0] = u >> 8; + c[Flba8] = u >> 16; + c[Flba16] = u >> 24; + c[Fdev] = Ataobs; + c[Fsc] = u; +} + +uint +fistosig(uchar *u) +{ + return u[Fsc] | u[Flba0]<<8 
| u[Flba8]<<16 | u[Flba16]<<24; +} + + +/* sas smp */ +void +smpskelframe(Cfis *f, uchar *c, int m) +{ + memset(c, 0, Fissize); + c[Ftype] = 0x40; + c[Fflags] = m; + if(f->phyid) + c[Flba32] = f->phyid; +} + +uint +sashash(uvlong u) +{ + uint poly, msb, l, r; + uvlong m; + + r = 0; + poly = 0x01db2777; + msb = 0x01000000; + for(m = 1ull<<63; m > 0; m >>= 1){ /* feed u in most significant bit first */ + l = 0; + if(m & u) + l = msb; + r <<= 1; + r ^= l; + if(r & msb) + r ^= poly; + } + return r & 0xffffff; /* low 24 bits are the hash */ +} + +uchar* +sasbhash(uchar *t, uchar *s) +{ + uint poly, msb, l, r, i, j; + + r = 0; + poly = 0x01db2777; + msb = 0x01000000; + for(i = 0; i < 8; i++) /* same shift-and-xor step as sashash, over s[0..7], high bit first */ + for(j = 0x80; j != 0; j >>= 1){ + l = 0; + if(s[i] & j) + l = msb; + r <<= 1; + r ^= l; + if(r & msb) + r ^= poly; + } + t[0] = r>>16; /* 24-bit result stored most significant byte first */ + t[1] = r>>8; + t[2] = r; + return t; +} diff -Nru /sys/src/9k/386/fis.h /sys/src/9k/386/fis.h --- /sys/src/9k/386/fis.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/fis.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,163 @@ +#pragma lib "libfis.a" +#pragma src "/sys/src/libfis" + +/* ata errors */ +enum { + Emed = 1<<0, /* media error */ + Enm = 1<<1, /* no media */ + Eabrt = 1<<2, /* abort */ + Emcr = 1<<3, /* media change request */ + Eidnf = 1<<4, /* no user-accessible address */ + Emc = 1<<5, /* media change */ + Eunc = 1<<6, /* data error */ + Ewp = 1<<6, /* write protect (same bit as Eunc) */ + Eicrc = 1<<7, /* interface crc error */ + + Efatal = Eidnf|Eicrc, /* must sw reset */ +}; + +/* ata status */ +enum { + ASerr = 1<<0, /* error */ + ASdrq = 1<<3, /* request */ + ASdf = 1<<5, /* fault */ + ASdrdy = 1<<6, /* ready */ + ASbsy = 1<<7, /* busy */ + + ASobs = 1<<1|1<<2|1<<4, +}; + +enum { + /* fis types */ + H2dev = 0x27, + D2host = 0x34, + + /* fis flags bits */ + Fiscmd = 0x80, + + /* ata bits */ + Ataobs = 0xa0, + Atalba = 0x40, + + /* nominal fis size (fits any fis) */ + Fissize = 0x20, +}; + +/* sata host-to-device (0x27) fis layout */ +enum { + Ftype, + Fflags, + Fcmd, + Ffeat, + Flba0, + Flba8, + Flba16, + Fdev, + Flba24, + Flba32, + 
Flba40, + Ffeat8, + Fsc, + Fsc8, + Ficc, /* isochronous cmd completion */ + Fcontrol, +}; + +/* sata device-to-host fis (0x34) differences */ +enum{ + Fioport = 1, + Fstatus, + Frerror, +}; + +/* ata protocol type */ +enum{ + Pnd = 0<<0, /* data direction */ + Pin = 1<<0, + Pout = 2<<0, + Pdatam = 3<<0, + + Ppio = 1<<2, /* ata protocol */ + Pdma = 2<<2, + Pdmq = 3<<2, + Preset = 4<<2, + Pdiag = 5<<2, + Ppkt = 6<<2, + Pprotom = 7<<2, + + P48 = 0<<5, /* command “size” */ + P28 = 1<<5, + Pcmdszm = 1<<5, + + Pssn = 0<<6, /* sector size */ + P512 = 1<<6, + Pssm = 1<<6, +}; + +typedef struct Sfis Sfis; +struct Sfis { + ushort feat; + uchar udma; /* 0xff when idfeat found no udma mode */ + uchar speeds; + uint sig; + uint lsectsz; + uint physshift; /* log2(log/phys) */ + uint c; /* disgusting, no? */ + uint h; + uint s; +}; + +enum { + Dlba = 1<<0, /* required for sata */ + Dllba = 1<<1, + Dsmart = 1<<2, + Dpower = 1<<3, + Dnop = 1<<4, + Datapi = 1<<5, + Datapi16= 1<<6, + Data8 = 1<<7, + Dsct = 1<<8, + Dnflag = 9, /* number of flag bits above; pflag walks flagname[] with it */ +}; + +enum { + Pspinup = 1<<0, + Pidready = 1<<1, +}; + +void setfissig(Sfis*, uint); +int txmodefis(Sfis*, uchar*, uchar); +int atapirwfis(Sfis*, uchar*, uchar*, int, int); +int featfis(Sfis*, uchar*, uchar); +int flushcachefis(Sfis*, uchar*); +int identifyfis(Sfis*, uchar*); +int nopfis(Sfis*, uchar*, int); +int rwfis(Sfis*, uchar*, int, int, uvlong); +void skelfis(uchar*); +void sigtofis(Sfis*, uchar*); +uvlong fisrw(Sfis*, uchar*, int*); + +void idmove(char*, ushort*, int); +vlong idfeat(Sfis*, ushort*); +uvlong idwwn(Sfis*, ushort*); +int idss(Sfis*, ushort*); +int idpuis(ushort*); +ushort id16(ushort*, int); +uint id32(ushort*, int); +uvlong id64(ushort*, int); +char *pflag(char*, char*, Sfis*); +uint fistosig(uchar*); + +/* scsi */ +typedef struct Cfis Cfis; +struct Cfis { + uchar phyid; + uchar encid[8]; + uchar tsasaddr[8]; + uchar ssasaddr[8]; + uchar ict[2]; +}; + +void smpskelframe(Cfis*, uchar*, int); +uint sashash(uvlong); +uchar *sasbhash(uchar*, uchar*); diff -Nru /sys/src/9k/386/kbd.c 
/sys/src/9k/386/kbd.c --- /sys/src/9k/386/kbd.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/kbd.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,638 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "io.h" + +enum { + Data= 0x60, /* data port */ + + Status= 0x64, /* status port */ + Inready= 0x01, /* input character ready */ + Outbusy= 0x02, /* output busy */ + Sysflag= 0x04, /* system flag */ + Cmddata= 0x08, /* cmd==0, data==1 */ + Inhibit= 0x10, /* keyboard/mouse inhibited */ + Minready= 0x20, /* mouse character ready */ + Rtimeout= 0x40, /* general timeout */ + Parity= 0x80, + + Cmd= 0x64, /* command port (write only) */ + + Spec= 0xF800, /* Unicode private space */ + PF= Spec|0x20, /* num pad function key */ + View= Spec|0x00, /* view (shift window up) */ + KF= 0xF000, /* function key (begin Unicode private space) */ + Shift= Spec|0x60, + Break= Spec|0x61, + Ctrl= Spec|0x62, + Latin= Spec|0x63, + Caps= Spec|0x64, + Num= Spec|0x65, + Middle= Spec|0x66, + Altgr= Spec|0x67, + Kmouse= Spec|0x100, + No= 0x00, /* peter */ + + Home= KF|13, + Up= KF|14, + Pgup= KF|15, + Print= KF|16, + Left= KF|17, + Right= KF|18, + End= KF|24, + Down= View, + Pgdown= KF|19, + Ins= KF|20, + Del= 0x7F, + Scroll= KF|21, + + Nscan= 128, +}; + +/* + * The codes at 0x79 and 0x81 are produed by the PFU Happy Hacking keyboard. + * A 'standard' keyboard doesn't produce anything above 0x58. 
+ */ +Rune kbtab[Nscan] = +{ +[0x00] No, 0x1b, '1', '2', '3', '4', '5', '6', +[0x08] '7', '8', '9', '0', '-', '=', '\b', '\t', +[0x10] 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', +[0x18] 'o', 'p', '[', ']', '\n', Ctrl, 'a', 's', +[0x20] 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', +[0x28] '\'', '`', Shift, '\\', 'z', 'x', 'c', 'v', +[0x30] 'b', 'n', 'm', ',', '.', '/', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, Scroll, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, View, No, Up, No, No, No, No, +}; + +Rune kbtabshift[Nscan] = +{ +[0x00] No, 0x1b, '!', '@', '#', '$', '%', '^', +[0x08] '&', '*', '(', ')', '_', '+', '\b', '\t', +[0x10] 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', +[0x18] 'O', 'P', '{', '}', '\n', Ctrl, 'A', 'S', +[0x20] 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', +[0x28] '"', '~', Shift, '|', 'Z', 'X', 'C', 'V', +[0x30] 'B', 'N', 'M', '<', '>', '?', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, Scroll, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, Up, No, No, No, No, +}; + +Rune kbtabesc1[Nscan] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Altgr, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] 
Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, No, No, No, No, No, +}; + +Rune kbtabaltgr[Nscan] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Altgr, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, No, No, No, No, No, +}; + +Rune kbtabctrl[Nscan] = +{ +[0x00] No, '', '', '', '', '', '', '', +[0x08] '', '', '', '', ' ', '', '\b', '\t', +[0x10] '', '', '', '', '', '', '', '\t', +[0x18] '', '', '', '', '\n', Ctrl, '', '', +[0x20] '', '', '', '\b', '\n', ' ', ' ', '', +[0x28] '', No, Shift, '', '', '', '', '', +[0x30] '', '', ' ', ' ', '', '', Shift, '\n', +[0x38] Latin, No, Ctrl, '', '', '', '', '', +[0x40] '', '', ' ', ' ', '', '', '', '', +[0x48] '', '', ' ', '', '', '', ' ', '', +[0x50] '', '', '', '', No, No, No, '', +[0x58] ' ', No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, '', No, '\b', No, No, No, No, +}; + +enum +{ + /* controller command byte */ + Cscs1= (1<<6), /* scan code set 1 */ + Cauxdis= (1<<5), /* mouse disable */ + Ckbddis= (1<<4), /* kbd disable */ + Csf= (1<<2), /* system flag */ + Cauxint= (1<<1), /* mouse interrupt enable */ + Ckbdint= (1<<0), /* kbd interrupt enable */ 
+}; + +static Queue *kbdq; + +int mouseshifted; +void (*kbdmouse)(int); +static int nokbd = 1; + +static Lock i8042lock; +static uchar ccc; +static void (*auxputc)(int, int); + +/* + * wait for output no longer busy + */ +static int +outready(void) +{ + int tries; + + for(tries = 0; (inb(Status) & Outbusy); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * wait for input + */ +static int +inready(void) +{ + int tries; + + for(tries = 0; !(inb(Status) & Inready); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * ask 8042 to reset the machine + */ +void +i8042reset(void) +{ + int i, x; + + if(nokbd) + return; + + *(ushort*)KADDR(0x472) = 0x1234; /* BIOS warm-boot flag */ + + /* + * newer reset the machine command + */ + outready(); + outb(Cmd, 0xFE); + outready(); + + /* + * Pulse it by hand (old somewhat reliable) + */ + x = 0xDF; + for(i = 0; i < 5; i++){ + x ^= 1; + outready(); + outb(Cmd, 0xD1); + outready(); + outb(Data, x); /* toggle reset */ + delay(100); + } +} + +int +i8042auxcmd(int cmd) +{ + unsigned int c; + int tries; + + c = 0; + tries = 0; + + ilock(&i8042lock); + do{ + if(tries++ > 2) + break; + if(outready() < 0) + break; + outb(Cmd, 0xD4); + if(outready() < 0) + break; + outb(Data, cmd); + if(outready() < 0) + break; + if(inready() < 0) + break; + c = inb(Data); + } while(c == 0xFE || c == 0); + iunlock(&i8042lock); + + if(c != 0xFA){ + print("i8042: %2.2ux returned to the %2.2ux command\n", c, cmd); + return -1; + } + return 0; +} + +int +i8042auxcmds(uchar *cmd, int ncmd) +{ + int i; + + ilock(&i8042lock); + for(i=0; i sizeof kbtab){ + c |= keyup; + if(c != 0xFF) /* these come fairly often: CAPSLOCK U Y */ + print("unknown key %ux\n", c); + return; + } + + if(kbscan.esc1){ + c = kbtabesc1[c]; + kbscan.esc1 = 0; + } else if(kbscan.esc2){ + kbscan.esc2--; + return; + } else if(kbscan.shift) + c = kbtabshift[c]; + else if(kbscan.altgr) + c = kbtabaltgr[c]; + else if(kbscan.ctl) + c = 
kbtabctrl[c]; + else + c = kbtab[c]; + + if(kbscan.caps && c<='z' && c>='a') + c += 'A' - 'a'; + + /* + * keyup only important for shifts + */ + if(keyup){ + switch(c){ + case Latin: + kbscan.alt = 0; + break; + case Shift: + kbscan.shift = 0; + mouseshifted = 0; + break; + case Ctrl: + kbscan.ctl = 0; + break; + case Altgr: + kbscan.altgr = 0; + break; + case Kmouse|1: + case Kmouse|2: + case Kmouse|3: + case Kmouse|4: + case Kmouse|5: + kbscan.buttons &= ~(1<<(c-Kmouse-1)); + if(kbdmouse) + kbdmouse(kbscan.buttons); + break; + } + return; + } + + /* + * normal character + */ + if(!(c & (Spec|KF))){ + if(kbscan.ctl) + if(kbscan.alt && c == Del) + exit(0); + if(!kbscan.collecting){ + kbdputc(kbdq, c); + return; + } + kbscan.kc[kbscan.nk++] = c; + c = latin1(kbscan.kc, kbscan.nk); + if(c < -1) /* need more keystrokes */ + return; + if(c != -1) /* valid sequence */ + kbdputc(kbdq, c); + else /* dump characters */ + for(i=0; i 0 && (c = inb(Status)) & (Outbusy | Inready)) { + if(c & Inready) + inb(Data); + delay(1); + } + if (try <= 0) { + print(initfailed); + return; + } + + /* get current controller command byte */ + outb(Cmd, 0x20); + if(inready() < 0){ + print("i8042: kbdinit can't read ccc\n"); + ccc = 0; + } else + ccc = inb(Data); + + /* enable kbd xfers and interrupts */ + ccc &= ~Ckbddis; + ccc |= Csf | Ckbdint | Cscs1; + if(outready() < 0) { + print(initfailed); + return; + } + + nokbd = 0; + + /* disable mouse */ + if (outbyte(Cmd, 0x60) < 0 || outbyte(Data, ccc) < 0) + print("i8042: kbdinit mouse disable failed\n"); +} + +void +kbdenable(void) +{ + kbdq = qopen(4*1024, 0, 0, 0); + if(kbdq == nil) + panic("kbdinit"); + qnoblock(kbdq, 1); + addkbdq(kbdq, -1); + + ioalloc(Data, 1, 0, "kbd"); + ioalloc(Cmd, 1, 0, "kbd"); + + intrenable(IrqKBD, i8042intr, 0, BUSUNKNOWN, "kbd"); +} + +void +kbdputmap(ushort m, ushort scanc, Rune r) +{ + if(scanc >= Nscan) + error(Ebadarg); + switch(m) { + default: + error(Ebadarg); + case 0: + kbtab[scanc] = r; + break; + case 
1: + kbtabshift[scanc] = r; + break; + case 2: + kbtabesc1[scanc] = r; + break; + case 3: + kbtabaltgr[scanc] = r; + break; + case 4: + kbtabctrl[scanc] = r; + break; + } +} + +int +kbdgetmap(int offset, int *t, int *sc, Rune *r) +{ + *t = offset/Nscan; + *sc = offset%Nscan; + if(*t < 0 || *sc < 0) + error(Ebadarg); + switch(*t) { + default: + return 0; + case 0: + *r = kbtab[*sc]; + return 1; + case 1: + *r = kbtabshift[*sc]; + return 1; + case 2: + *r = kbtabesc1[*sc]; + return 1; + case 3: + *r = kbtabaltgr[*sc]; + return 1; + case 4: + *r = kbtabctrl[*sc]; + return 1; + } +} diff -Nru /sys/src/9k/386/pci.c /sys/src/9k/386/pci.c --- /sys/src/9k/386/pci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/pci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1299 @@ +/* + * PCI support code. + * Needs a massive rewrite. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" + +struct +{ + char output[16384]; + int ptr; +}PCICONS; + +int +pcilog(char *fmt, ...) 
+{ + int n; + va_list arg; + char buf[PRINTSIZE]; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + + if(n > (int)sizeof(PCICONS.output) - PCICONS.ptr) /* log nearly full: keep only what still fits */ + n = (int)sizeof(PCICONS.output) - PCICONS.ptr; + memmove(PCICONS.output+PCICONS.ptr, buf, n); + PCICONS.ptr += n; + return n; /* bytes actually appended to PCICONS.output */ +} + +enum +{ /* configuration mechanism #1 */ + PciADDR = 0xCF8, /* CONFIG_ADDRESS */ + PciDATA = 0xCFC, /* CONFIG_DATA */ + + MaxFNO = 7, + MaxDNO = 31, + MaxUBN = 255, +}; + +enum +{ /* command register */ + IOen = (1<<0), + MEMen = (1<<1), + MASen = (1<<2), + MemWrInv = (1<<4), + PErrEn = (1<<6), + SErrEn = (1<<8), +}; + +#define MKPCIX(b,d,f,r) ((((b)&0xFF)<<20)|(((d)&0x1F)<<15)|(((f)&7)<<12)|((r)&0xFFF)) + +static Lock pcicfglock; +static Lock pcicfginitlock; +static int pcicfgmode = -1; +static int pcimaxbno = 255; +static int pcimaxdno; +static int srxusehba; +static Pcidev* pciroot; +static Pcidev* pcilist; +static Pcidev* pcitail; +static int nobios, nopcirouting; + +static int pcicfgrw32(int, int, int, int); +static int pcicfgrw16(int, int, int, int); +static int pcicfgrw8(int, int, int, int); + +static char* bustypes[] = { + "CBUSI", + "CBUSII", + "EISA", + "FUTURE", + "INTERN", + "ISA", + "MBI", + "MBII", + "MCA", + "MPI", + "MPSA", + "NUBUS", + "PCI", + "PCMCIA", + "TC", + "VL", + "VME", + "XPRESS", +}; + +static int +strtobus(char *s) +{ + int i; + + for(i = 0; i < nelem(bustypes); i++) + if(cistrcmp(s, bustypes[i]) == 0) + return i; + return BUSUNKNOWN; +} + +int +strtotbdf(char *p, char **r, int base) +{ + char buf[12], *f0[4], **f; + int i, n, bus, t[4]; + + if(strchr(p, '.') == nil) + return strtoul(p, r, base); + + snprint(buf, sizeof buf, "%s", p); + f = f0; + n = gettokens(buf, f, nelem(f0), "."); + memset(t, 0, sizeof t); + t[0] = BusPCI; /* default bus type when no bus name leads the string */ + if((bus = strtobus(f[0])) != BUSUNKNOWN){ + t[0] = bus; + f++; + n--; + } + for(i = 0; i < n && i+1 < nelem(t); i++) /* t[] holds the type plus three numbers; a fourth number would overrun it */ + t[i+1] = strtoul(f[i], r, base); + if(r != nil) + *r = *r-buf + p; + return MKBUS(t[0], t[1], t[2], t[3]); +} + +static int +tbdffmt(Fmt* fmt) +{ + char buf[32], *p, *e; + u32int type, 
tbdf; + + p = buf; + e = buf+sizeof buf; + tbdf = va_arg(fmt->args, int); + if(tbdf == -1) + return fmtstrcpy(fmt, "isa"); + if(fmt->flags & FmtLong){ + type = BUSTYPE(tbdf); + if(type == 12) + p = seprint(p, e, "pci."); + else + p = seprint(p, e, "%d.", type); + } + seprint(p, e, "%d.%d.%d", + BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + return fmtstrcpy(fmt, buf); +} + +ulong +pcibarsize(Pcidev *p, int rno) +{ + ulong v, size; + + v = pcicfgrw32(p->tbdf, rno, 0, 1); + pcicfgrw32(p->tbdf, rno, 0xFFFFFFF0, 0); + size = pcicfgrw32(p->tbdf, rno, 0, 1); + if(v & 1) + size |= 0xFFFF0000; + pcicfgrw32(p->tbdf, rno, v, 0); + + return -(size & ~0x0F); +} + +static int +pcisizcmp(void *a, void *b) +{ + Pcisiz *aa, *bb; + + aa = a; + bb = b; + return aa->siz - bb->siz; +} + +static ulong +pcimask(ulong v) +{ + ulong mask; + + mask = BI2BY*sizeof(v); + for(mask = 1<<(mask-1); mask != 0; mask >>= 1) { + if(mask & v) + break; + } + + mask--; + if((v & mask) == 0) + return v; + + v |= mask; + return v+1; +} + +static void +pcibusmap(Pcidev *root, ulong *pmema, ulong *pioa, int wrreg) +{ + Pcidev *p; + int ntb, i, size, rno, hole; + ulong v, mema, ioa, sioa, smema, base, limit; + Pcisiz *table, *tptr, *mtb, *itb; + extern void qsort(void*, long, long, int (*)(void*, void*)); + + if(!nobios) + return; + + ioa = *pioa; + mema = *pmema; + + DBG("pcibusmap wr=%d %T mem=%#lux io=%#lux\n", + wrreg, root->tbdf, mema, ioa); + + ntb = 0; + for(p = root; p != nil; p = p->link) + ntb++; + + ntb *= (PciCIS-PciBAR0)/4; + table = malloc(2*ntb*sizeof(Pcisiz)); + itb = table; + mtb = table+ntb; + + /* + * Build a table of sizes + */ + for(p = root; p != nil; p = p->link) { + if(p->ccrb == 0x06) { + if(p->ccru != 0x04 || p->bridge == nil) { + DBG("pci: ignored bridge %T\n", p->tbdf); + continue; + } + + sioa = ioa; + smema = mema; + pcibusmap(p->bridge, &smema, &sioa, 0); + + hole = pcimask(smema-mema); + if(hole < (1<<20)) + hole = 1<<20; + p->mema.size = hole; + + hole = pcimask(sioa-ioa); + 
if(hole < (1<<12)) + hole = 1<<12; + + p->ioa.size = hole; + + itb->dev = p; + itb->bar = -1; + itb->siz = p->ioa.size; + itb++; + + mtb->dev = p; + mtb->bar = -1; + mtb->siz = p->mema.size; + mtb++; + continue; + } + + for(i = 0; i <= 5; i++) { + rno = PciBAR0 + i*4; + v = pcicfgrw32(p->tbdf, rno, 0, 1); + size = pcibarsize(p, rno); + if(size == 0) + continue; + + if(v & 1) { + itb->dev = p; + itb->bar = i; + itb->siz = size; + itb++; + } + else { + mtb->dev = p; + mtb->bar = i; + mtb->siz = size; + mtb++; + if(v & 4) + i++; /* skip high word of 64-bit register */ + } + + p->mem[i].size = size; + } + } + + /* + * Sort both tables IO smallest first, Memory largest + */ + qsort(table, itb-table, sizeof(Pcisiz), pcisizcmp); + tptr = table+ntb; + qsort(tptr, mtb-tptr, sizeof(Pcisiz), pcisizcmp); + + /* + * Allocate IO address space on this bus + */ + for(tptr = table; tptr < itb; tptr++) { + hole = tptr->siz; + if(tptr->bar == -1) + hole = 1<<12; + ioa = (ioa+hole-1) & ~(hole-1); + + p = tptr->dev; + if(tptr->bar == -1) + p->ioa.bar = ioa; + else { + p->pcr |= IOen; + p->mem[tptr->bar].bar = ioa|1; + if(wrreg) + pcicfgrw32(p->tbdf, PciBAR0+(tptr->bar*4), ioa|1, 0); + } + + ioa += tptr->siz; + } + + /* + * Allocate Memory address space on this bus + */ + for(tptr = table+ntb; tptr < mtb; tptr++) { + hole = tptr->siz; + if(tptr->bar == -1) + hole = 1<<20; + mema = (mema+hole-1) & ~(hole-1); + + p = tptr->dev; + if(tptr->bar == -1) + p->mema.bar = mema; + else { + p->pcr |= MEMen; + p->mem[tptr->bar].bar = mema; + if(wrreg) + pcicfgrw32(p->tbdf, PciBAR0+(tptr->bar*4), mema, 0); + } + mema += tptr->siz; + } + + *pmema = mema; + *pioa = ioa; + free(table); + + if(wrreg == 0) + return; + + /* + * Finally set all the bridge addresses & registers + */ + for(p = root; p != nil; p = p->link) { + if(p->bridge == nil) { + pcicfgrw8(p->tbdf, PciLTR, 64, 0); + + p->pcr |= MASen; + pcicfgrw16(p->tbdf, PciPCR, p->pcr, 0); + continue; + } + + base = p->ioa.bar; + limit = 
base+p->ioa.size-1; + v = pcicfgrw32(p->tbdf, PciIBR, 0, 1); + v = (v&0xFFFF0000)|(limit & 0xF000)|((base & 0xF000)>>8); + pcicfgrw32(p->tbdf, PciIBR, v, 0); + v = (limit & 0xFFFF0000)|(base>>16); + pcicfgrw32(p->tbdf, PciIUBR, v, 0); + + base = p->mema.bar; + limit = base+p->mema.size-1; + v = (limit & 0xFFF00000)|((base & 0xFFF00000)>>16); + pcicfgrw32(p->tbdf, PciMBR, v, 0); + + /* + * Disable memory prefetch + */ + pcicfgrw32(p->tbdf, PciPMBR, 0x0000FFFF, 0); + pcicfgrw8(p->tbdf, PciLTR, 64, 0); + + /* + * Enable the bridge + */ + p->pcr |= IOen|MEMen|MASen; + pcicfgrw32(p->tbdf, PciPCR, 0xFFFF0000|p->pcr , 0); + + sioa = p->ioa.bar; + smema = p->mema.bar; + pcibusmap(p->bridge, &smema, &sioa, 1); + } +} + +static void +pxh6700(Pcidev *p) +{ + int l; + + /* + * errata #3, "signal integrity issues when driving secondary bus + * in PCI or PCI-X mode 1 ... (causes) parity errors and system hangs." + * + * Solution is: + * + * 1. Write `1's to Function 0/2 (F0 only for PXH-V) Register + * offset 224h Bits [29:17] prior to any PCI bus accesses to + * secondary interface. + * 2. Wait at least 1.5 micro seconds (us) before any secondary + * bus accesses after setting the register. + * 3. Warm (PCI-E reset) and Cold reset (PWROK reset, Front Panel + * reset) will clear these register bits requiring reprogramming. + * PXH Secondary Bus Reset will not clear the register. 
+ */ + l = pcicfgrw32(p->tbdf, 0x224, 0, 1); + l |= 0x3FFE0000; + pcicfgrw32(p->tbdf, 0x224, l, 0); + microdelay(2); + +// print("6700pxh %uX.%uX: pmode=%d pfreq=%d\n", p->did, p->rid, (l>>14)&3, (l>>9)&3); +} + +static void +bridgecfg(Pcidev *p) +{ + int l; + + switch (p->vid<<16 | p->did) { + case 0x80860329: + case 0x8086032a: + pxh6700(p); +// case 0x80863500: +// case 0x80863501: +// case 0x80863502: +// case 0x80863503: + /* enable IOxAPIC space */ + l = pcicfgrw16(p->tbdf, 0x40, 0, 1); + l &= ~(1<<13); + pcicfgrw16(p->tbdf, 0x40, l, 0); + } +} + +static int +pcilscan(int bno, Pcidev** list) +{ + Pcidev *p, *head, *tail; + int dno, fno, i, hdt, l, maxfno, maxubn, rno, sbn, tbdf, ubn; + + maxubn = bno; + head = nil; + tail = nil; + for(dno = 0; dno <= pcimaxdno; dno++){ + maxfno = 0; + for(fno = 0; fno <= maxfno; fno++){ + /* + * For this possible device, form the + * bus+device+function triplet needed to address it + * and try to read the vendor and device ID. + * If successful, allocate a device struct and + * start to fill it in with some useful information + * from the device's configuration space. 
+ */ + tbdf = MKBUS(BusPCI, bno, dno, fno); + l = pcicfgrw32(tbdf, PciVID, 0, 1); + if(l == 0xFFFFFFFF || l == 0) + continue; + p = malloc(sizeof(*p)); + p->tbdf = tbdf; + p->vid = l; + p->did = l>>16; + + l = pcicfgrw32(tbdf, PciSVID, 0, 1); + if(l != 0 && l != 0xFFFFFFFF) { + p->svid = l; + p->sdid = l>>16; + } + if(srxusehba && + p->svid == 0x1b52 && p->did == 1){ + /* coraid-modified pci device id: use substitute id */ + p->did = p->sdid; + p->sdid = 1; + } + + if(pcilist != nil) + pcitail->list = p; + else + pcilist = p; + pcitail = p; + + p->pcr = pcicfgr16(p, PciPCR); + p->rid = pcicfgr8(p, PciRID); + p->ccrp = pcicfgr8(p, PciCCRp); + p->ccru = pcicfgr8(p, PciCCRu); + p->ccrb = pcicfgr8(p, PciCCRb); + p->cls = pcicfgr8(p, PciCLS); + p->ltr = pcicfgr8(p, PciLTR); + + p->intl = pcicfgr8(p, PciINTL); + + /* + * If the device is a multi-function device adjust the + * loop count so all possible functions are checked. + */ + hdt = pcicfgr8(p, PciHDT); + if(hdt & 0x80) + maxfno = MaxFNO; + + /* + * If appropriate, read the base address registers + * and work out the sizes. + */ + switch(p->ccrb) { + default: + case 0x01: /* mass storage controller */ + case 0x02: /* network controller */ + case 0x03: /* display controller */ + case 0x04: /* multimedia device */ + case 0x07: /* simple comm. 
controllers */ + case 0x08: /* base system peripherals */ + case 0x09: /* input devices */ + case 0x0A: /* docking stations */ + case 0x0B: /* processors */ + case 0x0C: /* serial bus controllers */ + if((hdt & 0x7F) != 0) + break; + for(i = 0; i < nelem(p->mem); i++) { + rno = PciBAR0 + 4*i; + p->mem[i].bar = pcicfgr32(p, rno); + p->mem[i].size = pcibarsize(p, rno); + } + break; + + case 0x06: /* bridge device */ + bridgecfg(p); + break; + + case 0x00: + case 0x05: /* memory controller */ + break; + } + + if(head != nil) + tail->link = p; + else + head = p; + tail = p; + } + } + + *list = head; + for(p = head; p != nil; p = p->link){ + /* + * Find PCI-PCI bridges and recursively descend the tree. + */ + if(p->ccrb != 0x06 || p->ccru != 0x04) + continue; + + /* + * If the secondary or subordinate bus number is not + * initialised try to do what the PCI BIOS should have + * done and fill in the numbers as the tree is descended. + * On the way down the subordinate bus number is set to + * the maximum as it's not known how many buses are behind + * this one; the final value is set on the way back up. + */ + sbn = pcicfgr8(p, PciSBN); + ubn = pcicfgr8(p, PciUBN); + + if(sbn == 0 || ubn == 0 || nobios) { + + if(!nobios) + print("%T: unconfigured bridge\n", p->tbdf); + sbn = maxubn+1; + /* + * Make sure memory, I/O and master enables are + * off, set the primary, secondary and subordinate + * bus numbers and clear the secondary status before + * attempting to scan the secondary bus. + * + * Initialisation of the bridge should be done here. + */ + pcicfgw32(p, PciPCR, 0xFFFF0000); + l = (MaxUBN<<16)|(sbn<<8)|bno; + pcicfgw32(p, PciPBN, l); + pcicfgw16(p, PciSPSR, 0xFFFF); + maxubn = pcilscan(sbn, &p->bridge); + l = (maxubn<<16)|(sbn<<8)|bno; + + pcicfgw32(p, PciPBN, l); + } + else { + /* + * You can't go back. + * This shouldn't be possible, but the + * Iwill DK8-HTX seems to have subordinate + * bus numbers which get smaller on the + * way down. 
Need to look more closely at + * this. + */ + if(ubn > maxubn) + maxubn = ubn; + pcilscan(sbn, &p->bridge); + } + } + + return maxubn; +} + +static uchar +pIIxget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 0x60, 0x61, 0x62, 0x63 */ + pirq = pcicfgr8(router, link); + return (pirq < 16)? pirq: 0; +} + +static void +pIIxset(Pcidev *router, uchar link, uchar irq) +{ + pcicfgw8(router, link, irq); +} + +static uchar +viaget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 1, 2, 3, 5 */ + pirq = (link < 6)? pcicfgr8(router, 0x55 + (link>>1)): 0; + + return (link & 1)? (pirq >> 4): (pirq & 15); +} + +static void +viaset(Pcidev *router, uchar link, uchar irq) +{ + uchar pirq; + + pirq = pcicfgr8(router, 0x55 + (link >> 1)); + pirq &= (link & 1)? 0x0f: 0xf0; + pirq |= (link & 1)? (irq << 4): (irq & 15); + pcicfgw8(router, 0x55 + (link>>1), pirq); +} + +typedef struct Bridge Bridge; +struct Bridge +{ + ushort vid; + ushort did; + uchar (*get)(Pcidev *, uchar); + void (*set)(Pcidev *, uchar, uchar); +}; + +static Bridge southbridges[] = { + { 0x8086, 0xffff, pIIxget, pIIxset }, /* Intel * */ + { 0x1002, 0xffff, nil, nil }, /* ati (amd) */ + { 0x1022, 0xffff, nil, nil }, /* amd */ + + { 0x10DE, 0x00D1, nil, nil }, /* NVIDIA nForce 3 */ + { 0x10DE, 0x00E0, nil, nil }, /* NVIDIA nForce 3 250 Series */ + { 0x10DE, 0x00E1, nil, nil }, /* NVIDIA nForce 3 250 Series */ + + { 0x1106, 0x0586, viaget, viaset }, /* Viatech 82C586 */ + { 0x1106, 0x0596, viaget, viaset }, /* Viatech 82C596 */ + { 0x1106, 0x0686, viaget, viaset }, /* Viatech 82C686 */ + { 0x1106, 0x3227, viaget, viaset }, /* Viatech VT8237 */ + + { 0x1166, 0x0200, nil, nil }, /* ServerWorks ServerSet III LE */ +}; + +typedef struct Slot Slot; +struct Slot { + uchar bus; // Pci bus number + uchar dev; // Pci device number + uchar maps[12]; // Avoid structs! Link and mask. 
+ uchar slot; // Add-in/built-in slot + uchar reserved; +}; + +typedef struct Router Router; +struct Router { + uchar signature[4]; // Routing table signature + uchar version[2]; // Version number + uchar size[2]; // Total table size + uchar bus; // Interrupt router bus number + uchar devfn; // Router's devfunc + uchar pciirqs[2]; // Exclusive PCI irqs + uchar compat[4]; // Compatible PCI interrupt router + uchar miniport[4]; // Miniport data + uchar reserved[11]; + uchar checksum; +}; + +static ushort pciirqs; // Exclusive PCI irqs +static Bridge *southbridge; // Which southbridge to use. + +static void +pcirouting(void) +{ + Slot *e; + Router *r; + int size, i, fn, tbdf; + Pcidev *sbpci, *pci; + uchar *p, pin, irq, link, *map; + + /* Search for PCI interrupt routing table in BIOS */ + for(p = (uchar *)KADDR(0xf0000); p < (uchar *)KADDR(0xfffff); p += 16) + if(p[0] == '$' && p[1] == 'P' && p[2] == 'I' && p[3] == 'R') + break; + + if(p >= (uchar *)KADDR(0xfffff)) + return; + + r = (Router *)p; + + print("PCI interrupt routing table version %d.%d at %#p\n", + r->version[0], r->version[1], r); + + tbdf = (BusPCI << 24)|(r->bus << 16)|(r->devfn << 8); + sbpci = pcimatchtbdf(tbdf); + if(sbpci == nil) { + print("pcirouting: Cannot find south bridge %T\n", tbdf); + return; + } + + for(i = 0; i != nelem(southbridges); i++) + if(sbpci->vid == southbridges[i].vid && + (southbridges[i].did == 0xffff || sbpci->did == southbridges[i].did)) + break; + + if(i == nelem(southbridges)) { + print("pcirouting: ignoring south bridge %T %.4ux/%.4ux\n", tbdf, sbpci->vid, sbpci->did); + return; + } + southbridge = &southbridges[i]; + if(southbridge->get == nil || southbridge->set == nil) + return; + + pciirqs = (r->pciirqs[1] << 8)|r->pciirqs[0]; + + size = (r->size[1] << 8)|r->size[0]; + for(e = (Slot *)&r[1]; (uchar *)e < p + size; e++) { + if(0){ + print("%.2ux/%.2ux %.2ux: ", e->bus, e->dev, e->slot); + for (i = 0; i != 4; i++) { + uchar *m = &e->maps[i * 3]; + print("[%d] %.2ux 
%.4ux ", + i, m[0], (m[2] << 8)|m[1]); + } + print("\n"); + } + + for(fn = 0; fn <= MaxFNO; fn++) { + tbdf = (BusPCI << 24)|(e->bus << 16)|((e->dev | fn) << 8); + pci = pcimatchtbdf(tbdf); + if(pci == nil) + continue; + pin = pcicfgr8(pci, PciINTP); + if(pin == 0 || pin == 0xff) + continue; + + map = &e->maps[(pin - 1) * 3]; + link = map[0]; + irq = southbridge->get(sbpci, link); + if(irq == 0 || irq == pci->intl) + continue; + if(pci->intl != 0 && pci->intl != 0xFF) { + print("pcirouting: BIOS workaround: %T at pin %d link %d irq %d -> %d\n", + tbdf, pin, link, irq, pci->intl); + southbridge->set(sbpci, link, pci->intl); + continue; + } + print("pcirouting: %T at pin %d link %d irq %d\n", tbdf, pin, link, irq); + pcicfgw8(pci, PciINTL, irq); + pci->intl = irq; + } + } +} + +static void pcireservemem(void); + +static void +pcicfginit(void) +{ + char *p; + Pcidev **list; + ulong mema, ioa; + int bno, n; + + if(pcicfgmode != -1) + return; + + lock(&pcicfginitlock); + if(pcicfgmode != -1){ + unlock(&pcicfginitlock); + return; + } + + if(getconf("*srusehba")) + srxusehba = 1; + if(getconf("*nobios")) + nobios = 1; + if(getconf("*nopcirouting")) + nopcirouting = 1; + + /* + * Assume Configuration Mechanism One. Method Two was deprecated + * a long time ago and was only for backwards compaibility with the + * Intel Saturn and Mercury chip sets. + */ + pcicfgmode = 1; + pcimaxdno = MaxDNO; + + fmtinstall('T', tbdffmt); + + if(p = getconf("*pcimaxbno")){ + n = strtoul(p, 0, 0); + if(n < pcimaxbno) + pcimaxbno = n; + } + if(p = getconf("*pcimaxdno")){ + n = strtoul(p, 0, 0); + if(n < pcimaxdno) + pcimaxdno = n; + } + + list = &pciroot; + for(bno = 0; bno <= pcimaxbno; bno++) { + int sbno = bno; + bno = pcilscan(bno, list); + + while(*list != nil) + list = &(*list)->link; + + if (sbno == 0) { + Pcidev *pci; + + /* + * If we have found a PCI-to-Cardbus bridge, make sure + * it has no valid mappings anymore. 
+ */ + pci = pciroot; + while (pci) { + if (pci->ccrb == 6 && pci->ccru == 7) { + ushort bcr; + + /* reset the cardbus */ + bcr = pcicfgr16(pci, PciBCR); + pcicfgw16(pci, PciBCR, 0x40 | bcr); + delay(50); + } + pci = pci->link; + } + } + } + + if(pciroot == nil) + goto out; + + if(nobios) { + /* + * Work out how big the top bus is + */ + mema = 0; + ioa = 0; + pcibusmap(pciroot, &mema, &ioa, 0); + + DBG("Sizes: mem=%8.8lux size=%8.8lux io=%8.8lux\n", + mema, pcimask(mema), ioa); + + /* + * Align the windows and map it + */ + ioa = 0xD000; + mema = 0xFEA00000; + + pcilog("Mask sizes: mem=%lux io=%lux\n", mema, ioa); + + pcibusmap(pciroot, &mema, &ioa, 1); + DBG("Sizes2: mem=%lux io=%lux\n", mema, ioa); + + pcireservemem(); + unlock(&pcicfginitlock); + return; + } + + if(!nopcirouting) + pcirouting(); + +out: + pcireservemem(); + unlock(&pcicfginitlock); + + if(getconf("*pcihinv")) + pcihinv(nil); +} + +static void +pcireservemem(void) +{ + int i; + Pcidev *p; + + /* + * mark all the physical address space claimed by pci devices + * as in use, so that it's not given out elsewhere. + * beware the type and size of 'bar'. 
+ */ + for(p=pciroot; p != nil; p=p->list) + for(i=0; imem); i++) + if(p->mem[i].bar && (p->mem[i].bar&1) == 0) + memreserve(p->mem[i].bar&~(uintmem)0x0F, p->mem[i].size); +} + +static int +pcicfgrw8(int tbdf, int rno, int data, int read) +{ + int o, type, x; + + if(pcicfgmode == -1) + pcicfginit(); + + if(BUSBNO(tbdf)) + type = 0x01; + else + type = 0x00; + x = -1; + if(BUSDNO(tbdf) > pcimaxdno) + return x; + + ilock(&pcicfglock); + o = rno & 0x03; + rno &= ~0x03; + outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno|type); + if(read) + x = inb(PciDATA+o); + else + outb(PciDATA+o, data); + outl(PciADDR, 0); + iunlock(&pcicfglock); + + return x; +} + +int +pcicfgr8(Pcidev* pcidev, int rno) +{ + return pcicfgrw8(pcidev->tbdf, rno, 0, 1); +} + +void +pcicfgw8(Pcidev* pcidev, int rno, int data) +{ + pcicfgrw8(pcidev->tbdf, rno, data, 0); +} + +static int +pcicfgrw16(int tbdf, int rno, int data, int read) +{ + int o, type, x; + + if(pcicfgmode == -1) + pcicfginit(); + + if(BUSBNO(tbdf)) + type = 0x01; + else + type = 0x00; + x = -1; + if(BUSDNO(tbdf) > pcimaxdno) + return x; + + ilock(&pcicfglock); + o = rno & 0x02; + rno &= ~0x03; + outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno|type); + if(read) + x = ins(PciDATA+o); + else + outs(PciDATA+o, data); + outl(PciADDR, 0); + iunlock(&pcicfglock); + + return x; +} + +int +pcicfgr16(Pcidev* pcidev, int rno) +{ + return pcicfgrw16(pcidev->tbdf, rno, 0, 1); +} + +void +pcicfgw16(Pcidev* pcidev, int rno, int data) +{ + pcicfgrw16(pcidev->tbdf, rno, data, 0); +} + +static int +pcicfgrw32(int tbdf, int rno, int data, int read) +{ + int type, x; + + if(pcicfgmode == -1) + pcicfginit(); + + if(BUSBNO(tbdf)) + type = 0x01; + else + type = 0x00; + x = -1; + if(BUSDNO(tbdf) > pcimaxdno) + return x; + + ilock(&pcicfglock); + rno &= ~0x03; + outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno|type); + if(read) + x = inl(PciDATA); + else + outl(PciDATA, data); + outl(PciADDR, 0); + iunlock(&pcicfglock); + + return x; +} + +uint +pcicfgr32(Pcidev* pcidev, int 
rno) +{ + return pcicfgrw32(pcidev->tbdf, rno, 0, 1); +} + +void +pcicfgw32(Pcidev* pcidev, int rno, int data) +{ + pcicfgrw32(pcidev->tbdf, rno, data, 0); +} + +Pcidev* +pcimatch(Pcidev* prev, int vid, int did) +{ + if(pcicfgmode == -1) + pcicfginit(); + + if(prev == nil) + prev = pcilist; + else + prev = prev->list; + + while(prev != nil){ + if((vid == 0 || prev->vid == vid) + && (did == 0 || prev->did == did)) + break; + prev = prev->list; + } + return prev; +} + +Pcidev* +pcimatchtbdf(int tbdf) +{ + Pcidev *p; + + if(pcicfgmode == -1) + pcicfginit(); + + for(p = pcilist; p != nil; p = p->list) { + if(p->tbdf == tbdf) + break; + } + return p; +} + +uchar +pciipin(Pcidev *pci, uchar pin) +{ + uchar intl; + + if (pci == nil) + pci = pcilist; + + for(; pci != nil; pci = pci->list){ + if(pcicfgr8(pci, PciINTP) == pin && pci->intl != 0 && pci->intl != 0xff) + return pci->intl; + + if(pci->bridge && (intl = pciipin(pci->bridge, pin)) != 0) + return intl; + } + return 0; +} + +static void +pcilhinv(Pcidev* p) +{ + int i; + Pcidev *t; + + if(p == nil) { + putstrn(PCICONS.output, PCICONS.ptr); + p = pciroot; + print("tbdf: type vid did intl memory\n"); + } + for(t = p; t != nil; t = t->link) { + print("%T: %.2ux %.2ux %.2ux %.4ux/%.4ux %3d ", + t->tbdf, t->ccrb, t->ccru, t->ccrp, t->vid, t->did, t->intl); + + for(i = 0; i < nelem(p->mem); i++) { + if(t->mem[i].size == 0) + continue; + print("%d:%.8lux %d ", i, + t->mem[i].bar, t->mem[i].size); + } + if(t->ioa.bar || t->ioa.size) + print("ioa:%.8lux %d ", t->ioa.bar, t->ioa.size); + if(t->mema.bar || t->mema.size) + print("mema:%.8lux %d ", t->mema.bar, t->mema.size); + if(t->bridge != nil) + print("->%d", BUSBNO(t->bridge->tbdf)); + print("\n"); + } + while(p != nil) { + if(p->bridge != nil) + pcilhinv(p->bridge); + p = p->link; + } +} + +void +pcihinv(Pcidev* p) +{ + if(pcicfgmode == -1) + pcicfginit(); + lock(&pcicfginitlock); + pcilhinv(p); + unlock(&pcicfginitlock); +} + +void +pcireset(void) +{ + Pcidev *p; + + 
if(pcicfgmode == -1) + pcicfginit(); + + for(p = pcilist; p != nil; p = p->list) { + /* don't mess with the bridges */ + if(p->ccrb != 0x06) + pciclrbme(p); + } +} + +void +pcisetioe(Pcidev* p) +{ + p->pcr |= IOen; + pcicfgw16(p, PciPCR, p->pcr); +} + +void +pciclrioe(Pcidev* p) +{ + p->pcr &= ~IOen; + pcicfgw16(p, PciPCR, p->pcr); +} + +void +pcisetbme(Pcidev* p) +{ + p->pcr |= MASen; + pcicfgw16(p, PciPCR, p->pcr); +} + +void +pciclrbme(Pcidev* p) +{ + p->pcr &= ~MASen; + pcicfgw16(p, PciPCR, p->pcr); +} + +void +pcisetmwi(Pcidev* p) +{ + p->pcr |= MemWrInv; + pcicfgw16(p, PciPCR, p->pcr); +} + +void +pciclrmwi(Pcidev* p) +{ + p->pcr &= ~MemWrInv; + pcicfgw16(p, PciPCR, p->pcr); +} + +int +pcicap(Pcidev *p, int cap) +{ + int i, c, off; + + /* status register bit 4 has capabilities */ + if((pcicfgr16(p, PciPSR) & 1<<4) == 0) + return -1; + switch(pcicfgr8(p, PciHDT) & 0x7f){ + default: + return -1; + case 0: /* etc */ + case 1: /* pci to pci bridge */ + off = 0x34; + break; + case 2: /* cardbus bridge */ + off = 0x14; + break; + } + for(i = 48; i--;){ + off = pcicfgr8(p, off); + if(off < 0x40 || (off & 3)) + break; + off &= ~3; + c = pcicfgr8(p, off); + if(c == 0xff) + break; + if(c == cap) + return off; + off++; + } + return -1; +} + +static int +pcigetpmrb(Pcidev* p) +{ + if(p->pmrb != 0) + return p->pmrb; + return p->pmrb = pcicap(p, PciCapPMG); +} + +enum { + Pmgcap = 2, /* capabilities; 2 bytes*/ + Pmgctl = 4, /* ctl/status; 2 bytes */ + Pmgbrg = 6, /* bridge support */ + Pmgdata = 7, +}; + +int +pcigetpms(Pcidev* p) +{ + int ptr; + + if((ptr = pcigetpmrb(p)) == -1) + return -1; + return pcicfgr16(p, ptr+Pmgctl) & 0x0003; +} + +int +pcisetpms(Pcidev* p, int state) +{ + int pmc, pmcsr, ptr; + + if((ptr = pcigetpmrb(p)) == -1) + return -1; + + pmc = pcicfgr16(p, ptr+Pmgcap); + pmcsr = pcicfgr16(p, ptr+Pmgctl); + + switch(state){ + default: + return -1; + case 0: + break; + case 1: + if(!(pmc & 0x0200)) + return -1; + break; + case 2: + if(!(pmc & 0x0400)) + 
return -1; + break; + case 3: + break; + } + pcicfgw16(p, ptr+4, (pmcsr & ~3) | state); + return pmcsr & 3; +} + +void* +pcixcfgaddr(Pcidev *p, int rno) +{ + ulong tbdf; + uintmem pa; + + tbdf = p->tbdf; + if(p->xcfg == nil){ + pa = pcixcfgspace(BUSBNO(tbdf)); + if(pa == 0) + return nil; + pa += MKPCIX(BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf), 0); + p->xcfg = vmap(pa, 4096); + if(p->xcfg == nil) + return nil; + } + return (uchar*)p->xcfg + rno; +} diff -Nru /sys/src/9k/386/pmc.h /sys/src/9k/386/pmc.h --- /sys/src/9k/386/pmc.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/pmc.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,59 @@ +typedef struct PmcCtl PmcCtl; +typedef struct PmcCtr PmcCtr; +typedef struct PmcCtlCtrId PmcCtlCtrId; + +/* + * HW performance counters + */ +struct PmcCtl { + u32int coreno; + int enab; + int user; + int os; + int nodesc; + char descstr[KNAMELEN]; +}; + +struct PmcCtr{ + int stale; + Rendez r; + u64int ctr; + int ctrset; + PmcCtl; + int ctlset; +}; + +enum { + PmcMaxCtrs = 4, +}; + +struct PmcCore{ + Lock; + PmcCtr ctr[PmcMaxCtrs]; +}; + +struct PmcCtlCtrId { + char portdesc[KNAMELEN]; + char archdesc[KNAMELEN]; +}; + +enum { + PmcIgn = 0, + PmcGet = 1, + PmcSet = 2, +}; + +enum { + PmcCtlNullval = 0xdead, +}; + +extern int pmcnregs(void); +extern void pmcinitctl(PmcCtl*); +extern int pmcsetctl(u32int, PmcCtl*, u32int); +extern int pmctrans(PmcCtl*); +extern int pmcgetctl(u32int, PmcCtl*, u32int); +extern int pmcdescstr(char*, int); +extern u64int pmcgetctr(u32int, u32int); +extern int pmcsetctr(u32int, u64int, u32int); + +extern void pmcconfigure(void); diff -Nru /sys/src/9k/386/pmcio.c /sys/src/9k/386/pmcio.c --- /sys/src/9k/386/pmcio.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/pmcio.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,586 @@ +/* + * Performance counters non portable part + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "amd64.h" +#include "pmc.h" + 
+typedef struct PmcCfg PmcCfg; +typedef struct PmcCore PmcCore; + +enum { + PeUnk, + PeAmd, + /* + * See Vol 3B Intel + * 64 Architecture's Software Developer's manual + */ + PeIntel, +}; + +enum { + _PeUnk, + /* Non architectural */ + PeIntelSandy, + PeIntelNehalem, + PeIntelWestmere, + /* + * See BKDG for AMD cfg.family 10 Processors + * section 2.16 and 3.14 + */ + PeK10, + +}; + +enum { + PeNregAmd = 4, /* Number of Pe/Pct regs for K10 */ +}; + +enum { /* MSRs */ + PerfCtrbaseIntel= 0x000000c1, /* Performance Counters */ + PerfEvtbaseIntel= 0x00000186, /* Performance Event Select */ + PerfGlobalCtr = 0x0000038f, /* Performance Event Global Ctrl, intel */ + + PerfEvtbaseAmd = 0xc0010000, /* Performance Event Select */ + PerfCtrbaseAmd = 0xc0010004, /* Performance Counters */ +}; + +enum { /* HW Performance Counters Event Selector */ + + PeHo = 0x0000020000000000ull,/* Host only */ + PeGo = 0x0000010000000000ull,/* Guest only */ + PeEvMskH = 0x0000000f00000000ull,/* Event mask H */ + PeCtMsk = 0x00000000ff000000ull,/* Counter mask */ + PeInMsk = 0x0000000000800000ull,/* Invert mask */ + PeCtEna = 0x0000000000400000ull,/* Counter enable */ + PeInEna = 0x0000000000100000ull,/* Interrupt enable */ + PePnCtl = 0x0000000000080000ull,/* Pin control */ + PeEdg = 0x0000000000040000ull,/* Edge detect */ + PeOS = 0x0000000000020000ull,/* OS mode */ + PeUsr = 0x0000000000010000ull,/* User mode */ + PeUnMsk = 0x000000000000ff00ull,/* Unit Mask */ + PeEvMskL = 0x00000000000000ffull,/* Event Mask L */ + + PeEvMsksh = 32ull, /* Event mask shift */ +}; + +struct PmcCfg { + int nregs; + u32int ctrbase; + u32int evtbase; + int vendor; + int family; + PmcCtlCtrId *pmcidsarch; + PmcCtlCtrId *pmcids; +}; + +extern int pmcdebug; + +static PmcCfg cfg; +static PmcCore pmccore[MACHMAX]; + +static void pmcmachupdate(void); + +int +pmcnregs(void) +{ + u32int info[4]; + int nregs; + + if(cfg.nregs != 0) + return cfg.nregs; /* don't call cpuid more than necessary */ + switch(cfg.vendor){ + 
case PeAmd: + nregs = PeNregAmd; + break; + case PeIntel: + cpuid(0xa, 0, info); + nregs = (info[0]>>8)&0xff; + break; + default: + nregs = 0; + } + if(nregs > PmcMaxCtrs) + nregs = PmcMaxCtrs; + return nregs; +} + +static u64int +pmcmsk(void) +{ + u32int info[4]; + u64int msk; + + msk = 0; + switch(cfg.vendor){ + case PeAmd: + msk = ~0ULL; + break; + case PeIntel: + cpuid(0xa, 0, info); + msk = (1<<((info[0]>>16)&0xff)) - 1; + break; + } + return msk; +} + +PmcCtlCtrId pmcidsk10[] = { + {"locked instr", "0x024 0x1"}, + {"locked cycles nonspecul", "0x024 0x4"}, /* in cycles */ + {"SMI intr", "0x02b 0x0"}, + {"DC access", "0x040 0x0"}, + {"DC miss", "0x041 0x0"}, + {"DC refills", "0x042 0x1f"}, + {"DC evicted", "0x042 0x3f"}, + {"L1 DTLB miss", "0x045 0x7"}, /* DTLB L2 hits */ + {"L2 DTLB miss", "0x046 0x7"}, + {"L1 DTLB hit", "0x04d 0x3"}, + {"global TLB flush", "0x054 0x0"}, + {"L2 hit", "0x07d 0x3f"}, + {"L2 miss", "0x07e 0xf"}, + {"IC miss", "0x081 0x0"}, + {"IC refill from L2", "0x082 0x0"}, + {"IC refill from system", "0x083 0x0"}, + {"L1 ITLB miss", "0x084 0x0"}, /* L2 ITLB hits */ + {"L2 ITLB miss", "0x085 0x3"}, + {"DRAM access", "0x0e0 0x3f"}, + //{"L3 miss core 0", "0x4e1 0x13"}, + //{"L3 miss core 1", "0x4e1 0x23"}, + //{"L3 miss core 2", "0x4e1 0x43"}, + //{"L3 miss core 3", "0x4e1 0x83"}, + {"L3 miss", "0x4e1 0xf3"}, /* all cores in the socket */ + {"", ""}, +}; + +/*18.2.3 Intel Software Deveveloper's Manual */ +PmcCtlCtrId pmcidsintel[] = { + {"unhalted cycles", "0x3c 0x0"}, + {"instr", "0xc0 0x0"}, + {"Llast misses", "0x2e 0x41"}, + {"branch instr", "0xc4 0x0"}, + {"branch misses", "0xc5 0x0 "}, + {"", ""}, +}; + +/* Table 19.7 Intel Software Deveveloper's Manual */ +PmcCtlCtrId pmcidsandy[] = { + {"DTLB walk cycles", "0x49 0x4"}, /* all levels */ + {"DTLB miss", "0x8 0x2"}, + {"DTLB hit", "0x8 0x4"}, + {"L2 hit", "0x24 0x4"}, + {"L2 miss", "0x24 0x8"}, + {"IL2 hit", "0x24 0x10"}, + {"IL2 miss", "0x24 0x20"}, + {"ITLB miss", "0x85 0x2"}, + {"ITLB 
walk cycles", "0x85 0x4"}, + {"ITLB flush", "0xae 0x1"}, + {"mem loads", "0xd0 0xf1"}, /* counts μops */ + {"mem stores", "0xd0 0xf2"}, + {"mem ops", "0xd0 0xf3"}, + {"", ""}, +}; + +#define X86MODEL(x) ((((x)>>4) & 0x0F) | (((x)>>16) & 0x0F)<<4) +#define X86FAMILY(x) ((((x)>>8) & 0x0F) | (((x)>>20) & 0xFF)<<4) + +static int +pmcintelfamily(void) +{ + u32int info, fam, mod; + + info = m->cpuinfo[1][0]; + + fam = X86FAMILY(info); + mod = X86MODEL(info); + if(fam != 0x6) + return PeUnk; + switch(mod){ + case 0x2a: + return PeIntelSandy; + case 0x1a: + case 0x1e: + case 0x1f: + return PeIntelNehalem; + case 0x25: + case 0x2c: + return PeIntelWestmere; + } + return PeUnk; +} + +void +pmcinitctl(PmcCtl *p) +{ + memset(p, 0xff, sizeof(PmcCtl)); + p->enab = PmcCtlNullval; + p->user = PmcCtlNullval; + p->os = PmcCtlNullval; + p->nodesc = 1; +} + +void +pmcconfigure(void) +{ + Mach *mach; + int i, j, isrecog; + + isrecog = 0; + + if(memcmp(&m->cpuinfo[0][1], "AuthcAMDenti", 12) == 0){ + isrecog++; + cfg.ctrbase = PerfCtrbaseAmd; + cfg.evtbase = PerfEvtbaseAmd; + cfg.vendor = PeAmd; + cfg.family = PeUnk; + cfg.pmcidsarch = pmcidsk10; + }else if(memcmp(&m->cpuinfo[0][1], "GenuntelineI", 12) == 0){ + isrecog++; + cfg.ctrbase = PerfCtrbaseIntel; + cfg.evtbase = PerfEvtbaseIntel; + cfg.vendor = PeIntel; + cfg.family = pmcintelfamily(); + cfg.pmcidsarch = pmcidsintel; + switch(cfg.family){ + case PeIntelSandy: + cfg.pmcids = pmcidsandy; + break; + case PeIntelNehalem: + case PeIntelWestmere: + break; + } + }else + cfg.vendor = PeUnk; + + cfg.nregs = pmcnregs(); + if(isrecog) + pmcupdate = pmcmachupdate; + + for(i = 0; i < MACHMAX; i++) { + if((mach = sys->machptr[i]) != nil && mach->online != 0){ + for(j = 0; j < cfg.nregs; j++) + pmcinitctl(&pmccore[i].ctr[j]); + } + } +} + +static void +pmcenab(void) +{ + switch(cfg.vendor){ + case PeAmd: + return; + case PeIntel: + wrmsr(PerfGlobalCtr, pmcmsk()); + break; + } +} + +/* so they can be read from user space */ +static int 
+pmcuserenab(int enable) +{ + u64int cr4; + + cr4 = cr4get(); + if (enable){ + cr4 |= Pce; + } else + cr4 &= ~Pce; + cr4put(cr4); + return cr4&Pce; +} + +int +pmctrans(PmcCtl *p) +{ + PmcCtlCtrId *pi; + int n; + + n = 0; + if(cfg.pmcidsarch != nil) + for (pi = &cfg.pmcidsarch[0]; pi->portdesc[0] != '\0'; pi++){ + if (strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){ + strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1); + n = 1; + break; + } + } + /* this ones supersede the other ones */ + if(cfg.pmcids != nil) + for (pi = &cfg.pmcids[0]; pi->portdesc[0] != '\0'; pi++){ + if (strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){ + strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1); + n = 1; + break; + } + } + if(pmcdebug != 0) + print("really setting %s\n", p->descstr); + return n; +} + +//PeHo|PeGo +#define PeAll (PeOS|PeUsr) +#define SetEvMsk(v, e) ((v)|(((e)&PeEvMskL)|(((e)<<(PeEvMsksh-8))&PeEvMskH))) +#define SetUMsk(v, u) ((v)|(((u)<<8ull)&PeUnMsk)) + +#define GetEvMsk(e) (((e)&PeEvMskL)|(((e)&PeEvMskH)>>(PeEvMsksh-8))) +#define GetUMsk(u) (((u)&PeUnMsk)>>8ull) + +static int +getctl(PmcCtl *p, u32int regno) +{ + u64int r, e, u; + + r = rdmsr(regno + cfg.evtbase); + p->enab = (r&PeCtEna) != 0; + p->user = (r&PeUsr) != 0; + p->os = (r&PeOS) != 0; + e = GetEvMsk(r); + u = GetUMsk(r); + /* TODO inverse translation */ + snprint(p->descstr, KNAMELEN, "%#ullx %#ullx", e, u); + p->nodesc = 0; + return 0; +} + +static int +pmcanyenab(void) +{ + int i; + PmcCtl p; + + for (i = 0; i < cfg.nregs; i++) { + if (getctl(&p, i) < 0) + return -1; + if (p.enab) + return 1; + } + + return 0; +} + + +static int +setctl(PmcCtl *p, int regno) +{ + u64int v, e, u; + char *toks[2]; + char str[KNAMELEN]; + + v = rdmsr(regno + cfg.evtbase); + v &= PeEvMskH|PeEvMskL|PeCtEna|PeOS|PeUsr|PeUnMsk; + if (p->enab != PmcCtlNullval) + if (p->enab) + v |= PeCtEna; + else + v &= ~PeCtEna; + + if (p->user != PmcCtlNullval) + if (p->user) + v |= PeUsr; + else + 
v &= ~PeUsr; + + if (p->os != PmcCtlNullval) + if (p->os) + v |= PeOS; + else + v &= ~PeOS; + + if (pmctrans(p) < 0) + return -1; + + if (p->nodesc == 0) { + memmove(str, p->descstr, KNAMELEN); + if (tokenize(str, toks, 2) != 2) + return -1; + e = atoi(toks[0]); + u = atoi(toks[1]); + v &= ~(PeEvMskL|PeEvMskH|PeUnMsk); + v |= SetEvMsk(v, e); + v |= SetUMsk(v, u); + } + wrmsr(regno+ cfg.evtbase, v); + pmcuserenab(pmcanyenab()); + if (pmcdebug) { + v = rdmsr(regno+ cfg.evtbase); + print("conf pmc[%#ux]: %#llux\n", regno, v); + } + return 0; +} + +int +pmcdescstr(char *str, int nstr) +{ + PmcCtlCtrId *pi; + int ns; + + ns = 0; + + if(pmcdebug != 0) + print("vendor %x family %x nregs %d pmcnregs %d\n", cfg.vendor, cfg.family, cfg.nregs, pmcnregs()); + if(cfg.pmcidsarch == nil && cfg.pmcids == nil){ + *str = 0; + return ns; + } + + if(cfg.pmcidsarch != nil) + for (pi = &cfg.pmcidsarch[0]; pi->portdesc[0] != '\0'; pi++) + ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc); + if(cfg.pmcids != nil) + for (pi = &cfg.pmcids[0]; pi->portdesc[0] != '\0'; pi++) + ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc); + return ns; +} + +static u64int +getctr(u32int regno) +{ + return rdmsr(regno + cfg.ctrbase); +} + +static int +setctr(u64int v, u32int regno) +{ + wrmsr(regno + cfg.ctrbase, v); + return 0; +} + +u64int +pmcgetctr(u32int coreno, u32int regno) +{ + PmcCtr *p; + u64int ctr; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + ctr = getctr(regno); + else + ctr = p->ctr; + iunlock(&pmccore[coreno]); + + return ctr; +} + +int +pmcsetctr(u32int coreno, u64int v, u32int regno) +{ + PmcCtr *p; + int n; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + n = setctr(v, regno); + else{ + p->ctr = v; + p->ctrset |= PmcSet; + p->stale = 1; + n = 0; + } + iunlock(&pmccore[coreno]); + + 
return n; +} + +static void +ctl2ctl(PmcCtl *dctl, PmcCtl *sctl) +{ + if(sctl->enab != PmcCtlNullval) + dctl->enab = sctl->enab; + if(sctl->user != PmcCtlNullval) + dctl->user = sctl->user; + if(sctl->os != PmcCtlNullval) + dctl->os = sctl->os; + if(sctl->nodesc == 0) { + memmove(dctl->descstr, sctl->descstr, KNAMELEN); + dctl->nodesc = 0; + } +} + +int +pmcsetctl(u32int coreno, PmcCtl *pctl, u32int regno) +{ + PmcCtr *p; + int n; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + n = setctl(pctl, regno); + else{ + ctl2ctl(&p->PmcCtl, pctl); + p->ctlset |= PmcSet; + p->stale = 1; + n = 0; + } + iunlock(&pmccore[coreno]); + + return n; +} + +int +pmcgetctl(u32int coreno, PmcCtl *pctl, u32int regno) +{ + PmcCtr *p; + int n; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + n = getctl(pctl, regno); + else{ + memmove(pctl, &p->PmcCtl, sizeof(PmcCtl)); + n = 0; + } + iunlock(&pmccore[coreno]); + + return n; +} + +static void +pmcmachupdate(void) +{ + PmcCtr *p; + int coreno, i, maxct; + + if((maxct = cfg.nregs) <= 0) + return; + coreno = m->machno; + + ilock(&pmccore[coreno]); + for (i = 0; i < maxct; i++) { + p = &pmccore[coreno].ctr[i]; + if(p->ctrset & PmcSet) + setctr(p->ctr, i); + if(p->ctlset & PmcSet) + setctl(p, i); + p->ctr = getctr(i); + getctl(p, i); + p->ctrset = PmcIgn; + p->ctlset = PmcIgn; + p->stale = 0; + } + iunlock(&pmccore[coreno]); +} diff -Nru /sys/src/9k/386/sdata.c /sys/src/9k/386/sdata.c --- /sys/src/9k/386/sdata.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/sdata.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2312 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +extern SDifc sdataifc; + +enum { + DbgCONFIG = 0x0001, /* detected 
drive config info */ + DbgIDENTIFY = 0x0002, /* detected drive identify info */ + DbgSTATE = 0x0004, /* dump state on panic */ + DbgPROBE = 0x0008, /* trace device probing */ + DbgDEBUG = 0x0080, /* the current problem... */ + DbgINL = 0x0100, /* That Inil20+ message we hate */ + Dbg48BIT = 0x0200, /* 48-bit LBA */ + DbgBsy = 0x0400, /* interrupt but Bsy (shared IRQ) */ +}; +#define DEBUG (DbgDEBUG|DbgSTATE) + +enum { /* I/O ports */ + Data = 0, + Error = 1, /* (read) */ + Features = 1, /* (write) */ + Count = 2, /* sector count<7-0>, sector count<15-8> */ + Ir = 2, /* interrupt reason (PACKET) */ + Sector = 3, /* sector number */ + Lbalo = 3, /* LBA<7-0>, LBA<31-24> */ + Cyllo = 4, /* cylinder low */ + Bytelo = 4, /* byte count low (PACKET) */ + Lbamid = 4, /* LBA<15-8>, LBA<39-32> */ + Cylhi = 5, /* cylinder high */ + Bytehi = 5, /* byte count hi (PACKET) */ + Lbahi = 5, /* LBA<23-16>, LBA<47-40> */ + Dh = 6, /* Device/Head, LBA<27-24> */ + Status = 7, /* (read) */ + Command = 7, /* (write) */ + + As = 2, /* Alternate Status (read) */ + Dc = 2, /* Device Control (write) */ +}; + +enum { /* Error */ + Med = 0x01, /* Media error */ + Ili = 0x01, /* command set specific (PACKET) */ + Nm = 0x02, /* No Media */ + Eom = 0x02, /* command set specific (PACKET) */ + Abrt = 0x04, /* Aborted command */ + Mcr = 0x08, /* Media Change Request */ + Idnf = 0x10, /* no user-accessible address */ + Mc = 0x20, /* Media Change */ + Unc = 0x40, /* Uncorrectable data error */ + Wp = 0x40, /* Write Protect */ + Icrc = 0x80, /* Interface CRC error */ +}; + +enum { /* Features */ + Dma = 0x01, /* data transfer via DMA (PACKET) */ + Ovl = 0x02, /* command overlapped (PACKET) */ +}; + +enum { /* Interrupt Reason */ + Cd = 0x01, /* Command/Data */ + Io = 0x02, /* I/O direction: read */ + Rel = 0x04, /* Bus Release */ +}; + +enum { /* Device/Head */ + Dev0 = 0xA0, /* Master */ + Dev1 = 0xB0, /* Slave */ + Lba = 0x40, /* LBA mode */ +}; + +enum { /* Status, Alternate Status */ + Err = 0x01, 
/* Error */
+	Chk		= 0x01,		/* Check error (PACKET) */
+	Drq		= 0x08,		/* Data Request */
+	Dsc		= 0x10,		/* Device Seek Complete */
+	Serv		= 0x10,		/* Service (same bit value as Dsc) */
+	Df		= 0x20,		/* Device Fault */
+	Dmrd		= 0x20,		/* DMA ready (PACKET; same bit value as Df) */
+	Drdy		= 0x40,		/* Device Ready */
+	Bsy		= 0x80,		/* Busy */
+};
+
+enum {					/* Command */
+	Cnop		= 0x00,		/* NOP */
+	Cdr		= 0x08,		/* Device Reset */
+	Crs		= 0x20,		/* Read Sectors */
+	Crs48		= 0x24,		/* Read Sectors Ext */
+	Crd48		= 0x25,		/* Read w/ DMA Ext */
+	Crdq48		= 0x26,		/* Read w/ DMA Queued Ext */
+	Crsm48		= 0x29,		/* Read Multiple Ext */
+	Cws		= 0x30,		/* Write Sectors */
+	Cws48		= 0x34,		/* Write Sectors Ext */
+	Cwd48		= 0x35,		/* Write w/ DMA Ext */
+	Cwdq48		= 0x36,		/* Write w/ DMA Queued Ext */
+	Cwsm48		= 0x39,		/* Write Multiple Ext */
+	Cedd		= 0x90,		/* Execute Device Diagnostics */
+	Cpkt		= 0xA0,		/* Packet */
+	Cidpkt		= 0xA1,		/* Identify Packet Device */
+	Crsm		= 0xC4,		/* Read Multiple */
+	Cwsm		= 0xC5,		/* Write Multiple */
+	Csm		= 0xC6,		/* Set Multiple */
+	Crdq		= 0xC7,		/* Read DMA queued */
+	Crd		= 0xC8,		/* Read DMA */
+	Cwd		= 0xCA,		/* Write DMA */
+	Cwdq		= 0xCC,		/* Write DMA queued */
+	Cstandby	= 0xE2,		/* Standby */
+	Cid		= 0xEC,		/* Identify Device */
+	Csf		= 0xEF,		/* Set Features */
+};
+
+enum {					/* Device Control */
+	Nien		= 0x02,		/* (not) Interrupt Enable */
+	Srst		= 0x04,		/* Software Reset */
+	Hob		= 0x80,		/* High Order Bit [sic] */
+};
+
+enum {					/* PCI Configuration Registers */
+	Bmiba		= 0x20,		/* Bus Master Interface Base Address */
+	Idetim		= 0x40,		/* IDE Timing */
+	Sidetim		= 0x44,		/* Slave IDE Timing */
+	Udmactl		= 0x48,		/* Ultra DMA/33 Control */
+	Udmatim		= 0x4A,		/* Ultra DMA/33 Timing */
+};
+
+enum {					/* Bus Master IDE I/O Ports (offsets from ctlr->bmiba) */
+	Bmicx		= 0,		/* Command */
+	Bmisx		= 2,		/* Status */
+	Bmidtpx		= 4,		/* Descriptor Table Pointer */
+};
+
+enum {					/* Bmicx */
+	Ssbm		= 0x01,		/* Start/Stop Bus Master */
+	Rwcon		= 0x08,		/* Read/Write Control */
+};
+
+enum {					/* Bmisx */
+	Bmidea		= 0x01,		/* Bus Master IDE Active */
+	Idedmae		= 0x02,		/* IDE DMA Error (R/WC) */
+	Ideints		= 0x04,		/* IDE Interrupt Status (R/WC) */
+	Dma0cap		= 0x20,		/* Drive 0 DMA Capable */
+	Dma1cap		= 0x40,		/* Drive 1 DMA Capable */
+};
+enum {					/* Physical Region Descriptor */
+	PrdEOT		= 0x80000000,	/* End of Transfer (flag in Prd.count) */
+};
+
+enum {					/* offsets into the identify info. (indices into Drive.info[]) */
+	Iconfig		= 0,		/* general configuration */
+	Ilcyl		= 1,		/* logical cylinders */
+	Ilhead		= 3,		/* logical heads */
+	Ilsec		= 6,		/* logical sectors per logical track */
+	Iserial		= 10,		/* serial number */
+	Ifirmware	= 23,		/* firmware revision */
+	Imodel		= 27,		/* model number */
+	Imaxrwm		= 47,		/* max. read/write multiple sectors */
+	Icapabilities	= 49,		/* capabilities */
+	Istandby	= 50,		/* device specific standby timer */
+	Ipiomode	= 51,		/* PIO data transfer mode number */
+	Ivalid		= 53,		/* validity bits: 0x01 gates Iccyl..Iccap, 0x04 gates Iudma */
+	Iccyl		= 54,		/* cylinders if (valid&0x01) */
+	Ichead		= 55,		/* heads if (valid&0x01) */
+	Icsec		= 56,		/* sectors if (valid&0x01) */
+	Iccap		= 57,		/* capacity if (valid&0x01) */
+	Irwm		= 59,		/* read/write multiple */
+	Ilba		= 60,		/* LBA size */
+	Imwdma		= 63,		/* multiword DMA mode */
+	Iapiomode	= 64,		/* advanced PIO modes supported */
+	Iminmwdma	= 65,		/* min. multiword DMA cycle time */
+	Irecmwdma	= 66,		/* rec. multiword DMA cycle time */
+	Iminpio		= 67,		/* min. PIO cycle w/o flow control */
+	Iminiordy	= 68,		/* min. PIO cycle with IORDY */
+	Ipcktbr		= 71,		/* time from PACKET to bus release */
+	Iserbsy		= 72,		/* time from SERVICE to !Bsy */
+	Iqdepth		= 75,		/* max. 
queue depth */ + Imajor = 80, /* major version number */ + Iminor = 81, /* minor version number */ + Icsfs = 82, /* command set/feature supported */ + Icsfe = 85, /* command set/feature enabled */ + Iudma = 88, /* ultra DMA mode */ + Ierase = 89, /* time for security erase */ + Ieerase = 90, /* time for enhanced security erase */ + Ipower = 91, /* current advanced power management */ + Ilba48 = 100, /* 48-bit LBA size (64 bits in 100-103) */ + Irmsn = 127, /* removable status notification */ + Isecstat = 128, /* security status */ + Icfapwr = 160, /* CFA power mode */ + Imediaserial = 176, /* current media serial number */ + Icksum = 255, /* checksum */ +}; + +enum { /* bit masks for config identify info */ + Mpktsz = 0x0003, /* packet command size */ + Mincomplete = 0x0004, /* incomplete information */ + Mdrq = 0x0060, /* DRQ type */ + Mrmdev = 0x0080, /* device is removable */ + Mtype = 0x1F00, /* device type */ + Mproto = 0x8000, /* command protocol */ +}; + +enum { /* bit masks for capabilities identify info */ + Mdma = 0x0100, /* DMA supported */ + Mlba = 0x0200, /* LBA supported */ + Mnoiordy = 0x0400, /* IORDY may be disabled */ + Miordy = 0x0800, /* IORDY supported */ + Msoftrst = 0x1000, /* needs soft reset when Bsy */ + Mstdby = 0x2000, /* standby supported */ + Mqueueing = 0x4000, /* queueing overlap supported */ + Midma = 0x8000, /* interleaved DMA supported */ +}; + +enum { /* bit masks for supported/enabled features */ + Msmart = 0x0001, + Msecurity = 0x0002, + Mrmmedia = 0x0004, + Mpwrmgmt = 0x0008, + Mpkt = 0x0010, + Mwcache = 0x0020, + Mlookahead = 0x0040, + Mrelirq = 0x0080, + Msvcirq = 0x0100, + Mreset = 0x0200, + Mprotected = 0x0400, + Mwbuf = 0x1000, + Mrbuf = 0x2000, + Mnop = 0x4000, + Mmicrocode = 0x0001, + Mqueued = 0x0002, + Mcfa = 0x0004, + Mapm = 0x0008, + Mnotify = 0x0010, + Mstandby = 0x0020, + Mspinup = 0x0040, + Mmaxsec = 0x0100, + Mautoacoustic = 0x0200, + Maddr48 = 0x0400, + Mdevconfov = 0x0800, + Mflush = 0x1000, + Mflush48 = 
0x2000, + Msmarterror = 0x0001, + Msmartselftest = 0x0002, + Mmserial = 0x0004, + Mmpassthru = 0x0008, + Mlogging = 0x0020, +}; + +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +typedef struct Prd { /* Physical Region Descriptor */ + ulong pa; /* Physical Base Address */ + int count; +} Prd; + +enum { + BMspan = 64*1024, /* must be power of 2 <= 64*1024 */ + + Nprd = SDmaxio/BMspan+2, +}; + +typedef struct Ctlr { + int cmdport; + int ctlport; + int irq; + int tbdf; + int bmiba; /* bus master interface base address */ + int maxio; /* sector count transfer maximum */ + int span; /* don't span this boundary with dma */ + void* vector; + + Pcidev* pcidev; + void (*ienable)(Ctlr*); + void (*idisable)(Ctlr*); + SDev* sdev; + + Drive* drive[2]; + + Prd* prdt; /* physical region descriptor table */ + void (*irqack)(Ctlr*); /* call to extinguish ICH intrs */ + + QLock; /* current command */ + Drive* curdrive; + int command; /* last command issued (debugging) */ + Rendez; + int done; + + /* interrupt counts */ + ulong intnil; /* no drive */ + ulong intbusy; /* controller still busy */ + ulong intok; /* normal */ + + Lock; /* register access */ +} Ctlr; + +typedef struct Drive { + Ctlr* ctlr; + + int dev; + ushort info[256]; + int c; /* cylinder */ + int h; /* head */ + int s; /* sector */ + vlong sectors; /* total */ + int secsize; /* sector size */ + + int dma; /* DMA R/W possible */ + int dmactl; + int rwm; /* read/write multiple possible */ + int rwmctl; + + int pkt; /* PACKET device, length of pktcmd */ + uchar pktcmd[16]; + int pktdma; /* this PACKET command using dma */ + + uchar sense[18]; + uchar inquiry[48]; + + QLock; /* drive access */ + int command; /* current command */ + int write; + uchar* data; + int dlen; + uchar* limit; + int count; /* sectors */ + int block; /* R/W bytes per block */ + int status; + int error; + int flags; /* internal flags */ + + /* interrupt counts */ + ulong intcmd; /* commands */ + ulong intrd; /* reads */ + ulong intwr; /* 
writes */
+} Drive;
+
+enum {					/* internal flags */
+	Lba48		= 0x1,		/* LBA48 mode */
+	Lba48always	= 0x2,		/* force 48-bit commands even when lba/count would fit 28-bit ones */
+};
+enum {
+	/* highest lba issued with 28-bit commands; anything above it needs Lba48 */
+	Last28		= (1<<28) - 1 - 1,	/* all-ones mask is not addressable */
+};
+
+/*
+ * Clear one bit in the 32-bit PCI configuration register at offset
+ * 0x40: 0x100 when this controller's command port is the device's
+ * BAR 0, 0x200 otherwise.  Does nothing when the controller has no
+ * PCI device attached.
+ * NOTE(review): going by the name this unmasks the channel interrupt
+ * on a PC87415; the bit meanings are not shown here - confirm against
+ * the chip documentation.
+ */
+static void
+pc87415ienable(Ctlr* ctlr)
+{
+	Pcidev *p;
+	int x;
+
+	p = ctlr->pcidev;
+	if(p == nil)
+		return;
+
+	x = pcicfgr32(p, 0x40);
+	if(ctlr->cmdport == p->mem[0].bar)
+		x &= ~0x00000100;
+	else
+		x &= ~0x00000200;
+	pcicfgw32(p, 0x40, x);
+}
+
+/*
+ * Debugging aid: print the state of a drive and its controller.
+ * Compiled to a no-op unless DbgSTATE is set in DEBUG.
+ * cmd, when not nil, is the 10-byte style SCSI command being executed
+ * (lba taken from bytes 2-5, count from bytes 7-8) and is printed next
+ * to the lba/count values the caller is currently working with.
+ */
+static void
+atadumpstate(Drive* drive, uchar* cmd, vlong lba, int count)
+{
+	Prd *prd;
+	Pcidev *p;
+	Ctlr *ctlr;
+	int i, bmiba;
+
+	if(!(DEBUG & DbgSTATE)){
+		USED(drive, cmd, lba, count);
+		return;
+	}
+
+	ctlr = drive->ctlr;
+	print("sdata: command %2.2uX\n", ctlr->command);
+	print("data %8.8p limit %8.8p dlen %d status %uX error %uX\n",
+		drive->data, drive->limit, drive->dlen,
+		drive->status, drive->error);
+	if(cmd != nil){
+		print("lba %d -> %lld, count %d -> %d (%d)\n",
+			(cmd[2]<<24)|(cmd[3]<<16)|(cmd[4]<<8)|cmd[5], lba,
+			(cmd[7]<<8)|cmd[8], count, drive->count);
+	}
+	/* only read the command-block registers (cmdport+1..+6) while not Bsy */
+	if(!(inb(ctlr->ctlport+As) & Bsy)){
+		for(i = 1; i < 7; i++)
+			print(" 0x%2.2uX", inb(ctlr->cmdport+i));
+		print(" 0x%2.2uX\n", inb(ctlr->ctlport+As));
+	}
+	/* for DMA commands also dump the bus-master registers and walk the PRD table up to EOT */
+	if(drive->command == Cwd || drive->command == Crd){
+		bmiba = ctlr->bmiba;
+		prd = ctlr->prdt;
+		print("bmicx %2.2uX bmisx %2.2uX prdt %8.8p\n",
+			inb(bmiba+Bmicx), inb(bmiba+Bmisx), prd);
+		for(;;){
+			print("pa 0x%8.8luX count %8.8uX\n",
+				prd->pa, prd->count);
+			if(prd->count & PrdEOT)
+				break;
+			prd++;
+		}
+	}
+	/*
+	 * 0x40, 0x48 and 0x4A are the Idetim, Udmactl and Udmatim
+	 * configuration registers; vendor 0x8086 is presumably Intel.
+	 */
+	if(ctlr->pcidev && ctlr->pcidev->vid == 0x8086){
+		p = ctlr->pcidev;
+		print("0x40: %4.4uX 0x42: %4.4uX",
+			pcicfgr16(p, 0x40), pcicfgr16(p, 0x42));
+		print("0x48: %2.2uX\n", pcicfgr8(p, 0x48));
+		print("0x4A: %4.4uX\n", pcicfgr16(p, 0x4A));
+	}
+}
+
+static int
+atadebug(int cmdport, int ctlport, char* fmt, ...) 
+{ + int i, n; + va_list arg; + char buf[PRINTSIZE]; + + if(!(DEBUG & DbgPROBE)){ + USED(cmdport, ctlport, fmt); + return 0; + } + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + + if(cmdport){ + if(buf[n-1] == '\n') + n--; + n += snprint(buf+n, PRINTSIZE-n, " ataregs 0x%uX:", + cmdport); + for(i = Features; i < Command; i++) + n += snprint(buf+n, PRINTSIZE-n, " 0x%2.2uX", + inb(cmdport+i)); + if(ctlport) + n += snprint(buf+n, PRINTSIZE-n, " 0x%2.2uX", + inb(ctlport+As)); + n += snprint(buf+n, PRINTSIZE-n, "\n"); + } + putstrn(buf, n); + + return n; +} + +static int +ataready(int cmdport, int ctlport, int dev, int reset, int ready, int micro) +{ + int as; + + atadebug(cmdport, ctlport, "ataready: dev %uX reset %uX ready %uX", + dev, reset, ready); + + for(;;){ + /* + * Wait for the controller to become not busy and + * possibly for a status bit to become true (usually + * Drdy). Must change to the appropriate device + * register set if necessary before testing for ready. + * Always run through the loop at least once so it + * can be used as a test for !Bsy. 
+ */ + as = inb(ctlport+As); + if(as & reset){ + /* nothing to do */ + } + else if(dev){ + outb(cmdport+Dh, dev); + dev = 0; + } + else if(ready == 0 || (as & ready)){ + atadebug(0, 0, "ataready: %d 0x%2.2uX\n", micro, as); + return as; + } + + if(micro-- <= 0){ + atadebug(0, 0, "ataready: %d 0x%2.2uX\n", micro, as); + break; + } + microdelay(1); + } + atadebug(cmdport, ctlport, "ataready: timeout"); + + return -1; +} + +/* +static int +atacsf(Drive* drive, vlong csf, int supported) +{ + ushort *info; + int cmdset, i, x; + + if(supported) + info = &drive->info[Icsfs]; + else + info = &drive->info[Icsfe]; + + for(i = 0; i < 3; i++){ + x = (csf>>(16*i)) & 0xFFFF; + if(x == 0) + continue; + cmdset = info[i]; + if(cmdset == 0 || cmdset == 0xFFFF) + return 0; + return cmdset & x; + } + + return 0; +} +*/ + +static int +atadone(void* arg) +{ + return ((Ctlr*)arg)->done; +} + +static int +atarwmmode(Drive* drive, int cmdport, int ctlport, int dev) +{ + int as, maxrwm, rwm; + + maxrwm = (drive->info[Imaxrwm] & 0xFF); + if(maxrwm == 0) + return 0; + + /* + * Sometimes drives come up with the current count set + * to 0; if so, set a suitable value, otherwise believe + * the value in Irwm if the 0x100 bit is set. + */ + if(drive->info[Irwm] & 0x100) + rwm = (drive->info[Irwm] & 0xFF); + else + rwm = 0; + if(rwm == 0) + rwm = maxrwm; + if(rwm > 16) + rwm = 16; + if(ataready(cmdport, ctlport, dev, Bsy|Drq, Drdy, 102*1000) < 0) + return 0; + outb(cmdport+Count, rwm); + outb(cmdport+Command, Csm); + microdelay(1); + as = ataready(cmdport, ctlport, 0, Bsy, Drdy|Df|Err, 1000); + inb(cmdport+Status); + if(as < 0 || (as & (Df|Err))) + return 0; + + drive->rwm = rwm; + + return rwm; +} + +static int +atadmamode(Drive* drive) +{ + int dma; + + /* + * Check if any DMA mode enabled. + * Assumes the BIOS has picked and enabled the best. + * This is completely passive at the moment, no attempt is + * made to ensure the hardware is correctly set up. 
+ */ + dma = drive->info[Imwdma] & 0x0707; + drive->dma = (dma>>8) & dma; + if(drive->dma == 0 && (drive->info[Ivalid] & 0x04)){ + dma = drive->info[Iudma] & 0x7F7F; + drive->dma = (dma>>8) & dma; + if(drive->dma) + drive->dma |= 'U'<<16; + } + + return dma; +} + +static int +ataidentify(int cmdport, int ctlport, int dev, int pkt, void* info) +{ + int as, command, drdy; + + if(pkt){ + command = Cidpkt; + drdy = 0; + } + else{ + command = Cid; + drdy = Drdy; + } + as = ataready(cmdport, ctlport, dev, Bsy|Drq, drdy, 103*1000); + if(as < 0) + return as; + outb(cmdport+Command, command); + microdelay(1); + + as = ataready(cmdport, ctlport, 0, Bsy, Drq|Err, 400*1000); + if(as < 0) + return -1; + if(as & Err) + return as; + + memset(info, 0, 512); + inss(cmdport+Data, info, 256); + inb(cmdport+Status); + + if(DEBUG & DbgIDENTIFY){ + int i; + ushort *sp; + + sp = (ushort*)info; + for(i = 0; i < 256; i++){ + if(i && (i%16) == 0) + print("\n"); + print(" %4.4uX", *sp); + sp++; + } + print("\n"); + } + + return 0; +} + +static Drive* +atadrive(int cmdport, int ctlport, int dev) +{ + Drive *drive; + int as, i, pkt; + uchar buf[512], *p; + ushort iconfig, *sp; + + atadebug(0, 0, "identify: port 0x%uX dev 0x%2.2uX\n", cmdport, dev); + pkt = 1; +retry: + as = ataidentify(cmdport, ctlport, dev, pkt, buf); + if(as < 0) + return nil; + if(as & Err){ + if(pkt == 0) + return nil; + pkt = 0; + goto retry; + } + + if((drive = malloc(sizeof(Drive))) == nil) + return nil; + drive->dev = dev; + memmove(drive->info, buf, sizeof(drive->info)); + drive->sense[0] = 0x70; + drive->sense[7] = sizeof(drive->sense)-7; + + drive->inquiry[2] = 2; + drive->inquiry[3] = 2; + drive->inquiry[4] = sizeof(drive->inquiry)-4; + p = &drive->inquiry[8]; + sp = &drive->info[Imodel]; + for(i = 0; i < 20; i++){ + *p++ = *sp>>8; + *p++ = *sp++; + } + + drive->secsize = 512; + + /* + * Beware the CompactFlash Association feature set. 
+ * Now, why this value in Iconfig just walks all over the bit + * definitions used in the other parts of the ATA/ATAPI standards + * is a mystery and a sign of true stupidity on someone's part. + * Anyway, the standard says if this value is 0x848A then it's + * CompactFlash and it's NOT a packet device. + */ + iconfig = drive->info[Iconfig]; + if(iconfig != 0x848A && (iconfig & 0xC000) == 0x8000){ + if(iconfig & 0x01) + drive->pkt = 16; + else + drive->pkt = 12; + } + else{ + if(drive->info[Ivalid] & 0x0001){ + drive->c = drive->info[Iccyl]; + drive->h = drive->info[Ichead]; + drive->s = drive->info[Icsec]; + } + else{ + drive->c = drive->info[Ilcyl]; + drive->h = drive->info[Ilhead]; + drive->s = drive->info[Ilsec]; + } + if(drive->info[Icapabilities] & Mlba){ + if(drive->info[Icsfs+1] & Maddr48){ + drive->sectors = drive->info[Ilba48] + | (drive->info[Ilba48+1]<<16) + | ((vlong)drive->info[Ilba48+2]<<32); + drive->flags |= Lba48; + } + else{ + drive->sectors = (drive->info[Ilba+1]<<16) + |drive->info[Ilba]; + } + drive->dev |= Lba; + } + else + drive->sectors = drive->c*drive->h*drive->s; + atarwmmode(drive, cmdport, ctlport, dev); + } + atadmamode(drive); + + if(DEBUG & DbgCONFIG){ + print("dev %2.2uX port %uX config %4.4uX capabilities %4.4uX", + dev, cmdport, iconfig, drive->info[Icapabilities]); + print(" mwdma %4.4uX", drive->info[Imwdma]); + if(drive->info[Ivalid] & 0x04) + print(" udma %4.4uX", drive->info[Iudma]); + print(" dma %8.8uX rwm %ud", drive->dma, drive->rwm); + if(drive->flags&Lba48) + print("\tLLBA sectors %lld", drive->sectors); + print("\n"); + } + + return drive; +} + +static void +atasrst(int ctlport) +{ + /* + * Srst is a big stick and may cause problems if further + * commands are tried before the drives become ready again. + * Also, there will be problems here if overlapped commands + * are ever supported. 
+ */ + microdelay(5); + outb(ctlport+Dc, Srst); + microdelay(5); + outb(ctlport+Dc, 0); + microdelay(2*1000); +} + +static SDev* +ataprobe(int cmdport, int ctlport, int irq) +{ + Ctlr* ctlr; + SDev *sdev; + Drive *drive; + int dev, error, rhi, rlo; + static int nonlegacy = 'C'; + + if(cmdport == 0) { + print("ataprobe: cmdport is 0\n"); + return nil; + } + if(ioalloc(cmdport, 8, 0, "atacmd") < 0) { + print("ataprobe: Cannot allocate %X\n", cmdport); + return nil; + } + if(ioalloc(ctlport+As, 1, 0, "atactl") < 0){ + print("ataprobe: Cannot allocate %X\n", ctlport + As); + iofree(cmdport); + return nil; + } + + /* + * Try to detect a floating bus. + * Bsy should be cleared. If not, see if the cylinder registers + * are read/write capable. + * If the master fails, try the slave to catch slave-only + * configurations. + * There's no need to restore the tested registers as they will + * be reset on any detected drives by the Cedd command. + * All this indicates is that there is at least one drive on the + * controller; when the non-existent drive is selected in a + * single-drive configuration the registers of the existing drive + * are often seen, only command execution fails. + */ + dev = Dev0; + if(inb(ctlport+As) & Bsy){ + outb(cmdport+Dh, dev); + microdelay(1); +trydev1: + atadebug(cmdport, ctlport, "ataprobe bsy"); + outb(cmdport+Cyllo, 0xAA); + outb(cmdport+Cylhi, 0x55); + outb(cmdport+Sector, 0xFF); + rlo = inb(cmdport+Cyllo); + rhi = inb(cmdport+Cylhi); + if(rlo != 0xAA && (rlo == 0xFF || rhi != 0x55)){ + if(dev == Dev1){ +release: + iofree(cmdport); + iofree(ctlport+As); + return nil; + } + dev = Dev1; + if(ataready(cmdport, ctlport, dev, Bsy, 0, 20*1000) < 0) + goto trydev1; + } + } + + /* + * Disable interrupts on any detected controllers. + */ + outb(ctlport+Dc, Nien); +tryedd1: + if(ataready(cmdport, ctlport, dev, Bsy|Drq, 0, 105*1000) < 0){ + /* + * There's something there, but it didn't come up clean, + * so try hitting it with a big stick. 
The timing here is + * wrong but this is a last-ditch effort and it sometimes + * gets some marginal hardware back online. + */ + atasrst(ctlport); + if(ataready(cmdport, ctlport, dev, Bsy|Drq, 0, 106*1000) < 0) + goto release; + } + + /* + * Can only get here if controller is not busy. + * If there are drives Bsy will be set within 400nS, + * must wait 2mS before testing Status. + * Wait for the command to complete (6 seconds max). + */ + outb(cmdport+Command, Cedd); + delay(2); + if(ataready(cmdport, ctlport, dev, Bsy|Drq, 0, 6*1000*1000) < 0) + goto release; + + /* + * If bit 0 of the error register is set then the selected drive + * exists. This is enough to detect single-drive configurations. + * However, if the master exists there is no way short of executing + * a command to determine if a slave is present. + * It appears possible to get here testing Dev0 although it doesn't + * exist and the EDD won't take, so try again with Dev1. + */ + error = inb(cmdport+Error); + atadebug(cmdport, ctlport, "ataprobe: dev %uX", dev); + if((error & ~0x80) != 0x01){ + if(dev == Dev1) + goto release; + dev = Dev1; + goto tryedd1; + } + + /* + * At least one drive is known to exist, try to + * identify it. If that fails, don't bother checking + * any further. + * If the one drive found is Dev0 and the EDD command + * didn't indicate Dev1 doesn't exist, check for it. + */ + if((drive = atadrive(cmdport, ctlport, dev)) == nil) + goto release; + if((ctlr = malloc(sizeof(Ctlr))) == nil){ + free(drive); + goto release; + } + memset(ctlr, 0, sizeof(Ctlr)); + if((sdev = malloc(sizeof(SDev))) == nil){ + free(ctlr); + free(drive); + goto release; + } + memset(sdev, 0, sizeof(SDev)); + drive->ctlr = ctlr; + if(dev == Dev0){ + ctlr->drive[0] = drive; + if(!(error & 0x80)){ + /* + * Always leave Dh pointing to a valid drive, + * otherwise a subsequent call to ataready on + * this controller may try to test a bogus Status. 
+ * Ataprobe is the only place possibly invalid + * drives should be selected. + */ + drive = atadrive(cmdport, ctlport, Dev1); + if(drive != nil){ + drive->ctlr = ctlr; + ctlr->drive[1] = drive; + } + else{ + outb(cmdport+Dh, Dev0); + microdelay(1); + } + } + } + else + ctlr->drive[1] = drive; + + ctlr->cmdport = cmdport; + ctlr->ctlport = ctlport; + ctlr->irq = irq; + ctlr->tbdf = BUSUNKNOWN; + ctlr->command = Cedd; /* debugging */ + + switch(cmdport){ + default: + sdev->idno = nonlegacy; + break; + case 0x1F0: + sdev->idno = 'C'; + nonlegacy = 'E'; + break; + case 0x170: + sdev->idno = 'D'; + nonlegacy = 'E'; + break; + } + sdev->ifc = &sdataifc; + sdev->ctlr = ctlr; + sdev->nunit = 2; + ctlr->sdev = sdev; + + return sdev; +} + +static void +ataclear(SDev *sdev) +{ + Ctlr* ctlr; + + ctlr = sdev->ctlr; + iofree(ctlr->cmdport); + iofree(ctlr->ctlport + As); + + if (ctlr->drive[0]) + free(ctlr->drive[0]); + if (ctlr->drive[1]) + free(ctlr->drive[1]); + if (sdev->name) + free(sdev->name); + if (sdev->unitflg) + free(sdev->unitflg); + if (sdev->unit) + free(sdev->unit); + free(ctlr); + free(sdev); +} + +static char * +atastat(SDev *sdev, char *p, char *e) +{ + Ctlr *ctlr = sdev->ctlr; + + return seprint(p, e, "%s ata port %X ctl %X irq %d " + "intr-ok %lud intr-busy %lud intr-nil-drive %lud\n", + sdev->name, ctlr->cmdport, ctlr->ctlport, ctlr->irq, + ctlr->intok, ctlr->intbusy, ctlr->intnil); +} + +/* + * These are duplicated with sdsetsense, etc., in devsd.c, but + * those assume that the disk is not SCSI while in fact here + * ata drives are not SCSI but ATAPI ones kind of are. + */ +static int +atasetsense(Drive* drive, int status, int key, int asc, int ascq) +{ + drive->sense[2] = key; + drive->sense[12] = asc; + drive->sense[13] = ascq; + + return status; +} + +static int +atamodesense(Drive* drive, uchar* cmd) +{ + int len; + + /* + * Fake a vendor-specific request with page code 0, + * return the drive info. 
+ */ + if((cmd[2] & 0x3F) != 0 && (cmd[2] & 0x3F) != 0x3F) + return atasetsense(drive, SDcheck, 0x05, 0x24, 0); + len = (cmd[7]<<8)|cmd[8]; + if(len == 0) + return SDok; + if(len < 8+sizeof(drive->info)) + return atasetsense(drive, SDcheck, 0x05, 0x1A, 0); + if(drive->data == nil || drive->dlen < len) + return atasetsense(drive, SDcheck, 0x05, 0x20, 1); + memset(drive->data, 0, 8); + drive->data[0] = sizeof(drive->info)>>8; + drive->data[1] = sizeof(drive->info); + memmove(drive->data+8, drive->info, sizeof(drive->info)); + drive->data += 8+sizeof(drive->info); + + return SDok; +} + +static int +atastandby(Drive* drive, int period) +{ + Ctlr* ctlr; + int cmdport, done; + + ctlr = drive->ctlr; + drive->command = Cstandby; + qlock(ctlr); + + cmdport = ctlr->cmdport; + ilock(ctlr); + outb(cmdport+Count, period); + outb(cmdport+Dh, drive->dev); + ctlr->done = 0; + ctlr->curdrive = drive; + ctlr->command = Cstandby; /* debugging */ + outb(cmdport+Command, Cstandby); + iunlock(ctlr); + + while(waserror()) + ; + tsleep(ctlr, atadone, ctlr, 60*1000); + poperror(); + + done = ctlr->done; + qunlock(ctlr); + + if(!done || (drive->status & Err)) + return atasetsense(drive, SDcheck, 4, 8, drive->error); + return SDok; +} + +static void +atanop(Drive* drive, int subcommand) +{ + Ctlr* ctlr; + int as, cmdport, ctlport, timeo; + + /* + * Attempt to abort a command by using NOP. + * In response, the drive is supposed to set Abrt + * in the Error register, set (Drdy|Err) in Status + * and clear Bsy when done. However, some drives + * (e.g. ATAPI Zip) just go Bsy then clear Status + * when done, hence the timeout loop only on Bsy + * and the forced setting of drive->error. 
+ */ + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + outb(cmdport+Features, subcommand); + outb(cmdport+Dh, drive->dev); + ctlr->command = Cnop; /* debugging */ + outb(cmdport+Command, Cnop); + + microdelay(1); + ctlport = ctlr->ctlport; + for(timeo = 0; timeo < 1000; timeo++){ + as = inb(ctlport+As); + if(!(as & Bsy)) + break; + microdelay(1); + } + drive->error |= Abrt; +} + +static void +ataabort(Drive* drive, int dolock) +{ + /* + * If NOP is available (packet commands) use it otherwise + * must try a software reset. + */ + if(dolock) + ilock(drive->ctlr); + if(drive->info[Icsfs] & Mnop) + atanop(drive, 0); + else{ + atasrst(drive->ctlr->ctlport); + drive->error |= Abrt; + } + if(dolock) + iunlock(drive->ctlr); +} + +static int +atadmasetup(Drive* drive, int len) +{ + Prd *prd; + ulong pa; + Ctlr *ctlr; + int bmiba, bmisx, count, i, span; + + ctlr = drive->ctlr; + pa = PCIWADDR32(drive->data); + if(pa & 0x03) + return -1; + + /* + * Sometimes drives identify themselves as being DMA capable + * although they are not on a busmastering controller. 
+ */ + prd = ctlr->prdt; + if(prd == nil){ + drive->dmactl = 0; + print("disabling dma: not on a busmastering controller\n"); + return -1; + } + + for(i = 0; len && i < Nprd; i++){ + prd->pa = pa; + span = ROUNDUP(pa, ctlr->span); + if(span == pa) + span += ctlr->span; + count = span - pa; + if(count >= len){ + prd->count = PrdEOT|len; + break; + } + prd->count = count; + len -= count; + pa += count; + prd++; + } + if(i == Nprd) + (prd-1)->count |= PrdEOT; + + bmiba = ctlr->bmiba; + outl(bmiba+Bmidtpx, PCIWADDR32(ctlr->prdt)); + if(drive->write) + outb(ctlr->bmiba+Bmicx, 0); + else + outb(ctlr->bmiba+Bmicx, Rwcon); + bmisx = inb(bmiba+Bmisx); + outb(bmiba+Bmisx, bmisx|Ideints|Idedmae); + + return 0; +} + +static void +atadmastart(Ctlr* ctlr, int write) +{ + if(write) + outb(ctlr->bmiba+Bmicx, Ssbm); + else + outb(ctlr->bmiba+Bmicx, Rwcon|Ssbm); +} + +static int +atadmastop(Ctlr* ctlr) +{ + int bmiba; + + bmiba = ctlr->bmiba; + outb(bmiba+Bmicx, inb(bmiba+Bmicx) & ~Ssbm); + + return inb(bmiba+Bmisx); +} + +static void +atadmainterrupt(Drive* drive, int count) +{ + Ctlr* ctlr; + int bmiba, bmisx; + + ctlr = drive->ctlr; + bmiba = ctlr->bmiba; + bmisx = inb(bmiba+Bmisx); + switch(bmisx & (Ideints|Idedmae|Bmidea)){ + case Bmidea: + /* + * Data transfer still in progress, nothing to do + * (this should never happen). + */ + return; + + case Ideints: + case Ideints|Bmidea: + /* + * Normal termination, tidy up. + */ + drive->data += count; + break; + + default: + /* + * What's left are error conditions (memory transfer + * problem) and the device is not done but the PRD is + * exhausted. For both cases must somehow tell the + * drive to abort. 
+ */ + ataabort(drive, 0); + break; + } + atadmastop(ctlr); + ctlr->done = 1; +} + +static void +atapktinterrupt(Drive* drive) +{ + Ctlr* ctlr; + int cmdport, len, sts; + + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + sts = inb(cmdport+Ir) & (/*Rel|*/ Io|Cd); + /* a default case is impossible since all cases are enumerated */ + switch(sts){ + case Cd: /* write cmd */ + outss(cmdport+Data, drive->pktcmd, drive->pkt/2); + break; + + case 0: /* write data */ + len = (inb(cmdport+Bytehi)<<8)|inb(cmdport+Bytelo); + if(drive->data+len > drive->limit){ + atanop(drive, 0); + break; + } + outss(cmdport+Data, drive->data, len/2); + drive->data += len; + break; + + case Io: /* read data */ + len = (inb(cmdport+Bytehi)<<8)|inb(cmdport+Bytelo); + if(drive->data+len > drive->limit){ + atanop(drive, 0); + break; + } + inss(cmdport+Data, drive->data, len/2); + drive->data += len; + break; + + case Io|Cd: /* read cmd */ + if(drive->pktdma) + atadmainterrupt(drive, drive->dlen); + else + ctlr->done = 1; + break; + } + if(sts & Cd) + drive->intcmd++; + if(sts & Io) + drive->intrd++; + else + drive->intwr++; +} + +static int +atapktio(Drive* drive, uchar* cmd, int clen) +{ + Ctlr *ctlr; + int as, cmdport, ctlport, len, r, timeo; + + if(cmd[0] == 0x5A && (cmd[2] & 0x3F) == 0) + return atamodesense(drive, cmd); + + r = SDok; + + drive->command = Cpkt; + memmove(drive->pktcmd, cmd, clen); + memset(drive->pktcmd+clen, 0, drive->pkt-clen); + drive->limit = drive->data+drive->dlen; + + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + ctlport = ctlr->ctlport; + + qlock(ctlr); + + as = ataready(cmdport, ctlport, drive->dev, Bsy|Drq, Drdy, 107*1000); + /* used to test as&Chk as failure too, but some CD readers use that for media change */ + if(as < 0){ + qunlock(ctlr); + return -1; + } + + ilock(ctlr); + if(drive->dlen && drive->dmactl && !atadmasetup(drive, drive->dlen)) + drive->pktdma = Dma; + else + drive->pktdma = 0; + + outb(cmdport+Features, drive->pktdma); + outb(cmdport+Count, 0); 
+ outb(cmdport+Sector, 0); + len = 16*drive->secsize; + outb(cmdport+Bytelo, len); + outb(cmdport+Bytehi, len>>8); + outb(cmdport+Dh, drive->dev); + ctlr->done = 0; + ctlr->curdrive = drive; + ctlr->command = Cpkt; /* debugging */ + if(drive->pktdma) + atadmastart(ctlr, drive->write); + outb(cmdport+Command, Cpkt); + + if((drive->info[Iconfig] & Mdrq) != 0x0020){ + microdelay(1); + as = ataready(cmdport, ctlport, 0, Bsy, Drq|Chk, 4*1000); + if(as < 0 || (as & (Bsy|Chk))){ + drive->status = as<0 ? 0 : as; + ctlr->curdrive = nil; + ctlr->done = 1; + r = SDtimeout; + }else + atapktinterrupt(drive); + } + iunlock(ctlr); + + while(waserror()) + ; + if(!drive->pktdma) + sleep(ctlr, atadone, ctlr); + else for(timeo = 0; !ctlr->done; timeo++){ + tsleep(ctlr, atadone, ctlr, 1000); + if(ctlr->done) + break; + ilock(ctlr); + atadmainterrupt(drive, 0); + if(!drive->error && timeo > 20){ + ataabort(drive, 0); + atadmastop(ctlr); + drive->dmactl = 0; + drive->error |= Abrt; + } + if(drive->error){ + drive->status |= Chk; + ctlr->curdrive = nil; + } + iunlock(ctlr); + } + poperror(); + + qunlock(ctlr); + + if(drive->status & Chk) + r = SDcheck; + + return r; +} + +static uchar cmd48[256] = { + [Crs] Crs48, + [Crd] Crd48, + [Crdq] Crdq48, + [Crsm] Crsm48, + [Cws] Cws48, + [Cwd] Cwd48, + [Cwdq] Cwdq48, + [Cwsm] Cwsm48, +}; + +static int +atageniostart(Drive* drive, uvlong lba) +{ + Ctlr *ctlr; + uchar cmd; + int as, c, cmdport, ctlport, h, len, s, use48; + + use48 = 0; + if((drive->flags&Lba48always) || lba > Last28 || drive->count > 256){ + if(!(drive->flags & Lba48)) + return -1; + use48 = 1; + c = h = s = 0; + } + else if(drive->dev & Lba){ + c = (lba>>8) & 0xFFFF; + h = (lba>>24) & 0x0F; + s = lba & 0xFF; + } + else{ + c = lba/(drive->s*drive->h); + h = ((lba/drive->s) % drive->h); + s = (lba % drive->s) + 1; + } + + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + ctlport = ctlr->ctlport; + if(ataready(cmdport, ctlport, drive->dev, Bsy|Drq, Drdy, 101*1000) < 0) + return -1; + 
+ ilock(ctlr); + if(drive->dmactl && !atadmasetup(drive, drive->count*drive->secsize)){ + if(drive->write) + drive->command = Cwd; + else + drive->command = Crd; + } + else if(drive->rwmctl){ + drive->block = drive->rwm*drive->secsize; + if(drive->write) + drive->command = Cwsm; + else + drive->command = Crsm; + } + else{ + drive->block = drive->secsize; + if(drive->write) + drive->command = Cws; + else + drive->command = Crs; + } + drive->limit = drive->data + drive->count*drive->secsize; + cmd = drive->command; + if(use48){ + outb(cmdport+Count, drive->count>>8); + outb(cmdport+Count, drive->count); + outb(cmdport+Lbalo, lba>>24); + outb(cmdport+Lbalo, lba); + outb(cmdport+Lbamid, lba>>32); + outb(cmdport+Lbamid, lba>>8); + outb(cmdport+Lbahi, lba>>40); + outb(cmdport+Lbahi, lba>>16); + outb(cmdport+Dh, drive->dev|Lba); + cmd = cmd48[cmd]; + + if(DEBUG & Dbg48BIT) + print("using 48-bit commands\n"); + } + else{ + outb(cmdport+Count, drive->count); + outb(cmdport+Sector, s); + outb(cmdport+Cyllo, c); + outb(cmdport+Cylhi, c>>8); + outb(cmdport+Dh, drive->dev|h); + } + ctlr->done = 0; + ctlr->curdrive = drive; + ctlr->command = drive->command; /* debugging */ + outb(cmdport+Command, cmd); + + switch(drive->command){ + case Cws: + case Cwsm: + microdelay(1); + /* 10*1000 for flash ide drives - maybe detect them? 
*/ + as = ataready(cmdport, ctlport, 0, Bsy, Drq|Err, 10*1000); + if(as < 0 || (as & Err)){ + iunlock(ctlr); + return -1; + } + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + outss(cmdport+Data, drive->data, len/2); + break; + + case Crd: + case Cwd: + atadmastart(ctlr, drive->write); + break; + } + iunlock(ctlr); + + return 0; +} + +static int +atagenioretry(Drive* drive) +{ + if(drive->dmactl){ + drive->dmactl = 0; + print("atagenioretry: disabling dma\n"); + } + else if(drive->rwmctl) + drive->rwmctl = 0; + else + return atasetsense(drive, SDcheck, 4, 8, drive->error); + + return SDretry; +} + +static int +atagenio(Drive* drive, uchar* cmd, int clen) +{ + uchar *p; + Ctlr *ctlr; + vlong lba, len; + int count, maxio; + + /* + * Map SCSI commands into ATA commands for discs. + * Fail any command with a LUN except INQUIRY which + * will return 'logical unit not supported'. + */ + if((cmd[1]>>5) && cmd[0] != 0x12) + return atasetsense(drive, SDcheck, 0x05, 0x25, 0); + + switch(cmd[0]){ + default: + return atasetsense(drive, SDcheck, 0x05, 0x20, 0); + + case 0x00: /* test unit ready */ + return SDok; + + case 0x03: /* request sense */ + if(cmd[4] < sizeof(drive->sense)) + len = cmd[4]; + else + len = sizeof(drive->sense); + if(drive->data && drive->dlen >= len){ + memmove(drive->data, drive->sense, len); + drive->data += len; + } + return SDok; + + case 0x12: /* inquiry */ + if(cmd[4] < sizeof(drive->inquiry)) + len = cmd[4]; + else + len = sizeof(drive->inquiry); + if(drive->data && drive->dlen >= len){ + memmove(drive->data, drive->inquiry, len); + drive->data += len; + } + return SDok; + + case 0x1B: /* start/stop unit */ + /* + * NOP for now, can use the power management feature + * set later. 
+ */ + return SDok; + + case 0x25: /* read capacity */ + if((cmd[1] & 0x01) || cmd[2] || cmd[3]) + return atasetsense(drive, SDcheck, 0x05, 0x24, 0); + if(drive->data == nil || drive->dlen < 8) + return atasetsense(drive, SDcheck, 0x05, 0x20, 1); + /* + * Read capacity returns the LBA of the last sector. + */ + len = drive->sectors-1; + p = drive->data; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p++ = len; + len = drive->secsize; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p = len; + drive->data += 8; + return SDok; + + case 0x9E: /* long read capacity */ + if((cmd[1] & 0x01) || cmd[2] || cmd[3]) + return atasetsense(drive, SDcheck, 0x05, 0x24, 0); + if(drive->data == nil || drive->dlen < 8) + return atasetsense(drive, SDcheck, 0x05, 0x20, 1); + /* + * Read capacity returns the LBA of the last sector. + */ + len = drive->sectors-1; + p = drive->data; + *p++ = len>>56; + *p++ = len>>48; + *p++ = len>>40; + *p++ = len>>32; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p++ = len; + len = drive->secsize; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p = len; + drive->data += 12; + return SDok; + + case 0x28: /* read (10) */ + case 0x88: /* long read (16) */ + case 0x2a: /* write (10) */ + case 0x8a: /* long write (16) */ + case 0x2e: /* write and verify (10) */ + break; + + case 0x5A: + return atamodesense(drive, cmd); + } + + ctlr = drive->ctlr; + if(clen == 16){ + /* ata commands only go to 48-bit lba */ + if(cmd[2] || cmd[3]) + return atasetsense(drive, SDcheck, 3, 0xc, 2); + lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32; + lba |= cmd[6]<<24 | cmd[7]<<16 | cmd[8]<<8 | cmd[9]; + count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13]; + }else{ + lba = cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5]; + count = cmd[7]<<8 | cmd[8]; + } + if(drive->data == nil) + return SDok; + if(drive->dlen < count*drive->secsize) + count = drive->dlen/drive->secsize; + qlock(ctlr); + if(ctlr->maxio) + maxio = ctlr->maxio; + else 
if(drive->flags & Lba48) + maxio = 65536; + else + maxio = 256; + while(count){ + if(count > maxio) + drive->count = maxio; + else + drive->count = count; + if(atageniostart(drive, lba)){ + ilock(ctlr); + atanop(drive, 0); + iunlock(ctlr); + qunlock(ctlr); + return atagenioretry(drive); + } + + while(waserror()) + ; + tsleep(ctlr, atadone, ctlr, 60*1000); + poperror(); + if(!ctlr->done){ + /* + * What should the above timeout be? In + * standby and sleep modes it could take as + * long as 30 seconds for a drive to respond. + * Very hard to get out of this cleanly. + */ + atadumpstate(drive, cmd, lba, count); + ataabort(drive, 1); + qunlock(ctlr); + return atagenioretry(drive); + } + + if(drive->status & Err){ + qunlock(ctlr); + return atasetsense(drive, SDcheck, 4, 8, drive->error); + } + count -= drive->count; + lba += drive->count; + } + qunlock(ctlr); + + return SDok; +} + +static int +atario(SDreq* r) +{ + Ctlr *ctlr; + Drive *drive; + SDunit *unit; + uchar cmd10[10], *cmdp, *p; + int clen, reqstatus, status; + + unit = r->unit; + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] == nil){ + r->status = SDtimeout; + return SDtimeout; + } + drive = ctlr->drive[unit->subno]; + + /* + * Most SCSI commands can be passed unchanged except for + * the padding on the end. The few which require munging + * are not used internally. Mode select/sense(6) could be + * converted to the 10-byte form but it's not worth the + * effort. Read/write(6) are easy. 
+ */ + switch(r->cmd[0]){ + case 0x08: /* read */ + case 0x0A: /* write */ + cmdp = cmd10; + memset(cmdp, 0, sizeof(cmd10)); + cmdp[0] = r->cmd[0]|0x20; + cmdp[1] = r->cmd[1] & 0xE0; + cmdp[5] = r->cmd[3]; + cmdp[4] = r->cmd[2]; + cmdp[3] = r->cmd[1] & 0x0F; + cmdp[8] = r->cmd[4]; + clen = sizeof(cmd10); + break; + + default: + cmdp = r->cmd; + clen = r->clen; + break; + } + + qlock(drive); +retry: + drive->write = r->write; + drive->data = r->data; + drive->dlen = r->dlen; + + drive->status = 0; + drive->error = 0; + if(drive->pkt) + status = atapktio(drive, cmdp, clen); + else + status = atagenio(drive, cmdp, clen); + if(status == SDretry){ + if(DbgDEBUG) + print("%s: retry: dma %8.8uX rwm %4.4uX\n", + unit->name, drive->dmactl, drive->rwmctl); + goto retry; + } + if(status == SDok){ + atasetsense(drive, SDok, 0, 0, 0); + if(drive->data){ + p = r->data; + r->rlen = drive->data - p; + } + else + r->rlen = 0; + } + else if(status == SDcheck && !(r->flags & SDnosense)){ + drive->write = 0; + memset(cmd10, 0, sizeof(cmd10)); + cmd10[0] = 0x03; + cmd10[1] = r->lun<<5; + cmd10[4] = sizeof(r->sense)-1; + drive->data = r->sense; + drive->dlen = sizeof(r->sense)-1; + drive->status = 0; + drive->error = 0; + if(drive->pkt) + reqstatus = atapktio(drive, cmd10, 6); + else + reqstatus = atagenio(drive, cmd10, 6); + if(reqstatus == SDok){ + r->flags |= SDvalidsense; + atasetsense(drive, SDok, 0, 0, 0); + } + } + qunlock(drive); + r->status = status; + if(status != SDok) + return status; + + /* + * Fix up any results. + * Many ATAPI CD-ROMs ignore the LUN field completely and + * return valid INQUIRY data. Patch the response to indicate + * 'logical unit not supported' if the LUN is non-zero. 
+ */ + switch(cmdp[0]){ + case 0x12: /* inquiry */ + if((p = r->data) == nil) + break; + if((cmdp[1]>>5) && (!drive->pkt || (p[0] & 0x1F) == 0x05)) + p[0] = 0x7F; + /*FALLTHROUGH*/ + default: + break; + } + + return SDok; +} + +/* interrupt ack hack for intel ich controllers */ +static void +ichirqack(Ctlr *ctlr) +{ + int bmiba; + + bmiba = ctlr->bmiba; + if(bmiba) + outb(bmiba+Bmisx, inb(bmiba+Bmisx)); +} + +static void +atainterrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Drive *drive; + int cmdport, len, status; + + ctlr = arg; + + ilock(ctlr); + if(inb(ctlr->ctlport+As) & Bsy){ + ctlr->intbusy++; + iunlock(ctlr); + if(DEBUG & DbgBsy) + print("IBsy+"); + return; + } + cmdport = ctlr->cmdport; + status = inb(cmdport+Status); + if((drive = ctlr->curdrive) == nil){ + ctlr->intnil++; + if(ctlr->irqack != nil) + ctlr->irqack(ctlr); + iunlock(ctlr); + if((DEBUG & DbgINL) && ctlr->command != Cedd) + print("Inil%2.2uX+", ctlr->command); + return; + } + + ctlr->intok++; + + if(status & Err) + drive->error = inb(cmdport+Error); + else switch(drive->command){ + default: + drive->error = Abrt; + break; + + case Crs: + case Crsm: + drive->intrd++; + if(!(status & Drq)){ + drive->error = Abrt; + break; + } + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + inss(cmdport+Data, drive->data, len/2); + drive->data += len; + if(drive->data >= drive->limit) + ctlr->done = 1; + break; + + case Cws: + case Cwsm: + drive->intwr++; + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + drive->data += len; + if(drive->data >= drive->limit){ + ctlr->done = 1; + break; + } + if(!(status & Drq)){ + drive->error = Abrt; + break; + } + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + outss(cmdport+Data, drive->data, len/2); + break; + + case Cpkt: + atapktinterrupt(drive); + break; + + case Crd: + drive->intrd++; + /* fall through */ + case Cwd: + if (drive->command == 
Cwd) + drive->intwr++; + atadmainterrupt(drive, drive->count*drive->secsize); + break; + + case Cstandby: + ctlr->done = 1; + break; + } + if(ctlr->irqack != nil) + ctlr->irqack(ctlr); + iunlock(ctlr); + + if(drive->error){ + status |= Err; + ctlr->done = 1; + } + + if(ctlr->done){ + ctlr->curdrive = nil; + drive->status = status; + wakeup(ctlr); + } +} + +static SDev* +atapnp(void) +{ + Ctlr *ctlr; + Pcidev *p; + SDev *legacy[2], *sdev, *head, *tail; + int channel, ispc87415, maxio, pi, r, span; + void (*irqack)(Ctlr*); + + irqack = nil; + legacy[0] = legacy[1] = head = tail = nil; + if(sdev = ataprobe(0x1F0, 0x3F4, IrqATA0)){ + head = tail = sdev; + legacy[0] = sdev; + } + if(sdev = ataprobe(0x170, 0x374, IrqATA1)){ + if(head != nil) + tail->next = sdev; + else + head = sdev; + tail = sdev; + legacy[1] = sdev; + } + + p = nil; + while(p = pcimatch(p, 0, 0)){ + /* + * Look for devices with the correct class and sub-class + * code and known device and vendor ID; add native-mode + * channels to the list to be probed, save info for the + * compatibility mode channels. + * Note that the legacy devices should not be considered + * PCI devices by the interrupt controller. + * For both native and legacy, save info for busmastering + * if capable. + * Promise Ultra ATA/66 (PDC20262) appears to + * 1) give a sub-class of 'other mass storage controller' + * instead of 'IDE controller', regardless of whether it's + * the only controller or not; + * 2) put 0 in the programming interface byte (probably + * as a consequence of 1) above). + * Sub-class code 0x04 is 'RAID controller', e.g. VIA VT8237. + */ + if(p->ccrb != 0x01) + continue; + if(p->ccru != 0x01 && p->ccru != 0x04 && p->ccru != 0x80) + continue; + pi = p->ccrp; + ispc87415 = 0; + maxio = 0; + span = BMspan; + + switch((p->did<<16)|p->vid){ + default: + continue; + + case (0x0002<<16)|0x100B: /* NS PC87415 */ + /* + * Disable interrupts on both channels until + * after they are probed for drives. 
+ * This must be called before interrupts are + * enabled because the IRQ may be shared. + */ + ispc87415 = 1; + pcicfgw32(p, 0x40, 0x00000300); + break; + case (0x1000<<16)|0x1042: /* PC-Tech RZ1000 */ + /* + * Turn off prefetch. Overkill, but cheap. + */ + r = pcicfgr32(p, 0x40); + r &= ~0x2000; + pcicfgw32(p, 0x40, r); + break; + case (0x4379<<16)|0x1002: /* ATI SB400 SATA*/ + case (0x437a<<16)|0x1002: /* ATI SB400 SATA */ + case (0x439c<<16)|0x1002: /* ATI 439c SATA*/ + case (0x3373<<16)|0x105A: /* Promise 20378 RAID */ + case (0x4D30<<16)|0x105A: /* Promise PDC202xx */ + case (0x4D38<<16)|0x105A: /* Promise PDC20262 */ + case (0x4D68<<16)|0x105A: /* Promise PDC20268 */ + case (0x4D69<<16)|0x105A: /* Promise Ultra/133 TX2 */ + case (0x3112<<16)|0x1095: /* SiI 3112 SATA/RAID */ + case (0x3149<<16)|0x1106: /* VIA VT8237 SATA/RAID */ + maxio = 15; + span = 8*1024; + /*FALLTHROUGH*/ + case (0x0680<<16)|0x1095: /* SiI 0680/680A PATA133 ATAPI/RAID */ + case (0x3114<<16)|0x1095: /* SiI 3114 SATA/RAID */ + pi = 0x85; + break; + case (0x0004<<16)|0x1103: /* HighPoint HPT366 */ + pi = 0x85; + /* + * Turn off fast interrupt prediction. + */ + if((r = pcicfgr8(p, 0x51)) & 0x80) + pcicfgw8(p, 0x51, r & ~0x80); + if((r = pcicfgr8(p, 0x55)) & 0x80) + pcicfgw8(p, 0x55, r & ~0x80); + break; + case (0x0640<<16)|0x1095: /* CMD 640B */ + /* + * Bugfix code here... + */ + break; + case (0x7441<<16)|0x1022: /* AMD 768 */ + /* + * Set: + * 0x41 prefetch, postwrite; + * 0x43 FIFO configuration 1/2 and 1/2; + * 0x44 status register read retry; + * 0x46 DMA read and end of sector flush. 
+ */ + r = pcicfgr8(p, 0x41); + pcicfgw8(p, 0x41, r|0xF0); + r = pcicfgr8(p, 0x43); + pcicfgw8(p, 0x43, (r & 0x90)|0x2A); + r = pcicfgr8(p, 0x44); + pcicfgw8(p, 0x44, r|0x08); + r = pcicfgr8(p, 0x46); + pcicfgw8(p, 0x46, (r & 0x0C)|0xF0); + /*FALLTHROUGH*/ + case (0x7401<<16)|0x1022: /* AMD 755 Cobra */ + case (0x7409<<16)|0x1022: /* AMD 756 Viper */ + case (0x7410<<16)|0x1022: /* AMD 766 Viper Plus */ + case (0x7469<<16)|0x1022: /* AMD 3111 */ + /* + * This can probably be lumped in with the 768 above. + */ + /*FALLTHROUGH*/ + case (0x209A<<16)|0x1022: /* AMD CS5536 */ + case (0x01BC<<16)|0x10DE: /* nVidia nForce1 */ + case (0x0065<<16)|0x10DE: /* nVidia nForce2 */ + case (0x0085<<16)|0x10DE: /* nVidia nForce2 MCP */ + case (0x00E3<<16)|0x10DE: /* nVidia nForce2 250 SATA */ + case (0x00D5<<16)|0x10DE: /* nVidia nForce3 */ + case (0x00E5<<16)|0x10DE: /* nVidia nForce3 Pro */ + case (0x00EE<<16)|0x10DE: /* nVidia nForce3 250 SATA */ + case (0x0035<<16)|0x10DE: /* nVidia nForce3 MCP */ + case (0x0053<<16)|0x10DE: /* nVidia nForce4 */ + case (0x0054<<16)|0x10DE: /* nVidia nForce4 SATA */ + case (0x0055<<16)|0x10DE: /* nVidia nForce4 SATA */ + case (0x0266<<16)|0x10DE: /* nVidia nForce4 430 SATA */ + case (0x0267<<16)|0x10DE: /* nVidia nForce 55 MCP SATA */ + case (0x03EC<<16)|0x10DE: /* nVidia nForce 61 MCP SATA */ + case (0x0448<<16)|0x10DE: /* nVidia nForce 65 MCP SATA */ + case (0x0560<<16)|0x10DE: /* nVidia nForce 69 MCP SATA */ + /* + * Ditto, although it may have a different base + * address for the registers (0x50?). 
+ */ + /*FALLTHROUGH*/ + case (0x4376<<16)|0x1002: /* ATI SB400 PATA */ + case (0x438c<<16)|0x1002: /* ATI SB600 PATA */ + break; + case (0x0211<<16)|0x1166: /* ServerWorks IB6566 */ + { + Pcidev *sb; + + sb = pcimatch(nil, 0x1166, 0x0200); + if(sb == nil) + break; + r = pcicfgr32(sb, 0x64); + r &= ~0x2000; + pcicfgw32(sb, 0x64, r); + } + span = 32*1024; + break; + case (0x0502<<16)|0x100B: /* NS SC1100/SCx200 */ + case (0x5229<<16)|0x10B9: /* ALi M1543 */ + case (0x5288<<16)|0x10B9: /* ALi M5288 SATA */ + case (0x5513<<16)|0x1039: /* SiS 962 */ + case (0x0646<<16)|0x1095: /* CMD 646 */ + case (0x0571<<16)|0x1106: /* VIA 82C686 */ + case (0x2363<<16)|0x197b: /* JMicron SATA */ + break; /* TODO: verify that this should be here; wasn't in original patch */ + case (0x1230<<16)|0x8086: /* 82371FB (PIIX) */ + case (0x7010<<16)|0x8086: /* 82371SB (PIIX3) */ + case (0x7111<<16)|0x8086: /* 82371[AE]B (PIIX4[E]) */ + case (0x2411<<16)|0x8086: /* 82801AA (ICH) */ + case (0x2421<<16)|0x8086: /* 82801AB (ICH0) */ + case (0x244A<<16)|0x8086: /* 82801BA (ICH2, Mobile) */ + case (0x244B<<16)|0x8086: /* 82801BA (ICH2, High-End) */ + case (0x248A<<16)|0x8086: /* 82801CA (ICH3, Mobile) */ + case (0x248B<<16)|0x8086: /* 82801CA (ICH3, High-End) */ + case (0x24CA<<16)|0x8086: /* 82801DBM (ICH4, Mobile) */ + case (0x24CB<<16)|0x8086: /* 82801DB (ICH4, High-End) */ + case (0x24D1<<16)|0x8086: /* 82801EB/ER (ICH5 High-End) */ + case (0x24DB<<16)|0x8086: /* 82801EB (ICH5) */ + case (0x25A3<<16)|0x8086: /* 6300ESB (E7210) */ + case (0x2653<<16)|0x8086: /* 82801FBM (ICH6M) */ + case (0x266F<<16)|0x8086: /* 82801FB (ICH6) */ + case (0x27DF<<16)|0x8086: /* 82801G SATA (ICH7) */ + case (0x27C0<<16)|0x8086: /* 82801GB SATA AHCI (ICH7) */ +// case (0x27C4<<16)|0x8086: /* 82801GBM SATA (ICH7) */ + case (0x27C5<<16)|0x8086: /* 82801GBM SATA AHCI (ICH7) */ + case (0x2920<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA IDE (ICH9) */ + case (0x3a20<<16)|0x8086: /* 82801JI (ICH10) */ + case
(0x3a26<<16)|0x8086: /* 82801JI (ICH10) */ + irqack = ichirqack; + break; + } + + for(channel = 0; channel < 2; channel++){ + if(pi & (1<<(2*channel))){ + sdev = ataprobe(p->mem[0+2*channel].bar & ~0x01, + p->mem[1+2*channel].bar & ~0x01, + p->intl); + if(sdev == nil) + continue; + + ctlr = sdev->ctlr; + if(ispc87415) { + ctlr->ienable = pc87415ienable; + print("pc87415disable: not yet implemented\n"); + } + + if(head != nil) + tail->next = sdev; + else + head = sdev; + tail = sdev; + ctlr->tbdf = p->tbdf; + } + else if((sdev = legacy[channel]) == nil) + continue; + else + ctlr = sdev->ctlr; + + ctlr->pcidev = p; + ctlr->maxio = maxio; + ctlr->span = span; + ctlr->irqack = irqack; + if(!(pi & 0x80)) + continue; + ctlr->bmiba = (p->mem[4].bar & ~0x01) + channel*8; + } + } + + return head; +} + +static SDev* +atalegacy(int port, int irq) +{ + return ataprobe(port, port+0x204, irq); +} + +static int +ataenable(SDev* sdev) +{ + Ctlr *ctlr; + char name[32]; + + ctlr = sdev->ctlr; + + if(ctlr->bmiba){ +#define ALIGN (4 * 1024) + if(ctlr->pcidev != nil) + pcisetbme(ctlr->pcidev); + ctlr->prdt = mallocalign(Nprd*sizeof(Prd), 4, 0, 4*1024); + if(ctlr->prdt == nil) + error(Enomem); + } + snprint(name, sizeof(name), "%s (%s)", sdev->name, sdev->ifc->name); + ctlr->vector = intrenable(ctlr->irq, atainterrupt, ctlr, ctlr->tbdf, name); + outb(ctlr->ctlport+Dc, 0); + if(ctlr->ienable) + ctlr->ienable(ctlr); + + return 1; +} + +static int +atadisable(SDev *sdev) +{ + Ctlr *ctlr; + char name[32]; + + ctlr = sdev->ctlr; + outb(ctlr->ctlport+Dc, Nien); /* disable interrupts */ + if (ctlr->idisable) + ctlr->idisable(ctlr); + snprint(name, sizeof(name), "%s (%s)", sdev->name, sdev->ifc->name); + intrdisable(ctlr->vector); + if (ctlr->bmiba) { + if (ctlr->pcidev) + pciclrbme(ctlr->pcidev); + free(ctlr->prdt); + } + return 0; +} + +static int +atarctl(SDunit* unit, char* p, int l) +{ + int n; + Ctlr *ctlr; + Drive *drive; + + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] 
== nil) + return 0; + drive = ctlr->drive[unit->subno]; + + qlock(drive); + n = snprint(p, l, "config %4.4uX capabilities %4.4uX", + drive->info[Iconfig], drive->info[Icapabilities]); + if(drive->dma) + n += snprint(p+n, l-n, " dma %8.8uX dmactl %8.8uX", + drive->dma, drive->dmactl); + if(drive->rwm) + n += snprint(p+n, l-n, " rwm %ud rwmctl %ud", + drive->rwm, drive->rwmctl); + if(drive->flags&Lba48) + n += snprint(p+n, l-n, " lba48always %s", + (drive->flags&Lba48always) ? "on" : "off"); + n += snprint(p+n, l-n, "\n"); + n += snprint(p+n, l-n, "interrupts read %lud write %lud cmds %lud\n", + drive->intrd, drive->intwr, drive->intcmd); + if(drive->sectors){ + n += snprint(p+n, l-n, "geometry %lld %d", + drive->sectors, drive->secsize); + if(drive->pkt == 0) + n += snprint(p+n, l-n, " %d %d %d", + drive->c, drive->h, drive->s); + n += snprint(p+n, l-n, "\n"); + } + qunlock(drive); + + return n; +} + +static int +atawctl(SDunit* unit, Cmdbuf* cb) +{ + int period; + Ctlr *ctlr; + Drive *drive; + + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] == nil) + return 0; + drive = ctlr->drive[unit->subno]; + + qlock(drive); + if(waserror()){ + qunlock(drive); + nexterror(); + } + + /* + * Dma and rwm control is passive at the moment, + * i.e. it is assumed that the hardware is set up + * correctly already either by the BIOS or when + * the drive was initially identified. 
+ */ + if(strcmp(cb->f[0], "dma") == 0){ + if(cb->nf != 2 || drive->dma == 0) + error(Ebadctl); + if(strcmp(cb->f[1], "on") == 0) + drive->dmactl = drive->dma; + else if(strcmp(cb->f[1], "off") == 0) + drive->dmactl = 0; + else + error(Ebadctl); + } + else if(strcmp(cb->f[0], "rwm") == 0){ + if(cb->nf != 2 || drive->rwm == 0) + error(Ebadctl); + if(strcmp(cb->f[1], "on") == 0) + drive->rwmctl = drive->rwm; + else if(strcmp(cb->f[1], "off") == 0) + drive->rwmctl = 0; + else + error(Ebadctl); + } + else if(strcmp(cb->f[0], "standby") == 0){ + switch(cb->nf){ + default: + error(Ebadctl); + case 2: + period = strtol(cb->f[1], 0, 0); + if(period && (period < 30 || period > 240*5)) + error(Ebadctl); + period /= 5; + break; + } + if(atastandby(drive, period) != SDok) + error(Ebadctl); + } + else if(strcmp(cb->f[0], "lba48always") == 0){ + if(cb->nf != 2 || !(drive->flags&Lba48)) + error(Ebadctl); + if(strcmp(cb->f[1], "on") == 0) + drive->flags |= Lba48always; + else if(strcmp(cb->f[1], "off") == 0) + drive->flags &= ~Lba48always; + else + error(Ebadctl); + } + else + error(Ebadctl); + qunlock(drive); + poperror(); + + return 0; +} + +SDifc sdataifc = { + "ata", /* name */ + + atapnp, /* pnp */ + atalegacy, /* legacy */ + ataenable, /* enable */ + atadisable, /* disable */ + + scsiverify, /* verify */ + scsionline, /* online */ + atario, /* rio */ + atarctl, /* rctl */ + atawctl, /* wctl */ + + scsibio, /* bio */ + nil, /* probe */ + ataclear, /* clear */ + atastat, /* rtopctl */ + nil, /* wtopctl */ +}; diff -Nru /sys/src/9k/386/sdiahci.c /sys/src/9k/386/sdiahci.c --- /sys/src/9k/386/sdiahci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/sdiahci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2409 @@ +/* + * ahci serial ata driver + * copyright © 2007-8 coraid, inc. + * + * there was a great deal of locking of single operations (e.g., + * atomic assignments); it's not clear what that locking was intended to + * prevent. 
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/sd.h" +#include "ahci.h" + +#define dprint(...) if(debug) iprint(__VA_ARGS__); else USED(debug) +#define idprint(...) if(prid) iprint(__VA_ARGS__); else USED(prid) +#define aprint(...) if(datapi) iprint(__VA_ARGS__); else USED(datapi) + +#define Tname(c) tname[(c)->type] +#define Intel(x) ((x)->pci->vid == 0x8086) + +enum { + NCtlr = 16, + NCtlrdrv= 32, + NDrive = NCtlr*NCtlrdrv, + + Read = 0, + Write, + + Nms = 256, /* ms. between drive checks */ + Mphywait= 2*1024/Nms - 1, + Midwait = 16*1024/Nms - 1, + Mcomrwait= 64*1024/Nms - 1, + + Obs = 0xa0, /* obsolete device bits */ + + /* + * if we get more than this many interrupts per tick for a drive, + * either the hardware is broken or we've got a bug in this driver. + */ + Maxintrspertick = 2000, /* was 1000 */ +}; + +/* pci space configuration */ +enum { + Pmap = 0x90, + Ppcs = 0x91, + Prev = 0xa8, +}; + +enum { + Tesb, + Tich, + Tsb600, + Tunk, +}; + +static char *tname[] = { + "63xxesb", + "ich", + "sb600", + "unknown", +}; + +enum { + Dnull, + Dmissing, + Dnew, + Dready, + Derror, + Dreset, + Doffline, + Dportreset, + Dlast, +}; + +static char *diskstates[Dlast] = { + "null", + "missing", + "new", + "ready", + "error", + "reset", + "offline", + "portreset", +}; + +enum { + DMautoneg, + DMsatai, + DMsataii, + DMsata3, +}; + +static char *modename[] = { /* used in control messages */ + "auto", + "satai", + "sataii", + "sata3", +}; +static char *descmode[] = { /* only printed */ + "auto", + "sata 1", + "sata 2", + "sata 3", +}; + +static char *flagname[] = { + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", +}; + +typedef struct Asleep Asleep; +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +struct Drive { + Lock; + + Ctlr *ctlr; + SDunit *unit; + char name[10]; + Aport *port; + Aportm portm; + Aportc portc; /* redundant ptr 
to port and portm */ + + uchar mediachange; + uchar state; + uchar smartrs; + + uvlong sectors; + ulong secsize; + ulong intick; /* start tick of current transfer */ + ulong lastseen; + int wait; + uchar mode; /* DMautoneg, satai or sataii */ + uchar active; + + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + + int infosz; + ushort *info; + ushort tinyinfo[2]; /* used iff malloc fails */ + + int driveno; /* ctlr*NCtlrdrv + unit */ + /* controller port # != driveno when not all ports are enabled */ + int portno; + + ulong lastintr0; + ulong intrs; +}; + +struct Ctlr { + Lock; + + int type; + int enabled; + SDev *sdev; + Pcidev *pci; + void* vector; + + /* virtual register addresses */ + uchar *mmio; + ulong *lmmio; + Ahba *hba; + + /* physical register address */ + uchar *physio; + + Drive *rawdrive; + Drive *drive[NCtlrdrv]; + int ndrive; + int mport; /* highest drive # (0-origin) on ich9 at least */ + + ulong lastintr0; + ulong intrs; /* not attributable to any drive */ +}; + +struct Asleep { + Aport *p; + int i; +}; + +extern SDifc sdiahciifc; + +static Ctlr iactlr[NCtlr]; +static SDev sdevs[NCtlr]; +static int niactlr; + +static Drive *iadrive[NDrive]; +static int niadrive; + +/* these are fiddled in iawtopctl() */ +static int debug; +static int prid = 1; +static int datapi; + +static char stab[] = { +[0] 'i', 'm', +[8] 't', 'c', 'p', 'e', +[16] 'N', 'I', 'W', 'B', 'D', 'C', 'H', 'S', 'T', 'F', 'X' +}; + +static void +serrstr(ulong r, char *s, char *e) +{ + int i; + + e -= 3; /* room for a letter, a '*' and the terminating nul */ + for(i = 0; i < nelem(stab) && s < e; i++) + if(r & (1<<i) && stab[i]){ + *s++ = stab[i]; + if(SerrBad & (1<<i)) /* NOTE(review): assumes SerrBad is defined in ahci.h -- confirm */ + *s++ = '*'; + } + *s = 0; +} + +static char ntab[] = "0123456789abcdef"; + +static void +preg(uchar *reg, int n) /* hex-dump n bytes of reg through dprint; n*3+2 must fit in buf */ +{ + int i; + char buf[25*3+1], *e; + + e = buf; + for(i = 0; i < n; i++){ + *e++ = ntab[reg[i]>>4]; + *e++ = ntab[reg[i]&0xf]; + *e++ = ' '; + } + *e++ = '\n'; + *e = 0; + dprint(buf); +} + +static void +dreg(char *s, Aport *p) +{ + dprint("ahci: %stask=%#lux; cmd=%#lux; ci=%#lux; is=%#lux\n", + s, p->task, p->cmd, p->ci, p->isr); +} + +static void +esleep(int ms) +{ + if(waserror()) + return; + tsleep(&up->sleep, return0, 0, ms); + poperror(); +} + +static int +ahciclear(void *v) +{ + Asleep *s; + + s = v; +
return (s->p->ci & s->i) == 0; +} + +static void +aesleep(Aportm *pm, Asleep *a, int ms) +{ + if(waserror()) + return; + tsleep(pm, ahciclear, a, ms); + poperror(); +} + +static int +ahciwait(Aportc *c, int ms) +{ + Asleep as; + Aport *p; + + p = c->p; + p->ci = 1; + as.p = p; + as.i = 1; + aesleep(c->pm, &as, ms); + if((p->task&1) == 0 && p->ci == 0) + return 0; + dreg("ahciwait timeout ", c->p); + return -1; +} + +/* fill in cfis boilerplate */ +static uchar * +cfissetup(Aportc *pc) +{ + uchar *cfis; + + cfis = pc->pm->ctab->cfis; + memset(cfis, 0, 0x20); + cfis[0] = 0x27; + cfis[1] = 0x80; + cfis[7] = Obs; + return cfis; +} + +/* initialise pc's list */ +static void +listsetup(Aportc *pc, int flags) +{ + Alist *list; + + list = pc->pm->list; + list->flags = flags | 5; + list->len = 0; + list->ctab = PCIWADDRL(pc->pm->ctab); + list->ctabhi = PCIWADDRH(pc->pm->ctab); +} + +static int +nop(Aportc *pc) +{ + uchar *c; + + if((pc->pm->feat & Dnop) == 0) + return -1; + c = cfissetup(pc); + c[2] = 0; + listsetup(pc, Lwrite); + return ahciwait(pc, 3*1000); +} + +static int +setfeatures(Aportc *pc, uchar f) +{ + uchar *c; + + c = cfissetup(pc); + c[2] = 0xef; + c[3] = f; + listsetup(pc, Lwrite); + return ahciwait(pc, 3*1000); +} + +static int +setudmamode(Aportc *pc, uchar f) +{ + uchar *c; + + /* hack */ + if((pc->p->sig >> 16) == 0xeb14) + return 0; + c = cfissetup(pc); + c[2] = 0xef; + c[3] = 3; /* set transfer mode */ + c[12] = 0x40 | f; /* sector count */ + listsetup(pc, Lwrite); + return ahciwait(pc, 3*1000); +} + +static void +asleep(int ms) +{ + if(up == nil) + delay(ms); + else + esleep(ms); +} + +static int +ahciportreset(Aportc *c) +{ + ulong *cmd, i; + Aport *p; + + p = c->p; + cmd = &p->cmd; + *cmd &= ~(Afre|Ast); + for(i = 0; i < 500; i += 25){ + if((*cmd&Acr) == 0) + break; + asleep(25); + } + p->sctl = 1|(p->sctl&~7); + delay(1); + p->sctl &= ~7; + return 0; +} + +static int +smart(Aportc *pc, int n) +{ + uchar *c; + + if((pc->pm->feat&Dsmart) == 0) + 
return -1; + c = cfissetup(pc); + c[2] = 0xb0; + c[3] = 0xd8 + n; /* able smart */ + c[5] = 0x4f; + c[6] = 0xc2; + listsetup(pc, Lwrite); + if(ahciwait(pc, 1000) == -1 || pc->p->task & (1|32)){ + dprint("ahci: smart fail %#lux\n", pc->p->task); +// preg(pc->pm->fis.r, 20); + return -1; + } + if(n) + return 0; + return 1; +} + +static int +smartrs(Aportc *pc) +{ + uchar *c; + + c = cfissetup(pc); + c[2] = 0xb0; + c[3] = 0xda; /* return smart status */ + c[5] = 0x4f; + c[6] = 0xc2; + listsetup(pc, Lwrite); + + c = pc->pm->fis.r; + if(ahciwait(pc, 1000) == -1 || pc->p->task & (1|32)){ + dprint("ahci: smart fail %#lux\n", pc->p->task); + preg(c, 20); + return -1; + } + if(c[5] == 0x4f && c[6] == 0xc2) + return 1; + return 0; +} + +static int +ahciflushcache(Aportc *pc) +{ + uchar *c; + + c = cfissetup(pc); + c[2] = pc->pm->feat & Dllba? 0xea: 0xe7; + listsetup(pc, Lwrite); + if(ahciwait(pc, 60000) == -1 || pc->p->task & (1|32)){ + dprint("ahciflushcache: fail %#lux\n", pc->p->task); +// preg(pc->pm->fis.r, 20); + return -1; + } + return 0; +} + +static ushort +gbit16(void *a) +{ + uchar *i; + + i = a; + return i[1]<<8 | i[0]; +} + +static ulong +gbit32(void *a) +{ + ulong j; + uchar *i; + + i = a; + j = i[3] << 24; + j |= i[2] << 16; + j |= i[1] << 8; + j |= i[0]; + return j; +} + +static uvlong +gbit64(void *a) +{ + uchar *i; + + i = a; + return (uvlong)gbit32(i+4) << 32 | gbit32(a); +} + +static int +ahciidentify0(Aportc *pc, void *id, int atapi) +{ + uchar *c; + Aprdt *p; + static uchar tab[] = { 0xec, 0xa1, }; + + c = cfissetup(pc); + c[2] = tab[atapi]; + listsetup(pc, 1<<16); + + memset(id, 0, 0x100); /* magic */ + p = &pc->pm->ctab->prdt; + p->dba = PCIWADDRL(id); + p->dbahi = PCIWADDRH(id); + p->count = 1<<31 | (0x200-2) | 1; + return ahciwait(pc, 3*1000); +} + +static vlong +ahciidentify(Aportc *pc, ushort *id) +{ + int i, sig; + vlong s; + Aportm *pm; + + pm = pc->pm; + pm->feat = 0; + pm->smart = 0; + i = 0; + sig = pc->p->sig >> 16; + if(sig == 0xeb14){ + 
pm->feat |= Datapi; + i = 1; + } + if(ahciidentify0(pc, id, i) == -1) + return -1; + + i = gbit16(id+83) | gbit16(id+86); + if(i & (1<<10)){ + pm->feat |= Dllba; + s = gbit64(id+100); + }else + s = gbit32(id+60); + + if(pm->feat&Datapi){ + i = gbit16(id+0); + if(i&1) + pm->feat |= Datapi16; + } + + i = gbit16(id+83); + if((i>>14) == 1) { + if(i & (1<<3)) + pm->feat |= Dpower; + i = gbit16(id+82); + if(i & 1) + pm->feat |= Dsmart; + if(i & (1<<14)) + pm->feat |= Dnop; + } + return s; +} + +static int +ahciquiet(Aport *a) +{ + ulong *p, i; + + p = &a->cmd; + *p &= ~Ast; + for(i = 0; i < 500; i += 50){ + if((*p & Acr) == 0) + goto stop; + asleep(50); + } + return -1; +stop: + if((a->task & (ASdrq|ASbsy)) == 0){ + *p |= Ast; + return 0; + } + + *p |= Aclo; + for(i = 0; i < 500; i += 50){ + if((*p & Aclo) == 0) + goto stop1; + asleep(50); + } + return -1; +stop1: + /* extra check */ + dprint("ahci: clo clear %#lx\n", a->task); + if(a->task & ASbsy) + return -1; + *p |= Ast; + return 0; +} + +static int +ahcicomreset(Aportc *pc) +{ + uchar *c; + + dprint("ahcicomreset\n"); + dreg("ahci: comreset ", pc->p); + if(ahciquiet(pc->p) == -1){ + dprint("ahciquiet failed\n"); + return -1; + } + dreg("comreset ", pc->p); + + c = cfissetup(pc); + c[1] = 0; + c[15] = 1<<2; /* srst */ + listsetup(pc, Lclear | Lreset); + if(ahciwait(pc, 500) == -1){ + dprint("ahcicomreset: first command failed\n"); + return -1; + } + microdelay(250); + dreg("comreset ", pc->p); + + c = cfissetup(pc); + c[1] = 0; + listsetup(pc, Lwrite); + if(ahciwait(pc, 150) == -1){ + dprint("ahcicomreset: second command failed\n"); + return -1; + } + dreg("comreset ", pc->p); + return 0; +} + +static int +ahciidle(Aport *port) +{ + ulong *p, i, r; + + p = &port->cmd; + if((*p & Arun) == 0) + return 0; + *p &= ~Ast; + r = 0; + for(i = 0; i < 500; i += 25){ + if((*p & Acr) == 0) + goto stop; + asleep(25); + } + r = -1; +stop: + if((*p & Afre) == 0) + return r; + *p &= ~Afre; + for(i = 0; i < 500; i += 25){ + if((*p & 
Afre) == 0) + return 0; + asleep(25); + } + return -1; +} + +/* + * § 6.2.2.1 first part; comreset handled by reset disk. + * - remainder is handled by configdisk. + * - ahcirecover is a quick recovery from a failed command. + */ +static int +ahciswreset(Aportc *pc) +{ + int i; + + i = ahciidle(pc->p); + pc->p->cmd |= Afre; + if(i == -1) + return -1; + if(pc->p->task & (ASdrq|ASbsy)) + return -1; + return 0; +} + +static int +ahcirecover(Aportc *pc) +{ + ahciswreset(pc); + pc->p->cmd |= Ast; + if(setudmamode(pc, 5) == -1) + return -1; + return 0; +} + +static void* +malign(int size, int align) +{ + return mallocalign(size, align, 0, 0); +} + +static void +setupfis(Afis *f) +{ + f->base = malign(0x100, 0x100); /* magic */ + f->d = f->base + 0; + f->p = f->base + 0x20; + f->r = f->base + 0x40; + f->u = f->base + 0x60; + f->devicebits = (ulong*)(f->base + 0x58); +} + +static void +ahciwakeup(Aport *p) +{ + ushort s; + + s = p->sstatus; + if((s & Intpm) != Intslumber && (s & Intpm) != Intpartpwr) + return; + if((s & Devdet) != Devpresent){ /* not (device, no phy) */ + iprint("ahci: slumbering drive unwakable %#ux\n", s); + return; + } + p->sctl = 3*Aipm | 0*Aspd | Adet; + delay(1); + p->sctl &= ~7; +// iprint("ahci: wake %#ux -> %#ux\n", s, p->sstatus); +} + +static int +ahciconfigdrive(Drive *d) +{ + char *name; + Ahba *h; + Aport *p; + Aportm *pm; + + h = d->ctlr->hba; + p = d->portc.p; + pm = d->portc.pm; + if(pm->list == 0){ + setupfis(&pm->fis); + pm->list = malign(sizeof *pm->list, 1024); + pm->ctab = malign(sizeof *pm->ctab, 128); + } + + if (d->unit) + name = d->unit->name; + else + name = nil; + if(p->sstatus & (Devphycomm|Devpresent) && h->cap & Hsss){ + /* device connected & staggered spin-up */ + dprint("ahci: configdrive: %s: spinning up ... 
[%#lux]\n", + name, p->sstatus); + p->cmd |= Apod|Asud; + asleep(1400); + } + + p->serror = SerrAll; + + p->list = PCIWADDRL(pm->list); + p->listhi = PCIWADDRH(pm->list); + p->fis = PCIWADDRL(pm->fis.base); + p->fishi = PCIWADDRH(pm->fis.base); + p->cmd |= Afre|Ast; + + /* drive coming up in slumbering? */ + if((p->sstatus & Devdet) == Devpresent && + ((p->sstatus & Intpm) == Intslumber || + (p->sstatus & Intpm) == Intpartpwr)) + ahciwakeup(p); + + /* "disable power managment" sequence from book. */ + p->sctl = (3*Aipm) | (d->mode*Aspd) | (0*Adet); + p->cmd &= ~Aalpe; + + p->ie = IEM; + + return 0; +} + +static void +ahcienable(Ahba *h) +{ + h->ghc |= Hie; +} + +static void +ahcidisable(Ahba *h) +{ + h->ghc &= ~Hie; +} + +static int +countbits(ulong u) +{ + int n; + + n = 0; + for (; u != 0; u >>= 1) + if(u & 1) + n++; + return n; +} + +static int +ahciconf(Ctlr *ctlr) +{ + Ahba *h; + ulong u; + + h = ctlr->hba = (Ahba*)ctlr->mmio; + u = h->cap; + + if((u&Hsam) == 0) + h->ghc |= Hae; + + dprint("#S/sd%c: type %s port %#p: sss %ld ncs %ld coal %ld " + "%ld ports, led %ld clo %ld ems %ld\n", + ctlr->sdev->idno, tname[ctlr->type], h, + (u>>27) & 1, (u>>8) & 0x1f, (u>>7) & 1, + (u & 0x1f) + 1, (u>>25) & 1, (u>>24) & 1, (u>>6) & 1); + return countbits(h->pi); +} + +static int +ahcihbareset(Ahba *h) +{ + int wait; + + h->ghc |= 1; + for(wait = 0; wait < 1000; wait += 100){ + if(h->ghc == 0) + return 0; + delay(100); + } + return -1; +} + +static void +idmove(char *p, ushort *a, int n) +{ + int i; + char *op, *e; + + op = p; + for(i = 0; i < n/2; i++){ + *p++ = a[i] >> 8; + *p++ = a[i]; + } + *p = 0; + while(p > op && *--p == ' ') + *p = 0; + e = p; + for (p = op; *p == ' '; p++) + ; + memmove(op, p, n - (e - p)); +} + +static int +identify(Drive *d) +{ + ushort *id; + vlong osectors, s; + uchar oserial[21]; + SDunit *u; + + if(d->info == nil) { + d->infosz = 512 * sizeof(ushort); + d->info = malloc(d->infosz); + } + if(d->info == nil) { + d->info = d->tinyinfo; + 
d->infosz = sizeof d->tinyinfo; + } + id = d->info; + s = d->info == d->tinyinfo? -1: ahciidentify(&d->portc, id); /* tinyinfo cannot hold the 512-byte identify transfer */ + if(s == -1){ + d->state = Derror; + return -1; + } + osectors = d->sectors; + memmove(oserial, d->serial, sizeof d->serial); + + u = d->unit; + d->sectors = s; + d->secsize = u->secsize; + if(d->secsize == 0) + d->secsize = 512; /* default */ + d->smartrs = 0; + + idmove(d->serial, id+10, 20); + idmove(d->firmware, id+23, 8); + idmove(d->model, id+27, 40); + + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, d->model, 40); + + if(osectors != s || memcmp(oserial, d->serial, sizeof oserial) != 0){ + d->mediachange = 1; + u->sectors = 0; + } + return 0; +} + +static void +clearci(Aport *p) +{ + if(p->cmd & Ast) { + p->cmd &= ~Ast; + p->cmd |= Ast; + } +} + +static void +updatedrive(Drive *d) +{ + ulong cause, serr, s0, pr, ewake; + char *name; + Aport *p; + static ulong last; + + pr = 1; + ewake = 0; + p = d->port; + cause = p->isr; + serr = p->serror; + p->isr = cause; + name = "??"; + if(d->unit && d->unit->name) + name = d->unit->name; + + if(p->ci == 0){ + d->portm.flag |= Fdone; + wakeup(&d->portm); + pr = 0; + }else if(cause & Adps) + pr = 0; + if(cause & Ifatal){ + ewake = 1; + dprint("ahci: updatedrive: %s: fatal\n", name); + } + if(cause & Adhrs){ + if(p->task & (1<<5|1)){ + dprint("ahci: %s: Adhrs cause %#lux serr %#lux task %#lux\n", + name, cause, serr, p->task); + d->portm.flag |= Ferror; + ewake = 1; + } + pr = 0; + } + if(p->task & 1 && last != cause) + dprint("%s: err ca %#lux serr %#lux task %#lux sstat %#lux\n", + name, cause, serr, p->task, p->sstatus); + if(pr) + dprint("%s: upd %#lux ta %#lux\n", name, cause, p->task); + + if(cause & (Aprcs|Aifs)){ + s0 = d->state; + switch(p->sstatus & Devdet){ + case 0: /* no device */ + d->state = Dmissing; + break; + case Devpresent: /* device but no phy comm.
*/ + if((p->sstatus & Intpm) == Intslumber || + (p->sstatus & Intpm) == Intpartpwr) + d->state = Dnew; /* slumbering */ + else + d->state = Derror; + break; + case Devpresent|Devphycomm: + /* power mgnt crap for surprise removal */ + p->ie |= Aprcs|Apcs; /* is this required? */ + d->state = Dreset; + break; + case Devphyoffline: + d->state = Doffline; + break; + } + dprint("%s: %s → %s [Apcrs] %#lux\n", name, + diskstates[s0], diskstates[d->state], p->sstatus); + /* print pulled message here. */ + if(s0 == Dready && d->state != Dready) + idprint("%s: pulled\n", name); + if(d->state != Dready) + d->portm.flag |= Ferror; + ewake = 1; + } + p->serror = serr; + if(ewake){ + clearci(p); + wakeup(&d->portm); + } + last = cause; +} + +static void +pstatus(Drive *d, ulong s) +{ + /* + * s is masked with Devdet. + * + * bogus code because the first interrupt is currently dropped. + * likely my fault. serror may be cleared at the wrong time. + */ + switch(s){ + case 0: /* no device */ + d->state = Dmissing; + break; + case Devpresent: /* device but no phy. comm. */ + break; + case Devphycomm: /* should this be missing? need testcase. */ + dprint("ahci: pstatus 2\n"); + /* fallthrough */ + case Devpresent|Devphycomm: + d->wait = 0; + d->state = Dnew; + break; + case Devphyoffline: + d->state = Doffline; + break; + case Devphyoffline|Devphycomm: /* does this make sense? */ + d->state = Dnew; + break; + } +} + +static int +configdrive(Drive *d) +{ + if(ahciconfigdrive(d) == -1) + return -1; + ilock(d); + pstatus(d, d->port->sstatus & Devdet); + iunlock(d); + return 0; +} + +static void +resetdisk(Drive *d) +{ + uint state, det, stat; + Aport *p; + + p = d->port; + det = p->sctl & 7; + stat = p->sstatus & Devdet; + state = (p->cmd>>28) & 0xf; + dprint("ahci: resetdisk: icc %#ux det %d sdet %d\n", state, det, stat); + + ilock(d); + state = d->state; + if(d->state != Dready && d->state != Dnew) /* was ||, true for every state */ + d->portm.flag |= Ferror; + clearci(p); /* satisfy sleep condition. 
*/ + wakeup(&d->portm); + if(stat != (Devpresent|Devphycomm)){ + /* device absent or phy not communicating */ + d->state = Dportreset; + iunlock(d); + return; + } + d->state = Derror; + iunlock(d); + + qlock(&d->portm); + if(p->cmd&Ast && ahciswreset(&d->portc) == -1){ + d->state = Dportreset; /* get a bigger stick. */ + } else { + d->state = Dmissing; + configdrive(d); + } + dprint("ahci: %s: resetdisk: %s → %s\n", (d->unit? d->unit->name: nil), + diskstates[state], diskstates[d->state]); + qunlock(&d->portm); +} + +static int +newdrive(Drive *d) +{ + char *name; + Aportc *c; + Aportm *pm; + + c = &d->portc; + pm = &d->portm; + + name = d->unit->name; + if(name == 0) + name = "??"; + + if(d->port->task == 0x80) + return -1; + qlock(c->pm); + if(setudmamode(c, 5) == -1){ + dprint("%s: can't set udma mode\n", name); + goto lose; + } + if(identify(d) == -1){ + dprint("%s: identify failure\n", name); + goto lose; + } + if(pm->feat & Dpower && setfeatures(c, 0x85) == -1){ + pm->feat &= ~Dpower; + if(ahcirecover(c) == -1) + goto lose; + } + + d->state = Dready; + + qunlock(c->pm); + + idprint("%s: %sLBA %,llud sectors: %s %s %s %s\n", d->unit->name, + (pm->feat & Dllba? "L": ""), d->sectors, d->model, d->firmware, + d->serial, d->mediachange? 
"[mediachange]": ""); + return 0; + +lose: + idprint("%s: can't be initialized\n", d->unit->name); + d->state = Dnull; + qunlock(c->pm); + return -1; +} + +static void +westerndigitalhung(Drive *d) +{ + if((d->portm.feat&Datapi) == 0 && d->active && + TK2MS(sys->ticks - d->intick) > 5000){ + dprint("%s: drive hung; resetting [%#lux] ci %#lx\n", + d->unit->name, d->port->task, d->port->ci); + d->state = Dreset; + } +} + +static ushort olds[NCtlr*NCtlrdrv]; + +static int +doportreset(Drive *d) +{ + int i; + + i = -1; + qlock(&d->portm); + if(ahciportreset(&d->portc) == -1) + dprint("ahci: doportreset: fails\n"); + else + i = 0; + qunlock(&d->portm); + dprint("ahci: doportreset: portreset → %s [task %#lux]\n", + diskstates[d->state], d->port->task); + return i; +} + +/* drive must be locked */ +static void +statechange(Drive *d) +{ + switch(d->state){ + case Dnull: + case Doffline: + if(d->unit->sectors != 0){ + d->sectors = 0; + d->mediachange = 1; + } + /* fallthrough */ + case Dready: + d->wait = 0; + break; + } +} + +static void +checkdrive(Drive *d, int i) +{ + ushort s; + char *name; + + if(d == nil) { + print("checkdrive: nil d\n"); + return; + } + ilock(d); + if(d->unit == nil || d->port == nil) { + if(0) + print("checkdrive: nil d->%s\n", + d->unit == nil? "unit": "port"); + iunlock(d); + return; + } + name = d->unit->name; + s = d->port->sstatus; + if(s) + d->lastseen = sys->ticks; + if(s != olds[i]){ + dprint("%s: status: %06#ux -> %06#ux: %s\n", + name, olds[i], s, diskstates[d->state]); + olds[i] = s; + d->wait = 0; + } + westerndigitalhung(d); + + switch(d->state){ + case Dnull: + case Dready: + break; + case Dmissing: + case Dnew: + switch(s & (Intactive | Devdet)){ + case Devpresent: /* no device (pm), device but no phy. comm. 
*/ + ahciwakeup(d->port); + /* fall through */ + case 0: /* no device */ + break; + default: + dprint("%s: unknown status %06#ux\n", name, s); + /* fall through */ + case Intactive: /* active, no device */ + if(++d->wait&Mphywait) + break; +reset: + if(++d->mode > DMsataii) + d->mode = 0; + if(d->mode == DMsatai){ /* we tried everything */ + d->state = Dportreset; + goto portreset; + } + dprint("%s: reset; new mode %s\n", name, + modename[d->mode]); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Intactive|Devphycomm|Devpresent: + if((++d->wait&Midwait) == 0){ + dprint("%s: slow reset %06#ux task=%#lux; %d\n", + name, s, d->port->task, d->wait); + goto reset; + } + s = (uchar)d->port->task; + if(s == 0x7f || ((d->port->sig >> 16) != 0xeb14 && + (s & ~0x17) != (1<<6))) + break; + iunlock(d); + newdrive(d); + ilock(d); + break; + } + break; + case Doffline: + if(d->wait++ & Mcomrwait) + break; + /* fallthrough */ + case Derror: + case Dreset: + dprint("%s: reset [%s]: mode %d; status %06#ux\n", + name, diskstates[d->state], d->mode, s); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Dportreset: +portreset: + if(d->wait++ & 0xff && (s & Intactive) == 0) + break; + /* device is active */ + dprint("%s: portreset [%s]: mode %d; status %06#ux\n", + name, diskstates[d->state], d->mode, s); + d->portm.flag |= Ferror; + clearci(d->port); + wakeup(&d->portm); + if((s & Devdet) == 0){ /* no device */ + d->state = Dmissing; + break; + } + iunlock(d); + doportreset(d); + ilock(d); + break; + } + statechange(d); + iunlock(d); +} + +static void +satakproc(void*) +{ + int i; + + for(;;){ + tsleep(&up->sleep, return0, 0, Nms); + for(i = 0; i < niadrive; i++) + if(iadrive[i] != nil) + checkdrive(iadrive[i], i); + } +} + +static void +isctlrjabbering(Ctlr *c, ulong cause) +{ + ulong now; + + now = TK2MS(sys->ticks); + if (now > c->lastintr0) { + c->intrs = 0; + c->lastintr0 = now; + } + if (++c->intrs > Maxintrspertick) + panic("sdiahci: too many intrs per tick 
for no serviced " + "drive; cause %#lux mport %d", cause, c->mport); +} + +static void +isdrivejabbering(Drive *d) +{ + ulong now; + + now = TK2MS(sys->ticks); + if (now > d->lastintr0) { + d->intrs = 0; + d->lastintr0 = now; + } + if (++d->intrs > Maxintrspertick) + panic("sdiahci: too many interrupts per tick for %s", + d->unit->name); +} + +static void +iainterrupt(Ureg*, void *a) +{ + int i; + ulong cause, mask; + Ctlr *c; + Drive *d; + + c = a; + ilock(c); + cause = c->hba->isr; + if (cause == 0) { + isctlrjabbering(c, cause); + // iprint("sdiahci: interrupt for no drive\n"); + iunlock(c); + return; + } + for(i = 0; cause && i <= c->mport; i++){ + mask = 1 << i; + if((cause & mask) == 0) + continue; + d = c->rawdrive + i; + ilock(d); + isdrivejabbering(d); + if(d->port->isr && c->hba->pi & mask) + updatedrive(d); + c->hba->isr = mask; + iunlock(d); + + cause &= ~mask; + } + if (cause) { + isctlrjabbering(c, cause); + iprint("sdiachi: intr cause unserviced: %#lux\n", cause); + } + iunlock(c); +} + +/* checkdrive, called from satakproc, will prod the drive while we wait */ +static void +awaitspinup(Drive *d) +{ + int ms; + ushort s; + char *name; + + ilock(d); + if(d->unit == nil || d->port == nil) { + panic("awaitspinup: nil d->unit or d->port"); + iunlock(d); + return; + } + name = (d->unit? 
d->unit->name: nil); + s = d->port->sstatus; + if(!(s & Devpresent)) { /* never going to be ready */ + dprint("awaitspinup: %s absent, not waiting\n", name); + iunlock(d); + return; + } + + for (ms = 20000; ms > 0; ms -= 50) + switch(d->state){ + case Dnull: + /* absent; done */ + iunlock(d); + dprint("awaitspinup: %s in null state\n", name); + return; + case Dready: + case Dnew: + if(d->sectors || d->mediachange) { + /* ready to use; done */ + iunlock(d); + dprint("awaitspinup: %s ready!\n", name); + return; + } + /* fall through */ + default: + case Dmissing: /* normal waiting states */ + case Dreset: + case Doffline: /* transitional states */ + case Derror: + case Dportreset: + iunlock(d); + asleep(50); + ilock(d); + break; + } + print("awaitspinup: %s didn't spin up after 20 seconds\n", name); + iunlock(d); +} + +static int +iaverify(SDunit *u) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + ilock(c); + ilock(d); + d->unit = u; + iunlock(d); + iunlock(c); + checkdrive(d, d->driveno); /* c->d0 + d->driveno */ + + /* + * hang around until disks are spun up and thus available as + * nvram, dos file systems, etc. you wouldn't expect it, but + * the intel 330 ssd takes a while to `spin up'. + */ + awaitspinup(d); + return 1; +} + +static int +iaenable(SDev *s) +{ + char name[32]; + Ctlr *c; + static int once; + + c = s->ctlr; + ilock(c); + if(!c->enabled) { + if(c->ndrive == 0) + panic("iaenable: zero s->ctlr->ndrive"); + pcisetbme(c->pci); + snprint(name, sizeof name, "%s (%s)", s->name, s->ifc->name); + c->vector = intrenable(c->pci->intl, iainterrupt, c, c->pci->tbdf, name); + /* supposed to squelch leftover interrupts here. 
*/ + ahcienable(c->hba); + if(once == 0) { + once = 1; + kproc("ahci", satakproc, 0); + } + c->enabled = 1; + } + iunlock(c); + return 1; +} + +static int +iadisable(SDev *s) +{ + char name[32]; + Ctlr *c; + + c = s->ctlr; + ilock(c); + ahcidisable(c->hba); + snprint(name, sizeof name, "%s (%s)", s->name, s->ifc->name); + intrdisable(c->vector); + c->enabled = 0; + iunlock(c); + return 1; +} + +static int +iaonline(SDunit *unit) +{ + int r; + Ctlr *c; + Drive *d; + + c = unit->dev->ctlr; + d = c->drive[unit->subno]; + r = 0; + + if(d->portm.feat & Datapi && d->mediachange){ + r = scsionline(unit); + if(r > 0) + d->mediachange = 0; + return r; + } + + ilock(d); + if(d->mediachange){ + r = 2; + d->mediachange = 0; + /* devsd resets this after online is called; why? */ + unit->sectors = d->sectors; + unit->secsize = 512; /* default size */ + } else if(d->state == Dready) + r = 1; + iunlock(d); + return r; +} + +/* returns locked list! */ +static Alist* +ahcibuild(Drive *d, uchar *cmd, void *data, int n, vlong lba) +{ + uchar *c, acmd, dir, llba; + Alist *l; + Actab *t; + Aportm *pm; + Aprdt *p; + static uchar tab[2][2] = { 0xc8, 0x25, 0xca, 0x35, }; + + pm = &d->portm; + dir = *cmd != 0x28; + llba = pm->feat&Dllba? 
1: 0; + acmd = tab[dir][llba]; + qlock(pm); + l = pm->list; + t = pm->ctab; + c = t->cfis; + + c[0] = 0x27; + c[1] = 0x80; + c[2] = acmd; + c[3] = 0; + + c[4] = lba; /* sector lba low 7:0 */ + c[5] = lba >> 8; /* cylinder low lba mid 15:8 */ + c[6] = lba >> 16; /* cylinder hi lba hi 23:16 */ + c[7] = Obs | 0x40; /* 0x40 == lba */ + if(llba == 0) + c[7] |= (lba>>24) & 0xf; /* lba28 only: lba 27:24 go in the device register (was & 7, dropping bit 27) */ + + c[8] = lba >> 24; /* sector (exp) lba 31:24 */ + c[9] = lba >> 32; /* cylinder low (exp) lba 39:32 */ + c[10] = lba >> 40; /* cylinder hi (exp) lba 47:40 (was >> 48, which skipped these bits) */ + c[11] = 0; /* features (exp); */ + + c[12] = n; /* sector count */ + c[13] = n >> 8; /* sector count (exp) */ + c[14] = 0; /* r */ + c[15] = 0; /* control */ + + *(ulong*)(c + 16) = 0; + + l->flags = 1<<16 | Lpref | 0x5; /* Lpref ?? */ + if(dir == Write) + l->flags |= Lwrite; + l->len = 0; + l->ctab = PCIWADDRL(t); + l->ctabhi = PCIWADDRH(t); + + p = &t->prdt; + p->dba = PCIWADDRL(data); + p->dbahi = PCIWADDRH(data); + if(d->unit == nil) + panic("ahcibuild: nil d->unit"); + p->count = 1<<31 | (d->unit->secsize*n - 2) | 1; + + return l; +} + +static Alist* +ahcibuildpkt(Aportm *pm, SDreq *r, void *data, int n) +{ + int fill, len; + uchar *c; + Alist *l; + Actab *t; + Aprdt *p; + + qlock(pm); + l = pm->list; + t = pm->ctab; + c = t->cfis; + + fill = pm->feat&Datapi16? 
16: 12; + if((len = r->clen) > fill) + len = fill; + memmove(t->atapi, r->cmd, len); + memset(t->atapi+len, 0, fill-len); + + c[0] = 0x27; + c[1] = 0x80; + c[2] = 0xa0; + if(n != 0) + c[3] = 1; /* dma */ + else + c[3] = 0; /* features (exp); */ + + c[4] = 0; /* sector lba low 7:0 */ + c[5] = n; /* cylinder low lba mid 15:8 */ + c[6] = n >> 8; /* cylinder hi lba hi 23:16 */ + c[7] = Obs; + + *(ulong*)(c + 8) = 0; + *(ulong*)(c + 12) = 0; + *(ulong*)(c + 16) = 0; + + l->flags = 1<<16 | Lpref | Latapi | 0x5; + if(r->write != 0 && data) + l->flags |= Lwrite; + l->len = 0; + l->ctab = PCIWADDRL(t); + l->ctabhi = PCIWADDRH(t); + + if(data == 0) + return l; + + p = &t->prdt; + p->dba = PCIWADDRL(data); + p->dbahi = PCIWADDRH(data); + p->count = 1<<31 | (n - 2) | 1; + + return l; +} + +static int +waitready(Drive *d) +{ + ulong s, i, δ; + + for(i = 0; i < 15000; i += 250){ + if(d->state == Dreset || d->state == Dportreset || + d->state == Dnew) + return 1; + δ = sys->ticks - d->lastseen; + if(d->state == Dnull || δ > 10*1000) + return -1; + ilock(d); + s = d->port->sstatus; + iunlock(d); + if((s & Intpm) == 0 && δ > 1500) + return -1; /* no detect */ + if(d->state == Dready && + (s & Devdet) == (Devphycomm|Devpresent)) + return 0; /* ready, present & phy. comm. 
*/ + esleep(250); + } + print("%s: not responding; offline\n", d->unit->name); + d->state = Doffline; + return -1; +} + +static int +lockready(Drive *d) +{ + int i; + + qlock(&d->portm); + while ((i = waitready(d)) == 1) { + qunlock(&d->portm); + esleep(1); + qlock(&d->portm); + } + return i; +} + +static int +flushcache(Drive *d) +{ + int i; + + i = -1; + if(lockready(d) == 0) + i = ahciflushcache(&d->portc); + qunlock(&d->portm); + return i; +} + +static int +iariopkt(SDreq *r, Drive *d) +{ + int n, count, try, max, flag, task, wormwrite; + char *name; + uchar *cmd, *data; + Aport *p; + Asleep as; + + cmd = r->cmd; + name = d->unit->name; + p = d->port; + + aprint("ahci: iariopkt: %04#ux %04#ux %c %d %p\n", + cmd[0], cmd[2], "rw"[r->write], r->dlen, r->data); + if(cmd[0] == 0x5a && (cmd[2] & 0x3f) == 0x3f) + return sdmodesense(r, cmd, d->info, d->infosz); + r->rlen = 0; + count = r->dlen; + max = 65536; + + try = 0; +retry: + data = r->data; + n = count; + if(n > max) + n = max; + ahcibuildpkt(&d->portm, r, data, n); + switch(waitready(d)){ + case -1: + qunlock(&d->portm); + return SDeio; + case 1: + qunlock(&d->portm); + esleep(1); + goto retry; + } + + ilock(d); + d->portm.flag = 0; + iunlock(d); + p->ci = 1; + + as.p = p; + as.i = 1; + d->intick = sys->ticks; + d->active++; + + while(waserror()) + ; + sleep(&d->portm, ahciclear, &as); + poperror(); + + d->active--; + ilock(d); + flag = d->portm.flag; + task = d->port->task; + iunlock(d); + + if(task & (Efatal<<8) || task & (ASbsy|ASdrq) && d->state == Dready){ + d->port->ci = 0; + ahcirecover(&d->portc); + task = d->port->task; + flag &= ~Fdone; /* either an error or do-over */ + } + qunlock(&d->portm); + if(flag == 0){ + if(++try == 10){ + print("%s: bad disk\n", name); + r->status = SDcheck; + return SDcheck; + } + /* + * write retries cannot succeed on write-once media, + * so just accept any failure. 
+ */ + wormwrite = 0; + switch(d->unit->inquiry[0] & SDinq0periphtype){ + case SDperworm: + case SDpercd: + switch(cmd[0]){ + case 0x0a: /* write (6?) */ + case 0x2a: /* write (10) */ + case 0x8a: /* long write (16) */ + case 0x2e: /* write and verify (10) */ + wormwrite = 1; + break; + } + break; + } + if (!wormwrite) { + print("%s: retry\n", name); + goto retry; + } + } + if(flag & Ferror){ + if((task&Eidnf) == 0) + print("%s: i/o error task=%#ux\n", name, task); + r->status = SDcheck; + return SDcheck; + } + + data += n; + + r->rlen = data - (uchar*)r->data; + r->status = SDok; + return SDok; +} + +static int +iario(SDreq *r) +{ + int i, n, count, try, max, flag, task; + vlong lba; + char *name; + uchar *cmd, *data; + Aport *p; + Asleep as; + Ctlr *c; + Drive *d; + SDunit *unit; + + unit = r->unit; + c = unit->dev->ctlr; + d = c->drive[unit->subno]; + if(d->portm.feat & Datapi) + return iariopkt(r, d); + cmd = r->cmd; + name = d->unit->name; + p = d->port; + + if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ + if(flushcache(d) == 0) + return sdsetsense(r, SDok, 0, 0, 0); + return sdsetsense(r, SDcheck, 3, 0xc, 2); + } + + if((i = sdfakescsi(r, d->info, d->infosz)) != SDnostatus){ + r->status = i; + return i; + } + + if(*cmd != 0x28 && *cmd != 0x2a){ + print("%s: bad cmd %.2#ux\n", name, cmd[0]); + r->status = SDcheck; + return SDcheck; + } + + lba = cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5]; + count = cmd[7]<<8 | cmd[8]; + if(r->data == nil) + return SDok; + if(r->dlen < count * unit->secsize) + count = r->dlen / unit->secsize; + max = 128; + + try = 0; +retry: + data = r->data; + while(count > 0){ + n = count; + if(n > max) + n = max; + ahcibuild(d, cmd, data, n, lba); + switch(waitready(d)){ + case -1: + qunlock(&d->portm); + return SDeio; + case 1: + qunlock(&d->portm); + esleep(1); + goto retry; + } + ilock(d); + d->portm.flag = 0; + iunlock(d); + p->ci = 1; + + as.p = p; + as.i = 1; + d->intick = sys->ticks; + d->active++; + + while(waserror()) + ; + 
sleep(&d->portm, ahciclear, &as); + poperror(); + + d->active--; + ilock(d); + flag = d->portm.flag; + task = d->port->task; + iunlock(d); + + if(task & (Efatal<<8) || + task & (ASbsy|ASdrq) && d->state == Dready){ + d->port->ci = 0; + ahcirecover(&d->portc); + task = d->port->task; + } + qunlock(&d->portm); + if(flag == 0){ + if(++try == 10){ + print("%s: bad disk\n", name); + r->status = SDeio; + return SDeio; + } + print("%s: retry blk %lld\n", name, lba); + goto retry; + } + if(flag & Ferror){ + print("%s: i/o error task=%#ux @%,lld\n", + name, task, lba); + r->status = SDeio; + return SDeio; + } + + count -= n; + lba += n; + data += n * unit->secsize; + } + r->rlen = data - (uchar*)r->data; + r->status = SDok; + return SDok; +} + +/* + * configure drives 0-5 as ahci sata (c.f. errata) + */ +static int +iaahcimode(Pcidev *p) +{ + dprint("iaahcimode: %#ux %#ux %#ux\n", pcicfgr8(p, 0x91), pcicfgr8(p, 92), + pcicfgr8(p, 93)); + pcicfgw16(p, 0x92, pcicfgr16(p, 0x92) | 0x3f); /* ports 0-5 */ + return 0; +} + +static void +iasetupahci(Ctlr *c) +{ + /* disable cmd block decoding. */ + pcicfgw16(c->pci, 0x40, pcicfgr16(c->pci, 0x40) & ~(1<<15)); + pcicfgw16(c->pci, 0x42, pcicfgr16(c->pci, 0x42) & ~(1<<15)); + + c->lmmio[0x4/4] |= 1 << 31; /* enable ahci mode (ghc register) */ + c->lmmio[0xc/4] = (1 << 6) - 1; /* 5 ports. (supposedly ro pi reg.) */ + + /* enable ahci mode and 6 ports; from ich9 datasheet */ + pcicfgw16(c->pci, 0x90, 1<<6 | 1<<5); +} + +static int +didtype(Pcidev *p) +{ + switch(p->vid){ + case 0x8086: + if((p->did & 0xfffc) == 0x2680) + return Tesb; + /* + * 0x27c4 is the intel 82801 in compatibility (not sata) mode. 
+ */ + if (p->did == 0x24d1 || /* 82801eb/er */ + (p->did & 0xfffb) == 0x27c1 || /* 82801g[bh]m ich7 */ + p->did == 0x2821 || /* 82801h[roh] */ + (p->did & 0xfffe) == 0x2824 || /* 82801h[b] */ + (p->did & 0xfeff) == 0x2829 || /* ich8/9m */ + (p->did & 0xfffe) == 0x2922 || /* ich9 */ + p->did == 0x3a02 || /* 82801jd/do */ + (p->did & 0xfefe) == 0x3a22 || /* ich10, pch */ + (p->did & 0xfff8) == 0x3b28) /* pchm */ + return Tich; + if(1) + return Tich; + break; + case 0x1002: + if(p->did == 0x4380 || p->did == 0x4390 || p->did == 0x4391){ + print("detected sb600 vid %#ux did %#ux\n", p->vid, p->did); + return Tsb600; + } + break; + case 0x1b4b: + /* can't cope with sata 3 yet; touching sd files will hang */ + if (p->did == 0x9123) { + print("ahci: ignoring sata 3 controller\n"); + return -1; + } + break; + } + if(p->ccrb == Pcibcstore && p->ccru == Pciscsata && p->ccrp == 1){ + print("ahci: Tunk: VID %#4.4ux DID %#4.4ux\n", p->vid, p->did); + return Tunk; + } + return -1; +} + +static int +newctlr(Ctlr *ctlr, SDev *sdev, int nunit) +{ + int i, n; + Drive *drive; + + ctlr->ndrive = sdev->nunit = nunit; + ctlr->mport = ctlr->hba->cap & ((1<<5)-1); + + i = (ctlr->hba->cap >> 20) & ((1<<4)-1); /* iss */ + print("#S/sd%c: %s: %#p %s, %d ports, irq %d\n", sdev->idno, + Tname(ctlr), ctlr->physio, descmode[i], nunit, ctlr->pci->intl); + /* map the drives -- they don't all need to be enabled. 
*/ + n = 0; + ctlr->rawdrive = malloc(NCtlrdrv * sizeof(Drive)); + if(ctlr->rawdrive == nil) { + print("ahci: out of memory\n"); + return -1; + } + for(i = 0; i < NCtlrdrv; i++) { + drive = ctlr->rawdrive + i; + drive->portno = i; + drive->driveno = -1; + drive->sectors = 0; + drive->serial[0] = ' '; + drive->ctlr = ctlr; + if((ctlr->hba->pi & (1<<i)) == 0) /* port not implemented: driveno stays -1 */ + continue; + drive->port = (Aport*)(ctlr->mmio + 0x80*i + 0x100); + drive->portc.p = drive->port; + drive->portc.pm = &drive->portm; + drive->driveno = n++; + ctlr->drive[drive->driveno] = drive; + iadrive[niadrive + drive->driveno] = drive; + } + for(i = 0; i < n; i++) + if(ahciidle(ctlr->drive[i]->port) == -1){ + dprint("ahci: %s: port %d wedged; abort\n", + Tname(ctlr), i); + return -1; + } + for(i = 0; i < n; i++){ + ctlr->drive[i]->mode = DMsatai; + configdrive(ctlr->drive[i]); + } + return n; +} + +static SDev* +iapnp(void) +{ + int n, nunit, type; + ulong io; + Ctlr *c; + Pcidev *p; + SDev *head, *tail, *s; + static int done; + + if(done++) + return nil; + + memset(olds, 0xff, sizeof olds); + p = nil; + head = tail = nil; + while((p = pcimatch(p, 0, 0)) != nil){ + type = didtype(p); + if (type == -1 || p->mem[Abar].bar == 0) + continue; + if(niactlr == NCtlr){ + print("ahci: iapnp: %s: too many controllers\n", + tname[type]); + break; + } + c = iactlr + niactlr; + s = sdevs + niactlr; + memset(c, 0, sizeof *c); + memset(s, 0, sizeof *s); + io = p->mem[Abar].bar & ~0xf; + c->physio = (uchar *)io; + c->mmio = vmap(io, p->mem[Abar].size); + if(c->mmio == 0){ + print("ahci: %s: address %#lux in use did=%#ux\n", + Tname(c), io, p->did); + continue; + } + c->lmmio = (ulong*)c->mmio; + c->pci = p; + c->type = type; + + s->ifc = &sdiahciifc; + s->idno = 'E' + niactlr; + s->ctlr = c; + c->sdev = s; + + if(Intel(c) && p->did != 0x2681) + iasetupahci(c); + nunit = ahciconf(c); +// ahcihbareset((Ahba*)c->mmio); + if(Intel(c) && iaahcimode(p) == -1) + break; + if(nunit < 1){ + vunmap(c->mmio, p->mem[Abar].size); + continue; + } + n = newctlr(c, s, 
nunit); + if(n < 0) + continue; + niadrive += n; + niactlr++; + if(head) + tail->next = s; + else + head = s; + tail = s; + } + return head; +} + +static char* smarttab[] = { + "unset", + "error", + "threshold exceeded", + "normal" +}; + +static char * +pflag(char *s, char *e, uchar f) +{ + uchar i; + + for(i = 0; i < 8; i++) + if(f & (1 << i)) + s = seprint(s, e, "%s ", flagname[i]); + return seprint(s, e, "\n"); +} + +static int +iarctl(SDunit *u, char *p, int l) +{ + char buf[32]; + char *e, *op; + Aport *o; + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + if(c == nil) { +print("iarctl: nil u->dev->ctlr\n"); + return 0; + } + d = c->drive[u->subno]; + o = d->port; + + e = p+l; + op = p; + if(d->state == Dready){ + p = seprint(p, e, "model\t%s\n", d->model); + p = seprint(p, e, "serial\t%s\n", d->serial); + p = seprint(p, e, "firm\t%s\n", d->firmware); + if(d->smartrs == 0xff) + p = seprint(p, e, "smart\tenable error\n"); + else if(d->smartrs == 0) + p = seprint(p, e, "smart\tdisabled\n"); + else + p = seprint(p, e, "smart\t%s\n", + smarttab[d->portm.smart]); + p = seprint(p, e, "flag\t"); + p = pflag(p, e, d->portm.feat); + }else + p = seprint(p, e, "no disk present [%s]\n", diskstates[d->state]); + serrstr(o->serror, buf, buf + sizeof buf - 1); + p = seprint(p, e, "reg\ttask %#lux cmd %#lux serr %#lux %s ci %#lux " + "is %#lux; sig %#lux sstatus %06#lux\n", + o->task, o->cmd, o->serror, buf, + o->ci, o->isr, o->sig, o->sstatus); + if(d->unit == nil) + panic("iarctl: nil d->unit"); + p = seprint(p, e, "geometry %llud %lud\n", d->sectors, d->unit->secsize); + return p - op; +} + +static void +runflushcache(Drive *d) +{ + long t0; + + t0 = sys->ticks; + if(flushcache(d) != 0) + error(Eio); + dprint("ahci: flush in %ld ms\n", sys->ticks - t0); +} + +static void +forcemode(Drive *d, char *mode) +{ + int i; + + for(i = 0; i < nelem(modename); i++) + if(strcmp(mode, modename[i]) == 0) + break; + if(i == nelem(modename)) + i = 0; + d->mode = i; +} + +static void 
+runsmartable(Drive *d, int i) +{ + if(waserror()){ + qunlock(&d->portm); + d->smartrs = 0; + nexterror(); + } + if(lockready(d) == -1) + error(Eio); + d->smartrs = smart(&d->portc, i); + d->portm.smart = 0; + qunlock(&d->portm); + poperror(); +} + +static void +forcestate(Drive *d, char *state) +{ + int i; + + for(i = 0; i < nelem(diskstates); i++) + if(strcmp(state, diskstates[i]) == 0) + break; + if(i == nelem(diskstates)) + error(Ebadctl); + d->state = i; +} + +/* + * force this driver to notice a change of medium if the hardware doesn't + * report it. + */ +static void +changemedia(SDunit *u) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + ilock(d); + d->mediachange = 1; + u->sectors = 0; + iunlock(d); +} + +static int +iawctl(SDunit *u, Cmdbuf *cmd) +{ + char **f; + Ctlr *c; + Drive *d; + uint i; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + f = cmd->f; + + if(strcmp(f[0], "change") == 0) + changemedia(u); + else if(strcmp(f[0], "flushcache") == 0) + runflushcache(d); + else if(strcmp(f[0], "identify") == 0){ + i = strtoul(f[1]? f[1]: "0", 0, 0); + if(i > 0xff) + i = 0; + dprint("ahci: %04d %#ux\n", i, d->info[i]); + }else if(strcmp(f[0], "mode") == 0) + forcemode(d, f[1]? 
f[1]: "satai"); + else if(strcmp(f[0], "nop") == 0){ + if((d->portm.feat & Dnop) == 0){ + cmderror(cmd, "no drive support"); + return -1; + } + if(waserror()){ + qunlock(&d->portm); + nexterror(); + } + if(lockready(d) == -1) + error(Eio); + nop(&d->portc); + qunlock(&d->portm); + poperror(); + }else if(strcmp(f[0], "reset") == 0) + forcestate(d, "reset"); + else if(strcmp(f[0], "smart") == 0){ + if(d->smartrs == 0){ + cmderror(cmd, "smart not enabled"); + return -1; + } + if(waserror()){ + qunlock(&d->portm); + d->smartrs = 0; + nexterror(); + } + if(lockready(d) == -1) + error(Eio); + d->portm.smart = 2 + smartrs(&d->portc); + qunlock(&d->portm); + poperror(); + }else if(strcmp(f[0], "smartdisable") == 0) + runsmartable(d, 1); + else if(strcmp(f[0], "smartenable") == 0) + runsmartable(d, 0); + else if(strcmp(f[0], "state") == 0) + forcestate(d, f[1]? f[1]: "null"); + else{ + cmderror(cmd, Ebadctl); + return -1; + } + return 0; +} + +static char * +portr(char *p, char *e, uint x) +{ + int i, a; + + p[0] = 0; + a = -1; + for(i = 0; i < 32; i++){ + if((x & (1< 0) + p = seprint(p, e, ", "); + p = seprint(p, e, "%d", a = i); + } + } + if(a != -1 && i - 1 != a) + p = seprint(p, e, "-%d", i - 1); + return p; +} + +/* must emit exactly one line per controller (sd(3)) */ +static char* +iartopctl(SDev *sdev, char *p, char *e) +{ + ulong cap; + char pr[25]; + Ahba *hba; + Ctlr *ctlr; + +#define has(x, str) if(cap & (x)) p = seprint(p, e, "%s ", (str)) + + ctlr = sdev->ctlr; + hba = ctlr->hba; + p = seprint(p, e, "sd%c ahci port %#p: ", sdev->idno, ctlr->physio); + cap = hba->cap; + has(Hs64a, "64a"); + has(Hsalp, "alp"); + has(Hsam, "am"); + has(Hsclo, "clo"); + has(Hcccs, "coal"); + has(Hems, "ems"); + has(Hsal, "led"); + has(Hsmps, "mps"); + has(Hsncq, "ncq"); + has(Hssntf, "ntf"); + has(Hspm, "pm"); + has(Hpsc, "pslum"); + has(Hssc, "slum"); + has(Hsss, "ss"); + has(Hsxs, "sxs"); + portr(pr, pr + sizeof pr, hba->pi); + return seprint(p, e, + "iss %ld ncs %ld np %ld; ghc 
%#lux isr %#lux pi %#lux %s ver %#lux\n", + (cap>>20) & 0xf, (cap>>8) & 0x1f, 1 + (cap & 0x1f), + hba->ghc, hba->isr, hba->pi, pr, hba->ver); +#undef has +} + +static int +iawtopctl(SDev *, Cmdbuf *cmd) +{ + int *v; + char **f; + + f = cmd->f; + v = 0; + + if (f[0] == nil) + return 0; + if(strcmp(f[0], "debug") == 0) + v = &debug; + else if(strcmp(f[0], "idprint") == 0) + v = &prid; + else if(strcmp(f[0], "aprint") == 0) + v = &datapi; + else + cmderror(cmd, Ebadctl); + + switch(cmd->nf){ + default: + cmderror(cmd, Ebadarg); + case 1: + *v ^= 1; + break; + case 2: + if(f[1]) + *v = strcmp(f[1], "on") == 0; + else + *v ^= 1; + break; + } + return 0; +} + +SDifc sdiahciifc = { + "iahci", + + iapnp, + nil, /* legacy */ + iaenable, + iadisable, + + iaverify, + iaonline, + iario, + iarctl, + iawctl, + + scsibio, + nil, /* probe */ + nil, /* clear */ + iartopctl, + iawtopctl, +}; diff -Nru /sys/src/9k/386/sdide.c /sys/src/9k/386/sdide.c --- /sys/src/9k/386/sdide.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/sdide.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2470 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" +#include "fis.h" + +#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__); +#pragma varargck argpos atadebug 3 + +extern SDifc sdideifc; + +enum { + DbgCONFIG = 0x0001, /* detected drive config info */ + DbgIDENTIFY = 0x0002, /* detected drive identify info */ + DbgSTATE = 0x0004, /* dump state on panic */ + DbgPROBE = 0x0008, /* trace device probing */ + DbgDEBUG = 0x0080, /* the current problem... 
*/ + DbgINL = 0x0100, /* That Inil20+ message we hate */ + Dbg48BIT = 0x0200, /* 48-bit LBA */ + DbgBsy = 0x0400, /* interrupt but Bsy (shared IRQ) */ + DbgAtazz = 0x0800, /* debug raw ata io */ +}; +#define DEBUG (DbgDEBUG|DbgSTATE) + +enum { /* I/O ports */ + Data = 0, + Error = 1, /* (read) */ + Features = 1, /* (write) */ + Count = 2, /* sector count<7-0>, sector count<15-8> */ + Ir = 2, /* interrupt reason (PACKET) */ + Sector = 3, /* sector number */ + Lbalo = 3, /* LBA<7-0>, LBA<31-24> */ + Cyllo = 4, /* cylinder low */ + Bytelo = 4, /* byte count low (PACKET) */ + Lbamid = 4, /* LBA<15-8>, LBA<39-32> */ + Cylhi = 5, /* cylinder high */ + Bytehi = 5, /* byte count hi (PACKET) */ + Lbahi = 5, /* LBA<23-16>, LBA<47-40> */ + Dh = 6, /* Device/Head, LBA<27-24> */ + Status = 7, /* (read) */ + Command = 7, /* (write) */ + + As = 2, /* Alternate Status (read) */ + Dc = 2, /* Device Control (write) */ +}; + +enum { /* Error */ + Med = 0x01, /* Media error */ + Ili = 0x01, /* command set specific (PACKET) */ + Nm = 0x02, /* No Media */ + Eom = 0x02, /* command set specific (PACKET) */ + Abrt = 0x04, /* Aborted command */ + Mcr = 0x08, /* Media Change Request */ + Idnf = 0x10, /* no user-accessible address */ + Mc = 0x20, /* Media Change */ + Unc = 0x40, /* Uncorrectable data error */ + Wp = 0x40, /* Write Protect */ + Icrc = 0x80, /* Interface CRC error */ +}; + +enum { /* Features */ + Dma = 0x01, /* data transfer via DMA (PACKET) */ + Ovl = 0x02, /* command overlapped (PACKET) */ +}; + +enum { /* Interrupt Reason */ + Cd = 0x01, /* Command/Data */ + Io = 0x02, /* I/O direction */ + Rel = 0x04, /* Bus Release */ +}; + +enum { /* Device/Head */ + Dev0 = 0xA0, /* Master */ + Dev1 = 0xB0, /* Slave */ + Devs = Dev0 | Dev1, + Lba = 0x40, /* LBA mode */ +}; + +enum { /* Status, Alternate Status */ + Err = 0x01, /* Error */ + Chk = 0x01, /* Check error (PACKET) */ + Drq = 0x08, /* Data Request */ + Dsc = 0x10, /* Device Seek Complete */ + Serv = 0x10, /* Service */ + Df = 
0x20, /* Device Fault */ + Dmrd = 0x20, /* DMA ready (PACKET) */ + Drdy = 0x40, /* Device Ready */ + Bsy = 0x80, /* Busy */ +}; + +enum { /* Command */ + Cnop = 0x00, /* NOP */ + Crs = 0x20, /* Read Sectors */ + Crs48 = 0x24, /* Read Sectors Ext */ + Crd48 = 0x25, /* Read w/ DMA Ext */ + Crsm48 = 0x29, /* Read Multiple Ext */ + Cws = 0x30, /* Write Sectors */ + Cws48 = 0x34, /* Write Sectors Ext */ + Cwd48 = 0x35, /* Write w/ DMA Ext */ + Cwsm48 = 0x39, /* Write Multiple Ext */ + Cedd = 0x90, /* Execute Device Diagnostics */ + Cpkt = 0xA0, /* Packet */ + Cidpkt = 0xA1, /* Identify Packet Device */ + Crsm = 0xC4, /* Read Multiple */ + Cwsm = 0xC5, /* Write Multiple */ + Csm = 0xC6, /* Set Multiple */ + Crd = 0xC8, /* Read DMA */ + Cwd = 0xCA, /* Write DMA */ + Cid = 0xEC, /* Identify Device */ +}; + +enum { /* Device Control */ + Nien = 0x02, /* (not) Interrupt Enable */ + Srst = 0x04, /* Software Reset */ + Hob = 0x80, /* High Order Bit [sic] */ +}; + +enum { /* PCI Configuration Registers */ + Bmiba = 0x20, /* Bus Master Interface Base Address */ + Idetim = 0x40, /* IDE Timing */ + Sidetim = 0x44, /* Slave IDE Timing */ + Udmactl = 0x48, /* Ultra DMA/33 Control */ + Udmatim = 0x4A, /* Ultra DMA/33 Timing */ +}; + +enum { /* Bus Master IDE I/O Ports */ + Bmicx = 0, /* Command */ + Bmisx = 2, /* Status */ + Bmidtpx = 4, /* Descriptor Table Pointer */ +}; + +enum { /* Bmicx */ + Ssbm = 0x01, /* Start/Stop Bus Master */ + Rwcon = 0x08, /* Read/Write Control */ +}; + +enum { /* Bmisx */ + Bmidea = 0x01, /* Bus Master IDE Active */ + Idedmae = 0x02, /* IDE DMA Error (R/WC) */ + Ideints = 0x04, /* IDE Interrupt Status (R/WC) */ + Dma0cap = 0x20, /* Drive 0 DMA Capable */ + Dma1cap = 0x40, /* Drive 1 DMA Capable */ +}; +enum { /* Physical Region Descriptor */ + PrdEOT = 0x80000000, /* End of Transfer */ +}; + +enum { /* offsets into the identify info. 
*/ + Iconfig = 0, /* general configuration */ + Ilcyl = 1, /* logical cylinders */ + Ilhead = 3, /* logical heads */ + Ilsec = 6, /* logical sectors per logical track */ + Iserial = 10, /* serial number */ + Ifirmware = 23, /* firmware revision */ + Imodel = 27, /* model number */ + Imaxrwm = 47, /* max. read/write multiple sectors */ + Icapabilities = 49, /* capabilities */ + Istandby = 50, /* device specific standby timer */ + Ipiomode = 51, /* PIO data transfer mode number */ + Ivalid = 53, + Iccyl = 54, /* cylinders if (valid&0x01) */ + Ichead = 55, /* heads if (valid&0x01) */ + Icsec = 56, /* sectors if (valid&0x01) */ + Iccap = 57, /* capacity if (valid&0x01) */ + Irwm = 59, /* read/write multiple */ + Ilba = 60, /* LBA size */ + Imwdma = 63, /* multiword DMA mode */ + Iapiomode = 64, /* advanced PIO modes supported */ + Iminmwdma = 65, /* min. multiword DMA cycle time */ + Irecmwdma = 66, /* rec. multiword DMA cycle time */ + Iminpio = 67, /* min. PIO cycle w/o flow control */ + Iminiordy = 68, /* min. PIO cycle with IORDY */ + Ipcktbr = 71, /* time from PACKET to bus release */ + Iserbsy = 72, /* time from SERVICE to !Bsy */ + Iqdepth = 75, /* max. 
queue depth */ + Imajor = 80, /* major version number */ + Iminor = 81, /* minor version number */ + Icsfs = 82, /* command set/feature supported */ + Icsfe = 85, /* command set/feature enabled */ + Iudma = 88, /* ultra DMA mode */ + Ierase = 89, /* time for security erase */ + Ieerase = 90, /* time for enhanced security erase */ + Ipower = 91, /* current advanced power management */ + Ilba48 = 100, /* 48-bit LBA size (64 bits in 100-103) */ + Irmsn = 127, /* removable status notification */ + Isecstat = 128, /* security status */ + Icfapwr = 160, /* CFA power mode */ + Imediaserial = 176, /* current media serial number */ + Icksum = 255, /* checksum */ +}; + +enum { /* bit masks for config identify info */ + Mpktsz = 0x0003, /* packet command size */ + Mincomplete = 0x0004, /* incomplete information */ + Mdrq = 0x0060, /* DRQ type */ + Mrmdev = 0x0080, /* device is removable */ + Mtype = 0x1F00, /* device type */ + Mproto = 0x8000, /* command protocol */ +}; + +enum { /* bit masks for capabilities identify info */ + Mdma = 0x0100, /* DMA supported */ + Mlba = 0x0200, /* LBA supported */ + Mnoiordy = 0x0400, /* IORDY may be disabled */ + Miordy = 0x0800, /* IORDY supported */ + Msoftrst = 0x1000, /* needs soft reset when Bsy */ + Mqueueing = 0x4000, /* queueing overlap supported */ + Midma = 0x8000, /* interleaved DMA supported */ +}; + +enum { /* bit masks for supported/enabled features */ + Msmart = 0x0001, + Msecurity = 0x0002, + Mrmmedia = 0x0004, + Mpwrmgmt = 0x0008, + Mpkt = 0x0010, + Mwcache = 0x0020, + Mlookahead = 0x0040, + Mrelirq = 0x0080, + Msvcirq = 0x0100, + Mreset = 0x0200, + Mprotected = 0x0400, + Mwbuf = 0x1000, + Mrbuf = 0x2000, + Mnop = 0x4000, + Mmicrocode = 0x0001, + Mqueued = 0x0002, + Mcfa = 0x0004, + Mapm = 0x0008, + Mnotify = 0x0010, + Mspinup = 0x0040, + Mmaxsec = 0x0100, + Mautoacoustic = 0x0200, + Maddr48 = 0x0400, + Mdevconfov = 0x0800, + Mflush = 0x1000, + Mflush48 = 0x2000, + Msmarterror = 0x0001, + Msmartselftest = 0x0002, + Mmserial 
= 0x0004, + Mmpassthru = 0x0008, + Mlogging = 0x0020, +}; + +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +typedef struct Prd { /* Physical Region Descriptor */ + ulong pa; /* Physical Base Address */ + int count; +} Prd; + +enum { + BMspan = 64*1024, /* must be power of 2 <= 64*1024 */ + + Nprd = SDmaxio/BMspan+2, +}; + +typedef struct Ctlr { + int cmdport; + int ctlport; + int irq; + void* vector; + int tbdf; + int bmiba; /* bus master interface base address */ + int maxio; /* sector count transfer maximum */ + int span; /* don't span this boundary with dma */ + + Pcidev* pcidev; + void (*ienable)(Ctlr*); + void (*idisable)(Ctlr*); + SDev* sdev; + + Drive* drive[2]; + + Prd* prdt; /* physical region descriptor table */ + void (*irqack)(Ctlr*); + + QLock; /* current command */ + Drive* curdrive; + int command; /* last command issued (debugging) */ + Rendez; + int done; + uint nrq; + uint nildrive; + uint bsy; + + Lock; /* register access */ +} Ctlr; + +typedef struct Drive { + Ctlr* ctlr; + SDunit *unit; + + int dev; + ushort info[256]; + Sfis; + + int dma; /* DMA R/W possible */ + int dmactl; + int rwm; /* read/write multiple possible */ + int rwmctl; + + int pkt; /* PACKET device, length of pktcmd */ + uchar pktcmd[16]; + int pktdma; /* this PACKET command using dma */ + + uvlong sectors; + uint secsize; + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + + QLock; /* drive access */ + int command; /* current command */ + int write; + uchar* data; + int dlen; + uchar* limit; + int count; /* sectors */ + int block; /* R/W bytes per block */ + int status; + int error; + int flags; /* internal flags */ + uint missirq; + uint spurloop; + uint irq; + uint bsy; +} Drive; + +enum { /* internal flags */ + Lba48always = 0x2, /* ... 
*/ + Online = 0x4, /* drive onlined */ +}; + +static void +pc87415ienable(Ctlr* ctlr) +{ + Pcidev *p; + int x; + + p = ctlr->pcidev; + if(p == nil) + return; + + x = pcicfgr32(p, 0x40); + if(ctlr->cmdport == p->mem[0].bar) + x &= ~0x00000100; + else + x &= ~0x00000200; + pcicfgw32(p, 0x40, x); +} + +static void +atadumpstate(Drive* drive, SDreq *r, uvlong lba, int count) +{ + Prd *prd; + Pcidev *p; + Ctlr *ctlr; + int i, bmiba, ccnt; + uvlong clba; + + if(!(DEBUG & DbgSTATE)) + return; + + ctlr = drive->ctlr; + print("command %2.2uX\n", ctlr->command); + print("data %8.8p limit %8.8p dlen %d status %uX error %uX\n", + drive->data, drive->limit, drive->dlen, + drive->status, drive->error); + if(r->clen == -16) + clba = fisrw(nil, r->cmd, &ccnt); + else + sdfakescsirw(r, &clba, &ccnt, 0); + print("lba %llud -> %llud, count %d -> %d (%d)\n", + clba, lba, ccnt, count, drive->count); + if(!(inb(ctlr->ctlport+As) & Bsy)){ + for(i = 1; i < 7; i++) + print(" 0x%2.2uX", inb(ctlr->cmdport+i)); + print(" 0x%2.2uX\n", inb(ctlr->ctlport+As)); + } + if(drive->command == Cwd || drive->command == Crd + || drive->command == (Pdma|Pin) || drive->command == (Pdma|Pout)){ + bmiba = ctlr->bmiba; + prd = ctlr->prdt; + print("bmicx %2.2uX bmisx %2.2uX prdt %8.8p\n", + inb(bmiba+Bmicx), inb(bmiba+Bmisx), prd); + for(;;){ + print("pa 0x%8.8luX count %8.8uX\n", + prd->pa, prd->count); + if(prd->count & PrdEOT) + break; + prd++; + } + } + if(ctlr->pcidev && ctlr->pcidev->vid == 0x8086){ + p = ctlr->pcidev; + print("0x40: %4.4uX 0x42: %4.4uX ", + pcicfgr16(p, 0x40), pcicfgr16(p, 0x42)); + print("0x48: %2.2uX\n", pcicfgr8(p, 0x48)); + print("0x4A: %4.4uX\n", pcicfgr16(p, 0x4A)); + } +} + +static void +atadebug(int cmdport, int ctlport, char* fmt, ...) 
/*
 * Poll the alternate status register until the device is usable.
 *
 *	cmdport, ctlport	command and control block base ports
 *	dev		if non-zero, value written once to the Dh register
 *			(device select) as soon as the `reset' bits are clear
 *	reset		status bits that must all be clear (e.g. Bsy|Drq)
 *	ready		status bits of which at least one must be set;
 *			0 means no such requirement
 *	m		number of extra polls after the first; each poll is
 *			followed by a 1 microsecond delay
 *
 * Returns the last alternate status value read on success,
 * or -1 if the conditions were not met within the poll budget.
 */
static int
ataready(int cmdport, int ctlport, int dev, int reset, int ready, int m)
{
	int as, m0;

	atadebug(cmdport, ctlport, "ataready: dev %ux:%ux reset %ux ready %ux",
		cmdport, dev, reset, ready);
	m0 = m;		/* remember the budget for the debug messages */
	do{
		/*
		 * Wait for the controller to become not busy and
		 * possibly for a status bit to become true (usually
		 * Drdy). Must change to the appropriate device
		 * register set if necessary before testing for ready.
		 * Always run through the loop at least once so it
		 * can be used as a test for !Bsy.
		 */
		as = inb(ctlport+As);
		if(as & reset){
			/* nothing to do */
		}
		else if(dev){
			/*
			 * select the device, then go round again so the
			 * status tested below belongs to that device
			 */
			outb(cmdport+Dh, dev);
			dev = 0;
		}
		else if(ready == 0 || (as & ready)){
			atadebug(0, 0, "ataready: %d:%d %#.2ux\n", m, m0, as);
			return as;
		}
		microdelay(1);
	}while(m-- > 0);
	atadebug(0, 0, "ataready: timeout %d %#.2ux\n", m0, as);
	return -1;
}
+ */ + if(drive->info[Irwm] & 0x100) + rwm = drive->info[Irwm] & 0xFF; + else + rwm = 0; + if(rwm == 0) + rwm = maxrwm; + if(rwm > 16) + rwm = 16; + if(ataready(cmdport, ctlport, dev, Bsy|Drq, Drdy, 102*1000) < 0) + return 0; + outb(cmdport+Count, rwm); + outb(cmdport+Command, Csm); + microdelay(1); + as = ataready(cmdport, ctlport, 0, Bsy, Drdy|Df|Err, 1000); + inb(cmdport+Status); + if(as < 0 || (as & (Df|Err))) + return 0; + + drive->rwm = rwm; + + return rwm; +} + +static int +atadmamode(SDunit *unit, Drive* drive) +{ + char buf[32], *s; + int dma; + + /* + * Check if any DMA mode enabled. + * Assumes the BIOS has picked and enabled the best. + * This is completely passive at the moment, no attempt is + * made to ensure the hardware is correctly set up. + */ + dma = drive->info[Imwdma] & 0x0707; + drive->dma = (dma>>8) & dma; + if(drive->dma == 0 && (drive->info[Ivalid] & 0x04)){ + dma = drive->info[Iudma] & 0x7F7F; + drive->dma = (dma>>8) & dma; + if(drive->dma) + drive->dma |= 'U'<<16; + } + if(unit != nil){ + snprint(buf, sizeof buf, "*%sdma", unit->name); + if((s = getconf(buf)) && strcmp(s, "on") == 0){ +// print("set %s dma\n", unit->name); + drive->dmactl = drive->dma; + } + } + return dma; +} + +static int +ataidentify(Ctlr*, int cmdport, int ctlport, int dev, int pkt, void* info) +{ + int as, command, drdy; + + if(pkt){ + command = Cidpkt; + drdy = 0; + } + else{ + command = Cid; + drdy = Drdy; + } + dev &= ~Lba; + as = ataready(cmdport, ctlport, dev, Bsy|Drq, drdy, 103*1000); + if(as < 0) + return as; + outb(cmdport+Command, command); + microdelay(1); + + as = ataready(cmdport, ctlport, 0, Bsy, Drq|Err, 400*1000); + if(as < 0) + return -1; + if(as & Err) + return as; + + memset(info, 0, 512); + inss(cmdport+Data, info, 256); + ataready(cmdport, ctlport, dev, Bsy|Drq, Drdy, 3*1000); + inb(cmdport+Status); + + return 0; +} + +static Drive* +atadrive(SDunit *unit, Drive *drive, int cmdport, int ctlport, int dev) +{ + int as, pkt; + uchar buf[512], 
oserial[21]; + uvlong osectors; + Ctlr *ctlr; + + if(DEBUG & DbgIDENTIFY) + print("identify: port %ux dev %.2ux\n", cmdport, dev & ~Lba); + atadebug(0, 0, "identify: port 0x%uX dev 0x%2.2uX\n", cmdport, dev); + pkt = 1; + if(drive != nil){ + osectors = drive->sectors; + memmove(oserial, drive->serial, sizeof drive->serial); + ctlr = drive->ctlr; + }else{ + osectors = 0; + memset(oserial, 0, sizeof drive->serial); + ctlr = nil; + } +retry: + as = ataidentify(ctlr, cmdport, ctlport, dev, pkt, buf); + if(as < 0) + return nil; + if(as & Err){ + if(pkt == 0) + return nil; + pkt = 0; + goto retry; + } + + if(drive == 0){ + if((drive = malloc(sizeof(Drive))) == nil) + return nil; + drive->serial[0] = ' '; + drive->dev = dev; + } + + memmove(drive->info, buf, sizeof(drive->info)); + + setfissig(drive, pkt? 0xeb140000: 0x0101); + drive->sectors = idfeat(drive, drive->info); + drive->secsize = idss(drive, drive->info); + + idmove(drive->serial, drive->info+10, 20); + idmove(drive->firmware, drive->info+23, 8); + idmove(drive->model, drive->info+27, 40); + if(unit != nil){ + memset(unit->inquiry, 0, sizeof unit->inquiry); + unit->inquiry[2] = 2; + unit->inquiry[3] = 2; + unit->inquiry[4] = sizeof unit->inquiry - 4; + memmove(unit->inquiry+8, drive->model, 40); + } + + if(pkt){ + drive->pkt = 12; + if(drive->feat & Datapi16) + drive->pkt = 16; + }else{ + if(drive->feat & Dlba) + drive->dev |= Lba; + atarwmmode(drive, cmdport, ctlport, dev); + } + atadmamode(unit, drive); + + if(osectors != 0 && memcmp(oserial, drive->serial, sizeof oserial) != 0) + if(unit) + unit->sectors = 0; + drive->unit = unit; + if(DEBUG & DbgCONFIG){ + print("dev %2.2uX port %uX config %4.4uX capabilities %4.4uX", + dev, cmdport, drive->info[Iconfig], drive->info[Icapabilities]); + print(" mwdma %4.4uX", drive->info[Imwdma]); + if(drive->info[Ivalid] & 0x04) + print(" udma %4.4uX", drive->info[Iudma]); + print(" dma %8.8uX rwm %ud", drive->dma, drive->rwm); + if(drive->feat&Dllba) + print("\tLLBA 
sectors %llud", drive->sectors); + print("\n"); + } + + return drive; +} + +static void +atasrst(int ctlport) +{ + int dc0; + + /* + * Srst is a big stick and may cause problems if further + * commands are tried before the drives become ready again. + * Also, there will be problems here if overlapped commands + * are ever supported. + */ + dc0 = inb(ctlport+Dc); + microdelay(5); + outb(ctlport+Dc, Srst|dc0); + microdelay(5); + outb(ctlport+Dc, dc0); + microdelay(2*1000); +} + +static int +seldev(int dev, int map) +{ + if((dev & Devs) == Dev0 && map&1) + return dev; + if((dev & Devs) == Dev1 && map&2) + return dev; + return -1; +} + +static SDev* +ataprobe(int cmdport, int ctlport, int irq, int map) +{ + Ctlr* ctlr; + SDev *sdev; + Drive *drive; + int dev, error, rhi, rlo; + static int nonlegacy = 'C'; + + if(ioalloc(cmdport, 8, 0, "atacmd") < 0) { + print("ataprobe: Cannot allocate %X\n", cmdport); + return nil; + } + if(ioalloc(ctlport+As, 1, 0, "atactl") < 0){ + print("ataprobe: Cannot allocate %X\n", ctlport + As); + iofree(cmdport); + return nil; + } + + /* + * Try to detect a floating bus. + * Bsy should be cleared. If not, see if the cylinder registers + * are read/write capable. + * If the master fails, try the slave to catch slave-only + * configurations. + * There's no need to restore the tested registers as they will + * be reset on any detected drives by the Cedd command. + * All this indicates is that there is at least one drive on the + * controller; when the non-existent drive is selected in a + * single-drive configuration the registers of the existing drive + * are often seen, only command execution fails. 
+ */ + if((dev = seldev(Dev0, map)) == -1) + if((dev = seldev(Dev1, map)) == -1) + goto release; + if(inb(ctlport+As) & Bsy){ + outb(cmdport+Dh, dev); + microdelay(1); +trydev1: + atadebug(cmdport, ctlport, "ataprobe bsy"); + outb(cmdport+Cyllo, 0xAA); + outb(cmdport+Cylhi, 0x55); + outb(cmdport+Sector, 0xFF); + rlo = inb(cmdport+Cyllo); + rhi = inb(cmdport+Cylhi); + if(rlo != 0xAA && (rlo == 0xFF || rhi != 0x55)){ + if(dev == Dev1 || (dev = seldev(Dev1, map)) == -1){ +release: + outb(cmdport+Dc, Nien); + inb(cmdport+Status); + /* further measures to prevent irqs? */ + iofree(cmdport); + iofree(ctlport+As); + return nil; + } + if(ataready(cmdport, ctlport, dev, Bsy, 0, 20*1000) < 0) + goto trydev1; + } + } + + /* + * Disable interrupts on any detected controllers. + */ + outb(ctlport+Dc, Nien); +tryedd1: + if(ataready(cmdport, ctlport, dev, Bsy|Drq, 0, 105*1000) < 0){ + /* + * There's something there, but it didn't come up clean, + * so try hitting it with a big stick. The timing here is + * wrong but this is a last-ditch effort and it sometimes + * gets some marginal hardware back online. + */ + atasrst(ctlport); + if(ataready(cmdport, ctlport, dev, Bsy|Drq, 0, 106*1000) < 0) + goto release; + } + + /* + * Can only get here if controller is not busy. + * If there are drives Bsy will be set within 400nS, + * must wait 2mS before testing Status. + * Wait for the command to complete (6 seconds max). + */ + outb(cmdport+Command, Cedd); + delay(2); + if(ataready(cmdport, ctlport, dev, Bsy|Drq, 0, 6*1000*1000) < 0) + goto release; + + /* + * If bit 0 of the error register is set then the selected drive + * exists. This is enough to detect single-drive configurations. + * However, if the master exists there is no way short of executing + * a command to determine if a slave is present. + * It appears possible to get here testing Dev0 although it doesn't + * exist and the EDD won't take, so try again with Dev1. 
+ */ + error = inb(cmdport+Error); + atadebug(cmdport, ctlport, "ataprobe: dev %uX", dev); + if((error & ~0x80) != 0x01){ + if(dev == Dev1) + goto release; + if((dev = seldev(Dev1, map)) == -1) + goto release; + goto tryedd1; + } + + /* + * At least one drive is known to exist, try to + * identify it. If that fails, don't bother checking + * any further. + * If the one drive found is Dev0 and the EDD command + * didn't indicate Dev1 doesn't exist, check for it. + */ + if((drive = atadrive(0, 0, cmdport, ctlport, dev)) == nil) + goto release; + if((ctlr = malloc(sizeof(Ctlr))) == nil){ + free(drive); + goto release; + } + if((sdev = malloc(sizeof(SDev))) == nil){ + free(ctlr); + free(drive); + goto release; + } + drive->ctlr = ctlr; + if(dev == Dev0){ + ctlr->drive[0] = drive; + if(!(error & 0x80)){ + /* + * Always leave Dh pointing to a valid drive, + * otherwise a subsequent call to ataready on + * this controller may try to test a bogus Status. + * Ataprobe is the only place possibly invalid + * drives should be selected. 
+ */ + drive = atadrive(0, 0, cmdport, ctlport, Dev1); + if(drive != nil){ + drive->ctlr = ctlr; + ctlr->drive[1] = drive; + } + else{ + outb(cmdport+Dh, Dev0); + microdelay(1); + } + } + } + else + ctlr->drive[1] = drive; + + ctlr->cmdport = cmdport; + ctlr->ctlport = ctlport; + ctlr->irq = irq; + ctlr->tbdf = BUSUNKNOWN; + ctlr->command = Cedd; /* debugging */ + + switch(cmdport){ + default: + sdev->idno = nonlegacy; + break; + case 0x1F0: + sdev->idno = 'C'; + nonlegacy = 'E'; + break; + case 0x170: + sdev->idno = 'D'; + nonlegacy = 'E'; + break; + } + sdev->ifc = &sdideifc; + sdev->ctlr = ctlr; + sdev->nunit = 2; + ctlr->sdev = sdev; + + return sdev; +} + +static void +ataclear(SDev *sdev) +{ + Ctlr* ctlr; + + ctlr = sdev->ctlr; + iofree(ctlr->cmdport); + iofree(ctlr->ctlport + As); + + if (ctlr->drive[0]) + free(ctlr->drive[0]); + if (ctlr->drive[1]) + free(ctlr->drive[1]); + if (sdev->name) + free(sdev->name); + if (sdev->unitflg) + free(sdev->unitflg); + if (sdev->unit) + free(sdev->unit); + free(ctlr); + free(sdev); +} + +static char * +atastat(SDev *sdev, char *p, char *e) +{ + Ctlr *ctlr; + + ctlr = sdev->ctlr; +// return seprint(p, e, "%s ata port %X ctl %X irq %d %T\n", +// sdev->name, ctlr->cmdport, ctlr->ctlport, ctlr->irq, ctlr->tbdf); + return seprint(p, e, "%s ata port %X ctl %X irq %d\n", + sdev->name, ctlr->cmdport, ctlr->ctlport, ctlr->irq); +} + +static void atainterrupt(Ureg*, void*); + +static int +iowait(Drive *drive, int ms, int interrupt) +{ + int msec, step; + Ctlr *ctlr; + + step = 1000; + if(drive->missirq > 10) + step = 50; + ctlr = drive->ctlr; + for(msec = 0; msec < ms; msec += step){ + while(waserror()) + if(interrupt) + return -1; + tsleep(ctlr, atadone, ctlr, step); + poperror(); + if(ctlr->done) + break; + atainterrupt(nil, ctlr); + if(ctlr->done){ + if(drive->missirq++ < 3) + {}// BOTCH print("ide: caught missed irq\n"); + break; + }else + drive->spurloop++; + } + return ctlr->done; +} + +static void +atanop(Drive* drive, int 
subcommand) +{ + Ctlr* ctlr; + int as, cmdport, ctlport, timeo; + + /* + * Attempt to abort a command by using NOP. + * In response, the drive is supposed to set Abrt + * in the Error register, set (Drdy|Err) in Status + * and clear Bsy when done. However, some drives + * (e.g. ATAPI Zip) just go Bsy then clear Status + * when done, hence the timeout loop only on Bsy + * and the forced setting of drive->error. + */ + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + outb(cmdport+Features, subcommand); + outb(cmdport+Dh, drive->dev); + ctlr->command = Cnop; /* debugging */ + outb(cmdport+Command, Cnop); + + microdelay(1); + ctlport = ctlr->ctlport; + for(timeo = 0; timeo < 1000; timeo++){ + as = inb(ctlport+As); + if(!(as & Bsy)) + break; + microdelay(1); + } + drive->error |= Abrt; +} + +static void +ataabort(Drive* drive, int dolock) +{ + /* + * If NOP is available use it otherwise + * must try a software reset. + */ + if(dolock) + ilock(drive->ctlr); + if(drive->feat & Dnop) + atanop(drive, 0); + else{ + atasrst(drive->ctlr->ctlport); + drive->error |= Abrt; + } + if(dolock) + iunlock(drive->ctlr); +} + +static int +atadmasetup(Drive* drive, int len) +{ + Prd *prd; + ulong pa; + Ctlr *ctlr; + int bmiba, bmisx, count, i, span; + + ctlr = drive->ctlr; + pa = PCIWADDR32(drive->data); + if(pa & 0x03) + return -1; + + /* + * Sometimes drives identify themselves as being DMA capable + * although they are not on a busmastering controller. 
+ */ + prd = ctlr->prdt; + if(prd == nil){ + drive->dmactl = 0; + print("disabling dma: not on a busmastering controller\n"); + return -1; + } + + for(i = 0; len && i < Nprd; i++){ + prd->pa = pa; + span = ROUNDUP(pa, ctlr->span); + if(span == pa) + span += ctlr->span; + count = span - pa; + if(count >= len){ + prd->count = PrdEOT|len; + break; + } + prd->count = count; + len -= count; + pa += count; + prd++; + } + if(i == Nprd) + (prd-1)->count |= PrdEOT; + + bmiba = ctlr->bmiba; + outl(bmiba+Bmidtpx, PCIWADDR32(ctlr->prdt)); + if(drive->write) + outb(bmiba+Bmicx, 0); + else + outb(bmiba+Bmicx, Rwcon); + bmisx = inb(bmiba+Bmisx); + outb(bmiba+Bmisx, bmisx|Ideints|Idedmae); + + return 0; +} + +static void +atadmastart(Ctlr* ctlr, int write) +{ + if(write) + outb(ctlr->bmiba+Bmicx, Ssbm); + else + outb(ctlr->bmiba+Bmicx, Rwcon|Ssbm); +} + +static int +atadmastop(Ctlr* ctlr) +{ + int bmiba; + + bmiba = ctlr->bmiba; + outb(bmiba+Bmicx, inb(bmiba+Bmicx) & ~Ssbm); + + return inb(bmiba+Bmisx); +} + +static void +atadmainterrupt(Drive* drive, int count) +{ + Ctlr* ctlr; + int bmiba, bmisx; + + ctlr = drive->ctlr; + bmiba = ctlr->bmiba; + bmisx = inb(bmiba+Bmisx); + switch(bmisx & (Ideints|Idedmae|Bmidea)){ + case Bmidea: + /* + * Data transfer still in progress, nothing to do + * (this should never happen). + */ + return; + + case Ideints: + case Ideints|Bmidea: + /* + * Normal termination, tidy up. + */ + drive->data += count; + break; + + default: + /* + * What's left are error conditions (memory transfer + * problem) and the device is not done but the PRD is + * exhausted. For both cases must somehow tell the + * drive to abort. 
+ */ + ataabort(drive, 0); + break; + } + atadmastop(ctlr); + ctlr->done = 1; +} + +static void +atapktinterrupt(Drive* drive) +{ + Ctlr* ctlr; + int cmdport, len; + + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + switch(inb(cmdport+Ir) & (/*Rel|*/Io|Cd)){ + case Cd: + outss(cmdport+Data, drive->pktcmd, drive->pkt/2); + break; + + case 0: + len = (inb(cmdport+Bytehi)<<8)|inb(cmdport+Bytelo); + if(drive->data+len > drive->limit){ + atanop(drive, 0); + break; + } + outss(cmdport+Data, drive->data, len/2); + drive->data += len; + break; + + case Io: + len = (inb(cmdport+Bytehi)<<8)|inb(cmdport+Bytelo); + if(drive->data+len > drive->limit){ + atanop(drive, 0); + break; + } + inss(cmdport+Data, drive->data, len/2); + drive->data += len; + break; + + case Io|Cd: + if(drive->pktdma) + atadmainterrupt(drive, drive->dlen); + else + ctlr->done = 1; + break; + } +} + +static int +atapktio0(Drive *drive, SDreq *r) +{ + uchar *cmd; + int as, cmdport, ctlport, len, rv, timeo; + Ctlr *ctlr; + + rv = SDok; + cmd = r->cmd; + drive->command = Cpkt; + memmove(drive->pktcmd, cmd, r->clen); + memset(drive->pktcmd+r->clen, 0, drive->pkt-r->clen); + drive->limit = drive->data+drive->dlen; + + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + ctlport = ctlr->ctlport; + + as = ataready(cmdport, ctlport, drive->dev, Bsy|Drq, Drdy, 107*1000); + /* used to test as&Chk as failure too, but some CD readers use that for media change */ + if(as < 0) + return SDnostatus; + + ilock(ctlr); + if(drive->dlen && drive->dmactl && !atadmasetup(drive, drive->dlen)) + drive->pktdma = Dma; + else + drive->pktdma = 0; + + outb(cmdport+Features, drive->pktdma); + outb(cmdport+Count, 0); + outb(cmdport+Sector, 0); + len = 16*drive->secsize; + outb(cmdport+Bytelo, len); + outb(cmdport+Bytehi, len>>8); + outb(cmdport+Dh, drive->dev); + ctlr->done = 0; + ctlr->curdrive = drive; + ctlr->command = Cpkt; /* debugging */ + if(drive->pktdma) + atadmastart(ctlr, drive->write); + outb(cmdport+Command, Cpkt); + + 
if((drive->info[Iconfig] & Mdrq) != 0x0020){ + microdelay(1); + as = ataready(cmdport, ctlport, 0, Bsy, Drq|Chk, 4*1000); + if(as < 0 || (as & (Bsy|Chk))){ + drive->status = as<0 ? 0 : as; + ctlr->curdrive = nil; + ctlr->done = 1; + rv = SDtimeout; + }else + atapktinterrupt(drive); + } + iunlock(ctlr); + + while(waserror()) + ; + if(!drive->pktdma) + sleep(ctlr, atadone, ctlr); + else for(timeo = 0; !ctlr->done; timeo++){ + tsleep(ctlr, atadone, ctlr, 1000); + if(ctlr->done) + break; + ilock(ctlr); + atadmainterrupt(drive, 0); + if(!drive->error && timeo > 20){ + ataabort(drive, 0); + atadmastop(ctlr); + drive->dmactl = 0; + drive->error |= Abrt; + } + if(drive->error){ + drive->status |= Chk; + ctlr->curdrive = nil; + } + iunlock(ctlr); + } + poperror(); + + if(drive->status & Chk) + rv = SDcheck; + return rv; +} + +static int +atapktio(Drive* drive, SDreq *r) +{ + int n; + Ctlr *ctlr; + + ctlr = drive->ctlr; + qlock(ctlr); + n = atapktio0(drive, r); + qunlock(ctlr); + return n; +} + +static uchar cmd48[256] = { + [Crs] Crs48, + [Crd] Crd48, + [Crsm] Crsm48, + [Cws] Cws48, + [Cwd] Cwd48, + [Cwsm] Cwsm48, +}; + +enum{ + Last28 = (1<<28) - 1 - 1, +}; + +static int +atageniostart(Drive* drive, uvlong lba) +{ + Ctlr *ctlr; + uchar cmd; + int as, c, cmdport, ctlport, h, len, s, use48; + + use48 = 0; + if((drive->flags&Lba48always) || lba > Last28 || drive->count > 256){ + if((drive->feat & Dllba) == 0) + return -1; + use48 = 1; + c = h = s = 0; + }else if(drive->dev & Lba){ + c = (lba>>8) & 0xFFFF; + h = (lba>>24) & 0x0F; + s = lba & 0xFF; + }else{ + if (drive->s == 0 || drive->h == 0){ + print("sdide: chs address botch"); + return -1; + } + c = lba/(drive->s*drive->h); + h = (lba/drive->s) % drive->h; + s = (lba % drive->s) + 1; + } + + ctlr = drive->ctlr; + cmdport = ctlr->cmdport; + ctlport = ctlr->ctlport; + if(ataready(cmdport, ctlport, drive->dev, Bsy|Drq, Drdy, 101*1000) < 0) + return -1; + + ilock(ctlr); + if(drive->dmactl && !atadmasetup(drive, 
drive->count*drive->secsize)){ + if(drive->write) + drive->command = Cwd; + else + drive->command = Crd; + } + else if(drive->rwmctl){ + drive->block = drive->rwm*drive->secsize; + if(drive->write) + drive->command = Cwsm; + else + drive->command = Crsm; + } + else{ + drive->block = drive->secsize; + if(drive->write) + drive->command = Cws; + else + drive->command = Crs; + } + drive->limit = drive->data + drive->count*drive->secsize; + cmd = drive->command; + if(use48){ + outb(cmdport+Count, drive->count>>8); + outb(cmdport+Count, drive->count); + outb(cmdport+Lbalo, lba>>24); + outb(cmdport+Lbalo, lba); + outb(cmdport+Lbamid, lba>>32); + outb(cmdport+Lbamid, lba>>8); + outb(cmdport+Lbahi, lba>>40); + outb(cmdport+Lbahi, lba>>16); + outb(cmdport+Dh, drive->dev|Lba); + cmd = cmd48[cmd]; + + if(DEBUG & Dbg48BIT) + print("using 48-bit commands\n"); + }else{ + outb(cmdport+Count, drive->count); + outb(cmdport+Sector, s); + outb(cmdport+Cyllo, c); + outb(cmdport+Cylhi, c>>8); + outb(cmdport+Dh, drive->dev|h); + } + ctlr->done = 0; + ctlr->curdrive = drive; + ctlr->command = drive->command; /* debugging */ + outb(cmdport+Command, cmd); + + switch(drive->command){ + case Cws: + case Cwsm: + microdelay(1); + as = ataready(cmdport, ctlport, 0, Bsy, Drq|Err, 1*1000*1000); + if(as < 0 || (as & Err)){ + iunlock(ctlr); + return -1; + } + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + outss(cmdport+Data, drive->data, len/2); + break; + + case Crd: + case Cwd: + atadmastart(ctlr, drive->write); + break; + } + iunlock(ctlr); + + return 0; +} + +static int +atagenioretry(Drive* drive, SDreq *r, uvlong lba, int count) +{ + char *s; + int rv, count0, rw; + uvlong lba0; + + if(drive->dmactl){ + drive->dmactl = 0; + s = "disabling dma"; + rv = SDretry; + }else if(drive->rwmctl){ + drive->rwmctl = 0; + s = "disabling rwm"; + rv = SDretry; + }else{ + s = "nondma"; + rv = sdsetsense(r, SDcheck, 4, 8, drive->error); + } + sdfakescsirw(r, &lba0, 
&count0, &rw); + print("atagenioretry: %s %c:%llud:%d @%llud:%d\n", + s, "rw"[rw], lba0, count0, lba, count); + return rv; +} + +static int +atagenio(Drive* drive, SDreq *r) +{ + Ctlr *ctlr; + uvlong lba; + int i, rw, count, maxio; + + if((i = sdfakescsi(r)) != SDnostatus) + return i; + if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus) + return i; + ctlr = drive->ctlr; + if(drive->data == nil) + return SDok; + if(drive->dlen < count*drive->secsize) + count = drive->dlen/drive->secsize; + qlock(ctlr); + if(ctlr->maxio) + maxio = ctlr->maxio; + else if(drive->feat & Dllba) + maxio = 65536; + else + maxio = 256; + while(count){ + if(count > maxio) + drive->count = maxio; + else + drive->count = count; + if(atageniostart(drive, lba)){ + ilock(ctlr); + atanop(drive, 0); + iunlock(ctlr); + qunlock(ctlr); + return atagenioretry(drive, r, lba, count); + } + iowait(drive, 60*1000, 0); + if(!ctlr->done){ + /* + * What should the above timeout be? In + * standby and sleep modes it could take as + * long as 30 seconds for a drive to respond. + * Very hard to get out of this cleanly. 
+ */ + atadumpstate(drive, r, lba, count); + ataabort(drive, 1); + qunlock(ctlr); + return atagenioretry(drive, r, lba, count); + } + + if(drive->status & Err){ + qunlock(ctlr); +print("atagenio: %llud:%d\n", lba, drive->count); + return sdsetsense(r, SDcheck, 4, 8, drive->error); + } + count -= drive->count; + lba += drive->count; + } + qunlock(ctlr); + + return SDok; +} + +static int +atario(SDreq* r) +{ + uchar *p; + int status; + Ctlr *ctlr; + Drive *drive; + SDunit *unit; + + unit = r->unit; + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] == nil){ + r->status = SDtimeout; + return SDtimeout; + } + drive = ctlr->drive[unit->subno]; + qlock(drive); + for(;;){ + drive->write = r->write; + drive->data = r->data; + drive->dlen = r->dlen; + drive->status = 0; + drive->error = 0; + if(drive->pkt) + status = atapktio(drive, r); + else + status = atagenio(drive, r); + if(status != SDretry) + break; + if(DbgDEBUG) + print("%s: retry: dma %8.8uX rwm %4.4uX\n", + unit->name, drive->dmactl, drive->rwmctl); + } + if(status == SDok && r->rlen == 0 && (r->flags & SDvalidsense) == 0){ + sdsetsense(r, SDok, 0, 0, 0); + if(drive->data){ + p = r->data; + r->rlen = drive->data - p; + } + else + r->rlen = 0; + } + qunlock(drive); + return status; +} + +/**/ +static int +isdmacmd(Drive *d, SDreq *r) +{ + switch(r->ataproto & Pprotom){ + default: + return 0; + case Pdmq: + error("no queued support"); + case Pdma: + if(!(d->dmactl || d->rwmctl)) + error("dma in non dma mode"); + return 1; + } +} + +static int +atagenatastart(Drive* d, SDreq *r) +{ + uchar u; + int as, cmdport, ctlport, len, pr, isdma; + Ctlr *ctlr; + + isdma = isdmacmd(d, r); + ctlr = d->ctlr; + cmdport = ctlr->cmdport; + ctlport = ctlr->ctlport; + if(ataready(cmdport, ctlport, d->dev, Bsy|Drq, d->pkt? 
/*
 * Overwrite the request's command buffer (r->cmd) with a result
 * register set read back from the drive's task file: type byte
 * 0x34, error, sector count, lba bytes, device and status.
 * The controller's register lock is held while the ports are read.
 *
 * For drives with Dllba set, unless the request is marked P28,
 * each of Count/Lbalo/Lbamid/Lbahi is read twice, filling the
 * high-order slot (Fsc8, Flba24, Flba32, Flba40) first and the
 * low-order slot second.
 * NOTE(review): nothing here writes Hob to the Device Control
 * register between the two reads -- confirm that the first read
 * of each pair really returns the high-order byte on the
 * controllers this is used with.
 */
static void
mkrfis(Drive *d, SDreq *r)
{
	uchar *u;
	int cmdport;
	Ctlr *ctlr;

	ctlr = d->ctlr;
	cmdport = ctlr->cmdport;
	u = r->cmd;

	ilock(ctlr);
	u[Ftype] = 0x34;
	u[Fioport] = 0;
	if((d->feat & Dllba) && (r->ataproto & P28) == 0){
		/* 48-bit layout: high-order slot first, then low-order */
		u[Frerror] = inb(cmdport+Error);
		u[Fsc8] = inb(cmdport+Count);
		u[Fsc] = inb(cmdport+Count);
		u[Flba24] = inb(cmdport+Lbalo);
		u[Flba0] = inb(cmdport+Lbalo);
		u[Flba32] = inb(cmdport+Lbamid);
		u[Flba8] = inb(cmdport+Lbamid);
		u[Flba40] = inb(cmdport+Lbahi);
		u[Flba16] = inb(cmdport+Lbahi);
		u[Fdev] = inb(cmdport+Dh);
		u[Fstatus] = inb(cmdport+Status);
	}else{
		/* 28-bit layout: one read per register */
		u[Frerror] = inb(cmdport+Error);
		u[Fsc] = inb(cmdport+Count);
		u[Flba0] = inb(cmdport+Lbalo);
		u[Flba8] = inb(cmdport+Lbamid);
		u[Flba16] = inb(cmdport+Lbahi);
		u[Fdev] = inb(cmdport+Dh);
		u[Fstatus] = inb(cmdport+Status);
	}
	iunlock(ctlr);
}
print("sdide: !done\n"); + atadumpstate(d, r, 0, d->count); + } + ataabort(d, 1); + qunlock(ctlr); + return sdsetsense(r, SDcheck, 11, 0, 6); /* aborted; i/o process terminated */ + } + mkrfis(d, r); + if(d->status & Err){ + if(DEBUG & DbgAtazz) + print("sdide: status&Err\n"); + qunlock(ctlr); + return sdsetsense(r, SDcheck, 4, 8, d->error); + } + qunlock(ctlr); + return SDok; +} + +static int +ataataio0(Drive *d, SDreq *r) +{ + int i; + + if((r->ataproto & Pprotom) == Ppkt){ + if(r->clen > d->pkt) + error(Eio); + qlock(d->ctlr); + i = atapktio0(d, r); + d->block = d->data - (uchar*)r->data; + mkrfis(d, r); + qunlock(d->ctlr); + return i; + }else + return atapataio(d, r); +} + +/* + * hack to allow udma mode to be set or unset + * via direct ata command. it would be better + * to move the assumptions about dma mode out + * of some of the helper functions. + */ +static int +isudm(SDreq *r) +{ + uchar *c; + + c = r->cmd; + if(c[Fcmd] == 0xef && c[Ffeat] == 0x03){ + if(c[Fsc]&0x40) + return 1; + return -1; + } + return 0; +} + +static int +fisreqchk(Sfis *f, SDreq *r) +{ + if((r->ataproto & Pprotom) == Ppkt) + return SDnostatus; + /* + * handle oob requests; + * restrict & sanitize commands + */ + if(r->clen != 16) + error(Eio); + if(r->cmd[0] == 0xf0){ + sigtofis(f, r->cmd); + r->status = SDok; + return SDok; + } + r->cmd[0] = 0x27; + r->cmd[1] = 0x80; + r->cmd[7] |= 0xa0; + return SDnostatus; +} + +static int +ataataio(SDreq *r) +{ + int status, udm; + Ctlr *c; + Drive *d; + SDunit *u; + + u = r->unit; + if((c = u->dev->ctlr) == nil || (d = c->drive[u->subno]) == nil){ + r->status = SDtimeout; + return SDtimeout; + } + if((status = fisreqchk(d, r)) != SDnostatus) + return status; + udm = isudm(r); + + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } +retry: + d->write = r->write; + d->data = r->data; + d->dlen = r->dlen; + d->status = 0; + d->error = 0; + + switch(status = ataataio0(d, r)){ + case SDretry: + if(DbgDEBUG) + print("%s: retry: dma %.8ux rwm 
%.4ux\n", + u->name, d->dmactl, d->rwmctl); + goto retry; + case SDok: + if(udm == 1) + d->dmactl = d->dma; + else if(udm == -1) + d->dmactl = 0; + sdsetsense(r, SDok, 0, 0, 0); + r->rlen = d->block; + break; + } + poperror(); + qunlock(d); + r->status = status; + return status; +} +/**/ + +static void +ichirqack(Ctlr *ctlr) +{ + int bmiba; + + if(bmiba = ctlr->bmiba) + outb(bmiba+Bmisx, inb(bmiba+Bmisx)); +} + +static void +atainterrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Drive *drive; + int cmdport, len, status; + + ctlr = arg; + + ilock(ctlr); + ctlr->nrq++; + if(ctlr->curdrive) + ctlr->curdrive->irq++; + if(inb(ctlr->ctlport+As) & Bsy){ + ctlr->bsy++; + if(ctlr->curdrive) + ctlr->curdrive->bsy++; + iunlock(ctlr); + if(DEBUG & DbgBsy) + print("IBsy+"); + return; + } + cmdport = ctlr->cmdport; + status = inb(cmdport+Status); + if((drive = ctlr->curdrive) == nil){ + ctlr->nildrive++; + if(ctlr->irqack != nil) + ctlr->irqack(ctlr); + iunlock(ctlr); + if((DEBUG & DbgINL) && ctlr->command != Cedd) + print("Inil%2.2uX+", ctlr->command); + return; + } + + if(status & Err) + drive->error = inb(cmdport+Error); + else switch(drive->command){ + default: + drive->error = Abrt; + break; + + case Crs: + case Crsm: + case Ppio|Pin: + if(!(status & Drq)){ + drive->error = Abrt; + break; + } + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + inss(cmdport+Data, drive->data, len/2); + drive->data += len; + if(drive->data >= drive->limit) + ctlr->done = 1; + break; + + case Cws: + case Cwsm: + case Ppio|Pout: + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + drive->data += len; + if(drive->data >= drive->limit){ + ctlr->done = 1; + break; + } + if(!(status & Drq)){ + drive->error = Abrt; + break; + } + len = drive->block; + if(drive->data+len > drive->limit) + len = drive->limit-drive->data; + outss(cmdport+Data, drive->data, len/2); + break; + + case Cpkt: + case Ppkt|Pin: + case Ppkt|Pout: + 
atapktinterrupt(drive); + break; + + case Crd: + case Cwd: + case Pdma|Pin: + case Pdma|Pout: + atadmainterrupt(drive, drive->count*drive->secsize); + break; + + case Pnd: + case Preset: + ctlr->done = 1; + break; + } + if(ctlr->irqack != nil) + ctlr->irqack(ctlr); + iunlock(ctlr); + + if(drive->error){ + status |= Err; + ctlr->done = 1; + } + + if(ctlr->done){ + ctlr->curdrive = nil; + drive->status = status; + wakeup(ctlr); + } +} + +typedef struct Lchan Lchan; +struct Lchan { + int cmdport; + int ctlport; + int irq; + int probed; +}; +static Lchan lchan[2] = { + 0x1f0, 0x3f4, IrqATA0, 0, + 0x170, 0x374, IrqATA1, 0, +}; + +static int +badccru(Pcidev *p) +{ + switch(p->did<<16 | p->did){ + case 0x439c<<16 | 0x1002: + case 0x438c<<16 | 0x1002: +print("hi, anothy\n"); +print("%T: allowing bad ccru %.2ux for suspected ide controller\n", p->tbdf, p->ccru); + return 1; + default: + return 0; + } +} + +static SDev* +atapnp(void) +{ + char *s; + int channel, map, ispc87415, maxio, pi, r, span, tbdf; + Ctlr *ctlr; + Pcidev *p; + SDev *sdev, *head, *tail; + void (*irqack)(Ctlr*); + + head = tail = nil; + for(p = nil; p = pcimatch(p, 0, 0); ){ + /* + * Look for devices with the correct class and sub-class + * code and known device and vendor ID; add native-mode + * channels to the list to be probed, save info for the + * compatibility mode channels. + * Note that the legacy devices should not be considered + * PCI devices by the interrupt controller. + * For both native and legacy, save info for busmastering + * if capable. + * Promise Ultra ATA/66 (PDC20262) appears to + * 1) give a sub-class of 'other mass storage controller' + * instead of 'IDE controller', regardless of whether it's + * the only controller or not; + * 2) put 0 in the programming interface byte (probably + * as a consequence of 1) above). + * Sub-class code 0x04 is 'RAID controller', e.g. VIA VT8237. 
+ */ + if(p->ccrb != 0x01) + continue; + if(!badccru(p)) + if(p->ccru != 0x01 && p->ccru != 0x04 && p->ccru != 0x80) + continue; + pi = p->ccrp; + map = 3; + ispc87415 = 0; + maxio = 0; + if(s = getconf("*idemaxio")) + maxio = atoi(s); + span = BMspan; + irqack = nil; + + switch((p->did<<16)|p->vid){ + default: + continue; + + case (0x0002<<16)|0x100B: /* NS PC87415 */ + /* + * Disable interrupts on both channels until + * after they are probed for drives. + * This must be called before interrupts are + * enabled because the IRQ may be shared. + */ + ispc87415 = 1; + pcicfgw32(p, 0x40, 0x00000300); + break; + case (0x1000<<16)|0x1042: /* PC-Tech RZ1000 */ + /* + * Turn off prefetch. Overkill, but cheap. + */ + r = pcicfgr32(p, 0x40); + r &= ~0x2000; + pcicfgw32(p, 0x40, r); + break; + case (0x4D38<<16)|0x105A: /* Promise PDC20262 */ + case (0x4D30<<16)|0x105A: /* Promise PDC202xx */ + case (0x4D68<<16)|0x105A: /* Promise PDC20268 */ + case (0x4D69<<16)|0x105A: /* Promise Ultra/133 TX2 */ + case (0x3373<<16)|0x105A: /* Promise 20378 RAID */ + case (0x3149<<16)|0x1106: /* VIA VT8237 SATA/RAID */ + case (0x3112<<16)|0x1095: /* SiL 3112 SATA/RAID */ + maxio = 15; + span = 8*1024; + /*FALLTHROUGH*/ + case (0x3114<<16)|0x1095: /* SiL 3114 SATA/RAID */ + case (0x0680<<16)|0x1095: /* SiI 0680/680A PATA133 ATAPI/RAID */ + pi = 0x85; + break; + case (0x0004<<16)|0x1103: /* HighPoint HPT366 */ + pi = 0x85; + /* + * Turn off fast interrupt prediction. + */ + if((r = pcicfgr8(p, 0x51)) & 0x80) + pcicfgw8(p, 0x51, r & ~0x80); + if((r = pcicfgr8(p, 0x55)) & 0x80) + pcicfgw8(p, 0x55, r & ~0x80); + break; + case (0x0640<<16)|0x1095: /* CMD 640B */ + /* + * Bugfix code here... + */ + break; + case (0x7441<<16)|0x1022: /* AMD 768 */ + /* + * Set: + * 0x41 prefetch, postwrite; + * 0x43 FIFO configuration 1/2 and 1/2; + * 0x44 status register read retry; + * 0x46 DMA read and end of sector flush. 
+ */ + r = pcicfgr8(p, 0x41); + pcicfgw8(p, 0x41, r|0xF0); + r = pcicfgr8(p, 0x43); + pcicfgw8(p, 0x43, (r & 0x90)|0x2A); + r = pcicfgr8(p, 0x44); + pcicfgw8(p, 0x44, r|0x08); + r = pcicfgr8(p, 0x46); + pcicfgw8(p, 0x46, (r & 0x0C)|0xF0); + /*FALLTHROUGH*/ + case (0x01BC<<16)|0x10DE: /* nVidia nForce1 */ + case (0x0065<<16)|0x10DE: /* nVidia nForce2 */ + case (0x0085<<16)|0x10DE: /* nVidia nForce2 MCP */ + case (0x00E3<<16)|0x10DE: /* nVidia nForce2 250 SATA */ + case (0x00D5<<16)|0x10DE: /* nVidia nForce3 */ + case (0x00E5<<16)|0x10DE: /* nVidia nForce3 Pro */ + case (0x00EE<<16)|0x10DE: /* nVidia nForce3 250 SATA */ + case (0x0035<<16)|0x10DE: /* nVidia nForce3 MCP */ + case (0x0053<<16)|0x10DE: /* nVidia nForce4 */ + case (0x0054<<16)|0x10DE: /* nVidia nForce4 SATA */ + case (0x0055<<16)|0x10DE: /* nVidia nForce4 SATA */ + case (0x0266<<16)|0x10DE: /* nVidia nForce4 430 SATA */ + case (0x0265<<16)|0x10DE: /* nVidia nForce 51 MCP */ + case (0x0267<<16)|0x10DE: /* nVidia nForce 55 MCP SATA */ + case (0x03ec<<16)|0x10DE: /* nVidia nForce 61 MCP SATA */ + case (0x03f6<<16)|0x10DE: /* nVidia nForce 61 MCP PATA */ + case (0x0448<<16)|0x10DE: /* nVidia nForce 65 MCP SATA */ + case (0x0560<<16)|0x10DE: /* nVidia nForce 69 MCP SATA */ + /* + * Ditto, although it may have a different base + * address for the registers (0x50?). 
+ */ + /*FALLTHROUGH*/ + case (0x209A<<16)|0x1022: /* AMD CS5536 */ + case (0x7401<<16)|0x1022: /* AMD 755 Cobra */ + case (0x7409<<16)|0x1022: /* AMD 756 Viper */ + case (0x7410<<16)|0x1022: /* AMD 766 Viper Plus */ + case (0x7469<<16)|0x1022: /* AMD 3111 */ + case (0x4376<<16)|0x1002: /* SB4xx pata */ + case (0x4379<<16)|0x1002: /* SB4xx sata */ + case (0x437a<<16)|0x1002: /* SB4xx sata ctlr #2 */ + case (0x437c<<16)|0x1002: /* Rx6xx pata */ + case (0x439c<<16)|0x1002: /* SB7xx pata */ + break; + case (0x0211<<16)|0x1166: /* ServerWorks IB6566 */ + { + Pcidev *sb; + + sb = pcimatch(nil, 0x1166, 0x0200); + if(sb == nil) + break; + r = pcicfgr32(sb, 0x64); + r &= ~0x2000; + pcicfgw32(sb, 0x64, r); + } + span = 32*1024; + break; + case (0x5229<<16)|0x10B9: /* ALi M1543 */ + case (0x5288<<16)|0x10B9: /* ALi M5288 SATA */ + /*FALLTHROUGH*/ + case (0x5513<<16)|0x1039: /* SiS 962 */ + case (0x0646<<16)|0x1095: /* CMD 646 */ + case (0x0571<<16)|0x1106: /* VIA 82C686 */ + case (0x0502<<16)|0x100b: /* National Semiconductor SC1100/SCx200 */ + break; + case (0x2360<<16)|0x197b: /* jmicron jmb360 */ + case (0x2361<<16)|0x197b: /* jmicron jmb361 */ + case (0x2363<<16)|0x197b: /* jmicron jmb363 */ + case (0x2365<<16)|0x197b: /* jmicron jmb365 */ + case (0x2366<<16)|0x197b: /* jmicron jmb366 */ + case (0x2368<<16)|0x197b: /* jmicron jmb368 */ + break; + case (0x1230<<16)|0x8086: /* 82371FB (PIIX) */ + case (0x7010<<16)|0x8086: /* 82371SB (PIIX3) */ + case (0x7111<<16)|0x8086: /* 82371[AE]B (PIIX4[E]) */ + break; + case (0x2411<<16)|0x8086: /* 82801AA (ICH) */ + case (0x2421<<16)|0x8086: /* 82801AB (ICH0) */ + case (0x244A<<16)|0x8086: /* 82801BA (ICH2, Mobile) */ + case (0x244B<<16)|0x8086: /* 82801BA (ICH2, High-End) */ + case (0x248A<<16)|0x8086: /* 82801CA (ICH3, Mobile) */ + case (0x248B<<16)|0x8086: /* 82801CA (ICH3, High-End) */ + case (0x24CA<<16)|0x8086: /* 82801DBM (ICH4, Mobile) */ + case (0x24CB<<16)|0x8086: /* 82801DB (ICH4, High-End) */ + case (0x24D1<<16)|0x8086: 
/* 82801er (ich5) */ + case (0x24DB<<16)|0x8086: /* 82801EB (ICH5) */ + case (0x25A2<<16)|0x8086: /* 6300ESB pata */ + case (0x25A3<<16)|0x8086: /* 6300ESB (E7210) */ + case (0x266F<<16)|0x8086: /* 82801FB (ICH6) */ + case (0x2653<<16)|0x8086: /* 82801FBM (ICH6, Mobile) */ + case (0x269e<<16)|0x8086: /* 63xxESB (intel 5000) */ + case (0x27DF<<16)|0x8086: /* 82801G PATA (ICH7) */ + case (0x27C0<<16)|0x8086: /* 82801GB SATA (ICH7) */ + case (0x27C4<<16)|0x8086: /* 82801GBM SATA (ICH7) */ + case (0x27C5<<16)|0x8086: /* 82801GBM SATA AHCI (ICH7) */ + case (0x2820<<16)|0x8086: /* 82801HB/HR/HH/HO SATA IDE */ + case (0x2828<<16)|0x8086: /* 82801HBM SATA (ICH8-M) */ + case (0x2920<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA (ICH9) port 0-3 */ + case (0x2921<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA (ICH9) port 0-1 */ + case (0x2926<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA (ICH9) port 4-5 */ + case (0x2928<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA (ICH9m) port 0-1 */ + case (0x2929<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA (ICH9m) port 0-1, 4-5 */ + case (0x292d<<16)|0x8086: /* 82801(IB)/IR/IH/IO SATA (ICH9m) port 4-5*/ + case (0x3a20<<16)|0x8086: /* 82801ji (ich10) */ + case (0x3a26<<16)|0x8086: /* 82801ji (ich10) */ + case (0x3b20<<16)|0x8086: /* 34x0 (pch) port 0-3 */ + case (0x3b21<<16)|0x8086: /* 34x0 (pch) port 4-5 */ + case (0x3b28<<16)|0x8086: /* 34x0pm (pch) port 0-1, 4-5 */ + case (0x3b2e<<16)|0x8086: /* 34x0pm (pch) port 0-3 */ + case (0x1d00<<16)|0x8086: /* Patsburg (pch) port 0-3 */ + case (0x1d08<<16)|0x8086: /* Patsburg (pch) port 4-5 */ + map = 0; + if(pcicfgr16(p, 0x40) & 0x8000) + map |= 1; + if(pcicfgr16(p, 0x42) & 0x8000) + map |= 2; + irqack = ichirqack; + break; + } + for(channel = 0; channel < 2; channel++){ + /* + * NOTE(review): the two tests below were rebuilt from + * damaged text; confirm against the upstream driver. + * Skip channels not set in map; a channel whose bit is + * set in pi is probed at its own PCI BARs, otherwise the + * legacy ports are probed once. + */ + if((map & 1<<channel) == 0) + continue; + if(pi & (1<<(2*channel))){ + sdev = ataprobe(p->mem[0+2*channel].bar & ~0x01, + p->mem[1+2*channel].bar & ~0x01, + p->intl, 3); + tbdf = p->tbdf; + } + else if(lchan[channel].probed == 0){ + sdev = ataprobe(lchan[channel].cmdport, + lchan[channel].ctlport, lchan[channel].irq, 3); +
lchan[channel].probed = 1; + tbdf = BUSUNKNOWN; + } + else + continue; + if(sdev == nil) + continue; + ctlr = sdev->ctlr; + if(ispc87415) { + ctlr->ienable = pc87415ienable; + print("pc87415disable: not yet implemented\n"); + } + ctlr->tbdf = tbdf; + ctlr->pcidev = p; + ctlr->maxio = maxio; + ctlr->span = span; + ctlr->irqack = irqack; + if(pi & 0x80) + ctlr->bmiba = (p->mem[4].bar & ~0x01) + channel*8; + if(head != nil) + tail->next = sdev; + else + head = sdev; + tail = sdev; + } + } + + if(lchan[0].probed + lchan[1].probed == 0) + for(channel = 0; channel < 2; channel++){ + sdev = nil; + if(lchan[channel].probed == 0){ + // print("sdide: blind probe %.3ux\n", lchan[channel].cmdport); + sdev = ataprobe(lchan[channel].cmdport, + lchan[channel].ctlport, lchan[channel].irq, 3); + lchan[channel].probed = 1; + } + if(sdev == nil) + continue; + if(head != nil) + tail->next = sdev; + else + head = sdev; + tail = sdev; + } + + return head; +} + +static void +atadmaclr(Ctlr *ctlr) +{ + int bmiba, bmisx; + + if(ctlr->curdrive) + ataabort(ctlr->curdrive, 1); + bmiba = ctlr->bmiba; + if(bmiba == 0) + return; + atadmastop(ctlr); + outl(bmiba+Bmidtpx, 0); + bmisx = inb(bmiba+Bmisx) & ~Bmidea; + outb(bmiba+Bmisx, bmisx|Ideints|Idedmae); +// pciintst(ctlr->pcidev); +} + +static int +ataenable(SDev* sdev) +{ + Ctlr *ctlr; + char name[32]; + + ctlr = sdev->ctlr; + if(ctlr->bmiba){ + atadmaclr(ctlr); + if(ctlr->pcidev != nil) + pcisetbme(ctlr->pcidev); + ctlr->prdt = mallocalign(Nprd*sizeof(Prd), 4, 0, 64*1024); + } + snprint(name, sizeof(name), "%s (%s)", sdev->name, sdev->ifc->name); + ctlr->vector = intrenable(ctlr->irq, atainterrupt, ctlr, ctlr->tbdf, name); + outb(ctlr->ctlport+Dc, 0); + if(ctlr->ienable) + ctlr->ienable(ctlr); + return 1; +} + +static int +atadisable(SDev *sdev) +{ + Ctlr *ctlr; + char name[32]; + + ctlr = sdev->ctlr; + outb(ctlr->ctlport+Dc, Nien); /* disable interrupts */ + if (ctlr->idisable) + ctlr->idisable(ctlr); + snprint(name, sizeof(name), "%s (%s)", 
sdev->name, sdev->ifc->name); + intrdisable(ctlr->vector); + if(ctlr->bmiba) { +// atadmaclr(ctlr); + if (ctlr->pcidev) + pciclrbme(ctlr->pcidev); + free(ctlr->prdt); + } + return 0; +} + +static int +ataonline(SDunit *unit) +{ + Ctlr *ctlr; + Drive *drive; + + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] == nil) + return 0; + drive = ctlr->drive[unit->subno]; + if((drive->flags & Online) == 0){ + drive->flags |= Online; + atadrive(unit, drive, ctlr->cmdport, ctlr->ctlport, drive->dev); + } + unit->sectors = drive->sectors; + unit->secsize = drive->secsize; + if(drive->feat & Datapi) + return scsionline(unit); + return 1; +} + +static int +atarctl(SDunit* unit, char* p, int l) +{ + Ctlr *ctlr; + Drive *drive; + char *e, *op; + + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] == nil) + return 0; + drive = ctlr->drive[unit->subno]; + + e = p+l; + op = p; + qlock(drive); + p = seprint(p, e, "config %4.4uX capabilities %4.4uX", drive->info[Iconfig], drive->info[Icapabilities]); + if(drive->dma) + p = seprint(p, e, " dma %8.8uX dmactl %8.8uX", drive->dma, drive->dmactl); + if(drive->rwm) + p = seprint(p, e, " rwm %ud rwmctl %ud", drive->rwm, drive->rwmctl); + if(drive->feat & Dllba) + p = seprint(p, e, " lba48always %s", (drive->flags&Lba48always) ? 
"on" : "off"); + p = seprint(p, e, "\n"); + p = seprint(p, e, "model %s\n", drive->model); + p = seprint(p, e, "serial %s\n", drive->serial); + p = seprint(p, e, "firm %s\n", drive->firmware); + p = seprint(p, e, "feat "); + p = pflag(p, e, drive); + if(drive->sectors){ + p = seprint(p, e, "geometry %llud %d", drive->sectors, drive->secsize); + if(drive->pkt == 0 && (drive->feat & Dlba) == 0) + p = seprint(p, e, " %d %d %d", drive->c, drive->h, drive->s); + p = seprint(p, e, "\n"); + } + p = seprint(p, e, "missirq %ud\n", drive->missirq); + p = seprint(p, e, "sloop %ud\n", drive->spurloop); + p = seprint(p, e, "irq %ud %ud\n", ctlr->nrq, drive->irq); + p = seprint(p, e, "bsy %ud %ud\n", ctlr->bsy, drive->bsy); + p = seprint(p, e, "nildrive %ud\n", ctlr->nildrive); + qunlock(drive); + + return p - op; +} + +static int +atawctl(SDunit* unit, Cmdbuf* cb) +{ + Ctlr *ctlr; + Drive *drive; + + if((ctlr = unit->dev->ctlr) == nil || ctlr->drive[unit->subno] == nil) + return 0; + drive = ctlr->drive[unit->subno]; + + qlock(drive); + if(waserror()){ + qunlock(drive); + nexterror(); + } + + /* + * Dma and rwm control is passive at the moment, + * i.e. it is assumed that the hardware is set up + * correctly already either by the BIOS or when + * the drive was initially identified. 
+ */ + if(strcmp(cb->f[0], "dma") == 0){ + if(cb->nf != 2 || drive->dma == 0) + error(Ebadctl); + if(strcmp(cb->f[1], "on") == 0) + drive->dmactl = drive->dma; + else if(strcmp(cb->f[1], "off") == 0) + drive->dmactl = 0; + else + error(Ebadctl); + } + else if(strcmp(cb->f[0], "rwm") == 0){ + if(cb->nf != 2 || drive->rwm == 0) + error(Ebadctl); + if(strcmp(cb->f[1], "on") == 0) + drive->rwmctl = drive->rwm; + else if(strcmp(cb->f[1], "off") == 0) + drive->rwmctl = 0; + else + error(Ebadctl); + } + else if(strcmp(cb->f[0], "lba48always") == 0){ + if(cb->nf != 2 || !(drive->feat & Dllba)) + error(Ebadctl); + if(strcmp(cb->f[1], "on") == 0) + drive->flags |= Lba48always; + else if(strcmp(cb->f[1], "off") == 0) + drive->flags &= ~Lba48always; + else + error(Ebadctl); + } + else if(strcmp(cb->f[0], "identify") == 0){ + atadrive(unit, drive, ctlr->cmdport, ctlr->ctlport, drive->dev); + } + else + error(Ebadctl); + qunlock(drive); + poperror(); + + return 0; +} + +SDifc sdideifc = { + "ide", /* name */ + + atapnp, /* pnp */ + nil, /* legacy */ + ataenable, /* enable */ + atadisable, /* disable */ + + scsiverify, /* verify */ + ataonline, /* online */ + atario, /* rio */ + atarctl, /* rctl */ + atawctl, /* wctl */ + + scsibio, /* bio */ + nil, /* probe */ + ataclear, /* clear */ + atastat, /* rtopctl */ + nil, /* wtopctl */ + ataataio, +}; diff -Nru /sys/src/9k/386/sdscsi.c /sys/src/9k/386/sdscsi.c --- /sys/src/9k/386/sdscsi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/sdscsi.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,478 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +static int +scsitest(SDreq* r) +{ + r->write = 0; + memset(r->cmd, 0, sizeof(r->cmd)); + r->cmd[1] = r->lun<<5; + r->clen = 6; + r->data = nil; + r->dlen = 0; + r->flags = 0; + + r->status = ~0; + + return r->unit->dev->ifc->rio(r); +} + +int +scsiverify(SDunit* unit) +{ 
+ /* + * Probe unit: fetch its inquiry data through a bounce buffer into + * unit->inquiry, poll it with scsitest for at most three rounds and, + * if it answered SDok and is a direct-access device, send the + * command that asks it to spin up. Returns 1 when the final status + * is SDok or SDcheck; returns 0 otherwise, including when an + * allocation or the inquiry request itself fails. + */ + SDreq *r; + int i, status; + uchar *inquiry; + + if((r = malloc(sizeof(SDreq))) == nil) + return 0; + if((inquiry = sdmalloc(sizeof(unit->inquiry))) == nil){ + free(r); + return 0; + } + r->unit = unit; + r->lun = 0; /* ??? */ + + memset(unit->inquiry, 0, sizeof(unit->inquiry)); + r->write = 0; + r->cmd[0] = 0x12; + r->cmd[1] = r->lun<<5; + r->cmd[4] = sizeof(unit->inquiry)-1; + r->clen = 6; + r->data = inquiry; + r->dlen = sizeof(unit->inquiry)-1; + r->flags = 0; + + r->status = ~0; + if(unit->dev->ifc->rio(r) != SDok){ + /* the bounce buffer must be released on this path too */ + sdfree(inquiry); + free(r); + return 0; + } + memmove(unit->inquiry, inquiry, r->dlen); + sdfree(inquiry); + + SET(status); + for(i = 0; i < 3; i++){ + while((status = scsitest(r)) == SDbusy) + ; + if(status == SDok || status != SDcheck) + break; + if(!(r->flags & SDvalidsense)) + break; + if((r->sense[2] & 0x0F) != 0x02) + continue; + + /* + * Unit is 'not ready'. + * If it is in the process of becoming ready or needs + * an initialising command, set status so it will be spun-up + * below. + * If there's no medium, that's OK too, but don't + * try to spin it up. + */ + if(r->sense[12] == 0x04){ + if(r->sense[13] == 0x02 || r->sense[13] == 0x01){ + status = SDok; + break; + } + } + if(r->sense[12] == 0x3A) + break; + } + + if(status == SDok){ + /* + * Try to ensure a direct-access device is spinning. + * Don't wait for completion, ignore the result. + */ + if((unit->inquiry[0] & 0x1F) == 0){ + memset(r->cmd, 0, sizeof(r->cmd)); + r->write = 0; + r->cmd[0] = 0x1B; + r->cmd[1] = (r->lun<<5)|0x01; + r->cmd[4] = 1; + r->clen = 6; + r->data = nil; + r->dlen = 0; + r->flags = 0; + + r->status = ~0; + unit->dev->ifc->rio(r); + } + } + free(r); + + if(status == SDok || status == SDcheck) + return 1; + return 0; +} + +static int +scsirio(SDreq* r) +{ + /* + * Perform an I/O request, returning + * -1 failure + * 0 ok + * 1 no medium present + * 2 retry + * The contents of r may be altered so the + * caller should re-initialise if necessary.
+ */ + r->status = ~0; + switch(r->unit->dev->ifc->rio(r)){ + default: + break; + case SDcheck: + if(!(r->flags & SDvalidsense)) + break; + switch(r->sense[2] & 0x0F){ + case 0x00: /* no sense */ + case 0x01: /* recovered error */ + return 2; + case 0x06: /* check condition */ + /* + * 0x28 - not ready to ready transition, + * medium may have changed. + * 0x29 - power on or some type of reset. + */ + if(r->sense[12] == 0x28 && r->sense[13] == 0) + return 2; + if(r->sense[12] == 0x29) + return 2; + break; + case 0x02: /* not ready */ + /* + * If no medium present, bail out. + * If unit is becoming ready, rather than not + * not ready, wait a little then poke it again. + */ + if(r->sense[12] == 0x3A) + break; + if(r->sense[12] != 0x04 || r->sense[13] != 0x01) + break; + + while(waserror()) + ; + tsleep(&up->sleep, return0, 0, 500); + poperror(); + scsitest(r); + return 2; + default: + break; + } + break; + case SDok: + return 0; + } + return -1; +} + +static void +cap10(SDreq *r) +{ + r->cmd[0] = 0x25; + r->cmd[1] = r->lun<<5; + r->clen = 10; + r->dlen = 8; +} + +static void +cap16(SDreq *r) +{ + uint i; + + i = 32; + r->cmd[0] = 0x9e; + r->cmd[1] = 0x10; + r->cmd[10] = i>>24; + r->cmd[11] = i>>16; + r->cmd[12] = i>>8; + r->cmd[13] = i; + r->clen = 16; + r->dlen = i; +} + +static uint +belong(uchar *u) +{ + return u[0]<<24 | u[1]<<16 | u[2]<<8 | u[3]; +} + +static uvlong +capreply(SDreq *r, ulong *secsize) +{ + uchar *u; + ulong ss; + uvlong s; + + *secsize = 0; + u = r->data; + if(r->clen == 16){ + s = (uvlong)belong(u)<<32 | belong(u + 4); + ss = belong(u + 8); + }else{ + s = belong(u); + ss = belong(u + 4); + } + if(s == 0) + return s; + /* + * Some ATAPI CD readers lie about the block size. + * Since we don't read audio via this interface + * it's okay to always fudge this. + */ + if(ss == 2352) + ss = 2048; + /* + * Devices with removable media may return 0 sectors + * when they have empty media (e.g. sata dvd writers); + * if so, keep the count zero. 
+ * + * Read-capacity returns the LBA of the last sector, + * therefore the number of sectors must be incremented. + */ + if(s != 0) + s++; + *secsize = ss; + return s; +} + +int +scsionline(SDunit* unit) +{ + SDreq *r; + uchar *p; + int ok, retries; + void (*cap)(SDreq*); + + if((r = malloc(sizeof *r)) == nil) + return 0; + if((p = sdmalloc(32)) == nil){ + free(r); + return 0; + } + + ok = 0; + cap = cap10; + r->unit = unit; + r->lun = 0; /* ??? */ + for(retries = 0; retries < 10; retries++){ + /* + * Read-capacity is mandatory for DA, WORM, CD-ROM and + * MO. It may return 'not ready' if type DA is not + * spun up, type MO or type CD-ROM are not loaded or just + * plain slow getting their act together after a reset. + */ + r->write = 0; + r->data = p; + r->flags = 0; + memset(r->cmd, 0, sizeof r->cmd); + cap(r); + + r->status = ~0; + switch(scsirio(r)){ + default: + break; + case 0: + unit->sectors = capreply(r, &unit->secsize); + if(unit->sectors == 0xffffffff && cap == cap10){ + cap = cap16; + continue; + } + ok = 1; + break; + case 1: + ok = 1; + break; + case 2: + continue; + } + break; + } + free(p); + free(r); + + if(ok) + return ok+retries; + else + return 0; +} + +int +scsiexec(SDunit* unit, int write, uchar* cmd, int clen, void* data, int* dlen) +{ + SDreq *r; + int status; + + if((r = malloc(sizeof(SDreq))) == nil) + return SDmalloc; + r->unit = unit; + r->lun = cmd[1]>>5; /* ??? */ + r->write = write; + memmove(r->cmd, cmd, clen); + r->clen = clen; + r->data = data; + if(dlen) + r->dlen = *dlen; + r->flags = 0; + + r->status = ~0; + + /* + * Call the device-specific I/O routine. + * There should be no calls to 'error()' below this + * which percolate back up. + */ + switch(status = unit->dev->ifc->rio(r)){ + case SDok: + if(dlen) + *dlen = r->rlen; + /*FALLTHROUGH*/ + case SDcheck: + /*FALLTHROUGH*/ + default: + /* + * It's more complicated than this. There are conditions + * which are 'ok' but for which the returned status code + * is not 'SDok'. 
+ * Also, not all conditions require a reqsense, might + * need to do a reqsense here and make it available to the + * caller somehow. + * + * Mañana. + */ + break; + } + sdfree(r); + + return status; +} + +static void +scsifmt10(SDreq *r, int write, int lun, ulong nb, uvlong bno) +{ + uchar *c; + + c = r->cmd; + if(write == 0) + c[0] = 0x28; + else + c[0] = 0x2A; + c[1] = lun<<5; + c[2] = bno>>24; + c[3] = bno>>16; + c[4] = bno>>8; + c[5] = bno; + c[6] = 0; + c[7] = nb>>8; + c[8] = nb; + c[9] = 0; + + r->clen = 10; +} + +static void +scsifmt16(SDreq *r, int write, int lun, ulong nb, uvlong bno) +{ + uchar *c; + + c = r->cmd; + if(write == 0) + c[0] = 0x88; + else + c[0] = 0x8A; + c[1] = lun<<5; /* so wrong */ + c[2] = bno>>56; + c[3] = bno>>48; + c[4] = bno>>40; + c[5] = bno>>32; + c[6] = bno>>24; + c[7] = bno>>16; + c[8] = bno>>8; + c[9] = bno; + c[10] = nb>>24; + c[11] = nb>>16; + c[12] = nb>>8; + c[13] = nb; + c[14] = 0; + c[15] = 0; + + r->clen = 16; +} + +long +scsibio(SDunit* unit, int lun, int write, void* data, long nb, uvlong bno) +{ + SDreq *r; + long rlen; + + if((r = malloc(sizeof(SDreq))) == nil) + error(Enomem); + r->unit = unit; + r->lun = lun; +again: + r->write = write; + if(bno >= (1ULL<<32)) + scsifmt16(r, write, lun, nb, bno); + else + scsifmt10(r, write, lun, nb, bno); + r->data = data; + r->dlen = nb*unit->secsize; + r->flags = 0; + + r->status = ~0; + switch(scsirio(r)){ + default: + rlen = -1; + break; + case 0: + rlen = r->rlen; + break; + case 2: + rlen = -1; + if(!(r->flags & SDvalidsense)) + break; + switch(r->sense[2] & 0x0F){ + default: + break; + case 0x01: /* recovered error */ + print("%s: recovered error at sector %llud\n", + unit->name, bno); + rlen = r->rlen; + break; + case 0x06: /* check condition */ + /* + * Check for a removeable media change. + * If so, mark it by zapping the geometry info + * to force an online request. 
+ */ + if(r->sense[12] != 0x28 || r->sense[13] != 0) + break; + if(unit->inquiry[1] & 0x80) + unit->sectors = 0; + break; + case 0x02: /* not ready */ + /* + * If unit is becoming ready, + * rather than not not ready, try again. + */ + if(r->sense[12] == 0x04 && r->sense[13] == 0x01) + goto again; + break; + } + break; + } + free(r); + + return rlen; +} + diff -Nru /sys/src/9k/386/uarti8250.c /sys/src/9k/386/uarti8250.c --- /sys/src/9k/386/uarti8250.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/uarti8250.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,799 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * 8250 UART and compatibles. + */ +enum { + Uart0 = 0x3F8, /* COM1 */ + Uart0IRQ = 4, + Uart1 = 0x2F8, /* COM2 */ + Uart1IRQ = 3, + + UartFREQ = 1843200, +}; + +enum { /* registers */ + Rbr = 0, /* Receiver Buffer (RO) */ + Thr = 0, /* Transmitter Holding (WO) */ + Ier = 1, /* Interrupt Enable */ + Iir = 2, /* Interrupt Identification (RO) */ + Fcr = 2, /* FIFO Control (WO) */ + Lcr = 3, /* Line Control */ + Mcr = 4, /* Modem Control */ + Lsr = 5, /* Line Status */ + Msr = 6, /* Modem Status */ + Scr = 7, /* Scratch Pad */ + Dll = 0, /* Divisor Latch LSB */ + Dlm = 1, /* Divisor Latch MSB */ +}; + +enum { /* Ier */ + Erda = 0x01, /* Enable Received Data Available */ + Ethre = 0x02, /* Enable Thr Empty */ + Erls = 0x04, /* Enable Receiver Line Status */ + Ems = 0x08, /* Enable Modem Status */ +}; + +enum { /* Iir */ + Ims = 0x00, /* Ms interrupt */ + Ip = 0x01, /* Interrupt Pending (not) */ + Ithre = 0x02, /* Thr Empty */ + Irda = 0x04, /* Received Data Available */ + Irls = 0x06, /* Receiver Line Status */ + Ictoi = 0x0C, /* Character Time-out Indication */ + IirMASK = 0x3F, + Ifena = 0xC0, /* FIFOs enabled */ +}; + +enum { /* Fcr */ + FIFOena = 0x01, /* FIFO enable */ + FIFOrclr = 0x02, /* clear Rx FIFO */ + FIFOtclr = 0x04, /* clear Tx FIFO */ + FIFO1 = 0x00, /* Rx FIFO trigger level 1 byte */ + FIFO4 = 0x40, /* 4 
bytes */ + FIFO8 = 0x80, /* 8 bytes */ + FIFO14 = 0xC0, /* 14 bytes */ +}; + +enum { /* Lcr */ + Wls5 = 0x00, /* Word Length Select 5 bits/byte */ + Wls6 = 0x01, /* 6 bits/byte */ + Wls7 = 0x02, /* 7 bits/byte */ + Wls8 = 0x03, /* 8 bits/byte */ + WlsMASK = 0x03, + Stb = 0x04, /* 2 stop bits */ + Pen = 0x08, /* Parity Enable */ + Eps = 0x10, /* Even Parity Select */ + Stp = 0x20, /* Stick Parity */ + Brk = 0x40, /* Break */ + Dlab = 0x80, /* Divisor Latch Access Bit */ +}; + +enum { /* Mcr */ + Dtr = 0x01, /* Data Terminal Ready */ + Rts = 0x02, /* Request To Send */ + Out1 = 0x04, /* no longer in use */ + Ie = 0x08, /* IRQ Enable */ + Dm = 0x10, /* Diagnostic Mode loopback */ +}; + +enum { /* Lsr */ + Dr = 0x01, /* Data Ready */ + Oe = 0x02, /* Overrun Error */ + Pe = 0x04, /* Parity Error */ + Fe = 0x08, /* Framing Error */ + Bi = 0x10, /* Break Interrupt */ + Thre = 0x20, /* Thr Empty */ + Temt = 0x40, /* Transmitter Empty */ + FIFOerr = 0x80, /* error in receiver FIFO */ +}; + +enum { /* Msr */ + Dcts = 0x01, /* Delta Cts */ + Ddsr = 0x02, /* Delta Dsr */ + Teri = 0x04, /* Trailing Edge of Ri */ + Ddcd = 0x08, /* Delta Dcd */ + Cts = 0x10, /* Clear To Send */ + Dsr = 0x20, /* Data Set Ready */ + Ri = 0x40, /* Ring Indicator */ + Dcd = 0x80, /* Data Carrier Detect */ +}; + +typedef struct Ctlr { + int io; + int irq; + int tbdf; + int iena; + void* vector; + int poll; + + uchar sticky[8]; + + Lock; + int hasfifo; + int checkfifo; + int fena; +} Ctlr; + +extern PhysUart i8250physuart; + +static Ctlr i8250ctlr[2] = { +{ .io = Uart0, + .irq = Uart0IRQ, + .tbdf = -1, + .poll = 0, }, + +{ .io = Uart1, + .irq = Uart1IRQ, + .tbdf = -1, + .poll = 0, }, +}; + +static Uart i8250uart[2] = { +{ .regs = &i8250ctlr[0], + .name = "COM1", + .freq = UartFREQ, + .phys = &i8250physuart, + .special= 0, + .next = &i8250uart[1], }, + +{ .regs = &i8250ctlr[1], + .name = "COM2", + .freq = UartFREQ, + .phys = &i8250physuart, + .special= 0, + .next = nil, }, +}; + +#define csr8r(c, r)
inb((c)->io+(r)) +#define csr8w(c, r, v) outb((c)->io+(r), (c)->sticky[(r)]|(v)) +#define csr8o(c, r, v) outb((c)->io+(r), (v)) + +static long +i8250status(Uart* uart, void* buf, long n, long offset) +{ + char *p; + Ctlr *ctlr; + uchar ier, lcr, mcr, msr; + + ctlr = uart->regs; + p = malloc(READSTR); + mcr = ctlr->sticky[Mcr]; + msr = csr8r(ctlr, Msr); + ier = ctlr->sticky[Ier]; + lcr = ctlr->sticky[Lcr]; + snprint(p, READSTR, + "b%d c%d d%d e%d l%d m%d p%c r%d s%d i%d\n" + "dev(%d) type(%d) framing(%d) overruns(%d) " + "berr(%d) serr(%d)%s%s%s%s\n", + + uart->baud, + uart->hup_dcd, + (msr & Dsr) != 0, + uart->hup_dsr, + (lcr & WlsMASK) + 5, + (ier & Ems) != 0, + (lcr & Pen) ? ((lcr & Eps) ? 'e': 'o'): 'n', + (mcr & Rts) != 0, + (lcr & Stb) ? 2: 1, + ctlr->fena, + + uart->dev, + uart->type, + uart->ferr, + uart->oerr, + uart->berr, + uart->serr, + (msr & Cts) ? " cts": "", + (msr & Dsr) ? " dsr": "", + (msr & Dcd) ? " dcd": "", + (msr & Ri) ? " ring": "" + ); + n = readstr(offset, buf, n, p); + free(p); + + return n; +} + +static void +i8250fifo(Uart* uart, int level) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + if(ctlr->hasfifo == 0) + return; + + /* + * Changing the FIFOena bit in Fcr flushes data + * from both receive and transmit FIFOs; there's + * no easy way to guarantee not losing data on + * the receive side, but it's possible to wait until + * the transmitter is really empty. + */ + ilock(ctlr); + while(!(csr8r(ctlr, Lsr) & Temt)) + ; + + /* + * Set the trigger level, default is the max. + * value. + * Some UARTs require FIFOena to be set before + * other bits can take effect, so set it twice. 
+ */ + ctlr->fena = level; + switch(level){ + case 0: + break; + case 1: + level = FIFO1|FIFOena; + break; + case 4: + level = FIFO4|FIFOena; + break; + case 8: + level = FIFO8|FIFOena; + break; + default: + level = FIFO14|FIFOena; + break; + } + csr8w(ctlr, Fcr, level); + csr8w(ctlr, Fcr, level); + iunlock(ctlr); +} + +static void +i8250dtr(Uart* uart, int on) +{ + Ctlr *ctlr; + + /* + * Toggle DTR. + */ + ctlr = uart->regs; + if(on) + ctlr->sticky[Mcr] |= Dtr; + else + ctlr->sticky[Mcr] &= ~Dtr; + csr8w(ctlr, Mcr, 0); +} + +static void +i8250rts(Uart* uart, int on) +{ + Ctlr *ctlr; + + /* + * Toggle RTS. + */ + ctlr = uart->regs; + if(on) + ctlr->sticky[Mcr] |= Rts; + else + ctlr->sticky[Mcr] &= ~Rts; + csr8w(ctlr, Mcr, 0); +} + +static void +i8250modemctl(Uart* uart, int on) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + ilock(&uart->tlock); + if(on){ + ctlr->sticky[Ier] |= Ems; + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + uart->modem = 1; + uart->cts = csr8r(ctlr, Msr) & Cts; + } + else{ + ctlr->sticky[Ier] &= ~Ems; + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + uart->modem = 0; + uart->cts = 1; + } + iunlock(&uart->tlock); + + /* modem needs fifo */ + (*uart->phys->fifo)(uart, on); +} + +static int +i8250parity(Uart* uart, int parity) +{ + int lcr; + Ctlr *ctlr; + + ctlr = uart->regs; + lcr = ctlr->sticky[Lcr] & ~(Eps|Pen); + + switch(parity){ + case 'e': + lcr |= Eps|Pen; + break; + case 'o': + lcr |= Pen; + break; + case 'n': + break; + default: + return -1; + } + ctlr->sticky[Lcr] = lcr; + csr8w(ctlr, Lcr, 0); + + uart->parity = parity; + + return 0; +} + +static int +i8250stop(Uart* uart, int stop) +{ + int lcr; + Ctlr *ctlr; + + ctlr = uart->regs; + lcr = ctlr->sticky[Lcr] & ~Stb; + + switch(stop){ + case 1: + break; + case 2: + lcr |= Stb; + break; + default: + return -1; + } + ctlr->sticky[Lcr] = lcr; + csr8w(ctlr, Lcr, 0); + + uart->stop = stop; + + return 0; +} + +static int +i8250bits(Uart* uart, int bits) +{ + int lcr; + Ctlr *ctlr; + + ctlr = uart->regs; + lcr = 
ctlr->sticky[Lcr] & ~WlsMASK; + + switch(bits){ + case 5: + lcr |= Wls5; + break; + case 6: + lcr |= Wls6; + break; + case 7: + lcr |= Wls7; + break; + case 8: + lcr |= Wls8; + break; + default: + return -1; + } + ctlr->sticky[Lcr] = lcr; + csr8w(ctlr, Lcr, 0); + + uart->bits = bits; + + return 0; +} + +static int +i8250baud(Uart* uart, int baud) +{ + ulong bgc; + Ctlr *ctlr; + + /* + * Set the Baud rate by calculating and setting the Baud rate + * Generator Constant. This will work with fairly non-standard + * Baud rates. + */ + if(uart->freq == 0 || baud <= 0) + return -1; + bgc = (uart->freq+8*baud-1)/(16*baud); + + ctlr = uart->regs; + csr8w(ctlr, Lcr, Dlab); + csr8o(ctlr, Dlm, bgc>>8); + csr8o(ctlr, Dll, bgc); + csr8w(ctlr, Lcr, 0); + + uart->baud = baud; + + return 0; +} + +static void +i8250break(Uart* uart, int ms) +{ + Ctlr *ctlr; + + /* + * Send a break. + */ + if(ms <= 0) + ms = 200; + + ctlr = uart->regs; + csr8w(ctlr, Lcr, Brk); + tsleep(&up->sleep, return0, 0, ms); + csr8w(ctlr, Lcr, 0); +} + +static void +i8250kick(Uart* uart) +{ + int i; + Ctlr *ctlr; + + if(uart->cts == 0 || uart->blocked) + return; + + /* + * 128 here is an arbitrary limit to make sure + * we don't stay in this loop too long. 
If the + * chip's output queue is longer than 128, too + * bad -- presotto + */ + ctlr = uart->regs; + for(i = 0; i < 128; i++){ + if(!(csr8r(ctlr, Lsr) & Thre)) + break; + if(uart->op >= uart->oe && uartstageoutput(uart) == 0) + break; + csr8o(ctlr, Thr, *(uart->op++)); + } +} + +static void +i8250interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Uart *uart; + int iir, lsr, old, r; + + uart = arg; + + ctlr = uart->regs; + for(iir = csr8r(ctlr, Iir); !(iir & Ip); iir = csr8r(ctlr, Iir)){ + switch(iir & IirMASK){ + case Ims: /* Ms interrupt */ + r = csr8r(ctlr, Msr); + if(r & Dcts){ + ilock(&uart->tlock); + old = uart->cts; + uart->cts = r & Cts; + if(old == 0 && uart->cts) + uart->ctsbackoff = 2; + iunlock(&uart->tlock); + } + if(r & Ddsr){ + old = r & Dsr; + if(uart->hup_dsr && uart->dsr && !old) + uart->dohup = 1; + uart->dsr = old; + } + if(r & Ddcd){ + old = r & Dcd; + if(uart->hup_dcd && uart->dcd && !old) + uart->dohup = 1; + uart->dcd = old; + } + break; + case Ithre: /* Thr Empty */ + uartkick(uart); + break; + case Irda: /* Received Data Available */ + case Irls: /* Receiver Line Status */ + case Ictoi: /* Character Time-out Indication */ + /* + * Consume any received data. + * If the received byte came in with a break, + * parity or framing error, throw it away; + * overrun is an indication that something has + * already been tossed. + */ + while((lsr = csr8r(ctlr, Lsr)) & Dr){ + if(lsr & (FIFOerr|Oe)) + uart->oerr++; + if(lsr & Pe) + uart->perr++; + if(lsr & Fe) + uart->ferr++; + r = csr8r(ctlr, Rbr); + if(!(lsr & (Bi|Fe|Pe))) + uartrecv(uart, r); + } + break; + + default: + iprint("weird uart interrupt %#2.2ux\n", iir); + break; + } + } +} + +static void +i8250disable(Uart* uart) +{ + Ctlr *ctlr; + + /* + * Turn off DTR and RTS, disable interrupts and fifos. 
+ */ + (*uart->phys->dtr)(uart, 0); + (*uart->phys->rts)(uart, 0); + (*uart->phys->fifo)(uart, 0); + + ctlr = uart->regs; + ctlr->sticky[Ier] = 0; + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + + if(ctlr->iena != 0){ + if(intrdisable(ctlr->vector) == 0) + ctlr->iena = 0; + } +} + +static void +i8250enable(Uart* uart, int ie) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + + /* + * Check if there is a FIFO. + * Changing the FIFOena bit in Fcr flushes data + * from both receive and transmit FIFOs; there's + * no easy way to guarantee not losing data on + * the receive side, but it's possible to wait until + * the transmitter is really empty. + * Also, reading the Iir outwith i8250interrupt() + * can be dangerous, but this should only happen + * once, before interrupts are enabled. + */ + ilock(ctlr); + if(!ctlr->checkfifo){ + /* + * Wait until the transmitter is really empty. + */ + while(!(csr8r(ctlr, Lsr) & Temt)) + ; + csr8w(ctlr, Fcr, FIFOena); + if(csr8r(ctlr, Iir) & Ifena) + ctlr->hasfifo = 1; + csr8w(ctlr, Fcr, 0); + ctlr->checkfifo = 1; + } + iunlock(ctlr); + + /* + * Enable interrupts and turn on DTR and RTS. + * Be careful if this is called to set up a polled serial line + * early on not to try to enable interrupts as interrupt- + * -enabling mechanisms might not be set up yet. + */ + if(ie){ + if(ctlr->iena == 0 && !ctlr->poll){ + ctlr->vector = intrenable(ctlr->irq, i8250interrupt, uart, ctlr->tbdf, uart->name); + ctlr->iena = 1; + } + ctlr->sticky[Ier] = Ethre|Erda; + ctlr->sticky[Mcr] |= Ie; + } + else{ + ctlr->sticky[Ier] = 0; + ctlr->sticky[Mcr] = 0; + } + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + csr8w(ctlr, Mcr, ctlr->sticky[Mcr]); + + (*uart->phys->dtr)(uart, 1); + (*uart->phys->rts)(uart, 1); + + /* + * During startup, the i8259 interrupt controller is reset. + * This may result in a lost interrupt from the i8250 uart. + * The i8250 thinks the interrupt is still outstanding and does not + * generate any further interrupts. 
The workaround is to call the + * interrupt handler to clear any pending interrupt events. + * Note: this must be done after setting Ier. + */ + if(ie) + i8250interrupt(nil, uart); +} + +/* + * Allocate a Ctlr and record its port, irq and tbdf; + * returns nil if malloc fails. No other field is set here. + */ +void* +i8250alloc(int io, int irq, int tbdf) +{ + Ctlr *ctlr; + + if((ctlr = malloc(sizeof(Ctlr))) != nil){ + ctlr->io = io; + ctlr->irq = irq; + ctlr->tbdf = tbdf; + } + + return ctlr; +} + +/* + * Probe the built-in COM ports and return the head of the + * list of i8250uart[] entries left linked after probing. + */ +static Uart* +i8250pnp(void) +{ + int i; + Ctlr *ctlr; + Uart *head, *uart; + + head = i8250uart; + for(i = 0; i < nelem(i8250uart); i++){ + /* + * Does it exist? + * Should be able to write/read the Scratch Pad + * and reserve the I/O space. + * NOTE(review): as written, a port that passes the + * Scratch Pad test is kept without ioalloc being called; + * a port that fails it is unlinked only if ioalloc + * succeeds and is kept if ioalloc fails. That looks + * inverted relative to the sentence above -- confirm. + */ + uart = &i8250uart[i]; + ctlr = uart->regs; + csr8o(ctlr, Scr, 0x55); + if(csr8r(ctlr, Scr) == 0x55) + continue; + if(ioalloc(ctlr->io, 8, 0, uart->name) < 0) + continue; + if(uart == head) + head = uart->next; + else + (uart-1)->next = uart->next; + } + + return head; +} + +/* + * Polled input: spin in delay(1) steps until Lsr shows Dr, + * then return the byte read from Rbr. There is no timeout. + */ +static int +i8250getc(Uart* uart) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + while(!(csr8r(ctlr, Lsr) & Dr)) + delay(1); + return csr8r(ctlr, Rbr); +} + +/* + * Polled output: wait for Thre (at most 128 delay(1) steps), + * write c to Thr, then wait the same way again. c is written + * even if the first wait ran out. + */ +static void +i8250putc(Uart* uart, int c) +{ + int i; + Ctlr *ctlr; + + ctlr = uart->regs; + for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++) + delay(1); + csr8o(ctlr, Thr, c); + for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++) + delay(1); +} + +static void +i8250poll(Uart* uart) +{ + Ctlr *ctlr; + + /* + * If PhysUart has a non-nil .poll member, this + * routine will be called from the uartclock timer. + * If the Ctlr .poll member is non-zero, when the + * Uart is enabled interrupts will not be enabled + * and the result is polled input and output. + * Not very useful here, but ports to new hardware + * or simulators can use this to get serial I/O + * without setting up the interrupt mechanism.
+ */ + ctlr = uart->regs; + if(ctlr->iena || !ctlr->poll) + return; + i8250interrupt(nil, uart); +} + +PhysUart i8250physuart = { + .name = "i8250", + .pnp = i8250pnp, + .enable = i8250enable, + .disable = i8250disable, + .kick = i8250kick, + .dobreak = i8250break, + .baud = i8250baud, + .bits = i8250bits, + .stop = i8250stop, + .parity = i8250parity, + .modemctl = i8250modemctl, + .rts = i8250rts, + .dtr = i8250dtr, + .status = i8250status, + .fifo = i8250fifo, + .getc = i8250getc, + .putc = i8250putc, + .poll = i8250poll, +}; + +Uart* +i8250console(char* cfg) +{ + int i; + Uart *uart; + Ctlr *ctlr; + char *cmd, *p; + ISAConf isa; + + /* + * Before i8250pnp() is run can only set the console + * to 0 or 1 because those are the only uart structs which + * will be the same before and after that. + */ + if((p = getconf("console")) == nil && (p = cfg) == nil) + return nil; + i = strtoul(p, &cmd, 0); + if(p == cmd) + return nil; +//WTF? Something to do with the PCIe-only machine? + if((uart = uartconsole(i, cmd)) != nil){ + consuart = uart; + return uart; + } + switch(i){ + default: + return nil; + case 0: + uart = &i8250uart[0]; + break; + case 1: + uart = &i8250uart[1]; + break; + } + +//Madness. Something to do with the PCIe-only machine? + memset(&isa, 0, sizeof(isa)); + ctlr = uart->regs; + if(isaconfig("eia", i, &isa) != 0){ + if(isa.port != 0) + ctlr->io = isa.port; + if(isa.irq != 0) + ctlr->irq = isa.irq; + if(isa.freq != 0) + uart->freq = isa.freq; + } + + /* + * Does it exist? + * Should be able to write/read + * the Scratch Pad. 
+ */ +// ctlr = uart->regs; +// csr8o(ctlr, Scr, 0x55); +// if(csr8r(ctlr, Scr) != 0x55) +// return nil; + + (*uart->phys->enable)(uart, 0); + uartctl(uart, "b9600 l8 pn s1 i1"); + if(*cmd != '\0') + uartctl(uart, cmd); + + consuart = uart; + uart->console = 1; + + return uart; +} diff -Nru /sys/src/9k/386/uartpci.c /sys/src/9k/386/uartpci.c --- /sys/src/9k/386/uartpci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/386/uartpci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,167 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" + +extern PhysUart i8250physuart; +extern PhysUart pciphysuart; +extern void* i8250alloc(int, int, int); + +/* + * Reserve the I/O range of BAR barno on p and set up n + * i8250 UARTs in it, 8 ports apart, linked through next. + * Returns the first Uart, or nil if the range is in use, + * memory runs out, or no controller could be allocated. + */ +static Uart* +uartpci(int ctlrno, Pcidev* p, int barno, int n, int freq, char* name) +{ + int i, io; + void *ctlr; + char buf[64]; + Uart *head, *uart; + + io = p->mem[barno].bar & ~0x01; + snprint(buf, sizeof(buf), "%s%d", pciphysuart.name, ctlrno); + if(ioalloc(io, p->mem[barno].size, 0, buf) < 0){ + print("uartpci: I/O %#ux in use\n", io); + return nil; + } + + head = uart = malloc(sizeof(Uart)*n); + if(head == nil){ + /* the I/O range reserved above is not released here */ + print("uartpci: no memory for %d uarts\n", n); + return nil; + } + + for(i = 0; i < n; i++){ + ctlr = i8250alloc(io, p->intl, p->tbdf); + io += 8; + if(ctlr == nil) + continue; + + uart->regs = ctlr; + snprint(buf, sizeof(buf), "%s.%8.8ux", name, p->tbdf); + kstrdup(&uart->name, buf); + uart->freq = freq; + uart->phys = &i8250physuart; + if(uart != head) + (uart-1)->next = uart; + uart++; + } + + if(uart == head){ + /* every i8250alloc failed: there is no initialised Uart to hand back */ + free(head); + return nil; + } + + return head; +} + +static Uart* +uartpcipnp(void) +{ + Pcidev *p; + char *name; + int ctlrno, n, subid; + Uart *head, *tail, *uart; + + /* + * Loop through all PCI devices looking for simple serial + * controllers (ccrb == 0x07) and configure the ones which + * are familiar. All suitable devices are configured to + * simply point to the generic i8250 driver.
+ */ + head = tail = nil; + ctlrno = 0; + for(p = pcimatch(nil, 0, 0); p != nil; p = pcimatch(p, 0, 0)){ + if(p->ccrb != 0x07 || p->ccru > 2) + continue; + + switch((p->did<<16)|p->vid){ + default: + continue; + case (0x9835<<16)|0x9710: /* StarTech PCI2S550 */ + uart = uartpci(ctlrno, p, 0, 1, 1843200, "PCI2S550-0"); + if(uart == nil) + continue; + uart->next = uartpci(ctlrno, p, 1, 1, 1843200, "PCI2S550-1"); + break; + case (0x9501<<16)|0x1415: /* Oxford Semi OX16PCI954 */ + case (0x950A<<16)|0x1415: + /* + * These are common devices used by 3rd-party + * manufacturers. + * Should check the subsystem VID and DID for correct + * match, mostly to get the clock frequency right. + */ + subid = pcicfgr16(p, PciSVID); + subid |= pcicfgr16(p, PciSID)<<16; + switch(subid){ + default: + continue; + case (0<<16)|0x1415: /* StarTech PCI4S550 */ + uart = uartpci(ctlrno, p, 0, 4, 18432000, "PCI4S550-0"); + if(uart == nil) + continue; + break; + case (0x2000<<16)|0x131F:/* SIIG CyberSerial PCIe */ + uart = uartpci(ctlrno, p, 0, 1, 18432000, "CyberSerial-1S"); + if(uart == nil) + continue; + break; + } + break; + case (0x9050<<16)|0x10B5: /* Perle PCI-Fast4 series */ + case (0x9030<<16)|0x10B5: /* Perle Ultraport series */ + /* + * These devices consists of a PLX bridge (the above + * PCI VID+DID) behind which are some 16C654 UARTs. + * Must check the subsystem VID and DID for correct + * match. 
+ */ + subid = pcicfgr16(p, PciSVID); + subid |= pcicfgr16(p, PciSID)<<16; + switch(subid){ + default: + continue; + case (0x0011<<16)|0x12E0: /* Perle PCI-Fast16 */ + n = 16; + name = "PCI-Fast16"; + break; + case (0x0021<<16)|0x12E0: /* Perle PCI-Fast8 */ + n = 8; + name = "PCI-Fast8"; + break; + case (0x0031<<16)|0x12E0: /* Perle PCI-Fast4 */ + n = 4; + name = "PCI-Fast4"; + break; + case (0x0021<<16)|0x155F: /* Perle Ultraport8 */ + n = 8; + name = "Ultraport8"; /* 16C754 UARTs */ + break; + } + uart = uartpci(ctlrno, p, 2, n, 7372800, name); + if(uart == nil) + continue; + break; + } + + if(head != nil) + tail->next = uart; + else + head = uart; + for(tail = uart; tail->next != nil; tail = tail->next) + ; + ctlrno++; + } + + return head; +} + +PhysUart pciphysuart = { + .name = "UartPCI", + .pnp = uartpcipnp, + .enable = nil, + .disable = nil, + .kick = nil, + .dobreak = nil, + .baud = nil, + .bits = nil, + .stop = nil, + .parity = nil, + .modemctl = nil, + .rts = nil, + .dtr = nil, + .status = nil, + .fifo = nil, +}; diff -Nru /sys/src/9k/Notes /sys/src/9k/Notes --- /sys/src/9k/Notes Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/Notes Wed Dec 9 00:00:00 2015 @@ -0,0 +1,166 @@ +need to re-think flushme/mmupageflush. if va->pa and pa shared at different va, +mmupageflush shouldn't be in Page, but in Segment mapping page (Pages) within va range. +perhaps a different way of expressing it? flush segment? matters on sep i/d cache machines. + ++ VA(k) looks wrong? VA(k) -> void* not uintptr + +textlim, datalim, bsslim +pragma pack in acpi.h +page.c/asm.c interface: asm should call pageinit function +SEGMAPSIZE/SEGMAXSIZE ++ RENDHASH (2^5 isn't distinguished enough) +kmap of big pages + +--- +mount point held by image in cache +clock0link linkage + +procctlmemio loop, and s->flushme when only read? 
+syssegflush: s->flushme set perpetually, even for pages paged out + to force PG_TXTFLUSH in fault for later page fault to ensure dcache flushed after pio +syssegflush + missing Pte + chunking ++ space taken by cachectl in Page, when MAXMACH large + +addseg + +new interface: + + void + mmucachectl(Page *p, uint why) + { + if(!pagedout(p)) + memset(p->cachectl, why, sizeof(p->cachectl)); + } + +mmuidle() + +splhi alreadyhi does CLI + +128 process limit in pexit(!) + +Cor + _xinc->ainc + +meminit -> asmmeminit + builds palloc.mem, used by pageinit. + also sets ialloclimit based on sys->vmend - sys->vmstart + +if malloc needs physalloc, physalloc will need ilock (malloc already uses it) + +bootstrap: + tailalloc or kmemalloc? + could size it during vmend/kernel mapping? + +split allocation: small pages and big ones to avoid huge map for all of memory. + +>=page-aligned things need bibop table: allocate in regions + +need physallocinit before full mallocinit + + +------- Segment curiosities + Physseg and Image are mutually-exclusive? + fstart/flen, should be in Image with list of prototype Segments + size in pages + profile should be Proc.up: ref Profbuf (ref-counted, shared) + sema data only + profile text only + +-- without swap: + don't need swap Image + don't need page hash by daddr?/lookpage through image's Segments + LRU within image, or don't bother recycling + LRU unused images instead + don't need Image hash chains + don't need Lru free list (global, perhaps for image?) + +--- images + sysload? + +Segments hold arch mmu data? +Image Segment would then really be page cache + Orbit had Exec -> Seg; copyseg, but seg->master (not implemented) +SSEG currently doesn't grow. +Image prototype Segment might have different virtual address, or none? 
+ + +TO DO +- nemo's changes to sysexec +- reset +- nemo changes to path etc + + +Allocation + +- physical memory + physalloc, physfree + buddy allocator + physical memory is not inherently organised as Pages + any contiguous range is acceptable to most devices + in practice, buddy has minimal size; malloc chunks has minimal size + (also, buddy is initialised with range of blocks of different sizes since memory + is arranged in an awkward way) + smallest Page size is much smaller + +- kernel heap + malloc + + +- problems: + - bootstrap: asmalloc, basealloc + - physalloc (buddy allocator) needs structures sized to memory, + which implies an underlying primitive allocator: basealloc -> asmalloc + - malloc calls morecore when current heap runs out + it should call physalloc and map the result + - buddy tables are too large when dealing with huge spaces, + seems to require at least a 2-level hierarchy + - how should the hierarchy be managed? + eg, 2 levels of buddy? + top level of huge chunks, with buddy below? + if huge chunks are not that big, is buddy needed? + malloc chunks don't need to be that big + use different primitives for large (typically contiguous physical) allocations + - with asmalloc you've got 3 levels! + - just malloc and asmalloc? + need fast source of Pages + build up Page pools and leave them as is + fragmentation? memory is huge, but 1 Gb fragments are large relative to memory + stable workload? + it's not as if malloc ever returns memory to allow buddies to coalesce + -> malloc calls asmalloc and Page calls physalloc? + -> still, Page size is small + -> no real need for malloc pages to allocate buddies, but + page and IO allocators usefully can do that (if allocated/freed) + - trouble with Page subdivision is that it amounts to a restricted buddy, + or a bitmask allocator, and both AMD64 and ARM64 have + a range of page sizes + - page table pages are smallest page size + - does only malloc need radix tree/bibop for metadata for big allocations? 
+ - needs to be sized initially? + - currently kernel malloc doesn't allow huge chunks anyway + +Try + - basealloc -> asmalloc for small chunks of raw memory + (careful it doesn't fragment a useful chunk!) + - one or more buddy pools + - buddy pool for Pages, starting with largest block as largest page size + - or could allocate smaller pages by allocating a larger + - malloc pool chunk is on the order of 2 Mbytes (or reasonable page size) + if it's 2 Mbytes aligned, with header, can use pointer-masking scheme + - all pools get memory from basealloc? (ends up in KSEG2) + +Virtual space +- user space +- kernel space + - kmap (mainly or exclusively for user pages) -> notional KMap structure + - vmap (for long-lived maps for device registers) +- 64-bit can use KSEG2 (must take care about aliasing) + +Mdom table? + pa2mdom(pa) + - presumably the memory is assigned to domains with physical addresses aligned somehow, + allowing a quick check of (say) the top bits in a table or a short binary search? 
+ +Mesg diff -Nru /sys/src/9k/boot/aux.c /sys/src/9k/boot/aux.c --- /sys/src/9k/boot/aux.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/aux.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,188 @@ +#include +#include +#include <../boot/boot.h> + +/* +int +plumb(char *dir, char *dest, int *efd, char *here) +{ + char buf[128]; + char name[128]; + int n; + + sprint(name, "%s/clone", dir); + efd[0] = open(name, ORDWR); + if(efd[0] < 0) + return -1; + n = read(efd[0], buf, sizeof(buf)-1); + if(n < 0){ + close(efd[0]); + return -1; + } + buf[n] = 0; + sprint(name, "%s/%s/data", dir, buf); + if(here){ + sprint(buf, "announce %s", here); + if(sendmsg(efd[0], buf) < 0){ + close(efd[0]); + return -1; + } + } + sprint(buf, "connect %s", dest); + if(sendmsg(efd[0], buf) < 0){ + close(efd[0]); + return -1; + } + efd[1] = open(name, ORDWR); + if(efd[1] < 0){ + close(efd[0]); + return -1; + } + return efd[1]; +} + */ + +int +sendmsg(int fd, char *msg) +{ + int n; + + n = strlen(msg); + if(write(fd, msg, n) != n) + return -1; + return 0; +} + +void +warning(char *fmt, ...) +{ + char msg[ERRMAX]; + va_list ap; + + va_start(ap, fmt); + vsnprint(msg, sizeof(msg), fmt, ap); + va_end(ap); + fprint(2, "boot: %s: %r\n", msg); +} + +void +fatal(char *fmt, ...) +{ + char msg[ERRMAX]; + va_list ap; + + va_start(ap, fmt); + vsnprint(msg, sizeof(msg), fmt, ap); + va_end(ap); + fprint(2, "boot: %s: %r\n", msg); + exits(0); +} + +int +readfile(char *name, char *buf, int len) +{ + int f, n; + + buf[0] = 0; + f = open(name, OREAD); + if(f < 0) + return -1; + n = read(f, buf, len-1); + if(n >= 0) + buf[n] = 0; + close(f); + return 0; +} + +int +writefile(char *name, char *buf, int len) +{ + int f, n; + + f = open(name, OWRITE); + if(f < 0) + return -1; + n = write(f, buf, len); + close(f); + return (n != len) ? 
-1 : 0; +} + +void +setenv(char *name, char *val) +{ + int f; + char ename[64]; + + snprint(ename, sizeof ename, "#e/%s", name); + f = create(ename, 1, 0666); + if(f < 0){ + fprint(2, "create %s: %r\n", ename); + return; + } + write(f, val, strlen(val)); + close(f); +} + +/* + * Post fd in #s under the last path element of name. + * Failure to create or write the file is fatal. + */ +void +srvcreate(char *name, int fd) +{ + char *srvname; + int f; + char buf[64]; + + srvname = strrchr(name, '/'); + if(srvname) + srvname++; + else + srvname = name; + + snprint(buf, sizeof buf, "#s/%s", srvname); + f = create(buf, 1, 0666); + if(f < 0) + fatal("%s", buf); /* buf is data; fatal() takes a format */ + sprint(buf, "%d", fd); + if(write(f, buf, strlen(buf)) != strlen(buf)) + fatal("write"); + close(f); +} + +void +catchint(void *a, char *note) +{ + USED(a); + if(strcmp(note, "alarm") == 0) + noted(NCONT); + noted(NDFLT); +} + +int +outin(char *prompt, char *def, int len) +{ + int n; + char buf[256]; + + if(len >= sizeof buf) + len = sizeof(buf)-1; + + if(cpuflag){ + notify(catchint); + alarm(15*1000); + } + print("%s[%s]: ", prompt, *def ? def : "no default"); + memset(buf, 0, sizeof buf); + n = read(0, buf, len); + if(cpuflag){ + alarm(0); + notify(0); + } + + if(n < 0){ + print("\n"); + return 1; + } + if(n > 1){ + buf[n-1] = 0; + strcpy(def, buf); + } + return n; +} diff -Nru /sys/src/9k/boot/boot.c /sys/src/9k/boot/boot.c --- /sys/src/9k/boot/boot.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/boot.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,302 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +char cputype[64]; +char sys[2*64]; +char reply[256]; +int printcol; +int mflag; +int fflag; +int kflag; + +char *bargv[Nbarg]; +int bargc; + +static void swapproc(void); +static Method *rootserver(char*); +static void usbinit(void); +static void kbmap(void); + +void +boot(int argc, char *argv[]) +{ + int fd, afd; + Method *mp; + char *cmd, cmdbuf[64], *iargv[16]; + char rootbuf[64]; + int islocal, ishybrid; + char *rp, *rsp; + int iargc, n; + char buf[32]; + AuthInfo *ai; + + fmtinstall('r', errfmt); + + bind("#c",
"/dev", MBEFORE); + open("/dev/cons", OREAD); + open("/dev/cons", OWRITE); + open("/dev/cons", OWRITE); + /* + * init will reinitialize its namespace. + * #ec gets us plan9.ini settings (*var variables). + */ + bind("#ec", "/env", MREPL); + bind("#e", "/env", MBEFORE|MCREATE); + bind("#s", "/srv", MREPL|MCREATE); +#define DEBUG +#ifdef DEBUG + print("argc=%d\n", argc); + for(fd = 0; fd < argc; fd++) + print("%#p %s ", argv[fd], argv[fd]); + print("\n"); +#endif + + ARGBEGIN{ + case 'k': + kflag = 1; + break; + case 'm': + mflag = 1; + break; + case 'f': + fflag = 1; + break; + }ARGEND + + readfile("#e/cputype", cputype, sizeof(cputype)); + + /* + * set up usb keyboard, mouse and disk, if any. + */ + usbinit(); + + /* + * pick a method and initialize it + */ + if(method[0].name == nil) + fatal("no boot methods"); + mp = rootserver(argc ? *argv : 0); + (*mp->config)(mp); + islocal = strcmp(mp->name, "local") == 0; + ishybrid = strcmp(mp->name, "hybrid") == 0; + + /* + * load keymap if it is there. 
+ */ + kbmap(); + + /* + * authentication agent + */ + authentication(cpuflag); + + /* + * connect to the root file system + */ + fd = (*mp->connect)(); + if(fd < 0) + fatal("can't connect to file server"); + if(!islocal && !ishybrid){ + if(cfs) + fd = (*cfs)(fd); + } + print("version..."); + buf[0] = '\0'; + n = fversion(fd, 0, buf, sizeof buf); + if(n < 0) + fatal("can't init 9P"); + srvcreate("boot", fd); + + /* + * create the name space, mount the root fs + */ + if(bind("/", "/", MREPL) < 0) + fatal("bind /"); + rp = getenv("rootspec"); + if(rp == nil) + rp = ""; + + afd = fauth(fd, rp); + if(afd >= 0){ + ai = auth_proxy(afd, auth_getkey, "proto=p9any role=client"); + if(ai == nil) + print("authentication failed (%r), trying mount anyways\n"); + } + if(mount(fd, afd, "/root", MREPL|MCREATE, rp) < 0) + fatal("mount /"); + rsp = rp; + rp = getenv("rootdir"); + if(rp == nil) + rp = rootdir; + if(bind(rp, "/", MAFTER|MCREATE) < 0){ + if(strncmp(rp, "/root", 5) == 0){ + fprint(2, "boot: couldn't bind $rootdir=%s to root: %r\n", rp); + fatal("second bind /"); + } + snprint(rootbuf, sizeof rootbuf, "/root/%s", rp); + rp = rootbuf; + if(bind(rp, "/", MAFTER|MCREATE) < 0){ + fprint(2, "boot: couldn't bind $rootdir=%s to root: %r\n", rp); + if(strcmp(rootbuf, "/root//plan9") == 0){ + fprint(2, "**** warning: remove rootdir=/plan9 entry from plan9.ini\n"); + rp = "/root"; + if(bind(rp, "/", MAFTER|MCREATE) < 0) + fatal("second bind /"); + }else + fatal("second bind /"); + } + } + close(fd); + setenv("rootdir", rp); + + settime(islocal, afd, rsp); + if(afd >= 0) /* same validity test as after fauth above */ + close(afd); + + cmd = getenv("init"); + if(cmd == nil){ + /* cputype may hold up to 63 bytes; keep the result inside cmdbuf */ + snprint(cmdbuf, sizeof cmdbuf, "/%s/init -%s%s", cputype, + cpuflag ? "c" : "t", mflag ? 
"m" : ""); + cmd = cmdbuf; + } + iargc = tokenize(cmd, iargv, nelem(iargv)-1); + cmd = iargv[0]; + + /* make iargv[0] basename(iargv[0]) */ + if(iargv[0] = strrchr(iargv[0], '/')) + iargv[0]++; + else + iargv[0] = cmd; + + iargv[iargc] = nil; + + exec(cmd, iargv); + fatal("%s", cmd); /* cmd is data; fatal() takes a format */ +} + +/* + * Return the first entry of method[] whose name matches a, + * comparing only as many bytes as the shorter of the two + * (a is cut at its first '!'); nil if a is empty or nothing matches. + */ +static Method* +findmethod(char *a) +{ + Method *mp; + int i, j; + char *cp; + + if((i = strlen(a)) == 0) + return nil; + cp = strchr(a, '!'); + if(cp) + i = cp - a; + for(mp = method; mp->name; mp++){ + j = strlen(mp->name); + if(j > i) + j = i; + if(strncmp(a, mp->name, j) == 0) + break; + } + if(mp->name) + return mp; + return nil; +} + +/* + * ask user from whence cometh the root file system + */ +static Method* +rootserver(char *arg) +{ + char prompt[256]; + Method *mp; + char *cp; + int n; + + /* look for required reply */ + readfile("#e/nobootprompt", reply, sizeof(reply)); + if(reply[0]){ + mp = findmethod(reply); + if(mp) + goto HaveMethod; + print("boot method %s not found\n", reply); + reply[0] = 0; + } + + /* + * make list of methods; snprint returns the number of bytes + * actually stored, so n stays below sizeof(prompt) + */ + mp = method; + n = snprint(prompt, sizeof(prompt), "root is from (%s", mp->name); + for(mp++; mp->name; mp++) + n += snprint(prompt+n, sizeof(prompt)-n, ", %s", mp->name); + snprint(prompt+n, sizeof(prompt)-n, ")"); + + /* create default reply */ + readfile("#e/bootargs", reply, sizeof(reply)); + if(reply[0] == 0 && arg != 0) + snprint(reply, sizeof(reply), "%s", arg); + if(reply[0]){ + mp = findmethod(reply); + if(mp == 0) + reply[0] = 0; + } + if(reply[0] == 0) + snprint(reply, sizeof(reply), "%s", method->name); + + /* parse replies */ + do{ + outin(prompt, reply, sizeof(reply)); + mp = findmethod(reply); + }while(mp == nil); + +HaveMethod: + bargc = tokenize(reply, bargv, Nbarg-2); + bargv[bargc] = nil; + cp = strchr(reply, '!'); + if(cp) + snprint(sys, sizeof(sys), "%s", cp+1); /* sys is smaller than reply */ + return mp; +} + +static void +usbinit(void) +{ + static char usbd[] = "/boot/usbd"; + + if(access("#u/usb/ctl", 0) >= 0 && bind("#u", "/dev", MAFTER) >= 0 && + access(usbd, AEXIST) >= 0) + run(usbd, nil); +} + +static void +kbmap(void) +{ + char *f; + int n, in, out; + char 
buf[1024]; + + f = getenv("kbmap"); + if(f == nil) + return; + if(bind("#κ", "/dev", MAFTER) < 0){ + warning("can't bind #κ"); + return; + } + + in = open(f, OREAD); + if(in < 0){ + warning("can't open kbd map"); + return; + } + out = open("/dev/kbmap", OWRITE); + if(out < 0) { + warning("can't open /dev/kbmap"); + close(in); + return; + } + while((n = read(in, buf, sizeof(buf))) > 0) + if(write(out, buf, n) != n){ + warning("write to /dev/kbmap failed"); + break; + } + close(in); + close(out); +} diff -Nru /sys/src/9k/boot/boot.h /sys/src/9k/boot/boot.h --- /sys/src/9k/boot/boot.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/boot.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,77 @@ +typedef struct Method Method; +struct Method +{ + char *name; + void (*config)(Method*); + int (*connect)(void); + char *arg; +}; +enum +{ + Statsz= 256, + Nbarg= 16, +}; + +extern void authentication(int); +extern char* bootdisk; +extern char* rootdir; +extern int (*cfs)(int); +extern int cpuflag; +extern char cputype[]; +extern int fflag; +extern int kflag; +extern Method method[]; +extern void (*pword)(int, Method*); +extern char sys[]; +extern uchar hostkey[]; +extern uchar statbuf[Statsz]; +extern int bargc; +extern char *bargv[Nbarg]; + +/* libc equivalent */ +extern int cache(int); +extern char* checkkey(Method*, char*, char*); +extern void fatal(char*, ...); +extern void getpasswd(char*, int); +extern void key(int, Method*); +extern int outin(char*, char*, int); +extern int plumb(char*, char*, int*, char*); +extern int readfile(char*, char*, int); +extern long readn(int, void*, long); +extern void run(char *file, ...); +extern int sendmsg(int, char*); +extern void setenv(char*, char*); +extern void settime(int, int, char*); +extern void srvcreate(char*, int); +extern void warning(char*, ...); +extern int writefile(char*, char*, int); +extern void boot(int, char **); +extern void doauthenticate(int, Method*); +extern int old9p(int); +extern int parsefields(char*, char**, int, char*); + 
+/* methods */ +extern void configil(Method*); +extern void configtcp(Method*); +extern int connectil(void); +extern int connecttcp(void); + +extern void configlocal(Method*); +extern int connectlocal(void); + +extern void configlocalpaq(Method*); +extern int connectlocalpaq(void); + +extern void configsac(Method*); +extern int connectsac(void); + +extern void configpaq(Method*); +extern int connectpaq(void); + +extern void configembed(Method*); +extern int connectembed(void); + +extern void configip(int, char**, char*); + +/* hack for passing authentication address */ +extern char *authaddr; diff -Nru /sys/src/9k/boot/bootauth.c /sys/src/9k/boot/bootauth.c --- /sys/src/9k/boot/bootauth.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/bootauth.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +char *authaddr; +static void glenda(void); + +void +authentication(int cpuflag) +{ + char *argv[16], **av; + int ac; + + if(access("/boot/factotum", AEXEC) < 0){ + glenda(); + return; + } + + /* start agent */ + ac = 0; + av = argv; + av[ac++] = "factotum"; + if(getenv("debugfactotum")) + av[ac++] = "-p"; +// av[ac++] = "-d"; /* debug traces */ +// av[ac++] = "-D"; /* 9p messages */ + if(cpuflag) + av[ac++] = "-S"; + else + av[ac++] = "-u"; + av[ac++] = "-sfactotum"; + if(authaddr != nil){ + av[ac++] = "-a"; + av[ac++] = authaddr; + } + av[ac] = 0; + switch(fork()){ + case -1: + fatal("starting factotum"); + case 0: + exec("/boot/factotum", av); + fatal("execing /boot/factotum"); + default: + break; + } + + /* wait for agent to really be there */ + while(access("/mnt/factotum", 0) < 0) + sleep(250); + + if(cpuflag) + return; +} + +static void +glenda(void) +{ + int fd; + char *s; + + s = getenv("user"); + if(s == nil) + s = "glenda"; + + fd = open("#c/hostowner", OWRITE); + if(fd >= 0){ + if(write(fd, s, strlen(s)) != strlen(s)) + fprint(2, "setting #c/hostowner to %s: %r\n", s); + close(fd); + } +} diff -Nru 
/sys/src/9k/boot/bootcache.c /sys/src/9k/boot/bootcache.c
--- /sys/src/9k/boot/bootcache.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/boot/bootcache.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,92 @@
+#include
+#include
+#include <../boot/boot.h>
+
+uchar statbuf[Statsz];
+
+/*
+ * Interpose /boot/cfs on the descriptor fd.
+ * The cache partition is the last name listed in #e/cfs that exists;
+ * failing that, the name in #e/bootdisk cut at the first ':', with a
+ * trailing "disk", "fs" or "fossil" removed and "cache" appended;
+ * failing that, bootdisk with "cache" appended.
+ * Returns fd unchanged if /boot/cfs is missing, #e/cfs says "off",
+ * or no such partition exists.  Otherwise forks a child that execs
+ * /boot/cfs with fd as descriptor 0 and one end of a new pipe as
+ * descriptor 1, closes fd in the parent and returns the other end.
+ */
+int
+cache(int fd)
+{
+	int argc, i, p[2];
+	char *argv[5], bd[32], buf[256], partition[64], *pp;
+
+	if(stat("/boot/cfs", statbuf, sizeof statbuf) < 0)
+		return fd;
+
+	*partition = 0;
+
+	bind("#S", "/dev", MAFTER);
+	readfile("#e/cfs", buf, sizeof(buf));
+	if(*buf){
+		argc = tokenize(buf, argv, 4);
+		for(i = 0; i < argc; i++){
+			if(strcmp(argv[i], "off") == 0)
+				return fd;
+			else if(stat(argv[i], statbuf, sizeof statbuf) >= 0){
+				strncpy(partition, argv[i], sizeof(partition)-1);
+				partition[sizeof(partition)-1] = 0;
+			}
+		}
+	}
+
+	if(*partition == 0){
+		readfile("#e/bootdisk", bd, sizeof(bd));
+		if(*bd){
+			if(pp = strchr(bd, ':'))
+				*pp = 0;
+			/* damned artificial intelligence */
+			i = strlen(bd);
+			/* only compare a suffix when bd is long enough to hold it */
+			if(i >= 4 && strcmp("disk", &bd[i-4]) == 0)
+				bd[i-4] = 0;
+			else if(i >= 2 && strcmp("fs", &bd[i-2]) == 0)
+				bd[i-2] = 0;
+			else if(i >= 6 && strcmp("fossil", &bd[i-6]) == 0)
+				bd[i-6] = 0;
+			snprint(partition, sizeof partition, "%scache", bd);
+			if(stat(partition, statbuf, sizeof statbuf) < 0)
+				*bd = 0;
+		}
+		if(*bd == 0){
+			snprint(partition, sizeof partition, "%scache", bootdisk);
+			if(stat(partition, statbuf, sizeof statbuf) < 0)
+				return fd;
+		}
+	}
+
+	print("cfs...");
+	if(pipe(p)<0)
+		fatal("pipe");
+	switch(fork()){
+	case -1:
+		fatal("fork");
+	case 0:
+		close(p[1]);
+		dup(fd, 0);
+		close(fd);
+		dup(p[0], 1);
+		close(p[0]);
+		if(fflag)
+			execl("/boot/cfs", "bootcfs", "-rs", "-f", partition, 0);
+		else
+			execl("/boot/cfs", "bootcfs", "-s", "-f", partition, 0);
+		fatal("can't exec /boot/cfs");	/* a failed child must not carry on booting */
+	default:
+		close(p[0]);
+		close(fd);
+		fd = p[1];
+		break;
+	}
+	return fd;
+}
diff -Nru /sys/src/9k/boot/bootip.c /sys/src/9k/boot/bootip.c
--- /sys/src/9k/boot/bootip.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/boot/bootip.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,221 @@
+#include
+#include
+#include
+ +#include "boot.h" + +static char* fsaddr; +static char mpoint[32]; + +static int isvalidip(uchar*); +static void netndb(char*, uchar*); +static void netenv(char*, uchar*); +static char* queryaddr(char*, char*, char*, char*); + + +void +configip(int bargc, char **bargv, char *fsproto) +{ + Waitmsg *w; + int argc, pid; + char **arg, **argv, buf[32], buf1[32], *p; + + fmtinstall('I', eipfmt); + fmtinstall('M', eipfmt); + fmtinstall('E', eipfmt); + + arg = malloc((bargc+1) * sizeof(char*)); + if(arg == nil) + fatal("malloc"); + memmove(arg, bargv, bargc * sizeof(char*)); + arg[bargc] = 0; + +print("ipconfig..."); + argc = bargc; + argv = arg; + strcpy(mpoint, "/net"); + ARGBEGIN { + case 'x': + p = ARGF(); + if(p != nil) + snprint(mpoint, sizeof(mpoint), "/net%s", p); + break; + case 'g': + case 'b': + case 'h': + case 'm': + p = ARGF(); + USED(p); + break; + } ARGEND; + + /* bind in an ip interface */ + if(bind("#I", mpoint, MAFTER) < 0) + fatal("bind #I\n"); + if(access("#l0", 0) == 0 && bind("#l0", mpoint, MAFTER) < 0) + print("bind #l0: %r\n"); + if(access("#l1", 0) == 0 && bind("#l1", mpoint, MAFTER) < 0) + print("bind #l1: %r\n"); + if(access("#l2", 0) == 0 && bind("#l2", mpoint, MAFTER) < 0) + print("bind #l2: %r\n"); + if(access("#l3", 0) == 0 && bind("#l3", mpoint, MAFTER) < 0) + print("bind #l3: %r\n"); + bind("#©", "/dev", MBEFORE); + writefile("/dev/cecctl", "cecon #l0/ether0", 16); + bind("#æ", "/dev", MAFTER); + werrstr(""); + + /* let ipconfig configure the ip interface */ + switch(pid = fork()){ + case -1: + fatal("fork configuring ip"); + case 0: + exec("/boot/ipconfig", arg); + fatal("execing /ipconfig"); + default: + break; + } + + /* wait for ipconfig to finish */ + for(;;){ + w = wait(); + if(w != nil && w->pid == pid){ + if(w->msg[0] != 0) + fatal(w->msg); + free(w); + break; + } else if(w == nil) + fatal("configuring ip"); + free(w); + } + + readfile("#c/sysname", buf, sizeof buf); + if(buf[0] != 0){ + snprint(buf1, sizeof buf1, "name %s\n", 
buf); + writefile("#©/cecctl", buf1, sizeof buf1); + } + + if(fsproto == nil) + return; + + /* if we didn't get a file and auth server, query user */ + fsaddr = queryaddr("fs", fsproto, strcmp(fsproto,"tcp")==0? "564": "17008", "filesystem IP address"); + authaddr = queryaddr("auth", "tcp", "567", "authentication server IP address"); +} + +static char* +queryaddr(char *key, char *proto, char *svc, char *what) +{ + uchar ipa[IPaddrlen]; + char buf[64], *p; + + netndb(key, ipa); + if(!isvalidip(ipa)) + netenv(key, ipa); + while(!isvalidip(ipa)){ + buf[0] = 0; + outin(what, buf, sizeof(buf)); + p = strchr(buf, '!'); + if(p != nil) + return strdup(netmkaddr(buf, proto, svc)); + if(parseip(ipa, buf) == -1) + fprint(2, "configip: can't parse %s %s\n", what, buf); + } + return smprint("%s!%I!%s", proto, ipa, svc); +} + +void +configtcp(Method*) +{ + configip(bargc, bargv, "tcp"); +} + +int +connecttcp(void) +{ + int fd; + + fd = dial(fsaddr, 0, 0, 0); + if (fd < 0) + werrstr("dial %s: %r", fsaddr); + return fd; +} + +void +configil(Method*) +{ + configip(bargc, bargv, "il"); +} + +int +connectil(void) +{ + int fd; + + fd = dial(fsaddr, 0, 0, 0); + if(fd < 0) + werrstr("dial %s: %r", fsaddr); + return fd; +} + +static int +isvalidip(uchar *ip) +{ + if(ipcmp(ip, IPnoaddr) == 0) + return 0; + if(ipcmp(ip, v4prefix) == 0) + return 0; + return 1; +} + +static void +netenv(char *attr, uchar *ip) +{ + int fd, n; + char buf[128]; + + ipmove(ip, IPnoaddr); + snprint(buf, sizeof(buf), "#e/%s", attr); + fd = open(buf, OREAD); + if(fd < 0) + return; + + n = read(fd, buf, sizeof(buf)-1); + if(n <= 0) + return; + buf[n] = 0; + if (parseip(ip, buf) == -1) + fprint(2, "netenv: can't parse ip %s\n", buf); +} + +static void +netndb(char *attr, uchar *ip) +{ + int fd, n, c; + char buf[1024]; + char *p; + + ipmove(ip, IPnoaddr); + snprint(buf, sizeof(buf), "%s/ndb", mpoint); + fd = open(buf, OREAD); + if(fd < 0) + return; + n = read(fd, buf, sizeof(buf)-1); + close(fd); + if(n <= 0) + 
return; + buf[n] = 0; + n = strlen(attr); + for(p = buf; ; p++){ + p = strstr(p, attr); + if(p == nil) + break; + c = *(p-1); + if(*(p + n) == '=' && (p == buf || c == '\n' || c == ' ' || c == '\t')){ + p += n+1; + if (parseip(ip, p) == -1) + fprint(2, "netndb: can't parse ip %s\n", p); + return; + } + } +} diff -Nru /sys/src/9k/boot/doauthenticate.c /sys/src/9k/boot/doauthenticate.c --- /sys/src/9k/boot/doauthenticate.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/doauthenticate.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,126 @@ +#include +#include +#include +#include "../boot/boot.h" + +static char *pbmsg = "AS protocol botch"; +static char *ccmsg = "can't connect to AS"; + +long +readn(int fd, void *buf, long len) +{ + int m, n; + char *p; + + p = buf; + for(n = 0; n < len; n += m){ + m = read(fd, p+n, len-n); + if(m <= 0) + return -1; + } + return n; +} + +static char* +fromauth(Method *mp, char *trbuf, char *tbuf) +{ + int afd; + char t; + char *msg; + static char error[2*ERRMAX]; + + if(mp->auth == 0) + fatal("no method for accessing auth server"); + afd = (*mp->auth)(); + if(afd < 0) { + sprint(error, "%s: %r", ccmsg); + return error; + } + + if(write(afd, trbuf, TICKREQLEN) < 0 || read(afd, &t, 1) != 1){ + close(afd); + sprint(error, "%s: %r", pbmsg); + return error; + } + switch(t){ + case AuthOK: + msg = 0; + if(readn(afd, tbuf, 2*TICKETLEN) < 0) { + sprint(error, "%s: %r", pbmsg); + msg = error; + } + break; + case AuthErr: + if(readn(afd, error, ERRMAX) < 0) { + sprint(error, "%s: %r", pbmsg); + msg = error; + } + else { + error[ERRMAX-1] = 0; + msg = error; + } + break; + default: + msg = pbmsg; + break; + } + + close(afd); + return msg; +} + +void +doauthenticate(int fd, Method *mp) +{ + char *msg; + char trbuf[TICKREQLEN]; + char tbuf[2*TICKETLEN]; + + print("session..."); + if(fsession(fd, trbuf, sizeof trbuf) < 0) + fatal("session command failed"); + + /* no authentication required? 
*/ + memset(tbuf, 0, 2*TICKETLEN); + if(trbuf[0] == 0) + return; + + /* try getting to an auth server */ + print("getting ticket..."); + msg = fromauth(mp, trbuf, tbuf); + print("authenticating..."); + if(msg == 0) + if(fauth(fd, tbuf) >= 0) + return; + + /* didn't work, go for the security hole */ + fprint(2, "no authentication server (%s), using your key as server key\n", msg); +} + +char* +checkkey(Method *mp, char *name, char *key) +{ + char *msg; + Ticketreq tr; + Ticket t; + char trbuf[TICKREQLEN]; + char tbuf[TICKETLEN]; + + memset(&tr, 0, sizeof tr); + tr.type = AuthTreq; + strcpy(tr.authid, name); + strcpy(tr.hostid, name); + strcpy(tr.uid, name); + convTR2M(&tr, trbuf); + msg = fromauth(mp, trbuf, tbuf); + if(msg == ccmsg){ + fprint(2, "boot: can't contact auth server, passwd unchecked\n"); + return 0; + } + if(msg) + return msg; + convM2T(tbuf, &t, key); + if(t.num == AuthTc && strcmp(name, t.cuid)==0) + return 0; + return "no match"; +} diff -Nru /sys/src/9k/boot/embed.c /sys/src/9k/boot/embed.c --- /sys/src/9k/boot/embed.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/embed.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,74 @@ +#include +#include +#include <../boot/boot.h> + +static char *paqfile; + +void +configembed(Method *m) +{ + if(*sys == '/' || *sys == '#'){ + /* + * if the user specifies the disk in the boot cmd or + * 'root is from' prompt, use it + */ + paqfile = sys; + } else if(m->arg){ + /* + * a default is supplied when the kernel is made + */ + paqfile = m->arg; + } +} + +int +connectembed(void) +{ + int i, p[2]; + Dir *dir; + char **arg, **argp; + + dir = dirstat("/boot/paqfs"); + if(dir == nil) + return -1; + free(dir); + + dir = dirstat(paqfile); + if(dir == nil || dir->mode & DMDIR) + return -1; + free(dir); + + print("paqfs..."); + if(bind("#c", "/dev", MREPL) < 0) + fatal("bind #c"); + if(bind("#p", "/proc", MREPL) < 0) + fatal("bind #p"); + if(pipe(p)<0) + fatal("pipe"); + switch(fork()){ + case -1: + fatal("fork"); + case 0: + arg = 
malloc((bargc+5)*sizeof(char*)); + argp = arg; + *argp++ = "/boot/paqfs"; + *argp++ = "-iv"; + *argp++ = paqfile; + for(i=1; i +#include +#include <../boot/boot.h> + +void +getpasswd(char *p, int len) +{ + char c; + int i, n, fd; + + fd = open("#c/consctl", OWRITE); + if(fd < 0) + fatal("can't open consctl; please reboot"); + write(fd, "rawon", 5); + Prompt: + print("password: "); + n = 0; + for(;;){ + do{ + i = read(0, &c, 1); + if(i < 0) + fatal("can't read cons; please reboot"); + }while(i == 0); + switch(c){ + case '\n': + p[n] = '\0'; + close(fd); + print("\n"); + return; + case '\b': + if(n > 0) + n--; + break; + case 'u' - 'a' + 1: /* cntrl-u */ + print("\n"); + goto Prompt; + default: + if(n < len - 1) + p[n++] = c; + break; + } + } +} diff -Nru /sys/src/9k/boot/local.c /sys/src/9k/boot/local.c --- /sys/src/9k/boot/local.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/local.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,275 @@ +#include +#include +#include <../boot/boot.h> + +static char diskname[64]; +static char *disk; +static char **args; + +void +configlocal(Method *mp) +{ + char *p; + int n; + + if(*sys == '/' || *sys == '#'){ + /* + * if the user specifies the disk in the boot cmd or + * 'root is from' prompt, use it + */ + disk = sys; + } else if(strncmp(argv0, "dksc(0,", 7) == 0){ + /* + * on many mips arg0 of the boot command specifies the + * scsi logical unit number + */ + p = strchr(argv0, ','); + n = strtoul(p+1, 0, 10); + sprint(diskname, "#w%d/sd%dfs", n, n); + disk = diskname; + } else if(mp->arg){ + /* + * a default is supplied when the kernel is made + */ + disk = mp->arg; + } else if(*bootdisk){ + /* + * an environment variable from a pc's plan9.ini or + * from the mips nvram or generated by the kernel + * is the last resort. 
+ */ + disk = bootdisk; + } + + /* if we've decided on one, pass it on to all programs */ + if(disk) + setenv("bootdisk", disk); + + USED(mp); +} + +int +connectlocalkfs(void) +{ + int i, pid, fd, p[2]; + char partition[64]; + char *dev; + char **arg, **argp; + Dir *d; + + if(stat("/boot/kfs", statbuf, sizeof statbuf) < 0) + return -1; + + dev = disk ? disk : bootdisk; + snprint(partition, sizeof partition, "%sfs", dev); + fd = open(partition, OREAD); + if(fd < 0){ + strcpy(partition, dev); + fd = open(partition, OREAD); + if(fd < 0) + return -1; + } + /* + * can't do this check -- might be some other server posing as kfs. + * + memset(buf, 0, sizeof buf); + pread(fd, buf, 512, 0); + close(fd); + if(memcmp(buf+256, "kfs wren device\n", 16) != 0){ + if(strstr(partition, "/fs")) + print("no kfs file system found on %s\n", partition); + return -1; + } + * + */ + d = dirfstat(fd); + close(fd); + if(d == nil) + return -1; + if(d->mode&DMDIR){ + free(d); + return -1; + } + free(d); + + print("kfs..."); + if(pipe(p)<0) + fatal("pipe"); + switch(pid = fork()){ + case -1: + fatal("fork"); + case 0: + arg = malloc((bargc+5)*sizeof(char*)); + argp = arg; + *argp++ = "kfs"; + *argp++ = "-f"; + *argp++ = partition; + *argp++ = "-s"; + for(i=1; i= 0){ + print("venti..."); + memset(buf, 0, sizeof buf); + pread(fd, buf, 512, 248*1024); + close(fd); + if(memcmp(buf, "venti config\n", 13) != 0){ + print("no venti config found on %s\n", f[0]); + return -1; + } + if(stat("/boot/venti", statbuf, sizeof statbuf) < 0){ + print("/boot/venti does not exist\n"); + return -1; + } + switch(nf){ + case 1: + f[1] = "tcp!127.1!17034"; + case 2: + f[2] = "tcp!127.1!8000"; + } + configloopback(); + run("/boot/venti", "-c", f[0], "-a", f[1], "-h", f[2], nil); + /* + * If the announce address is tcp!*!foo, then set + * $venti to tcp!127.1!foo instead, which is actually dialable. 
+ */ + if((p = strstr(f[1], "!*!")) != 0){ + *p = 0; + snprint(buf, sizeof buf, "%s!127.1!%s", f[1], p+3); + f[1] = buf; + } + setenv("venti", f[1]); + }else{ + /* set up the network so we can talk to the venti server */ + /* this is such a crock. */ + configip(nf, f, 0); + setenv("venti", f[0]); + } + } + + /* start fossil */ + print("fossil(%s)...", partition); + run("/boot/fossil", "-f", partition, "-c", "srv -A fboot", "-c", "srv -p fscons", nil); + fd = open("#s/fboot", ORDWR); + if(fd < 0){ + print("open #s/fboot: %r\n"); + return -1; + } + remove("#s/fboot"); /* we'll repost as #s/boot */ + return fd; +} + +int +connectlocal(void) +{ + int fd; + + if(bind("#c", "/dev", MREPL) < 0) + fatal("bind #c"); + if(bind("#p", "/proc", MREPL) < 0) + fatal("bind #p"); + bind("#S", "/dev", MAFTER); + bind("#k", "/dev", MAFTER); + bind("#æ", "/dev", MAFTER); + + if((fd = connectlocalfossil()) < 0) + if((fd = connectlocalkfs()) < 0) + return -1; + return fd; +} diff -Nru /sys/src/9k/boot/localpaq.c /sys/src/9k/boot/localpaq.c --- /sys/src/9k/boot/localpaq.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/localpaq.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,194 @@ +#include +#include +#include <../boot/boot.h> + +/* + * this is a paqfs version of local. + */ + +static char diskname[64]; +static char *disk; +static char **args; + +void +configlocalpaq(Method *mp) +{ + +//print("configlocalpaq: sys=%s\n", sys); +argv0 = ""; + if(*sys == '/' || *sys == '#'){ + /* + * if the user specifies the disk in the boot cmd or + * 'root is from' prompt, use it + */ + disk = sys; + } else if(mp->arg){ + /* + * a default is supplied when the kernel is made + */ + disk = mp->arg; + } else if(*bootdisk){ + /* + * an environment variable from a pc's plan9.ini or + * from the nvram or generated by the kernel + * is the last resort. 
+ */ + disk = bootdisk; + } + + /* if we've decided on one, pass it on to all programs */ + if(disk) + setenv("bootdisk", disk); + + USED(mp); +} + +static char partition[64]; + +static int +forkexec(char **execv) +{ + int pid, i, p[2]; + + print("%s...", execv[1]); + if(pipe(p) < 0) + fatal("pipe"); + switch(pid = fork()){ + case -1: + fatal("fork"); + case 0: + dup(p[1], 0); + dup(p[1], 1); + close(p[0]); + close(p[1]); + exec(execv[0], execv+1); + fatal("can't exec %s", execv[1]); + default: + break; + } + for(;;){ + if((i = waitpid()) == -1) + fatal("waitpid for %s failed", execv[1]); + if(i == pid) + break; + } + close(p[1]); + return p[0]; +} + +#define RAMMNT "/tmp" +#define PAQF "/paqfs" + +static void +rampart(int partfd) +{ + int mfd, pfd, n, i; + char *buf; + char *ramv[] = { "/boot/ramfs", "ramfs", "-i", "-l 64m", 0 }; + enum { + Nbuf = 1024*1024, + Ndot = (1024*1024)/Nbuf, + }; + + if(stat(ramv[0], statbuf, sizeof statbuf) < 0) + return; + + mfd = forkexec(ramv); + if(mfd < 0) + return; + if(mount(mfd, -1, RAMMNT, MREPL|MCREATE, "") < 0){ + print("rampart mount fail: %r\n"); + close(mfd); + return; + } + /* mfd is closed by mount */ + pfd = create(RAMMNT PAQF, ORDWR, 0666|OEXCL); + if(pfd < 0){ + print("rampart create fail: %r\n"); + unmount(0, RAMMNT); + return; + } + buf = malloc(Nbuf); + if(buf == nil){ + print("rampart malloc fail: %r\n"); + close(pfd); + return; + } + for(i=1;;i++){ + if ((i % Ndot) == 0) + print("."); + n = read(partfd, buf, Nbuf); + if (n <= 0) + break; + write(pfd, buf, n); + } + free(buf); + close(pfd); + + snprint(partition, sizeof partition, RAMMNT PAQF); +} + +static int +connectpaqfs(void) +{ + int fd, i; + char *dev; + char **arg, **argp; + + if(stat("/boot/paqfs", statbuf, sizeof statbuf) < 0) + return -1; + + dev = disk ? 
disk : bootdisk; + snprint(partition, sizeof partition, "%sctl", dev); + fd = open(partition, OWRITE); + if(fd >= 0){ + fprint(fd, "dma on\n"); + fprint(fd, "rwm on\n"); + close(fd); + } + + snprint(partition, sizeof partition, "%sroot", dev); + + fd = open(partition, OREAD); + if(fd < 0){ + strcpy(partition, dev); + fd = open(partition, OREAD); + if(fd < 0) + return -1; + } + rampart(fd); + close(fd); + + arg = malloc((bargc+8)*sizeof(char*)); + argp = arg; + *argp++ = "/boot/paqfs"; + *argp++ = "paqfs"; + *argp++ = "-v"; + *argp++ = "-i"; + *argp++ = "-q"; + *argp++ = "-s"; + *argp++ = partition; + for(i=1; i +#include +#include +#include +#include "../boot/boot.h" + +static Fcall hdr; + +static void +rpc(int fd, int type) +{ + int n, l; + char buf[128], *p; + + hdr.type = type; + hdr.tag = NOTAG; + n = convS2M(&hdr, buf); + if(write(fd, buf, n) != n) + fatal("write rpc"); + + print("..."); + p = buf; + l = 0; + while(l < 3) { + n = read(fd, p, 3); + if(n <= 0) + fatal("read rpc"); + if(n == 2 && l == 0 && buf[0] == 'O' && buf[1] == 'K') + continue; + p += n; + l += n; + } + if(convM2S(buf, &hdr, n) == 0){ + print("%ux %ux %ux\n", buf[0], buf[1], buf[2]); + fatal("rpc format"); + } + if(hdr.tag != NOTAG) + fatal("rpc tag not NOTAG"); + if(hdr.type == Rerror){ + print("error %s;", hdr.ename); + fatal("remote error"); + } + if(hdr.type != type+1) + fatal("not reply"); +} + +void +nop(int fd) +{ + print("nop"); + rpc(fd, Tnop); +} diff -Nru /sys/src/9k/boot/paq.c /sys/src/9k/boot/paq.c --- /sys/src/9k/boot/paq.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/paq.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,67 @@ +#include +#include +#include <../boot/boot.h> + +char *fparts[] = +{ + "add bootldr 0x0000000 0x0040000", + "add params 0x0040000 0x0080000", + "add kernel 0x0080000 0x0140000", + "add user 0x0140000 0x0200000", + "add ramdisk 0x0200000 0x0600000", +}; + +void +configpaq(Method*) +{ + int fd; + int i; + + if(bind("#F", "/dev", MAFTER) < 0) + fatal("bind #c"); + 
if(bind("#p", "/proc", MREPL) < 0) + fatal("bind #p"); + fd = open("/dev/flash/flashctl", OWRITE); + if(fd < 0) + fatal("opening flashctl"); + for(i = 0; i < nelem(fparts); i++) + if(fprint(fd, fparts[i]) < 0) + fatal(fparts[i]); + close(fd); +} + +int +connectpaq(void) +{ + int p[2]; + char **arg, **argp; + + print("paq..."); + if(pipe(p)<0) + fatal("pipe"); + switch(fork()){ + case -1: + fatal("fork"); + case 0: + arg = malloc(10*sizeof(char*)); + argp = arg; + *argp++ = "paqfs"; + *argp++ = "-v"; + *argp++ = "-i"; + *argp++ = "/dev/flash/ramdisk"; + *argp = 0; + + dup(p[0], 0); + dup(p[1], 1); + close(p[0]); + close(p[1]); + exec("/boot/paqfs", arg); + fatal("can't exec paqfs"); + default: + break; + } + waitpid(); + + close(p[1]); + return p[0]; +} diff -Nru /sys/src/9k/boot/parts.c /sys/src/9k/boot/parts.c --- /sys/src/9k/boot/parts.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/parts.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,607 @@ +/* + * read disk partition tables, intended for early use on systems + * that don't use 9load. borrowed from 9load. 
+ */ + +#include +#include +#include +#include +#include +#include "../boot/boot.h" + +typedef struct Fs Fs; +#include "/sys/src/boot/pc/dosfs.h" + +#define GSHORT(p) (((p)[1]<<8)|(p)[0]) +#define GLONG(p) ((GSHORT((p)+2)<<16)|GSHORT(p)) + +#define trace 0 + +enum { + parttrace = 0, + + Npart = 64, + SDnpart = Npart, + + Maxsec = 2048, + Cdsec = 2048, + Normsec = 512, /* disks */ + + NAMELEN = 256, /* hack */ +}; + +typedef struct SDpart SDpart; +typedef struct SDunit SDunit; + +typedef struct SDpart { + uvlong start; + uvlong end; + char name[NAMELEN]; + int valid; +} SDpart; + +typedef struct SDunit { + int ctl; /* fds */ + int data; + + char name[NAMELEN]; + + uvlong sectors; + ulong secsize; + SDpart* part; + int npart; /* of valid partitions */ +} SDunit; + +static uchar *mbrbuf, *partbuf; + +static void +sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end) +{ + SDpart *pp; + int i, partno; + + if(parttrace) + print("add %d %s %s %lld %lld\n", unit->npart, unit->name, name, start, end); + /* + * Check name not already used + * and look for a free slot. + */ + if(unit->part != nil){ + partno = -1; + for(i = 0; i < SDnpart; i++){ + pp = &unit->part[i]; + if(!pp->valid){ + if(partno == -1) + partno = i; + break; + } + if(strcmp(name, pp->name) == 0){ + if(pp->start == start && pp->end == end){ + if(parttrace) + print("already present\n"); + return; + } + } + } + }else{ + if((unit->part = malloc(sizeof(SDpart)*SDnpart)) == nil){ + if(parttrace) + print("malloc failed\n"); + return; + } + partno = 0; + } + + /* + * Check there is a free slot and size and extent are valid. + */ + if(partno == -1 || start > end || end > unit->sectors){ + print("cannot add %s!%s [%llud,%llud) to disk [0,%llud): %s\n", + unit->name, name, start, end, unit->sectors, + partno==-1 ? 
"no free partitions" : "partition boundaries out of range"); + return; + } + pp = &unit->part[partno]; + pp->start = start; + pp->end = end; + strncpy(pp->name, name, NAMELEN); + pp->valid = 1; + unit->npart++; + + /* update devsd's in-memory partition table */ + if (fprint(unit->ctl, "part %s %lld %lld\n", name, start, end) < 0) + fprint(2, "can't update %s's devsd partition table for %s: %r\n", + unit->name, name); + dprint("part %s %lld %lld\n", name, start, end); +} + +static long +sdread(SDunit *unit, SDpart *pp, void* va, long len, vlong off) +{ + long l, secsize; + uvlong bno, nb; + + /* + * Check the request is within partition bounds. + */ + secsize = unit->secsize; + if (secsize == 0) + sysfatal("sdread: zero sector size"); + bno = off/secsize + pp->start; + nb = (off+len+secsize-1)/secsize + pp->start - bno; + if(bno+nb > pp->end) + nb = pp->end - bno; + if(bno >= pp->end || nb == 0) + return 0; + + seek(unit->data, bno * secsize, 0); + assert(va); /* "sdread" */ + l = read(unit->data, va, len); + if (l < 0) + return 0; + return l; +} + +static int +sdreadblk(SDunit *unit, SDpart *part, void *a, vlong off, int mbr) +{ + uchar *b; + + assert(a); /* sdreadblk */ + if(sdread(unit, part, a, unit->secsize, off) != unit->secsize){ + if(trace) + print("%s: read %lud at %lld failed\n", unit->name, + unit->secsize, (vlong)part->start*unit->secsize+off); + return -1; + } + b = a; + if(mbr && (b[0x1FE] != 0x55 || b[0x1FF] != 0xAA)){ + if(trace) + print("%s: bad magic %.2ux %.2ux at %lld\n", + unit->name, b[0x1FE], b[0x1FF], + (vlong)part->start*unit->secsize+off); + return -1; + } + return 0; +} + +/* + * read partition table. The partition table is just ascii strings. + */ +#define MAGIC "plan9 partitions" +static void +oldp9part(SDunit *unit) +{ + SDpart *pp; + char *field[3], *line[Npart+1]; + ulong n; + uvlong start, end; + int i; + + /* + * We have some partitions already. 
+ */ + pp = &unit->part[unit->npart]; + + /* + * We prefer partition tables on the second to last sector, + * but some old disks use the last sector instead. + */ + strcpy(pp->name, "partition"); + pp->start = unit->sectors - 2; + pp->end = unit->sectors - 1; + + dprint("oldp9part %s\n", unit->name); + if(sdreadblk(unit, pp, partbuf, 0, 0) < 0) + return; + + if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) { + /* not found on 2nd last sector; look on last sector */ + pp->start++; + pp->end++; + if(sdreadblk(unit, pp, partbuf, 0, 0) < 0) + return; + if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) + return; + print("%s: using old plan9 partition table on last sector\n", unit->name); + }else + print("%s: using old plan9 partition table on 2nd-to-last sector\n", unit->name); + + /* we found a partition table, so add a partition partition */ + unit->npart++; + partbuf[unit->secsize-1] = '\0'; + + /* + * parse partition table + */ + n = gettokens((char*)partbuf, line, Npart+1, "\n"); + if(n && strncmp(line[0], MAGIC, sizeof(MAGIC)-1) == 0){ + for(i = 1; i < n && unit->npart < SDnpart; i++){ + if(gettokens(line[i], field, 3, " ") != 3) + break; + start = strtoull(field[1], 0, 0); + end = strtoull(field[2], 0, 0); + if(start >= end || end > unit->sectors) + break; + sdaddpart(unit, field[0], start, end); + } + } +} + +static SDpart* +sdfindpart(SDunit *unit, char *name) +{ + int i; + + if(parttrace) + print("findpart %d %s %s: ", unit->npart, unit->name, name); + for(i=0; inpart; i++) { + if(parttrace) + print("%s...", unit->part[i].name); + if(strcmp(unit->part[i].name, name) == 0){ + if(parttrace) + print("\n"); + return &unit->part[i]; + } + } + if(parttrace) + print("not found\n"); + return nil; +} + +/* + * look for a plan 9 partition table on drive `unit' in the second + * sector (sector 1) of partition `name'. + * if found, add the partitions defined in the table. 
+ */ +static void +p9part(SDunit *unit, char *name) +{ + SDpart *p; + char *field[4], *line[Npart+1]; + uvlong start, end; + int i, n; + + dprint("p9part %s %s\n", unit->name, name); + p = sdfindpart(unit, name); + if(p == nil) + return; + + if(sdreadblk(unit, p, partbuf, unit->secsize, 0) < 0) + return; + partbuf[unit->secsize-1] = '\0'; + + if(strncmp((char*)partbuf, "part ", 5) != 0) + return; + + n = gettokens((char*)partbuf, line, Npart+1, "\n"); + if(n == 0) + return; + for(i = 0; i < n && unit->npart < SDnpart; i++){ + if(strncmp(line[i], "part ", 5) != 0) + break; + if(gettokens(line[i], field, 4, " ") != 4) + break; + start = strtoull(field[2], 0, 0); + end = strtoull(field[3], 0, 0); + if(start >= end || end > unit->sectors) + break; + sdaddpart(unit, field[1], p->start+start, p->start+end); + } +} + +static int +isdos(int t) +{ + return t==FAT12 || t==FAT16 || t==FATHUGE || t==FAT32 || t==FAT32X; +} + +static int +isextend(int t) +{ + return t==EXTEND || t==EXTHUGE || t==LEXTEND; +} + +/* + * Fetch the first dos and all plan9 partitions out of the MBR partition table. + * We return -1 if we did not find a plan9 partition. + */ +static int +mbrpart(SDunit *unit) +{ + Dospart *dp; + uvlong taboffset, start, end; + uvlong firstxpart, nxtxpart; + int havedos, i, nplan9; + char name[10]; + + taboffset = 0; + dp = (Dospart*)&mbrbuf[0x1BE]; + { + /* get the MBR (allowing for DMDDO) */ + if(sdreadblk(unit, &unit->part[0], mbrbuf, + (vlong)taboffset * unit->secsize, 1) < 0) + return -1; + for(i=0; i<4; i++) + if(dp[i].type == DMDDO) { + if(trace) + print("DMDDO partition found\n"); + taboffset = 63; + if(sdreadblk(unit, &unit->part[0], mbrbuf, + (vlong)taboffset * unit->secsize, 1) < 0) + return -1; + i = -1; /* start over */ + } + } + + /* + * Read the partitions, first from the MBR and then + * from successive extended partition tables. 
+ */ + nplan9 = 0; + havedos = 0; + firstxpart = 0; + for(;;) { + if(sdreadblk(unit, &unit->part[0], mbrbuf, + (vlong)taboffset * unit->secsize, 1) < 0) + return -1; + if(trace) { + if(firstxpart) + print("%s ext %llud ", unit->name, taboffset); + else + print("%s mbr ", unit->name); + } + nxtxpart = 0; + for(i=0; i<4; i++) { + if(trace) + print("dp %d...", dp[i].type); + start = taboffset+GLONG(dp[i].start); + end = start+GLONG(dp[i].len); + + if(dp[i].type == PLAN9) { + if(nplan9 == 0) + strcpy(name, "plan9"); + else + sprint(name, "plan9.%d", nplan9); + sdaddpart(unit, name, start, end); + p9part(unit, name); + nplan9++; + } + + /* + * We used to take the active partition (and then the first + * when none are active). We have to take the first here, + * so that the partition we call ``dos'' agrees with the + * partition disk/fdisk calls ``dos''. + */ + if(havedos==0 && isdos(dp[i].type)){ + havedos = 1; + sdaddpart(unit, "dos", start, end); + } + + /* nxtxpart is relative to firstxpart (or 0), not taboffset */ + if(isextend(dp[i].type)){ + nxtxpart = start-taboffset+firstxpart; + if(trace) + print("link %llud...", nxtxpart); + } + } + if(trace) + print("\n"); + + if(!nxtxpart) + break; + if(!firstxpart) + firstxpart = nxtxpart; + taboffset = nxtxpart; + } + return nplan9 ? 0 : -1; +} + +/* + * To facilitate booting from CDs, we create a partition for + * the boot floppy image embedded in a bootable CD. 
+ */ +static int +part9660(SDunit *unit) +{ + uchar buf[Maxsec]; + ulong a, n; + uchar *p; + + if(unit->secsize != Cdsec) + return -1; + + if(sdread(unit, &unit->part[0], buf, Cdsec, 17*Cdsec) < 0) + return -1; + + if(buf[0] || strcmp((char*)buf+1, "CD001\x01EL TORITO SPECIFICATION") != 0) + return -1; + + + p = buf+0x47; + a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + + if(sdread(unit, &unit->part[0], buf, Cdsec, a*Cdsec) < 0) + return -1; + + if(memcmp(buf, "\x01\x00\x00\x00", 4) != 0 + || memcmp(buf+30, "\x55\xAA", 2) != 0 + || buf[0x20] != 0x88) + return -1; + + p = buf+0x28; + a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + + switch(buf[0x21]){ + case 0x01: + n = 1200*1024; + break; + case 0x02: + n = 1440*1024; + break; + case 0x03: + n = 2880*1024; + break; + default: + return -1; + } + n /= Cdsec; + + print("found partition %s!cdboot; %lud+%lud\n", unit->name, a, n); + sdaddpart(unit, "cdboot", a, a+n); + return 0; +} + +enum { + NEW = 1<<0, + OLD = 1<<1 +}; + +/* + * read unit->data to look for partition tables. + * if found, stash partitions in environment and write them to ctl too. + */ +static void +partition(SDunit *unit) +{ + int type; + char *p; + + if(unit->part == 0) + return; + + if(part9660(unit) == 0) + return; + + p = getenv("partition"); + if(p != nil && strncmp(p, "new", 3) == 0) + type = NEW; + else if(p != nil && strncmp(p, "old", 3) == 0) + type = OLD; + else + type = NEW|OLD; + + if(mbrbuf == nil) { + mbrbuf = malloc(Maxsec); + partbuf = malloc(Maxsec); + if(mbrbuf==nil || partbuf==nil) { + free(mbrbuf); + free(partbuf); + partbuf = mbrbuf = nil; + return; + } + } + + /* + * there might be no mbr (e.g. on a very large device), so look for + * a bare plan 9 partition table if mbrpart fails. 
+ */ + if((type & NEW) && mbrpart(unit) >= 0){ + /* nothing to do */ + } + else if (type & NEW) + p9part(unit, "data"); + else if(type & OLD) + oldp9part(unit); +} + +static void +rdgeom(SDunit *unit) +{ + char *line; + char *flds[5]; + Biobuf bb; + Biobuf *bp; + static char geom[] = "geometry "; + + bp = &bb; + seek(unit->ctl, 0, 0); + Binit(bp, unit->ctl, OREAD); + while((line = Brdline(bp, '\n')) != nil){ + line[Blinelen(bp) - 1] = '\0'; + if (strncmp(line, geom, sizeof geom - 1) == 0) + break; + } + if (line != nil && tokenize(line, flds, nelem(flds)) >= 3) { + unit->sectors = atoll(flds[1]); + unit->secsize = atoll(flds[2]); + } + Bterm(bp); + seek(unit->ctl, 0, 0); +} + +static void +setpartitions(char *name, int ctl, int data) +{ + SDunit sdunit; + SDunit *unit; + SDpart *part0; + + unit = &sdunit; + memset(unit, 0, sizeof *unit); + unit->ctl = ctl; + unit->data = data; + + unit->secsize = Normsec; /* default: won't work for CDs */ + unit->sectors = ~0ull; + rdgeom(unit); + strncpy(unit->name, name, sizeof unit->name); + unit->part = mallocz(sizeof(SDpart) * SDnpart, 1); + + part0 = &unit->part[0]; + part0->end = unit->sectors - 1; + strcpy(part0->name, "data"); + part0->valid = 1; + unit->npart++; + + mbrbuf = malloc(Maxsec); + partbuf = malloc(Maxsec); + partition(unit); + free(unit->part); +} + +/* + * read disk partition tables so that readnvram via factotum + * can see them. 
+ */ +int +readparts(void) +{ + int i, n, ctl, data, fd; + char *name, *ctlname, *dataname; + Dir *dir; + + fd = open("/dev", OREAD); + if(fd < 0) + return -1; + n = dirreadall(fd, &dir); + close(fd); + + for(i = 0; i < n; i++) { + name = dir[i].name; + if (strncmp(name, "sd", 2) != 0) + continue; + + ctlname = smprint("/dev/%s/ctl", name); + dataname = smprint("/dev/%s/data", name); + if (ctlname == nil || dataname == nil) { + free(ctlname); + free(dataname); + continue; + } + + ctl = open(ctlname, ORDWR); + data = open(dataname, OREAD); + free(ctlname); + free(dataname); + + if (ctl >= 0 && data >= 0) + setpartitions(dataname, ctl, data); + close(ctl); + close(data); + } + free(dir); + return 0; +} diff -Nru /sys/src/9k/boot/printstub.c /sys/src/9k/boot/printstub.c --- /sys/src/9k/boot/printstub.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/printstub.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,22 @@ +#include +#include + +static Lock fmtl; + +void +_fmtlock(void) +{ + lock(&fmtl); +} + +void +_fmtunlock(void) +{ + unlock(&fmtl); +} + +int +_efgfmt(Fmt*) +{ + return -1; +} diff -Nru /sys/src/9k/boot/sac.c /sys/src/9k/boot/sac.c --- /sys/src/9k/boot/sac.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/sac.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,50 @@ +#include +#include +#include <../boot/boot.h> + +/* + * HACK - take over from boot since file system is not + * available on a pipe + */ + +void +configsac(Method *mp) +{ + int fd; + char cmd[64]; + + USED(mp); + + /* + * create the name space, mount the root fs + */ + if(bind("/", "/", MREPL) < 0) + fatal("bind /"); + if(bind("#C", "/", MAFTER) < 0) + fatal("bind /"); + + /* fixed sysname - enables correct namespace file */ + fd = open("#c/sysname", OWRITE); + if(fd < 0) + fatal("open sysname"); + write(fd, "brick", 5); + close(fd); + + fd = open("#c/hostowner", OWRITE); + if(fd < 0) + fatal("open sysname"); + write(fd, "brick", 5); + close(fd); + + sprint(cmd, "/%s/init", cputype); + print("starting %s\n", cmd); + execl(cmd, 
"init", "-c", 0); + fatal(cmd); +} + +int +connectsac(void) +{ + /* does not get here */ + return -1; +} diff -Nru /sys/src/9k/boot/settime.c /sys/src/9k/boot/settime.c --- /sys/src/9k/boot/settime.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/boot/settime.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +static long lusertime(char*); + +char *timeserver = "#s/boot"; + +void +settime(int islocal, int afd, char *rp) +{ + int n, f; + Dir dir[2]; + char timebuf[64]; + static int timeset; + + if(timeset) + return; + print("time..."); + if(islocal){ + /* + * set the time from the real time clock + */ + f = open("#r/rtc", ORDWR); + if(f >= 0){ + if((n = read(f, timebuf, sizeof(timebuf)-1)) > 0){ + timebuf[n] = '\0'; + timeset = 1; + } + close(f); + }else do{ + strcpy(timebuf, "yymmddhhmm[ss]"); + outin("\ndate/time ", timebuf, sizeof(timebuf)); + }while((timeset=lusertime(timebuf)) <= 0); + } + if(timeset == 0){ + /* + * set the time from the access time of the root + */ + f = open(timeserver, ORDWR); + if(f < 0) + return; + if(mount(f, afd, "/tmp", MREPL, rp) < 0){ + warning("settime mount"); + close(f); + return; + } + close(f); + if(stat("/tmp", statbuf, sizeof statbuf) < 0) + fatal("stat"); + convM2D(statbuf, sizeof statbuf, &dir[0], (char*)&dir[1]); + sprint(timebuf, "%ld", dir[0].atime); + unmount(0, "/tmp"); + } + + f = open("#c/time", OWRITE); + if(write(f, timebuf, strlen(timebuf)) < 0) + warning("can't set #c/time"); + close(f); + print("\n"); +} + +#define SEC2MIN 60L +#define SEC2HOUR (60L*SEC2MIN) +#define SEC2DAY (24L*SEC2HOUR) + +int +g2(char **pp) +{ + int v; + + v = 10*((*pp)[0]-'0') + (*pp)[1]-'0'; + *pp += 2; + return v; +} + +/* + * days per month plus days/year + */ +static int dmsize[] = +{ + 365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; +static int ldmsize[] = +{ + 366, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * return the days/month for the given year + */ 
+static int * +yrsize(int y) +{ + + if((y%4) == 0 && ((y%100) != 0 || (y%400) == 0)) + return ldmsize; + else + return dmsize; +} + +/* + * compute seconds since Jan 1 1970 + */ +static long +lusertime(char *argbuf) +{ + char *buf; + ulong secs; + int i, y, m; + int *d2m; + + buf = argbuf; + i = strlen(buf); + if(i != 10 && i != 12) + return -1; + secs = 0; + y = g2(&buf); + m = g2(&buf); + if(y < 70) + y += 2000; + else + y += 1900; + + /* + * seconds per year + */ + for(i = 1970; i < y; i++){ + d2m = yrsize(i); + secs += d2m[0] * SEC2DAY; + } + + /* + * seconds per month + */ + d2m = yrsize(y); + for(i = 1; i < m; i++) + secs += d2m[i] * SEC2DAY; + + secs += (g2(&buf)-1) * SEC2DAY; + secs += g2(&buf) * SEC2HOUR; + secs += g2(&buf) * SEC2MIN; + if(*buf) + secs += g2(&buf); + + sprint(argbuf, "%ld", secs); + return secs; +} diff -Nru /sys/src/9k/ip/arp.c /sys/src/9k/ip/arp.c --- /sys/src/9k/ip/arp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/arp.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,685 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +/* + * address resolution tables + */ + +enum +{ + NHASH = (1<<6), + NCACHE = 256, + + AOK = 1, + AWAIT = 2, +}; + +char *arpstate[] = +{ + "UNUSED", + "OK", + "WAIT", +}; + +/* + * one per Fs + */ +struct Arp +{ + QLock; + Fs *f; + Arpent *hash[NHASH]; + Arpent cache[NCACHE]; + Arpent *rxmt; + Proc *rxmitp; /* neib sol re-transmit proc */ + Rendez rxmtq; + Block *dropf, *dropl; +}; + +char *Ebadarp = "bad arp"; + +#define haship(s) ((s)[IPaddrlen-1]%NHASH) + +extern int ReTransTimer = RETRANS_TIMER; + +static void rxmitproc(void *v); + +void +arpinit(Fs *f) +{ + f->arp = smalloc(sizeof(Arp)); + f->arp->f = f; + f->arp->rxmt = nil; + f->arp->dropf = f->arp->dropl = nil; + kproc("rxmitproc", rxmitproc, f->arp); +} + +/* + * create a new arp entry for an ip address. 
+ */ +static Arpent* +newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt) +{ + uint t; + Block *next, *xp; + Arpent *a, *e, *f, **l; + int empty; + + /* find oldest entry */ + e = &arp->cache[NCACHE]; + a = arp->cache; + t = a->utime; + for(f = a; f < e; f++){ + if(f->utime < t){ + t = f->utime; + a = f; + } + } + + /* dump waiting packets */ + xp = a->hold; + a->hold = nil; + + if(isv4(a->ip)){ + while(xp){ + next = xp->list; + freeblist(xp); + xp = next; + } + } + else { /* queue icmp unreachable for rxmitproc later on, w/o arp lock */ + if(xp){ + if(arp->dropl == nil) + arp->dropf = xp; + else + arp->dropl->list = xp; + + for(next = xp->list; next; next = next->list) + xp = next; + arp->dropl = xp; + wakeup(&arp->rxmtq); + } + } + + /* take out of current chain */ + l = &arp->hash[haship(a->ip)]; + for(f = *l; f; f = f->hash){ + if(f == a){ + *l = a->hash; + break; + } + l = &f->hash; + } + + /* insert into new chain */ + l = &arp->hash[haship(ip)]; + a->hash = *l; + *l = a; + + memmove(a->ip, ip, sizeof(a->ip)); + a->utime = NOW; + a->ctime = 0; + a->type = ifc->medium; + + a->rtime = NOW + ReTransTimer; + a->rxtsrem = MAX_MULTICAST_SOLICIT; + a->ifc = ifc; + a->ifcid = ifc->ifcid; + + /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */ + if(!ipismulticast(a->ip) && addrxt){ + l = &arp->rxmt; + empty = (*l==nil); + + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + for(f = *l; f; f = f->nextrxt){ + l = &f->nextrxt; + } + *l = a; + if(empty) + wakeup(&arp->rxmtq); + } + + a->nextrxt = nil; + + return a; +} + +/* called with arp qlocked */ + +void +cleanarpent(Arp *arp, Arpent *a) +{ + Arpent *f, **l; + + a->utime = 0; + a->ctime = 0; + a->type = 0; + a->state = 0; + + /* take out of current chain */ + l = &arp->hash[haship(a->ip)]; + for(f = *l; f; f = f->hash){ + if(f == a){ + *l = a->hash; + break; + } + l = &f->hash; + } + + /* take out of re-transmit chain */ + l = &arp->rxmt; + for(f 
= *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + a->nextrxt = nil; + a->hash = nil; + a->hold = nil; + a->last = nil; + a->ifc = nil; +} + +/* + * fill in the media address if we have it. Otherwise return an + * Arpent that represents the state of the address resolution FSM + * for ip. Add the packet to be sent onto the list of packets + * waiting for ip->mac to be resolved. + */ +Arpent* +arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac) +{ + int hash; + Arpent *a; + Medium *type; + uchar v6ip[IPaddrlen]; + + if(version == V4){ + v4tov6(v6ip, ip); + ip = v6ip; + } + + qlock(arp); + hash = haship(ip); + type = ifc->medium; + for(a = arp->hash[hash]; a; a = a->hash){ + if(memcmp(ip, a->ip, sizeof(a->ip)) == 0) + if(type == a->type) + break; + } + + if(a == nil){ + a = newarp6(arp, ip, ifc, (version != V4)); + a->state = AWAIT; + } + a->utime = NOW; + if(a->state == AWAIT){ + if(bp != nil){ + if(a->hold) + a->last->list = bp; + else + a->hold = bp; + a->last = bp; + bp->list = nil; + } + return a; /* return with arp qlocked */ + } + + memmove(mac, a->mac, a->type->maclen); + + /* remove old entries */ + if(NOW - a->ctime > 15*60*1000) + cleanarpent(arp, a); + + qunlock(arp); + return nil; +} + +/* + * called with arp locked + */ +void +arprelease(Arp *arp, Arpent*) +{ + qunlock(arp); +} + +/* + * Copy out the mac address from the Arpent. Return the + * block waiting to get sent to this mac address. 
+ * + * called with arp locked + */ +Block* +arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac) +{ + Block *bp; + Arpent *f, **l; + + if(!isv4(a->ip)){ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + } + + memmove(a->mac, mac, type->maclen); + a->type = type; + a->state = AOK; + a->utime = NOW; + bp = a->hold; + a->hold = nil; + qunlock(arp); + + return bp; +} + +void +arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh) +{ + Arp *arp; + Route *r; + Arpent *a, *f, **l; + Ipifc *ifc; + Medium *type; + Block *bp, *next; + uchar v6ip[IPaddrlen]; + + arp = fs->arp; + + if(n != 6){ +// print("arp: len = %d\n", n); + return; + } + + switch(version){ + case V4: + r = v4lookup(fs, ip, nil); + v4tov6(v6ip, ip); + ip = v6ip; + break; + case V6: + r = v6lookup(fs, ip, nil); + break; + default: + panic("arpenter: version %d", version); + return; /* to supress warnings */ + } + + if(r == nil){ +// print("arp: no route for entry\n"); + return; + } + + ifc = r->ifc; + type = ifc->medium; + + qlock(arp); + for(a = arp->hash[haship(ip)]; a; a = a->hash){ + if(a->type != type || (a->state != AWAIT && a->state != AOK)) + continue; + + if(ipcmp(a->ip, ip) == 0){ + a->state = AOK; + memmove(a->mac, mac, type->maclen); + + if(version == V6){ + /* take out of re-transmit chain */ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + } + + a->ifc = ifc; + a->ifcid = ifc->ifcid; + bp = a->hold; + a->hold = nil; + if(version == V4) + ip += IPv4off; + a->utime = NOW; + a->ctime = a->utime; + qunlock(arp); + + while(bp){ + next = bp->list; + if(ifc != nil){ + if(waserror()){ + runlock(ifc); + nexterror(); + } + rlock(ifc); + if(ifc->medium != nil) + ifc->medium->bwrite(ifc, bp, version, ip); + else + freeb(bp); + runlock(ifc); + poperror(); + } else + freeb(bp); + bp = next; + } + return; + } + } + + if(refresh == 0){ + a = 
newarp6(arp, ip, ifc, 0); + a->state = AOK; + a->type = type; + a->ctime = NOW; + memmove(a->mac, mac, type->maclen); + } + + qunlock(arp); +} + +int +arpwrite(Fs *fs, char *s, int len) +{ + int n; + Route *r; + Arp *arp; + Block *bp; + Arpent *a, *fl, **l; + Medium *type; + char *f[4], buf[256]; + uchar ip[IPaddrlen], mac[MAClen]; + + arp = fs->arp; + + if(len == 0) + error(Ebadarp); + if(len >= sizeof(buf)) + len = sizeof(buf)-1; + strncpy(buf, s, len); + buf[len] = 0; + if(len > 0 && buf[len-1] == '\n') + buf[len-1] = 0; + + n = getfields(buf, f, 4, 1, " "); + if(strcmp(f[0], "flush") == 0){ + qlock(arp); + for(a = arp->cache; a < &arp->cache[NCACHE]; a++){ + memset(a->ip, 0, sizeof(a->ip)); + memset(a->mac, 0, sizeof(a->mac)); + a->hash = nil; + a->state = 0; + a->utime = 0; + while(a->hold != nil){ + bp = a->hold->list; + freeblist(a->hold); + a->hold = bp; + } + } + memset(arp->hash, 0, sizeof(arp->hash)); + /* clear all pkts on these lists (rxmt, dropf/l) */ + arp->rxmt = nil; + arp->dropf = nil; + arp->dropl = nil; + qunlock(arp); + } else if(strcmp(f[0], "add") == 0){ + switch(n){ + default: + error(Ebadarg); + case 3: + if (parseip(ip, f[1]) == -1) + error(Ebadip); + if(isv4(ip)) + r = v4lookup(fs, ip+IPv4off, nil); + else + r = v6lookup(fs, ip, nil); + if(r == nil) + error("Destination unreachable"); + type = r->ifc->medium; + n = parsemac(mac, f[2], type->maclen); + break; + case 4: + type = ipfindmedium(f[1]); + if(type == nil) + error(Ebadarp); + if (parseip(ip, f[2]) == -1) + error(Ebadip); + n = parsemac(mac, f[3], type->maclen); + break; + } + + if(type->ares == nil) + error(Ebadarp); + + type->ares(fs, V6, ip, mac, n, 0); + } else if(strcmp(f[0], "del") == 0){ + if(n != 2) + error(Ebadarg); + + if (parseip(ip, f[1]) == -1) + error(Ebadip); + qlock(arp); + + l = &arp->hash[haship(ip)]; + for(a = *l; a; a = a->hash){ + if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){ + *l = a->hash; + break; + } + l = &a->hash; + } + + if(a){ + /* take out of re-transmit 
chain */ + l = &arp->rxmt; + for(fl = *l; fl; fl = fl->nextrxt){ + if(fl == a){ + *l = a->nextrxt; + break; + } + l = &fl->nextrxt; + } + + a->nextrxt = nil; + a->hash = nil; + a->hold = nil; + a->last = nil; + a->ifc = nil; + memset(a->ip, 0, sizeof(a->ip)); + memset(a->mac, 0, sizeof(a->mac)); + } + qunlock(arp); + } else + error(Ebadarp); + + return len; +} + +enum +{ + Alinelen= 90, +}; + +char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n"; + +static void +convmac(char *p, uchar *mac, int n) +{ + while(n-- > 0) + p += sprint(p, "%2.2ux", *mac++); +} + +int +arpread(Arp *arp, char *p, ulong offset, int len) +{ + Arpent *a; + int n; + char mac[2*MAClen+1]; + + if(offset % Alinelen) + return 0; + + offset = offset/Alinelen; + len = len/Alinelen; + + n = 0; + for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){ + if(a->state == 0) + continue; + if(offset > 0){ + offset--; + continue; + } + len--; + qlock(arp); + convmac(mac, a->mac, a->type->maclen); + n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac); + qunlock(arp); + } + + return n; +} + +extern int +rxmitsols(Arp *arp) +{ + uint sflag; + Block *next, *xp; + Arpent *a, *b, **l; + Fs *f; + uchar ipsrc[IPaddrlen]; + Ipifc *ifc = nil; + long nrxt; + + qlock(arp); + f = arp->f; + + a = arp->rxmt; + if(a==nil){ + nrxt = 0; + goto dodrops; /* return nrxt; */ + } + nrxt = a->rtime - NOW; + if(nrxt > 3*ReTransTimer/4) + goto dodrops; /* return nrxt; */ + + for(; a; a = a->nextrxt){ + ifc = a->ifc; + assert(ifc != nil); + if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){ + xp = a->hold; + a->hold = nil; + + if(xp){ + if(arp->dropl == nil) + arp->dropf = xp; + else + arp->dropl->list = xp; + } + + cleanarpent(arp, a); + } + else + break; + } + if(a == nil) + goto dodrops; + + + qunlock(arp); /* for icmpns */ + if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) + icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); + + runlock(ifc); + qlock(arp); + + /* put to the 
end of re-transmit chain */ + l = &arp->rxmt; + for(b = *l; b; b = b->nextrxt){ + if(b == a){ + *l = a->nextrxt; + break; + } + l = &b->nextrxt; + } + for(b = *l; b; b = b->nextrxt){ + l = &b->nextrxt; + } + *l = a; + a->rxtsrem--; + a->nextrxt = nil; + a->rtime = NOW + ReTransTimer; + + a = arp->rxmt; + if(a==nil) + nrxt = 0; + else + nrxt = a->rtime - NOW; + +dodrops: + xp = arp->dropf; + arp->dropf = nil; + arp->dropl = nil; + qunlock(arp); + + for(; xp; xp = next){ + next = xp->list; + icmphostunr(f, ifc, xp, Icmp6_adr_unreach, 1); + } + + return nrxt; + +} + +static int +rxready(void *v) +{ + Arp *arp = (Arp *) v; + int x; + + x = ((arp->rxmt != nil) || (arp->dropf != nil)); + + return x; +} + +static void +rxmitproc(void *v) +{ + Arp *arp = v; + long wakeupat; + + arp->rxmitp = up; + //print("arp rxmitproc started\n"); + if(waserror()){ + arp->rxmitp = 0; + pexit("hangup", 1); + } + for(;;){ + wakeupat = rxmitsols(arp); + if(wakeupat == 0) + sleep(&arp->rxmtq, rxready, v); + else if(wakeupat > ReTransTimer/4) + tsleep(&arp->rxmtq, return0, 0, wakeupat); + } +} + diff -Nru /sys/src/9k/ip/chandial.c /sys/src/9k/ip/chandial.c --- /sys/src/9k/ip/chandial.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/chandial.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,124 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +typedef struct DS DS; +static Chan* call(char*, char*, DS*); +static void _dial_string_parse(char*, DS*); + +enum +{ + Maxstring= 128, +}; + +struct DS +{ + char buf[Maxstring]; /* dist string */ + char *netdir; + char *proto; + char *rem; + char *local; /* other args */ + char *dir; + Chan **ctlp; +}; + +/* + * the dialstring is of the form '[/net/]proto!dest' + */ +Chan* +chandial(char *dest, char *local, char *dir, Chan **ctlp) +{ + DS ds; + char clone[Maxpath]; + + ds.local = local; + ds.dir = dir; + ds.ctlp = ctlp; + + _dial_string_parse(dest, &ds); + if(ds.netdir 
== 0) + ds.netdir = "/net"; + + /* no connection server, don't translate */ + snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto); + return call(clone, ds.rem, &ds); +} + +static Chan* +call(char *clone, char *dest, DS *ds) +{ + int n; + Chan *dchan, *cchan; + char name[Maxpath], data[Maxpath], *p; + + cchan = namec(clone, Aopen, ORDWR, 0); + + /* get directory name */ + if(waserror()){ + cclose(cchan); + nexterror(); + } + n = cchan->dev->read(cchan, name, sizeof(name)-1, 0); + name[n] = 0; + for(p = name; *p == ' '; p++) + ; + sprint(name, "%lud", strtoul(p, 0, 0)); + p = strrchr(clone, '/'); + *p = 0; + if(ds->dir) + snprint(ds->dir, Maxpath, "%s/%s", clone, name); + snprint(data, sizeof(data), "%s/%s/data", clone, name); + + /* connect */ + if(ds->local) + snprint(name, sizeof(name), "connect %s %s", dest, ds->local); + else + snprint(name, sizeof(name), "connect %s", dest); + cchan->dev->write(cchan, name, strlen(name), 0); + + /* open data connection */ + dchan = namec(data, Aopen, ORDWR, 0); + if(ds->ctlp) + *ds->ctlp = cchan; + else + cclose(cchan); + poperror(); + return dchan; + +} + +/* + * parse a dial string + */ +static void +_dial_string_parse(char *str, DS *ds) +{ + char *p, *p2; + + strncpy(ds->buf, str, Maxstring); + ds->buf[Maxstring-1] = 0; + + p = strchr(ds->buf, '!'); + if(p == 0) { + ds->netdir = 0; + ds->proto = "net"; + ds->rem = ds->buf; + } else { + if(*ds->buf != '/' && *ds->buf != '#'){ + ds->netdir = 0; + ds->proto = ds->buf; + } else { + for(p2 = p; *p2 != '/'; p2--) + ; + *p2++ = 0; + ds->netdir = ds->buf; + ds->proto = p2; + } + *p = 0; + ds->rem = p + 1; + } +} diff -Nru /sys/src/9k/ip/devip.c /sys/src/9k/ip/devip.c --- /sys/src/9k/ip/devip.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/devip.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1446 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +enum +{ + Qtopdir= 1, /* top level 
directory */ + Qtopbase, + Qarp= Qtopbase, + Qndb, + Qiproute, + Qipselftab, + Qlog, + + Qprotodir, /* directory for a protocol */ + Qprotobase, + Qclone= Qprotobase, + Qstats, + + Qconvdir, /* directory for a conversation */ + Qconvbase, + Qctl= Qconvbase, + Qdata, + Qerr, + Qlisten, + Qlocal, + Qremote, + Qstatus, + Qsnoop, + + Logtype= 5, + Masktype= (1<> Shiftconv) & Maskconv ) +#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto ) +#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<devno]->p[PROTO(c->qid)]->conv[CONV(c->qid)]; + if(cv->owner == nil) + kstrdup(&cv->owner, eve); + mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE); + + switch(i) { + default: + return -1; + case Qctl: + devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp); + return 1; + case Qdata: + devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp); + return 1; + case Qerr: + devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp); + return 1; + case Qlisten: + devdir(c, q, "listen", 0, cv->owner, cv->perm, dp); + return 1; + case Qlocal: + p = "local"; + break; + case Qremote: + p = "remote"; + break; + case Qsnoop: + if(strcmp(cv->p->name, "ipifc") != 0) + return -1; + devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp); + return 1; + case Qstatus: + p = "status"; + break; + } + devdir(c, q, p, 0, cv->owner, 0444, dp); + return 1; +} + +static int +ip2gen(Chan *c, int i, Dir *dp) +{ + Qid q; + + switch(i) { + case Qclone: + mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE); + devdir(c, q, "clone", 0, network, 0666, dp); + return 1; + case Qstats: + mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE); + devdir(c, q, "stats", 0, network, 0444, dp); + return 1; + } + return -1; +} + +static int +ip1gen(Chan *c, int i, Dir *dp) +{ + Qid q; + char *p; + int prot; + int len = 0; + Fs *f; + extern ulong kerndate; + + f = ipfs[c->devno]; + + prot = 0666; + mkqid(&q, QID(0, 0, i), 0, QTFILE); + switch(i) { + default: + return -1; + case Qarp: + p = "arp"; + prot = 
0664; + break; + case Qndb: + p = "ndb"; + len = strlen(f->ndb); + q.vers = f->ndbvers; + break; + case Qiproute: + p = "iproute"; + prot = 0664; + break; + case Qipselftab: + p = "ipselftab"; + prot = 0444; + break; + case Qlog: + p = "log"; + break; + } + devdir(c, q, p, len, network, prot, dp); + if(i == Qndb && f->ndbmtime > kerndate) + dp->mtime = f->ndbmtime; + return 1; +} + +static int +ipgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp) +{ + Qid q; + Conv *cv; + Fs *f; + + f = ipfs[c->devno]; + + switch(TYPE(c->qid)) { + case Qtopdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR); + snprint(up->genbuf, sizeof up->genbuf, "#I%ud", c->devno); + devdir(c, q, up->genbuf, 0, network, 0555, dp); + return 1; + } + if(s < f->np) { + if(f->p[s]->connect == nil) + return 0; /* protocol with no user interface */ + mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR); + devdir(c, q, f->p[s]->name, 0, network, 0555, dp); + return 1; + } + s -= f->np; + return ip1gen(c, s+Qtopbase, dp); + case Qarp: + case Qndb: + case Qlog: + case Qiproute: + case Qipselftab: + return ip1gen(c, TYPE(c->qid), dp); + case Qprotodir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR); + snprint(up->genbuf, sizeof up->genbuf, "#I%ud", c->devno); + devdir(c, q, up->genbuf, 0, network, 0555, dp); + return 1; + } + if(s < f->p[PROTO(c->qid)]->ac) { + cv = f->p[PROTO(c->qid)]->conv[s]; + snprint(up->genbuf, sizeof up->genbuf, "%d", s); + mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR); + devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp); + return 1; + } + s -= f->p[PROTO(c->qid)]->ac; + return ip2gen(c, s+Qprotobase, dp); + case Qclone: + case Qstats: + return ip2gen(c, TYPE(c->qid), dp); + case Qconvdir: + if(s == DEVDOTDOT){ + s = PROTO(c->qid); + mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR); + devdir(c, q, f->p[s]->name, 0, network, 0555, dp); + return 1; + } + return ip3gen(c, s+Qconvbase, dp); + case Qctl: + case Qdata: + case Qerr: + case Qlisten: + case Qlocal: + 
case Qremote: + case Qstatus: + case Qsnoop: + return ip3gen(c, TYPE(c->qid), dp); + } + return -1; +} + +static void +ipreset(void) +{ + nullmediumlink(); + pktmediumlink(); + + fmtinstall('i', eipfmt); + fmtinstall('I', eipfmt); + fmtinstall('E', eipfmt); + fmtinstall('V', eipfmt); + fmtinstall('M', eipfmt); +} + +static Fs* +ipgetfs(int dev) +{ + extern void (*ipprotoinit[])(Fs*); + Fs *f; + int i; + + if(dev >= Nfs) + return nil; + + qlock(&fslock); + if(ipfs[dev] == nil){ + f = smalloc(sizeof(Fs)); + ip_init(f); + arpinit(f); + netloginit(f); + for(i = 0; ipprotoinit[i]; i++) + ipprotoinit[i](f); + f->dev = dev; + ipfs[dev] = f; + } + qunlock(&fslock); + + return ipfs[dev]; +} + +IPaux* +newipaux(char *owner, char *tag) +{ + IPaux *a; + int n; + + a = smalloc(sizeof(*a)); + kstrdup(&a->owner, owner); + memset(a->tag, ' ', sizeof(a->tag)); + n = strlen(tag); + if(n > sizeof(a->tag)) + n = sizeof(a->tag); + memmove(a->tag, tag, n); + return a; +} + +#define ATTACHER(c) (((IPaux*)((c)->aux))->owner) + +static Chan* +ipattach(char* spec) +{ + Chan *c; + int devno; + + devno = atoi(spec); + if(devno >= Nfs) + error("bad specification"); + + ipgetfs(devno); + c = devattach('I', spec); + mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR); + c->devno = devno; + + c->aux = newipaux(commonuser(), "none"); + + return c; +} + +static Walkqid* +ipwalk(Chan* c, Chan *nc, char **name, int nname) +{ + IPaux *a = c->aux; + Walkqid* w; + + w = devwalk(c, nc, name, nname, nil, 0, ipgen); + if(w != nil && w->clone != nil) + w->clone->aux = newipaux(a->owner, a->tag); + return w; +} + + +static long +ipstat(Chan* c, uchar* db, long n) +{ + return devstat(c, db, n, nil, 0, ipgen); +} + +static int +incoming(void* arg) +{ + Conv *conv; + + conv = arg; + return conv->incall != nil; +} + +static int m2p[] = { + [OREAD] 4, + [OWRITE] 2, + [ORDWR] 6 +}; + +static Chan* +ipopen(Chan* c, int omode) +{ + Conv *cv, *nc; + Proto *p; + int perm; + Fs *f; + + perm = m2p[omode&3]; + + f = 
ipfs[c->devno]; + + switch(TYPE(c->qid)) { + default: + break; + case Qndb: + if(omode & (OWRITE|OTRUNC) && !isevegroup()) + error(Eperm); + if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC)) + f->ndb[0] = 0; + break; + case Qlog: + netlogopen(f); + break; + case Qiproute: + case Qarp: + if(omode != OREAD && !isevegroup()) + error(Eperm); + break; + case Qtopdir: + case Qprotodir: + case Qconvdir: + case Qstatus: + case Qremote: + case Qlocal: + case Qstats: + case Qipselftab: + if(omode != OREAD) + error(Eperm); + break; + case Qsnoop: + if(omode != OREAD) + error(Eperm); + p = f->p[PROTO(c->qid)]; + cv = p->conv[CONV(c->qid)]; + if(strcmp(ATTACHER(c), cv->owner) != 0 && !isevegroup()) + error(Eperm); + incref(&cv->snoopers); + break; + case Qclone: + p = f->p[PROTO(c->qid)]; + qlock(p); + if(waserror()){ + qunlock(p); + nexterror(); + } + cv = Fsprotoclone(p, ATTACHER(c)); + qunlock(p); + poperror(); + if(cv == nil) { + error(Enodev); + break; + } + mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE); + break; + case Qdata: + case Qctl: + case Qerr: + p = f->p[PROTO(c->qid)]; + qlock(p); + cv = p->conv[CONV(c->qid)]; + qlock(cv); + if(waserror()) { + qunlock(cv); + qunlock(p); + nexterror(); + } + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if((perm & cv->perm) != perm) + error(Eperm); + + } + cv->inuse++; + if(cv->inuse == 1){ + kstrdup(&cv->owner, ATTACHER(c)); + cv->perm = 0660; + } + qunlock(cv); + qunlock(p); + poperror(); + break; + case Qlisten: + cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)]; + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if((perm & cv->perm) != perm) + error(Eperm); + + } + + if(cv->state != Announced) + error("not announced"); + + if(waserror()){ + closeconv(cv); + nexterror(); + } + qlock(cv); + cv->inuse++; + qunlock(cv); + + nc = nil; + while(nc == nil) { + /* give up if we got a hangup */ + if(qisclosed(cv->rq)) + error("listen 
hungup"); + + qlock(&cv->listenq); + if(waserror()) { + qunlock(&cv->listenq); + nexterror(); + } + + /* wait for a connect */ + sleep(&cv->listenr, incoming, cv); + + qlock(cv); + nc = cv->incall; + if(nc != nil){ + cv->incall = nc->next; + cv->nincall--; + mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE); + kstrdup(&cv->owner, ATTACHER(c)); + } + qunlock(cv); + + qunlock(&cv->listenq); + poperror(); + } + closeconv(cv); + poperror(); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +ipcreate(Chan*, char*, int, int) +{ + error(Eperm); +} + +static void +ipremove(Chan*) +{ + error(Eperm); +} + +static long +ipwstat(Chan *c, uchar *dp, long n) +{ + Dir d; + Conv *cv; + Fs *f; + Proto *p; + + f = ipfs[c->devno]; + switch(TYPE(c->qid)) { + default: + error(Eperm); + break; + case Qctl: + case Qdata: + break; + } + + n = convM2D(dp, n, &d, nil); + if(n > 0){ + p = f->p[PROTO(c->qid)]; + cv = p->conv[CONV(c->qid)]; + if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if(d.uid[0]) + kstrdup(&cv->owner, d.uid); + cv->perm = d.mode & 0777; + } + return n; +} + +void +closeconv(Conv *cv) +{ + Conv *nc; + Ipmulti *mp; + + qlock(cv); + + if(--cv->inuse > 0) { + qunlock(cv); + return; + } + + /* close all incoming calls since no listen will ever happen */ + while((nc = cv->incall) != nil){ + cv->incall = nc->next; + cv->nincall--; + closeconv(nc); + } + cv->incall = nil; + cv->incalltl = nil; + cv->nincall = 0; + + kstrdup(&cv->owner, network); + cv->perm = 0660; + + while((mp = cv->multi) != nil) + ipifcremmulti(cv, mp->ma, mp->ia); + + cv->r = nil; + cv->rgen = 0; + cv->p->close(cv); + cv->state = Idle; + qunlock(cv); +} + +static void +ipclose(Chan* c) +{ + Fs *f; + + f = ipfs[c->devno]; + switch(TYPE(c->qid)) { + default: + break; + case Qlog: + if(c->flag & COPEN) + netlogclose(f); + break; + case Qdata: + case Qctl: + case Qerr: + if(c->flag & COPEN) + 
closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]); + break; + case Qsnoop: + if(c->flag & COPEN) + decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers); + break; + } + free(((IPaux*)c->aux)->owner); + free(c->aux); +} + +enum +{ + Statelen= 32*1024, +}; + +static long +ipread(Chan *ch, void *a, long n, vlong off) +{ + Conv *c; + Proto *x; + char *buf, *p; + long rv; + Fs *f; + ulong offset = off; + + f = ipfs[ch->devno]; + + p = a; + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qtopdir: + case Qprotodir: + case Qconvdir: + return devdirread(ch, a, n, 0, 0, ipgen); + case Qarp: + return arpread(f->arp, a, offset, n); + case Qndb: + return readstr(offset, a, n, f->ndb); + case Qiproute: + return routeread(f, a, offset, n); + case Qipselftab: + return ipselftabread(f, a, offset, n); + case Qlog: + return netlogread(f, a, offset, n); + case Qctl: + buf = smalloc(16); + snprint(buf, 16, "%lud", CONV(ch->qid)); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qremote: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + if(x->remote == nil) { + snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport); + } else { + (*x->remote)(c, buf, Statelen-2); + } + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qlocal: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + if(x->local == nil) { + snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport); + } else { + (*x->local)(c, buf, Statelen-2); + } + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qstatus: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + (*x->state)(c, buf, Statelen-2); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qdata: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->rq, a, n); + case Qerr: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->eq, a, n); + case Qsnoop: + c = 
f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->sq, a, n); + case Qstats: + x = f->p[PROTO(ch->qid)]; + if(x->stats == nil) + error("stats not implemented"); + buf = smalloc(Statelen); + (*x->stats)(x, buf, Statelen); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + } +} + +static Block* +ipbread(Chan* ch, long n, vlong offset) +{ + Conv *c; + Proto *x; + Fs *f; + + switch(TYPE(ch->qid)){ + case Qdata: + f = ipfs[ch->devno]; + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + return qbread(c->rq, n); + default: + return devbread(ch, n, offset); + } +} + +/* + * set local address to be that of the ifc closest to remote address + */ +static void +setladdr(Conv* c) +{ + findlocalip(c->p->f, c->laddr, c->raddr); +} + +/* + * set a local port making sure the quad of raddr,rport,laddr,lport is unique + */ +char* +setluniqueport(Conv* c, int lport) +{ + Proto *p; + Conv *xp; + int x; + + p = c->p; + + qlock(p); + for(x = 0; x < p->nc; x++){ + xp = p->conv[x]; + if(xp == nil) + break; + if(xp == c) + continue; + if((xp->state == Connected || xp->state == Announced) + && xp->lport == lport + && xp->rport == c->rport + && ipcmp(xp->raddr, c->raddr) == 0 + && ipcmp(xp->laddr, c->laddr) == 0){ + qunlock(p); + return "address in use"; + } + } + c->lport = lport; + qunlock(p); + return nil; +} + +/* + * is lport in use by anyone? + */ +static int +lportinuse(Proto *p, ushort lport) +{ + int x; + + for(x = 0; x < p->nc && p->conv[x]; x++) + if(p->conv[x]->lport == lport) + return 1; + return 0; +} + +/* + * pick a local port and set it + */ +char * +setlport(Conv* c) +{ + Proto *p; + int i, port; + + p = c->p; + qlock(p); + if(c->restricted){ + /* Restricted ports cycle between 600 and 1024. */ + for(i=0; i<1024-600; i++){ + if(p->nextrport >= 1024 || p->nextrport < 600) + p->nextrport = 600; + port = p->nextrport++; + if(!lportinuse(p, port)) + goto chosen; + } + }else{ + /* + * Unrestricted ports are chosen randomly + * between 2^15 and 2^16. 
There are at most + * 4*Nchan = 4096 ports in use at any given time, + * so even in the worst case, a random probe has a + * 1 - 4096/2^15 = 87% chance of success. + * If 64 successive probes fail, there is a bug somewhere + * (or a once in 10^58 event has happened, but that's + * less likely than a venti collision). + */ + for(i=0; i<64; i++){ + port = (1<<15) + nrand(1<<15); + if(!lportinuse(p, port)) + goto chosen; + } + } + qunlock(p); + /* + * debugging: let's see if we ever get this. + * if we do (and we're a cpu server), we might as well restart + * since we're now unable to service new connections. + */ + print("setlport: %s: %s: %I!%ud -> %I out of ports\n", + p->name, c->restricted? "restrict": "", c->raddr, c->rport, c->laddr); + return "no ports available"; + +chosen: + c->lport = port; + qunlock(p); + return nil; +} + +/* + * set a local address and port from a string of the form + * [address!]port[!r] + */ +char* +setladdrport(Conv* c, char* str, int announcing) +{ + char *p; + char *rv; + ushort lport; + uchar addr[IPaddrlen]; + + /* + * ignore restricted part if it exists. it's + * meaningless on local ports. 
+ */ + p = strchr(str, '!'); + if(p != nil){ + *p++ = 0; + if(strcmp(p, "r") == 0) + p = nil; + } + + c->lport = 0; + if(p == nil){ + if(announcing) + ipmove(c->laddr, IPnoaddr); + else + setladdr(c); + p = str; + } else { + if(strcmp(str, "*") == 0) + ipmove(c->laddr, IPnoaddr); + else { + if(parseip(addr, str) == -1) + return Ebadip; + if(ipforme(c->p->f, addr)) + ipmove(c->laddr, addr); + else + return "not a local IP address"; + } + } + + /* one process can get all connections */ + if(announcing && strcmp(p, "*") == 0){ + if(!iseve()) + error(Eperm); + return setluniqueport(c, 0); + } + + lport = atoi(p); + if(lport <= 0) + rv = setlport(c); + else + rv = setluniqueport(c, lport); + return rv; +} + +static char* +setraddrport(Conv* c, char* str) +{ + char *p; + + p = strchr(str, '!'); + if(p == nil) + return "malformed address"; + *p++ = 0; + if (parseip(c->raddr, str) == -1) + return Ebadip; + c->rport = atoi(p); + p = strchr(p, '!'); + if(p){ + if(strstr(p, "!r") != nil) + c->restricted = 1; + } + return nil; +} + +/* + * called by protocol connect routine to set addresses + */ +char* +Fsstdconnect(Conv *c, char *argv[], int argc) +{ + char *p; + + switch(argc) { + default: + return "bad args to connect"; + case 2: + p = setraddrport(c, argv[1]); + if(p != nil) + return p; + setladdr(c); + p = setlport(c); + if (p != nil) + return p; + break; + case 3: + p = setraddrport(c, argv[1]); + if(p != nil) + return p; + p = setladdrport(c, argv[2], 0); + if(p != nil) + return p; + } + + if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) + || ipcmp(c->raddr, IPnoaddr) == 0) + c->ipversion = V4; + else + c->ipversion = V6; + + return nil; +} +/* + * initiate connection and sleep till its set up + */ +static int +connected(void* a) +{ + return ((Conv*)a)->state == Connected; +} +static void +connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(c->state != 0) + error(Econinuse); + c->state = Connecting; + c->cerr[0] 
= '\0'; + if(x->connect == nil) + error("connect not supported"); + p = x->connect(c, cb->f, cb->nf); + if(p != nil) + error(p); + + qunlock(c); + if(waserror()){ + qlock(c); + nexterror(); + } + sleep(&c->cr, connected, c); + qlock(c); + poperror(); + + if(c->cerr[0] != '\0') + error(c->cerr); +} + +/* + * called by protocol announce routine to set addresses + */ +char* +Fsstdannounce(Conv* c, char* argv[], int argc) +{ + memset(c->raddr, 0, sizeof(c->raddr)); + c->rport = 0; + switch(argc){ + default: + break; + case 2: + return setladdrport(c, argv[1], 1); + } + return "bad args to announce"; +} + +/* + * initiate announcement and sleep till its set up + */ +static int +announced(void* a) +{ + return ((Conv*)a)->state == Announced; +} +static void +announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(c->state != 0) + error(Econinuse); + c->state = Announcing; + c->cerr[0] = '\0'; + if(x->announce == nil) + error("announce not supported"); + p = x->announce(c, cb->f, cb->nf); + if(p != nil) + error(p); + + qunlock(c); + if(waserror()){ + qlock(c); + nexterror(); + } + sleep(&c->cr, announced, c); + qlock(c); + poperror(); + + if(c->cerr[0] != '\0') + error(c->cerr); +} + +/* + * called by protocol bind routine to set addresses + */ +char* +Fsstdbind(Conv* c, char* argv[], int argc) +{ + switch(argc){ + default: + break; + case 2: + return setladdrport(c, argv[1], 0); + } + return "bad args to bind"; +} + +static void +bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(x->bind == nil) + p = Fsstdbind(c, cb->f, cb->nf); + else + p = x->bind(c, cb->f, cb->nf); + if(p != nil) + error(p); +} + +static void +tosctlmsg(Conv *c, Cmdbuf *cb) +{ + if(cb->nf < 2) + c->tos = 0; + else + c->tos = atoi(cb->f[1]); +} + +static void +ttlctlmsg(Conv *c, Cmdbuf *cb) +{ + if(cb->nf < 2) + c->ttl = MAXTTL; + else + c->ttl = atoi(cb->f[1]); +} + +static long +ipwrite(Chan* ch, void *v, long n, vlong off) +{ + Conv *c; + Proto *x; + char *p; + Cmdbuf *cb; + 
uchar ia[IPaddrlen], ma[IPaddrlen]; + Fs *f; + char *a; + ulong offset = off; + + a = v; + f = ipfs[ch->devno]; + + switch(TYPE(ch->qid)){ + default: + error(Eperm); + case Qdata: + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + + if(c->wq == nil) + error(Eperm); + + qwrite(c->wq, a, n); + break; + case Qarp: + return arpwrite(f, a, n); + case Qiproute: + return routewrite(f, ch, a, n); + case Qlog: + netlogctl(f, a, n); + return n; + case Qndb: + return ndbwrite(f, a, offset, n); + break; + case Qctl: + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + cb = parsecmd(a, n); + + qlock(c); + if(waserror()) { + qunlock(c); + free(cb); + nexterror(); + } + if(cb->nf < 1) + error("short control request"); + if(strcmp(cb->f[0], "connect") == 0) + connectctlmsg(x, c, cb); + else if(strcmp(cb->f[0], "announce") == 0) + announcectlmsg(x, c, cb); + else if(strcmp(cb->f[0], "bind") == 0) + bindctlmsg(x, c, cb); + else if(strcmp(cb->f[0], "ttl") == 0) + ttlctlmsg(c, cb); + else if(strcmp(cb->f[0], "tos") == 0) + tosctlmsg(c, cb); + else if(strcmp(cb->f[0], "ignoreadvice") == 0) + c->ignoreadvice = 1; + else if(strcmp(cb->f[0], "addmulti") == 0){ + if(cb->nf < 2) + error("addmulti needs interface address"); + if(cb->nf == 2){ + if(!ipismulticast(c->raddr)) + error("addmulti for a non multicast address"); + if (parseip(ia, cb->f[1]) == -1) + error(Ebadip); + ipifcaddmulti(c, c->raddr, ia); + } else { + if (parseip(ia, cb->f[1]) == -1 || + parseip(ma, cb->f[2]) == -1) + error(Ebadip); + if(!ipismulticast(ma)) + error("addmulti for a non multicast address"); + ipifcaddmulti(c, ma, ia); + } + } else if(strcmp(cb->f[0], "remmulti") == 0){ + if(cb->nf < 2) + error("remmulti needs interface address"); + if(!ipismulticast(c->raddr)) + error("remmulti for a non multicast address"); + if (parseip(ia, cb->f[1]) == -1) + error(Ebadip); + ipifcremmulti(c, c->raddr, ia); + } else if(strcmp(cb->f[0], "maxfragsize") == 0){ + if(cb->nf < 2) + error("maxfragsize needs size"); 
+ + c->maxfragsize = (int)strtol(cb->f[1], nil, 0); + + } else if(x->ctl != nil) { + p = x->ctl(c, cb->f, cb->nf); + if(p != nil) + error(p); + } else + error("unknown control request"); + qunlock(c); + free(cb); + poperror(); + } + return n; +} + +static long +ipbwrite(Chan* ch, Block* bp, vlong offset) +{ + Conv *c; + Proto *x; + Fs *f; + int n; + + switch(TYPE(ch->qid)){ + case Qdata: + f = ipfs[ch->devno]; + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + + if(c->wq == nil) + error(Eperm); + + if(bp->next) + bp = concatblock(bp); + n = BLEN(bp); + qbwrite(c->wq, bp); + return n; + default: + return devbwrite(ch, bp, offset); + } +} + +Dev ipdevtab = { + 'I', + "ip", + + ipreset, + devinit, + devshutdown, + ipattach, + ipwalk, + ipstat, + ipopen, + ipcreate, + ipclose, + ipread, + ipbread, + ipwrite, + ipbwrite, + ipremove, + ipwstat, +}; + +int +Fsproto(Fs *f, Proto *p) +{ + if(f->np >= Maxproto) + return -1; + + p->f = f; + + if(p->ipproto > 0){ + if(f->t2p[p->ipproto] != nil) + return -1; + f->t2p[p->ipproto] = p; + } + + p->qid.type = QTDIR; + p->qid.path = QID(f->np, 0, Qprotodir); + p->conv = malloc(sizeof(Conv*)*(p->nc+1)); + if(p->conv == nil) + panic("Fsproto"); + + p->x = f->np; + p->nextrport = 600; + f->p[f->np++] = p; + + return 0; +} + +/* + * return true if this protocol is + * built in + */ +int +Fsbuiltinproto(Fs* f, uchar proto) +{ + return f->t2p[proto] != nil; +} + +/* + * called with protocol locked + */ +Conv* +Fsprotoclone(Proto *p, char *user) +{ + Conv *c, **pp, **ep; + +retry: + c = nil; + ep = &p->conv[p->nc]; + for(pp = p->conv; pp < ep; pp++) { + c = *pp; + if(c == nil){ + c = malloc(sizeof(Conv)); + if(c == nil) + error(Enomem); + qlock(c); + c->p = p; + c->x = pp - p->conv; + if(p->ptclsize != 0){ + c->ptcl = malloc(p->ptclsize); + if(c->ptcl == nil) { + free(c); + error(Enomem); + } + } + *pp = c; + p->ac++; + c->eq = qopen(1024, Qmsg, 0, 0); + (*p->create)(c); + break; + } + if(canqlock(c)){ + /* + * make sure both 
processes and protocol + * are done with this Conv + */ + if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0)) + break; + + qunlock(c); + } + } + if(pp >= ep) { + if(p->gc) + print("Fsprotoclone: garbage collecting Convs\n"); + if(p->gc != nil && (*p->gc)(p)) + goto retry; + /* debugging: do we ever get here? */ + print("Fsprotoclone: %s: all conversations in use: %s\n", p->name, user); + return nil; + } + + c->inuse = 1; + kstrdup(&c->owner, user); + c->perm = 0660; + c->state = Idle; + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->r = nil; + c->rgen = 0; + c->lport = 0; + c->rport = 0; + c->restricted = 0; + c->maxfragsize = 0; + c->ttl = MAXTTL; + qreopen(c->rq); + qreopen(c->wq); + qreopen(c->eq); + + qunlock(c); + return c; +} + +int +Fsconnected(Conv* c, char* msg) +{ + if(msg != nil && *msg != '\0') + strncpy(c->cerr, msg, ERRMAX-1); + + switch(c->state){ + + case Announcing: + c->state = Announced; + break; + + case Connecting: + c->state = Connected; + break; + } + + wakeup(&c->cr); + return 0; +} + +Proto* +Fsrcvpcol(Fs* f, uchar proto) +{ + if(f->ipmux) + return f->ipmux; + else + return f->t2p[proto]; +} + +Proto* +Fsrcvpcolx(Fs *f, uchar proto) +{ + return f->t2p[proto]; +} + +/* + * called with protocol locked + */ +Conv* +Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version) +{ + Conv *nc; + + qlock(c); + if(c->nincall+1 >= Maxincall){ + qunlock(c); + return nil; + } + + /* find a free conversation */ + nc = Fsprotoclone(c->p, network); + if(nc == nil) { + qunlock(c); + return nil; + } + ipmove(nc->raddr, raddr); + nc->rport = rport; + ipmove(nc->laddr, laddr); + nc->lport = lport; + nc->next = nil; + if(c->incall != nil) + c->incalltl->next = nc; + else + c->incall = nc; + c->incalltl = nc; + c->nincall++; + nc->state = Connected; + nc->ipversion = version; + + qunlock(c); + + wakeup(&c->listenr); + + return nc; +} + +long +ndbwrite(Fs *f, char *a, ulong off, int n) +{ + if(off > 
strlen(f->ndb)) + error(Eio); + if(off+n >= sizeof(f->ndb)) + error(Eio); + memmove(f->ndb+off, a, n); + f->ndb[off+n] = 0; + f->ndbvers++; + f->ndbmtime = seconds(); + return n; +} + +ulong +scalednconv(void) +{ + if(cpuserver) + return Nchans*4; + return Nchans; +} diff -Nru /sys/src/9k/ip/esp.c /sys/src/9k/ip/esp.c --- /sys/src/9k/ip/esp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/esp.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1266 @@ +/* + * Encapsulating Security Payload for IPsec for IPv4, rfc1827. + * rfc2104 defines hmac computation. + * currently only implements tunnel mode. + * TODO: verify aes algorithms; + * transport mode (host-to-host) + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" +#include "libsec.h" + +#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY) +#define BYTES2BITS(by) ((by) * BI2BY) + +typedef struct Algorithm Algorithm; +typedef struct Esp4hdr Esp4hdr; +typedef struct Esp6hdr Esp6hdr; +typedef struct Espcb Espcb; +typedef struct Esphdr Esphdr; +typedef struct Esppriv Esppriv; +typedef struct Esptail Esptail; +typedef struct Userhdr Userhdr; +typedef struct Window Window; + +enum { + Encrypt, + Decrypt, + + IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */ + Esp4hdrlen = IP4HDR + 8, + Esp6hdrlen = IP6HDR + 8, + + Esptaillen = 2, /* does not include pad or auth data */ + Userhdrlen = 4, /* user-visible header size - if enabled */ + + Desblk = BITS2BYTES(64), + Des3keysz = BITS2BYTES(192), + + Aesblk = BITS2BYTES(128), + Aeskeysz = BITS2BYTES(128), +}; + +struct Esphdr +{ + uchar espspi[4]; /* Security parameter index */ + uchar espseq[4]; /* Sequence number */ + uchar payload[]; +}; + +/* + * tunnel-mode (network-to-network, etc.) 
layout is: + * new IP hdrs | ESP hdr | + * enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV + * + * transport-mode (host-to-host) layout would be: + * orig IP hdrs | ESP hdr | + * enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV + */ +struct Esp4hdr +{ + /* ipv4 header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar espproto; /* Protocol */ + uchar espplen[2]; /* Header plus data length */ + uchar espsrc[4]; /* Ip source */ + uchar espdst[4]; /* Ip destination */ + + Esphdr; +}; + +/* tunnel-mode layout */ +struct Esp6hdr +{ + IPV6HDR; + Esphdr; +}; + +struct Esptail +{ + uchar pad; + uchar nexthdr; +}; + +/* IP-version-dependent data */ +typedef struct Versdep Versdep; +struct Versdep +{ + ulong version; + ulong iphdrlen; + ulong hdrlen; /* iphdrlen + esp hdr len */ + ulong spi; + ulong seq; + uchar laddr[IPaddrlen]; + uchar raddr[IPaddrlen]; +}; + +/* header as seen by the user */ +struct Userhdr +{ + uchar nexthdr; /* next protocol */ + uchar unused[3]; +}; + +/* MIB II counters; http://www.icir.org/fenner/mibs/htmllint/IPSEC-SA-MON-MIB.html */ +enum { + /* ipsecSaEsp ... */ + InUserOctets, + InPackets, /* just the payload */ + InDecryptErrors, + InAuthErrors, + InReplayErrors, + InPolicyErrors, + InPadErrors, + InOtherRxErr, + InQueueFullErr, + + OutUserOctets, + OutPackets, + OutSendErrors, + + /* non-MIB? 
*/ + InSpiUnknown, + InSeqFastfwd, + InDropQpass, + + Nstats, +}; + +static char *statnames[Nstats] = { +[InUserOctets] "InUserOctets", +[InPackets] "InPackets", +[InDecryptErrors] "InDecryptErrors", +[InAuthErrors] "InAuthErrors", +[InReplayErrors] "InReplayErrors", +[InPolicyErrors] "InPolicyErrors", +[InPadErrors] "InPadErrors", +[InOtherRxErr] "InOtherRxErr", +[InQueueFullErr] "InQueueFullErr", + +[OutUserOctets] "OutUserOctets", +[OutPackets] "OutPackets", +[OutSendErrors] "OutSendErrors", + +[InSpiUnknown] "InSpiUnknown", +[InSeqFastfwd] "InSeqFastfwd", +[InDropQpass] "InDropQpass", + +}; + +struct Esppriv +{ + uvlong stats[Nstats]; +}; + +struct Window +{ + uchar w0set; /* unknown how this is properly set */ + ulong w0; + uvlong w; +}; + +/* + * protocol specific part of Conv + */ +struct Espcb +{ + int incoming; + int header; /* user-level header */ + ulong spi; + ulong seq; /* last seq sent */ + Window; /* for replay attacks */ + + char *espalg; + void *espstate; /* other state for esp */ + int espivlen; /* in bytes */ + int espblklen; + int (*cipher)(Espcb*, uchar *buf, int len); + + char *ahalg; + void *ahstate; /* other state for esp */ + int ahlen; /* auth data length in bytes */ + int ahblklen; + int (*auth)(Espcb*, uchar *buf, int len, uchar *hash); + DigestState *ds; +}; + +struct Algorithm +{ + char *name; + int keylen; /* in bits */ + void (*init)(Espcb*, char* name, uchar *key, unsigned keylen); +}; + +static Conv* convlookup(Proto *esp, ulong spi); +static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg); +static void espkick(void *x); + +static void nullespinit(Espcb*, char*, uchar *key, unsigned keylen); +static void des3espinit(Espcb*, char*, uchar *key, unsigned keylen); +static void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen); +static void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen); +static void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n); + +static void nullahinit(Espcb*, char*, uchar 
*key, unsigned keylen); +static void shaahinit(Espcb*, char*, uchar *key, unsigned keylen); +static void aesahinit(Espcb*, char*, uchar *key, unsigned keylen); +static void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen); + +static Algorithm espalg[] = +{ + "null", 0, nullespinit, + "des3_cbc", 192, des3espinit, /* new rfc2451, des-ede3 */ + "aes_128_cbc", 128, aescbcespinit, /* new rfc3602 */ + "aes_ctr", 128, aesctrespinit, /* new rfc3686 */ + "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */ + /* rc4 was never required, was used in original bandt */ +// "rc4_128", 128, rc4espinit, + nil, 0, nil, +}; + +static Algorithm ahalg[] = +{ + "null", 0, nullahinit, + "hmac_sha1_96", 128, shaahinit, /* rfc2404 */ + "aes_xcbc_mac_96", 128, aesahinit, /* new rfc3566 */ + "hmac_md5_96", 128, md5ahinit, /* rfc2403 */ + nil, 0, nil, +}; + +void +espstat(Proto *esp, int stat, uint n) +{ + Esppriv *p; + + p = esp->priv; + p->stats[stat] += n; +} + +static int +ckwindow(Proto *esp, Window *w, ulong seq) +{ + long δ; + + if(w->w0set == 0){ + w->w0set = 1; + w->w0 = seq - 1; + } + δ = seq - w->w0 - 1; + + if(δ < 0 || w->w & 1ull << 63 - δ) + return -1; + if(δ > 63){ + netlog(esp->f, Logesp, "long drop seq %ld %ld\n", seq, w->w0); + espstat(esp, InSeqFastfwd, 1); + w->w <<= δ - 32; + w->w0 += δ - 32; + δ = 32; + } + w->w |= 1ull<<63 - δ; + while(w->w & 1ull<<63 || δ >= 32 && δ <= 63){ + w->w0++; + w->w <<= 1; + δ--; + } + return 0; +} + +static char* +espconnect(Conv *c, char **argv, int argc) +{ + char *p, *pp, *e = nil; + ulong spi; + Espcb *ecb = (Espcb*)c->ptcl; + + switch(argc) { + default: + e = "bad args to connect"; + break; + case 2: + p = strchr(argv[1], '!'); + if(p == nil){ + e = "malformed address"; + break; + } + *p++ = 0; + if (parseip(c->raddr, argv[1]) == -1) { + e = Ebadip; + break; + } + findlocalip(c->p->f, c->laddr, c->raddr); + ecb->incoming = 0; + ecb->seq = 0; + if(strcmp(p, "*") == 0) { + qlock(c->p); + for(;;) { + spi = nrand(1<<16) + 256; + 
if(convlookup(c->p, spi) == nil) + break; + } + qunlock(c->p); + ecb->spi = spi; + ecb->incoming = 1; + qhangup(c->wq, nil); + } else { + spi = strtoul(p, &pp, 10); + if(pp == p) { + e = "malformed address"; + break; + } + ecb->spi = spi; + qhangup(c->rq, nil); + } + nullespinit(ecb, "null", nil, 0); + nullahinit(ecb, "null", nil, 0); + } + Fsconnected(c, e); + + return e; +} + + +static int +espstate(Conv *c, char *state, int n) +{ + return snprint(state, n, "%s", c->inuse?"Open\n":"Closed\n"); +} + +static void +espcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, 0); + c->wq = qopen(64*1024, Qkick, espkick, c); +} + +static void +espclose(Conv *c) +{ + Espcb *ecb; + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + + ecb = (Espcb*)c->ptcl; + free(ecb->espstate); + free(ecb->ahstate); + memset(ecb, 0, sizeof(Espcb)); +} + +static int +convipvers(Conv *c) +{ + if((memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) || + ipcmp(c->raddr, IPnoaddr) == 0) + return V4; + else + return V6; +} + +static int +pktipvers(Fs *f, Block **bpp) +{ + if (*bpp == nil || BLEN(*bpp) == 0) { + /* get enough to identify the IP version */ + *bpp = pullupblock(*bpp, IP4HDR); + if(*bpp == nil) { + netlog(f, Logesp, "esp: short packet\n"); + return 0; + } + } + return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? 
V4: V6; +} + +static void +getverslens(int version, Versdep *vp) +{ + vp->version = version; + switch(vp->version) { + case V4: + vp->iphdrlen = IP4HDR; + vp->hdrlen = Esp4hdrlen; + break; + case V6: + vp->iphdrlen = IP6HDR; + vp->hdrlen = Esp6hdrlen; + break; + default: + panic("esp: getverslens version %d wrong", version); + } +} + +static void +getpktspiaddrs(uchar *pkt, Versdep *vp) +{ + Esp4hdr *eh4; + Esp6hdr *eh6; + + switch(vp->version) { + case V4: + eh4 = (Esp4hdr*)pkt; + v4tov6(vp->raddr, eh4->espsrc); + v4tov6(vp->laddr, eh4->espdst); + vp->spi = nhgetl(eh4->espspi); + vp->seq = nhgetl(eh4->espseq); + break; + case V6: + eh6 = (Esp6hdr*)pkt; + ipmove(vp->raddr, eh6->src); + ipmove(vp->laddr, eh6->dst); + vp->spi = nhgetl(eh6->espspi); + vp->seq = nhgetl(eh6->espseq); + break; + default: + panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version); + } +} + +/* + * encapsulate next IP packet on x's write queue in IP/ESP packet + * and initiate output of the result. + */ +static void +espkick(void *x) +{ + int nexthdr, payload, pad, align, ulen; + uchar *auth; + Block *bp; + Conv *c = x; + Esp4hdr *eh4; + Esp6hdr *eh6; + Espcb *ecb; + Esptail *et; + Userhdr *uh; + Versdep vers; + + getverslens(convipvers(c), &vers); + bp = qget(c->wq); + if(bp == nil) + return; + + qlock(c); + ecb = c->ptcl; + + if(ecb->header) { + /* make sure the message has a User header */ + bp = pullupblock(bp, Userhdrlen); + if(bp == nil) { + qunlock(c); + return; + } + uh = (Userhdr*)bp->rp; + nexthdr = uh->nexthdr; + bp->rp += Userhdrlen; + } else { + nexthdr = 0; /* what should this be? 
*/ + } + + ulen = BLEN(bp); + payload = ulen + ecb->espivlen; + + /* Make space to fit ip header */ + bp = padblock(bp, vers.hdrlen + ecb->espivlen); + getpktspiaddrs(bp->rp, &vers); + + align = 4; + if(ecb->espblklen > align) + align = ecb->espblklen; + if(align % ecb->ahblklen != 0) + panic("espkick: ahblklen is important after all"); + pad = (align-1) - (payload + Esptaillen-1)%align; + + /* + * Make space for tail + * this is done by calling padblock with a negative size + * Padblock does not change bp->wp! + */ + bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen)); + bp->wp += pad+Esptaillen+ecb->ahlen; + + et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad); + + /* fill in tail */ + et->pad = pad; + et->nexthdr = nexthdr; + + /* encrypt the payload */ + ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen); + auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen; + + /* fill in head; construct a new IP header and an ESP header */ + if (vers.version == V4) { + eh4 = (Esp4hdr *)bp->rp; + eh4->vihl = IP_VER4; + v6tov4(eh4->espsrc, c->laddr); + v6tov4(eh4->espdst, c->raddr); + eh4->espproto = IP_ESPPROTO; + eh4->frag[0] = 0; + eh4->frag[1] = 0; + + hnputl(eh4->espspi, ecb->spi); + hnputl(eh4->espseq, ++ecb->seq); + } else { + eh6 = (Esp6hdr *)bp->rp; + eh6->vcf[0] = IP_VER6; + ipmove(eh6->src, c->laddr); + ipmove(eh6->dst, c->raddr); + eh6->proto = IP_ESPPROTO; + + hnputl(eh6->espspi, ecb->spi); + hnputl(eh6->espseq, ++ecb->seq); + } + + /* compute secure hash */ + ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) + + payload + pad + Esptaillen, auth); + + qunlock(c); + /* print("esp: pass down: %lud\n", BLEN(bp)); */ + if (vers.version == V4) + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c); + else + ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c); + + espstat(c->p, OutUserOctets, ulen); + espstat(c->p, OutPackets, 1); +} + +/* + * decapsulate IP packet from IP/ESP packet in bp and + * pass the result up the spi's Conv's read queue. 
+ */ +void +espiput(Proto *esp, Ipifc*, Block *bp) +{ + int payload, nexthdr; + uchar *auth, *espspi; + Conv *c; + Espcb *ecb; + Esptail *et; + Fs *f; + Userhdr *uh; + Versdep vers; + + f = esp->f; + + getverslens(pktipvers(f, &bp), &vers); + + bp = pullupblock(bp, vers.hdrlen + Esptaillen); + if(bp == nil) { + netlog(f, Logesp, "esp: short packet\n"); + return; + } + getpktspiaddrs(bp->rp, &vers); + + qlock(esp); + /* Look for a conversation structure for this port */ + c = convlookup(esp, vers.spi); + if(c == nil) { + espstat(esp, InSpiUnknown, 1); + qunlock(esp); + netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr, + vers.laddr, vers.spi); + icmpnoconv(f, bp); + freeblist(bp); + return; + } + + qlock(c); + qunlock(esp); + + ecb = c->ptcl; + /* too hard to do decryption/authentication on block lists */ + if(bp->next) + bp = concatblock(bp); + + if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) { + espstat(esp, InOtherRxErr, 1); + qunlock(c); + netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr, + vers.laddr, vers.spi); + freeb(bp); + return; + } + + auth = bp->wp - ecb->ahlen; + espspi = vers.version == V4? ((Esp4hdr*)bp->rp)->espspi: + ((Esp6hdr*)bp->rp)->espspi; + + /* compute secure hash and authenticate */ + if(!ecb->auth(ecb, espspi, auth - espspi, auth)) { + static int once = 1; + + espstat(esp, InAuthErrors, 1); + qunlock(c); + netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr, + vers.laddr, vers.spi); + if (!once) { + unsigned len = BLEN(bp), l, b = 128; + + once = 1; + fmtinstall('[', encodefmt); + netlog(f, Logesp, "len %ud\n", len); + for (l = 0; l < len; l += b) { + netlog(f, Logesp, "%.*[\n", + (len - l) > b ? 
b : (len - l), + bp->rp + l); + } + } + freeb(bp); + return; + } + + payload = BLEN(bp) - vers.hdrlen - ecb->ahlen; + if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) { + espstat(esp, InOtherRxErr, 1); + qunlock(c); + netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%lud\n", + vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp)); + freeb(bp); + return; + } + if (0 && payload > 4000) { + static int once = 0; + + if (!once) { + unsigned len = BLEN(bp), l, b = 128; + + once = 1; + fmtinstall('[', encodefmt); + netlog(f, Logesp, "len %ud\n", len); + for (l = 0; l < len; l += b) { + netlog(f, Logesp, "%.*[\n", + (len - l) > b ? b : (len - l), + bp->rp + l); + } + } + } + /* decrypt payload */ + if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) { + espstat(esp, InDecryptErrors, 1); + qunlock(c); + netlog(f, Logesp, "esp: cipher failed %I -> %I!%ld: %s\n", + vers.raddr, vers.laddr, vers.spi, up->errstr); + freeb(bp); + return; + } + + if(ckwindow(esp, ecb, vers.seq) == -1){ + espstat(esp, InReplayErrors, 1); + qunlock(c); + netlog(f, Logesp, + "esp: window fail %I -> %I!%lud: %lux %lux %.64llud\n", + vers.raddr, vers.laddr, vers.spi, vers.seq, + ecb->w0, ecb->w); + freeb(bp); + return; + } + + payload -= Esptaillen; + et = (Esptail*)(bp->rp + vers.hdrlen + payload); + payload -= et->pad + ecb->espivlen; + nexthdr = et->nexthdr; + if(payload <= 0) { + espstat(esp, InPadErrors, 1); + qunlock(c); + netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n", + vers.raddr, vers.laddr, vers.spi); + freeb(bp); + return; + } + + /* trim packet */ + bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */ + bp->wp = bp->rp + payload; + if(ecb->header) { + /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */ + bp->rp -= Userhdrlen; + uh = (Userhdr*)bp->rp; + memset(uh, 0, Userhdrlen); + uh->nexthdr = nexthdr; + } + + /* ingress filtering here? 
*/ + + if(qfull(c->rq)){ + espstat(esp, InQueueFullErr, 1); + /* netlog(f, Logesp, "esp: qfull %I -> %I!%lud\n", vers.raddr, */ + /* vers.laddr, vers.spi); */ + freeblist(bp); + }else { +// print("esp: pass up: %lud\n", BLEN(bp)); + espstat(esp, InUserOctets, BLEN(bp)); + espstat(esp, InPackets, 1); + if(qpass(c->rq, bp) == -1) + espstat(esp, InDropQpass, 1); + } + + qunlock(c); +} + +char* +espctl(Conv *c, char **f, int n) +{ + Espcb *ecb = c->ptcl; + char *e = nil; + + if(strcmp(f[0], "esp") == 0) { + qsetlimit(c->rq, 3*1024*1024); // 2M had qfull drops + e = setalg(ecb, f, n, espalg); + } else if(strcmp(f[0], "ah") == 0) + e = setalg(ecb, f, n, ahalg); + else if(strcmp(f[0], "header") == 0) + ecb->header = 1; + else if(strcmp(f[0], "noheader") == 0) + ecb->header = 0; + else + e = "unknown control request"; + return e; +} + +/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */ +void +espadvise(Proto *esp, Block *bp, char *msg) +{ + Conv *c; + Versdep vers; + + getverslens(pktipvers(esp->f, &bp), &vers); + getpktspiaddrs(bp->rp, &vers); + + qlock(esp); + c = convlookup(esp, vers.spi); + if(c != nil) { + qhangup(c->rq, msg); + qhangup(c->wq, msg); + } + qunlock(esp); + freeblist(bp); +} + +int +espstats(Proto *esp, char *buf, int len) +{ + char *p, *e; + int i; + uvlong *u; + Esppriv *v; + + v = esp->priv; + u = v->stats; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %llud\n", statnames[i], u[i]); + return p - buf; +} + +static int +esplocal(Conv *c, char *buf, int len) +{ + Espcb *ecb = c->ptcl; + int n; + + qlock(c); + if(ecb->incoming) + n = snprint(buf, len, "%I!%lud\n", c->laddr, ecb->spi); + else + n = snprint(buf, len, "%I\n", c->laddr); + qunlock(c); + return n; +} + +static int +espremote(Conv *c, char *buf, int len) +{ + Espcb *ecb = c->ptcl; + int n; + + qlock(c); + if(ecb->incoming) + n = snprint(buf, len, "%I\n", c->raddr); + else + n = snprint(buf, len, "%I!%lud\n", c->raddr, ecb->spi); + qunlock(c); 
+ return n; +} + +static Conv* +convlookup(Proto *esp, ulong spi) +{ + Conv *c, **p; + Espcb *ecb; + + for(p=esp->conv; *p; p++){ + c = *p; + ecb = c->ptcl; + if(ecb->incoming && ecb->spi == spi) + return c; + } + return nil; +} + +static char * +setalg(Espcb *ecb, char **f, int n, Algorithm *alg) +{ + char *p; + uchar *key; + int c, i, nbyte, nchar; + + if(n != 2 && n != 3) + return "bad format"; + for(;; alg++) + if(alg->name == nil) + return "unknown algorithm"; + else if(strcmp(f[1], alg->name) == 0) + break; + + nbyte = (alg->keylen + 7) >> 3; + key = nil; + if(nbyte > 0){ + if(n != 3) + return "bad format"; + p = f[2]; + nchar = strlen(p); + if(nchar != nbyte*2){ + print("nchar %d nbyte %d: [%s]\n", nchar, nbyte, p); + return "bad keylength"; + } + key = smalloc(nbyte); + for(i=0; i= '0' && c <= '9') + c -= '0'; + else if(c >= 'a' && c <= 'f') + c -= 'a' - 10; + else if(c >= 'A' && c <= 'F') + c -= 'A' - 10; + else{ + free(key); + return "bad character in key"; + } + key[i>>1] |= c<<4*(i&1); + } + /* fmtinstall('[', encodefmt); */ + /* netlog(esp->f, Logesp, "%s key is %.*[\n", */ + /* alg->name, nbyte, key); */ + } + alg->init(ecb, alg->name, key, alg->keylen); + free(key); + return nil; +} + + +/* + * null encryption + */ + +static int +nullcipher(Espcb*, uchar*, int) +{ + return 1; +} + +static void +nullespinit(Espcb *ecb, char *name, uchar*, unsigned) +{ + ecb->espalg = name; + ecb->espblklen = 1; + ecb->espivlen = 0; + ecb->cipher = nullcipher; +} + +static int +nullauth(Espcb*, uchar*, int, uchar*) +{ + return 1; +} + +static void +nullahinit(Espcb *ecb, char *name, uchar*, unsigned) +{ + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = 0; + ecb->auth = nullauth; +} + + +/* + * sha1 + */ + +static void +seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen) +{ + int i; + uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen]; + DigestState *digest; + + memset(ipad, 0x36, Hmacblksz); + memset(opad, 0x5c, 
Hmacblksz); + ipad[Hmacblksz] = opad[Hmacblksz] = 0; + for(i = 0; i < klen; i++){ + ipad[i] ^= key[i]; + opad[i] ^= key[i]; + } + digest = sha1(ipad, Hmacblksz, nil, nil); + sha1(t, tlen, innerhash, digest); + digest = sha1(opad, Hmacblksz, nil, nil); + sha1(innerhash, SHA1dlen, hash, digest); +} + +static int +shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth) +{ + int r; + uchar hash[SHA1dlen]; + + memset(hash, 0, SHA1dlen); + seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128)); + r = memcmp(auth, hash, ecb->ahlen) == 0; + memmove(auth, hash, ecb->ahlen); + return r; +} + +static void +shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen) +{ + if(klen != 128) + panic("shaahinit: bad keylen"); + klen /= BI2BY; + + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = BITS2BYTES(96); + ecb->auth = shaauth; + ecb->ahstate = smalloc(klen); + memmove(ecb->ahstate, key, klen); +} + + +/* + * aes + */ + +/* ah_aes_xcbc_mac_96, rfc3566 */ +static int +aesahauth(Espcb *ecb, uchar *t, int tlen, uchar *auth) +{ + int r; + uchar hash[AESdlen]; + + memset(hash, 0, AESdlen); + ecb->ds = hmac_aes(t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(96), hash, + ecb->ds); + r = memcmp(auth, hash, ecb->ahlen) == 0; + memmove(auth, hash, ecb->ahlen); + return r; +} + +static void +aesahinit(Espcb *ecb, char *name, uchar *key, unsigned klen) +{ + if(klen != 128) + panic("aesahinit: keylen not 128"); + klen /= BI2BY; + + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = BITS2BYTES(96); + ecb->auth = aesahauth; + ecb->ahstate = smalloc(klen); + memmove(ecb->ahstate, key, klen); +} + +static int +aescbccipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */ +{ + uchar tmp[AESbsize], q[AESbsize]; + uchar *pp, *tp, *ip, *eip, *ep; + AESstate *ds = ecb->espstate; + + ep = p + n; + if(ecb->incoming) { + memmove(ds->ivec, p, AESbsize); + p += AESbsize; + while(p < ep){ + memmove(tmp, p, AESbsize); + aes_decrypt(ds->dkey, ds->rounds, p, q); + memmove(p, q, AESbsize); 
+ tp = tmp; + ip = ds->ivec; + for(eip = ip + AESbsize; ip < eip; ){ + *p++ ^= *ip; + *ip++ = *tp++; + } + } + } else { + memmove(p, ds->ivec, AESbsize); + for(p += AESbsize; p < ep; p += AESbsize){ + pp = p; + ip = ds->ivec; + for(eip = ip + AESbsize; ip < eip; ) + *pp++ ^= *ip++; + aes_encrypt(ds->ekey, ds->rounds, p, q); + memmove(ds->ivec, q, AESbsize); + memmove(p, q, AESbsize); + } + } + return 1; +} + +static void +aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n) +{ + uchar key[Aeskeysz], ivec[Aeskeysz]; + int i; + + n = BITS2BYTES(n); + if(n > Aeskeysz) + n = Aeskeysz; + memset(key, 0, sizeof(key)); + memmove(key, k, n); + for(i = 0; i < Aeskeysz; i++) + ivec[i] = nrand(256); + ecb->espalg = name; + ecb->espblklen = Aesblk; + ecb->espivlen = Aesblk; + ecb->cipher = aescbccipher; + ecb->espstate = smalloc(sizeof(AESstate)); + setupAESstate(ecb->espstate, key, n /* keybytes */, ivec); +} + +static int +aesctrcipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */ +{ + uchar tmp[AESbsize], q[AESbsize]; + uchar *pp, *tp, *ip, *eip, *ep; + AESstate *ds = ecb->espstate; + + ep = p + n; + if(ecb->incoming) { + memmove(ds->ivec, p, AESbsize); + p += AESbsize; + while(p < ep){ + memmove(tmp, p, AESbsize); + aes_decrypt(ds->dkey, ds->rounds, p, q); + memmove(p, q, AESbsize); + tp = tmp; + ip = ds->ivec; + for(eip = ip + AESbsize; ip < eip; ){ + *p++ ^= *ip; + *ip++ = *tp++; + } + } + } else { + memmove(p, ds->ivec, AESbsize); + for(p += AESbsize; p < ep; p += AESbsize){ + pp = p; + ip = ds->ivec; + for(eip = ip + AESbsize; ip < eip; ) + *pp++ ^= *ip++; + aes_encrypt(ds->ekey, ds->rounds, p, q); + memmove(ds->ivec, q, AESbsize); + memmove(p, q, AESbsize); + } + } + return 1; +} + +static void +aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n) +{ + uchar key[Aesblk], ivec[Aesblk]; + int i; + + n = BITS2BYTES(n); + if(n > Aeskeysz) + n = Aeskeysz; + memset(key, 0, sizeof(key)); + memmove(key, k, n); + for(i = 0; i < Aesblk; i++) + ivec[i] = 
nrand(256); + ecb->espalg = name; + ecb->espblklen = Aesblk; + ecb->espivlen = Aesblk; + ecb->cipher = aesctrcipher; + ecb->espstate = smalloc(sizeof(AESstate)); + setupAESstate(ecb->espstate, key, n /* keybytes */, ivec); +} + + +/* + * md5 + */ + +static void +seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen) +{ + int i; + uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen]; + DigestState *digest; + + memset(ipad, 0x36, Hmacblksz); + memset(opad, 0x5c, Hmacblksz); + ipad[Hmacblksz] = opad[Hmacblksz] = 0; + for(i = 0; i < klen; i++){ + ipad[i] ^= key[i]; + opad[i] ^= key[i]; + } + digest = md5(ipad, Hmacblksz, nil, nil); + md5(t, tlen, innerhash, digest); + digest = md5(opad, Hmacblksz, nil, nil); + md5(innerhash, MD5dlen, hash, digest); +} + +static int +md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth) +{ + uchar hash[MD5dlen]; + int r; + + memset(hash, 0, MD5dlen); + seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128)); + r = memcmp(auth, hash, ecb->ahlen) == 0; + memmove(auth, hash, ecb->ahlen); + return r; +} + +static void +md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen) +{ + if(klen != 128) + panic("md5ahinit: bad keylen"); + klen = BITS2BYTES(klen); + ecb->ahalg = name; + ecb->ahblklen = 1; + ecb->ahlen = BITS2BYTES(96); + ecb->auth = md5auth; + ecb->ahstate = smalloc(klen); + memmove(ecb->ahstate, key, klen); +} + + +/* + * des, single and triple + */ + +static int +descipher(Espcb *ecb, uchar *p, int n) +{ + DESstate *ds = ecb->espstate; + + if(ecb->incoming) { + memmove(ds->ivec, p, Desblk); + desCBCdecrypt(p + Desblk, n - Desblk, ds); + } else { + memmove(p, ds->ivec, Desblk); + desCBCencrypt(p + Desblk, n - Desblk, ds); + } + return 1; +} + +static int +des3cipher(Espcb *ecb, uchar *p, int n) +{ + DES3state *ds = ecb->espstate; + + if(ecb->incoming) { + memmove(ds->ivec, p, Desblk); + des3CBCdecrypt(p + Desblk, n - Desblk, ds); + } else { + memmove(p, ds->ivec, Desblk); + 
des3CBCencrypt(p + Desblk, n - Desblk, ds); + } + return 1; +} + +static void +desespinit(Espcb *ecb, char *name, uchar *k, unsigned n) +{ + uchar key[Desblk], ivec[Desblk]; + int i; + + n = BITS2BYTES(n); + if(n > Desblk) + n = Desblk; + memset(key, 0, sizeof(key)); + memmove(key, k, n); + for(i = 0; i < Desblk; i++) + ivec[i] = nrand(256); + ecb->espalg = name; + ecb->espblklen = Desblk; + ecb->espivlen = Desblk; + + ecb->cipher = descipher; + ecb->espstate = smalloc(sizeof(DESstate)); + setupDESstate(ecb->espstate, key, ivec); +} + +static void +des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n) +{ + uchar key[3][Desblk], ivec[Desblk]; + int i; + + n = BITS2BYTES(n); + if(n > Des3keysz) + n = Des3keysz; + memset(key, 0, sizeof(key)); + memmove(key, k, n); + for(i = 0; i < Desblk; i++) + ivec[i] = nrand(256); + ecb->espalg = name; + ecb->espblklen = Desblk; + ecb->espivlen = Desblk; + + ecb->cipher = des3cipher; + ecb->espstate = smalloc(sizeof(DES3state)); + setupDES3state(ecb->espstate, key, ivec); +} + + +/* + * interfacing to devip + */ +void +espinit(Fs *fs) +{ + Proto *esp; + + esp = smalloc(sizeof(Proto)); + esp->priv = smalloc(sizeof(Esppriv)); + esp->name = "esp"; + esp->connect = espconnect; + esp->announce = nil; + esp->ctl = espctl; + esp->state = espstate; + esp->create = espcreate; + esp->close = espclose; + esp->rcv = espiput; + esp->advise = espadvise; + esp->stats = espstats; + esp->local = esplocal; + esp->remote = espremote; + esp->ipproto = IP_ESPPROTO; + esp->nc = Nchans; + esp->ptclsize = sizeof(Espcb); + + Fsproto(fs, esp); +} diff -Nru /sys/src/9k/ip/ethermedium.c /sys/src/9k/ip/ethermedium.c --- /sys/src/9k/ip/ethermedium.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ethermedium.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,770 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" +#include "ip.h" +#include "ipv6.h" + +typedef struct 
Etherhdr Etherhdr; +struct Etherhdr +{ + uchar d[6]; + uchar s[6]; + uchar t[2]; +}; + +static uchar ipbroadcast[IPaddrlen] = { + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, +}; + +static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +static void etherread4(void *a); +static void etherread6(void *a); +static void etherbind(Ipifc *ifc, int argc, char **argv); +static void etherunbind(Ipifc *ifc); +static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip); +static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia); +static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia); +static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac); +static void sendarp(Ipifc *ifc, Arpent *a); +static void sendgarp(Ipifc *ifc, uchar*); +static int multicastea(uchar *ea, uchar *ip); +static void recvarpproc(void*); +static void resolveaddr6(Ipifc *ifc, Arpent *a); +static void etherpref2addr(uchar *pref, uchar *ea); + +Medium ethermedium = +{ +.name= "ether", +.hsize= 14, +.mintu= 60, +.maxtu= 1514, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= etherbwrite, +.addmulti= etheraddmulti, +.remmulti= etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +Medium gbemedium = +{ +.name= "gbe", +.hsize= 14, +.mintu= 60, +.maxtu= 9014, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= etherbwrite, +.addmulti= etheraddmulti, +.remmulti= etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +typedef struct Etherrock Etherrock; +struct Etherrock +{ + Fs *f; /* file system we belong to */ + Proc *arpp; /* arp process */ + Proc *read4p; /* reading process (v4)*/ + Proc *read6p; /* reading process (v6)*/ + Chan *mchan4; /* Data channel for v4 */ + Chan *achan; /* Arp channel */ + Chan *cchan4; /* Control channel for v4 */ + Chan *mchan6; /* Data channel for v6 */ + Chan *cchan6; /* Control channel for v6 */ +}; + 
+/* + * ethernet arp request + */ +enum +{ + ETARP = 0x0806, + ETIP4 = 0x0800, + ETIP6 = 0x86DD, + ARPREQUEST = 1, + ARPREPLY = 2, +}; + +typedef struct Etherarp Etherarp; +struct Etherarp +{ + uchar d[6]; + uchar s[6]; + uchar type[2]; + uchar hrd[2]; + uchar pro[2]; + uchar hln; + uchar pln; + uchar op[2]; + uchar sha[6]; + uchar spa[4]; + uchar tha[6]; + uchar tpa[4]; +}; + +static char *nbmsg = "nonblocking"; + +/* + * called to bind an IP ifc to an ethernet device + * called with ifc wlock'd + */ +static void +etherbind(Ipifc *ifc, int argc, char **argv) +{ + Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6, *schan; + char addr[Maxpath]; //char addr[2*KNAMELEN]; + char dir[Maxpath]; //char dir[2*KNAMELEN]; + char *buf; + int n; + char *ptr; + Etherrock *er; + + if(argc < 2) + error(Ebadarg); + + mchan4 = cchan4 = achan = mchan6 = cchan6 = nil; + buf = nil; + if(waserror()){ + if(mchan4 != nil) + cclose(mchan4); + if(cchan4 != nil) + cclose(cchan4); + if(achan != nil) + cclose(achan); + if(mchan6 != nil) + cclose(mchan6); + if(cchan6 != nil) + cclose(cchan6); + if(buf != nil) + free(buf); + nexterror(); + } + + /* + * open ipv4 conversation + * + * the dial will fail if the type is already open on + * this device. 
+ */ + snprint(addr, sizeof(addr), "%s!0x800", argv[2]); /* ETIP4 */ + mchan4 = chandial(addr, nil, dir, &cchan4); + + /* + * make it non-blocking + */ + cchan4->dev->write(cchan4, nbmsg, strlen(nbmsg), 0); + + /* + * get mac address and speed + */ + snprint(addr, sizeof(addr), "%s/stats", argv[2]); + buf = smalloc(512); + schan = namec(addr, Aopen, OREAD, 0); + if(waserror()){ + cclose(schan); + nexterror(); + } + n = schan->dev->read(schan, buf, 511, 0); + cclose(schan); + poperror(); + buf[n] = 0; + + ptr = strstr(buf, "addr: "); + if(!ptr) + error(Eio); + ptr += 6; + parsemac(ifc->mac, ptr, 6); + + ptr = strstr(buf, "mbps: "); + if(ptr){ + ptr += 6; + ifc->mbps = atoi(ptr); + } else + ifc->mbps = 100; + + /* + * open arp conversation + */ + snprint(addr, sizeof(addr), "%s!0x806", argv[2]); /* ETARP */ + achan = chandial(addr, nil, nil, nil); + + /* + * open ipv6 conversation + * + * the dial will fail if the type is already open on + * this device. + */ + snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); /* ETIP6 */ + mchan6 = chandial(addr, nil, dir, &cchan6); + + /* + * make it non-blocking + */ + cchan6->dev->write(cchan6, nbmsg, strlen(nbmsg), 0); + + er = smalloc(sizeof(*er)); + er->mchan4 = mchan4; + er->cchan4 = cchan4; + er->achan = achan; + er->mchan6 = mchan6; + er->cchan6 = cchan6; + er->f = ifc->conv->p->f; + ifc->arg = er; + + free(buf); + poperror(); + + kproc("etherread4", etherread4, ifc); + kproc("recvarpproc", recvarpproc, ifc); + kproc("etherread6", etherread6, ifc); +} + +/* + * called with ifc wlock'd + */ +static void +etherunbind(Ipifc *ifc) +{ + Etherrock *er = ifc->arg; + + if(er->read4p) + postnote(er->read4p, 1, "unbind", 0); + if(er->read6p) + postnote(er->read6p, 1, "unbind", 0); + if(er->arpp) + postnote(er->arpp, 1, "unbind", 0); + + /* wait for readers to die */ + while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0) + tsleep(&up->sleep, return0, 0, 300); + + if(er->mchan4 != nil) + cclose(er->mchan4); + if(er->achan != 
nil) + cclose(er->achan); + if(er->cchan4 != nil) + cclose(er->cchan4); + if(er->mchan6 != nil) + cclose(er->mchan6); + if(er->cchan6 != nil) + cclose(er->cchan6); + + free(er); +} + +/* + * called by ipoput with a single block to write with ifc rlock'd + */ +static void +etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip) +{ + Etherhdr *eh; + Arpent *a; + uchar mac[6]; + Etherrock *er = ifc->arg; + + /* get mac address of destination */ + a = arpget(er->f->arp, bp, version, ifc, ip, mac); + if(a){ + /* check for broadcast or multicast */ + bp = multicastarp(er->f, a, ifc->medium, mac); + if(bp==nil){ + switch(version){ + case V4: + sendarp(ifc, a); + break; + case V6: + resolveaddr6(ifc, a); + break; + default: + panic("etherbwrite: version %d", version); + } + return; + } + } + + /* make it a single block with space for the ether header */ + bp = padblock(bp, ifc->medium->hsize); + if(bp->next) + bp = concatblock(bp); + if(BLEN(bp) < ifc->mintu) + bp = adjustblock(bp, ifc->mintu); + eh = (Etherhdr*)bp->rp; + + /* copy in mac addresses and ether type */ + memmove(eh->s, ifc->mac, sizeof(eh->s)); + memmove(eh->d, mac, sizeof(eh->d)); + + switch(version){ + case V4: + eh->t[0] = 0x08; + eh->t[1] = 0x00; + er->mchan4->dev->bwrite(er->mchan4, bp, 0); + break; + case V6: + eh->t[0] = 0x86; + eh->t[1] = 0xDD; + er->mchan6->dev->bwrite(er->mchan6, bp, 0); + break; + default: + panic("etherbwrite2: version %d", version); + } + ifc->out++; +} + + +/* + * process to read from the ethernet + */ +static void +etherread4(void *a) +{ + Ipifc *ifc; + Block *bp; + Etherrock *er; + + ifc = a; + er = ifc->arg; + er->read4p = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->read4p = 0; + pexit("hangup", 1); + } + for(;;){ + bp = er->mchan4->dev->bread(er->mchan4, ifc->maxtu, 0); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + bp->rp += ifc->medium->hsize; + if(ifc->lifc == nil) + 
freeb(bp); + else + ipiput4(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + + +/* + * process to read from the ethernet, IPv6 + */ +static void +etherread6(void *a) +{ + Ipifc *ifc; + Block *bp; + Etherrock *er; + + ifc = a; + er = ifc->arg; + er->read6p = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->read6p = 0; + pexit("hangup", 1); + } + for(;;){ + bp = er->mchan6->dev->bread(er->mchan6, ifc->maxtu, 0); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + bp->rp += ifc->medium->hsize; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput6(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +static void +etheraddmulti(Ipifc *ifc, uchar *a, uchar *) +{ + uchar mac[6]; + char buf[64]; + Etherrock *er = ifc->arg; + int version; + + version = multicastea(mac, a); + sprint(buf, "addmulti %E", mac); + switch(version){ + case V4: + er->cchan4->dev->write(er->cchan4, buf, strlen(buf), 0); + break; + case V6: + er->cchan6->dev->write(er->cchan6, buf, strlen(buf), 0); + break; + default: + panic("etheraddmulti: version %d", version); + } +} + +static void +etherremmulti(Ipifc *ifc, uchar *a, uchar *) +{ + uchar mac[6]; + char buf[64]; + Etherrock *er = ifc->arg; + int version; + + version = multicastea(mac, a); + sprint(buf, "remmulti %E", mac); + switch(version){ + case V4: + er->cchan4->dev->write(er->cchan4, buf, strlen(buf), 0); + break; + case V6: + er->cchan6->dev->write(er->cchan6, buf, strlen(buf), 0); + break; + default: + panic("etherremmulti: version %d", version); + } +} + +/* + * send an ethernet arp + * (only v4, v6 uses the neighbor discovery, rfc1970) + */ +static void +sendarp(Ipifc *ifc, Arpent *a) +{ + int n; + Block *bp; + Etherarp *e; + Etherrock *er = ifc->arg; + + /* don't do anything if it's been less than a second since the last */ + if(NOW - a->ctime < 1000){ + arprelease(er->f->arp, a); + return; + } + + /* remove all but the last message */ + 
while((bp = a->hold) != nil){ + if(bp == a->last) + break; + a->hold = bp->list; + freeblist(bp); + } + + /* try to keep it around for a second more */ + a->ctime = NOW; + arprelease(er->f->arp, a); + + n = sizeof(Etherarp); + if(n < a->type->mintu) + n = a->type->mintu; + bp = allocb(n); + memset(bp->rp, 0, n); + e = (Etherarp*)bp->rp; + memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa)); + ipv4local(ifc, e->spa); + memmove(e->sha, ifc->mac, sizeof(e->sha)); + memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */ + memmove(e->s, ifc->mac, sizeof(e->s)); + + hnputs(e->type, ETARP); + hnputs(e->hrd, 1); + hnputs(e->pro, ETIP4); + e->hln = sizeof(e->sha); + e->pln = sizeof(e->spa); + hnputs(e->op, ARPREQUEST); + bp->wp += n; + + er->achan->dev->bwrite(er->achan, bp, 0); +} + +static void +resolveaddr6(Ipifc *ifc, Arpent *a) +{ + int sflag; + Block *bp; + Etherrock *er = ifc->arg; + uchar ipsrc[IPaddrlen]; + + /* don't do anything if it's been less than a second since the last */ + if(NOW - a->ctime < ReTransTimer){ + arprelease(er->f->arp, a); + return; + } + + /* remove all but the last message */ + while((bp = a->hold) != nil){ + if(bp == a->last) + break; + a->hold = bp->list; + freeblist(bp); + } + + /* try to keep it around for a second more */ + a->ctime = NOW; + a->rtime = NOW + ReTransTimer; + if(a->rxtsrem <= 0) { + arprelease(er->f->arp, a); + return; + } + + a->rxtsrem--; + arprelease(er->f->arp, a); + + if(sflag = ipv6anylocal(ifc, ipsrc)) + icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); +} + +/* + * send a gratuitous arp to refresh arp caches + */ +static void +sendgarp(Ipifc *ifc, uchar *ip) +{ + int n; + Block *bp; + Etherarp *e; + Etherrock *er = ifc->arg; + + /* don't arp for our initial non address */ + if(ipcmp(ip, IPnoaddr) == 0) + return; + + n = sizeof(Etherarp); + if(n < ifc->medium->mintu) + n = ifc->medium->mintu; + bp = allocb(n); + memset(bp->rp, 0, n); + e = (Etherarp*)bp->rp; + memmove(e->tpa, ip+IPv4off, sizeof(e->tpa)); + 
memmove(e->spa, ip+IPv4off, sizeof(e->spa));
+	memmove(e->sha, ifc->mac, sizeof(e->sha));
+	memset(e->d, 0xff, sizeof(e->d));		/* ethernet broadcast */
+	memmove(e->s, ifc->mac, sizeof(e->s));
+
+	hnputs(e->type, ETARP);
+	hnputs(e->hrd, 1);
+	hnputs(e->pro, ETIP4);
+	e->hln = sizeof(e->sha);
+	e->pln = sizeof(e->spa);
+	hnputs(e->op, ARPREQUEST);
+	bp->wp += n;
+
+	er->achan->dev->bwrite(er->achan, bp, 0);
+}
+
+/*
+ * read one frame from the arp channel and act on it.
+ * replies are entered into the arp table unless they carry a
+ * broadcast address or claim one of our own addresses.
+ * requests refresh the sender's entry and are answered when the
+ * target is a local address or one we proxy for.
+ * the received block is always freed here.
+ */
+static void
+recvarp(Ipifc *ifc)
+{
+	int n;
+	Block *ebp, *rbp;
+	Etherarp *e, *r;
+	uchar ip[IPaddrlen];
+	static uchar eprinted[4], nullspa[4];
+	Etherrock *er = ifc->arg;
+
+	ebp = er->achan->dev->bread(er->achan, ifc->maxtu, 0);
+	if(ebp == nil)
+		return;
+
+	/* don't parse fields that lie beyond the end of a short frame */
+	if(BLEN(ebp) < sizeof(Etherarp)){
+		freeb(ebp);
+		return;
+	}
+
+	e = (Etherarp*)ebp->rp;
+	switch(nhgets(e->op)) {
+	default:
+		break;
+
+	case ARPREPLY:
+		/* check for machine using my ip address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				print("arprep: 0x%E/0x%E also has ip addr %V\n",
+					e->s, e->sha, e->spa);
+				break;
+			}
+		}
+
+		/* make sure we're not entering broadcast addresses */
+		if(ipcmp(ip, ipbroadcast) == 0 ||
+			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+			/* %I wants a full IPaddrlen address: pass ip, not the 4-byte spa */
+			print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
+				e->s, e->sha, ip);
+			break;
+		}
+
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		break;
+
+	case ARPREQUEST:
+		/* don't answer arps till we know who we are */
+		if(ifc->lifc == 0)
+			break;
+
+		/* check for machine using my ip or ether address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+					/* print only once */
+					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					memmove(eprinted, e->spa, sizeof(e->spa));
+				}
+			}
+		} else {
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+				/* Ignore ARP probes from ourself. */
+				if(memcmp(e->spa, nullspa, sizeof(e->spa)) != 0)
+					print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				break;
+			}
+		}
+
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+		/* answer only requests for our address or systems we're proxying for */
+		v4tov6(ip, e->tpa);
+		if(!iplocalonifc(ifc, ip))
+		if(!ipproxyifc(er->f, ifc, ip))
+			break;
+
+		n = sizeof(Etherarp);
+		if(n < ifc->mintu)
+			n = ifc->mintu;
+		rbp = allocb(n);
+		r = (Etherarp*)rbp->rp;
+		/*
+		 * zero all n bytes, not just the arp header: the padding
+		 * up to mintu is transmitted too and must not carry
+		 * whatever happened to be in the freshly allocated block.
+		 */
+		memset(r, 0, n);
+		hnputs(r->type, ETARP);
+		hnputs(r->hrd, 1);
+		hnputs(r->pro, ETIP4);
+		r->hln = sizeof(r->sha);
+		r->pln = sizeof(r->spa);
+		hnputs(r->op, ARPREPLY);
+		memmove(r->tha, e->sha, sizeof(r->tha));
+		memmove(r->tpa, e->spa, sizeof(r->tpa));
+		memmove(r->sha, ifc->mac, sizeof(r->sha));
+		memmove(r->spa, e->tpa, sizeof(r->spa));
+		memmove(r->d, e->sha, sizeof(r->d));
+		memmove(r->s, ifc->mac, sizeof(r->s));
+		rbp->wp += n;
+
+		er->achan->dev->bwrite(er->achan, rbp, 0);
+	}
+	freeb(ebp);
+}
+
+static void
+recvarpproc(void *v)
+{
+	Ipifc *ifc = v;
+	Etherrock *er = ifc->arg;
+
+	er->arpp = up;
+	if(waserror()){
+		er->arpp = 0;
+		pexit("hangup", 1);
+	}
+	for(;;)
+		recvarp(ifc);
+}
+
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+	int x;
+
+	switch(x = ipismulticast(ip)){
+	case V4:
+		ea[0] = 0x01;
+		ea[1] = 0x00;
+		ea[2] = 0x5e;
+		ea[3] = ip[13] & 0x7f;
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+		break;
+	case V6:
+		ea[0] = 0x33;
+		ea[1] = 0x33;
+		ea[2] = ip[12];
+		ea[3] = ip[13];
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+		break;
+	}
+	return x;
+}
+
+/*
+ * fill in an arp entry for broadcast or multicast
+ * addresses.  Return the first queued packet for the
+ * IP address.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+	/* is it broadcast?
*/
+	switch(ipforme(f, a->ip)){
+	case Runi:
+		return nil;
+	case Rbcast:
+		memset(mac, 0xff, 6);
+		return arpresolve(f->arp, a, medium, mac);
+	default:
+		break;
+	}
+
+	/* if multicast, fill in mac */
+	switch(multicastea(mac, a->ip)){
+	case V4:
+	case V6:
+		return arpresolve(f->arp, a, medium, mac);
+	}
+
+	/* let arp take care of it */
+	return nil;
+}
+
+void
+ethermediumlink(void)
+{
+	addipmedium(&ethermedium);
+	addipmedium(&gbemedium);
+}
+
+
+static void
+etherpref2addr(uchar *pref, uchar *ea)
+{
+	pref[8] = ea[0] | 0x2;
+	pref[9] = ea[1];
+	pref[10] = ea[2];
+	pref[11] = 0xFF;
+	pref[12] = 0xFE;
+	pref[13] = ea[3];
+	pref[14] = ea[4];
+	pref[15] = ea[5];
+}
diff -Nru /sys/src/9k/ip/gre.c /sys/src/9k/ip/gre.c
--- /sys/src/9k/ip/gre.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/ip/gre.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,947 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+enum {
+	GRE_IPONLY	= 12,		/* size of ip header */
+	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
+	IP_GREPROTO	= 47,
+
+	GRErxms		= 200,
+	GREtickms	= 100,
+	GREmaxxmit	= 10,
+
+	K		= 1024,
+	GREqlen		= 256 * K,
+
+	GRE_cksum	= 0x8000,
+	GRE_routing	= 0x4000,
+	GRE_key		= 0x2000,
+	GRE_seq		= 0x1000,
+
+	Nring		= 1 << 10,	/* power of two, please */
+	Ringmask	= Nring - 1,
+
+	GREctlraw	= 0,
+	GREctlcooked,
+	GREctlretunnel,
+	GREctlreport,
+	GREctldlsuspend,
+	GREctlulsuspend,
+	GREctldlresume,
+	GREctlulresume,
+	GREctlforward,
+	GREctlulkey,
+	Ncmds,
+};
+
+typedef struct GREhdr GREhdr;
+struct GREhdr{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	len[2];		/* packet length (including headers) */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* checksum */
+	uchar	src[4];		/* Ip
source */ + uchar dst[4]; /* Ip destination */ + + /* gre header */ + uchar flags[2]; + uchar eproto[2]; /* encapsulation protocol */ +}; + +typedef struct GREpriv GREpriv; +struct GREpriv{ + /* non-MIB stats */ + uvlong lenerr; /* short packet */ +}; + +typedef struct Bring Bring; +struct Bring{ + Block *ring[Nring]; + long produced; + long consumed; +}; + +typedef struct GREconv GREconv; +struct GREconv{ + int raw; + + /* Retunnelling information. v4 only */ + uchar north[4]; /* HA */ + uchar south[4]; /* Base station */ + uchar hoa[4]; /* Home address */ + uchar coa[4]; /* Careof address */ + ulong seq; /* Current sequence # */ + int dlsusp; /* Downlink suspended? */ + int ulsusp; /* Uplink suspended? */ + ulong ulkey; /* GRE key */ + + QLock lock; /* Lock for rings */ + Bring dlpending; /* Ring of pending packets */ + Bring dlbuffered; /* Received while suspended */ + Bring ulbuffered; /* Received while suspended */ +}; + +typedef struct Metablock Metablock; +struct Metablock{ + uchar *rp; + ulong seq; +}; + +static char *grectlcooked(Conv *, int, char **); +static char *grectldlresume(Conv *, int, char **); +static char *grectldlsuspend(Conv *, int, char **); +static char *grectlforward(Conv *, int, char **); +static char *grectlraw(Conv *, int, char **); +static char *grectlreport(Conv *, int, char **); +static char *grectlretunnel(Conv *, int, char **); +static char *grectlulkey(Conv *, int, char **); +static char *grectlulresume(Conv *, int, char **); +static char *grectlulsuspend(Conv *, int, char **); + +static struct{ + char *cmd; + int argc; + char *(*f)(Conv *, int, char **); +} grectls[Ncmds] = { +[GREctlraw] = { "raw", 1, grectlraw, }, +[GREctlcooked] = { "cooked", 1, grectlcooked, }, +[GREctlretunnel]= { "retunnel", 5, grectlretunnel, }, +[GREctlreport] = { "report", 2, grectlreport, }, +[GREctldlsuspend]= { "dlsuspend", 1, grectldlsuspend,}, +[GREctlulsuspend]= { "ulsuspend", 1, grectlulsuspend,}, +[GREctldlresume]= { "dlresume", 1, grectldlresume, 
}, +[GREctlulresume]= { "ulresume", 1, grectlulresume, }, +[GREctlforward] = { "forward", 2, grectlforward, }, +[GREctlulkey] = { "ulkey", 2, grectlulkey, }, +}; + +static uchar nulladdr[4]; +static char *sessend = "session end"; + +static void grekick(void *x, Block *bp); +static char *gresetup(Conv *, char *, char *, char *); + +uvlong grepdin, grepdout, grebdin, grebdout; +uvlong grepuin, grepuout, grebuin, grebuout; + +static Block * +getring(Bring *r) +{ + Block *bp; + + if(r->consumed == r->produced) + return nil; + + bp = r->ring[r->consumed & Ringmask]; + r->ring[r->consumed & Ringmask] = nil; + r->consumed++; + return bp; +} + +static void +addring(Bring *r, Block *bp) +{ + Block *tbp; + + if(r->produced - r->consumed > Ringmask){ + /* Full! */ + tbp = r->ring[r->produced & Ringmask]; + assert(tbp); + freeb(tbp); + r->consumed++; + } + r->ring[r->produced & Ringmask] = bp; + r->produced++; +} + +static char * +greconnect(Conv *c, char **argv, int argc) +{ + Proto *p; + char *err; + Conv *tc, **cp, **ecp; + + err = Fsstdconnect(c, argv, argc); + if(err != nil) + return err; + + /* make sure noone's already connected to this other sys */ + p = c->p; + qlock(p); + ecp = &p->conv[p->nc]; + for(cp = p->conv; cp < ecp; cp++){ + tc = *cp; + if(tc == nil) + break; + if(tc == c) + continue; + if(tc->rport == c->rport && ipcmp(tc->raddr, c->raddr) == 0){ + err = "already connected to that addr/proto"; + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + break; + } + } + qunlock(p); + + if(err != nil) + return err; + Fsconnected(c, nil); + + return nil; +} + +static void +grecreate(Conv *c) +{ + c->rq = qopen(GREqlen, Qmsg, 0, c); + c->wq = qbypass(grekick, c); +} + +static int +grestate(Conv *c, char *state, int n) +{ + GREconv *grec; + char *ep, *p; + + grec = c->ptcl; + p = state; + ep = p + n; + p = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx " + "pending %uld %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n", + c->inuse? 
"Open ": "Closed ", + grec->raw? "raw ": "", + grec->dlsusp? "DL suspended ": "", + grec->ulsusp? "UL suspended ": "", + grec->hoa, grec->north, grec->south, grec->seq, + grec->dlpending.consumed, grec->dlpending.produced, + grec->dlbuffered.consumed, grec->dlbuffered.produced, + grec->ulbuffered.consumed, grec->ulbuffered.produced, + grec->ulkey); + return p - state; +} + +static char* +greannounce(Conv*, char**, int) +{ + return "gre does not support announce"; +} + +static void +greclose(Conv *c) +{ + GREconv *grec; + Block *bp; + + grec = c->ptcl; + + /* Make sure we don't forward any more packets */ + memset(grec->hoa, 0, sizeof grec->hoa); + memset(grec->north, 0, sizeof grec->north); + memset(grec->south, 0, sizeof grec->south); + + qlock(&grec->lock); + while((bp = getring(&grec->dlpending)) != nil) + freeb(bp); + + while((bp = getring(&grec->dlbuffered)) != nil) + freeb(bp); + + while((bp = getring(&grec->ulbuffered)) != nil) + freeb(bp); + + grec->dlpending.produced = grec->dlpending.consumed = 0; + grec->dlbuffered.produced = grec->dlbuffered.consumed = 0; + grec->ulbuffered.produced = grec->ulbuffered.consumed = 0; + qunlock(&grec->lock); + + grec->raw = 0; + grec->seq = 0; + grec->dlsusp = grec->ulsusp = 1; + + qhangup(c->rq, sessend); + qhangup(c->wq, sessend); + qhangup(c->eq, sessend); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = c->rport = 0; +} + +static void +grekick(void *x, Block *bp) +{ + Conv *c; + GREconv *grec; + GREhdr *gre; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + + if(bp == nil) + return; + + c = x; + grec = c->ptcl; + + /* Make space to fit ip header (gre header already there) */ + bp = padblock(bp, GRE_IPONLY); + if(bp == nil) + return; + + /* make sure the message has a GRE header */ + bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE); + if(bp == nil) + return; + + gre = (GREhdr *)bp->rp; + gre->vihl = IP_VER4; + + if(grec->raw == 0){ + v4tov6(raddr, gre->dst); + if(ipcmp(raddr, v4prefix) == 0) + 
memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen); + v4tov6(laddr, gre->src); + if(ipcmp(laddr, v4prefix) == 0){ + if(ipcmp(c->laddr, IPnoaddr) == 0) + /* pick interface closest to dest */ + findlocalip(c->p->f, c->laddr, raddr); + memmove(gre->src, c->laddr + IPv4off, sizeof gre->src); + } + hnputs(gre->eproto, c->rport); + } + + gre->proto = IP_GREPROTO; + gre->frag[0] = gre->frag[1] = 0; + + grepdout++; + grebdout += BLEN(bp); + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +static void +gredownlink(Conv *c, Block *bp) +{ + Metablock *m; + GREconv *grec; + GREhdr *gre; + int hdrlen, suspended, extra; + ushort flags; + ulong seq; + + gre = (GREhdr *)bp->rp; + if(gre->ttl == 1){ + freeb(bp); + return; + } + + /* + * We've received a packet with a GRE header and we need to + * re-adjust the packet header to strip all unwanted parts + * but leave room for only a sequence number. + */ + grec = c->ptcl; + flags = nhgets(gre->flags); + hdrlen = 0; + if(flags & GRE_cksum) + hdrlen += 2; + if(flags & GRE_routing){ + print("%V routing info present. Discarding packet", gre->src); + freeb(bp); + return; + } + if(flags & (GRE_cksum|GRE_routing)) + hdrlen += 2; /* Offset field */ + if(flags & GRE_key) + hdrlen += 4; + if(flags & GRE_seq) + hdrlen += 4; + + /* + * The outgoing packet only has the sequence number set. Make room + * for the sequence number. + */ + if(hdrlen != sizeof(ulong)){ + extra = hdrlen - sizeof(ulong); + if(extra < 0 && bp->rp - bp->base < -extra){ + print("gredownlink: cannot add sequence number\n"); + freeb(bp); + return; + } + memmove(bp->rp + extra, bp->rp, sizeof(GREhdr)); + bp->rp += extra; + assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong)); + gre = (GREhdr *)bp->rp; + } + seq = grec->seq++; + hnputs(gre->flags, GRE_seq); + hnputl(bp->rp + sizeof(GREhdr), seq); + + /* + * Keep rp and seq at the base. ipoput4 consumes rp for + * refragmentation. 
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet. We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing. I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up. too bad, we lose a packet. this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race. We thought we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* Downlink is suspended: park the packet on the dlbuffered ring */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended. Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	/*
+	 * Make sure the packet does not go away: ipoput4 gets a copy,
+	 * the original stays with us for the dlpending ring.
+	 */
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);	/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+/*
+ * Forward an uplink packet for a retunnelled session.  The outer
+ * addresses are rewritten to coa -> north, the uplink key is inserted
+ * when one is configured, and the block is then either parked on the
+ * ulbuffered ring (uplink suspended) or handed to ipoput4.
+ * The block is consumed on every path.
+ */
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+	long n;
+
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){
+		/* expired: drop it (as gredownlink does) instead of leaking the block */
+		freeb(bp);
+		return;
+	}
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/*
+	 * Add a key, if needed.
+	 */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present. Discarding packet\n",
+				gre->src);
+			freeb(bp);
+			return;
+		}
+
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+
+			/* slide the header back 4 bytes; the gap becomes the key field */
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		/* ipoput4 takes the block, so measure it before the call */
+		n = BLEN(bp);
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+		grepuout++;
+		grebuout += n;
+	}
+	qunlock(&grec->lock);
+}
+
+/*
+ * GRE input.  Packets whose inner IP addresses match a retunnel
+ * session's hoa are forwarded through gredownlink/greuplink;
+ * anything else is matched against raw and conversational sessions
+ * and queued on the conversation's read queue.
+ */
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+	int len, hdrlen;
+	ushort eproto, flags;
+	uchar raddr[IPaddrlen];
+	Conv *c, **p;
+	GREconv *grec;
+	GREhdr *gre;
+	GREpriv *gpriv;
+	Ip4hdr *ip;
+
+	/*
+	 * We don't want to deal with block lists. Ever. The problem is
+	 * that when the block is forwarded, devether.c puts the block into
+	 * a queue that also uses ->next. Just do not use ->next here!
+	 */
+	if(bp->next){
+		len = blocklen(bp);
+		bp = pullupblock(bp, len);
+		if(bp == nil)
+			return;
+		assert(BLEN(bp) == len && bp->next == nil);
+	}
+
+	gre = (GREhdr *)bp->rp;
+	if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+		freeb(bp);
+		return;
+	}
+
+	v4tov6(raddr, gre->src);
+	eproto = nhgets(gre->eproto);
+	flags = nhgets(gre->flags);
+	hdrlen = sizeof(GREhdr);
+
+	/* each optional GRE field that is present lengthens the header */
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%I routing info present. 
Discarding packet\n", raddr); + freeb(bp); + return; + } + if(flags & (GRE_cksum|GRE_routing)) + hdrlen += 2; /* Offset field */ + if(flags & GRE_key) + hdrlen += 4; + if(flags & GRE_seq) + hdrlen += 4; + + if(BLEN(bp) - hdrlen < sizeof(Ip4hdr)){ + print("greretunnel: packet too short (s=%V d=%V)\n", + gre->src, gre->dst); + freeb(bp); + return; + } + ip = (Ip4hdr *)(bp->rp + hdrlen); + + qlock(proto); + /* + * Look for a conversation structure for this port and address, or + * match the retunnel part, or match on the raw flag. + */ + for(p = proto->conv; *p; p++) { + c = *p; + + if(c->inuse == 0) + continue; + + /* + * Do not stop this session - blocking here + * implies that etherread is blocked. + */ + grec = c->ptcl; + if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){ + grepdin++; + grebdin += BLEN(bp); + gredownlink(c, bp); + qunlock(proto); + return; + } + + if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){ + grepuin++; + grebuin += BLEN(bp); + greuplink(c, bp); + qunlock(proto); + return; + } + } + + /* + * when we get here, none of the forwarding tunnels matched. now + * try to match on raw and conversational sessions. + */ + for(c = nil, p = proto->conv; *p; p++) { + c = *p; + + if(c->inuse == 0) + continue; + + /* + * Do not stop this session - blocking here + * implies that etherread is blocked. + */ + grec = c->ptcl; + if(c->rport == eproto && + (grec->raw || ipcmp(c->raddr, raddr) == 0)) + break; + } + + qunlock(proto); + + if(*p == nil){ + freeb(bp); + return; + } + + /* + * Trim the packet down to data size + */ + len = nhgets(gre->len) - GRE_IPONLY; + if(len < GRE_IPPLUSGRE){ + freeb(bp); + return; + } + + bp = trimblock(bp, GRE_IPONLY, len); + if(bp == nil){ + gpriv = proto->priv; + gpriv->lenerr++; + return; + } + + /* + * Can't delimit packet so pull it all into one block. 
+ */ + if(qlen(c->rq) > GREqlen) + freeb(bp); + else{ + bp = concatblock(bp); + if(bp == 0) + panic("greiput"); + qpass(c->rq, bp); + } +} + +int +grestats(Proto *gre, char *buf, int len) +{ + GREpriv *gpriv; + + gpriv = gre->priv; + return snprint(buf, len, + "gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n", + grepdin, grepdout, grepuin, grepuout, + grebdin, grebdout, grebuin, grebuout, gpriv->lenerr); +} + +static char * +grectlraw(Conv *c, int, char **) +{ + GREconv *grec; + + grec = c->ptcl; + grec->raw = 1; + return nil; +} + +static char * +grectlcooked(Conv *c, int, char **) +{ + GREconv *grec; + + grec = c->ptcl; + grec->raw = 0; + return nil; +} + +static char * +grectlretunnel(Conv *c, int, char **argv) +{ + GREconv *grec; + uchar ipaddr[4]; + + grec = c->ptcl; + if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa)) + return "tunnel already set up"; + + v4parseip(ipaddr, argv[1]); + if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0) + return "bad hoa"; + memmove(grec->hoa, ipaddr, sizeof grec->hoa); + v4parseip(ipaddr, argv[2]); + memmove(grec->north, ipaddr, sizeof grec->north); + v4parseip(ipaddr, argv[3]); + memmove(grec->south, ipaddr, sizeof grec->south); + v4parseip(ipaddr, argv[4]); + memmove(grec->coa, ipaddr, sizeof grec->coa); + grec->ulsusp = 1; + grec->dlsusp = 0; + + return nil; +} + +static char * +grectlreport(Conv *c, int, char **argv) +{ + ulong seq; + Block *bp; + Bring *r; + GREconv *grec; + Metablock *m; + + grec = c->ptcl; + seq = strtoul(argv[1], nil, 0); + + qlock(&grec->lock); + r = &grec->dlpending; + while(r->produced - r->consumed > 0){ + bp = r->ring[r->consumed & Ringmask]; + + assert(bp && bp->rp - bp->base >= sizeof(Metablock)); + m = (Metablock *)bp->base; + if((long)(seq - m->seq) <= 0) + break; + + r->ring[r->consumed & Ringmask] = nil; + r->consumed++; + + freeb(bp); + } + qunlock(&grec->lock); + return nil; +} + +static char * +grectldlsuspend(Conv *c, int, char **) +{ + GREconv *grec; + + grec = 
c->ptcl; + if(grec->dlsusp) + return "already suspended"; + + grec->dlsusp = 1; + return nil; +} + +static char * +grectlulsuspend(Conv *c, int, char **) +{ + GREconv *grec; + + grec = c->ptcl; + if(grec->ulsusp) + return "already suspended"; + + grec->ulsusp = 1; + return nil; +} + +static char * +grectldlresume(Conv *c, int, char **) +{ + GREconv *grec; + GREhdr *gre; + Block *bp; + + grec = c->ptcl; + + qlock(&grec->lock); + if(!grec->dlsusp){ + qunlock(&grec->lock); + return "not suspended"; + } + + while((bp = getring(&grec->dlbuffered)) != nil){ + gre = (GREhdr *)bp->rp; + qunlock(&grec->lock); + + ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil); + + qlock(&grec->lock); + addring(&grec->dlpending, bp); + } + grec->dlsusp = 0; + qunlock(&grec->lock); + return nil; +} + +static char * +grectlulresume(Conv *c, int, char **) +{ + GREconv *grec; + GREhdr *gre; + Block *bp; + + grec = c->ptcl; + + qlock(&grec->lock); + while((bp = getring(&grec->ulbuffered)) != nil){ + gre = (GREhdr *)bp->rp; + + qunlock(&grec->lock); + ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil); + qlock(&grec->lock); + } + grec->ulsusp = 0; + qunlock(&grec->lock); + return nil; +} + +static char * +grectlforward(Conv *c, int, char **argv) +{ + Block *bp; + GREconv *grec; + GREhdr *gre; + Metablock *m; + + grec = c->ptcl; + + v4parseip(grec->south, argv[1]); + memmove(grec->north, grec->south, sizeof grec->north); + + qlock(&grec->lock); + if(!grec->dlsusp){ + qunlock(&grec->lock); + return "not suspended"; + } + grec->dlsusp = 0; + grec->ulsusp = 0; + + while((bp = getring(&grec->dlpending)) != nil){ + + assert(bp->rp - bp->base >= sizeof(Metablock)); + m = (Metablock *)bp->base; + assert(m->rp >= bp->base && m->rp < bp->lim); + + bp->rp = m->rp; + + gre = (GREhdr *)bp->rp; + memmove(gre->src, grec->coa, sizeof gre->dst); + memmove(gre->dst, grec->south, sizeof gre->dst); + + qunlock(&grec->lock); + ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil); + 
qlock(&grec->lock); + } + + while((bp = getring(&grec->dlbuffered)) != nil){ + gre = (GREhdr *)bp->rp; + memmove(gre->src, grec->coa, sizeof gre->dst); + memmove(gre->dst, grec->south, sizeof gre->dst); + + qunlock(&grec->lock); + ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil); + qlock(&grec->lock); + } + + while((bp = getring(&grec->ulbuffered)) != nil){ + gre = (GREhdr *)bp->rp; + + memmove(gre->src, grec->coa, sizeof gre->dst); + memmove(gre->dst, grec->south, sizeof gre->dst); + + qunlock(&grec->lock); + ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil); + qlock(&grec->lock); + } + qunlock(&grec->lock); + return nil; +} + +static char * +grectlulkey(Conv *c, int, char **argv) +{ + GREconv *grec; + + grec = c->ptcl; + grec->ulkey = strtoul(argv[1], nil, 0); + return nil; +} + +char * +grectl(Conv *c, char **f, int n) +{ + int i; + + if(n < 1) + return "too few arguments"; + + for(i = 0; i < Ncmds; i++) + if(strcmp(f[0], grectls[i].cmd) == 0) + break; + + if(i == Ncmds) + return "no such command"; + if(grectls[i].argc != 0 && grectls[i].argc != n) + return "incorrect number of arguments"; + + return grectls[i].f(c, n, f); +} + +void +greinit(Fs *fs) +{ + Proto *gre; + + gre = smalloc(sizeof(Proto)); + gre->priv = smalloc(sizeof(GREpriv)); + gre->name = "gre"; + gre->connect = greconnect; + gre->announce = greannounce; + gre->state = grestate; + gre->create = grecreate; + gre->close = greclose; + gre->rcv = greiput; + gre->ctl = grectl; + gre->advise = nil; + gre->stats = grestats; + gre->ipproto = IP_GREPROTO; + gre->nc = 64; + gre->ptclsize = sizeof(GREconv); + + Fsproto(fs, gre); +} diff -Nru /sys/src/9k/ip/icmp.c /sys/src/9k/ip/icmp.c --- /sys/src/9k/ip/icmp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/icmp.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,494 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +typedef struct Icmp { + uchar vihl; /* Version and header 
length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar ipcksum[2]; /* Header checksum */ + uchar src[4]; /* Ip source */ + uchar dst[4]; /* Ip destination */ + uchar type; + uchar code; + uchar cksum[2]; + uchar icmpid[2]; + uchar seq[2]; + uchar data[1]; +} Icmp; + +enum { /* Packet Types */ + EchoReply = 0, + Unreachable = 3, + SrcQuench = 4, + Redirect = 5, + EchoRequest = 8, + TimeExceed = 11, + InParmProblem = 12, + Timestamp = 13, + TimestampReply = 14, + InfoRequest = 15, + InfoReply = 16, + AddrMaskRequest = 17, + AddrMaskReply = 18, + + Maxtype = 18, +}; + +enum +{ + MinAdvise = 24, /* minimum needed for us to advise another protocol */ +}; + +char *icmpnames[Maxtype+1] = +{ +[EchoReply] "EchoReply", +[Unreachable] "Unreachable", +[SrcQuench] "SrcQuench", +[Redirect] "Redirect", +[EchoRequest] "EchoRequest", +[TimeExceed] "TimeExceed", +[InParmProblem] "InParmProblem", +[Timestamp] "Timestamp", +[TimestampReply] "TimestampReply", +[InfoRequest] "InfoRequest", +[InfoReply] "InfoReply", +[AddrMaskRequest] "AddrMaskRequest", +[AddrMaskReply] "AddrMaskReply", +}; + +enum { + IP_ICMPPROTO = 1, + ICMP_IPSIZE = 20, + ICMP_HDRSIZE = 8, +}; + +enum +{ + InMsgs, + InErrors, + OutMsgs, + CsumErrs, + LenErrs, + HlenErrs, + + Nstats, +}; + +static char *statnames[Nstats] = +{ +[InMsgs] "InMsgs", +[InErrors] "InErrors", +[OutMsgs] "OutMsgs", +[CsumErrs] "CsumErrs", +[LenErrs] "LenErrs", +[HlenErrs] "HlenErrs", +}; + +typedef struct Icmppriv Icmppriv; +struct Icmppriv +{ + ulong stats[Nstats]; + + /* message counts */ + ulong in[Maxtype+1]; + ulong out[Maxtype+1]; +}; + +static void icmpkick(void *x, Block*); + +static void +icmpcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(icmpkick, c); +} + +extern char* +icmpconnect(Conv *c, char **argv, int argc) +{ + char *e; + 
+ e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, e); + + return nil; +} + +extern int +icmpstate(Conv *c, char *state, int n) +{ + USED(c); + return snprint(state, n, "%s qin %d qout %d\n", + "Datagram", + c->rq ? qlen(c->rq) : 0, + c->wq ? qlen(c->wq) : 0 + ); +} + +extern char* +icmpannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + + return nil; +} + +extern void +icmpclose(Conv *c) +{ + qclose(c->rq); + qclose(c->wq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; +} + +static void +icmpkick(void *x, Block *bp) +{ + Conv *c = x; + Icmp *p; + Icmppriv *ipriv; + + if(bp == nil) + return; + + if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){ + freeblist(bp); + return; + } + p = (Icmp *)(bp->rp); + p->vihl = IP_VER4; + ipriv = c->p->priv; + if(p->type <= Maxtype) + ipriv->out[p->type]++; + + v6tov4(p->dst, c->raddr); + v6tov4(p->src, c->laddr); + p->proto = IP_ICMPPROTO; + hnputs(p->icmpid, c->lport); + memset(p->cksum, 0, sizeof(p->cksum)); + hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE)); + ipriv->stats[OutMsgs]++; + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +extern void +icmpttlexceeded(Fs *f, uchar *ia, Block *bp) +{ + Block *nbp; + Icmp *p, *np; + + p = (Icmp *)bp->rp; + + netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src); + nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); + nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8; + np = (Icmp *)nbp->rp; + np->vihl = IP_VER4; + memmove(np->dst, p->src, sizeof(np->dst)); + v6tov4(np->src, ia); + memmove(np->data, bp->rp, ICMP_IPSIZE + 8); + np->type = TimeExceed; + np->code = 0; + np->proto = IP_ICMPPROTO; + hnputs(np->icmpid, 0); + hnputs(np->seq, 0); + memset(np->cksum, 0, sizeof(np->cksum)); + hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE)); + ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, 
nil); + +} + +static void +icmpunreachable(Fs *f, Block *bp, int code, int seq) +{ + Block *nbp; + Icmp *p, *np; + int i; + uchar addr[IPaddrlen]; + + p = (Icmp *)bp->rp; + + /* only do this for unicast sources and destinations */ + v4tov6(addr, p->dst); + i = ipforme(f, addr); + if((i&Runi) == 0) + return; + v4tov6(addr, p->src); + i = ipforme(f, addr); + if(i != 0 && (i&Runi) == 0) + return; + + netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src); + nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); + nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8; + np = (Icmp *)nbp->rp; + np->vihl = IP_VER4; + memmove(np->dst, p->src, sizeof(np->dst)); + memmove(np->src, p->dst, sizeof(np->src)); + memmove(np->data, bp->rp, ICMP_IPSIZE + 8); + np->type = Unreachable; + np->code = code; + np->proto = IP_ICMPPROTO; + hnputs(np->icmpid, 0); + hnputs(np->seq, seq); + memset(np->cksum, 0, sizeof(np->cksum)); + hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE)); + ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmpnoconv(Fs *f, Block *bp) +{ + icmpunreachable(f, bp, 3, 0); +} + +extern void +icmpcantfrag(Fs *f, Block *bp, int mtu) +{ + icmpunreachable(f, bp, 4, mtu); +} + +static void +goticmpkt(Proto *icmp, Block *bp) +{ + Conv **c, *s; + Icmp *p; + uchar dst[IPaddrlen]; + ushort recid; + + p = (Icmp *) bp->rp; + v4tov6(dst, p->src); + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid) + if(ipcmp(s->raddr, dst) == 0){ + bp = concatblock(bp); + if(bp != nil) + qpass(s->rq, bp); + return; + } + } + freeblist(bp); +} + +static Block * +mkechoreply(Block *bp) +{ + Icmp *q; + uchar ip[4]; + + q = (Icmp *)bp->rp; + q->vihl = IP_VER4; + memmove(ip, q->src, sizeof(q->dst)); + memmove(q->src, q->dst, sizeof(q->src)); + memmove(q->dst, ip, sizeof(q->dst)); + q->type = EchoReply; + memset(q->cksum, 0, sizeof(q->cksum)); + hnputs(q->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - 
ICMP_IPSIZE));
+
+	return bp;
+}
+
+/* advise text for ICMP Unreachable, indexed by the code field */
+static char *unreachcode[] =
+{
+[0]	"net unreachable",
+[1]	"host unreachable",
+[2]	"protocol unreachable",
+[3]	"port unreachable",
+[4]	"fragmentation needed and DF set",
+[5]	"source route failed",
+};
+
+/*
+ * ICMP input.  Validates the header length, IP total length and
+ * checksum, counts the message by type, then:
+ *	EchoRequest	turn the packet around as an EchoReply;
+ *	Unreachable,
+ *	TimeExceed	advise the protocol named in the embedded IP
+ *			header, if it has an advise routine;
+ *	otherwise	pass the packet to a matching icmp conversation.
+ * The block is consumed on every path.
+ */
+static void
+icmpiput(Proto *icmp, Ipifc*, Block *bp)
+{
+	int n, iplen;
+	Icmp *p;
+	Block *r;
+	Proto *pr;
+	char *msg;
+	char m2[128];
+	Icmppriv *ipriv;
+
+	ipriv = icmp->priv;
+
+	ipriv->stats[InMsgs]++;
+
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+		(p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+		p->type, p->code);
+	n = blocklen(bp);
+	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[HlenErrs]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto raise;
+	}
+	iplen = nhgets(p->length);
+	/*
+	 * the claimed length must fit in what arrived and must cover
+	 * the ip and icmp headers, or the checksum length below
+	 * would go negative.
+	 */
+	if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[LenErrs]++;
+		ipriv->stats[InErrors]++;
+		netlog(icmp->f, Logicmp, "icmp length error n %d iplen %d\n",
+			n, iplen);
+		goto raise;
+	}
+	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[CsumErrs]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error n %d iplen %d\n",
+			n, iplen);
+		goto raise;
+	}
+	if(p->type <= Maxtype)
+		ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequest:
+		if(iplen < n){
+			/* drop link-level padding beyond the ip length */
+			bp = trimblock(bp, 0, iplen);
+			if(bp == nil){
+				ipriv->stats[LenErrs]++;
+				ipriv->stats[InErrors]++;
+				return;
+			}
+		}
+		r = mkechoreply(bp);
+		ipriv->out[EchoReply]++;
+		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case Unreachable:
+		/* unknown codes are reported as "host unreachable" */
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[1];
+		else
+			msg = unreachcode[p->code];
+
+		/* step over our headers to the embedded offending packet */
+		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+		if(blocklen(bp) < MinAdvise){
+			ipriv->stats[LenErrs]++;
+			goto raise;
+		}
+		p = (Icmp *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+
+		/* nobody to advise: restore rp and treat as ordinary icmp */
+		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		goticmpkt(icmp, bp);
+		break;
+	case TimeExceed:
+		if(p->code == 0){
+			snprint(m2, sizeof m2, "ttl exceeded at %V", p->src);
+
+			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+			if(blocklen(bp) < MinAdvise){
+				ipriv->stats[LenErrs]++;
+				goto raise;
+			}
+			p = (Icmp *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		}
+
+		goticmpkt(icmp, bp);
+		break;
+	default:
+		goticmpkt(icmp, bp);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+}
+
+/*
+ * Advise routine for icmp itself: bp starts at the embedded copy of
+ * a packet we sent.  Hang up the conversation whose local port equals
+ * the embedded icmp id and whose remote address equals the embedded
+ * destination, then free the block.
+ */
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{
+	Conv **c, *s;
+	Icmp *p;
+	uchar dst[IPaddrlen];
+	ushort recid;
+
+	p = (Icmp *) bp->rp;
+	v4tov6(dst, p->dst);
+	recid = nhgets(p->icmpid);
+
+	for(c = icmp->conv; *c; c++) {
+		s = *c;
+		if(s->lport == recid)
+		if(ipcmp(s->raddr, dst) == 0){
+			qhangup(s->rq, msg);
+			qhangup(s->wq, msg);
+			break;
+		}
+	}
+	freeblist(bp);
+}
+
+/*
+ * Format the protocol counters into buf (at most len bytes): one line
+ * per general statistic, then one "name: in out" line per message
+ * type; types without a name are printed by number.
+ * Returns the number of bytes written.
+ */
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{
+	Icmppriv *priv;
+	char *p, *e;
+	int i;
+
+	priv = icmp->priv;
+	p = buf;
+	e = p+len;
+	for(i = 0; i < Nstats; i++)
+		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+	for(i = 0; i <= Maxtype; i++){
+		if(icmpnames[i])
+			p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
+		else
+			p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
+	}
+	return p - buf;
+}
+
+/*
+ * Allocate the icmp Proto, fill in its entry points and register it
+ * with the given Fs.
+ */
+void
+icmpinit(Fs *fs)
+{
+	Proto *icmp;
+
+	icmp = smalloc(sizeof(Proto));
+	icmp->priv = smalloc(sizeof(Icmppriv));
+	icmp->name = "icmp";
+	icmp->connect = icmpconnect;
+	icmp->announce = icmpannounce;
+	icmp->state = icmpstate;
+	icmp->create = icmpcreate;
+	icmp->close = icmpclose;
+	icmp->rcv = icmpiput;
+	icmp->stats = icmpstats;
+	icmp->ctl = nil;
+	icmp->advise = icmpadvise;
+	icmp->gc = nil;
+	icmp->ipproto = IP_ICMPPROTO;
+	icmp->nc = 128;
+	icmp->ptclsize = 0;
+
+	Fsproto(fs, icmp);
+}
diff -Nru /sys/src/9k/ip/icmp6.c /sys/src/9k/ip/icmp6.c
--- /sys/src/9k/ip/icmp6.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/ip/icmp6.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,909 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
+#include "u.h"
+#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" +#include "ipv6.h" + +enum +{ + InMsgs6, + InErrors6, + OutMsgs6, + CsumErrs6, + LenErrs6, + HlenErrs6, + HoplimErrs6, + IcmpCodeErrs6, + TargetErrs6, + OptlenErrs6, + AddrmxpErrs6, + RouterAddrErrs6, + + Nstats6, +}; + +enum { + ICMP_USEAD6 = 40, +}; + +enum { + Oflag = 1<<5, + Sflag = 1<<6, + Rflag = 1<<7, +}; + +enum { + /* ICMPv6 types */ + EchoReply = 0, + UnreachableV6 = 1, + PacketTooBigV6 = 2, + TimeExceedV6 = 3, + SrcQuench = 4, + ParamProblemV6 = 4, + Redirect = 5, + EchoRequest = 8, + TimeExceed = 11, + InParmProblem = 12, + Timestamp = 13, + TimestampReply = 14, + InfoRequest = 15, + InfoReply = 16, + AddrMaskRequest = 17, + AddrMaskReply = 18, + EchoRequestV6 = 128, + EchoReplyV6 = 129, + RouterSolicit = 133, + RouterAdvert = 134, + NbrSolicit = 135, + NbrAdvert = 136, + RedirectV6 = 137, + + Maxtype6 = 137, +}; + +/* on-the-wire packet formats */ +typedef struct IPICMP IPICMP; +typedef struct Ndpkt Ndpkt; +typedef struct NdiscC NdiscC; + +/* we do this to avoid possible struct padding */ +#define ICMPHDR \ + IPV6HDR; \ + uchar type; \ + uchar code; \ + uchar cksum[2]; \ + uchar icmpid[2]; \ + uchar seq[2] + +struct IPICMP { + ICMPHDR; + uchar payload[]; +}; + +#define IPICMPSZ offsetof(IPICMP, payload[0]) + +struct NdiscC { + ICMPHDR; + uchar target[IPaddrlen]; + uchar payload[]; +}; + +#define NDISCSZ offsetof(NdiscC, payload[0]) + +struct Ndpkt { + ICMPHDR; + uchar target[IPaddrlen]; + uchar otype; + uchar olen; /* length in units of 8 octets(incl type, code), + * 1 for IEEE 802 addresses */ + uchar lnaddr[6]; /* link-layer address */ + uchar payload[]; +}; + +#define NDPKTSZ offsetof(Ndpkt, payload[0]) + +typedef struct Icmppriv6 +{ + ulong stats[Nstats6]; + + /* message counts */ + ulong in[Maxtype6+1]; + ulong out[Maxtype6+1]; +} Icmppriv6; + +typedef struct Icmpcb6 +{ + QLock; + uchar headers; +} Icmpcb6; + +char 
*icmpnames6[Maxtype6+1] = +{ +[EchoReply] "EchoReply", +[UnreachableV6] "UnreachableV6", +[PacketTooBigV6] "PacketTooBigV6", +[TimeExceedV6] "TimeExceedV6", +[SrcQuench] "SrcQuench", +[Redirect] "Redirect", +[EchoRequest] "EchoRequest", +[TimeExceed] "TimeExceed", +[InParmProblem] "InParmProblem", +[Timestamp] "Timestamp", +[TimestampReply] "TimestampReply", +[InfoRequest] "InfoRequest", +[InfoReply] "InfoReply", +[AddrMaskRequest] "AddrMaskRequest", +[AddrMaskReply] "AddrMaskReply", +[EchoRequestV6] "EchoRequestV6", +[EchoReplyV6] "EchoReplyV6", +[RouterSolicit] "RouterSolicit", +[RouterAdvert] "RouterAdvert", +[NbrSolicit] "NbrSolicit", +[NbrAdvert] "NbrAdvert", +[RedirectV6] "RedirectV6", +}; + +static char *statnames6[Nstats6] = +{ +[InMsgs6] "InMsgs", +[InErrors6] "InErrors", +[OutMsgs6] "OutMsgs", +[CsumErrs6] "CsumErrs", +[LenErrs6] "LenErrs", +[HlenErrs6] "HlenErrs", +[HoplimErrs6] "HoplimErrs", +[IcmpCodeErrs6] "IcmpCodeErrs", +[TargetErrs6] "TargetErrs", +[OptlenErrs6] "OptlenErrs", +[AddrmxpErrs6] "AddrmxpErrs", +[RouterAddrErrs6] "RouterAddrErrs", +}; + +static char *unreachcode[] = +{ +[Icmp6_no_route] "no route to destination", +[Icmp6_ad_prohib] "comm with destination administratively prohibited", +[Icmp6_out_src_scope] "beyond scope of source address", +[Icmp6_adr_unreach] "address unreachable", +[Icmp6_port_unreach] "port unreachable", +[Icmp6_gress_src_fail] "source address failed ingress/egress policy", +[Icmp6_rej_route] "reject route to destination", +[Icmp6_unknown] "icmp unreachable: unknown code", +}; + +static void icmpkick6(void *x, Block *bp); + +static void +icmpcreate6(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(icmpkick6, c); +} + +static void +set_cksum(Block *bp) +{ + IPICMP *p = (IPICMP *)(bp->rp); + + hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */ + hnputs(p->ploadlen, blocklen(bp) - IP6HDR); + p->proto = 0; + p->ttl = ICMPv6; /* ttl gets set later */ + hnputs(p->cksum, 0); + hnputs(p->cksum, 
ptclcsum(bp, 0, blocklen(bp))); + p->proto = ICMPv6; +} + +static Block * +newIPICMP(int packetlen) +{ + Block *nbp; + + nbp = allocb(packetlen); + nbp->wp += packetlen; + memset(nbp->rp, 0, packetlen); + return nbp; +} + +void +icmpadvise6(Proto *icmp, Block *bp, char *msg) +{ + ushort recid; + Conv **c, *s; + IPICMP *p; + + p = (IPICMP *)bp->rp; + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid && ipcmp(s->raddr, p->dst) == 0){ + qhangup(s->rq, msg); + qhangup(s->wq, msg); + break; + } + } + freeblist(bp); +} + +static void +icmpkick6(void *x, Block *bp) +{ + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Conv *c = x; + IPICMP *p; + Icmppriv6 *ipriv = c->p->priv; + Icmpcb6 *icb = (Icmpcb6*)c->ptcl; + + if(bp == nil) + return; + + if(icb->headers==6) { + /* get user specified addresses */ + bp = pullupblock(bp, ICMP_USEAD6); + if(bp == nil) + return; + bp->rp += 8; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + bp = padblock(bp, IP6HDR); + } + + if(blocklen(bp) < IPICMPSZ){ + freeblist(bp); + return; + } + p = (IPICMP *)(bp->rp); + if(icb->headers == 6) { + ipmove(p->dst, raddr); + ipmove(p->src, laddr); + } else { + ipmove(p->dst, c->raddr); + ipmove(p->src, c->laddr); + hnputs(p->icmpid, c->lport); + } + + set_cksum(bp); + p->vcf[0] = 0x06 << 4; + if(p->type <= Maxtype6) + ipriv->out[p->type]++; + ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +char* +icmpctl6(Conv *c, char **argv, int argc) +{ + Icmpcb6 *icb; + + icb = (Icmpcb6*) c->ptcl; + if(argc==1 && strcmp(argv[0], "headers")==0) { + icb->headers = 6; + return nil; + } + return "unknown control request"; +} + +static void +goticmpkt6(Proto *icmp, Block *bp, int muxkey) +{ + ushort recid; + uchar *addr; + Conv **c, *s; + IPICMP *p = (IPICMP *)bp->rp; + + if(muxkey == 0) { + recid = nhgets(p->icmpid); + addr = p->src; + } else { + recid = muxkey; + addr = p->dst; + } + + for(c = icmp->conv; *c; c++){ + s = *c; 
+ if(s->lport == recid && ipcmp(s->raddr, addr) == 0){ + bp = concatblock(bp); + if(bp != nil) + qpass(s->rq, bp); + return; + } + } + + freeblist(bp); +} + +static Block * +mkechoreply6(Block *bp, Ipifc *ifc) +{ + uchar addr[IPaddrlen]; + IPICMP *p = (IPICMP *)(bp->rp); + + ipmove(addr, p->src); + if(!isv6mcast(p->dst)) + ipmove(p->src, p->dst); + else if (!ipv6anylocal(ifc, p->src)) + return nil; + ipmove(p->dst, addr); + p->type = EchoReplyV6; + set_cksum(bp); + return bp; +} + +/* + * sends out an ICMPv6 neighbor solicitation + * suni == SRC_UNSPEC or SRC_UNI, + * tuni == TARG_MULTI => multicast for address resolution, + * and tuni == TARG_UNI => neighbor reachability. + */ +extern void +icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac) +{ + Block *nbp; + Ndpkt *np; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + nbp = newIPICMP(NDPKTSZ); + np = (Ndpkt*) nbp->rp; + + if(suni == SRC_UNSPEC) + memmove(np->src, v6Unspecified, IPaddrlen); + else + memmove(np->src, src, IPaddrlen); + + if(tuni == TARG_UNI) + memmove(np->dst, targ, IPaddrlen); + else + ipv62smcast(np->dst, targ); + + np->type = NbrSolicit; + np->code = 0; + memmove(np->target, targ, IPaddrlen); + if(suni != SRC_UNSPEC) { + np->otype = SRC_LLADDR; + np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */ + memmove(np->lnaddr, mac, sizeof(np->lnaddr)); + } else + nbp->wp -= NDPKTSZ - NDISCSZ; + + set_cksum(nbp); + np = (Ndpkt*)nbp->rp; + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[NbrSolicit]++; + netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ); + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +/* + * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags. 
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+	Block *nbp;
+	Ndpkt *np;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	nbp = newIPICMP(NDPKTSZ);
+	np = (Ndpkt*)nbp->rp;
+
+	memmove(np->src, src, IPaddrlen);
+	memmove(np->dst, dst, IPaddrlen);
+
+	np->type = NbrAdvert;
+	np->code = 0;
+	np->icmpid[0] = flags;		/* flags occupy the first byte after the checksum */
+	memmove(np->target, targ, IPaddrlen);
+
+	/* always carry the target link-layer address option */
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;			/* 1+1+6 = 8 = one 8-octet unit */
+	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+
+	/* set_cksum borrows ttl and vcf for the pseudoheader; fix them up after */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[NbrAdvert]++;
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Build an ICMPv6 Unreachable (with the given code) about the packet
+ * in bp and send it back to bp's source.  As much of the offending
+ * packet as fits in v6MINTU is quoted.
+ *
+ * When free is non-zero the reply is delivered locally through
+ * ipiput6 and bp is freed here; when free is zero the reply goes out
+ * through ipoput6 and bp is left alone.  The ifc read lock is taken
+ * only around the work that needs it and is released on every exit.
+ */
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	/*
+	 * never answer a multicast source.  no lock is held yet, so
+	 * this exit must not unlock; bp is consumed only if free is set,
+	 * as on the other exits.
+	 */
+	if(isv6mcast(p->src)){
+		if(free)
+			freeblist(bp);
+		return;
+	}
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	rlock(ifc);
+	if(!ipv6anylocal(ifc, np->src)){
+		/* no usable source address on this interface */
+		netlog(f, Logicmp, "icmphostunr fail -> src %I dst %I\n",
+			p->src, p->dst);
+		freeblist(nbp);
+		runlock(ifc);
+		if(free)
+			freeblist(bp);
+		return;
+	}
+	netlog(f, Logicmp, "send icmphostunr -> src %I dst %I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = UnreachableV6;
+	np->code = code;
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[UnreachableV6]++;
+
+	if(free){
+		/* local delivery runs with the interface read-locked, as before */
+		ipiput6(f, ifc, nbp);
+		runlock(ifc);
+		freeblist(bp);
+	}else{
+		runlock(ifc);
+		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+	}
+}
+
+/*
+ * Send an ICMPv6 Time Exceeded (code 0) back to the source of the
+ * packet in bp, quoting as much of it as fits in v6MINTU.
+ * bp itself is not freed here.
+ */
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> src %I dst %I\n",
+			p->src, p->dst);
+		freeblist(nbp);		/* don't leak the unsent reply */
+		return;
+	}
+	netlog(f, Logicmp, "send icmpttlexceeded6 -> src %I dst %I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = TimeExceedV6;
+	np->code = 0;
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[TimeExceedV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 Packet Too Big back to the source of the packet in
+ * bp.  The 32-bit field after the checksum carries the interface's
+ * maxtu less the medium header size.  bp itself is not freed here.
+ */
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(IPICMPSZ + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> src %I dst %I\n",
+			p->src, p->dst);
+		freeblist(nbp);		/* don't leak the unsent reply */
+		return;
+	}
+	netlog(f, Logicmp, "send icmppkttoobig6 -> src %I dst %I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = PacketTooBigV6;
+	np->code = 0;
+	hnputl(np->icmpid, ifc->maxtu - ifc->medium->hsize);
+	memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[PacketTooBigV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * RFC 2461, pages 39-40, pages 57-58.
+ */ +static int +valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) +{ + int sz, osz, unsp, n, ttl, iplen; + int pktsz = BLEN(bp); + uchar *packet = bp->rp; + IPICMP *p = (IPICMP *) packet; + Ndpkt *np; + + USED(ifc); + n = blocklen(bp); + if(n < IPICMPSZ) { + ipriv->stats[HlenErrs6]++; + netlog(icmp->f, Logicmp, "icmp hlen %d\n", n); + goto err; + } + + iplen = nhgets(p->ploadlen); + if(iplen > n - IP6HDR) { + ipriv->stats[LenErrs6]++; + netlog(icmp->f, Logicmp, "icmp length %d\n", iplen); + goto err; + } + + /* Rather than construct explicit pseudoheader, overwrite IPv6 header */ + if(p->proto != ICMPv6) { + /* This code assumes no extension headers!!! */ + netlog(icmp->f, Logicmp, "icmp error: extension header\n"); + goto err; + } + memset(packet, 0, 4); + ttl = p->ttl; + p->ttl = p->proto; + p->proto = 0; + if(ptclcsum(bp, 0, iplen + IP6HDR)) { + ipriv->stats[CsumErrs6]++; + netlog(icmp->f, Logicmp, "icmp checksum error\n"); + goto err; + } + p->proto = p->ttl; + p->ttl = ttl; + + /* additional tests for some pkt types */ + if (p->type == NbrSolicit || p->type == NbrAdvert || + p->type == RouterAdvert || p->type == RouterSolicit || + p->type == RedirectV6) { + if(p->ttl != HOP_LIMIT) { + ipriv->stats[HoplimErrs6]++; + goto err; + } + if(p->code != 0) { + ipriv->stats[IcmpCodeErrs6]++; + goto err; + } + + switch (p->type) { + case NbrSolicit: + case NbrAdvert: + np = (Ndpkt*) p; + if(isv6mcast(np->target)) { + ipriv->stats[TargetErrs6]++; + goto err; + } + if(optexsts(np) && np->olen == 0) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + + if (p->type == NbrSolicit && + ipcmp(np->src, v6Unspecified) == 0) + if(!issmcast(np->dst) || optexsts(np)) { + ipriv->stats[AddrmxpErrs6]++; + goto err; + } + + if(p->type == NbrAdvert) + if(isv6mcast(np->dst) && + (nhgets(np->icmpid) & Sflag)){ + ipriv->stats[AddrmxpErrs6]++; + goto err; + } + break; + + case RouterAdvert: + if(pktsz - IP6HDR < 16) { + ipriv->stats[HlenErrs6]++; + goto err; + } + 
if(!islinklocal(p->src)) { + ipriv->stats[RouterAddrErrs6]++; + goto err; + } + sz = IPICMPSZ + 8; + while (sz+1 < pktsz) { + osz = packet[sz+1]; + if(osz <= 0) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + sz += 8*osz; + } + break; + + case RouterSolicit: + if(pktsz - IP6HDR < 8) { + ipriv->stats[HlenErrs6]++; + goto err; + } + unsp = (ipcmp(p->src, v6Unspecified) == 0); + sz = IPICMPSZ + 8; + while (sz+1 < pktsz) { + osz = packet[sz+1]; + if(osz <= 0 || + (unsp && packet[sz] == SRC_LLADDR)) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + sz += 8*osz; + } + break; + + case RedirectV6: + /* to be filled in */ + break; + + default: + goto err; + } + } + return 1; +err: + ipriv->stats[InErrors6]++; + return 0; +} + +static int +targettype(Fs *f, Ipifc *ifc, uchar *target) +{ + Iplifc *lifc; + int t; + + rlock(ifc); + if(ipproxyifc(f, ifc, target)) { + runlock(ifc); + return Tuniproxy; + } + + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(lifc->local, target) == 0) { + t = (lifc->tentative)? 
Tunitent: Tunirany; + runlock(ifc); + return t; + } + + runlock(ifc); + return 0; +} + +static void +icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp) +{ + int refresh = 1; + char *msg, m2[128]; + uchar pktflags; + uchar *packet = bp->rp; + uchar lsrc[IPaddrlen]; + Block *r; + IPICMP *p = (IPICMP *)packet; + Icmppriv6 *ipriv = icmp->priv; + Iplifc *lifc; + Ndpkt* np; + Proto *pr; + + if(!valid(icmp, ipifc, bp, ipriv) || p->type > Maxtype6) + goto raise; + + ipriv->in[p->type]++; + + switch(p->type) { + case EchoRequestV6: + r = mkechoreply6(bp, ipifc); + if(r == nil) + goto raise; + ipriv->out[EchoReply]++; + ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil); + break; + + case UnreachableV6: + if(p->code >= nelem(unreachcode)) + msg = unreachcode[Icmp6_unknown]; + else + msg = unreachcode[p->code]; + + bp->rp += IPICMPSZ; + if(blocklen(bp) < 8){ + ipriv->stats[LenErrs6]++; + goto raise; + } + p = (IPICMP *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, msg); + return; + } + + bp->rp -= IPICMPSZ; + goticmpkt6(icmp, bp, 0); + break; + + case TimeExceedV6: + if(p->code == 0){ + sprint(m2, "ttl exceeded at %I", p->src); + + bp->rp += IPICMPSZ; + if(blocklen(bp) < 8){ + ipriv->stats[LenErrs6]++; + goto raise; + } + p = (IPICMP *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr && pr->advise) { + (*pr->advise)(pr, bp, m2); + return; + } + bp->rp -= IPICMPSZ; + } + + goticmpkt6(icmp, bp, 0); + break; + + case RouterAdvert: + case RouterSolicit: + /* using lsrc as a temp, munge hdr for goticmp6 */ + if (0) { + memmove(lsrc, p->src, IPaddrlen); + memmove(p->src, p->dst, IPaddrlen); + memmove(p->dst, lsrc, IPaddrlen); + } + goticmpkt6(icmp, bp, p->type); + break; + + case NbrSolicit: + np = (Ndpkt*) p; + pktflags = 0; + switch (targettype(icmp->f, ipifc, np->target)) { + case Tunirany: + pktflags |= Oflag; + /* fall through */ + + case Tuniproxy: + if(ipcmp(np->src, v6Unspecified) != 0) { + arpenter(icmp->f, V6, 
np->src, np->lnaddr, + 8*np->olen-2, 0); + pktflags |= Sflag; + } + if(ipv6local(ipifc, lsrc)) + icmpna(icmp->f, lsrc, + (ipcmp(np->src, v6Unspecified) == 0? + v6allnodesL: np->src), + np->target, ipifc->mac, pktflags); + else + freeblist(bp); + break; + + case Tunitent: + /* not clear what needs to be done. send up + * an icmp mesg saying don't use this address? */ + default: + freeblist(bp); + } + break; + + case NbrAdvert: + np = (Ndpkt*) p; + + /* + * if the target address matches one of the local interface + * addresses and the local interface address has tentative bit + * set, insert into ARP table. this is so the duplicate address + * detection part of ipconfig can discover duplication through + * the arp table. + */ + lifc = iplocalonifc(ipifc, np->target); + if(lifc && lifc->tentative) + refresh = 0; + arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, + refresh); + freeblist(bp); + break; + + case PacketTooBigV6: + default: + goticmpkt6(icmp, bp, 0); + break; + } + return; +raise: + freeblist(bp); +} + +int +icmpstats6(Proto *icmp6, char *buf, int len) +{ + Icmppriv6 *priv; + char *p, *e; + int i; + + priv = icmp6->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats6; i++) + p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]); + for(i = 0; i <= Maxtype6; i++) + if(icmpnames6[i]) + p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], + priv->in[i], priv->out[i]); +/* else + p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], + priv->out[i]); + */ + return p - buf; +} + + +/* import from icmp.c */ +extern int icmpstate(Conv *c, char *state, int n); +extern char* icmpannounce(Conv *c, char **argv, int argc); +extern char* icmpconnect(Conv *c, char **argv, int argc); +extern void icmpclose(Conv *c); + +void +icmp6init(Fs *fs) +{ + Proto *icmp6 = smalloc(sizeof(Proto)); + + icmp6->priv = smalloc(sizeof(Icmppriv6)); + icmp6->name = "icmpv6"; + icmp6->connect = icmpconnect; + icmp6->announce = icmpannounce; + icmp6->state = icmpstate; + 
icmp6->create = icmpcreate6; + icmp6->close = icmpclose; + icmp6->rcv = icmpiput6; + icmp6->stats = icmpstats6; + icmp6->ctl = icmpctl6; + icmp6->advise = icmpadvise6; + icmp6->gc = nil; + icmp6->ipproto = ICMPv6; + icmp6->nc = 16; + icmp6->ptclsize = sizeof(Icmpcb6); + + Fsproto(fs, icmp6); +} diff -Nru /sys/src/9k/ip/il.c /sys/src/9k/ip/il.c --- /sys/src/9k/ip/il.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/il.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1408 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum /* Connection state */ +{ + Ilclosed, + Ilsyncer, + Ilsyncee, + Ilestablished, + Illistening, + Ilclosing, + Ilopening, /* only for file server */ +}; + +char *ilstates[] = +{ + "Closed", + "Syncer", + "Syncee", + "Established", + "Listen", + "Closing", + "Opening", /* only for file server */ +}; + +enum /* Packet types */ +{ + Ilsync, + Ildata, + Ildataquery, + Ilack, + Ilquery, + Ilstate, + Ilclose, +}; + +char *iltype[] = +{ + "sync", + "data", + "dataquery", + "ack", + "query", + "state", + "close" +}; + +enum +{ + Seconds = 1000, + Iltickms = 50, /* time base */ + AckDelay = 2*Iltickms, /* max time twixt message rcvd & ack sent */ + MaxTimeout = 30*Seconds, /* max time between rexmit */ + QueryTime = 10*Seconds, /* time between subsequent queries */ + DeathTime = 30*QueryTime, + + MaxRexmit = 16, /* max retransmissions before hangup */ + Defaultwin = 20, + + LogAGain = 3, + AGain = 1< 1){ + p = strstr(argv[1], "!fasttimeout"); + if(p != nil){ + *p = 0; + fast = 1; + } + } + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + return ilstart(c, IL_CONNECT, fast); +} + +static int +ilstate(Conv *c, char *state, int n) +{ + Ilcb *ic; + + ic = (Ilcb*)(c->ptcl); + return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n", + ilstates[ic->state], + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0, + ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain, + ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt); +} + +static int +ilinuse(Conv *c) +{ + Ilcb *ic; + + ic = (Ilcb*)(c->ptcl); + return ic->state != Ilclosed; + +} + +/* called with c locked */ +static char* +ilannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + e = ilstart(c, IL_LISTEN, 0); + if(e != nil) + return e; + Fsconnected(c, nil); + + return nil; +} + +void +illocalclose(Conv *c) +{ + Ilcb *ic; + Ilpriv *ipriv; + + ipriv = c->p->priv; + ic = (Ilcb*)c->ptcl; + ic->state = Ilclosed; + iphtrem(&ipriv->ht, c); + ipmove(c->laddr, IPnoaddr); + c->lport = 0; +} + +static void +ilclose(Conv *c) +{ + Ilcb *ic; + + ic = (Ilcb*)c->ptcl; + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + + switch(ic->state) { + case Ilclosing: + case Ilclosed: + break; + case Ilsyncer: + case Ilsyncee: + case Ilestablished: + ic->state = Ilclosing; + ilsettimeout(ic); + ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0); + break; + case Illistening: + illocalclose(c); + break; + } + ilfreeq(ic); +} + +void +ilkick(void *x, Block *bp) +{ + Conv *c = x; + Ilhdr *ih; + Ilcb *ic; + int dlen; + ulong id, ack; + Fs *f; + Ilpriv *priv; + + f = c->p->f; + priv = c->p->priv; + ic = (Ilcb*)c->ptcl; + + if(bp == nil) + return; + + switch(ic->state) { + case Ilclosed: + case Illistening: + case Ilclosing: + freeblist(bp); + qhangup(c->rq, nil); + return; + } + + dlen = blocklen(bp); + + /* Make space to fit il & ip */ + bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE); + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->frag[0] = 0; + ih->frag[1] = 0; + v6tov4(ih->dst, c->raddr); + v6tov4(ih->src, c->laddr); + ih->proto = IP_ILPROTO; + + /* Il fields */ + hnputs(ih->illen, dlen+IL_HDRSIZE); + hnputs(ih->ilsrc, c->lport); + hnputs(ih->ildst, c->rport); + + qlock(&ic->ackq); + id = ic->next++; + hnputl(ih->ilid, id); + ack = 
ic->recvd; + hnputl(ih->ilack, ack); + ic->acksent = ack; + ic->acktime = NOW + AckDelay; + ih->iltype = Ildata; + ih->ilspec = 0; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + /* Checksum of ilheader plus data (not ip & no pseudo header) */ + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE)); + + ilackq(ic, bp); + qunlock(&ic->ackq); + + /* Start the round trip timer for this packet if the timer is free */ + if(ic->rttack == 0) { + ic->rttack = id; + ic->rttstart = fastticks(nil); + ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE; + } + + if(later(NOW, ic->timeout, nil)) + ilsettimeout(ic); + ipoput4(f, bp, 0, c->ttl, c->tos, c); + priv->stats[OutMsgs]++; +} + +static void +ilcreate(Conv *c) +{ + c->rq = qopen(Maxrq, 0, 0, c); + c->wq = qbypass(ilkick, c); +} + +int +ilxstats(Proto *il, char *buf, int len) +{ + Ilpriv *priv; + char *p, *e; + int i; + + priv = il->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]); + return p - buf; +} + +void +ilackq(Ilcb *ic, Block *bp) +{ + Block *np; + int n; + + n = blocklen(bp); + + /* Enqueue a copy on the unacked queue in case this one gets lost */ + np = copyblock(bp, n); + if(ic->unacked) + ic->unackedtail->list = np; + else + ic->unacked = np; + ic->unackedtail = np; + np->list = nil; + ic->unackedbytes += n; +} + +static +void +ilrttcalc(Ilcb *ic, Block *bp) +{ + int rtt, tt, pt, delay, rate; + + rtt = fastticks(nil) - ic->rttstart; + rtt = (rtt*scalemul)/scalediv; + delay = ic->delay; + rate = ic->rate; + + /* Guard against zero wrap */ + if(rtt > 120000 || rtt < 0) + return; + + /* this block had to be transmitted after the one acked so count its size */ + ic->rttlen += blocklen(bp) + IL_IPSIZE + IL_HDRSIZE; + + if(ic->rttlen < 256){ + /* guess fixed delay as rtt of small packets */ + delay += rtt - (delay>>LogAGain); + if(delay < AGain) + delay = AGain; + ic->delay = delay; + } else { + /* if packet took longer than avg rtt 
delay, recalc rate */ + tt = rtt - (delay>>LogAGain); + if(tt > 0){ + rate += ic->rttlen/tt - (rate>>LogAGain); + if(rate < AGain) + rate = AGain; + ic->rate = rate; + } + } + + /* mdev */ + pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain); + ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain); + + if(rtt > ic->maxrtt) + ic->maxrtt = rtt; +} + +void +ilackto(Ilcb *ic, ulong ackto, Block *bp) +{ + Ilhdr *h; + ulong id; + + if(ic->rttack == ackto) + ilrttcalc(ic, bp); + + /* Cancel if we've passed the packet we were interested in */ + if(ic->rttack <= ackto) + ic->rttack = 0; + + qlock(&ic->ackq); + while(ic->unacked) { + h = (Ilhdr *)ic->unacked->rp; + id = nhgetl(h->ilid); + if(ackto < id) + break; + + bp = ic->unacked; + ic->unacked = bp->list; + bp->list = nil; + ic->unackedbytes -= blocklen(bp); + freeblist(bp); + ic->rexmit = 0; + ilsettimeout(ic); + } + qunlock(&ic->ackq); +} + +void +iliput(Proto *il, Ipifc*, Block *bp) +{ + char *st; + Ilcb *ic; + Ilhdr *ih; + uchar raddr[IPaddrlen]; + uchar laddr[IPaddrlen]; + ushort sp, dp, csum; + int plen, illen; + Conv *new, *s; + Ilpriv *ipriv; + + ipriv = il->priv; + + ih = (Ilhdr *)bp->rp; + plen = blocklen(bp); + if(plen < IL_IPSIZE+IL_HDRSIZE){ + netlog(il->f, Logil, "il: hlenerr\n"); + ipriv->stats[HlenErrs]++; + goto raise; + } + + illen = nhgets(ih->illen); + if(illen+IL_IPSIZE > plen){ + netlog(il->f, Logil, "il: lenerr\n"); + ipriv->stats[LenErrs]++; + goto raise; + } + + sp = nhgets(ih->ildst); + dp = nhgets(ih->ilsrc); + v4tov6(raddr, ih->src); + v4tov6(laddr, ih->dst); + + if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) { + if(ih->iltype > Ilclose) + st = "?"; + else + st = iltype[ih->iltype]; + ipriv->stats[CsumErrs]++; + netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %ud ack %ud %I/%d->%d)\n", + csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); + goto raise; + } + + qlock(il); + s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp); + if(s == nil){ + if(ih->iltype == Ilsync) + ilreject(il->f, ih); 
/* no listener */ + qunlock(il); + goto raise; + } + + ic = (Ilcb*)s->ptcl; + if(ic->state == Illistening){ + if(ih->iltype != Ilsync){ + qunlock(il); + if(ih->iltype > Ilclose) + st = "?"; + else + st = iltype[ih->iltype]; + ilreject(il->f, ih); /* no channel and not sync */ + netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n", + st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); + goto raise; + } + + new = Fsnewcall(s, raddr, dp, laddr, sp, V4); + if(new == nil){ + qunlock(il); + netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp); + ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0); + goto raise; + } + s = new; + + ic = (Ilcb*)s->ptcl; + + ic->conv = s; + ic->state = Ilsyncee; + ilcbinit(ic); + ic->rstart = nhgetl(ih->ilid); + iphtadd(&ipriv->ht, s); + } + + qlock(s); + qunlock(il); + if(waserror()){ + qunlock(s); + nexterror(); + } + ilprocess(s, ih, bp); + qunlock(s); + poperror(); + return; +raise: + freeblist(bp); +} + +void +_ilprocess(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + ulong id, ack; + Ilpriv *priv; + + id = nhgetl(h->ilid); + ack = nhgetl(h->ilack); + + ic = (Ilcb*)s->ptcl; + + ic->lastrecv = NOW; + ic->querytime = NOW + QueryTime; + priv = s->p->priv; + priv->stats[InMsgs]++; + + switch(ic->state) { + default: + netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state); + case Ilclosed: + freeblist(bp); + break; + case Ilsyncer: + switch(h->iltype) { + default: + break; + case Ilsync: + if(ack != ic->start) + ilhangup(s, "connection rejected"); + else { + ic->recvd = id; + ic->rstart = id; + ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0); + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + Fsconnected(s, nil); + ilpullup(s); + } + break; + case Ilclose: + if(ack == ic->start) + ilhangup(s, "connection rejected"); + break; + } + freeblist(bp); + break; + case Ilsyncee: + switch(h->iltype) { + default: + break; + case Ilsync: + if(id != ic->rstart || ack != 0){ + 
illocalclose(s); + } else { + ic->recvd = id; + ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0); + } + break; + case Ilack: + if(ack == ic->start) { + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + ilpullup(s); + } + break; + case Ildata: + if(ack == ic->start) { + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + goto established; + } + break; + case Ilclose: + if(ack == ic->start) + ilhangup(s, "remote close"); + break; + } + freeblist(bp); + break; + case Ilestablished: + established: + switch(h->iltype) { + case Ilsync: + if(id != ic->rstart) + ilhangup(s, "remote close"); + else + ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0); + freeblist(bp); + break; + case Ildata: + /* + * avoid consuming all the mount rpc buffers in the + * system. if the input queue is too long, drop this + * packet. + */ + if (s->rq && qlen(s->rq) >= Maxrq) { + priv->stats[DroppedMsgs]++; + freeblist(bp); + break; + } + + ilackto(ic, ack, bp); + iloutoforder(s, h, bp); + ilpullup(s); + break; + case Ildataquery: + ilackto(ic, ack, bp); + iloutoforder(s, h, bp); + ilpullup(s); + ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec); + break; + case Ilack: + ilackto(ic, ack, bp); + freeblist(bp); + break; + case Ilquery: + ilackto(ic, ack, bp); + ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec); + freeblist(bp); + break; + case Ilstate: + if(ack >= ic->rttack) + ic->rttack = 0; + ilackto(ic, ack, bp); + if(h->ilspec > Nqt) + h->ilspec = 0; + if(ic->qt[h->ilspec] > ack){ + ilrexmit(ic); + ilsettimeout(ic); + } + freeblist(bp); + break; + case Ilclose: + freeblist(bp); + if(ack < ic->start || ack > ic->next) + break; + ic->recvd = id; + ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0); + ic->state = Ilclosing; + ilsettimeout(ic); + ilfreeq(ic); + break; + } + break; + case Illistening: + freeblist(bp); + break; + case Ilclosing: + switch(h->iltype) { + case Ilclose: + ic->recvd = id; + ilsendctl(s, nil, Ilclose, ic->next, 
ic->recvd, 0); + if(ack == ic->next) + ilhangup(s, nil); + break; + default: + break; + } + freeblist(bp); + break; + } +} + +void +ilrexmit(Ilcb *ic) +{ + Ilhdr *h; + Block *nb; + Conv *c; + ulong id; + Ilpriv *priv; + + nb = nil; + qlock(&ic->ackq); + if(ic->unacked) + nb = copyblock(ic->unacked, blocklen(ic->unacked)); + qunlock(&ic->ackq); + + if(nb == nil) + return; + + h = (Ilhdr*)nb->rp; + h->vihl = IP_VER4; + + h->iltype = Ildataquery; + hnputl(h->ilack, ic->recvd); + h->ilspec = ilnextqt(ic); + h->ilsum[0] = 0; + h->ilsum[1] = 0; + hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen))); + + c = ic->conv; + id = nhgetl(h->ilid); + netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %ud/%ud\n", id, ic->recvd, + ic->rexmit, ic->timeout, + c->raddr, c->lport, c->rport); + + ilbackoff(ic); + + ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c); + + /* statistics */ + ic->rxtot++; + priv = c->p->priv; + priv->rexmit++; +} + +/* DEBUG */ +void +ilprocess(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + + ic = (Ilcb*)s->ptcl; + + USED(ic); + netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %ud ack %ud %ud->%ud) ", + ilstates[ic->state], ic->rstart, ic->recvd, ic->start, + ic->next, iltype[h->iltype], nhgetl(h->ilid), + nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst)); + + _ilprocess(s, h, bp); + + netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next); +} + +void +ilhangup(Conv *s, char *msg) +{ + Ilcb *ic; + int callout; + + netlog(s->p->f, Logil, "il: hangup! 
%I %d/%d: %s\n", s->raddr, + s->lport, s->rport, msg?msg:"no reason"); + + ic = (Ilcb*)s->ptcl; + callout = ic->state == Ilsyncer; + illocalclose(s); + + qhangup(s->rq, msg); + qhangup(s->wq, msg); + + if(callout) + Fsconnected(s, msg); +} + +void +ilpullup(Conv *s) +{ + Ilcb *ic; + Ilhdr *oh; + Block *bp; + ulong oid, dlen; + Ilpriv *ipriv; + + ic = (Ilcb*)s->ptcl; + if(ic->state != Ilestablished) + return; + + qlock(&ic->outo); + while(ic->outoforder) { + bp = ic->outoforder; + oh = (Ilhdr*)bp->rp; + oid = nhgetl(oh->ilid); + if(oid <= ic->recvd) { + ic->outoforder = bp->list; + freeblist(bp); + continue; + } + if(oid != ic->recvd+1){ + ipriv = s->p->priv; + ipriv->stats[OutOfOrder]++; + break; + } + + ic->recvd = oid; + ic->outoforder = bp->list; + + bp->list = nil; + dlen = nhgets(oh->illen)-IL_HDRSIZE; + bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen); + /* + * Upper levels don't know about multiple-block + * messages so copy all into one (yick). + */ + bp = concatblock(bp); + if(bp == 0) + panic("ilpullup"); + bp = packblock(bp); + if(bp == 0) + panic("ilpullup2"); + qpass(s->rq, bp); + } + qunlock(&ic->outo); +} + +void +iloutoforder(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + uchar *lid; + Block *f, **l; + ulong id, newid; + Ilpriv *ipriv; + + ipriv = s->p->priv; + ic = (Ilcb*)s->ptcl; + bp->list = nil; + + id = nhgetl(h->ilid); + /* Window checks */ + if(id <= ic->recvd || id > ic->recvd+ic->window) { + netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n", + id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport); + freeblist(bp); + return; + } + + /* Packet is acceptable so sort onto receive queue for pullup */ + qlock(&ic->outo); + if(ic->outoforder == nil) + ic->outoforder = bp; + else { + l = &ic->outoforder; + for(f = *l; f; f = f->list) { + lid = ((Ilhdr*)(f->rp))->ilid; + newid = nhgetl(lid); + if(id <= newid) { + if(id == newid) { + ipriv->stats[DupMsg]++; + ipriv->stats[DupBytes] += blocklen(bp); + 
qunlock(&ic->outo); + freeblist(bp); + return; + } + bp->list = f; + *l = bp; + qunlock(&ic->outo); + return; + } + l = &f->list; + } + *l = bp; + } + qunlock(&ic->outo); +} + +void +ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec) +{ + Ilhdr *ih; + Ilcb *ic; + Block *bp; + int ttl, tos; + + bp = allocb(IL_IPSIZE+IL_HDRSIZE); + bp->wp += IL_IPSIZE+IL_HDRSIZE; + + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->proto = IP_ILPROTO; + hnputs(ih->illen, IL_HDRSIZE); + ih->frag[0] = 0; + ih->frag[1] = 0; + if(inih) { + hnputl(ih->dst, nhgetl(inih->src)); + hnputl(ih->src, nhgetl(inih->dst)); + hnputs(ih->ilsrc, nhgets(inih->ildst)); + hnputs(ih->ildst, nhgets(inih->ilsrc)); + hnputl(ih->ilid, nhgetl(inih->ilack)); + hnputl(ih->ilack, nhgetl(inih->ilid)); + ttl = MAXTTL; + tos = DFLTTOS; + } + else { + v6tov4(ih->dst, ipc->raddr); + v6tov4(ih->src, ipc->laddr); + hnputs(ih->ilsrc, ipc->lport); + hnputs(ih->ildst, ipc->rport); + hnputl(ih->ilid, id); + hnputl(ih->ilack, ack); + ic = (Ilcb*)ipc->ptcl; + ic->acksent = ack; + ic->acktime = NOW; + ttl = ipc->ttl; + tos = ipc->tos; + } + ih->iltype = type; + ih->ilspec = ilspec; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE)); + +if(ipc==nil) + panic("ipc is nil caller is %#p", getcallerpc(&ipc)); +if(ipc->p==nil) + panic("ipc->p is nil"); + + netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n", + iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), + nhgets(ih->ilsrc), nhgets(ih->ildst)); + + ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc); +} + +void +ilreject(Fs *f, Ilhdr *inih) +{ + Ilhdr *ih; + Block *bp; + + bp = allocb(IL_IPSIZE+IL_HDRSIZE); + bp->wp += IL_IPSIZE+IL_HDRSIZE; + + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->proto = IP_ILPROTO; + hnputs(ih->illen, IL_HDRSIZE); + ih->frag[0] = 0; + ih->frag[1] = 0; + hnputl(ih->dst, nhgetl(inih->src)); + hnputl(ih->src, 
nhgetl(inih->dst)); + hnputs(ih->ilsrc, nhgets(inih->ildst)); + hnputs(ih->ildst, nhgets(inih->ilsrc)); + hnputl(ih->ilid, nhgetl(inih->ilack)); + hnputl(ih->ilack, nhgetl(inih->ilid)); + ih->iltype = Ilclose; + ih->ilspec = 0; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE)); + + ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil); +} + +void +ilsettimeout(Ilcb *ic) +{ + ulong pt; + + pt = (ic->delay>>LogAGain) + + ic->unackedbytes/(ic->rate>>LogAGain) + + (ic->mdev>>(LogDGain-1)) + + AckDelay; + if(pt > MaxTimeout) + pt = MaxTimeout; + ic->timeout = NOW + pt; +} + +void +ilbackoff(Ilcb *ic) +{ + ulong pt; + int i; + + pt = (ic->delay>>LogAGain) + + ic->unackedbytes/(ic->rate>>LogAGain) + + (ic->mdev>>(LogDGain-1)) + + AckDelay; + for(i = 0; i < ic->rexmit; i++) + pt = pt + (pt>>1); + if(pt > MaxTimeout) + pt = MaxTimeout; + ic->timeout = NOW + pt; + + if(ic->fasttimeout) + ic->timeout = NOW+Iltickms; + + ic->rexmit++; +} + +// complain if two numbers not within an hour of each other +#define Tfuture (1000*60*60) +int +later(ulong t1, ulong t2, char *x) +{ + int dt; + + dt = t1 - t2; + if(dt > 0) { + if(x != nil && dt > Tfuture) + print("%s: way future %d\n", x, dt); + return 1; + } + if(dt < -Tfuture) { + if(x != nil) + print("%s: way past %d\n", x, -dt); + return 1; + } + return 0; +} + +void +ilackproc(void *x) +{ + Ilcb *ic; + Conv **s, *p; + Proto *il; + + il = x; + +loop: + tsleep(&up->sleep, return0, 0, Iltickms); + for(s = il->conv; s && *s; s++) { + p = *s; + ic = (Ilcb*)p->ptcl; + + switch(ic->state) { + case Ilclosed: + case Illistening: + break; + case Ilclosing: + if(later(NOW, ic->timeout, "timeout0")) { + if(ic->rexmit > MaxRexmit){ + ilhangup(p, nil); + break; + } + ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0); + ilbackoff(ic); + } + break; + + case Ilsyncee: + case Ilsyncer: + if(later(NOW, ic->timeout, "timeout1")) { + if(ic->rexmit > MaxRexmit){ + ilhangup(p, etime); + break; + } + 
ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0); + ilbackoff(ic); + } + break; + + case Ilestablished: + if(ic->recvd != ic->acksent) + if(later(NOW, ic->acktime, "acktime")) + ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0); + + if(later(NOW, ic->querytime, "querytime")){ + if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){ + netlog(il->f, Logil, "il: hangup: deathtime\n"); + ilhangup(p, etime); + break; + } + ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); + ic->querytime = NOW + QueryTime; + } + + if(ic->unacked != nil) + if(later(NOW, ic->timeout, "timeout2")) { + if(ic->rexmit > MaxRexmit){ + netlog(il->f, Logil, "il: hangup: too many rexmits\n"); + ilhangup(p, etime); + break; + } + ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); + ic->rxquery++; + ilbackoff(ic); + } + break; + } + } + goto loop; +} + +void +ilcbinit(Ilcb *ic) +{ + ic->start = nrand(0x1000000); + ic->next = ic->start+1; + ic->recvd = 0; + ic->window = Defaultwin; + ic->unackedbytes = 0; + ic->unacked = nil; + ic->outoforder = nil; + ic->rexmit = 0; + ic->rxtot = 0; + ic->rxquery = 0; + ic->qtx = 1; + ic->fasttimeout = 0; + + /* timers */ + ic->delay = DefRtt<mdev = DefRtt<rate = DefByteRate<querytime = NOW + QueryTime; + ic->lastrecv = NOW; /* or we'll timeout right away */ + ilsettimeout(ic); +} + +char* +ilstart(Conv *c, int type, int fasttimeout) +{ + Ilcb *ic; + Ilpriv *ipriv; + char kpname[KNAMELEN]; + + ipriv = c->p->priv; + + if(ipriv->ackprocstarted == 0){ + qlock(&ipriv->apl); + if(ipriv->ackprocstarted == 0){ + sprint(kpname, "#I%dilack", c->p->f->dev); + kproc(kpname, ilackproc, c->p); + ipriv->ackprocstarted = 1; + } + qunlock(&ipriv->apl); + } + + ic = (Ilcb*)c->ptcl; + ic->conv = c; + + if(ic->state != Ilclosed) + return nil; + + ilcbinit(ic); + + if(fasttimeout){ + /* timeout if we can't connect quickly */ + ic->fasttimeout = 1; + ic->timeout = NOW+Iltickms; + ic->rexmit = MaxRexmit - 4; + }; + + switch(type) { + default: + netlog(c->p->f, 
Logil, "il: start: type %d\n", type); + break; + case IL_LISTEN: + ic->state = Illistening; + iphtadd(&ipriv->ht, c); + break; + case IL_CONNECT: + ic->state = Ilsyncer; + iphtadd(&ipriv->ht, c); + ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0); + break; + } + + return nil; +} + +void +ilfreeq(Ilcb *ic) +{ + Block *bp, *next; + + qlock(&ic->ackq); + for(bp = ic->unacked; bp; bp = next) { + next = bp->list; + freeblist(bp); + } + ic->unacked = nil; + qunlock(&ic->ackq); + + qlock(&ic->outo); + for(bp = ic->outoforder; bp; bp = next) { + next = bp->list; + freeblist(bp); + } + ic->outoforder = nil; + qunlock(&ic->outo); +} + +void +iladvise(Proto *il, Block *bp, char *msg) +{ + Ilhdr *h; + Ilcb *ic; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource; + Conv *s, **p; + + h = (Ilhdr*)(bp->rp); + + v4tov6(dest, h->dst); + v4tov6(source, h->src); + psource = nhgets(h->ilsrc); + + + /* Look for a connection, unfortunately the destination port is missing */ + qlock(il); + for(p = il->conv; *p; p++) { + s = *p; + if(s->lport == psource) + if(ipcmp(s->laddr, source) == 0) + if(ipcmp(s->raddr, dest) == 0){ + qunlock(il); + ic = (Ilcb*)s->ptcl; + switch(ic->state){ + case Ilsyncer: + ilhangup(s, msg); + break; + } + freeblist(bp); + return; + } + } + qunlock(il); + freeblist(bp); +} + +int +ilnextqt(Ilcb *ic) +{ + int x; + + qlock(&ic->ackq); + x = ic->qtx; + if(++x > Nqt) + x = 1; + ic->qtx = x; + ic->qt[x] = ic->next-1; /* highest xmitted packet */ + ic->qt[0] = ic->qt[x]; /* compatibility with old implementations */ + qunlock(&ic->ackq); + + return x; +} + +/* calculate scale constants that converts fast ticks to ms (more or less) */ +static void +inittimescale(void) +{ + uvlong hz; + + fastticks(&hz); + if(hz > 1000){ + scalediv = hz/1000; + scalemul = 1; + } else { + scalediv = 1; + scalemul = 1000/hz; + } +} + +void +ilinit(Fs *f) +{ + Proto *il; + + inittimescale(); + + il = smalloc(sizeof(Proto)); + il->priv = smalloc(sizeof(Ilpriv)); + il->name = "il"; + 
il->connect = ilconnect; + il->announce = ilannounce; + il->state = ilstate; + il->create = ilcreate; + il->close = ilclose; + il->rcv = iliput; + il->ctl = nil; + il->advise = iladvise; + il->stats = ilxstats; + il->inuse = ilinuse; + il->gc = nil; + il->ipproto = IP_ILPROTO; + il->nc = scalednconv(); + il->ptclsize = sizeof(Ilcb); + Fsproto(f, il); +} diff -Nru /sys/src/9k/ip/ip.c /sys/src/9k/ip/ip.c --- /sys/src/9k/ip/ip.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ip.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,705 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0) + +static char *statnames[] = +{ +[Forwarding] "Forwarding", +[DefaultTTL] "DefaultTTL", +[InReceives] "InReceives", +[InHdrErrors] "InHdrErrors", +[InAddrErrors] "InAddrErrors", +[ForwDatagrams] "ForwDatagrams", +[InUnknownProtos] "InUnknownProtos", +[InDiscards] "InDiscards", +[InDelivers] "InDelivers", +[OutRequests] "OutRequests", +[OutDiscards] "OutDiscards", +[OutNoRoutes] "OutNoRoutes", +[ReasmTimeout] "ReasmTimeout", +[ReasmReqds] "ReasmReqds", +[ReasmOKs] "ReasmOKs", +[ReasmFails] "ReasmFails", +[FragOKs] "FragOKs", +[FragFails] "FragFails", +[FragCreates] "FragCreates", +}; + +#define BLKIP(xp) ((Ip4hdr*)((xp)->rp)) +/* + * This sleazy macro relies on the media header size being + * larger than sizeof(Ipfrag). 
ipreassemble checks this is true + */ +#define BKFG(xp) ((Ipfrag*)((xp)->base)) + +ushort ipcsum(uchar*); +Block* ip4reassemble(IP*, int, Block*, Ip4hdr*); +void ipfragfree4(IP*, Fragment4*); +Fragment4* ipfragallo4(IP*); + +void +ip_init_6(Fs *f) +{ + v6params *v6p; + + v6p = smalloc(sizeof(v6params)); + + v6p->rp.mflag = 0; /* default not managed */ + v6p->rp.oflag = 0; + v6p->rp.maxraint = 600000; /* millisecs */ + v6p->rp.minraint = 200000; + v6p->rp.linkmtu = 0; /* no mtu sent */ + v6p->rp.reachtime = 0; + v6p->rp.rxmitra = 0; + v6p->rp.ttl = MAXTTL; + v6p->rp.routerlt = 3 * v6p->rp.maxraint; + + v6p->hp.rxmithost = 1000; /* v6 RETRANS_TIMER */ + + v6p->cdrouter = -1; + + f->v6p = v6p; +} + +void +initfrag(IP *ip, int size) +{ + Fragment4 *fq4, *eq4; + Fragment6 *fq6, *eq6; + + ip->fragfree4 = (Fragment4*)malloc(sizeof(Fragment4) * size); + if(ip->fragfree4 == nil) + panic("initfrag"); + + eq4 = &ip->fragfree4[size]; + for(fq4 = ip->fragfree4; fq4 < eq4; fq4++) + fq4->next = fq4+1; + + ip->fragfree4[size-1].next = nil; + + ip->fragfree6 = (Fragment6*)malloc(sizeof(Fragment6) * size); + if(ip->fragfree6 == nil) + panic("initfrag"); + + eq6 = &ip->fragfree6[size]; + for(fq6 = ip->fragfree6; fq6 < eq6; fq6++) + fq6->next = fq6+1; + + ip->fragfree6[size-1].next = nil; +} + +void +ip_init(Fs *f) +{ + IP *ip; + + ip = smalloc(sizeof(IP)); + initfrag(ip, 100); + f->ip = ip; + + ip_init_6(f); +} + +void +iprouting(Fs *f, int on) +{ + f->ip->iprouting = on; + if(f->ip->iprouting==0) + f->ip->stats[Forwarding] = 2; + else + f->ip->stats[Forwarding] = 1; +} + +int +ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c) +{ + Ipifc *ifc; + uchar *gate; + ulong fragoff; + Block *xp, *nb; + Ip4hdr *eh, *feh; + int lid, len, seglen, chunk, dlen, blklen, offset, medialen; + Route *r, *sr; + IP *ip; + int rv = 0; + + ip = f->ip; + + /* Fill out the ip header */ + eh = (Ip4hdr*)(bp->rp); + + ip->stats[OutRequests]++; + + /* Number of uchars in data and ip header to 
write */ + len = blocklen(bp); + + if(gating){ + chunk = nhgets(eh->length); + if(chunk > len){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "short gated packet\n"); + goto free; + } + if(chunk < len) + len = chunk; + } + if(len >= IP_MAX){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "exceeded ip max size %V\n", eh->dst); + goto free; + } + + r = v4lookup(f, eh->dst, c); + if(r == nil){ + ip->stats[OutNoRoutes]++; + netlog(f, Logip, "no interface %V\n", eh->dst); + rv = -1; + goto free; + } + + ifc = r->ifc; + if(r->type & (Rifc|Runi)) + gate = eh->dst; + else + if(r->type & (Rbcast|Rmulti)) { + gate = eh->dst; + sr = v4lookup(f, eh->src, nil); + if(sr != nil && (sr->type & Runi)) + ifc = sr->ifc; + } + else + gate = r->v4.gate; + + if(!gating) + eh->vihl = IP_VER4|IP_HLEN4; + eh->ttl = ttl; + if(!gating) + eh->tos = tos; + + if(!canrlock(ifc)) + goto free; + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->medium == nil) + goto raise; + + /* If we dont need to fragment just send it */ + if(c && c->maxfragsize && c->maxfragsize < ifc->maxtu) + medialen = c->maxfragsize - ifc->medium->hsize; + else + medialen = ifc->maxtu - ifc->medium->hsize; + if(len <= medialen) { + if(!gating) + hnputs(eh->id, ainc(&ip->id4)); + hnputs(eh->length, len); + if(!gating){ + eh->frag[0] = 0; + eh->frag[1] = 0; + } + eh->cksum[0] = 0; + eh->cksum[1] = 0; + hnputs(eh->cksum, ipcsum(&eh->vihl)); + assert(bp->next == nil); + ifc->medium->bwrite(ifc, bp, V4, gate); + runlock(ifc); + poperror(); + return 0; + } + +if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst); + + if(eh->frag[0] & (IP_DF>>8)){ + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + icmpcantfrag(f, bp, medialen); + netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst); + goto raise; + } + + seglen = (medialen - IP4HDR) & ~7; + if(seglen < 8){ + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%V seglen < 8\n", eh->dst); + goto raise; + } + + dlen = len 
- IP4HDR; + xp = bp; + if(gating) + lid = nhgets(eh->id); + else + lid = ainc(&ip->id4); + + offset = IP4HDR; + while(xp != nil && offset && offset >= BLEN(xp)) { + offset -= BLEN(xp); + xp = xp->next; + } + xp->rp += offset; + + if(gating) + fragoff = nhgets(eh->frag)<<3; + else + fragoff = 0; + dlen += fragoff; + for(; fragoff < dlen; fragoff += seglen) { + nb = allocb(IP4HDR+seglen); + feh = (Ip4hdr*)(nb->rp); + + memmove(nb->wp, eh, IP4HDR); + nb->wp += IP4HDR; + + if((fragoff + seglen) >= dlen) { + seglen = dlen - fragoff; + hnputs(feh->frag, fragoff>>3); + } + else + hnputs(feh->frag, (fragoff>>3)|IP_MF); + + hnputs(feh->length, seglen + IP4HDR); + hnputs(feh->id, lid); + + /* Copy up the data area */ + chunk = seglen; + while(chunk) { + if(!xp) { + ip->stats[OutDiscards]++; + ip->stats[FragFails]++; + freeblist(nb); + netlog(f, Logip, "!xp: chunk %d\n", chunk); + goto raise; + } + blklen = chunk; + if(BLEN(xp) < chunk) + blklen = BLEN(xp); + memmove(nb->wp, xp->rp, blklen); + nb->wp += blklen; + xp->rp += blklen; + chunk -= blklen; + if(xp->rp == xp->wp) + xp = xp->next; + } + + feh->cksum[0] = 0; + feh->cksum[1] = 0; + hnputs(feh->cksum, ipcsum(&feh->vihl)); + ifc->medium->bwrite(ifc, nb, V4, gate); + ip->stats[FragCreates]++; + } + ip->stats[FragOKs]++; +raise: + runlock(ifc); + poperror(); +free: + freeblist(bp); + return rv; +} + +void +ipiput4(Fs *f, Ipifc *ifc, Block *bp) +{ + int hl; + int hop, tos, proto, olen; + Ip4hdr *h; + Proto *p; + ushort frag; + int notforme; + uchar *dp, v6dst[IPaddrlen]; + IP *ip; + Route *r; + Conv conv; + + if(BLKIPVER(bp) != IP_VER4) { + ipiput6(f, ifc, bp); + return; + } + + ip = f->ip; + ip->stats[InReceives]++; + + /* + * Ensure we have all the header info in the first + * block. Make life easier for other protocols by + * collecting up to the first 64 bytes in the first block. 
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP4HDR)
+			hl = IP4HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip4hdr*)(bp->rp);
+
+	/* dump anything whose header doesn't checksum */
+	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: checksum error %V\n", h->src);
+		freeblist(bp);
+		return;
+	}
+	v4tov6(v6dst, h->dst);
+	notforme = ipforme(f, v6dst) == 0;
+
+	/* Check header length and version */
+	if((h->vihl&0x0F) != IP_HLEN4) {
+		hl = (h->vihl&0xF)<<2;
+		/*
+		 * Reject a header shorter than the fixed 20 bytes, and one
+		 * that claims more bytes than the first block holds.  The
+		 * first block was pulled up to min(64, packet length) above
+		 * and hl is at most 60, so hl > BLEN(bp) means the header is
+		 * longer than the whole packet.  Without this check the
+		 * option stripping below would memmove past bp->wp and leave
+		 * bp->rp beyond it.
+		 */
+		if(hl < (IP_HLEN4<<2) || hl > BLEN(bp)) {
+			ip->stats[InHdrErrors]++;
+			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+			freeblist(bp);
+			return;
+		}
+		/* If this is not routed strip off the options */
+		if(notforme == 0) {
+			olen = nhgets(h->length);
+			/* slide the fixed header up against the payload */
+			dp = bp->rp + (hl - (IP_HLEN4<<2));
+			memmove(dp, h, IP_HLEN4<<2);
+			bp->rp = dp;
+			h = (Ip4hdr*)(bp->rp);
+			h->vihl = (IP_VER4|IP_HLEN4);
+			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+		}
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward to source's network */
+		memset(&conv, 0, sizeof conv);
+		r = v4lookup(f, h->dst, &conv);
+		if(r == nil || r->ifc == ifc){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded(f, ifc->lifc->local, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* reassemble if the interface expects it */
+if(r->ifc == nil) panic("nil route rfc");
+		if(r->ifc->reassemble){
+			frag = nhgets(h->frag);
+			if(frag) {
+				/* tos is borrowed to carry the more-fragments flag into ip4reassemble */
+				h->tos = 0;
+				if(frag & IP_MF)
+					h->tos = 1;
+				bp = ip4reassemble(ip, frag, bp, h);
+				if(bp == nil)
+					return;
+				h = (Ip4hdr*)(bp->rp);
+			}
+		}
+
+		ip->stats[ForwDatagrams]++;
+		tos = h->tos;
+		hop = h->ttl;
+		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		return;
+	}
+
+	frag = nhgets(h->frag);
+	if(frag) {
+		h->tos = 0;
+		if(frag & IP_MF)
+			h->tos
= 1; + bp = ip4reassemble(ip, frag, bp, h); + if(bp == nil) + return; + h = (Ip4hdr*)(bp->rp); + } + + /* don't let any frag info go up the stack */ + h->frag[0] = 0; + h->frag[1] = 0; + + proto = h->proto; + p = Fsrcvpcol(f, proto); + if(p != nil && p->rcv != nil) { + ip->stats[InDelivers]++; + (*p->rcv)(p, ifc, bp); + return; + } + ip->stats[InDiscards]++; + ip->stats[InUnknownProtos]++; + freeblist(bp); +} + +int +ipstats(Fs *f, char *buf, int len) +{ + IP *ip; + char *p, *e; + int i; + + ip = f->ip; + ip->stats[DefaultTTL] = MAXTTL; + + p = buf; + e = p+len; + for(i = 0; i < Nipstats; i++) + p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]); + return p - buf; +} + +Block* +ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih) +{ + int fend; + ushort id; + Fragment4 *f, *fnext; + ulong src, dst; + Block *bl, **l, *last, *prev; + int ovlap, len, fragsize, pktposn; + + src = nhgetl(ih->src); + dst = nhgetl(ih->dst); + id = nhgets(ih->id); + + /* + * block lists are too hard, pullupblock into a single block + */ + if(bp->next){ + bp = pullupblock(bp, blocklen(bp)); + ih = (Ip4hdr*)(bp->rp); + } + + qlock(&ip->fraglock4); + + /* + * find a reassembly queue for this fragment + */ + for(f = ip->flisthead4; f; f = fnext){ + fnext = f->next; /* because ipfragfree4 changes the list */ + if(f->src == src && f->dst == dst && f->id == id) + break; + if(f->age < NOW){ + ip->stats[ReasmTimeout]++; + ipfragfree4(ip, f); + } + } + + /* + * if this isn't a fragmented packet, accept it + * and get rid of any fragments that might go + * with it. 
+	 */
+	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+		if(f != nil) {
+			ipfragfree4(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock4);
+		return bp;
+	}
+
+	/*
+	 * BKFG keeps the fragment bookkeeping at bp->base, so make sure
+	 * there are IPFRAGSZ bytes of headroom in front of the header.
+	 */
+	if(bp->base+IPFRAGSZ >= bp->rp){
+		bp = padblock(bp, IPFRAGSZ);
+		bp->rp += IPFRAGSZ;
+		/*
+		 * padblock hands back the block to use, which need not be
+		 * the one passed in; re-derive the header pointer rather
+		 * than keep using one into the old block.
+		 */
+		ih = (Ip4hdr*)(bp->rp);
+	}
+
+	BKFG(bp)->foff = offset<<3;
+	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo4(ip);
+		f->id = id;
+		f->src = src;
+		f->dst = dst;
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock4);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 * find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			/* new fragment lies entirely inside prev: nothing new in it */
+			if(ovlap >= BKFG(bp)->flen) {
+				freeblist(bp);
+				qunlock(&ip->fraglock4);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {
+				/* partly covered: drop its first ovlap data bytes */
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 * look for a complete packet.  if we get to a fragment
+	 * without IP_MF set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl; bl = bl->next) {
+		if(BKFG(bl)->foff != pktposn)
+			break;
+		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+			bl = f->blist;
+			/*
+			 * Size the first fragment from its (possibly overlap-
+			 * trimmed) flen, not from its IP length field.  The two
+			 * are equal unless a later fragment overlapped its tail;
+			 * in that case the length field would re-include the
+			 * trimmed bytes and the packet would come out too long.
+			 */
+			len = IP4HDR + BKFG(bl)->flen;
+			bl->wp = bl->rp + len;
+
+			/* Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += IP4HDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			/* detach the chain before the queue is recycled */
+			bl = f->blist;
+			f->blist = nil;
+			ipfragfree4(ip, f);
+			ih = BLKIP(bl);
+			hnputs(ih->length, len);
+			qunlock(&ip->fraglock4);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock4);
+	return nil;
+}
+
+/*
+ * ipfragfree4 - Free a list of fragments - assume hold fraglock4
+ *
+ * Frees any blocks still queued on frag, unlinks frag from
+ * ip->flisthead4 if it is there, and pushes it on ip->fragfree4.
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+	Fragment4 *fl, **l;
+
+	if(frag->blist)
+		freeblist(frag->blist);
+
+	frag->src = 0;
+	frag->id = 0;
+	frag->blist = nil;
+
+	/* l trails fl so the matching entry can be spliced out */
+	l = &ip->flisthead4;
+	for(fl = *l; fl; fl = fl->next) {
+		if(fl == frag) {
+			*l = frag->next;
+			break;
+		}
+		l = &fl->next;
+	}
+
+	frag->next = ip->fragfree4;
+	ip->fragfree4 = frag;
+
+}
+
+/*
+ * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
+ *
+ * Takes an entry from ip->fragfree4, first recycling the entry at the
+ * tail of ip->flisthead4 if the free list is empty, puts it at the
+ * head of ip->flisthead4 and stamps its expiry time.
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+	Fragment4 *f;
+
+	while(ip->fragfree4 == nil) {
+		/* free last entry on fraglist */
+		for(f = ip->flisthead4; f->next; f = f->next)
+			;
+		ipfragfree4(ip, f);
+	}
+	f = ip->fragfree4;
+	ip->fragfree4 = f->next;
+	f->next = ip->flisthead4;
+	ip->flisthead4 = f;
+	/* ip4reassemble drops queues whose age is below NOW; presumably NOW is in ms, i.e. 30 s */
+	f->age = NOW + 30000;
+
+	return f;
+}
+
+/*
+ * ipcsum - checksum of the IP header starting at addr.
+ *
+ * The header length comes from the low nibble of the first byte
+ * (in 4-byte words).  The header is summed as big-endian 16-bit
+ * words, the carries are folded back in twice, and the complement
+ * of the low 16 bits is returned.
+ */
+ushort
+ipcsum(uchar *addr)
+{
+	int len;
+	ulong sum;
+
+	sum = 0;
+	len = (addr[0]&0xf)<<2;
+
+	while(len > 0) {
+		sum += addr[0]<<8 | addr[1] ;
+		len -= 2;
+		addr += 2;
+	}
+
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+
+	return (sum^0xffff);
+}
diff -Nru /sys/src/9k/ip/ip.h /sys/src/9k/ip/ip.h
--- /sys/src/9k/ip/ip.h Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/ip/ip.h Wed Dec 9 00:00:00 2015
@@ -0,0
+1,738 @@ +typedef struct Conv Conv; +typedef struct Fragment4 Fragment4; +typedef struct Fragment6 Fragment6; +typedef struct Fs Fs; +typedef union Hwaddr Hwaddr; +typedef struct IP IP; +typedef struct IPaux IPaux; +typedef struct Ip4hdr Ip4hdr; +typedef struct Ipfrag Ipfrag; +typedef struct Ipself Ipself; +typedef struct Ipselftab Ipselftab; +typedef struct Iplink Iplink; +typedef struct Iplifc Iplifc; +typedef struct Ipmulti Ipmulti; +typedef struct Ipifc Ipifc; +typedef struct Iphash Iphash; +typedef struct Ipht Ipht; +typedef struct Netlog Netlog; +typedef struct Medium Medium; +typedef struct Proto Proto; +typedef struct Arpent Arpent; +typedef struct Arp Arp; +typedef struct Route Route; + +typedef struct Routerparams Routerparams; +typedef struct Hostparams Hostparams; +typedef struct v6router v6router; +typedef struct v6params v6params; + +#pragma incomplete Arp +#pragma incomplete Ipself +#pragma incomplete Ipselftab +#pragma incomplete IP +#pragma incomplete Netlog + +enum +{ + Addrlen= 64, + Maxproto= 20, + Nhash= 64, + Nchans= 1024, + Maxincall= Nchans/2, + MAClen= 16, /* longest mac address */ + + MAXTTL= 255, + DFLTTOS= 0, + + IPaddrlen= 16, + IPv4addrlen= 4, + IPv4off= 12, + IPllen= 4, + + /* ip versions */ + V4= 4, + V6= 6, + IP_VER4= 0x40, + IP_VER6= 0x60, + IP_HLEN4= 5, /* v4: Header length in words */ + IP_DF= 0x4000, /* v4: Don't fragment */ + IP_MF= 0x2000, /* v4: More fragments */ + IP4HDR= 20, /* sizeof(Ip4hdr) */ + IP_MAX= 64*1024, /* Max. 
Internet packet size, v4 & v6 */ + + /* 2^Lroot trees in the root table */ + Lroot= 10, + + Maxpath = 64, +}; + +enum +{ + Idle= 0, + Announcing= 1, + Announced= 2, + Connecting= 3, + Connected= 4, +}; + +/* MIB II counters */ +enum +{ + Forwarding, + DefaultTTL, + InReceives, + InHdrErrors, + InAddrErrors, + ForwDatagrams, + InUnknownProtos, + InDiscards, + InDelivers, + OutRequests, + OutDiscards, + OutNoRoutes, + ReasmTimeout, + ReasmReqds, + ReasmOKs, + ReasmFails, + FragOKs, + FragFails, + FragCreates, + + Nipstats, +}; + +struct Fragment4 +{ + Block* blist; + Fragment4* next; + ulong src; + ulong dst; + ushort id; + ulong age; +}; + +struct Fragment6 +{ + Block* blist; + Fragment6* next; + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; + uint id; + ulong age; +}; + +struct Ipfrag +{ + ushort foff; + ushort flen; + + uchar payload[]; +}; + +#define IPFRAGSZ offsetof(Ipfrag, payload[0]) + +/* an instance of IP */ +struct IP +{ + uvlong stats[Nipstats]; + + QLock fraglock4; + Fragment4* flisthead4; + Fragment4* fragfree4; + int id4; + + QLock fraglock6; + Fragment6* flisthead6; + Fragment6* fragfree6; + int id6; + + int iprouting; /* true if we route like a gateway */ +}; + +/* on the wire packet header */ +struct Ip4hdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* ip->identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar cksum[2]; /* Header checksum */ + uchar src[4]; /* IP source */ + uchar dst[4]; /* IP destination */ +}; + +/* + * one per conversation directory + */ +struct Conv +{ + QLock; + + int x; /* conversation index */ + Proto* p; + + int restricted; /* remote port is restricted */ + uint ttl; /* max time to live */ + uint tos; /* type of service */ + int ignoreadvice; /* don't terminate connection on icmp errors */ + + uchar ipversion; + uchar laddr[IPaddrlen]; /* local IP address 
*/ + uchar raddr[IPaddrlen]; /* remote IP address */ + ushort lport; /* local port number */ + ushort rport; /* remote port number */ + + char *owner; /* protections */ + int perm; + int inuse; /* opens of listen/data/ctl */ + int length; + int state; + + int maxfragsize; /* If set, used for fragmentation */ + + /* udp specific */ + int headers; /* data src/dst headers in udp */ + int reliable; /* true if reliable udp */ + + Conv* incall; /* calls waiting to be listened for */ + Conv* incalltl; /* tail of incoming call queue */ + int nincall; /* length of queue */ + Conv* next; + + Queue* rq; /* queued data waiting to be read */ + Queue* wq; /* queued data waiting to be written */ + Queue* eq; /* returned error packets */ + Queue* sq; /* snooping queue */ + Ref snoopers; /* number of processes with snoop open */ + + QLock car; + Rendez cr; + char cerr[ERRMAX]; + + QLock listenq; + Rendez listenr; + + Ipmulti *multi; /* multicast bindings for this interface */ + + void* ptcl; /* protocol specific stuff */ + + Route *r; /* last route used */ + ulong rgen; /* routetable generation for *r */ +}; + +struct Medium +{ + char *name; + int hsize; /* medium header size */ + int mintu; /* default min mtu */ + int maxtu; /* default max mtu */ + int maclen; /* mac address length */ + void (*bind)(Ipifc*, int, char**); + void (*unbind)(Ipifc*); + void (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip); + + /* for arming interfaces to receive multicast */ + void (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia); + void (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia); + + /* process packets written to 'data' */ + void (*pktin)(Fs *f, Ipifc *ifc, Block *bp); + + /* routes for router boards */ + void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int); + void (*remroute)(Ipifc *ifc, int, uchar*, uchar*); + void (*flushroutes)(Ipifc *ifc); + + /* for routing multicast groups */ + void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia); + void (*leavemulti)(Ipifc *ifc, uchar *a, uchar 
*ia); + + /* address resolution */ + void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */ + void (*areg)(Ipifc*, uchar*); /* register */ + + /* v6 address generation */ + void (*pref2addr)(uchar *pref, uchar *ea); + + int unbindonclose; /* if non-zero, unbind on last close */ +}; + +/* logical interface associated with a physical one */ +struct Iplifc +{ + uchar local[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar remote[IPaddrlen]; + uchar net[IPaddrlen]; + uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */ + uchar onlink; /* =1 => onlink, =0 offlink. */ + uchar autoflag; /* v6 autonomous flag */ + long validlt; /* v6 valid lifetime */ + long preflt; /* v6 preferred lifetime */ + long origint; /* time when addr was added */ + Iplink *link; /* addresses linked to this lifc */ + Iplifc *next; +}; + +/* binding twixt Ipself and Iplifc */ +struct Iplink +{ + Ipself *self; + Iplifc *lifc; + Iplink *selflink; /* next link for this local address */ + Iplink *lifclink; /* next link for this ifc */ + ulong expire; + Iplink *next; /* free list */ + int ref; +}; + +/* rfc 2461, pp.40—43. */ + +/* default values, one per stack */ +struct Routerparams { + int mflag; /* flag: managed address configuration */ + int oflag; /* flag: other stateful configuration */ + int maxraint; /* max. router adv interval (ms) */ + int minraint; /* min. 
router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
+};
+
+struct Hostparams {	/* host-side v6 defaults */
+	int	rxmithost;	/* ip_init_6 sets 1000, tagged "v6 RETRANS_TIMER"; presumably ms - confirm */
+};
+
+struct Ipifc	/* one physical interface */
+{
+	RWlock;		/* unnamed member: the Ipifc itself is passed to canrlock/runlock */
+
+	Conv	*conv;		/* link to its conversation structure */
+	char	dev[64];	/* device we're attached to */
+	Medium	*medium;	/* Media pointer; nil is checked for before sending */
+	int	maxtu;		/* Maximum transfer unit */
+	int	mintu;		/* Minimum transfer unit */
+	int	mbps;		/* megabits per second */
+	void	*arg;		/* medium specific */
+	int	reassemble;	/* reassemble IP packets before forwarding */
+
+	/* these are used so that we can unbind on the fly */
+	Lock	idlock;
+	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
+	int	ref;		/* number of proc's using this ipifc */
+	Rendez	wait;		/* where unbinder waits for ref == 0 */
+	int	unbinding;	/* NOTE(review): presumably non-zero while an unbind is in progress - confirm */
+
+	uchar	mac[MAClen];	/* MAC address */
+
+	Iplifc	*lifc;		/* logical interfaces on this physical one */
+
+	uvlong	in, out;	/* message statistics */
+	uvlong	inerr, outerr;	/* error counts for the above */
+
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
+ used only if node is router */ +}; + +/* + * one per multicast-lifc pair used by a Conv + */ +struct Ipmulti +{ + uchar ma[IPaddrlen]; + uchar ia[IPaddrlen]; + Ipmulti *next; +}; + +/* + * hash table for 2 ip addresses + 2 ports + */ +enum +{ + Nipht= 521, /* convenient prime */ + + IPmatchexact= 0, /* match on 4 tuple */ + IPmatchany, /* *!* */ + IPmatchport, /* *!port */ + IPmatchaddr, /* addr!* */ + IPmatchpa, /* addr!port */ +}; +struct Iphash +{ + Iphash *next; + Conv *c; + int match; +}; +struct Ipht +{ + Lock; + Iphash *tab[Nipht]; +}; +void iphtadd(Ipht*, Conv*); +void iphtrem(Ipht*, Conv*); +Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp); + +/* + * one per multiplexed protocol + */ +struct Proto +{ + QLock; + char* name; /* protocol name */ + int x; /* protocol index */ + int ipproto; /* ip protocol type */ + + char* (*connect)(Conv*, char**, int); + char* (*announce)(Conv*, char**, int); + char* (*bind)(Conv*, char**, int); + int (*state)(Conv*, char*, int); + void (*create)(Conv*); + void (*close)(Conv*); + void (*rcv)(Proto*, Ipifc*, Block*); + char* (*ctl)(Conv*, char**, int); + void (*advise)(Proto*, Block*, char*); + int (*stats)(Proto*, char*, int); + int (*local)(Conv*, char*, int); + int (*remote)(Conv*, char*, int); + int (*inuse)(Conv*); + int (*gc)(Proto*); /* returns true if any conversations are freed */ + + Fs *f; /* file system this proto is part of */ + Conv **conv; /* array of conversations */ + int ptclsize; /* size of per protocol ctl block */ + int nc; /* number of conversations */ + int ac; + Qid qid; /* qid for protocol directory */ + ushort nextrport; + + void *priv; +}; + + +/* + * one per IP protocol stack + */ +struct Fs +{ + RWlock; + int dev; + + int np; + Proto* p[Maxproto+1]; /* list of supported protocols */ + Proto* t2p[256]; /* vector of all protocols */ + Proto* ipifc; /* kludge for ipifcremroute & ipifcaddroute */ + Proto* ipmux; /* kludge for finding an ip multiplexor */ + + IP *ip; + Ipselftab 
*self; + Arp *arp; + v6params *v6p; + + Route *v4root[1<= 0. */ +}; + + +int Fsconnected(Conv*, char*); +Conv* Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar); +int Fspcolstats(char*, int); +int Fsproto(Fs*, Proto*); +int Fsbuiltinproto(Fs*, uchar); +Conv* Fsprotoclone(Proto*, char*); +Proto* Fsrcvpcol(Fs*, uchar); +Proto* Fsrcvpcolx(Fs*, uchar); +char* Fsstdconnect(Conv*, char**, int); +char* Fsstdannounce(Conv*, char**, int); +char* Fsstdbind(Conv*, char**, int); +ulong scalednconv(void); +void closeconv(Conv*); +/* + * logging + */ +enum +{ + Logip= 1<<1, + Logtcp= 1<<2, + Logfs= 1<<3, + Logil= 1<<4, + Logicmp= 1<<5, + Logudp= 1<<6, + Logcompress= 1<<7, + Logilmsg= 1<<8, + Loggre= 1<<9, + Logppp= 1<<10, + Logtcprxmt= 1<<11, + Logigmp= 1<<12, + Logudpmsg= 1<<13, + Logipmsg= 1<<14, + Logrudp= 1<<15, + Logrudpmsg= 1<<16, + Logesp= 1<<17, + Logtcpwin= 1<<18, +}; + +void netloginit(Fs*); +void netlogopen(Fs*); +void netlogclose(Fs*); +void netlogctl(Fs*, char*, int); +long netlogread(Fs*, void*, ulong, long); +void netlog(Fs*, int, char*, ...); +void ifcloginit(Fs*); +long ifclogread(Fs*, Chan *,void*, ulong, long); +void ifclog(Fs*, uchar *, int); +void ifclogopen(Fs*, Chan*); +void ifclogclose(Fs*, Chan*); + +#pragma varargck argpos netlog 3 + +/* + * iproute.c + */ +typedef struct RouteTree RouteTree; +typedef struct Routewalk Routewalk; +typedef struct V4route V4route; +typedef struct V6route V6route; + +enum +{ + + /* type bits */ + Rv4= (1<<0), /* this is a version 4 route */ + Rifc= (1<<1), /* this route is a directly connected interface */ + Rptpt= (1<<2), /* this route is a pt to pt interface */ + Runi= (1<<3), /* a unicast self address */ + Rbcast= (1<<4), /* a broadcast self address */ + Rmulti= (1<<5), /* a multicast self address */ + Rproxy= (1<<6), /* this route should be proxied */ +}; + +struct Routewalk +{ + int o; + int h; + char* p; + char* e; + void* state; + void (*walk)(Route*, Routewalk*); +}; + +struct RouteTree +{ + Route* right; + 
Route* left; + Route* mid; + uchar depth; + uchar type; + uchar ifcid; /* must match ifc->id */ + Ipifc *ifc; + char tag[4]; + int ref; +}; + +struct V4route +{ + ulong address; + ulong endaddress; + uchar gate[IPv4addrlen]; +}; + +struct V6route +{ + ulong address[IPllen]; + ulong endaddress[IPllen]; + uchar gate[IPaddrlen]; +}; + +struct Route +{ + RouteTree; + + union { + V6route v6; + V4route v4; + }; +}; +extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type); +extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type); +extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock); +extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock); +extern Route* v4lookup(Fs *f, uchar *a, Conv *c); +extern Route* v6lookup(Fs *f, uchar *a, Conv *c); +extern long routeread(Fs *f, char*, ulong, int); +extern long routewrite(Fs *f, Chan*, char*, int); +extern void routetype(int, char*); +extern void ipwalkroutes(Fs*, Routewalk*); +extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*); + +/* + * devip.c + */ + +/* + * Hanging off every ip channel's ->aux is the following structure. + * It maintains the state used by devip and iproute. 
+ */ +struct IPaux +{ + char *owner; /* the user that did the attach */ + char tag[4]; +}; + +extern IPaux* newipaux(char*, char*); + +/* + * arp.c + */ +struct Arpent +{ + uchar ip[IPaddrlen]; + uchar mac[MAClen]; + Medium *type; /* media type */ + Arpent* hash; + Block* hold; + Block* last; + uint ctime; /* time entry was created or refreshed */ + uint utime; /* time entry was last used */ + uchar state; + Arpent *nextrxt; /* re-transmit chain */ + uint rtime; /* time for next retransmission */ + uchar rxtsrem; + Ipifc *ifc; + uchar ifcid; /* must match ifc->id */ +}; + +extern void arpinit(Fs*); +extern int arpread(Arp*, char*, ulong, int); +extern int arpwrite(Fs*, char*, int); +extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h); +extern void arprelease(Arp*, Arpent *a); +extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac); +extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh); + +/* + * ipaux.c + */ + +extern int myetheraddr(uchar*, char*); +extern vlong parseip(uchar*, char*); +extern vlong parseipmask(uchar*, char*); +extern char* v4parseip(uchar*, char*); +extern void maskip(uchar *from, uchar *mask, uchar *to); +extern int parsemac(uchar *to, char *from, int len); +extern uchar* defmask(uchar*); +extern int isv4(uchar*); +extern void v4tov6(uchar *v6, uchar *v4); +extern int v6tov4(uchar *v4, uchar *v6); +extern int eipfmt(Fmt*); + +#define ipmove(x, y) memmove(x, y, IPaddrlen) +#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) + +extern uchar IPv4bcast[IPaddrlen]; +extern uchar IPv4bcastobs[IPaddrlen]; +extern uchar IPv4allsys[IPaddrlen]; +extern uchar IPv4allrouter[IPaddrlen]; +extern uchar IPnoaddr[IPaddrlen]; +extern uchar v4prefix[IPaddrlen]; +extern uchar IPallbits[IPaddrlen]; + +#define NOW TK2MS(sys->ticks) + +/* + * media + */ +extern Medium ethermedium; +extern Medium nullmedium; +extern Medium pktmedium; + +/* + * ipifc.c + */ 
+extern Medium* ipfindmedium(char *name); +extern void addipmedium(Medium *med); +extern int ipforme(Fs*, uchar *addr); +extern int iptentative(Fs*, uchar *addr); +extern int ipisbm(uchar *); +extern int ipismulticast(uchar *); +extern Ipifc* findipifc(Fs*, uchar *remote, int type); +extern void findlocalip(Fs*, uchar *local, uchar *remote); +extern int ipv4local(Ipifc *ifc, uchar *addr); +extern int ipv6local(Ipifc *ifc, uchar *addr); +extern int ipv6anylocal(Ipifc *ifc, uchar *addr); +extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip); +extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip); +extern int ipismulticast(uchar *ip); +extern int ipisbooting(void); +extern int ipifccheckin(Ipifc *ifc, Medium *med); +extern void ipifccheckout(Ipifc *ifc); +extern int ipifcgrab(Ipifc *ifc); +extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int); +extern void ipifcremroute(Fs*, int, uchar*, uchar*); +extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia); +extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia); +extern char* ipifcrem(Ipifc *ifc, char **argv, int argc); +extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp); +extern long ipselftabread(Fs*, char *a, ulong offset, int n); +extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc); +/* + * ip.c + */ +extern void iprouting(Fs*, int); +extern void icmpnoconv(Fs*, Block*); +extern void icmpcantfrag(Fs*, Block*, int); +extern void icmpttlexceeded(Fs*, uchar*, Block*); +extern ushort ipcsum(uchar*); +extern void ipiput4(Fs*, Ipifc*, Block*); +extern void ipiput6(Fs*, Ipifc*, Block*); +extern int ipoput4(Fs*, Block*, int, int, int, Conv*); +extern int ipoput6(Fs*, Block*, int, int, int, Conv*); +extern int ipstats(Fs*, char*, int); +extern ushort ptclbsum(uchar*, int); +extern ushort ptclcsum(Block*, int, int); +extern void ip_init(Fs*); +extern void update_mtucache(uchar*, ulong); +extern ulong restrict_mtu(uchar*, ulong); + +/* + * chandial.c + */ +extern Chan* 
chandial(char*, char*, char*, Chan**); + +/* + * global to all of the stack + */ +extern void (*igmpreportfn)(Ipifc*, uchar*); diff -Nru /sys/src/9k/ip/ipaux.c /sys/src/9k/ip/ipaux.c --- /sys/src/9k/ip/ipaux.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ipaux.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,368 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" +#include "ipv6.h" + +char *v6hdrtypes[Maxhdrtype] = +{ + [HBH] "HopbyHop", + [ICMP] "ICMP", + [IGMP] "IGMP", + [GGP] "GGP", + [IPINIP] "IP", + [ST] "ST", + [TCP] "TCP", + [UDP] "UDP", + [ISO_TP4] "ISO_TP4", + [RH] "Routinghdr", + [FH] "Fraghdr", + [IDRP] "IDRP", + [RSVP] "RSVP", + [AH] "Authhdr", + [ESP] "ESP", + [ICMPv6] "ICMPv6", + [NNH] "Nonexthdr", + [ISO_IP] "ISO_IP", + [IGRP] "IGRP", + [OSPF] "OSPF", +}; + +/* + * well known IPv6 addresses + */ +uchar v6Unspecified[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6loopback[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; + +uchar v6linklocal[IPaddrlen] = { + 0xfe, 0x80, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6linklocalmask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6llpreflen = 8; /* link-local prefix length in bytes */ + +uchar v6multicast[IPaddrlen] = { + 0xff, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6multicastmask[IPaddrlen] = { + 0xff, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6mcpreflen = 1; /* multicast prefix length */ + +uchar v6allnodesN[IPaddrlen] = { + 0xff, 0x01, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6allroutersN[IPaddrlen] = { + 0xff, 0x01, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x02 +}; +uchar v6allnodesNmask[IPaddrlen] = { + 0xff, 0xff, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6aNpreflen = 2; /* all nodes (N) prefix */ + +uchar 
v6allnodesL[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6allroutersL[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x02 +}; +uchar v6allnodesLmask[IPaddrlen] = { + 0xff, 0xff, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6aLpreflen = 2; /* all nodes (L) prefix */ + +uchar v6solicitednode[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01, + 0xff, 0, 0, 0 +}; +uchar v6solicitednodemask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0x0, 0x0, 0x0 +}; +int v6snpreflen = 13; + +ushort +ptclcsum(Block *bp, int offset, int len) +{ + uchar *addr; + ulong losum, hisum; + ushort csum; + int odd, blocklen, x; + + /* Correct to front of data area */ + while(bp != nil && offset && offset >= BLEN(bp)) { + offset -= BLEN(bp); + bp = bp->next; + } + if(bp == nil) + return 0; + + addr = bp->rp + offset; + blocklen = BLEN(bp) - offset; + + if(bp->next == nil) { + if(blocklen < len) + len = blocklen; + return ~ptclbsum(addr, len) & 0xffff; + } + + losum = 0; + hisum = 0; + + odd = 0; + while(len) { + x = blocklen; + if(len < x) + x = len; + + csum = ptclbsum(addr, x); + if(odd) + hisum += csum; + else + losum += csum; + odd = (odd+x) & 1; + len -= x; + + bp = bp->next; + if(bp == nil) + break; + blocklen = BLEN(bp); + addr = bp->rp; + } + + losum += hisum>>8; + losum += (hisum&0xff)<<8; + while((csum = losum>>16) != 0) + losum = csum + (losum & 0xffff); + + return ~losum & 0xffff; +} + +enum +{ + Isprefix= 16, +}; + +#define CLASS(p) ((*(uchar*)(p))>>6) + +void +ipv62smcast(uchar *smcast, uchar *a) +{ + assert(IPaddrlen == 16); + memmove(smcast, v6solicitednode, IPaddrlen); + smcast[13] = a[13]; + smcast[14] = a[14]; + smcast[15] = a[15]; +} + + +/* + * parse a hex mac address + */ +int +parsemac(uchar *to, char *from, int len) +{ + char nip[4]; + char *p; + int i; + + p = from; + memset(to, 0, len); + for(i = 0; i < len; i++){ + 
if(p[0] == '\0' || p[1] == '\0') + break; + + nip[0] = p[0]; + nip[1] = p[1]; + nip[2] = '\0'; + p += 2; + + to[i] = strtoul(nip, 0, 16); + if(*p == ':') + p++; + } + return i; +} + +/* + * hashing tcp, udp, ... connections + */ +ulong +iphash(uchar *sa, ushort sp, uchar *da, ushort dp) +{ + return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash; +} + +void +iphtadd(Ipht *ht, Conv *c) +{ + ulong hv; + Iphash *h; + + hv = iphash(c->raddr, c->rport, c->laddr, c->lport); + h = smalloc(sizeof(*h)); + if(ipcmp(c->raddr, IPnoaddr) != 0) + h->match = IPmatchexact; + else { + if(ipcmp(c->laddr, IPnoaddr) != 0){ + if(c->lport == 0) + h->match = IPmatchaddr; + else + h->match = IPmatchpa; + } else { + if(c->lport == 0) + h->match = IPmatchany; + else + h->match = IPmatchport; + } + } + h->c = c; + + lock(ht); + h->next = ht->tab[hv]; + ht->tab[hv] = h; + unlock(ht); +} + +void +iphtrem(Ipht *ht, Conv *c) +{ + ulong hv; + Iphash **l, *h; + + hv = iphash(c->raddr, c->rport, c->laddr, c->lport); + lock(ht); + for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->next) + if((*l)->c == c){ + h = *l; + (*l) = h->next; + free(h); + break; + } + unlock(ht); +} + +/* look for a matching conversation with the following precedence + * connected && raddr,rport,laddr,lport + * announced && laddr,lport + * announced && *,lport + * announced && laddr,* + * announced && *,* + */ +Conv* +iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) +{ + ulong hv; + Iphash *h; + Conv *c; + + /* exact 4 pair match (connection) */ + hv = iphash(sa, sp, da, dp); + lock(ht); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchexact) + continue; + c = h->c; + if(sp == c->rport && dp == c->lport + && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* match local address and port */ + hv = iphash(IPnoaddr, 0, da, dp); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchpa) + continue; + c = h->c; + 
if(dp == c->lport && ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* match just port */ + hv = iphash(IPnoaddr, 0, IPnoaddr, dp); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchport) + continue; + c = h->c; + if(dp == c->lport){ + unlock(ht); + return c; + } + } + + /* match local address */ + hv = iphash(IPnoaddr, 0, da, 0); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchaddr) + continue; + c = h->c; + if(ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* look for something that matches anything */ + hv = iphash(IPnoaddr, 0, IPnoaddr, 0); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchany) + continue; + c = h->c; + unlock(ht); + return c; + } + unlock(ht); + return nil; +} diff -Nru /sys/src/9k/ip/ipifc.c /sys/src/9k/ip/ipifc.c --- /sys/src/9k/ip/ipifc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ipifc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1664 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +#define DPRINT if(0)print + +enum { + Maxmedia = 32, + Nself = Maxmedia*5, + NHASH = 1<<6, + NCACHE = 256, + QMAX = 192*1024-1, +}; + +Medium *media[Maxmedia] = { 0 }; + +/* + * cache of local addresses (addresses we answer to) + */ +struct Ipself +{ + uchar a[IPaddrlen]; + Ipself *hnext; /* next address in the hash table */ + Iplink *link; /* binding twixt Ipself and Ipifc */ + ulong expire; + uchar type; /* type of address */ + int ref; + Ipself *next; /* free list */ +}; + +struct Ipselftab +{ + QLock; + int inited; + int acceptall; /* true if an interface has the null address */ + Ipself *hash[NHASH]; /* hash chains */ +}; + +/* + * Multicast addresses are chained onto a Chan so that + * we can remove them when the Chan is closed. 
+ */
+typedef struct Ipmcast Ipmcast;
+struct Ipmcast
+{
+	Ipmcast	*next;		/* next entry on the chain */
+	uchar	ma[IPaddrlen];	/* multicast address */
+	uchar	ia[IPaddrlen];	/* interface address */
+};
+
+/*
+ * quick hash for ip addresses: the last two bytes of the
+ * address taken as a 16-bit number, modulo NHASH
+ */
+#define hashipa(a) ( ( ((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1] )%NHASH )
+
+/* tag passed to v4addroute/v6addroute for routes made in this file */
+static char tifc[] = "ifc ";
+
+static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
+static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
+static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
+static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
+static char*	ipifcremlifc(Ipifc*, Iplifc*);
+
+/*
+ * Register a medium in the first free slot of media[].
+ * The last slot is never filled, so the table always ends
+ * in nil; when no slot is free the medium is not recorded.
+ */
+void
+addipmedium(Medium *med)
+{
+	Medium **slot, **last;
+
+	last = &media[nelem(media)-1];
+	for(slot = media; slot < last; slot++){
+		if(*slot != nil)
+			continue;
+		*slot = med;
+		return;
+	}
+}
+
+/*
+ * Look a medium up by name in the nil-terminated media[]
+ * table; returns nil when no medium has that name.
+ */
+Medium*
+ipfindmedium(char *name)
+{
+	Medium **mp;
+
+	for(mp = media; *mp != nil; mp++){
+		if(strcmp((*mp)->name, name) == 0)
+			return *mp;
+	}
+	return nil;
+}
+
+/*
+ * attach a device (or pkt driver) to the interface.
+ * called with c locked
+ *
+ * argv[1] names the medium (looked up with ipfindmedium) and the
+ * optional argv[2] the device; without it the device name is the
+ * medium name followed by c->x.  takes and releases ifc's write
+ * lock itself.  returns nil on success or an error string.
+ */
+static char*
+ipifcbind(Conv *c, char **argv, int argc)
+{
+	Ipifc *ifc;
+	Medium *medium;
+
+	if(argc < 2)
+		return Ebadarg;
+
+	ifc = (Ipifc*)c->ptcl;
+
+	/* bind the device to the interface */
+	medium = ipfindmedium(argv[1]);
+	if(medium == nil)
+		return "unknown interface type";
+
+	wlock(ifc);
+	if(ifc->medium != nil){
+		wunlock(ifc);
+		return "interface already bound";
+	}
+	if(waserror()){		/* an error raised below must drop the write lock */
+		wunlock(ifc);
+		nexterror();
+	}
+
+	/* do medium specific binding */
+	(*medium->bind)(ifc, argc, argv);
+
+	/* set the bound device name */
+	if(argc > 2)
+		strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
+	else
+		snprint(ifc->dev, sizeof ifc->dev, "%s%d", medium->name, c->x);
+	ifc->dev[sizeof(ifc->dev)-1] = 0;	/* strncpy need not terminate */
+
+	/* set up parameters */
+	ifc->medium = medium;
+	ifc->mintu = ifc->medium->mintu;
+	ifc->maxtu = ifc->medium->maxtu;
+	if(ifc->medium->unbindonclose == 0)
+		ifc->conv->inuse++;
+	ifc->rp.mflag = 0;		/* default not managed */
+	ifc->rp.oflag = 0;
+	ifc->rp.maxraint = 600000;	/* millisecs */
+	ifc->rp.minraint = 200000;
+	ifc->rp.linkmtu = 0;		/* no mtu sent */
+	ifc->rp.reachtime = 0;
+	ifc->rp.rxmitra = 0;
+	ifc->rp.ttl = MAXTTL;
+	ifc->rp.routerlt = 3 * ifc->rp.maxraint;
+
+	/* any ancillary structures (like routes) no longer pertain */
+	ifc->ifcid++;
+
+	/* reopen all the queues closed by a previous unbind */
+	qreopen(c->rq);
+	qreopen(c->eq);
+	qreopen(c->sq);
+
+	wunlock(ifc);
+	poperror();
+
+	return nil;
+}
+
+/*
+ * detach a device from an interface, close the interface
+ * called with ifc->conv closed
+ *
+ * removes every logical interface, calls the medium's unbind
+ * hook when there is one, clears the device name and closes the
+ * conversation's rq, wq and sq.  always returns nil; a failure to
+ * remove a logical interface is raised with error().
+ */
+static char*
+ipifcunbind(Ipifc *ifc)
+{
+	char *err;
+
+	if(waserror()){		/* error label is set up before the lock is taken */
+		wunlock(ifc);
+		nexterror();
+	}
+	wlock(ifc);
+
+	/* dissociate routes */
+	if(ifc->medium != nil && ifc->medium->unbindonclose == 0)
+		ifc->conv->inuse--;	/* undoes the increment made in ipifcbind */
+	ifc->ifcid++;
+
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc){
+		err = ipifcremlifc(ifc, ifc->lifc);
+		/*
+		 * note: err non-zero means lifc not found,
+		 * which can't happen in this case.
+		 */
+		if(err)
+			error(err);
+	}
+
+	/* disassociate device */
+	if(ifc->medium && ifc->medium->unbind)
+		(*ifc->medium->unbind)(ifc);
+	memset(ifc->dev, 0, sizeof(ifc->dev));
+	ifc->arg = nil;
+	ifc->reassemble = 0;
+
+	/* close queues to stop queuing of packets */
+	qclose(ifc->conv->rq);
+	qclose(ifc->conv->wq);
+	qclose(ifc->conv->sq);
+
+	ifc->medium = nil;
+	wunlock(ifc);
+	poperror();
+	return nil;
+}
+/*
+ * formats used by ipifcstate: sfixedformat for the one line of
+ * interface parameters and counters, slineformat for each
+ * logical interface (local, mask, remote, validlt, preflt).
+ */
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
+" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
+" %d pktin %lud pktout %lud errin %lud errout %lud\n";
+
+char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
+/*
+ * write the interface status into state[0..n): the parameter line
+ * followed by one line per logical interface, or an empty line
+ * when there is none.  returns the number of bytes produced.
+ * the parameter line is formatted before the read lock is taken;
+ * only the walk of ifc->lifc is done under rlock.
+ */
+static int
+ipifcstate(Conv *c, char *state, int n)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	char *e, *s;
+
+	ifc = (Ipifc*)c->ptcl;
+	s = state;
+	e = s+n;
+	s = seprint(s, e, sfixedformat,
+		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
+		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
+		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
+		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+
+	rlock(ifc);
+	for(lifc = ifc->lifc; lifc && s < e; lifc = lifc->next)
+		s = seprint(s, e, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
+	if(ifc->lifc == nil)
+		s = seprint(s, e, "\n");
+	runlock(ifc);
+	return s - state;
+}
+/*
+ * write into state[0..n) one line per logical interface: its
+ * local address, " ->", then the address of every self-cache
+ * entry linked to it.  returns the number of bytes produced.
+ */
+static int
+ipifclocal(Conv *c, char *state, int n)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Iplink *link;
+	char *e, *s;
+
+	ifc = (Ipifc*)c->ptcl;
+	s = state;
+	e = s+n;
+
+	rlock(ifc);
+	for(lifc = ifc->lifc; lifc && s < e; lifc = lifc->next){
+		s = seprint(s, e, "%-40.40I ->", lifc->local);
+		for(link = lifc->link; link; link = link->lifclink)
+			s = seprint(s, e, " %-40.40I", link->self->a);
+		s = seprint(s, e, "\n");
+	}
+	runlock(ifc);
+	return s - state;
+}
+/* an interface counts as in use exactly while a medium is bound to it */
+static int
+ipifcinuse(Conv *c)
+{
+	Ipifc *ifc;
+
+	ifc = (Ipifc*)c->ptcl;
+	return ifc->medium != nil;
+}
+
+/*
+ * called when a process writes to an interface's 'data'
+ *
+ * takes one block from c->wq and hands it to the medium's pktin
+ * hook.  the block is freed instead when the read lock cannot be
+ * had without waiting, when no medium is bound, or when the
+ * medium has no pktin hook.
+ */
+static void
+ipifckick(void *x)
+{
+	Conv *c = x;
+	Block *bp;
+	Ipifc *ifc;
+
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(!canrlock(ifc)){
+		freeb(bp);
+		return;
+	}
+	if(waserror()){
+		runlock(ifc);
+		nexterror();
+	}
+	if(ifc->medium == nil || ifc->medium->pktin == nil)
+		freeb(bp);
+	else
+		(*ifc->medium->pktin)(c->p->f, ifc, bp);
+	runlock(ifc);
+	poperror();
+}
+
+/*
+ * called when a new ipifc structure is created
+ *
+ * opens the conversation's rq, sq and wq (wq kicks ipifckick)
+ * and leaves the interface unbound.
+ */
+static void
+ipifccreate(Conv *c)
+{
+	Ipifc *ifc;
+
+	c->rq = qopen(QMAX, 0, 0, 0);
+	c->sq = qopen(QMAX, 0, 0, 0);
+	c->wq = qopen(QMAX, Qkick, ipifckick, c);
+	ifc = (Ipifc*)c->ptcl;
+	ifc->conv = c;
+	ifc->unbinding = 0;
+	ifc->medium = nil;
+	ifc->reassemble = 0;
+}
+
+/*
+ * called after last close of ipifc data or ctl
+ * called with c locked, we must unlock
+ *
+ * unbinds only when the bound medium sets unbindonclose.
+ */
+static void
+ipifcclose(Conv *c)
+{
+	Ipifc *ifc;
+	Medium *medium;
+
+	ifc = (Ipifc*)c->ptcl;
+	medium = ifc->medium;
+	if(medium != nil && medium->unbindonclose)
+		ipifcunbind(ifc);
+}
+
+/*
+ * change an interface's mtu
+ *
+ * argv[1] is the new mtu (strtoul, base chosen by its prefix).
+ * returns Ebadarg when it is missing, when no medium is bound,
+ * or when it lies outside the medium's mintu..maxtu; nil otherwise.
+ */
+char*
+ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+{
+	int mtu;
+
+	if(argc < 2 || ifc->medium == nil)
+		return Ebadarg;
+	mtu = strtoul(argv[1], 0, 0);
+	if(mtu < ifc->medium->mintu || mtu > ifc->medium->maxtu)
+		return Ebadarg;
+	ifc->maxtu = mtu;
+	return nil;
+}
+
+/*
+ * add an address to an interface.
+ *
+ * argv is: verb ip [mask [remote [mtu [proxy]]]]; argc selects
+ * how much is parsed.  with no mask, defmask(ip) is used; with
+ * no remote, it is ip masked by mask.  an mtu outside the
+ * medium's mintu..maxtu is ignored.
+ * tentative is forced to 0 for v4 addresses.  when lifcp is
+ * non-nil its onlink, autoflag, validlt, preflt and origint are
+ * copied to the (new or already present) logical interface.
+ * takes ifc's write lock itself.  returns nil or an error string.
+ */
+char*
+ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
+{
+	int i, type, mtu, sendnbrdisc = 0;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	uchar bcast[IPaddrlen], net[IPaddrlen];
+	Iplifc *lifc, **l;
+	Fs *f;
+
+	if(ifc->medium == nil)
+		return "ipifc not yet bound to device";
+
+	f = ifc->conv->p->f;
+
+	type = Rifc;
+	memset(ip, 0, IPaddrlen);
+	memset(mask, 0, IPaddrlen);
+	memset(rem, 0, IPaddrlen);
+	switch(argc){
+	case 6:
+		if(strcmp(argv[5], "proxy") == 0)
+			type |= Rproxy;
+		/* fall through */
+	case 5:
+		mtu = strtoul(argv[4], 0, 0);
+		if(mtu >= ifc->medium->mintu && mtu <= ifc->medium->maxtu)
+			ifc->maxtu = mtu;
+		/* fall through */
+	case 4:
+		if (parseip(ip, argv[1]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
+		parseipmask(mask, argv[2]);
+		maskip(rem, mask, net);
+		break;
+	case 3:
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
+		parseipmask(mask, argv[2]);
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	case 2:
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
+		memmove(mask, defmask(ip), IPaddrlen);
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	default:
+		return Ebadarg;
+	}
+	if(isv4(ip))
+		tentative = 0;
+	wlock(ifc);
+
+	/* ignore if this is already a local address for this ifc */
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
+		if(ipcmp(lifc->local, ip) == 0) {
+			if(lifc->tentative != tentative)
+				lifc->tentative = tentative;
+			if(lifcp) {
+				lifc->onlink = lifcp->onlink;
+				lifc->autoflag = lifcp->autoflag;
+				lifc->validlt = lifcp->validlt;
+				lifc->preflt = lifcp->preflt;
+				lifc->origint = lifcp->origint;
+			}
+			goto out;
+		}
+	}
+
+	/* add the address to the list of logical ifc's for this ifc */
+	lifc = smalloc(sizeof(Iplifc));
+	ipmove(lifc->local, ip);
+	ipmove(lifc->mask, mask);
+	ipmove(lifc->remote, rem);
+	ipmove(lifc->net, net);
+	lifc->tentative = tentative;
+	if(lifcp) {
+		lifc->onlink = lifcp->onlink;
+		lifc->autoflag = lifcp->autoflag;
+		lifc->validlt = lifcp->validlt;
+		lifc->preflt = lifcp->preflt;
+		lifc->origint = lifcp->origint;
+	} else {		/* default values */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0L;
+		lifc->origint = NOW / 1000;
+	}
+	lifc->next = nil;
+
+	for(l = &ifc->lifc; *l; l = &(*l)->next)	/* append at the tail */
+		;
+	*l = lifc;
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	/* add local routes */
+	if(isv4(ip))
+		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
+	else
+		v6addroute(f, tifc, rem, mask, rem, type);
+
+	addselfcache(f, ifc, lifc, ip, Runi);
+
+	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
+		ipifcregisterproxy(f, ifc, rem);
+		goto out;
+	}
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+		/* add subnet directed broadcast address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add subnet directed network address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed broadcast address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);	/* mask is the default mask from here on */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed network address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
+	}
+	else {
+		if(ipcmp(ip, v6loopback) == 0) {
+			/* add node-local mcast address */
+			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
+
+			/* add route for all node multicast */
+			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask,
+				v6allnodesN, Rmulti);
+		}
+
+		/* add all nodes multicast address */
+		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
+
+		/* add route for all nodes multicast */
+		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL,
+			Rmulti);
+
+		/* add solicited-node multicast address */
+		ipv62smcast(bcast, ip);
+		addselfcache(f, ifc, lifc, bcast, Rmulti);
+
+		sendnbrdisc = 1;	/* only set for a newly added v6 address */
+	}
+
+	/* register the address on this network for address resolution */
+	if(isv4(ip) && ifc->medium->areg != nil)
+		(*ifc->medium->areg)(ifc, ip);
+
+out:
+	wunlock(ifc);
+	if(tentative && sendnbrdisc)	/* solicitation goes out after the lock is dropped */
+		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	return nil;
+}
+
+/*
+ * remove a logical interface from an ifc
+ * always called with ifc wlock'd
+ *
+ * unlinks lifc from ifc's list, drops every self-cache entry
+ * linked to it, deletes its routes and frees it.  returns nil,
+ * or an error string when lifc (nil included) is not on the list.
+ */
+static char*
+ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+{
+	Iplifc **l;
+	Fs *f;
+
+	f = ifc->conv->p->f;
+
+	/*
+	 * find address on this interface and remove from chain.
+	 * for pt to pt we actually specify the remote address as the
+	 * address to remove.
+	 */
+	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
+		;
+	if(*l == nil)
+		return "address not on this interface";
+	*l = lifc->next;
+
+	/* disassociate any addresses */
+	while(lifc->link)
+		remselfcache(f, ifc, lifc, lifc->link->self->a);
+
+	/* remove the route for this logical interface */
+	if(isv4(lifc->local))
+		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
+	else {
+		v6delroute(f, lifc->remote, lifc->mask, 1);
+		if(ipcmp(lifc->local, v6loopback) == 0)
+			/* remove route for all node multicast */
+			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
+		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
+			/* remove route for all link multicast */
+			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+	}
+
+	free(lifc);
+	return nil;
+}
+
+/*
+ * remove an address from an interface.
+ * called with c->car locked
+ *
+ * argv is: verb ip mask [remote]; without a remote address it
+ * is taken to be ip masked by mask.  returns nil, Ebadarg,
+ * Ebadip, or the error string from ipifcremlifc.
+ */
+char*
+ipifcrem(Ipifc *ifc, char **argv, int argc)
+{
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc;
+	char *err;
+
+	if(argc < 3)
+		return Ebadarg;
+	if(parseip(ip, argv[1]) == -1)
+		return Ebadip;
+	parseipmask(mask, argv[2]);
+	if(argc >= 4){
+		if(parseip(rem, argv[3]) == -1)
+			return Ebadip;
+	}else
+		maskip(ip, mask, rem);
+
+	wlock(ifc);
+
+	/*
+	 * pick the logical interface whose local address, mask and
+	 * remote address all match; lifc ends up nil when none does
+	 * and ipifcremlifc then reports the address as missing.
+	 */
+	lifc = ifc->lifc;
+	while(lifc != nil){
+		if(memcmp(ip, lifc->local, IPaddrlen) == 0
+		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
+		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+			break;
+		lifc = lifc->next;
+	}
+
+	err = ipifcremlifc(ifc, lifc);
+	wunlock(ifc);
+	return err;
+}
+
+/*
+ * pass a new route to every interface whose medium has an
+ * addroute hook (distributes routes to active interfaces like
+ * the TRIP linecards)
+ */
+void
+ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *m;
+
+	cp = f->ipifc->conv;
+	end = cp + f->ipifc->nc;
+	for(; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->medium;
+		if(m == nil || m->addroute == nil)
+			continue;
+		m->addroute(ifc, vers, addr, mask, gate, type);
+	}
+}
+
+/*
+ * tell every interface whose medium has a remroute hook that
+ * a route is gone
+ */
+void
+ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *m;
+
+	cp = f->ipifc->conv;
+	end = cp + f->ipifc->nc;
+	for(; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->medium;
+		if(m == nil || m->remroute == nil)
+			continue;
+		m->remroute(ifc, vers, addr, mask);
+	}
+}
+
+/*
+ * associate an address with the interface.  This wipes out any previous
+ * addresses.  This is a macro that means, remove all the old interfaces
+ * and add a new one.
+ */
+static char*
+ipifcconnect(Conv* c, char **argv, int argc)
+{
+	Ipifc *ifc;
+	char *e;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(ifc->medium == nil)
+		return "ipifc not yet bound to device";
+
+	/* drop every existing logical interface under the write lock */
+	if(waserror()){
+		wunlock(ifc);
+		nexterror();
+	}
+	wlock(ifc);
+	while(ifc->lifc != nil){
+		e = ipifcremlifc(ifc, ifc->lifc);
+		if(e != nil)
+			error(e);
+	}
+	wunlock(ifc);
+	poperror();
+
+	/* then add the one requested; ipifcadd takes the lock itself */
+	e = ipifcadd(ifc, argv, argc, 0, nil);
+	if(e == nil)
+		Fsconnected(c, nil);
+	return e;
+}
+
+/*
+ * set router-advertisement parameters from "name value" pairs
+ * in argv[1..].  returns Ebadarg for an odd number of words or
+ * an unknown name; pairs seen before the unknown name stay
+ * applied.  when the result has maxraint < minraint both are
+ * put back to their values on entry and Ebadarg is returned.
+ */
+char*
+ipifcra6(Ipifc *ifc, char **argv, int argc)
+{
+	int i, val, argsleft;
+	int vmax, vmin;
+	char *key;
+
+	vmax = ifc->rp.maxraint;
+	vmin = ifc->rp.minraint;
+
+	argsleft = argc - 1;
+	if(argsleft % 2 != 0)
+		return Ebadarg;
+
+	for(i = 1; argsleft > 1; i += 2, argsleft -= 2){
+		key = argv[i];
+		val = atoi(argv[i+1]);
+
+		if(strcmp(key, "recvra") == 0)
+			ifc->recvra6 = (val != 0);
+		else if(strcmp(key, "sendra") == 0)
+			ifc->sendra6 = (val != 0);
+		else if(strcmp(key, "mflag") == 0)
+			ifc->rp.mflag = (val != 0);
+		else if(strcmp(key, "oflag") == 0)
+			ifc->rp.oflag = (val != 0);
+		else if(strcmp(key, "maxraint") == 0)
+			ifc->rp.maxraint = val;
+		else if(strcmp(key, "minraint") == 0)
+			ifc->rp.minraint = val;
+		else if(strcmp(key, "linkmtu") == 0)
+			ifc->rp.linkmtu = val;
+		else if(strcmp(key, "reachtime") == 0)
+			ifc->rp.reachtime = val;
+		else if(strcmp(key, "rxmitra") == 0)
+			ifc->rp.rxmitra = val;
+		else if(strcmp(key, "ttl") == 0)
+			ifc->rp.ttl = val;
+		else if(strcmp(key, "routerlt") == 0)
+			ifc->rp.routerlt = val;
+		else
+			return Ebadarg;
+	}
+
+	/* consistency check */
+	if(ifc->rp.maxraint < ifc->rp.minraint){
+		ifc->rp.maxraint = vmax;
+		ifc->rp.minraint = vmin;
+		return Ebadarg;
+	}
+	return nil;
+}
+
+/*
+ * non-standard control messages.
+ * called with c->car locked.
+ *
+ * argv[0] selects the operation and the whole argv is handed to
+ * its handler.  returns the handler's result (nil on success),
+ * or "unsupported ctl" when argv[0] is not recognised.
+ */
+static char*
+ipifcctl(Conv* c, char**argv, int argc)
+{
+	Ipifc *ifc;
+	int i;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(strcmp(argv[0], "add") == 0)
+		return ipifcadd(ifc, argv, argc, 0, nil);
+	else if(strcmp(argv[0], "try") == 0)	/* same as add, but tentative */
+		return ipifcadd(ifc, argv, argc, 1, nil);
+	else if(strcmp(argv[0], "remove") == 0)
+		return ipifcrem(ifc, argv, argc);
+	else if(strcmp(argv[0], "unbind") == 0)
+		return ipifcunbind(ifc);
+	else if(strcmp(argv[0], "joinmulti") == 0)
+		return ipifcjoinmulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "leavemulti") == 0)
+		return ipifcleavemulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "mtu") == 0)
+		return ipifcsetmtu(ifc, argv, argc);
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "iprouting") == 0){
+		i = 1;		/* default when no argument is given */
+		if(argc > 1)
+			i = atoi(argv[1]);
+		iprouting(c->p->f, i);
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
+	return "unsupported ctl";
+}
+/* stats request: handed to ipstats for the Fs this protocol belongs to */
+int
+ipifcstats(Proto *ipifc, char *buf, int len)
+{
+	return ipstats(ipifc->f, buf, len);
+}
+/*
+ * allocate the "ipifc" protocol, fill in its entry points,
+ * allocate the Fs's self-address table and register the
+ * protocol with Fsproto.  announce, rcv and advise are left nil
+ * and at most Maxmedia conversations are allowed.
+ */
+void
+ipifcinit(Fs *f)
+{
+	Proto *ipifc;
+
+	ipifc = smalloc(sizeof(Proto));
+	ipifc->name = "ipifc";
+	ipifc->connect = ipifcconnect;
+	ipifc->announce = nil;
+	ipifc->bind = ipifcbind;
+	ipifc->state = ipifcstate;
+	ipifc->create = ipifccreate;
+	ipifc->close = ipifcclose;
+	ipifc->rcv = nil;
+	ipifc->ctl = ipifcctl;
+	ipifc->advise = nil;
+	ipifc->stats = ipifcstats;
+	ipifc->inuse = ipifcinuse;
+	ipifc->local = ipifclocal;
+	ipifc->ipproto = -1;
+	ipifc->nc = Maxmedia;
+	ipifc->ptclsize = sizeof(Ipifc);
+
+	f->ipifc = ipifc;	/* hack for ipifcremroute, findipifc, ... */
+	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
+
+	Fsproto(f, ipifc);
+}
+
+/*
+ * add to self routing cache
+ * called with c->car locked
+ *
+ * finds or creates the Ipself entry for address a, then finds or
+ * creates the Iplink tying it to lifc.  a new link also gets a
+ * host route for a and, for Rmulti, a call to the medium's
+ * addmulti hook; an existing link just gains a reference.
+ * f->self is held (qlock) for the whole operation.
+ */
+static void
+addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
+{
+	Ipself *p;
+	Iplink *lp;
+	int h;
+
+	qlock(f->self);
+
+	/* see if the address already exists */
+	h = hashipa(a);
+	for(p = f->self->hash[h]; p; p = p->next)
+		if(memcmp(a, p->a, IPaddrlen) == 0)
+			break;
+
+	/* allocate a local address and add to hash chain */
+	if(p == nil){
+		p = smalloc(sizeof(*p));
+		ipmove(p->a, a);
+		p->type = type;
+		p->next = f->self->hash[h];
+		f->self->hash[h] = p;
+
+		/* if the null address, accept all packets */
+		if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+			f->self->acceptall = 1;
+	}
+
+	/* look for a link for this lifc */
+	for(lp = p->link; lp; lp = lp->selflink)
+		if(lp->lifc == lifc)
+			break;
+
+	/* allocate a lifc-to-local link and link to both */
+	if(lp == nil){
+		lp = smalloc(sizeof(*lp));
+		lp->ref = 1;
+		lp->lifc = lifc;
+		lp->self = p;
+		lp->selflink = p->link;
+		p->link = lp;
+		lp->lifclink = lifc->link;
+		lifc->link = lp;
+
+		/* add to routing table */
+		if(isv4(a))
+			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off,
+				a+IPv4off, type);
+		else
+			v6addroute(f, tifc, a, IPallbits, a, type);
+
+		if((type & Rmulti) && ifc->medium->addmulti != nil)
+			(*ifc->medium->addmulti)(ifc, a, lifc->local);
+	} else
+		lp->ref++;
+
+	qunlock(f->self);
+}
+
+/*
+ * These structures are unlinked from their chains while
+ * other threads may be using them.  To avoid excessive locking,
+ * just put them aside for a while before freeing them.
+ * called with f->self locked
+ */
+static Iplink *freeiplink;
+static Ipself *freeipself;
+
+/*
+ * Park an unlinked Iplink on freeiplink for 5 seconds and free
+ * every parked entry whose grace period has run out.  Entries
+ * that have not yet expired stay on the list, since another
+ * thread may still be looking at them.  The age test is done on
+ * the difference of the two times so that it stays correct when
+ * the NOW counter wraps.
+ */
+static void
+iplinkfree(Iplink *p)
+{
+	Iplink **l, *np;
+	ulong now = NOW;
+
+	l = &freeiplink;
+	for(np = *l; np; np = *l){
+		/* free only what has expired; younger entries are kept */
+		if((long)(now - np->expire) >= 0){
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;			/* l is the tail after the scan: append */
+}
+
+/*
+ * Same as iplinkfree, for Ipself entries parked on freeipself.
+ */
+static void
+ipselffree(Ipself *p)
+{
+	Ipself **l, *np;
+	ulong now = NOW;
+
+	l = &freeipself;
+	for(np = *l; np; np = *l){
+		/* free only what has expired; younger entries are kept */
+		if((long)(now - np->expire) >= 0){
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;			/* l is the tail after the scan: append */
+}
+
+/*
+ * Decrement reference for this address on this link.
+ * Unlink from selftab if this is the last ref.
+ * called with c->car locked
+ *
+ * Does nothing when a is not in the table or has no link to
+ * lifc.  When the last reference to the link goes, the link is
+ * taken off both chains (and the medium's remmulti hook is
+ * called for Rmulti entries); when that was the entry's last
+ * link, its host route is deleted and the entry leaves the table.
+ * f->self is held (qlock) for the whole operation.
+ */
+static void
+remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
+{
+	Ipself *p, **l;
+	Iplink *link, **l_self, **l_lifc;
+
+	qlock(f->self);
+
+	/* find the unique selftab entry */
+	l = &f->self->hash[hashipa(a)];
+	for(p = *l; p; p = *l){
+		if(ipcmp(p->a, a) == 0)
+			break;
+		l = &p->next;
+	}
+
+	if(p == nil)
+		goto out;
+
+	/*
+	 * walk down links from an ifc looking for one
+	 * that matches the selftab entry
+	 */
+	l_lifc = &lifc->link;
+	for(link = *l_lifc; link; link = *l_lifc){
+		if(link->self == p)
+			break;
+		l_lifc = &link->lifclink;
+	}
+
+	if(link == nil)
+		goto out;
+
+	/*
+	 * walk down the links from the selftab looking for
+	 * the one we just found
+	 */
+	l_self = &p->link;
+	for(link = *l_self; link; link = *l_self){
+		if(link == *l_lifc)
+			break;
+		l_self = &link->selflink;
+	}
+
+	/* a link on the lifc chain must also be on the selftab chain */
+	if(link == nil)
+		panic("remselfcache");
+
+	if(--(link->ref) != 0)
+		goto out;
+
+	if((p->type & Rmulti) && ifc->medium->remmulti != nil)
+		(*ifc->medium->remmulti)(ifc, a, lifc->local);
+
+	/* ref == 0, remove from both chains and free the link */
+	*l_lifc = link->lifclink;
+	*l_self = link->selflink;
+	iplinkfree(link);
+
+	if(p->link != nil)
+		goto out;
+
+	/* remove from routing table */
+	if(isv4(a))
+		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
+	else
+		v6delroute(f, a, IPallbits, 1);
+
+	/* no more links, remove from hash and free */
+	*l = p->next;
+	ipselffree(p);
+
+	/* if IPnoaddr, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
+out:
+	qunlock(f->self);
+}
+
+/* one line of ipselftabread output: address, link count, route type */
+static char *stformat = "%-44.44I %2.2d %4.4s\n";
+enum
+{
+	Nstformat= 41,
+};
+
+/*
+ * Format the self table into cp[0..n) for a read at offset:
+ * one stformat line per address.  While offset has not been
+ * used up, each formatted line is counted against it and then
+ * overwritten by the next one.  Returns the number of bytes
+ * left in cp.
+ */
+long
+ipselftabread(Fs *f, char *cp, ulong offset, int n)
+{
+	int i, nifc, off;
+	Ipself *p;
+	Iplink *link;
+	char *e, *s, state[8];
+
+	s = cp;
+	e = s+n;
+	off = offset;
+	qlock(f->self);
+	for(i = 0; i < NHASH && s < e; i++){
+		for(p = f->self->hash[i]; p != nil && s < e; p = p->next){
+			nifc = 0;
+			for(link = p->link; link; link = link->selflink)
+				nifc++;
+			routetype(p->type, state);
+			s = seprint(s, e, stformat, p->a, nifc, state);
+			if(off > 0){
+				/* still before the requested offset: discard */
+				off -= s - cp;
+				s = cp;
+			}
+		}
+	}
+	qunlock(f->self);
+	return s - cp;
+}
+
+/*
+ * Return the tentative flag of the logical interface on the
+ * first link of addr's self-table entry, or 0 when addr is not
+ * in the table.
+ * NOTE(review): the table is walked without taking f->self.
+ */
+int
+iptentative(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	p = f->self->hash[hashipa(addr)];
+	for(; p; p = p->next){
+		if(ipcmp(addr, p->a) == 0)
+			return p->link->lifc->tentative;
+	}
+	return 0;
+}
+
+/*
+ * returns
+ *	0		- no match
+ *	Runi
+ *	Rbcast
+ *	Rmcast
+ *
+ * i.e. the type stored with addr in the self table; when addr
+ * is not there, Runi if the table accepts everything, else 0.
+ */
+int
+ipforme(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	p = f->self->hash[hashipa(addr)];
+	for(; p; p = p->next){
+		if(ipcmp(addr, p->a) == 0)
+			return p->type;
+	}
+
+	/* hack to say accept anything */
+	if(f->self->acceptall)
+		return Runi;
+	return 0;
+}
+
+/*
+ * find the ifc on same net as the remote system.  If none,
+ * return nil.
+ */ +Ipifc* +findipifc(Fs *f, uchar *remote, int type) +{ + Ipifc *ifc, *x; + Iplifc *lifc; + Conv **cp, **e; + uchar gnet[IPaddrlen], xmask[IPaddrlen]; + + x = nil; + memset(xmask, 0, IPaddrlen); + + /* find most specific match */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(remote, lifc->mask, gnet); + if(ipcmp(gnet, lifc->net) == 0){ + if(x == nil || ipcmp(lifc->mask, xmask) > 0){ + x = ifc; + ipmove(xmask, lifc->mask); + } + } + } + } + if(x != nil) + return x; + + /* for now for broadcast and multicast, just use first interface */ + if(type & (Rbcast|Rmulti)){ + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + if(ifc->lifc != nil) + return ifc; + } + } + return nil; +} + +enum { + unknownv6, /* UGH */ +// multicastv6, + unspecifiedv6, + linklocalv6, + globalv6, +}; + +int +v6addrtype(uchar *addr) +{ + if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0) + return unknownv6; + else if(islinklocal(addr) || + isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop) + return linklocalv6; + else + return globalv6; +} + +#define v6addrcurr(lifc) ((lifc)->preflt == ~0L || \ + (lifc)->origint + (lifc)->preflt >= NOW/1000) + +static void +findprimaryipv6(Fs *f, uchar *local) +{ + int atype, atypel; + Conv **cp, **e; + Ipifc *ifc; + Iplifc *lifc; + + ipmove(local, v6Unspecified); + atype = unspecifiedv6; + + /* + * find "best" (global > link local > unspecified) + * local address; address must be current. 
+ */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + atypel = v6addrtype(lifc->local); + if(atypel > atype && v6addrcurr(lifc)) { + ipmove(local, lifc->local); + atype = atypel; + if(atype == globalv6) + return; + } + } + } +} + +/* + * returns first ip address configured + */ +static void +findprimaryipv4(Fs *f, uchar *local) +{ + Conv **cp, **e; + Ipifc *ifc; + Iplifc *lifc; + + /* find first ifc local address */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + if((lifc = ifc->lifc) != nil){ + ipmove(local, lifc->local); + return; + } + } +} + +/* + * find the local address 'closest' to the remote system, copy it to + * local and return the ifc for that address + */ +void +findlocalip(Fs *f, uchar *local, uchar *remote) +{ + int version, atype = unspecifiedv6, atypel = unknownv6; + int atyper, deprecated; + uchar gate[IPaddrlen], gnet[IPaddrlen]; + Ipifc *ifc; + Iplifc *lifc; + Route *r; + + USED(atype); + USED(atypel); + qlock(f->ipifc); + r = v6lookup(f, remote, nil); + version = (memcmp(remote, v4prefix, IPv4off) == 0)? 
V4: V6;
+
+	if(r != nil){
+		ifc = r->ifc;
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else {
+			ipmove(gate, r->v6.gate);
+			ipmove(local, v6Unspecified);
+		}
+
+		switch(version) {
+		case V4:
+			/* find ifc address closest to the gateway to use */
+			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+				maskip(gate, lifc->mask, gnet);
+				if(ipcmp(gnet, lifc->net) == 0){
+					ipmove(local, lifc->local);
+					goto out;
+				}
+			}
+			break;
+		case V6:
+			/* find ifc address with scope matching the destination */
+			atyper = v6addrtype(remote);
+			deprecated = 0;
+			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+				atypel = v6addrtype(lifc->local);
+				/* prefer appropriate scope */
+				if(atypel > atype && atype < atyper ||
+				   atypel < atype && atype > atyper){
+					ipmove(local, lifc->local);
+					deprecated = !v6addrcurr(lifc);
+					atype = atypel;
+				} else if(atypel == atype){
+					/* avoid deprecated addresses */
+					if(deprecated && v6addrcurr(lifc)){
+						ipmove(local, lifc->local);
+						atype = atypel;
+						deprecated = 0;
+					}
+				}
+				if(atype == atyper && !deprecated)
+					goto out;
+			}
+			if(atype >= atyper)
+				goto out;
+			break;
+		default:
+			panic("findlocalip: version %d", version);
+		}
+	}
+
+	switch(version){
+	case V4:
+		findprimaryipv4(f, local);
+		break;
+	case V6:
+		findprimaryipv6(f, local);
+		break;
+	default:
+		panic("findlocalip2: version %d", version);
+	}
+
+out:
+	qunlock(f->ipifc);
+}
+
+/*
+ * return first v4 address associated with an interface
+ *
+ * walks ifc->lifc in list order; the first local address for which
+ * isv4() holds has its IPv4addrlen-byte v4 part copied into addr.
+ * returns 1 if an address was copied, 0 if the interface has none.
+ */
+int
+ipv4local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *lifc;
+
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(isv4(lifc->local)){
+			memmove(addr, lifc->local+IPv4off, IPv4addrlen);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * return first v6 address associated with an interface
+ *
+ * addresses still marked tentative are skipped.
+ * returns 1 if an address was copied into addr, 0 otherwise.
+ */
+int
+ipv6local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *lifc;
+
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!isv4(lifc->local) && !(lifc->tentative)){
+			ipmove(addr, lifc->local);
+			return 1;
+		}
+	}
+	return 0;
+}
+/*
+ * like ipv6local, but tentative addresses are accepted too.
+ * returns SRC_UNI if a v6 address was copied into addr,
+ * SRC_UNSPEC (addr untouched) if the interface has none.
+ */
+int
+ipv6anylocal(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *lifc;
+
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(!isv4(lifc->local)){
+			ipmove(addr, lifc->local);
+			return SRC_UNI;
+		}
+	}
+	return SRC_UNSPEC;
+}
+
+/*
+ * see if this address is bound to the interface
+ *
+ * returns the Iplifc whose local address equals ip, or nil.
+ */
+Iplifc*
+iplocalonifc(Ipifc *ifc, uchar *ip)
+{
+	Iplifc *lifc;
+
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+		if(ipcmp(ip, lifc->local) == 0)
+			return lifc;
+	return nil;
+}
+
+
+/*
+ * See if we're proxying for this address on this interface
+ *
+ * returns 1 only if the route for ip has both Rifc and Rproxy set
+ * and ip, masked by one of ifc's address masks, equals that
+ * address's remote network; returns 0 otherwise.
+ */
+int
+ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Route *r;
+	uchar net[IPaddrlen];
+	Iplifc *lifc;
+
+	/* see if this is a direct connected pt to pt address */
+	r = v6lookup(f, ip, nil);
+	if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
+		return 0;
+
+	/* see if this is on the right interface */
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * return multicast version if any
+ *
+ * V4 for a v4 address whose first octet is in 0xe0..0xef,
+ * V6 for a non-v4 address whose first octet is 0xff, else 0.
+ */
+int
+ipismulticast(uchar *ip)
+{
+	if(isv4(ip)){
+		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+			return V4;
+	}
+	else if(ip[0] == 0xff)
+		return V6;
+	return 0;
+}
+int
+ipisbm(uchar *ip)	/* as ipismulticast, but IPv4bcast also yields V4 */
+{
+	if(isv4(ip)){
+		if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+			return V4;
+		else if(ipcmp(ip, IPv4bcast) == 0)
+			return V4;
+	}
+	else if(ip[0] == 0xff)
+		return V6;
+	return 0;
+}
+
+
+/*
+ * add a multicast address to an interface, called with c->car locked
+ *
+ * the (ma, ia) pair is appended to c->multi unless it is already
+ * there; then every in-use interface conversation is write-locked in
+ * turn and a Rmulti self-cache entry for ma is added on each local
+ * address equal to ia.
+ */
+void
+ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Conv **p;
+	Ipmulti *multi, **l;
+	Fs *f;
+
+	f = c->p->f;
+
+	for(l = &c->multi; *l; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
+			return;		/* it's already there */
+
+	multi = *l = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+
+	for(p = f->ipifc->conv; *p; p++){
+		if((*p)->inuse == 0)
+			continue;
+		ifc = (Ipifc*)(*p)->ptcl;
+		/* error label is set before wlock; its handler drops the lock */
+		if(waserror()){
+			wunlock(ifc);
+			nexterror();
+		}
+		wlock(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			if(ipcmp(ia, lifc->local) == 0)
+				addselfcache(f, ifc, lifc, ma, Rmulti);
+		wunlock(ifc);
+		poperror();
+	}
+}
+
+
+/*
+ * remove a multicast address from an interface, called with c->car locked
+ *
+ * the (ma, ia) entry is unlinked from c->multi (nothing happens if it
+ * is not there), the self-cache entry for ma is removed from every
+ * local address equal to ia on each in-use interface, and the entry
+ * is freed.
+ */
+void
+ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Conv **p;
+	Ipifc *ifc;
+	Fs *f;
+
+	f = c->p->f;
+
+	for(l = &c->multi; *l; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
+			break;
+
+	multi = *l;
+	if(multi == nil)
+		return; 	/* we don't have it open */
+
+	*l = multi->next;
+
+	for(p = f->ipifc->conv; *p; p++){
+		if((*p)->inuse == 0)
+			continue;
+
+		ifc = (Ipifc*)(*p)->ptcl;
+		/* error label is set before wlock; its handler drops the lock */
+		if(waserror()){
+			wunlock(ifc);
+			nexterror();
+		}
+		wlock(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			if(ipcmp(ia, lifc->local) == 0)
+				remselfcache(f, ifc, lifc, ma);
+		wunlock(ifc);
+		poperror();
+	}
+
+	free(multi);
+}
+
+/*
+ * make lifc's join and leave multicast groups
+ *
+ * both are stubs: the arguments are marked USED and nil is returned.
+ */
+static char*
+ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+{
+	USED(ifc, argv, argc);
+	return nil;
+}
+
+static char*
+ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
+{
+	USED(ifc, argv, argc);
+	return nil;
+}
+/*
+ * announce ip on every other interface (ifc itself is skipped) that
+ * has an address whose remote network contains ip.  each candidate
+ * interface is read-locked while its addresses are examined.
+ */
+static void
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Conv **cp, **e;
+	Ipifc *nifc;
+	Iplifc *lifc;
+	Medium *medium;
+	uchar net[IPaddrlen];
+
+	/* register the address on any network that will proxy for us */
+	e = &f->ipifc->conv[f->ipifc->nc];
+
+	if(!isv4(ip)) {				/* V6 */
+		for(cp = f->ipifc->conv; cp < e; cp++){
+			if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc)
+				continue;
+			rlock(nifc);
+			medium = nifc->medium;
+			if(medium == nil || medium->addmulti == nil) {
+				runlock(nifc);
+				continue;
+			}
+			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+				maskip(ip, lifc->mask, net);
+				if(ipcmp(net, lifc->remote) == 0) {
+					/* add solicited-node multicast addr */
+					ipv62smcast(net, ip); +
addselfcache(f, nifc, lifc, net, Rmulti); + arpenter(f, V6, ip, nifc->mac, 6, 0); + // (*medium->addmulti)(nifc, net, ip); + break; + } + } + runlock(nifc); + } + } + else { /* V4 */ + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc) + continue; + rlock(nifc); + medium = nifc->medium; + if(medium == nil || medium->areg == nil){ + runlock(nifc); + continue; + } + for(lifc = nifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0){ + (*medium->areg)(nifc, ip); + break; + } + } + runlock(nifc); + } + } +} + + +/* added for new v6 mesg types */ +static void +adddefroute6(Fs *f, uchar *gate, int force) +{ + Route *r; + + r = v6lookup(f, v6Unspecified, nil); + /* + * route entries generated by all other means take precedence + * over router announcements. + */ + if (r && !force && strcmp(r->tag, "ra") != 0) + return; + + v6delroute(f, v6Unspecified, v6Unspecified, 1); + v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0); +} + +enum { + Ngates = 3, +}; + +char* +ipifcadd6(Ipifc *ifc, char**argv, int argc) +{ + int plen = 64; + long origint = NOW / 1000, preflt = ~0L, validlt = ~0L; + char addr[40], preflen[6]; + char *params[3]; + uchar autoflag = 1, onlink = 1; + uchar prefix[IPaddrlen]; + Iplifc *lifc; + + switch(argc) { + case 7: + preflt = atoi(argv[6]); + /* fall through */ + case 6: + validlt = atoi(argv[5]); + /* fall through */ + case 5: + autoflag = atoi(argv[4]); + /* fall through */ + case 4: + onlink = atoi(argv[3]); + /* fall through */ + case 3: + plen = atoi(argv[2]); + /* fall through */ + case 2: + break; + default: + return Ebadarg; + } + + if (parseip(prefix, argv[1]) != 6 || validlt < preflt || plen < 0 || + plen > 64 || islinklocal(prefix)) + return Ebadarg; + + lifc = smalloc(sizeof(Iplifc)); + lifc->onlink = (onlink != 0); + lifc->autoflag = (autoflag != 0); + lifc->validlt = validlt; + lifc->preflt = preflt; + lifc->origint = origint; + + /* issue 
"add" ctl msg for v6 link-local addr and prefix len */ + if(!ifc->medium->pref2addr) + return Ebadarg; + ifc->medium->pref2addr(prefix, ifc->mac); /* mac → v6 link-local addr */ + sprint(addr, "%I", prefix); + sprint(preflen, "/%d", plen); + params[0] = "add"; + params[1] = addr; + params[2] = preflen; + + return ipifcadd(ifc, params, 3, 0, lifc); +} diff -Nru /sys/src/9k/ip/iproute.c /sys/src/9k/ip/iproute.c --- /sys/src/9k/ip/iproute.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/iproute.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,892 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void walkadd(Fs*, Route**, Route*); +static void addnode(Fs*, Route**, Route*); +static void calcd(Route*); + +/* these are used for all instances of IP */ +static Route* v4freelist; +static Route* v6freelist; +static RWlock routelock; +static ulong v4routegeneration, v6routegeneration; + +static void +freeroute(Route *r) +{ + Route **l; + + r->left = nil; + r->right = nil; + if(r->type & Rv4) + l = &v4freelist; + else + l = &v6freelist; + r->mid = *l; + *l = r; +} + +static Route* +allocroute(int type) +{ + Route *r; + int n; + Route **l; + + if(type & Rv4){ + n = sizeof(RouteTree) + sizeof(V4route); + l = &v4freelist; + } else { + n = sizeof(RouteTree) + sizeof(V6route); + l = &v6freelist; + } + + r = *l; + if(r != nil){ + *l = r->mid; + } else { + r = malloc(n); + if(r == nil) + panic("out of routing nodes"); + } + memset(r, 0, n); + r->type = type; + r->ifc = nil; + r->ref = 1; + + return r; +} + +static void +addqueue(Route **q, Route *r) +{ + Route *l; + + if(r == nil) + return; + + l = allocroute(r->type); + l->mid = *q; + *q = l; + l->left = r; +} + +/* + * compare 2 v6 addresses + */ +static int +lcmp(ulong *a, ulong *b) +{ + int i; + + for(i = 0; i < IPllen; i++){ + if(a[i] > b[i]) + return 1; + if(a[i] < b[i]) + return -1; + } + return 0; +} + +/* + * compare 2 v4 or v6 ranges 
+ */ +enum +{ + Rpreceeds, + Rfollows, + Requals, + Rcontains, + Rcontained, +}; + +static int +rangecompare(Route *a, Route *b) +{ + if(a->type & Rv4){ + if(a->v4.endaddress < b->v4.address) + return Rpreceeds; + + if(a->v4.address > b->v4.endaddress) + return Rfollows; + + if(a->v4.address <= b->v4.address + && a->v4.endaddress >= b->v4.endaddress){ + if(a->v4.address == b->v4.address + && a->v4.endaddress == b->v4.endaddress) + return Requals; + return Rcontains; + } + return Rcontained; + } + + if(lcmp(a->v6.endaddress, b->v6.address) < 0) + return Rpreceeds; + + if(lcmp(a->v6.address, b->v6.endaddress) > 0) + return Rfollows; + + if(lcmp(a->v6.address, b->v6.address) <= 0 + && lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){ + if(lcmp(a->v6.address, b->v6.address) == 0 + && lcmp(a->v6.endaddress, b->v6.endaddress) == 0) + return Requals; + return Rcontains; + } + + return Rcontained; +} + +static void +copygate(Route *old, Route *new) +{ + if(new->type & Rv4) + memmove(old->v4.gate, new->v4.gate, IPv4addrlen); + else + memmove(old->v6.gate, new->v6.gate, IPaddrlen); +} + +/* + * walk down a tree adding nodes back in + */ +static void +walkadd(Fs *f, Route **root, Route *p) +{ + Route *l, *r; + + l = p->left; + r = p->right; + p->left = 0; + p->right = 0; + addnode(f, root, p); + if(l) + walkadd(f, root, l); + if(r) + walkadd(f, root, r); +} + +/* + * calculate depth + */ +static void +calcd(Route *p) +{ + Route *q; + int d; + + if(p) { + d = 0; + q = p->left; + if(q) + d = q->depth; + q = p->right; + if(q && q->depth > d) + d = q->depth; + q = p->mid; + if(q && q->depth > d) + d = q->depth; + p->depth = d+1; + } +} + +/* + * balance the tree at the current node + */ +static void +balancetree(Route **cur) +{ + Route *p, *l, *r; + int dl, dr; + + /* + * if left and right are + * too out of balance, + * rotate tree node + */ + p = *cur; + dl = 0; if(l = p->left) dl = l->depth; + dr = 0; if(r = p->right) dr = r->depth; + + if(dl > dr+1) { + p->left = l->right; + 
l->right = p; + *cur = l; + calcd(p); + calcd(l); + } else + if(dr > dl+1) { + p->right = r->left; + r->left = p; + *cur = r; + calcd(p); + calcd(r); + } else + calcd(p); +} + +/* + * add a new node to the tree + */ +static void +addnode(Fs *f, Route **cur, Route *new) +{ + Route *p; + + p = *cur; + if(p == 0) { + *cur = new; + new->depth = 1; + return; + } + + switch(rangecompare(new, p)){ + case Rpreceeds: + addnode(f, &p->left, new); + break; + case Rfollows: + addnode(f, &p->right, new); + break; + case Rcontains: + /* + * if new node is superset + * of tree node, + * replace tree node and + * queue tree node to be + * merged into root. + */ + *cur = new; + new->depth = 1; + addqueue(&f->queue, p); + break; + case Requals: + /* + * supercede the old entry if the old one isn't + * a local interface. + */ + if((p->type & Rifc) == 0){ + p->type = new->type; + p->ifcid = -1; + copygate(p, new); + } else if(new->type & Rifc) + p->ref++; + freeroute(new); + break; + case Rcontained: + addnode(f, &p->mid, new); + break; + } + + balancetree(cur); +} + +#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5)) + +void +v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type) +{ + Route *p; + ulong sa; + ulong m; + ulong ea; + int h, eh; + + m = nhgetl(mask); + sa = nhgetl(a) & m; + ea = sa | ~m; + + eh = V4H(ea); + for(h=V4H(sa); h<=eh; h++) { + p = allocroute(Rv4 | type); + p->v4.address = sa; + p->v4.endaddress = ea; + memmove(p->v4.gate, gate, sizeof(p->v4.gate)); + memmove(p->tag, tag, sizeof(p->tag)); + + wlock(&routelock); + addnode(f, &f->v4root[h], p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v4root[h], p->left); + freeroute(p); + } + wunlock(&routelock); + } + v4routegeneration++; + + ipifcaddroute(f, Rv4, a, mask, gate, type); +} + +#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5)) +#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0)) + +void 
+v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type) +{ + Route *p; + ulong sa[IPllen], ea[IPllen]; + ulong x, y; + int h, eh; + + /* + if(ISDFLT(a, mask, tag)) + f->v6p->cdrouter = -1; + */ + + + for(h = 0; h < IPllen; h++){ + x = nhgetl(a+4*h); + y = nhgetl(mask+4*h); + sa[h] = x & y; + ea[h] = x | ~y; + } + + eh = V6H(ea); + for(h = V6H(sa); h <= eh; h++) { + p = allocroute(type); + memmove(p->v6.address, sa, IPaddrlen); + memmove(p->v6.endaddress, ea, IPaddrlen); + memmove(p->v6.gate, gate, IPaddrlen); + memmove(p->tag, tag, sizeof(p->tag)); + + wlock(&routelock); + addnode(f, &f->v6root[h], p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v6root[h], p->left); + freeroute(p); + } + wunlock(&routelock); + } + v6routegeneration++; + + ipifcaddroute(f, 0, a, mask, gate, type); +} + +Route** +looknode(Route **cur, Route *r) +{ + Route *p; + + for(;;){ + p = *cur; + if(p == 0) + return 0; + + switch(rangecompare(r, p)){ + case Rcontains: + return 0; + case Rpreceeds: + cur = &p->left; + break; + case Rfollows: + cur = &p->right; + break; + case Rcontained: + cur = &p->mid; + break; + case Requals: + return cur; + } + } +} + +void +v4delroute(Fs *f, uchar *a, uchar *mask, int dolock) +{ + Route **r, *p; + Route rt; + int h, eh; + ulong m; + + m = nhgetl(mask); + rt.v4.address = nhgetl(a) & m; + rt.v4.endaddress = rt.v4.address | ~m; + rt.type = Rv4; + + eh = V4H(rt.v4.endaddress); + for(h=V4H(rt.v4.address); h<=eh; h++) { + if(dolock) + wlock(&routelock); + r = looknode(&f->v4root[h], &rt); + if(r) { + p = *r; + if(--(p->ref) == 0){ + *r = 0; + addqueue(&f->queue, p->left); + addqueue(&f->queue, p->mid); + addqueue(&f->queue, p->right); + freeroute(p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v4root[h], p->left); + freeroute(p); + } + } + } + if(dolock) + wunlock(&routelock); + } + v4routegeneration++; + + ipifcremroute(f, Rv4, a, mask); +} + +void +v6delroute(Fs *f, uchar *a, uchar *mask, int dolock) +{ + 
Route **r, *p;
+	Route rt;
+	int h, eh;
+	ulong x, y;
+
+	for(h = 0; h < IPllen; h++){
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		rt.v6.address[h] = x & y;
+		rt.v6.endaddress[h] = x | ~y;
+	}
+	rt.type = 0;
+
+	eh = V6H(rt.v6.endaddress);
+	for(h=V6H(rt.v6.address); h<=eh; h++) {
+		if(dolock)
+			wlock(&routelock);
+		r = looknode(&f->v6root[h], &rt);
+		if(r) {
+			p = *r;
+			/* last reference: unlink, then re-insert the orphaned subtrees */
+			if(--(p->ref) == 0){
+				*r = 0;
+				addqueue(&f->queue, p->left);
+				addqueue(&f->queue, p->mid);
+				addqueue(&f->queue, p->right);
+				freeroute(p);
+				while(p = f->queue) {
+					f->queue = p->mid;
+					walkadd(f, &f->v6root[h], p->left);
+					freeroute(p);
+				}
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v6routegeneration++;
+
+	ipifcremroute(f, 0, a, mask);
+}
+/*
+ * find the narrowest v4 route whose range contains a (4 bytes,
+ * network order).  if c is non-nil its cached route is returned while
+ * it has an interface and c->rgen equals the current v4 route
+ * generation; otherwise the result is stored back into c.
+ * the route's interface is (re)resolved with findipifc when it is
+ * unset or its id no longer matches.
+ * returns nil if no route matches or no interface can be found.
+ */
+Route*
+v4lookup(Fs *f, uchar *a, Conv *c)
+{
+	Route *p, *q;
+	ulong la;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
+		return c->r;
+
+	la = nhgetl(a);
+	q = nil;
+	/* each match descends into mid, where narrower ranges live */
+	for(p=f->v4root[V4H(la)]; p;)
+		if(la >= p->v4.address) {
+			if(la <= p->v4.endaddress) {
+				q = p;
+				p = p->mid;
+			} else
+				p = p->right;
+		} else
+			p = p->left;
+
+	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+		if(q->type & Rifc) {
+			hnputl(gate+IPv4off, q->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		} else
+			v4tov6(gate, q->v4.gate);
+		ifc = findipifc(f, gate, q->type);
+		if(ifc == nil)
+			return nil;
+		q->ifc = ifc;
+		q->ifcid = ifc->ifcid;
+	}
+
+	if(c != nil){
+		c->r = q;
+		c->rgen = v4routegeneration;
+	}
+
+	return q;
+}
+/*
+ * v6 counterpart of v4lookup; a is IPaddrlen bytes in network order.
+ * an address that starts with v4prefix is first tried with v4lookup,
+ * and the v6 tree is searched only if that finds nothing.
+ */
+Route*
+v6lookup(Fs *f, uchar *a, Conv *c)
+{
+	Route *p, *q;
+	ulong la[IPllen];
+	int h;
+	ulong x, y;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	if(memcmp(a, v4prefix, IPv4off) == 0){
+		q = v4lookup(f, a+IPv4off, c);
+		if(q != nil)
+			return q;
+	}
+
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
+		return c->r;
+
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);
+
+	q = 0;
+	for(p=f->v6root[V6H(la)]; p;){
+		/* la below the start of p's range: go left */
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		/* la above the end of p's range: go right */
+		for(h = 0; h < IPllen; h++){
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		/* inside the range: remember it and look for a narrower one */
+		q = p;
+		p = p->mid;
+next:		;
+	}
+
+	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+		if(q->type & Rifc) {
+			for(h = 0; h < IPllen; h++)
+				hnputl(gate+4*h, q->v6.address[h]);
+			ifc = findipifc(f, gate, q->type);
+		} else
+			ifc = findipifc(f, q->v6.gate, q->type);
+		if(ifc == nil)
+			return nil;
+		q->ifc = ifc;
+		q->ifcid = ifc->ifcid;
+	}
+	if(c != nil){
+		c->r = q;
+		c->rgen = v6routegeneration;
+	}
+
+	return q;
+}
+/*
+ * render the route type flags into p, which must have room for 5
+ * bytes: four characters, blank padded on the right, and a NUL.
+ * the characters are '4' or '6', then 'i' (Rifc), one of 'u', 'b',
+ * 'm' (Runi, Rbcast, Rmulti) and 'p' (Rptpt), each only if set.
+ */
+void
+routetype(int type, char *p)
+{
+	memset(p, ' ', 4);
+	p[4] = 0;
+	if(type & Rv4)
+		*p++ = '4';
+	else
+		*p++ = '6';
+	if(type & Rifc)
+		*p++ = 'i';
+	if(type & Runi)
+		*p++ = 'u';
+	else if(type & Rbcast)
+		*p++ = 'b';
+	else if(type & Rmulti)
+		*p++ = 'm';
+	if(type & Rptpt)
+		*p = 'p';
+}
+
+static char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
+/*
+ * convert a route to printable pieces: addr, mask and gate receive
+ * IPaddrlen-byte addresses (v4 routes are given the v4prefix, and a
+ * mask whose prefix bytes are 0xff), t receives the routetype string
+ * (5 bytes) and *nifc the interface's conversation index, or -1 if
+ * the route has no interface.
+ */
+void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+{
+	int i;
+
+	if(r->type & Rv4){
+		memmove(addr, v4prefix, IPv4off);
+		hnputl(addr+IPv4off, r->v4.address);
+		memset(mask, 0xff, IPv4off);
+		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+		memmove(gate, v4prefix, IPv4off);
+		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
+	} else {
+		for(i = 0; i < IPllen; i++){
+			hnputl(addr + 4*i, r->v6.address[i]);
+			hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+		}
+		memmove(gate, r->v6.gate, IPaddrlen);
+	}
+
+	routetype(r->type, t);
+
+	if(r->ifc)
+		*nifc = r->ifc->conv->x;
+	else
+		*nifc = -1;
+}
+
+/*
+ *  this code is not in rr to reduce stack size
+ *
+ *  format one route at rw->p (never past rw->e).  while rw->o is
+ *  negative that many bytes of output are still to be skipped: text
+ *  that falls wholly inside the skipped region is dropped, and text
+ *  that straddles its end is shifted down so only the wanted tail
+ *  is kept.
+ */
+static void
+sprintroute(Route *r, Routewalk *rw)
+{
+	int nifc, n;
+	char t[5], *iname, ifbuf[5];
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char *p;
+
+	convroute(r, addr, mask, gate, t, &nifc);
+	iname = "-";
+	if(nifc != -1) {
+		iname = ifbuf;
+		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
+	}
+	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+	if(rw->o < 0){
+		n = p - rw->p;
+		if(n > -rw->o){
+			memmove(rw->p, rw->p-rw->o, n+rw->o);
+			rw->p = p + rw->o;
+		}
+		rw->o += n;
+	} else
+		rw->p = p;
+}
+
+/*
+ *  recurse descending tree, applying the function in Routewalk
+ *
+ *  nodes are visited left, self, mid, right; rw->walk is applied only
+ *  to nodes whose start address hashes to rw->h.
+ *  returns 0 as soon as the output buffer is full, 1 otherwise.
+ */
+static int
+rr(Route *r, Routewalk *rw)
+{
+	int h;
+
+	if(rw->e <= rw->p)
+		return 0;
+	if(r == nil)
+		return 1;
+
+	if(rr(r->left, rw) == 0)
+		return 0;
+
+	if(r->type & Rv4)
+		h = V4H(r->v4.address);
+	else
+		h = V6H(r->v6.address);
+
+	if(h == rw->h)
+		rw->walk(r, rw);
+
+	if(rr(r->mid, rw) == 0)
+		return 0;
+
+	return rr(r->right, rw);
+}
+/*
+ * apply rw->walk to every v4 and then every v6 route, bucket by
+ * bucket, holding routelock for reading; stops early once the
+ * buffer described by rw is full.
+ */
+void
+ipwalkroutes(Fs *f, Routewalk *rw)
+{
+	rlock(&routelock);
+	if(rw->e > rw->p) {
+		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
+			if(rr(f->v4root[rw->h], rw) == 0)
+				break;
+	}
+	if(rw->e > rw->p) {
+		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
+			if(rr(f->v6root[rw->h], rw) == 0)
+				break;
+	}
+	runlock(&routelock);
+}
+/*
+ * read the formatted routing table: text starting offset bytes in is
+ * written to the n-byte buffer p.  returns the number of bytes
+ * produced.
+ */
+long
+routeread(Fs *f, char *p, ulong offset, int n)
+{
+	Routewalk rw;
+
+	rw.p = p;
+	rw.e = p+n;
+	rw.o = -offset;
+	rw.walk = sprintroute;
+
+	ipwalkroutes(f, &rw);
+
+	return rw.p - p;
+}
+
+/*
+ *  this code is not in routeflush to reduce stack size
+ *
+ *  delete route r by converting it back to address and mask and
+ *  calling v4delroute or v6delroute; dolock is passed through.
+ */
+void
+delroute(Fs *f, Route *r, int dolock)
+{
+	uchar addr[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar gate[IPaddrlen];
+	char t[5];
+	int nifc;
+
+	convroute(r, addr, mask, gate, t, &nifc);
+	if(r->type & Rv4)
+		v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
+	else
+		v6delroute(f, addr, mask, dolock);
+}
+
+/*
+ *  recurse until one route is deleted
+ *    returns 0 if nothing is deleted, 1 otherwise
+ */
+int
+routeflush(Fs *f, Route *r, char *tag)
+{
+	if(r == nil)
+		return 0;
+	if(routeflush(f, r->mid, tag))
+		return 1;
+	if(routeflush(f, r->left, tag))
+		return 1;
+	if(routeflush(f, r->right, tag)) +
return 1; + if((r->type & Rifc) == 0){ + if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){ + delroute(f, r, 0); + return 1; + } + } + return 0; +} + +Route * +iproute(Fs *fs, uchar *ip) +{ + if(isv4(ip)) + return v4lookup(fs, ip+IPv4off, nil); + else + return v6lookup(fs, ip, nil); +} + +static void +printroute(Route *r) +{ + int nifc; + char t[5], *iname, ifbuf[5]; + uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen]; + + convroute(r, addr, mask, gate, t, &nifc); + iname = "-"; + if(nifc != -1) { + iname = ifbuf; + snprint(ifbuf, sizeof ifbuf, "%d", nifc); + } + print(rformat, addr, mask, gate, t, r->tag, iname); +} + +long +routewrite(Fs *f, Chan *c, char *p, int n) +{ + int h, changed; + char *tag; + Cmdbuf *cb; + uchar addr[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar gate[IPaddrlen]; + IPaux *a, *na; + Route *q; + + cb = parsecmd(p, n); + if(waserror()){ + free(cb); + nexterror(); + } + + if(strcmp(cb->f[0], "flush") == 0){ + tag = cb->f[1]; + for(h = 0; h < nelem(f->v4root); h++) + for(changed = 1; changed;){ + wlock(&routelock); + changed = routeflush(f, f->v4root[h], tag); + wunlock(&routelock); + } + for(h = 0; h < nelem(f->v6root); h++) + for(changed = 1; changed;){ + wlock(&routelock); + changed = routeflush(f, f->v6root[h], tag); + wunlock(&routelock); + } + } else if(strcmp(cb->f[0], "remove") == 0){ + if(cb->nf < 3) + error(Ebadarg); + if (parseip(addr, cb->f[1]) == -1) + error(Ebadip); + parseipmask(mask, cb->f[2]); + if(memcmp(addr, v4prefix, IPv4off) == 0) + v4delroute(f, addr+IPv4off, mask+IPv4off, 1); + else + v6delroute(f, addr, mask, 1); + } else if(strcmp(cb->f[0], "add") == 0){ + if(cb->nf < 4) + error(Ebadarg); + if(parseip(addr, cb->f[1]) == -1 || + parseip(gate, cb->f[3]) == -1) + error(Ebadip); + parseipmask(mask, cb->f[2]); + tag = "none"; + if(c != nil){ + a = c->aux; + tag = a->tag; + } + if(memcmp(addr, v4prefix, IPv4off) == 0) + v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0); + else + v6addroute(f, 
tag, addr, mask, gate, 0); + } else if(strcmp(cb->f[0], "tag") == 0) { + if(cb->nf < 2) + error(Ebadarg); + + a = c->aux; + na = newipaux(a->owner, cb->f[1]); + c->aux = na; + free(a); + } else if(strcmp(cb->f[0], "route") == 0) { + if(cb->nf < 2) + error(Ebadarg); + if (parseip(addr, cb->f[1]) == -1) + error(Ebadip); + + q = iproute(f, addr); + print("%I: ", addr); + if(q == nil) + print("no route\n"); + else + printroute(q); + } + + poperror(); + free(cb); + return n; +} diff -Nru /sys/src/9k/ip/ipv6.c /sys/src/9k/ip/ipv6.c --- /sys/src/9k/ip/ipv6.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ipv6.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,621 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +enum +{ + IP6FHDR = 8, /* sizeof(Fraghdr6) */ +}; + +#define IPV6CLASS(hdr) (((hdr)->vcf[0]&0x0F)<<2 | ((hdr)->vcf[1]&0xF0)>>2) +#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0] & 0xF0) +/* + * This sleazy macro is stolen shamelessly from ip.c, see comment there. 
+ */ +#define BKFG(xp) ((Ipfrag*)((xp)->base)) + +Block* ip6reassemble(IP*, int, Block*, Ip6hdr*); +Fragment6* ipfragallo6(IP*); +void ipfragfree6(IP*, Fragment6*); +Block* procopts(Block *bp); +static Block* procxtns(IP *ip, Block *bp, int doreasm); +int unfraglen(Block *bp, uchar *nexthdr, int setfh); + +int +ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c) +{ + int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff; + int morefrags, blklen, rv = 0, tentative; + uchar *gate, nexthdr; + Block *xp, *nb; + Fraghdr6 fraghdr; + IP *ip; + Ip6hdr *eh; + Ipifc *ifc; + Route *r, *sr; + + ip = f->ip; + + /* Fill out the ip header */ + eh = (Ip6hdr*)(bp->rp); + + ip->stats[OutRequests]++; + + /* Number of uchars in data and ip header to write */ + len = blocklen(bp); + + tentative = iptentative(f, eh->src); + if(tentative){ + netlog(f, Logip, "reject tx of packet with tentative src address %I\n", + eh->src); + goto free; + } + + if(gating){ + chunk = nhgets(eh->ploadlen); + if(chunk > len){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "short gated packet\n"); + goto free; + } + if(chunk + IP6HDR < len) + len = chunk + IP6HDR; + } + + if(len >= IP_MAX){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "exceeded ip max size %I\n", eh->dst); + goto free; + } + + r = v6lookup(f, eh->dst, c); + if(r == nil){ +// print("no route for %I, src %I free\n", eh->dst, eh->src); + ip->stats[OutNoRoutes]++; + netlog(f, Logip, "no interface %I\n", eh->dst); + rv = -1; + goto free; + } + + ifc = r->ifc; + if(r->type & (Rifc|Runi)) + gate = eh->dst; + else if(r->type & (Rbcast|Rmulti)) { + gate = eh->dst; + sr = v6lookup(f, eh->src, nil); + if(sr && (sr->type & Runi)) + ifc = sr->ifc; + } + else + gate = r->v6.gate; + + if(!gating) + eh->vcf[0] = IP_VER6; + eh->ttl = ttl; + if(!gating) { + eh->vcf[0] |= tos >> 4; + eh->vcf[1] = tos << 4; + } + + if(!canrlock(ifc)) + goto free; + + if(waserror()){ + runlock(ifc); + nexterror(); + } + + if(ifc->medium == nil) + 
goto raise; + + /* If we dont need to fragment just send it */ + medialen = ifc->maxtu - ifc->medium->hsize; + if(len <= medialen) { + hnputs(eh->ploadlen, len - IP6HDR); + ifc->medium->bwrite(ifc, bp, V6, gate); + runlock(ifc); + poperror(); + return 0; + } + + if(gating && ifc->reassemble <= 0) { + /* + * v6 intermediate nodes are not supposed to fragment pkts; + * we fragment if ifc->reassemble is turned on; an exception + * needed for nat. + */ + ip->stats[OutDiscards]++; + icmppkttoobig6(f, ifc, bp); + netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst); + goto raise; + } + + /* start v6 fragmentation */ + uflen = unfraglen(bp, &nexthdr, 1); + if(uflen > medialen) { + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst); + goto raise; + } + + flen = len - uflen; + seglen = (medialen - (uflen + IP6FHDR)) & ~7; + if(seglen < 8) { + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%I: seglen < 8\n", eh->dst); + goto raise; + } + + lid = ainc(&ip->id6); + fraghdr.nexthdr = nexthdr; + fraghdr.res = 0; + hnputl(fraghdr.id, lid); + + xp = bp; + offset = uflen; + while (xp && offset && offset >= BLEN(xp)) { + offset -= BLEN(xp); + xp = xp->next; + } + xp->rp += offset; + + fragoff = 0; + morefrags = 1; + + for(; fragoff < flen; fragoff += seglen) { + nb = allocb(uflen + IP6FHDR + seglen); + + if(fragoff + seglen >= flen) { + seglen = flen - fragoff; + morefrags = 0; + } + + hnputs(eh->ploadlen, seglen+IP6FHDR); + memmove(nb->wp, eh, uflen); + nb->wp += uflen; + + hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */ + fraghdr.offsetRM[1] |= morefrags; + memmove(nb->wp, &fraghdr, IP6FHDR); + nb->wp += IP6FHDR; + + /* Copy data */ + chunk = seglen; + while (chunk) { + if(!xp) { + ip->stats[OutDiscards]++; + ip->stats[FragFails]++; + freeblist(nb); + netlog(f, Logip, "!xp: chunk in v6%d\n", chunk); + goto raise; + } + blklen = chunk; + if(BLEN(xp) < chunk) + blklen = 
BLEN(xp); + memmove(nb->wp, xp->rp, blklen); + + nb->wp += blklen; + xp->rp += blklen; + chunk -= blklen; + if(xp->rp == xp->wp) + xp = xp->next; + } + + ifc->medium->bwrite(ifc, nb, V6, gate); + ip->stats[FragCreates]++; + } + ip->stats[FragOKs]++; + +raise: + runlock(ifc); + poperror(); +free: + freeblist(bp); + return rv; +} + +void +ipiput6(Fs *f, Ipifc *ifc, Block *bp) +{ + int hl, hop, tos, notforme, tentative; + uchar proto; + uchar v6dst[IPaddrlen]; + IP *ip; + Ip6hdr *h; + Proto *p; + Route *r, *sr; + + ip = f->ip; + ip->stats[InReceives]++; + + /* + * Ensure we have all the header info in the first + * block. Make life easier for other protocols by + * collecting up to the first 64 bytes in the first block. + */ + if(BLEN(bp) < 64) { + hl = blocklen(bp); + if(hl < IP6HDR) + hl = IP6HDR; + if(hl > 64) + hl = 64; + bp = pullupblock(bp, hl); + if(bp == nil) + return; + } + + h = (Ip6hdr *)bp->rp; + + memmove(&v6dst[0], &h->dst[0], IPaddrlen); + notforme = ipforme(f, v6dst) == 0; + tentative = iptentative(f, v6dst); + + if(tentative && h->proto != ICMPv6) { + print("tentative addr, drop\n"); + freeblist(bp); + return; + } + + /* Check header version */ + if(BLKIPVER(bp) != IP_VER6) { + ip->stats[InHdrErrors]++; + netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2); + freeblist(bp); + return; + } + + /* route */ + if(notforme) { + if(!ip->iprouting){ + freeblist(bp); + return; + } + + /* don't forward to link-local destinations */ + if(islinklocal(h->dst) || + (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){ + ip->stats[OutDiscards]++; + freeblist(bp); + return; + } + + /* don't forward to source's network */ + sr = v6lookup(f, h->src, nil); + r = v6lookup(f, h->dst, nil); + + if(r == nil || sr == r){ + ip->stats[OutDiscards]++; + freeblist(bp); + return; + } + + /* don't forward if packet has timed out */ + hop = h->ttl; + if(hop < 1) { + ip->stats[InHdrErrors]++; + icmpttlexceeded6(f, ifc, bp); + freeblist(bp); + return; + } + + /* 
process headers & reassemble if the interface expects it */ + bp = procxtns(ip, bp, r->ifc->reassemble); + if(bp == nil) + return; + + ip->stats[ForwDatagrams]++; + h = (Ip6hdr *)bp->rp; + tos = IPV6CLASS(h); + hop = h->ttl; + ipoput6(f, bp, 1, hop-1, tos, nil); + return; + } + + /* reassemble & process headers if needed */ + bp = procxtns(ip, bp, 1); + if(bp == nil) + return; + + h = (Ip6hdr *) (bp->rp); + proto = h->proto; + p = Fsrcvpcol(f, proto); + if(p && p->rcv) { + ip->stats[InDelivers]++; + (*p->rcv)(p, ifc, bp); + return; + } + + ip->stats[InDiscards]++; + ip->stats[InUnknownProtos]++; + freeblist(bp); +} + +/* + * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6 + */ +void +ipfragfree6(IP *ip, Fragment6 *frag) +{ + Fragment6 *fl, **l; + + if(frag->blist) + freeblist(frag->blist); + + memset(frag->src, 0, IPaddrlen); + frag->id = 0; + frag->blist = nil; + + l = &ip->flisthead6; + for(fl = *l; fl; fl = fl->next) { + if(fl == frag) { + *l = frag->next; + break; + } + l = &fl->next; + } + + frag->next = ip->fragfree6; + ip->fragfree6 = frag; +} + +/* + * ipfragallo6 - copied from ipfragalloc4 + */ +Fragment6* +ipfragallo6(IP *ip) +{ + Fragment6 *f; + + while(ip->fragfree6 == nil) { + /* free last entry on fraglist */ + for(f = ip->flisthead6; f->next; f = f->next) + ; + ipfragfree6(ip, f); + } + f = ip->fragfree6; + ip->fragfree6 = f->next; + f->next = ip->flisthead6; + ip->flisthead6 = f; + f->age = NOW + 30000; + + return f; +} + +static Block* +procxtns(IP *ip, Block *bp, int doreasm) +{ + int offset; + uchar proto; + Ip6hdr *h; + + h = (Ip6hdr *)bp->rp; + offset = unfraglen(bp, &proto, 0); + + if(proto == FH && doreasm != 0) { + bp = ip6reassemble(ip, offset, bp, h); + if(bp == nil) + return nil; + offset = unfraglen(bp, &proto, 0); + } + + if(proto == DOH || offset > IP6HDR) + bp = procopts(bp); + return bp; +} + +/* + * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr, + * hop-by-hop & routing headers if present; 
*nexthdr is set to nexthdr value + * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr + * field of the last header in the "Unfragmentable part" is set to FH. + */ +int +unfraglen(Block *bp, uchar *nexthdr, int setfh) +{ + uchar *p, *q; + int ufl, hs; + + p = bp->rp; + q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */ + *nexthdr = *q; + ufl = IP6HDR; + p += ufl; + + while (*nexthdr == HBH || *nexthdr == RH) { + *nexthdr = *p; + hs = ((int)*(p+1) + 1) * 8; + ufl += hs; + q = p; + p += hs; + } + + if(*nexthdr == FH) + *q = *p; + if(setfh) + *q = FH; + return ufl; +} + +Block* +procopts(Block *bp) +{ + return bp; +} + +Block* +ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih) +{ + int fend, offset, ovlap, len, fragsize, pktposn; + uint id; + uchar src[IPaddrlen], dst[IPaddrlen]; + Block *bl, **l, *last, *prev; + Fraghdr6 *fraghdr; + Fragment6 *f, *fnext; + + fraghdr = (Fraghdr6 *)(bp->rp + uflen); + memmove(src, ih->src, IPaddrlen); + memmove(dst, ih->dst, IPaddrlen); + id = nhgetl(fraghdr->id); + offset = nhgets(fraghdr->offsetRM) & ~7; + + /* + * block lists are too hard, pullupblock into a single block + */ + if(bp->next){ + bp = pullupblock(bp, blocklen(bp)); + ih = (Ip6hdr *)bp->rp; + } + + qlock(&ip->fraglock6); + + /* + * find a reassembly queue for this fragment + */ + for(f = ip->flisthead6; f; f = fnext){ + fnext = f->next; + if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id) + break; + if(f->age < NOW){ + ip->stats[ReasmTimeout]++; + ipfragfree6(ip, f); + } + } + + /* + * if this isn't a fragmented packet, accept it + * and get rid of any fragments that might go + * with it. 
+ */ + if(nhgets(fraghdr->offsetRM) == 0) { /* 1st frag is also last */ + if(f) { + ipfragfree6(ip, f); + ip->stats[ReasmFails]++; + } + qunlock(&ip->fraglock6); + return bp; + } + + if(bp->base+IPFRAGSZ >= bp->rp){ + bp = padblock(bp, IPFRAGSZ); + bp->rp += IPFRAGSZ; + } + + BKFG(bp)->foff = offset; + BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR; + + /* First fragment allocates a reassembly queue */ + if(f == nil) { + f = ipfragallo6(ip); + f->id = id; + memmove(f->src, src, IPaddrlen); + memmove(f->dst, dst, IPaddrlen); + + f->blist = bp; + + qunlock(&ip->fraglock6); + ip->stats[ReasmReqds]++; + return nil; + } + + /* + * find the new fragment's position in the queue + */ + prev = nil; + l = &f->blist; + bl = f->blist; + while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) { + prev = bl; + l = &bl->next; + bl = bl->next; + } + + /* Check overlap of a previous fragment - trim away as necessary */ + if(prev) { + ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff; + if(ovlap > 0) { + if(ovlap >= BKFG(bp)->flen) { + freeblist(bp); + qunlock(&ip->fraglock6); + return nil; + } + BKFG(prev)->flen -= ovlap; + } + } + + /* Link onto assembly queue */ + bp->next = *l; + *l = bp; + + /* Check to see if succeeding segments overlap */ + if(bp->next) { + l = &bp->next; + fend = BKFG(bp)->foff + BKFG(bp)->flen; + + /* Take completely covered segments out */ + while(*l) { + ovlap = fend - BKFG(*l)->foff; + if(ovlap <= 0) + break; + if(ovlap < BKFG(*l)->flen) { + BKFG(*l)->flen -= ovlap; + BKFG(*l)->foff += ovlap; + /* move up ih hdrs */ + memmove((*l)->rp + ovlap, (*l)->rp, uflen); + (*l)->rp += ovlap; + break; + } + last = (*l)->next; + (*l)->next = nil; + freeblist(*l); + *l = last; + } + } + + /* + * look for a complete packet. if we get to a fragment + * with the trailing bit of fraghdr->offsetRM[1] set, we're done. 
+ */ + pktposn = 0; + for(bl = f->blist; bl && BKFG(bl)->foff == pktposn; bl = bl->next) { + fraghdr = (Fraghdr6 *)(bl->rp + uflen); + if((fraghdr->offsetRM[1] & 1) == 0) { + bl = f->blist; + + /* get rid of frag header in first fragment */ + memmove(bl->rp + IP6FHDR, bl->rp, uflen); + bl->rp += IP6FHDR; + len = nhgets(((Ip6hdr*)bl->rp)->ploadlen) - IP6FHDR; + bl->wp = bl->rp + len + IP6HDR; + /* + * Pullup all the fragment headers and + * return a complete packet + */ + for(bl = bl->next; bl; bl = bl->next) { + fragsize = BKFG(bl)->flen; + len += fragsize; + bl->rp += uflen + IP6FHDR; + bl->wp = bl->rp + fragsize; + } + + bl = f->blist; + f->blist = nil; + ipfragfree6(ip, f); + ih = (Ip6hdr*)bl->rp; + hnputs(ih->ploadlen, len); + qunlock(&ip->fraglock6); + ip->stats[ReasmOKs]++; + return bl; + } + pktposn += BKFG(bl)->flen; + } + qunlock(&ip->fraglock6); + return nil; +} diff -Nru /sys/src/9k/ip/ipv6.h /sys/src/9k/ip/ipv6.h --- /sys/src/9k/ip/ipv6.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ipv6.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,188 @@ +/* + * Internet Protocol Version 6 + * + * rfc2460 defines the protocol, rfc2461 neighbour discovery, and + * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463. + * rfc4291 defines the address architecture (including prefices), was rfc3513. + * rfc4007 defines the scoped address architecture. + * + * global unicast is anything but unspecified (::), loopback (::1), + * multicast (ff00::/8), and link-local unicast (fe80::/10). + * + * site-local (fec0::/10) is now deprecated, originally by rfc3879. + * + * Unique Local IPv6 Unicast Addresses are defined by rfc4193. + * prefix is fc00::/7, scope is global, routing is limited to roughly a site. 
+ */ +#define isv6mcast(addr) ((addr)[0] == 0xff) +#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80) + +#define optexsts(np) (nhgets((np)->ploadlen) > 24) +#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0) + +#ifndef MIN +#define MIN(a, b) ((a) <= (b)? (a): (b)) +#endif + +enum { /* Header Types */ + HBH = 0, /* hop-by-hop multicast routing protocol */ + ICMP = 1, + IGMP = 2, + GGP = 3, + IPINIP = 4, + ST = 5, + TCP = 6, + UDP = 17, + ISO_TP4 = 29, + RH = 43, + FH = 44, + IDRP = 45, + RSVP = 46, + AH = 51, + ESP = 52, + ICMPv6 = 58, + NNH = 59, + DOH = 60, + ISO_IP = 80, + IGRP = 88, + OSPF = 89, + + Maxhdrtype = 256, +}; + +enum { + /* multicast flags and scopes */ + +// Well_known_flg = 0, +// Transient_flg = 1, + +// Interface_local_scop = 1, + Link_local_scop = 2, +// Site_local_scop = 5, +// Org_local_scop = 8, + Global_scop = 14, + + /* various prefix lengths */ + SOLN_PREF_LEN = 13, + + /* icmpv6 unreachability codes */ + Icmp6_no_route = 0, + Icmp6_ad_prohib = 1, + Icmp6_out_src_scope = 2, + Icmp6_adr_unreach = 3, + Icmp6_port_unreach = 4, + Icmp6_gress_src_fail = 5, + Icmp6_rej_route = 6, + Icmp6_unknown = 7, /* our own invention for internal use */ + + /* various flags & constants */ + v6MINTU = 1280, + HOP_LIMIT = 255, + IP6HDR = 40, /* sizeof(Ip6hdr) = 8 + 2*16 */ + + /* option types */ + + /* neighbour discovery */ + SRC_LLADDR = 1, + TARGET_LLADDR = 2, + PREFIX_INFO = 3, + REDIR_HEADER = 4, + MTU_OPTION = 5, + /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */ + V6nd_home = 8, + V6nd_srcaddrs = 9, /* rfc3122 */ + V6nd_ip = 17, + /* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */ + V6nd_rdns = 25, + /* plan 9 extensions */ + V6nd_9fs = 250, + V6nd_9auth = 251, + + SRC_UNSPEC = 0, + SRC_UNI = 1, + TARG_UNI = 2, + TARG_MULTI = 3, + + Tunitent = 1, + Tuniproxy = 2, + Tunirany = 3, + + /* Node constants */ + MAX_MULTICAST_SOLICIT = 3, + RETRANS_TIMER = 1000, +}; + +typedef struct 
Ip6hdr Ip6hdr; +typedef struct Opthdr Opthdr; +typedef struct Routinghdr Routinghdr; +typedef struct Fraghdr6 Fraghdr6; + +/* we do this in case there's padding at the end of Ip6hdr */ +#define IPV6HDR \ + uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */\ + uchar ploadlen[2]; /* payload length: packet length - 40 */ \ + uchar proto; /* next header type */ \ + uchar ttl; /* hop limit */ \ + uchar src[IPaddrlen]; \ + uchar dst[IPaddrlen] + +struct Ip6hdr { + IPV6HDR; + uchar payload[]; +}; + +struct Opthdr { /* unused */ + uchar nexthdr; + uchar len; +}; + +/* + * Beware routing header type 0 (loose source routing); see + * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf. + * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering + * against type 0 header. + */ +struct Routinghdr { /* unused */ + uchar nexthdr; + uchar len; + uchar rtetype; + uchar segrem; +}; + +struct Fraghdr6 { + uchar nexthdr; + uchar res; + uchar offsetRM[2]; /* Offset, Res, M flag */ + uchar id[4]; +}; + +extern uchar v6allnodesN[IPaddrlen]; +extern uchar v6allnodesL[IPaddrlen]; +extern uchar v6allroutersN[IPaddrlen]; +extern uchar v6allroutersL[IPaddrlen]; +extern uchar v6allnodesNmask[IPaddrlen]; +extern uchar v6allnodesLmask[IPaddrlen]; +extern uchar v6solicitednode[IPaddrlen]; +extern uchar v6solicitednodemask[IPaddrlen]; +extern uchar v6Unspecified[IPaddrlen]; +extern uchar v6loopback[IPaddrlen]; +extern uchar v6loopbackmask[IPaddrlen]; +extern uchar v6linklocal[IPaddrlen]; +extern uchar v6linklocalmask[IPaddrlen]; +extern uchar v6multicast[IPaddrlen]; +extern uchar v6multicastmask[IPaddrlen]; + +extern int v6llpreflen; +extern int v6mcpreflen; +extern int v6snpreflen; +extern int v6aNpreflen; +extern int v6aLpreflen; + +extern int ReTransTimer; + +void ipv62smcast(uchar *, uchar *); +void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac); +void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags); +void 
icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp); +void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp); +void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free); diff -Nru /sys/src/9k/ip/loopbackmedium.c /sys/src/9k/ip/loopbackmedium.c --- /sys/src/9k/ip/loopbackmedium.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/loopbackmedium.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,120 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + Maxtu= 16*1024, /* read size passed to qbread; also the medium's maxtu */ +}; + +typedef struct LB LB; +struct LB +{ + Proc *readp; /* reader kproc; set by loopbackread, zeroed when it exits */ + Queue *q; /* blocks queued by loopbackbwrite for loopbackread */ + Fs *f; /* copied from ifc->conv->p->f at bind time */ +}; + +static void loopbackread(void *a); + +static void +loopbackbind(Ipifc *ifc, int, char**) +{ /* allocate the per-interface LB, hang it off ifc->arg and start the reader; the argument count and vector are ignored */ + LB *lb; + + lb = smalloc(sizeof(*lb)); + lb->f = ifc->conv->p->f; + lb->q = qopen(1024*1024, Qmsg, nil, nil); + ifc->arg = lb; + ifc->mbps = 10000; + + kproc("loopbackread", loopbackread, ifc); + +} + +static void +loopbackunbind(Ipifc *ifc) +{ /* post a note to the reader, poll until it has cleared lb->readp, then release the queue and the LB */ + LB *lb = ifc->arg; + + if(lb->readp) + postnote(lb->readp, 1, "unbind", 0); + + /* wait for reader to die */ + while(lb->readp != 0) + tsleep(&up->sleep, return0, 0, 300); + + /* clean up */ + qfree(lb->q); + free(lb); +} + +static void +loopbackbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ /* output side: pass the block to the reader's queue */ + LB *lb; + + lb = ifc->arg; + if(qpass(lb->q, bp) < 0) /* negative return from qpass is counted as an output error */ + ifc->outerr++; + ifc->out++; /* counted whether or not qpass succeeded */ +} + +static void +loopbackread(void *a) +{ /* kproc body: pull blocks off lb->q and feed them to ipiput4; a is the Ipifc passed to kproc */ + Ipifc *ifc; + Block *bp; + LB *lb; + + ifc = a; + lb = ifc->arg; + lb->readp = up; /* hide identity under a rock for unbind */ + if(waserror()){ /* any error raised below ends the kproc; clearing readp lets loopbackunbind proceed */ + lb->readp = 0; + pexit("hangup", 1); + } + for(;;){ + bp = qbread(lb->q, Maxtu); + if(bp == nil) + continue; + ifc->in++; + if(!canrlock(ifc)){ /* read lock not available: drop the block rather than wait */ + freeb(bp); + continue; + } + if(waserror()){ /* release the read lock before passing the error on */ + runlock(ifc); + nexterror(); + } + if(ifc->lifc == nil) /* block is freed instead of delivered when ifc->lifc is nil */ + freeb(bp); + else + ipiput4(lb->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +Medium loopbackmedium = +{ +.hsize= 0, +.mintu= 0, +.maxtu= Maxtu, +.maclen= 0, +.name= "loopback", +.bind= 
loopbackbind, +.unbind= loopbackunbind, +.bwrite= loopbackbwrite, +}; + +void +loopbackmediumlink(void) +{ + addipmedium(&loopbackmedium); +} diff -Nru /sys/src/9k/ip/netdevmedium.c /sys/src/9k/ip/netdevmedium.c --- /sys/src/9k/ip/netdevmedium.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/netdevmedium.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,153 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void netdevbind(Ipifc *ifc, int argc, char **argv); +static void netdevunbind(Ipifc *ifc); +static void netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip); +static void netdevread(void *a); + +typedef struct Netdevrock Netdevrock; +struct Netdevrock +{ + Fs *f; /* file system we belong to */ + Proc *readp; /* reading process */ + Chan *mchan; /* Data channel */ +}; + +Medium netdevmedium = +{ +.name= "netdev", +.hsize= 0, +.mintu= 0, +.maxtu= 64000, +.maclen= 0, +.bind= netdevbind, +.unbind= netdevunbind, +.bwrite= netdevbwrite, +.unbindonclose= 0, +}; + +/* + * called to bind an IP ifc to a generic network device + * called with ifc qlock'd + */ +static void +netdevbind(Ipifc *ifc, int argc, char **argv) +{ + Chan *mchan; + Netdevrock *er; + + if(argc < 2) + error(Ebadarg); + + mchan = namec(argv[2], Aopen, ORDWR, 0); + + er = smalloc(sizeof(*er)); + er->mchan = mchan; + er->f = ifc->conv->p->f; + + ifc->arg = er; + + kproc("netdevread", netdevread, ifc); +} + +/* + * called with ifc wlock'd + */ +static void +netdevunbind(Ipifc *ifc) +{ + Netdevrock *er = ifc->arg; + + if(er->readp != nil) + postnote(er->readp, 1, "unbind", 0); + + /* wait for readers to die */ + while(er->readp != nil) + tsleep(&up->sleep, return0, 0, 300); + + if(er->mchan != nil) + cclose(er->mchan); + + free(er); +} + +/* + * called by ipoput with a single block to write + */ +static void +netdevbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + Netdevrock *er = ifc->arg; + + if(bp->next) + bp = 
concatblock(bp); + if(BLEN(bp) < ifc->mintu) + bp = adjustblock(bp, ifc->mintu); + + er->mchan->dev->bwrite(er->mchan, bp, 0); + ifc->out++; +} + +/* + * process to read from the device + */ +static void +netdevread(void *a) +{ + Ipifc *ifc; + Block *bp; + Netdevrock *er; + char *argv[1]; + + ifc = a; + er = ifc->arg; + er->readp = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->readp = nil; + pexit("hangup", 1); + } + for(;;){ + bp = er->mchan->dev->bread(er->mchan, ifc->maxtu, 0); + if(bp == nil){ + /* + * get here if mchan is a pipe and other side hangs up + * clean up this interface & get out +ZZZ is this a good idea? + */ + poperror(); + er->readp = nil; + argv[0] = "unbind"; + if(!waserror()) + ifc->conv->p->ctl(ifc->conv, argv, 1); + pexit("hangup", 1); + } + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +void +netdevmediumlink(void) +{ + addipmedium(&netdevmedium); +} diff -Nru /sys/src/9k/ip/netlog.c /sys/src/9k/ip/netlog.c --- /sys/src/9k/ip/netlog.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/netlog.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,260 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +enum { + Nlog = 16*1024, +}; + +/* + * action log + */ +struct Netlog { + Lock; + int opens; + char* buf; + char *end; + char *rptr; + int len; + + int logmask; /* mask of things to debug */ + uchar iponly[IPaddrlen]; /* ip address to print debugging for */ + int iponlyset; + + QLock; + Rendez; +}; + +typedef struct Netlogflag { + char* name; + int mask; +} Netlogflag; + +static Netlogflag flags[] = +{ + { "ppp", Logppp, }, + { "ip", Logip, }, + { "fs", Logfs, }, + { "tcp", Logtcp, }, + { "il", Logil, }, + { "icmp", Logicmp, }, + { "udp", Logudp, }, + { "compress", 
Logcompress, }, + { "ilmsg", Logil|Logilmsg, }, + { "gre", Loggre, }, + { "tcpwin", Logtcp|Logtcpwin, }, + { "tcprxmt", Logtcp|Logtcprxmt, }, + { "udpmsg", Logudp|Logudpmsg, }, + { "ipmsg", Logip|Logipmsg, }, + { "esp", Logesp, }, + { nil, 0, }, +}; + +char Ebadnetctl[] = "too few arguments for netlog control message"; + +enum +{ + CMset, + CMclear, + CMonly, +}; + +static +Cmdtab routecmd[] = { + CMset, "set", 0, + CMclear, "clear", 0, + CMonly, "only", 0, +}; + +void +netloginit(Fs *f) +{ + f->alog = smalloc(sizeof(Netlog)); +} + +void +netlogopen(Fs *f) +{ + lock(f->alog); + if(waserror()){ + unlock(f->alog); + nexterror(); + } + if(f->alog->opens == 0){ + if(f->alog->buf == nil){ + f->alog->buf = malloc(Nlog); + if(f->alog->buf == nil) + error(Enomem); + } + f->alog->rptr = f->alog->buf; + f->alog->end = f->alog->buf + Nlog; + } + f->alog->opens++; + poperror(); + unlock(f->alog); +} + +void +netlogclose(Fs *f) +{ + lock(f->alog); + f->alog->opens--; + if(f->alog->opens == 0){ + free(f->alog->buf); + f->alog->buf = nil; + } + unlock(f->alog); +} + +static int +netlogready(void *a) +{ + Fs *f = a; + + return f->alog->len; +} + +long +netlogread(Fs *f, void *a, ulong, long n) +{ + int i, d; + char *p, *rptr; + + qlock(f->alog); + if(waserror()){ + qunlock(f->alog); + nexterror(); + } + + for(;;){ + lock(f->alog); + if(f->alog->len){ + if(n > f->alog->len) + n = f->alog->len; + d = 0; + rptr = f->alog->rptr; + f->alog->rptr += n; + if(f->alog->rptr >= f->alog->end){ + d = f->alog->rptr - f->alog->end; + f->alog->rptr = f->alog->buf + d; + } + f->alog->len -= n; + unlock(f->alog); + + i = n-d; + p = a; + memmove(p, rptr, i); + memmove(p+i, f->alog->buf, d); + break; + } + else + unlock(f->alog); + + sleep(f->alog, netlogready, f); + } + + poperror(); + qunlock(f->alog); + + return n; +} + +void +netlogctl(Fs *f, char* s, int n) +{ + int i, set; + Netlogflag *fp; + Cmdbuf *cb; + Cmdtab *ct; + + cb = parsecmd(s, n); + if(waserror()){ + free(cb); + nexterror(); + } + 
+ if(cb->nf < 2) + error(Ebadnetctl); + + ct = lookupcmd(cb, routecmd, nelem(routecmd)); + switch(ct->index){ + case CMset: + set = 1; + break; + + case CMclear: + set = 0; + break; + + case CMonly: + parseip(f->alog->iponly, cb->f[1]); + if(ipcmp(f->alog->iponly, IPnoaddr) == 0) + f->alog->iponlyset = 0; + else + f->alog->iponlyset = 1; + poperror(); + free(cb); + return; + + default: + SET(set); + cmderror(cb, "unknown ip control message"); + } + + for(i = 1; i < cb->nf; i++){ + for(fp = flags; fp->name; fp++) + if(strcmp(fp->name, cb->f[i]) == 0) + break; + if(fp->name == nil) + continue; + if(set) + f->alog->logmask |= fp->mask; + else + f->alog->logmask &= ~fp->mask; + } + + poperror(); + free(cb); +} + +void +netlog(Fs *f, int mask, char *fmt, ...) +{ + char buf[256], *t, *fp; + int i, n; + va_list arg; + + if(!(f->alog->logmask & mask)) + return; + + if(f->alog->opens == 0) + return; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + + lock(f->alog); + i = f->alog->len + n - Nlog; + if(i > 0){ + f->alog->len -= i; + f->alog->rptr += i; + if(f->alog->rptr >= f->alog->end) + f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end); + } + t = f->alog->rptr + f->alog->len; + fp = buf; + f->alog->len += n; + while(n-- > 0){ + if(t >= f->alog->end) + t = f->alog->buf + (t - f->alog->end); + *t++ = *fp++; + } + unlock(f->alog); + + wakeup(f->alog); +} diff -Nru /sys/src/9k/ip/nullmedium.c /sys/src/9k/ip/nullmedium.c --- /sys/src/9k/ip/nullmedium.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/nullmedium.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,39 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void +nullbind(Ipifc*, int, char**) +{ + error("cannot bind null device"); +} + +static void +nullunbind(Ipifc*) +{ +} + +static void +nullbwrite(Ipifc*, Block*, int, uchar*) +{ + error("nullbwrite"); +} + +Medium nullmedium = 
+{ +.name= "null", +.bind= nullbind, +.unbind= nullunbind, +.bwrite= nullbwrite, +}; + +void +nullmediumlink(void) +{ + addipmedium(&nullmedium); +} diff -Nru /sys/src/9k/ip/pktmedium.c /sys/src/9k/ip/pktmedium.c --- /sys/src/9k/ip/pktmedium.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/pktmedium.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,79 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + + +static void pktbind(Ipifc*, int, char**); +static void pktunbind(Ipifc*); +static void pktbwrite(Ipifc*, Block*, int, uchar*); +static void pktin(Fs*, Ipifc*, Block*); + +Medium pktmedium = +{ +.name= "pkt", +.hsize= 14, +.mintu= 40, +.maxtu= 4*1024, +.maclen= 6, +.bind= pktbind, +.unbind= pktunbind, +.bwrite= pktbwrite, +.pktin= pktin, +}; + +/* + * called to bind an IP ifc to the pkt medium; there is nothing to set up and the arguments are ignored + * called with ifc wlock'd + */ +static void +pktbind(Ipifc*, int argc, char **argv) +{ + USED(argc, argv); +} + +/* + * called with ifc wlock'd; nothing to tear down + */ +static void +pktunbind(Ipifc*) +{ +} + +/* + * called by ipoput with a single packet to write + */ +static void +pktbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + /* enqueue onto the conversation's rq */ + bp = concatblock(bp); + if(ifc->conv->snoopers.ref > 0) /* snoopers get their own copy on sq */ + qpass(ifc->conv->sq, copyblock(bp, BLEN(bp))); + qpass(ifc->conv->rq, bp); +} + +/* + * called with ifc rlocked when someone writes to 'data'; the block is freed instead of delivered when ifc->lifc is nil + */ +static void +pktin(Fs *f, Ipifc *ifc, Block *bp) +{ + if(ifc->lifc == nil) + freeb(bp); + else { + if(ifc->conv->snoopers.ref > 0) /* snoopers get their own copy on sq */ + qpass(ifc->conv->sq, copyblock(bp, BLEN(bp))); + ipiput4(f, ifc, bp); + } +} + +void +pktmediumlink(void) +{ + addipmedium(&pktmedium); +} diff -Nru /sys/src/9k/ip/ptclbsum.c /sys/src/9k/ip/ptclbsum.c --- /sys/src/9k/ip/ptclbsum.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/ptclbsum.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,72 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" 
+#include "fns.h" +#include "../port/error.h" +#include "ip.h" + +static short endian = 1; +static uchar* aendian = (uchar*)&endian; +#define LITTLE *aendian + +ushort +ptclbsum(uchar *addr, int len) +{ + ulong losum, hisum, mdsum, x; + ulong t1, t2; + + losum = 0; + hisum = 0; + mdsum = 0; + + x = 0; + if(PTR2UINT(addr) & 1) { + if(len) { + hisum += addr[0]; + len--; + addr++; + } + x = 1; + } + while(len >= 16) { + t1 = *(ushort*)(addr+0); + t2 = *(ushort*)(addr+2); mdsum += t1; + t1 = *(ushort*)(addr+4); mdsum += t2; + t2 = *(ushort*)(addr+6); mdsum += t1; + t1 = *(ushort*)(addr+8); mdsum += t2; + t2 = *(ushort*)(addr+10); mdsum += t1; + t1 = *(ushort*)(addr+12); mdsum += t2; + t2 = *(ushort*)(addr+14); mdsum += t1; + mdsum += t2; + len -= 16; + addr += 16; + } + while(len >= 2) { + mdsum += *(ushort*)addr; + len -= 2; + addr += 2; + } + if(x) { + if(len) + losum += addr[0]; + if(LITTLE) + losum += mdsum; + else + hisum += mdsum; + } else { + if(len) + hisum += addr[0]; + if(LITTLE) + hisum += mdsum; + else + losum += mdsum; + } + + losum += hisum >> 8; + losum += (hisum & 0xff) << 8; + while(hisum = losum>>16) + losum = hisum + (losum & 0xffff); + + return losum & 0xffff; +} diff -Nru /sys/src/9k/ip/tcp.c /sys/src/9k/ip/tcp.c --- /sys/src/9k/ip/tcp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/tcp.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,3241 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + QMAX = 64*1024-1, + IP_TCPPROTO = 6, + + TCP4_IPLEN = 8, + TCP4_PHDRSIZE = 12, + TCP4_HDRSIZE = 20, + TCP4_TCBPHDRSZ = 40, + TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE, + + TCP6_IPLEN = 0, + TCP6_PHDRSIZE = 40, + TCP6_HDRSIZE = 20, + TCP6_TCBPHDRSZ = 60, + TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE, + + TcptimerOFF = 0, + TcptimerON = 1, + TcptimerDONE = 2, + MAX_TIME = (1<<20), /* Forever */ + TCP_ACK = 50, /* Timed ack sequence in ms */ + MAXBACKMS = 9*60*1000, /* longest backoff 
time (ms) before hangup */ + + URG = 0x20, /* Data marked urgent */ + ACK = 0x10, /* Acknowledge is valid */ + PSH = 0x08, /* Whole data pipe is pushed */ + RST = 0x04, /* Reset connection */ + SYN = 0x02, /* Pkt. is synchronise */ + FIN = 0x01, /* Start close down */ + + EOLOPT = 0, + NOOPOPT = 1, + MSSOPT = 2, + MSS_LENGTH = 4, /* Maximum segment size */ + WSOPT = 3, + WS_LENGTH = 3, /* Bits to scale window size by */ + MSL2 = 10, + MSPTICK = 50, /* Milliseconds per timer tick */ + DEF_MSS = 1460, /* Default maximum segment */ + DEF_MSS6 = 1280, /* Default maximum segment (min) for v6 */ + DEF_RTT = 500, /* Default round trip */ + DEF_KAT = 120000, /* Default time (ms) between keep alives */ + TCP_LISTEN = 0, /* Listen connection */ + TCP_CONNECT = 1, /* Outgoing connection */ + SYNACK_RXTIMER = 250, /* ms between SYNACK retransmits */ + + TCPREXMTTHRESH = 3, /* dupack threshhold for rxt */ + + FORCE = 1, + CLONE = 2, + RETRAN = 4, + ACTIVE = 8, + SYNACK = 16, + + LOGAGAIN = 3, + LOGDGAIN = 2, + + Closed = 0, /* Connection states */ + Listen, + Syn_sent, + Syn_received, + Established, + Finwait1, + Finwait2, + Close_wait, + Closing, + Last_ack, + Time_wait, + + Maxlimbo = 1000, /* maximum procs waiting for response to SYN ACK */ + NLHT = 256, /* hash table size, must be a power of 2 */ + LHTMASK = NLHT-1, + + HaveWS = 1<<8, +}; + +/* Must correspond to the enumeration above */ +char *tcpstates[] = +{ + "Closed", "Listen", "Syn_sent", "Syn_received", + "Established", "Finwait1", "Finwait2", "Close_wait", + "Closing", "Last_ack", "Time_wait" +}; + +typedef struct Tcptimer Tcptimer; +struct Tcptimer +{ + Tcptimer *next; + Tcptimer *prev; + Tcptimer *readynext; + int state; + int start; + int count; + void (*func)(void*); + void *arg; +}; + +/* + * v4 and v6 pseudo headers used for + * checksuming tcp + */ +typedef struct Tcp4hdr Tcp4hdr; +struct Tcp4hdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet 
length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar proto; + uchar tcplen[2]; + uchar tcpsrc[4]; + uchar tcpdst[4]; + uchar tcpsport[2]; + uchar tcpdport[2]; + uchar tcpseq[4]; + uchar tcpack[4]; + uchar tcpflag[2]; + uchar tcpwin[2]; + uchar tcpcksum[2]; + uchar tcpurg[2]; + /* Options segment */ + uchar tcpopt[1]; +}; + +typedef struct Tcp6hdr Tcp6hdr; +struct Tcp6hdr +{ + uchar vcf[4]; + uchar ploadlen[2]; + uchar proto; + uchar ttl; + uchar tcpsrc[IPaddrlen]; + uchar tcpdst[IPaddrlen]; + uchar tcpsport[2]; + uchar tcpdport[2]; + uchar tcpseq[4]; + uchar tcpack[4]; + uchar tcpflag[2]; + uchar tcpwin[2]; + uchar tcpcksum[2]; + uchar tcpurg[2]; + /* Options segment */ + uchar tcpopt[1]; +}; + +/* + * this represents the control info + * for a single packet. It is derived from + * a packet in ntohtcp{4,6}() and stuck into + * a packet in htontcp{4,6}(). + */ +typedef struct Tcp Tcp; +struct Tcp +{ + ushort source; + ushort dest; + ulong seq; + ulong ack; + uchar flags; + ushort ws; /* window scale option (if not zero) */ + ulong wnd; + ushort urg; + ushort mss; /* max segment size option (if not zero) */ + ushort len; /* size of data */ +}; + +/* + * this header is malloc'd to thread together fragments + * waiting to be coalesced + */ +typedef struct Reseq Reseq; +struct Reseq +{ + Reseq *next; + Tcp seg; + Block *bp; + ushort length; +}; + +/* + * the qlock in the Conv locks this structure + */ +typedef struct Tcpctl Tcpctl; +struct Tcpctl +{ + uchar state; /* Connection state */ + uchar type; /* Listening or active connection */ + uchar code; /* Icmp code */ + struct { + ulong una; /* Unacked data pointer */ + ulong nxt; /* Next sequence expected */ + ulong ptr; /* Data pointer */ + ulong wnd; /* Tcp send window */ + ulong urg; /* Urgent data pointer */ + ulong wl2; + int scale; /* how much to right shift window in xmitted packets */ + /* to implement tahoe and reno TCP */ + ulong dupacks; /* number of 
duplicate acks rcvd */ + int recovery; /* loss recovery flag */ + ulong rxt; /* right window marker for recovery */ + } snd; + struct { + ulong nxt; /* Receive pointer to next uchar slot */ + ulong wnd; /* Receive window incoming */ + ulong urg; /* Urgent pointer */ + int blocked; + int una; /* unacked data segs */ + int scale; /* how much to left shift window in rcved packets */ + } rcv; + ulong iss; /* Initial sequence number */ + int sawwsopt; /* true if we saw a wsopt on the incoming SYN */ + ulong cwind; /* Congestion window */ + int scale; /* desired snd.scale */ + ushort ssthresh; /* Slow start threshold */ + int resent; /* Bytes just resent */ + int irs; /* Initial received squence */ + ushort mss; /* Maximum segment size */ + int rerecv; /* Overlap of data rerecevived */ + ulong window; /* Receive window */ + uchar backoff; /* Exponential backoff counter */ + int backedoff; /* ms we've backed off for rexmits */ + uchar flags; /* State flags */ + Reseq *reseq; /* Resequencing queue */ + Tcptimer timer; /* Activity timer */ + Tcptimer acktimer; /* Acknowledge timer */ + Tcptimer rtt_timer; /* Round trip timer */ + Tcptimer katimer; /* keep alive timer */ + ulong rttseq; /* Round trip sequence */ + int srtt; /* Shortened round trip */ + int mdev; /* Mean deviation of round trip */ + int kacounter; /* count down for keep alive */ + uint sndsyntime; /* time syn sent */ + ulong time; /* time Finwait2 or Syn_received was sent */ + int nochecksum; /* non-zero means don't send checksums */ + int flgcnt; /* number of flags in the sequence (FIN,SEQ) */ + + union { + Tcp4hdr tcp4hdr; + Tcp6hdr tcp6hdr; + } protohdr; /* prototype header */ +}; + +/* + * New calls are put in limbo rather than having a conversation structure + * allocated. Thus, a SYN attack results in lots of limbo'd calls but not + * any real Conv structures mucking things up. Calls in limbo rexmit their + * SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second. 
+ * + * In particular they aren't on a listener's queue so that they don't figure + * in the input queue limit. + * + * If 1/2 of a T3 was attacking SYN packets, we'ld have a permanent queue + * of 70000 limbo'd calls. Not great for a linear list but doable. Therefore + * there is no hashing of this list. + */ +typedef struct Limbo Limbo; +struct Limbo +{ + Limbo *next; + + uchar laddr[IPaddrlen]; + uchar raddr[IPaddrlen]; + ushort lport; + ushort rport; + ulong irs; /* initial received sequence */ + ulong iss; /* initial sent sequence */ + ushort mss; /* mss from the other end */ + ushort rcvscale; /* how much to scale rcvd windows */ + ushort sndscale; /* how much to scale sent windows */ + ulong lastsend; /* last time we sent a synack */ + uchar version; /* v4 or v6 */ + uchar rexmits; /* number of retransmissions */ +}; + +int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */ +ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */ + +enum { + /* MIB stats */ + MaxConn, + Mss, + ActiveOpens, + PassiveOpens, + EstabResets, + CurrEstab, + InSegs, + OutSegs, + RetransSegs, + RetransTimeouts, + InErrs, + OutRsts, + + /* non-MIB stats */ + CsumErrs, + HlenErrs, + LenErrs, + OutOfOrder, + + Nstats +}; + +static char *statnames[] = +{ +[MaxConn] "MaxConn", +[Mss] "MaxSegment", +[ActiveOpens] "ActiveOpens", +[PassiveOpens] "PassiveOpens", +[EstabResets] "EstabResets", +[CurrEstab] "CurrEstab", +[InSegs] "InSegs", +[OutSegs] "OutSegs", +[RetransSegs] "RetransSegs", +[RetransTimeouts] "RetransTimeouts", +[InErrs] "InErrs", +[OutRsts] "OutRsts", +[CsumErrs] "CsumErrs", +[HlenErrs] "HlenErrs", +[LenErrs] "LenErrs", +[OutOfOrder] "OutOfOrder", +}; + +typedef struct Tcppriv Tcppriv; +struct Tcppriv +{ + /* List of active timers */ + QLock tl; + Tcptimer *timers; + + /* hash table for matching conversations */ + Ipht ht; + + /* calls in limbo waiting for an ACK to our SYN ACK */ + int nlimbo; + Limbo *lht[NLHT]; + + /* for keeping track of tcpackproc */ + 
QLock apl; + int ackprocstarted; + + uvlong stats[Nstats]; +}; + +/* + * Setting tcpporthogdefense to non-zero enables Dong Lin's + * solution to hijacked systems staking out port's as a form + * of DoS attack. + * + * To avoid stateless Conv hogs, we pick a sequence number at random. If + * that number gets acked by the other end, we shut down the connection. + * Look for tcpporthogdefense in the code. + */ +int tcpporthogdefense = 0; + +static int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort); +static void getreseq(Tcpctl*, Tcp*, Block**, ushort*); +static void localclose(Conv*, char*); +static void procsyn(Conv*, Tcp*); +static void tcpacktimer(void*); +static void tcpiput(Proto*, Ipifc*, Block*); +static void tcpkeepalive(void*); +static void tcpoutput(Conv*); +static void tcprcvwin(Conv*); +static void tcprxmit(Conv*); +static void tcpsetkacounter(Tcpctl*); +static void tcpsetscale(Conv*, Tcpctl*, ushort, ushort); +static void tcpsettimer(Tcpctl*); +static void tcpsndsyn(Conv*, Tcpctl*); +static void tcpstart(Conv*, int); +static void tcpsynackrtt(Conv*); +static void tcptimeout(void*); +static int tcptrim(Tcpctl*, Tcp*, Block**, ushort*); + +static void limborexmit(Proto*); +static void limbo(Conv*, uchar*, uchar*, Tcp*, int); + +static void +tcpsetstate(Conv *s, uchar newstate) +{ + Tcpctl *tcb; + uchar oldstate; + Tcppriv *tpriv; + + tpriv = s->p->priv; + + tcb = (Tcpctl*)s->ptcl; + + oldstate = tcb->state; + if(oldstate == newstate) + return; + + if(oldstate == Established) + tpriv->stats[CurrEstab]--; + if(newstate == Established) + tpriv->stats[CurrEstab]++; + + /** + print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport, + tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab ); + **/ + + switch(newstate) { + case Closed: + qclose(s->rq); + qclose(s->wq); + qclose(s->eq); + break; + + case Close_wait: /* Remote closes */ + qhangup(s->rq, nil); + break; + } + + tcb->state = newstate; + + if(oldstate == Syn_sent && newstate != 
Closed) + Fsconnected(s, nil); +} + +static char* +tcpconnect(Conv *c, char **argv, int argc) +{ + char *e; + Tcpctl *tcb; + + tcb = (Tcpctl*)(c->ptcl); + if(tcb->state != Closed) + return Econinuse; + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + tcpstart(c, TCP_CONNECT); + + return nil; +} + +static int +tcpstate(Conv *c, char *state, int n) +{ + Tcpctl *s; + + s = (Tcpctl*)(c->ptcl); + + return snprint(state, n, + "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", + tcpstates[s->state], + c->rq ? qlen(c->rq) : 0, + c->wq ? qlen(c->wq) : 0, + s->srtt, s->mdev, + s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, + s->timer.start, s->timer.count, s->rerecv, + s->katimer.start, s->katimer.count); +} + +static int +tcpinuse(Conv *c) +{ + Tcpctl *s; + + s = (Tcpctl*)(c->ptcl); + return s->state != Closed; +} + +static char* +tcpannounce(Conv *c, char **argv, int argc) +{ + char *e; + Tcpctl *tcb; + + tcb = (Tcpctl*)(c->ptcl); + if(tcb->state != Closed) + return Econinuse; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + tcpstart(c, TCP_LISTEN); + Fsconnected(c, nil); + + return nil; +} + +/* + * tcpclose is always called with the q locked + */ +static void +tcpclose(Conv *c) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)c->ptcl; + + qhangup(c->rq, nil); + qhangup(c->wq, nil); + qhangup(c->eq, nil); + qflush(c->rq); + + switch(tcb->state) { + case Listen: + /* + * reset any incoming calls to this listener + */ + Fsconnected(c, "Hangup"); + + localclose(c, nil); + break; + case Closed: + case Syn_sent: + localclose(c, nil); + break; + case Syn_received: + case Established: + tcb->flgcnt++; + tcb->snd.nxt++; + tcpsetstate(c, Finwait1); + tcpoutput(c); + break; + case Close_wait: + tcb->flgcnt++; + tcb->snd.nxt++; + tcpsetstate(c, Last_ack); + tcpoutput(c); + break; + } +} + +static void +tcpkick(void *x) +{ + Conv *s = x; + Tcpctl 
*tcb; + + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + + switch(tcb->state) { + case Syn_sent: + case Syn_received: + case Established: + case Close_wait: + /* + * Push data + */ + tcprcvwin(s); + tcpoutput(s); + break; + default: + localclose(s, "Hangup"); + break; + } + + qunlock(s); + poperror(); +} + +static void +tcprcvwin(Conv *s) /* Call with tcb locked */ +{ + int w; + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + w = tcb->window - qlen(s->rq); + if(w < 0) + w = 0; + if(w == 0) + netlog(s->p->f, Logtcp, "tcprcvwim: window %lud qlen %d\n", tcb->window, qlen(s->rq)); + tcb->rcv.wnd = w; + if(w == 0) + tcb->rcv.blocked = 1; +} + +static void +tcpacktimer(void *v) +{ + Tcpctl *tcb; + Conv *s; + + s = v; + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + if(tcb->state != Closed){ + tcb->flags |= FORCE; + tcprcvwin(s); + tcpoutput(s); + } + qunlock(s); + poperror(); +} + +static void +tcpcreate(Conv *c) +{ + c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); + c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c); +} + +static void +timerstate(Tcppriv *priv, Tcptimer *t, int newstate) +{ + if(newstate != TcptimerON){ + if(t->state == TcptimerON){ + /* unchain */ + if(priv->timers == t){ + priv->timers = t->next; + if(t->prev != nil) + panic("timerstate1"); + } + if(t->next) + t->next->prev = t->prev; + if(t->prev) + t->prev->next = t->next; + t->next = t->prev = nil; + } + } else { + if(t->state != TcptimerON){ + /* chain */ + if(t->prev != nil || t->next != nil) + panic("timerstate2"); + t->prev = nil; + t->next = priv->timers; + if(t->next) + t->next->prev = t; + priv->timers = t; + } + } + t->state = newstate; +} + +static void +tcpackproc(void *a) +{ + Tcptimer *t, *tp, *timeo; + Proto *tcp; + Tcppriv *priv; + int loop; + + tcp = a; + priv = tcp->priv; + + for(;;) { + tsleep(&up->sleep, return0, 0, MSPTICK); + + qlock(&priv->tl); + timeo = nil; + loop = 0; + for(t = priv->timers; t != 
nil; t = tp) { + if(loop++ > 10000) + panic("tcpackproc1"); + tp = t->next; + if(t->state == TcptimerON) { + t->count--; + if(t->count == 0) { + timerstate(priv, t, TcptimerDONE); + t->readynext = timeo; + timeo = t; + } + } + } + qunlock(&priv->tl); + + loop = 0; + for(t = timeo; t != nil; t = t->readynext) { + if(loop++ > 10000) + panic("tcpackproc2"); + if(t->state == TcptimerDONE && t->func != nil && !waserror()){ + (*t->func)(t->arg); + poperror(); + } + } + + limborexmit(tcp); + } +} + +static void +tcpgo(Tcppriv *priv, Tcptimer *t) +{ + if(t == nil || t->start == 0) + return; + + qlock(&priv->tl); + t->count = t->start; + timerstate(priv, t, TcptimerON); + qunlock(&priv->tl); +} + +static void +tcphalt(Tcppriv *priv, Tcptimer *t) +{ + if(t == nil) + return; + + qlock(&priv->tl); + timerstate(priv, t, TcptimerOFF); + qunlock(&priv->tl); +} + +static int +backoff(int n) +{ + return 1 << n; +} + +static void +localclose(Conv *s, char *reason) /* called with tcb locked */ +{ + Tcpctl *tcb; + Reseq *rp,*rp1; + Tcppriv *tpriv; + + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + iphtrem(&tpriv->ht, s); + + tcphalt(tpriv, &tcb->timer); + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + + /* Flush reassembly queue; nothing more can arrive */ + for(rp = tcb->reseq; rp != nil; rp = rp1) { + rp1 = rp->next; + freeblist(rp->bp); + free(rp); + } + tcb->reseq = nil; + + if(tcb->state == Syn_sent) + Fsconnected(s, reason); + if(s->state == Announced) + wakeup(&s->listenr); + + qhangup(s->rq, reason); + qhangup(s->wq, reason); + + tcpsetstate(s, Closed); +} + +/* mtu (- TCP + IP hdr len) of 1st hop */ +static int +tcpmtu(Proto *tcp, uchar *addr, int version, int *scale) +{ + Ipifc *ifc; + int mtu; + + ifc = findipifc(tcp->f, addr, 0); + switch(version){ + default: + case V4: + mtu = DEF_MSS; + if(ifc != nil) + mtu = ifc->maxtu - ifc->medium->hsize - (TCP4_PKT + TCP4_HDRSIZE); + break; + case V6: + mtu = DEF_MSS6; + 
if(ifc != nil) + mtu = ifc->maxtu - ifc->medium->hsize - (TCP6_PKT + TCP6_HDRSIZE); + break; + } + if(ifc != nil){ + if(ifc->mbps > 1000) + *scale = HaveWS | 4; + else if(ifc->mbps > 100) + *scale = HaveWS | 3; + else if(ifc->mbps > 10) + *scale = HaveWS | 1; + else + *scale = HaveWS | 0; + } else + *scale = HaveWS | 0; + + return mtu; +} + +static void +inittcpctl(Conv *s, int mode) +{ + Tcpctl *tcb; + Tcp4hdr* h4; + Tcp6hdr* h6; + Tcppriv *tpriv; + int mss; + + tcb = (Tcpctl*)s->ptcl; + + memset(tcb, 0, sizeof(Tcpctl)); + + tcb->ssthresh = 65535; + tcb->srtt = tcp_irtt<mdev = 0; + + /* setup timers */ + tcb->timer.start = tcp_irtt / MSPTICK; + tcb->timer.func = tcptimeout; + tcb->timer.arg = s; + tcb->rtt_timer.start = MAX_TIME; + tcb->acktimer.start = TCP_ACK / MSPTICK; + tcb->acktimer.func = tcpacktimer; + tcb->acktimer.arg = s; + tcb->katimer.start = DEF_KAT / MSPTICK; + tcb->katimer.func = tcpkeepalive; + tcb->katimer.arg = s; + + mss = DEF_MSS; + + /* create a prototype(pseudo) header */ + if(mode != TCP_LISTEN){ + if(ipcmp(s->laddr, IPnoaddr) == 0) + findlocalip(s->p->f, s->laddr, s->raddr); + + switch(s->ipversion){ + case V4: + h4 = &tcb->protohdr.tcp4hdr; + memset(h4, 0, sizeof(*h4)); + h4->proto = IP_TCPPROTO; + hnputs(h4->tcpsport, s->lport); + hnputs(h4->tcpdport, s->rport); + v6tov4(h4->tcpsrc, s->laddr); + v6tov4(h4->tcpdst, s->raddr); + break; + case V6: + h6 = &tcb->protohdr.tcp6hdr; + memset(h6, 0, sizeof(*h6)); + h6->proto = IP_TCPPROTO; + hnputs(h6->tcpsport, s->lport); + hnputs(h6->tcpdport, s->rport); + ipmove(h6->tcpsrc, s->laddr); + ipmove(h6->tcpdst, s->raddr); + mss = DEF_MSS6; + break; + default: + panic("inittcpctl: version %d", s->ipversion); + } + } + + tcb->mss = tcb->cwind = mss; + tpriv = s->p->priv; + tpriv->stats[Mss] = tcb->mss; + + /* default is no window scaling */ + tcb->window = QMAX; + tcb->rcv.wnd = QMAX; + tcb->rcv.scale = 0; + tcb->snd.scale = 0; + qsetlimit(s->rq, QMAX); +} + +/* + * called with s qlocked + */ +static 
void +tcpstart(Conv *s, int mode) +{ + Tcpctl *tcb; + Tcppriv *tpriv; + char kpname[KNAMELEN]; + + tpriv = s->p->priv; + + if(tpriv->ackprocstarted == 0){ + qlock(&tpriv->apl); + if(tpriv->ackprocstarted == 0){ + sprint(kpname, "#I%dtcpack", s->p->f->dev); + kproc(kpname, tcpackproc, s->p); + tpriv->ackprocstarted = 1; + } + qunlock(&tpriv->apl); + } + + tcb = (Tcpctl*)s->ptcl; + + inittcpctl(s, mode); + + iphtadd(&tpriv->ht, s); + switch(mode) { + case TCP_LISTEN: + tpriv->stats[PassiveOpens]++; + tcb->flags |= CLONE; + tcpsetstate(s, Listen); + break; + + case TCP_CONNECT: + tpriv->stats[ActiveOpens]++; + tcb->flags |= ACTIVE; + tcpsndsyn(s, tcb); + tcpsetstate(s, Syn_sent); + tcpoutput(s); + break; + } +} + +static char* +tcpflag(ushort flag) +{ + static char buf[128]; + + sprint(buf, "%d", flag>>10); /* Head len */ + if(flag & URG) + strcat(buf, " URG"); + if(flag & ACK) + strcat(buf, " ACK"); + if(flag & PSH) + strcat(buf, " PSH"); + if(flag & RST) + strcat(buf, " RST"); + if(flag & SYN) + strcat(buf, " SYN"); + if(flag & FIN) + strcat(buf, " FIN"); + + return buf; +} + +static Block* +htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb) +{ + int dlen; + Tcp6hdr *h; + ushort csum; + ushort hdrlen, optpad = 0; + uchar *opt; + + hdrlen = TCP6_HDRSIZE; + if(tcph->flags & SYN){ + if(tcph->mss) + hdrlen += MSS_LENGTH; + if(tcph->ws) + hdrlen += WS_LENGTH; + optpad = hdrlen & 3; + if(optpad) + optpad = 4 - optpad; + hdrlen += optpad; + } + + if(data) { + dlen = blocklen(data); + data = padblock(data, hdrlen + TCP6_PKT); + if(data == nil) + return nil; + } + else { + dlen = 0; + data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */ + if(data == nil) + return nil; + data->wp += hdrlen + TCP6_PKT; + } + + /* copy in pseudo ip header plus port numbers */ + h = (Tcp6hdr *)(data->rp); + memmove(h, ph, TCP6_TCBPHDRSZ); + + /* compose pseudo tcp header, do cksum calculation */ + hnputl(h->vcf, hdrlen + dlen); + h->ploadlen[0] = h->ploadlen[1] = 
h->proto = 0; + h->ttl = ph->proto; + + /* copy in variable bits */ + hnputl(h->tcpseq, tcph->seq); + hnputl(h->tcpack, tcph->ack); + hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); + hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0)); + hnputs(h->tcpurg, tcph->urg); + + if(tcph->flags & SYN){ + opt = h->tcpopt; + if(tcph->mss != 0){ + *opt++ = MSSOPT; + *opt++ = MSS_LENGTH; + hnputs(opt, tcph->mss); +// print("our outgoing mss %d\n", tcph->mss); + opt += 2; + } + if(tcph->ws != 0){ + *opt++ = WSOPT; + *opt++ = WS_LENGTH; + *opt++ = tcph->ws; + } + while(optpad-- > 0) + *opt++ = NOOPOPT; + } + + if(tcb != nil && tcb->nochecksum){ + h->tcpcksum[0] = h->tcpcksum[1] = 0; + } else { + csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE); + hnputs(h->tcpcksum, csum); + } + + /* move from pseudo header back to normal ip header */ + memset(h->vcf, 0, 4); + h->vcf[0] = IP_VER6; + hnputs(h->ploadlen, hdrlen+dlen); + h->proto = ph->proto; + + return data; +} + +static Block* +htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb) +{ + int dlen; + Tcp4hdr *h; + ushort csum; + ushort hdrlen, optpad = 0; + uchar *opt; + + hdrlen = TCP4_HDRSIZE; + if(tcph->flags & SYN){ + if(tcph->mss) + hdrlen += MSS_LENGTH; + if(tcph->ws) + hdrlen += WS_LENGTH; + optpad = hdrlen & 3; + if(optpad) + optpad = 4 - optpad; + hdrlen += optpad; + } + + if(data) { + dlen = blocklen(data); + data = padblock(data, hdrlen + TCP4_PKT); + if(data == nil) + return nil; + } + else { + dlen = 0; + data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */ + if(data == nil) + return nil; + data->wp += hdrlen + TCP4_PKT; + } + + /* copy in pseudo ip header plus port numbers */ + h = (Tcp4hdr *)(data->rp); + memmove(h, ph, TCP4_TCBPHDRSZ); + + /* copy in variable bits */ + hnputs(h->tcplen, hdrlen + dlen); + hnputl(h->tcpseq, tcph->seq); + hnputl(h->tcpack, tcph->ack); + hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); + hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? 
tcb->snd.scale : 0)); + hnputs(h->tcpurg, tcph->urg); + + if(tcph->flags & SYN){ + opt = h->tcpopt; + if(tcph->mss != 0){ + *opt++ = MSSOPT; + *opt++ = MSS_LENGTH; + hnputs(opt, tcph->mss); + opt += 2; + } + if(tcph->ws != 0){ + *opt++ = WSOPT; + *opt++ = WS_LENGTH; + *opt++ = tcph->ws; + } + while(optpad-- > 0) + *opt++ = NOOPOPT; + } + + if(tcb != nil && tcb->nochecksum){ + h->tcpcksum[0] = h->tcpcksum[1] = 0; + } else { + csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE); + hnputs(h->tcpcksum, csum); + } + + return data; +} + +static int +ntohtcp6(Tcp *tcph, Block **bpp) +{ + Tcp6hdr *h; + uchar *optr; + ushort hdrlen; + ushort optlen; + int n; + + *bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE); + if(*bpp == nil) + return -1; + + h = (Tcp6hdr *)((*bpp)->rp); + tcph->source = nhgets(h->tcpsport); + tcph->dest = nhgets(h->tcpdport); + tcph->seq = nhgetl(h->tcpseq); + tcph->ack = nhgetl(h->tcpack); + hdrlen = (h->tcpflag[0]>>2) & ~3; + if(hdrlen < TCP6_HDRSIZE) { + freeblist(*bpp); + return -1; + } + + tcph->flags = h->tcpflag[1]; + tcph->wnd = nhgets(h->tcpwin); + tcph->urg = nhgets(h->tcpurg); + tcph->mss = 0; + tcph->ws = 0; + tcph->len = nhgets(h->ploadlen) - hdrlen; + + *bpp = pullupblock(*bpp, hdrlen+TCP6_PKT); + if(*bpp == nil) + return -1; + + optr = h->tcpopt; + n = hdrlen - TCP6_HDRSIZE; + while(n > 0 && *optr != EOLOPT) { + if(*optr == NOOPOPT) { + n--; + optr++; + continue; + } + optlen = optr[1]; + if(optlen < 2 || optlen > n) + break; + switch(*optr) { + case MSSOPT: + if(optlen == MSS_LENGTH) + tcph->mss = nhgets(optr+2); + break; + case WSOPT: + if(optlen == WS_LENGTH && *(optr+2) <= 14) + tcph->ws = HaveWS | *(optr+2); + break; + } + n -= optlen; + optr += optlen; + } + return hdrlen; +} + +static int +ntohtcp4(Tcp *tcph, Block **bpp) +{ + Tcp4hdr *h; + uchar *optr; + ushort hdrlen; + ushort optlen; + int n; + + *bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE); + if(*bpp == nil) + return -1; + + h = (Tcp4hdr *)((*bpp)->rp); + 
tcph->source = nhgets(h->tcpsport); + tcph->dest = nhgets(h->tcpdport); + tcph->seq = nhgetl(h->tcpseq); + tcph->ack = nhgetl(h->tcpack); + + hdrlen = (h->tcpflag[0]>>2) & ~3; + if(hdrlen < TCP4_HDRSIZE) { + freeblist(*bpp); + return -1; + } + + tcph->flags = h->tcpflag[1]; + tcph->wnd = nhgets(h->tcpwin); + tcph->urg = nhgets(h->tcpurg); + tcph->mss = 0; + tcph->ws = 0; + tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT); + + *bpp = pullupblock(*bpp, hdrlen+TCP4_PKT); + if(*bpp == nil) + return -1; + + optr = h->tcpopt; + n = hdrlen - TCP4_HDRSIZE; + while(n > 0 && *optr != EOLOPT) { + if(*optr == NOOPOPT) { + n--; + optr++; + continue; + } + optlen = optr[1]; + if(optlen < 2 || optlen > n) + break; + switch(*optr) { + case MSSOPT: + if(optlen == MSS_LENGTH) { + tcph->mss = nhgets(optr+2); +// print("new incoming mss %d\n", tcph->mss); + } + break; + case WSOPT: + if(optlen == WS_LENGTH && *(optr+2) <= 14) + tcph->ws = HaveWS | *(optr+2); + break; + } + n -= optlen; + optr += optlen; + } + return hdrlen; +} + +/* + * For outgoing calls, generate an initial sequence + * number and put a SYN on the send queue + */ +static void +tcpsndsyn(Conv *s, Tcpctl *tcb) +{ + Tcppriv *tpriv; + + tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); + tcb->rttseq = tcb->iss; + tcb->snd.wl2 = tcb->iss; + tcb->snd.una = tcb->iss; + tcb->snd.ptr = tcb->rttseq; + tcb->snd.nxt = tcb->rttseq; + tcb->flgcnt++; + tcb->flags |= FORCE; + tcb->sndsyntime = NOW; + + /* set desired mss and scale */ + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale); + tpriv = s->p->priv; + tpriv->stats[Mss] = tcb->mss; +} + +static void +sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason) +{ + Block *hbp; + uchar rflags; + Tcppriv *tpriv; + Tcp4hdr ph4; + Tcp6hdr ph6; + + netlog(tcp->f, Logtcp, "sndrst: %s\n", reason); + + tpriv = tcp->priv; + + if(seg->flags & RST) + return; + + /* make pseudo header */ + switch(version) { + case V4: + memset(&ph4, 0, 
sizeof(ph4)); + ph4.vihl = IP_VER4; + v6tov4(ph4.tcpsrc, dest); + v6tov4(ph4.tcpdst, source); + ph4.proto = IP_TCPPROTO; + hnputs(ph4.tcplen, TCP4_HDRSIZE); + hnputs(ph4.tcpsport, seg->dest); + hnputs(ph4.tcpdport, seg->source); + break; + case V6: + memset(&ph6, 0, sizeof(ph6)); + ph6.vcf[0] = IP_VER6; + ipmove(ph6.tcpsrc, dest); + ipmove(ph6.tcpdst, source); + ph6.proto = IP_TCPPROTO; + hnputs(ph6.ploadlen, TCP6_HDRSIZE); + hnputs(ph6.tcpsport, seg->dest); + hnputs(ph6.tcpdport, seg->source); + break; + default: + panic("sndrst: version %d", version); + } + + tpriv->stats[OutRsts]++; + rflags = RST; + + /* convince the other end that this reset is in band */ + if(seg->flags & ACK) { + seg->seq = seg->ack; + seg->ack = 0; + } + else { + rflags |= ACK; + seg->ack = seg->seq; + seg->seq = 0; + if(seg->flags & SYN) + seg->ack++; + seg->ack += length; + if(seg->flags & FIN) + seg->ack++; + } + seg->flags = rflags; + seg->wnd = 0; + seg->urg = 0; + seg->mss = 0; + seg->ws = 0; + switch(version) { + case V4: + hbp = htontcp4(seg, nil, &ph4, nil); + if(hbp == nil) + return; + ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + case V6: + hbp = htontcp6(seg, nil, &ph6, nil); + if(hbp == nil) + return; + ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + default: + panic("sndrst2: version %d", version); + } +} + +/* + * send a reset to the remote side and close the conversation + * called with s qlocked + */ +static char* +tcphangup(Conv *s) +{ + Tcp seg; + Tcpctl *tcb; + Block *hbp; + + tcb = (Tcpctl*)s->ptcl; + if(waserror()) + return up->errstr; + if(ipcmp(s->raddr, IPnoaddr) != 0) { + if(!waserror()){ + memset(&seg, 0, sizeof seg); + seg.flags = RST | ACK; + seg.ack = tcb->rcv.nxt; + tcb->rcv.una = 0; + seg.seq = tcb->snd.ptr; + seg.wnd = 0; + seg.urg = 0; + seg.mss = 0; + seg.ws = 0; + switch(s->ipversion) { + case V4: + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb); + ipoput4(s->p->f, hbp, 0, s->ttl, 
s->tos, s); + break; + case V6: + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb); + ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s); + break; + default: + panic("tcphangup: version %d", s->ipversion); + } + poperror(); + } + } + localclose(s, nil); + poperror(); + return nil; +} + +/* + * (re)send a SYN ACK + */ +static int +sndsynack(Proto *tcp, Limbo *lp) +{ + Block *hbp; + Tcp4hdr ph4; + Tcp6hdr ph6; + Tcp seg; + int scale; + + /* make pseudo header */ + switch(lp->version) { + case V4: + memset(&ph4, 0, sizeof(ph4)); + ph4.vihl = IP_VER4; + v6tov4(ph4.tcpsrc, lp->laddr); + v6tov4(ph4.tcpdst, lp->raddr); + ph4.proto = IP_TCPPROTO; + hnputs(ph4.tcplen, TCP4_HDRSIZE); + hnputs(ph4.tcpsport, lp->lport); + hnputs(ph4.tcpdport, lp->rport); + break; + case V6: + memset(&ph6, 0, sizeof(ph6)); + ph6.vcf[0] = IP_VER6; + ipmove(ph6.tcpsrc, lp->laddr); + ipmove(ph6.tcpdst, lp->raddr); + ph6.proto = IP_TCPPROTO; + hnputs(ph6.ploadlen, TCP6_HDRSIZE); + hnputs(ph6.tcpsport, lp->lport); + hnputs(ph6.tcpdport, lp->rport); + break; + default: + panic("sndrst: version %d", lp->version); + } + + memset(&seg, 0, sizeof seg); + seg.seq = lp->iss; + seg.ack = lp->irs+1; + seg.flags = SYN|ACK; + seg.urg = 0; + seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale); +// if (seg.mss > lp->mss && lp->mss >= 512) +// seg.mss = lp->mss; + seg.wnd = QMAX; + + /* if the other side set scale, we should too */ + if(lp->rcvscale){ + seg.ws = scale; + lp->sndscale = scale; + } else { + seg.ws = 0; + lp->sndscale = 0; + } + + switch(lp->version) { + case V4: + hbp = htontcp4(&seg, nil, &ph4, nil); + if(hbp == nil) + return -1; + ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + case V6: + hbp = htontcp6(&seg, nil, &ph6, nil); + if(hbp == nil) + return -1; + ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + default: + panic("sndsnack: version %d", lp->version); + } + lp->lastsend = NOW; + return 0; +} + +#define hashipa(a, p) ( ( 
(a)[IPaddrlen-2] + (a)[IPaddrlen-1] + p )&LHTMASK ) + +/* + * put a call into limbo and respond with a SYN ACK + * + * called with proto locked + */ +static void +limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version) +{ + Limbo *lp, **l; + Tcppriv *tpriv; + int h; + + tpriv = s->p->priv; + h = hashipa(source, seg->source); + + for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){ + lp = *l; + if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version) + continue; + if(ipcmp(lp->raddr, source) != 0) + continue; + if(ipcmp(lp->laddr, dest) != 0) + continue; + + /* each new SYN restarts the retransmits */ + lp->irs = seg->seq; + break; + } + lp = *l; + if(lp == nil){ + if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){ + lp = tpriv->lht[h]; + tpriv->lht[h] = lp->next; + lp->next = nil; + } else { + lp = malloc(sizeof(*lp)); + if(lp == nil) + return; + tpriv->nlimbo++; + } + *l = lp; + lp->version = version; + ipmove(lp->laddr, dest); + ipmove(lp->raddr, source); + lp->lport = seg->dest; + lp->rport = seg->source; + lp->mss = seg->mss; + lp->rcvscale = seg->ws; + lp->irs = seg->seq; + lp->iss = (nrand(1<<16)<<16)|nrand(1<<16); + } + + if(sndsynack(s->p, lp) < 0){ + *l = lp->next; + tpriv->nlimbo--; + free(lp); + } +} + +/* + * resend SYN ACK's once every SYNACK_RXTIMER ms. 
+ */ +static void +limborexmit(Proto *tcp) +{ + Tcppriv *tpriv; + Limbo **l, *lp; + int h; + int seen; + ulong now; + + tpriv = tcp->priv; + + if(!canqlock(tcp)) + return; + seen = 0; + now = NOW; + for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){ + for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){ + lp = *l; + seen++; + if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER) + continue; + + /* time it out after 1 second */ + if(++(lp->rexmits) > 5){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + continue; + } + + /* if we're being attacked, don't bother resending SYN ACK's */ + if(tpriv->nlimbo > 100) + continue; + + if(sndsynack(tcp, lp) < 0){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + continue; + } + + l = &lp->next; + } + } + qunlock(tcp); +} + +/* + * lookup call in limbo. if found, throw it out. + * + * called with proto locked + */ +static void +limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) +{ + Limbo *lp, **l; + int h; + Tcppriv *tpriv; + + tpriv = s->p->priv; + + /* find a call in limbo */ + h = hashipa(src, segp->source); + for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){ + lp = *l; + if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version) + continue; + if(ipcmp(lp->laddr, dst) != 0) + continue; + if(ipcmp(lp->raddr, src) != 0) + continue; + + /* RST can only follow the SYN */ + if(segp->seq == lp->irs+1){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + } + break; + } +} + +/* + * come here when we finally get an ACK to our SYN-ACK. + * lookup call in limbo. 
if found, create a new conversation + * + * called with proto locked + */ +static Conv* +tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) +{ + Conv *new; + Tcpctl *tcb; + Tcppriv *tpriv; + Tcp4hdr *h4; + Tcp6hdr *h6; + Limbo *lp, **l; + int h; + + /* unless it's just an ack, it can't be someone coming out of limbo */ + if((segp->flags & SYN) || (segp->flags & ACK) == 0) + return nil; + + tpriv = s->p->priv; + + /* find a call in limbo */ + h = hashipa(src, segp->source); + for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){ + netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n", + src, segp->source, lp->raddr, lp->rport, + dst, segp->dest, lp->laddr, lp->lport, + version, lp->version + ); + + if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version) + continue; + if(ipcmp(lp->laddr, dst) != 0) + continue; + if(ipcmp(lp->raddr, src) != 0) + continue; + + /* we're assuming no data with the initial SYN */ + if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){ + netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n", + segp->seq, lp->irs+1, segp->ack, lp->iss+1); + lp = nil; + } else { + tpriv->nlimbo--; + *l = lp->next; + } + break; + } + if(lp == nil) + return nil; + + new = Fsnewcall(s, src, segp->source, dst, segp->dest, version); + if(new == nil) + return nil; + + memmove(new->ptcl, s->ptcl, sizeof(Tcpctl)); + tcb = (Tcpctl*)new->ptcl; + tcb->flags &= ~CLONE; + tcb->timer.arg = new; + tcb->timer.state = TcptimerOFF; + tcb->acktimer.arg = new; + tcb->acktimer.state = TcptimerOFF; + tcb->katimer.arg = new; + tcb->katimer.state = TcptimerOFF; + tcb->rtt_timer.arg = new; + tcb->rtt_timer.state = TcptimerOFF; + + tcb->irs = lp->irs; + tcb->rcv.nxt = tcb->irs+1; + tcb->rcv.urg = tcb->rcv.nxt; + + tcb->iss = lp->iss; + tcb->rttseq = tcb->iss; + tcb->snd.wl2 = tcb->iss; + tcb->snd.una = tcb->iss+1; + tcb->snd.ptr = tcb->iss+1; + tcb->snd.nxt = tcb->iss+1; + tcb->flgcnt = 0; + 
tcb->flags |= SYNACK; + + /* our sending max segment size cannot be bigger than what he asked for */ + if(lp->mss != 0 && lp->mss < tcb->mss) { + tcb->mss = lp->mss; + tpriv->stats[Mss] = tcb->mss; + } + + /* window scaling */ + tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale); + + /* the congestion window always starts out as a single segment */ + tcb->snd.wnd = segp->wnd; + tcb->cwind = tcb->mss; + + /* set initial round trip time */ + tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER; + tcpsynackrtt(new); + + free(lp); + + /* set up proto header */ + switch(version){ + case V4: + h4 = &tcb->protohdr.tcp4hdr; + memset(h4, 0, sizeof(*h4)); + h4->proto = IP_TCPPROTO; + hnputs(h4->tcpsport, new->lport); + hnputs(h4->tcpdport, new->rport); + v6tov4(h4->tcpsrc, dst); + v6tov4(h4->tcpdst, src); + break; + case V6: + h6 = &tcb->protohdr.tcp6hdr; + memset(h6, 0, sizeof(*h6)); + h6->proto = IP_TCPPROTO; + hnputs(h6->tcpsport, new->lport); + hnputs(h6->tcpdport, new->rport); + ipmove(h6->tcpsrc, dst); + ipmove(h6->tcpdst, src); + break; + default: + panic("tcpincoming: version %d", new->ipversion); + } + + tcpsetstate(new, Established); + + iphtadd(&tpriv->ht, new); + + return new; +} + +static int +seq_within(ulong x, ulong low, ulong high) +{ + if(low <= high){ + if(low <= x && x <= high) + return 1; + } + else { + if(x >= low || x <= high) + return 1; + } + return 0; +} + +static int +seq_lt(ulong x, ulong y) +{ + return (int)(x-y) < 0; +} + +static int +seq_le(ulong x, ulong y) +{ + return (int)(x-y) <= 0; +} + +static int +seq_gt(ulong x, ulong y) +{ + return (int)(x-y) > 0; +} + +static int +seq_ge(ulong x, ulong y) +{ + return (int)(x-y) >= 0; +} + +/* + * use the time between the first SYN and it's ack as the + * initial round trip time + */ +static void +tcpsynackrtt(Conv *s) +{ + Tcpctl *tcb; + int delta; + Tcppriv *tpriv; + + tcb = (Tcpctl*)s->ptcl; + tpriv = s->p->priv; + + delta = NOW - tcb->sndsyntime; + tcb->srtt = delta<mdev = delta<rtt_timer); +} 
+ +static void +update(Conv *s, Tcp *seg) +{ + int rtt, delta; + Tcpctl *tcb; + ulong acked; + ulong expand; + Tcppriv *tpriv; + + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + /* if everything has been acked, force output(?) */ + if(seq_gt(seg->ack, tcb->snd.nxt)) { + tcb->flags |= FORCE; + return; + } + + /* added by Dong Lin for fast retransmission */ + if(seg->ack == tcb->snd.una + && tcb->snd.una != tcb->snd.nxt + && seg->len == 0 + && seg->wnd == tcb->snd.wnd) { + + /* this is a pure ack w/o window update */ + netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %lud advwin %lud\n", + tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd); + + if(++tcb->snd.dupacks == TCPREXMTTHRESH) { + /* + * tahoe tcp rxt the packet, half sshthresh, + * and set cwnd to one packet + */ + tcb->snd.recovery = 1; + tcb->snd.rxt = tcb->snd.nxt; + netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt); + tcprxmit(s); + } else { + /* do reno tcp here. */ + } + } + + /* + * update window + */ + if(seq_gt(seg->ack, tcb->snd.wl2) + || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){ + tcb->snd.wnd = seg->wnd; + tcb->snd.wl2 = seg->ack; + } + + if(!seq_gt(seg->ack, tcb->snd.una)){ + /* + * don't let us hangup if sending into a closed window and + * we're still getting acks + */ + if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){ + tcb->backedoff = MAXBACKMS/4; + } + return; + } + + /* + * any positive ack turns off fast rxt, + * (should we do new-reno on partial acks?) 
+ */ + if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) { + tcb->snd.dupacks = 0; + tcb->snd.recovery = 0; + } else + netlog(s->p->f, Logtcp, "rxt next %lud, cwin %lud\n", seg->ack, tcb->cwind); + + /* Compute the new send window size */ + acked = seg->ack - tcb->snd.una; + + /* avoid slow start and timers for SYN acks */ + if((tcb->flags & SYNACK) == 0) { + tcb->flags |= SYNACK; + acked--; + tcb->flgcnt--; + goto done; + } + + /* slow start as long as we're not recovering from lost packets */ + if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) { + if(tcb->cwind < tcb->ssthresh) { + expand = tcb->mss; + if(acked < expand) + expand = acked; + } + else + expand = ((int)tcb->mss * tcb->mss) / tcb->cwind; + + if(tcb->cwind + expand < tcb->cwind) + expand = tcb->snd.wnd - tcb->cwind; + if(tcb->cwind + expand > tcb->snd.wnd) + expand = tcb->snd.wnd - tcb->cwind; + tcb->cwind += expand; + } + + /* Adjust the timers according to the round trip time */ + if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) { + tcphalt(tpriv, &tcb->rtt_timer); + if((tcb->flags&RETRAN) == 0) { + tcb->backoff = 0; + tcb->backedoff = 0; + rtt = tcb->rtt_timer.start - tcb->rtt_timer.count; + if(rtt == 0) + rtt = 1; /* otherwise all close systems will rexmit in 0 time */ + rtt *= MSPTICK; + if(tcb->srtt == 0) { + tcb->srtt = rtt << LOGAGAIN; + tcb->mdev = rtt << LOGDGAIN; + } else { + delta = rtt - (tcb->srtt>>LOGAGAIN); + tcb->srtt += delta; + if(tcb->srtt <= 0) + tcb->srtt = 1; + + delta = abs(delta) - (tcb->mdev>>LOGDGAIN); + tcb->mdev += delta; + if(tcb->mdev <= 0) + tcb->mdev = 1; + } + tcpsettimer(tcb); + } + } + +done: + if(qdiscard(s->wq, acked) < acked) + tcb->flgcnt--; + + tcb->snd.una = seg->ack; + if(seq_gt(seg->ack, tcb->snd.urg)) + tcb->snd.urg = seg->ack; + + if(tcb->snd.una != tcb->snd.nxt) + tcpgo(tpriv, &tcb->timer); + else + tcphalt(tpriv, &tcb->timer); + + if(seq_lt(tcb->snd.ptr, tcb->snd.una)) + tcb->snd.ptr = tcb->snd.una; + + tcb->flags &= 
~RETRAN; + tcb->backoff = 0; + tcb->backedoff = 0; +} + +static void +tcpiput(Proto *tcp, Ipifc*, Block *bp) +{ + Tcp seg; + Tcp4hdr *h4; + Tcp6hdr *h6; + int hdrlen; + Tcpctl *tcb; + ushort length, csum; + uchar source[IPaddrlen], dest[IPaddrlen]; + Conv *s; + Fs *f; + Tcppriv *tpriv; + uchar version; + + f = tcp->f; + tpriv = tcp->priv; + + tpriv->stats[InSegs]++; + + h4 = (Tcp4hdr*)(bp->rp); + h6 = (Tcp6hdr*)(bp->rp); + memset(&seg, 0, sizeof seg); + + if((h4->vihl&0xF0)==IP_VER4) { + version = V4; + length = nhgets(h4->length); + v4tov6(dest, h4->tcpdst); + v4tov6(source, h4->tcpsrc); + + h4->Unused = 0; + hnputs(h4->tcplen, length-TCP4_PKT); + if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) && + ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) { + tpriv->stats[CsumErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp proto cksum\n"); + freeblist(bp); + return; + } + + hdrlen = ntohtcp4(&seg, &bp); + if(hdrlen < 0){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp hdr len\n"); + return; + } + + /* trim the packet to the size claimed by the datagram */ + length -= hdrlen+TCP4_PKT; + bp = trimblock(bp, hdrlen+TCP4_PKT, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcp len < 0 after trim\n"); + return; + } + } + else { + int ttl = h6->ttl; + int proto = h6->proto; + + version = V6; + length = nhgets(h6->ploadlen); + ipmove(dest, h6->tcpdst); + ipmove(source, h6->tcpsrc); + + h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0; + h6->ttl = proto; + hnputl(h6->vcf, length); + if((h6->tcpcksum[0] || h6->tcpcksum[1]) && + (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) { + tpriv->stats[CsumErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, + "bad tcpv6 proto cksum: got %#ux, computed %#ux\n", + h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum); + freeblist(bp); + return; + } + h6->ttl = ttl; + h6->proto = proto; + hnputs(h6->ploadlen, length); + + 
hdrlen = ntohtcp6(&seg, &bp); + if(hdrlen < 0){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcpv6 hdr len\n"); + return; + } + + /* trim the packet to the size claimed by the datagram */ + length -= hdrlen; + bp = trimblock(bp, hdrlen+TCP6_PKT, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcpv6 len < 0 after trim\n"); + return; + } + } + + /* lock protocol while searching for a conversation */ + qlock(tcp); + + /* Look for a matching conversation */ + s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest); + if(s == nil){ + netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n", + source, seg.source, dest, seg.dest); +reset: + qunlock(tcp); + sndrst(tcp, source, dest, length, &seg, version, "no conversation"); + freeblist(bp); + return; + } + + /* if it's a listener, look for the right flags and get a new conv */ + tcb = (Tcpctl*)s->ptcl; + if(tcb->state == Listen){ + if(seg.flags & RST){ + limborst(s, &seg, source, dest, version); + qunlock(tcp); + freeblist(bp); + return; + } + + /* if this is a new SYN, put the call into limbo */ + if((seg.flags & SYN) && (seg.flags & ACK) == 0){ + limbo(s, source, dest, &seg, version); + qunlock(tcp); + freeblist(bp); + return; + } + + /* + * if there's a matching call in limbo, tcpincoming will + * return it in state Syn_received + */ + s = tcpincoming(s, &seg, source, dest, version); + if(s == nil) + goto reset; + } + + /* The rest of the input state machine is run with the control block + * locked and implements the state machine directly out of the RFC. + * Out-of-band data is ignored - it was always a bad idea. 
+ */ + tcb = (Tcpctl*)s->ptcl; + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + qunlock(tcp); + + /* fix up window */ + seg.wnd <<= tcb->rcv.scale; + + /* every input packet in puts off the keep alive time out */ + tcpsetkacounter(tcb); + + switch(tcb->state) { + case Closed: + sndrst(tcp, source, dest, length, &seg, version, "sending to Closed"); + goto raise; + case Syn_sent: + if(seg.flags & ACK) { + if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) { + sndrst(tcp, source, dest, length, &seg, version, + "bad seq in Syn_sent"); + goto raise; + } + } + if(seg.flags & RST) { + if(seg.flags & ACK) + localclose(s, Econrefused); + goto raise; + } + + if(seg.flags & SYN) { + procsyn(s, &seg); + if(seg.flags & ACK){ + update(s, &seg); + tcpsynackrtt(s); + tcpsetstate(s, Established); + tcpsetscale(s, tcb, seg.ws, tcb->scale); + } + else { + tcb->time = NOW; + tcpsetstate(s, Syn_received); /* DLP - shouldn't this be a reset? */ + } + + if(length != 0 || (seg.flags & FIN)) + break; + + freeblist(bp); + goto output; + } + else + freeblist(bp); + + qunlock(s); + poperror(); + return; + case Syn_received: + /* doesn't matter if it's the correct ack, we're just trying to set timing */ + if(seg.flags & ACK) + tcpsynackrtt(s); + break; + } + + /* + * One DOS attack is to open connections to us and then forget about them, + * thereby tying up a conv at no long term cost to the attacker. + * This is an attempt to defeat these stateless DOS attacks. See + * corresponding code in tcpsendka(). 
+ */ + if(tcb->state != Syn_received && (seg.flags & RST) == 0){ + if(tcpporthogdefense + && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){ + print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n", + source, seg.source, dest, seg.dest, seg.flags, + tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29)); + localclose(s, "stateless hog"); + } + } + + /* Cut the data to fit the receive window */ + if(tcptrim(tcb, &seg, &bp, &length) == -1) { + netlog(f, Logtcp, "tcptrim, not accept, seq %lud-%lud win %lud-%lud from %I\n", + seg.seq, seg.seq + length - 1, + tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, s->raddr); + netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length); + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000 / MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + if(!(seg.flags & RST)) { + tcb->flags |= FORCE; + goto output; + } + qunlock(s); + poperror(); + return; + } + + /* Cannot accept so answer with a rst */ + if(length && tcb->state == Closed) { + sndrst(tcp, source, dest, length, &seg, version, "sending to Closed"); + goto raise; + } + + /* The segment is beyond the current receive pointer so + * queue the data in the resequence queue + */ + if(seg.seq != tcb->rcv.nxt) + if(length != 0 || (seg.flags & (SYN|FIN))) { + update(s, &seg); + if(addreseq(tcb, tpriv, &seg, bp, length) < 0) + print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport); + tcb->flags |= FORCE; + goto output; + } + + /* + * keep looping till we've processed this packet plus any + * adjacent packets in the resequence queue + */ + for(;;) { + if(seg.flags & RST) { + if(tcb->state == Established) { + tpriv->stats[EstabResets]++; + if(tcb->rcv.nxt != seg.seq) + print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, 
s->lport, tcb->rcv.nxt, seg.seq); + } + localclose(s, Econrefused); + goto raise; + } + + if((seg.flags&ACK) == 0) + goto raise; + + switch(tcb->state) { + case Syn_received: + if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){ + sndrst(tcp, source, dest, length, &seg, version, + "bad seq in Syn_received"); + goto raise; + } + update(s, &seg); + tcpsetstate(s, Established); + case Established: + case Close_wait: + update(s, &seg); + break; + case Finwait1: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0){ + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcpsetkacounter(tcb); + tcb->time = NOW; + tcpsetstate(s, Finwait2); + tcb->katimer.start = MSL2 * (1000 / MSPTICK); + tcpgo(tpriv, &tcb->katimer); + } + break; + case Finwait2: + update(s, &seg); + break; + case Closing: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000 / MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + break; + case Last_ack: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0) { + localclose(s, nil); + goto raise; + } + case Time_wait: + tcb->flags |= FORCE; + if(tcb->timer.state != TcptimerON) + tcpgo(tpriv, &tcb->timer); + } + + if((seg.flags&URG) && seg.urg) { + if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) { + tcb->rcv.urg = seg.urg + seg.seq; + pullblock(&bp, seg.urg); + } + } + else + if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg)) + tcb->rcv.urg = tcb->rcv.nxt; + + if(length == 0) { + if(bp != nil) + freeblist(bp); + } + else { + switch(tcb->state){ + default: + /* Ignore segment text */ + if(bp != nil) + freeblist(bp); + break; + + case Syn_received: + case Established: + case Finwait1: + /* If we still have some data place on + * receive queue + */ + if(bp) { + bp = packblock(bp); + if(bp == nil) + panic("tcp packblock"); + qpassnolim(s->rq, bp); + bp = nil; + + /* + * Force an ack every 2 data messages. 
This is + * a hack for rob to make his home system run + * faster. + * + * this also keeps the standard TCP congestion + * control working since it needs an ack every + * 2 max segs worth. This is not quite that, + * but under a real stream is equivalent since + * every packet has a max seg in it. + */ + if(++(tcb->rcv.una) >= 2) + tcb->flags |= FORCE; + } + tcb->rcv.nxt += length; + + /* + * update our rcv window + */ + tcprcvwin(s); + + /* + * turn on the acktimer if there's something + * to ack + */ + if(tcb->acktimer.state != TcptimerON) + tcpgo(tpriv, &tcb->acktimer); + + break; + case Finwait2: + /* no process to read the data, send a reset */ + if(bp != nil) + freeblist(bp); + sndrst(tcp, source, dest, length, &seg, version, + "send to Finwait2"); + qunlock(s); + poperror(); + return; + } + } + + if(seg.flags & FIN) { + tcb->flags |= FORCE; + + switch(tcb->state) { + case Syn_received: + case Established: + tcb->rcv.nxt++; + tcpsetstate(s, Close_wait); + break; + case Finwait1: + tcb->rcv.nxt++; + if(qlen(s->wq)+tcb->flgcnt == 0) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000/MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + else + tcpsetstate(s, Closing); + break; + case Finwait2: + tcb->rcv.nxt++; + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2 * (1000/MSPTICK); + tcpgo(tpriv, &tcb->timer); + break; + case Close_wait: + case Closing: + case Last_ack: + break; + case Time_wait: + tcpgo(tpriv, &tcb->timer); + break; + } + } + + /* + * get next adjacent segment from the resequence queue. 
+ * dump/trim any overlapping segments + */ + for(;;) { + if(tcb->reseq == nil) + goto output; + + if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0) + goto output; + + getreseq(tcb, &seg, &bp, &length); + + if(tcptrim(tcb, &seg, &bp, &length) == 0) + break; + } + } +output: + tcpoutput(s); + qunlock(s); + poperror(); + return; +raise: + qunlock(s); + poperror(); + freeblist(bp); + tcpkick(s); +} + +/* + * always enters and exits with the s locked. We drop + * the lock to ipoput the packet so some care has to be + * taken by callers. + */ +static void +tcpoutput(Conv *s) +{ + Tcp seg; + int msgs; + Tcpctl *tcb; + Block *hbp, *bp; + int sndcnt, n; + ulong ssize, dsize, usable, sent; + Fs *f; + Tcppriv *tpriv; + uchar version; + + f = s->p->f; + tpriv = s->p->priv; + version = s->ipversion; + memset(&seg, 0, sizeof seg); + + for(msgs = 0; msgs < 100; msgs++) { + tcb = (Tcpctl*)s->ptcl; + + switch(tcb->state) { + case Listen: + case Closed: + case Finwait2: + return; + } + + /* force an ack when a window has opened up */ + if(tcb->rcv.blocked && tcb->rcv.wnd > 0){ + tcb->rcv.blocked = 0; + tcb->flags |= FORCE; + } + + sndcnt = qlen(s->wq)+tcb->flgcnt; + sent = tcb->snd.ptr - tcb->snd.una; + + /* Don't send anything else until our SYN has been acked */ + if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0) + break; + + /* Compute usable segment based on offered window and limit + * window probes to one + */ + if(tcb->snd.wnd == 0){ + if(sent != 0) { + if((tcb->flags&FORCE) == 0) + break; +// tcb->snd.ptr = tcb->snd.una; + } + usable = 1; + } + else { + usable = tcb->cwind; + if(tcb->snd.wnd < usable) + usable = tcb->snd.wnd; +// usable -= sent; + usable = usable >= sent? 
usable - sent: 0; + } + ssize = sndcnt-sent; + if(ssize && usable < 2) + netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n", + tcb->snd.wnd, tcb->cwind); + if(usable < ssize) + ssize = usable; + if(tcb->mss < ssize) + ssize = tcb->mss; + dsize = ssize; + seg.urg = 0; + + if(ssize == 0) + if((tcb->flags&FORCE) == 0) + break; + + tcb->flags &= ~FORCE; + tcprcvwin(s); + + /* By default we will generate an ack */ + tcphalt(tpriv, &tcb->acktimer); + tcb->rcv.una = 0; + seg.source = s->lport; + seg.dest = s->rport; + seg.flags = ACK; + seg.mss = 0; + seg.ws = 0; + switch(tcb->state){ + case Syn_sent: + seg.flags = 0; + if(tcb->snd.ptr == tcb->iss){ + seg.flags |= SYN; + dsize--; + seg.mss = tcb->mss; + seg.ws = tcb->scale; + } + break; + case Syn_received: + /* + * don't send any data with a SYN/ACK packet + * because Linux rejects the packet in its + * attempt to solve the SYN attack problem + */ + if(tcb->snd.ptr == tcb->iss){ + seg.flags |= SYN; + dsize = 0; + ssize = 1; + seg.mss = tcb->mss; + seg.ws = tcb->scale; + } + break; + } + seg.seq = tcb->snd.ptr; + seg.ack = tcb->rcv.nxt; + seg.wnd = tcb->rcv.wnd; + + /* Pull out data to send */ + bp = nil; + if(dsize != 0) { + bp = qcopy(s->wq, dsize, sent); + if(BLEN(bp) != dsize) { + seg.flags |= FIN; + dsize--; + } + } + + if(sent+dsize == sndcnt) + seg.flags |= PSH; + + /* keep track of balance of resent data */ + if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) { + n = tcb->snd.nxt - tcb->snd.ptr; + if(ssize < n) + n = ssize; + tcb->resent += n; + netlog(f, Logtcp, "rexmit: %I!%d -> %I!%d ptr %lux nxt %lux\n", + s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt); + tpriv->stats[RetransSegs]++; + } + + tcb->snd.ptr += ssize; + + /* Pull up the send pointer so we can accept acks + * for this window + */ + if(seq_gt(tcb->snd.ptr,tcb->snd.nxt)) + tcb->snd.nxt = tcb->snd.ptr; + + /* Build header, link data and compute cksum */ + switch(version){ + case V4: + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = 
htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb); + if(hbp == nil) { + freeblist(bp); + return; + } + break; + case V6: + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb); + if(hbp == nil) { + freeblist(bp); + return; + } + break; + default: + hbp = nil; /* to suppress a warning */ + panic("tcpoutput: version %d", version); + } + + /* Start the transmission timers if there is new data and we + * expect acknowledges + */ + if(ssize != 0){ + if(tcb->timer.state != TcptimerON) + tcpgo(tpriv, &tcb->timer); + + /* If round trip timer isn't running, start it. + * measure the longest packet only in case the + * transmission time dominates RTT + */ + if(tcb->rtt_timer.state != TcptimerON) + if(ssize == tcb->mss) { + tcpgo(tpriv, &tcb->rtt_timer); + tcb->rttseq = tcb->snd.ptr; + } + } + + tpriv->stats[OutSegs]++; + + /* put off the next keep alive */ + tcpgo(tpriv, &tcb->katimer); + + switch(version){ + case V4: + if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){ + /* a negative return means no route */ + localclose(s, "no route"); + } + break; + case V6: + if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){ + /* a negative return means no route */ + localclose(s, "no route"); + } + break; + default: + panic("tcpoutput2: version %d", version); + } + if((msgs%4) == 1){ + qunlock(s); + sched(); + qlock(s); + } + } +} + +/* + * the BSD convention (hack?) for keep alives. resend last uchar acked. 
+ */ +static void +tcpsendka(Conv *s) +{ + Tcp seg; + Tcpctl *tcb; + Block *hbp,*dbp; + + tcb = (Tcpctl*)s->ptcl; + + dbp = nil; + memset(&seg, 0, sizeof seg); + seg.urg = 0; + seg.source = s->lport; + seg.dest = s->rport; + seg.flags = ACK|PSH; + seg.mss = 0; + seg.ws = 0; + if(tcpporthogdefense) + seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20); + else + seg.seq = tcb->snd.una-1; + seg.ack = tcb->rcv.nxt; + tcb->rcv.una = 0; + seg.wnd = tcb->rcv.wnd; + if(tcb->state == Finwait2){ + seg.flags |= FIN; + } else { + dbp = allocb(1); + dbp->wp++; + } + + if(isv4(s->raddr)) { + /* Build header, link data and compute cksum */ + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb); + if(hbp == nil) { + freeblist(dbp); + return; + } + ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s); + } + else { + /* Build header, link data and compute cksum */ + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb); + if(hbp == nil) { + freeblist(dbp); + return; + } + ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s); + } +} + +/* + * set connection to time out after 12 minutes + */ +static void +tcpsetkacounter(Tcpctl *tcb) +{ + tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK); + if(tcb->kacounter < 3) + tcb->kacounter = 3; +} + +/* + * if we've timed out, close the connection + * otherwise, send a keepalive and restart the timer + */ +static void +tcpkeepalive(void *v) +{ + Tcpctl *tcb; + Conv *s; + + s = v; + tcb = (Tcpctl*)s->ptcl; + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + if(tcb->state != Closed){ + if(--(tcb->kacounter) <= 0) { + localclose(s, Etimedout); + } else { + tcpsendka(s); + tcpgo(s->p->priv, &tcb->katimer); + } + } + qunlock(s); + poperror(); +} + +/* + * start keepalive timer + */ +static char* +tcpstartka(Conv *s, char **f, int n) +{ + Tcpctl *tcb; + int x; + + tcb = (Tcpctl*)s->ptcl; + if(tcb->state != Established) + return "connection must be in Establised 
state"; + if(n > 1){ + x = atoi(f[1]); + if(x >= MSPTICK) + tcb->katimer.start = x/MSPTICK; + } + tcpsetkacounter(tcb); + tcpgo(s->p->priv, &tcb->katimer); + + return nil; +} + +/* + * turn checksums on/off + */ +static char* +tcpsetchecksum(Conv *s, char **f, int) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + tcb->nochecksum = !atoi(f[1]); + + return nil; +} + +static void +tcprxmit(Conv *s) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + + tcb->flags |= RETRAN|FORCE; + tcb->snd.ptr = tcb->snd.una; + + /* + * We should be halving the slow start threshhold (down to one + * mss) but leaving it at mss seems to work well enough + */ + tcb->ssthresh = tcb->mss; + + /* + * pull window down to a single packet + */ + tcb->cwind = tcb->mss; + tcpoutput(s); +} + +static void +tcptimeout(void *arg) +{ + Conv *s; + Tcpctl *tcb; + int maxback; + Tcppriv *tpriv; + + s = (Conv*)arg; + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + switch(tcb->state){ + default: + tcb->backoff++; + if(tcb->state == Syn_sent) + maxback = MAXBACKMS/2; + else + maxback = MAXBACKMS; + tcb->backedoff += tcb->timer.start * MSPTICK; + if(tcb->backedoff >= maxback) { + localclose(s, Etimedout); + break; + } + netlog(s->p->f, Logtcprxmt, "timeout rexmit %#lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW); + tcpsettimer(tcb); + tcprxmit(s); + tpriv->stats[RetransTimeouts]++; + tcb->snd.dupacks = 0; + break; + case Time_wait: + localclose(s, nil); + break; + case Closed: + break; + } + qunlock(s); + poperror(); +} + +static int +inwindow(Tcpctl *tcb, int seq) +{ + return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1); +} + +/* + * set up state for a received SYN (or SYN ACK) packet + */ +static void +procsyn(Conv *s, Tcp *seg) +{ + Tcpctl *tcb; + Tcppriv *tpriv; + + tcb = (Tcpctl*)s->ptcl; + tcb->flags |= FORCE; + + tcb->rcv.nxt = seg->seq + 1; + tcb->rcv.urg = tcb->rcv.nxt; + tcb->irs = seg->seq; + + /* our sending max segment 
size cannot be bigger than what he asked for */ + if(seg->mss != 0 && seg->mss < tcb->mss) { + tcb->mss = seg->mss; + tpriv = s->p->priv; + tpriv->stats[Mss] = tcb->mss; + } + + /* the congestion window always starts out as a single segment */ + tcb->snd.wnd = seg->wnd; + tcb->cwind = tcb->mss; +} + +/* + * link seg and its data block bp into tcb's resequence list, kept + * sorted by starting sequence number. bp is always consumed. + * returns -1 after discarding the whole list when the lengths + * walked up to the insertion point exceed QMAX<<tcb->rcv.scale, + * otherwise 0. + */ +static int +addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length) +{ + Reseq *rp, *rp1; + int i, rqlen, qmax; + + rp = malloc(sizeof(Reseq)); + if(rp == nil){ + freeblist(bp); /* bp always consumed by add_reseq */ + return 0; + } + + rp->seg = *seg; + rp->bp = bp; + rp->length = length; + + /* Place on reassembly list sorting by starting seq number */ + rp1 = tcb->reseq; + if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) { + rp->next = rp1; + tcb->reseq = rp; + if(rp->next != nil) + tpriv->stats[OutOfOrder]++; + return 0; + } + + rqlen = 0; + for(i = 0;; i++) { + rqlen += rp1->length; + if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) { + rp->next = rp1->next; + rp1->next = rp; + if(rp->next != nil) + tpriv->stats[OutOfOrder]++; + break; + } + rp1 = rp1->next; + } + /* limit is QMAX shifted by the receive window scale */ + qmax = QMAX<<tcb->rcv.scale; + if(rqlen > qmax){ + print("resequence queue > window: %d > %d\n", rqlen, qmax); + i = 0; + for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){ + print("%#lux %#lux %#ux\n", rp1->seg.seq, + rp1->seg.ack, rp1->seg.flags); + if(i++ > 10){ + print("...\n"); + break; + } + } + + /* + * delete entire reassembly queue; wait for retransmit. + * - should we be smarter and only delete the tail?
+ */ + for(rp = tcb->reseq; rp != nil; rp = rp1){ + rp1 = rp->next; + freeblist(rp->bp); + free(rp); + } + tcb->reseq = nil; + + return -1; + } + return 0; +} + +static void +getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) +{ + Reseq *rp; + + rp = tcb->reseq; + if(rp == nil) + return; + + tcb->reseq = rp->next; + + *seg = rp->seg; + *bp = rp->bp; + *length = rp->length; + + free(rp); +} + +static int +tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) +{ + ushort len; + uchar accept; + int dupcnt, excess; + + accept = 0; + len = *length; + if(seg->flags & SYN) + len++; + if(seg->flags & FIN) + len++; + + if(tcb->rcv.wnd == 0) { + if(len == 0 && seg->seq == tcb->rcv.nxt) + return 0; + } + else { + /* Some part of the segment should be in the window */ + if(inwindow(tcb,seg->seq)) + accept++; + else + if(len != 0) { + if(inwindow(tcb, seg->seq+len-1) || + seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1)) + accept++; + } + } + if(!accept) { + freeblist(*bp); + return -1; + } + dupcnt = tcb->rcv.nxt - seg->seq; + if(dupcnt > 0){ + tcb->rerecv += dupcnt; + if(seg->flags & SYN){ + seg->flags &= ~SYN; + seg->seq++; + + if(seg->urg > 1) + seg->urg--; + else + seg->flags &= ~URG; + dupcnt--; + } + if(dupcnt > 0){ + pullblock(bp, (ushort)dupcnt); + seg->seq += dupcnt; + *length -= dupcnt; + + if(seg->urg > dupcnt) + seg->urg -= dupcnt; + else { + seg->flags &= ~URG; + seg->urg = 0; + } + } + } + excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd); + if(excess > 0) { + tcb->rerecv += excess; + *length -= excess; + *bp = trimblock(*bp, 0, *length); + if(*bp == nil) + panic("presotto is a boofhead"); + seg->flags &= ~FIN; + } + return 0; +} + +static void +tcpadvise(Proto *tcp, Block *bp, char *msg) +{ + Tcp4hdr *h4; + Tcp6hdr *h6; + Tcpctl *tcb; + uchar source[IPaddrlen]; + uchar dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + + h4 = (Tcp4hdr*)(bp->rp); + h6 = (Tcp6hdr*)(bp->rp); + + if((h4->vihl&0xF0)==IP_VER4) { + v4tov6(dest, 
h4->tcpdst); + v4tov6(source, h4->tcpsrc); + psource = nhgets(h4->tcpsport); + pdest = nhgets(h4->tcpdport); + } + else { + ipmove(dest, h6->tcpdst); + ipmove(source, h6->tcpsrc); + psource = nhgets(h6->tcpsport); + pdest = nhgets(h6->tcpdport); + } + + /* Look for a connection */ + qlock(tcp); + for(p = tcp->conv; *p; p++) { + s = *p; + tcb = (Tcpctl*)s->ptcl; + if(s->rport == pdest) + if(s->lport == psource) + if(tcb->state != Closed) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + qlock(s); + qunlock(tcp); + switch(tcb->state){ + case Syn_sent: + localclose(s, msg); + break; + } + qunlock(s); + freeblist(bp); + return; + } + } + qunlock(tcp); + freeblist(bp); +} + +static char* +tcpporthogdefensectl(char *val) +{ + if(strcmp(val, "on") == 0) + tcpporthogdefense = 1; + else if(strcmp(val, "off") == 0) + tcpporthogdefense = 0; + else + return "unknown value for tcpporthogdefense"; + return nil; +} + +/* called with c qlocked */ +static char* +tcpctl(Conv* c, char** f, int n) +{ + if(n == 1 && strcmp(f[0], "hangup") == 0) + return tcphangup(c); + if(n >= 1 && strcmp(f[0], "keepalive") == 0) + return tcpstartka(c, f, n); + if(n >= 1 && strcmp(f[0], "checksum") == 0) + return tcpsetchecksum(c, f, n); + if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0) + return tcpporthogdefensectl(f[1]); + return "unknown control request"; +} + +static int +tcpstats(Proto *tcp, char *buf, int len) +{ + Tcppriv *priv; + char *p, *e; + int i; + + priv = tcp->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]); + return p - buf; +} + +/* + * garbage collect any stale conversations: + * - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack) + * - Finwait2 after 5 minutes + * + * this is called whenever we run out of channels. Both checks are + * of questionable validity so we try to use them only when we're + * up against the wall. 
+ */ +static int +tcpgc(Proto *tcp) +{ + Conv *c, **pp, **ep; + int n; + Tcpctl *tcb; + + + n = 0; + ep = &tcp->conv[tcp->nc]; + for(pp = tcp->conv; pp < ep; pp++) { + c = *pp; + if(c == nil) + break; + if(!canqlock(c)) + continue; + tcb = (Tcpctl*)c->ptcl; + switch(tcb->state){ + case Syn_received: + if(NOW - tcb->time > 5000){ + localclose(c, Etimedout); + n++; + } + break; + case Finwait2: + if(NOW - tcb->time > 5*60*1000){ + localclose(c, Etimedout); + n++; + } + break; + } + qunlock(c); + } + return n; +} + +static void +tcpsettimer(Tcpctl *tcb) +{ + int x; + + /* round trip dependency */ + x = backoff(tcb->backoff) * + (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK; + + /* bounded twixt 1/2 and 64 seconds */ + if(x < 500/MSPTICK) + x = 500/MSPTICK; + else if(x > (64000/MSPTICK)) + x = 64000/MSPTICK; + tcb->timer.start = x; +} + +void +tcpinit(Fs *fs) +{ + Proto *tcp; + Tcppriv *tpriv; + + tcp = smalloc(sizeof(Proto)); + tpriv = tcp->priv = smalloc(sizeof(Tcppriv)); + tcp->name = "tcp"; + tcp->connect = tcpconnect; + tcp->announce = tcpannounce; + tcp->ctl = tcpctl; + tcp->state = tcpstate; + tcp->create = tcpcreate; + tcp->close = tcpclose; + tcp->rcv = tcpiput; + tcp->advise = tcpadvise; + tcp->stats = tcpstats; + tcp->inuse = tcpinuse; + tcp->gc = tcpgc; + tcp->ipproto = IP_TCPPROTO; + tcp->nc = scalednconv(); + tcp->ptclsize = sizeof(Tcpctl); + tpriv->stats[MaxConn] = tcp->nc; + + Fsproto(fs, tcp); +} + +/* + * record the window scale factors and size the receive queue to match: + * a non-zero rcvscale stores both scales (low 8 bits each) and sets + * the window to QMAX<<tcb->snd.scale; a zero rcvscale clears both + * scales and uses plain QMAX. either way s->rq is limited to the + * resulting window. + */ +static void +tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale) +{ + if(rcvscale){ + tcb->rcv.scale = rcvscale & 0xff; + tcb->snd.scale = sndscale & 0xff; + tcb->window = QMAX<<tcb->snd.scale; + qsetlimit(s->rq, tcb->window); + } else { + tcb->rcv.scale = 0; + tcb->snd.scale = 0; + tcb->window = QMAX; + qsetlimit(s->rq, tcb->window); + } +} diff -Nru /sys/src/9k/ip/udp.c /sys/src/9k/ip/udp.c --- /sys/src/9k/ip/udp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/ip/udp.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,620 @@ +#include "u.h" +#include
"../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + + +#define DPRINT if(0)print + +enum +{ + UDP_UDPHDR_SZ = 8, + + UDP4_PHDR_OFF = 8, + UDP4_PHDR_SZ = 12, + UDP4_IPHDR_SZ = 20, + UDP6_IPHDR_SZ = 40, + UDP6_PHDR_SZ = 40, + UDP6_PHDR_OFF = 0, + + IP_UDPPROTO = 17, + UDP_USEAD7 = 52, + + Udprxms = 200, + Udptickms = 100, + Udpmaxxmit = 10, +}; + +typedef struct Udp4hdr Udp4hdr; +struct Udp4hdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar udpproto; /* Protocol */ + uchar udpplen[2]; /* Header plus data length */ + uchar udpsrc[IPv4addrlen]; /* Ip source */ + uchar udpdst[IPv4addrlen]; /* Ip destination */ + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length */ + uchar udpcksum[2]; /* Checksum */ +}; + +typedef struct Udp6hdr Udp6hdr; +struct Udp6hdr { + uchar viclfl[4]; + uchar len[2]; + uchar nextheader; + uchar hoplimit; + uchar udpsrc[IPaddrlen]; + uchar udpdst[IPaddrlen]; + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length */ + uchar udpcksum[2]; /* Checksum */ +}; + +/* MIB II counters */ +typedef struct Udpstats Udpstats; +struct Udpstats +{ + uvlong udpInDatagrams; + uvlong udpNoPorts; + uvlong udpInErrors; + uvlong udpOutDatagrams; +}; + +typedef struct Udppriv Udppriv; +struct Udppriv +{ + Ipht ht; + + /* MIB counters */ + Udpstats ustats; + + /* non-MIB stats */ + uvlong csumerr; /* checksum errors */ + uvlong lenerr; /* short packet */ +}; + +void (*etherprofiler)(char *name, int qlen); +void udpkick(void *x, Block *bp); + +/* + * protocol specific part of Conv + */ +typedef struct Udpcb Udpcb; +struct Udpcb 
+{ + QLock; + uchar headers; +}; + +static char* +udpconnect(Conv *c, char **argv, int argc) +{ + char *e; + Udppriv *upriv; + + upriv = c->p->priv; + e = Fsstdconnect(c, argv, argc); + Fsconnected(c, e); + if(e != nil) + return e; + + iphtadd(&upriv->ht, c); + return nil; +} + + +static int +udpstate(Conv *c, char *state, int n) +{ + return snprint(state, n, "%s qin %d qout %d\n", + c->inuse ? "Open" : "Closed", + c->rq ? qlen(c->rq) : 0, + c->wq ? qlen(c->wq) : 0 + ); +} + +static char* +udpannounce(Conv *c, char** argv, int argc) +{ + char *e; + Udppriv *upriv; + + upriv = c->p->priv; + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + iphtadd(&upriv->ht, c); + + return nil; +} + +static void +udpcreate(Conv *c) +{ + c->rq = qopen(128*1024, Qmsg, 0, 0); + c->wq = qbypass(udpkick, c); +} + +static void +udpclose(Conv *c) +{ + Udpcb *ucb; + Udppriv *upriv; + + upriv = c->p->priv; + iphtrem(&upriv->ht, c); + + c->state = 0; + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; + + ucb = (Udpcb*)c->ptcl; + ucb->headers = 0; +} + +void +udpkick(void *x, Block *bp) +{ + Conv *c = x; + Udp4hdr *uh4; + Udp6hdr *uh6; + ushort rport; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Udpcb *ucb; + int dlen, ptcllen; + Udppriv *upriv; + Fs *f; + int version; + Conv *rc; + + upriv = c->p->priv; + f = c->p->f; + +// netlog(c->p->f, Logudp, "udp: kick\n"); /* frequent and uninteresting */ + if(bp == nil) + return; + + ucb = (Udpcb*)c->ptcl; + switch(ucb->headers) { + case 7: + /* get user specified addresses */ + bp = pullupblock(bp, UDP_USEAD7); + if(bp == nil) + return; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + /* pick interface closest to dest */ + if(ipforme(f, laddr) != Runi) + findlocalip(f, laddr, raddr); + bp->rp += IPaddrlen; /* Ignore ifc address */ + rport = nhgets(bp->rp); + bp->rp += 2+2; 
/* Ignore local port */ + break; + default: + rport = 0; + break; + } + + if(ucb->headers) { + if(memcmp(laddr, v4prefix, IPv4off) == 0 + || ipcmp(laddr, IPnoaddr) == 0) + version = 4; + else + version = 6; + } else { + if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) + || ipcmp(c->raddr, IPnoaddr) == 0) + version = 4; + else + version = 6; + } + + dlen = blocklen(bp); + + /* fill in pseudo header and compute checksum */ + switch(version){ + case V4: + bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ); + if(bp == nil) + return; + + uh4 = (Udp4hdr *)(bp->rp); + ptcllen = dlen + UDP_UDPHDR_SZ; + uh4->Unused = 0; + uh4->udpproto = IP_UDPPROTO; + uh4->frag[0] = 0; + uh4->frag[1] = 0; + hnputs(uh4->udpplen, ptcllen); + if(ucb->headers) { + v6tov4(uh4->udpdst, raddr); + hnputs(uh4->udpdport, rport); + v6tov4(uh4->udpsrc, laddr); + rc = nil; + } else { + v6tov4(uh4->udpdst, c->raddr); + hnputs(uh4->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + v6tov4(uh4->udpsrc, c->laddr); + rc = c; + } + hnputs(uh4->udpsport, c->lport); + hnputs(uh4->udplen, ptcllen); + uh4->udpcksum[0] = 0; + uh4->udpcksum[1] = 0; + hnputs(uh4->udpcksum, + ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ)); + uh4->vihl = IP_VER4; + ipoput4(f, bp, 0, c->ttl, c->tos, rc); + break; + + case V6: + bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ); + if(bp == nil) + return; + + /* + * using the v6 ip header to create pseudo header + * first then reset it to the normal ip header + */ + uh6 = (Udp6hdr *)(bp->rp); + memset(uh6, 0, 8); + ptcllen = dlen + UDP_UDPHDR_SZ; + hnputl(uh6->viclfl, ptcllen); + uh6->hoplimit = IP_UDPPROTO; + if(ucb->headers) { + ipmove(uh6->udpdst, raddr); + hnputs(uh6->udpdport, rport); + ipmove(uh6->udpsrc, laddr); + rc = nil; + } else { + ipmove(uh6->udpdst, c->raddr); + hnputs(uh6->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + 
ipmove(uh6->udpsrc, c->laddr); + rc = c; + } + hnputs(uh6->udpsport, c->lport); + hnputs(uh6->udplen, ptcllen); + uh6->udpcksum[0] = 0; + uh6->udpcksum[1] = 0; + hnputs(uh6->udpcksum, + ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ)); + memset(uh6, 0, 8); + uh6->viclfl[0] = IP_VER6; + hnputs(uh6->len, ptcllen); + uh6->nextheader = IP_UDPPROTO; + ipoput6(f, bp, 0, c->ttl, c->tos, rc); + break; + + default: + panic("udpkick: version %d", version); + } + upriv->ustats.udpOutDatagrams++; +} + +void +udpiput(Proto *udp, Ipifc *ifc, Block *bp) +{ + int len; + Udp4hdr *uh4; + Udp6hdr *uh6; + Conv *c; + Udpcb *ucb; + uchar raddr[IPaddrlen], laddr[IPaddrlen]; + ushort rport, lport; + Udppriv *upriv; + Fs *f; + int version; + int ottl, oviclfl, olen; + uchar *p; + + upriv = udp->priv; + f = udp->f; + upriv->ustats.udpInDatagrams++; + + uh4 = (Udp4hdr*)(bp->rp); + version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4; + + /* Put back pseudo header for checksum + * (remember old values for icmpnoconv()) */ + switch(version) { + case V4: + ottl = uh4->Unused; + uh4->Unused = 0; + len = nhgets(uh4->udplen); + olen = nhgets(uh4->udpplen); + hnputs(uh4->udpplen, len); + + v4tov6(raddr, uh4->udpsrc); + v4tov6(laddr, uh4->udpdst); + lport = nhgets(uh4->udpdport); + rport = nhgets(uh4->udpsport); + + if(nhgets(uh4->udpcksum)) { + if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) { + upriv->ustats.udpInErrors++; + netlog(f, Logudp, "udp: checksum error %I\n", raddr); + DPRINT("udp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + } + uh4->Unused = ottl; + hnputs(uh4->udpplen, olen); + break; + case V6: + uh6 = (Udp6hdr*)(bp->rp); + len = nhgets(uh6->udplen); + oviclfl = nhgetl(uh6->viclfl); + olen = nhgets(uh6->len); + ottl = uh6->hoplimit; + ipmove(raddr, uh6->udpsrc); + ipmove(laddr, uh6->udpdst); + lport = nhgets(uh6->udpdport); + rport = nhgets(uh6->udpsport); + memset(uh6, 0, 8); + hnputl(uh6->viclfl, len); + uh6->hoplimit = IP_UDPPROTO; + if(ptclcsum(bp, 
UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) { + upriv->ustats.udpInErrors++; + netlog(f, Logudp, "udp: checksum error %I\n", raddr); + DPRINT("udp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + hnputl(uh6->viclfl, oviclfl); + hnputs(uh6->len, olen); + uh6->nextheader = IP_UDPPROTO; + uh6->hoplimit = ottl; + break; + default: + panic("udpiput: version %d", version); + return; /* to avoid a warning */ + } + + qlock(udp); + + c = iphtlook(&upriv->ht, raddr, rport, laddr, lport); + if(c == nil){ + /* no conversation found */ + upriv->ustats.udpNoPorts++; + qunlock(udp); + netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport, + laddr, lport); + + switch(version){ + case V4: + icmpnoconv(f, bp); + break; + case V6: + icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0); + break; + default: + panic("udpiput2: version %d", version); + } + + freeblist(bp); + return; + } + ucb = (Udpcb*)c->ptcl; + + if(c->state == Announced){ + if(ucb->headers == 0){ + /* create a new conversation */ + if(ipforme(f, laddr) != Runi) { + switch(version){ + case V4: + v4tov6(laddr, ifc->lifc->local); + break; + case V6: + ipmove(laddr, ifc->lifc->local); + break; + default: + panic("udpiput3: version %d", version); + } + } + c = Fsnewcall(c, raddr, rport, laddr, lport, version); + if(c == nil){ + qunlock(udp); + freeblist(bp); + return; + } + iphtadd(&upriv->ht, c); + ucb = (Udpcb*)c->ptcl; + } + } + + qlock(c); + qunlock(udp); + + /* + * Trim the packet down to data size + */ + len -= UDP_UDPHDR_SZ; + switch(version){ + case V4: + bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len); + break; + case V6: + bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len); + break; + default: + bp = nil; + panic("udpiput4: version %d", version); + } + if(bp == nil){ + qunlock(c); + netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + upriv->lenerr++; + return; + } + + netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport, + laddr, lport, len); + + 
switch(ucb->headers){ + case 7: + /* pass the src address */ + bp = padblock(bp, UDP_USEAD7); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, laddr); p += IPaddrlen; + ipmove(p, ifc->lifc->local); p += IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + } + + if(bp->next) + bp = concatblock(bp); + + if(qfull(c->rq)){ + qunlock(c); + netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + freeblist(bp); + return; + } + + qpass(c->rq, bp); + qunlock(c); + +} + +char* +udpctl(Conv *c, char **f, int n) +{ + Udpcb *ucb; + + ucb = (Udpcb*)c->ptcl; + if(n == 1){ + if(strcmp(f[0], "headers") == 0){ + ucb->headers = 7; /* new headers format */ + return nil; + } + } + return "unknown control request"; +} + +void +udpadvise(Proto *udp, Block *bp, char *msg) +{ + Udp4hdr *h4; + Udp6hdr *h6; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + int version; + + h4 = (Udp4hdr*)(bp->rp); + version = ((h4->vihl&0xF0)==IP_VER6) ? 
6 : 4; + + switch(version) { + case V4: + v4tov6(dest, h4->udpdst); + v4tov6(source, h4->udpsrc); + psource = nhgets(h4->udpsport); + pdest = nhgets(h4->udpdport); + break; + case V6: + h6 = (Udp6hdr*)(bp->rp); + ipmove(dest, h6->udpdst); + ipmove(source, h6->udpsrc); + psource = nhgets(h6->udpsport); + pdest = nhgets(h6->udpdport); + break; + default: + panic("udpadvise: version %d", version); + return; /* to avoid a warning */ + } + + /* Look for a connection */ + qlock(udp); + for(p = udp->conv; *p; p++) { + s = *p; + if(s->rport == pdest) + if(s->lport == psource) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + if(s->ignoreadvice) + break; + qlock(s); + qunlock(udp); + qhangup(s->rq, msg); + qhangup(s->wq, msg); + qunlock(s); + freeblist(bp); + return; + } + } + qunlock(udp); + freeblist(bp); +} + +int +udpstats(Proto *udp, char *buf, int len) +{ + Udppriv *upriv; + + upriv = udp->priv; + return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %llud\n" + "InErrors: %llud\nOutDatagrams: %llud\n", + upriv->ustats.udpInDatagrams, + upriv->ustats.udpNoPorts, + upriv->ustats.udpInErrors, + upriv->ustats.udpOutDatagrams); +} + +void +udpinit(Fs *fs) +{ + Proto *udp; + + udp = smalloc(sizeof(Proto)); + udp->priv = smalloc(sizeof(Udppriv)); + udp->name = "udp"; + udp->connect = udpconnect; + udp->announce = udpannounce; + udp->ctl = udpctl; + udp->state = udpstate; + udp->create = udpcreate; + udp->close = udpclose; + udp->rcv = udpiput; + udp->advise = udpadvise; + udp->stats = udpstats; + udp->ipproto = IP_UDPPROTO; + udp->nc = Nchans; + udp->ptclsize = sizeof(Udpcb); + + Fsproto(fs, udp); +} diff -Nru /sys/src/9k/k10/Notes /sys/src/9k/k10/Notes --- /sys/src/9k/k10/Notes Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/Notes Wed Dec 9 00:00:00 2015 @@ -0,0 +1,46 @@ +- acpi +- devipmi/ipmi +- devarch +- netif +- acpimatch +- syscalltrace +- srx/devec.c: BY2PG alignment? 
+- iovec handling +- ptclbsumamd64 could use 64-bit ops +- mpscqio Rendez isn't a Rendez +- Block.auxspc; Next ++ ip/tcp.c +- lapicperf +- nmienable; handling of NMI latch generally + += Sys is not conf -> ioconf for now +- ex/kbd.c + +- scheduler +- devsd.c/sdide.c +- qiox +- bad map +- running with single processor +- devcons +- devcons debugging +- e820 in boot1 +- /usr/forsyth/src/newmal +- portclock.c + +- devram (see devdram) + +- qclose f->iq + +------- +1. call to idlehands in proc +2. sleep/wakeup references + +-------- +PCIDs + CR4.PCIDE + CR4.PGE + INVPCID p1138 + 2^12 + +---- +fffffe402878f78 diff -Nru /sys/src/9k/k10/acpi.c /sys/src/9k/k10/acpi.c --- /sys/src/9k/k10/acpi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/acpi.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1176 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "adr.h" + +#include "apic.h" +#include "acpi.h" +#include <aml.h> /* NOTE(review): header name restored from the aml* calls below — confirm */ + +typedef struct Rsd Rsd; + +struct Rsd { + uchar sig[8]; + uchar csum; + uchar oemid[6]; + uchar rev; + uchar raddr[4]; + uchar len[4]; + uchar xaddr[8]; + uchar xcsum; + uchar reserved[3]; +}; + +enum { + Tblsz = 4+4+1+1+6+8+4+4+4, + Rdsz = 8+1+6+1+4+4+8+1+3, +}; + +static Rsd *rsd; +static int ntblpa; /* physical addresses visited by maptable() */ +static uintmem tblpa[64]; +static int ntblmap; /* successfully mapped tables */ +static Tbl *tblmap[64]; + Fadt fadt; + Acpicfg acpicfg; + +static int +checksum(void *v, int n) +{ + uchar *p, s; + + s = 0; + p = v; + while(n-- > 0) + s += *p++; + return s; /* byte sum mod 256; callers treat 0 as valid */ +} + +static uint +get16(uchar *p) +{ + return (p[1]<<8)|p[0]; /* little-endian load */ +} + +static uint +get32(uchar *p) +{ + return (p[3]<<24)|(p[2]<<16)|(p[1]<<8)|p[0]; /* little-endian load */ +} + +static u64int +get64(uchar *p) +{ + return ((u64int)get32(p+4)<<32)|get32(p); /* bytes 4-7 form the high word */ +} + +static void +put16(uchar *p, int v) +{ + p[0] = v; + p[1] = v>>8; +} + +static void +put32(uchar *p, int v) +{ + p[0] = v; + p[1] = v>>8; + p[2] = v>>16; + p[3] = v>>24; +} + +static
uint +tbldlen(Tbl *t) +{ + return get32(t->len) - Tblsz; +} + +Tbl* +acpigettbl(void *sig) +{ + int i; + for(i=0; isig, sig, 4) == 0) + return tblmap[i]; + return nil; +} + +#define vunmap(a,b) +#define vmapoverlap vmap + +static void +acpimaptbl(u64int xpa) +{ + uchar *p, *e; + int i; + uintmem pa; + u32int l; + Tbl *t; + + pa = xpa; + if(pa != xpa || pa == 0) + return; + if(ntblpa >= nelem(tblpa) || ntblmap >= nelem(tblmap)) + return; + for(i=0; ilen); + if(l < Tblsz){ + vunmap(t, 8); + return; + } + vunmap(t, 8); + if((t = vmapoverlap(pa, l)) == nil) + return; + if(checksum(t, l)){ + vunmap(t, l); + return; + } + tblmap[ntblmap++] = t; + + p = (uchar*)t; + e = p + l; + if(memcmp("RSDT", t->sig, 4) == 0){ + for(p = t->data; p+3 < e; p += 4) + acpimaptbl(get32(p)); + } + else if(memcmp("XSDT", t->sig, 4) == 0){ + for(p = t->data; p+7 < e; p += 8) + acpimaptbl(get64(p)); + } + else if(memcmp("FACP", t->sig, 4) == 0){ + if(l < 44) + return; + acpimaptbl(get32(p + 40)); + if(l < 148) + return; + acpimaptbl(get64(p + 140)); + } +} + +static void* +rsdscan(uchar* addr, int len, char* signature) +{ + int sl; + uchar *e, *p; + + e = addr+len; + sl = strlen(signature); + for(p = addr; p+sl < e; p += 16){ + if(memcmp(p, signature, sl)) + continue; + return p; + } + + return nil; +} + +static void* +rsdsearch(char* signature) +{ + uintptr p; + uchar *bda; + Rsd *rsd; + + /* + * Search for the data structure signature: + * 1) in the first KB of the EBDA; + * 2) in the BIOS ROM between 0xE0000 and 0xFFFFF. 
+ */ + if(strncmp((char*)KADDR(0xFFFD9), "EISA", 4) == 0){ + bda = BIOSSEG(0x40); + if((p = (bda[0x0F]<<8)|bda[0x0E])){ + if(rsd = rsdscan(KADDR(p), 1024, signature)) + return rsd; + } + } + return rsdscan(BIOSSEG(0xE000), 0x20000, signature); +} + +static void +rsdload(void) +{ + if((rsd = rsdsearch("RSD PTR ")) == nil) + panic("acpi: no rsd ptr"); + if(checksum(rsd, 20) && checksum(rsd, 36)) + panic("acpi: acpi checksum"); +} + +static void +maptables(void) +{ + if(ntblmap > 0 || ntblpa > 0) + return; + rsdload(); + if(rsd->rev >= 2 && !checksum(rsd, 36)) + acpimaptbl(get64(rsd->xaddr)); + else if(!checksum(rsd, 20)) + acpimaptbl(get32(rsd->raddr)); +} + +enum { + Iointr, + Lintr, + + MTint = 0, /* fake interrupt type, equivalent to fixed */ +}; + +static u32int +apicmkintr(uint src, uint inttype, int polarity, int trigger, uint apicno, uint intin) +{ + u32int v; + IOapic *ioapic; + Lapic *lapic; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. 
+ */ + if(apicno != 0xff){ + if(Napic < 256 && apicno >= Napic){ + print("apic: id out-of-range: %d\n", apicno); + return 0; + } + switch(src){ + default: + print("apic: intin botch: %d\n", intin); + return 0; + case Iointr: + if((ioapic = ioapiclookup(apicno)) == nil){ + print("ioapic%d: ioapic unusable\n", apicno); + return 0; + } + if(intin >= ioapic->nrdt){ + print("ioapic%d: intin %d >= nrdt %d\n", apicno, intin, ioapic->nrdt); + return 0; + } + break; + case Lintr: + if((lapic = lapiclookup(apicno)) == nil){ + print("lapic%d: lapic unusable\n", apicno); + return 0; + } + if(intin >= nelem(lapic->lvt)){ + print("lapic%d: intin beyond lvt: %d\n", apicno, intin); + return 0; + } + USED(lapic); + break; + } + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(inttype){ + default: + print("apic: bad irq type %d\n", inttype); + return 0; + case MTint: /* INT (fake type, same as fixed) */ + v |= polarity | trigger; + break; + case MTnmi: /* NMI */ + case MTsmi: /* SMI */ + case MTei: /* ExtINT */ + v |= TMedge|IPhigh|inttype; + break; + } + + return v; +} + +int +flagstopolarity(int bustype, int flags) +{ + switch(flags & 3){ + case 1: + return IPhigh; + case 3: + return IPlow; + case 2: + return -1; + } + switch(bustype){ + case BusISA: + return IPhigh; + case BusPCI: + return IPlow; + break; + default: + return -1; + } +} + +int +flagstotrigger(int bustype, int flags) +{ + switch((flags>>2) & 3){ + case 1: + return TMedge; + case 3: + return TMlevel; + case 2: + return -1; + } + switch(bustype){ + case BusISA: + return TMedge; + case BusPCI: + return TMlevel; + break; + default: + return -1; + } +} + +static void +addirq(int gsi, int bustype, int busno, int irq, int flags) +{ + uint apicno, intin, polarity, trigger; + u32int i; + + if((apicno = gsitoapicid(gsi, &intin)) == -1){ + DBG("acpi: addirq: no apic for gsi %d bus type=%d busno %d\n", gsi, bustype, busno); + return; + } + DBG("addirq: gsi %d %s busno %d irq %d flags %.8ux\n", + gsi, bustype == BusPCI? "pci": "isa", busno, irq, flags); + polarity = flagstopolarity(bustype, flags); + trigger = flagstotrigger(bustype, flags); + if(polarity == -1 || trigger == -1){ + print("addirq: bad polarity: gsi %d %s busno %d irq %d flags %.8ux\n", + gsi, bustype == BusPCI? 
"pci": "isa", busno, irq, flags); + return; + } + + i = apicmkintr(Iointr, MTint, polarity, trigger, apicno, intin); + ioapicintrinit(bustype, busno, apicno, intin, irq, i); +} + +static char* +eisaid(void *v) +{ + uint b, l; + int i; + static char id[8]; /* result lives in this static buffer: not reentrant */ + + if(amltag(v) == 's') + return v; + b = amlint(v); + for(l = 0, i=24; i>=0; i -= 8, b >>= 8) + l |= (b & 0xFF) << i; + id[7] = 0; + for(i=6; i>=3; i--, l >>= 4) + id[i] = "0123456789ABCDEF"[l & 0xF]; + for(i=2; i>=0; i--, l >>= 5) + id[i] = '@' + (l & 0x1F); + return id; +} + +/*static*/ int +pcibusno(void *dot) +{ + int bno, adr, tbdf; + Pcidev *pdev; + void *p, *x; + char *id; + + id = nil; + if((x = amlwalk(dot, "^_HID")) != nil) + if((p = amlval(x)) != nil) + id = eisaid(p); + if((x = amlwalk(dot, "^_BBN")) == nil) + if((x = amlwalk(dot, "^_ADR")) == nil) + return -1; + if((p = amlval(x)) == nil) + return -1; + adr = amlint(p); + /* if root bridge, then we are done here */ + if(id != nil && (strcmp(id, "PNP0A03")==0 || strcmp(id, "PNP0A08")==0)) + return adr; + x = amlwalk(dot, "^"); + if(x == nil || x == dot) + return -1; + if((bno = pcibusno(x)) < 0) + return -1; + tbdf = MKBUS(BusPCI, bno, adr>>16, adr&0xFFFF); + pdev = pcimatchtbdf(tbdf); + if(pdev == nil) + return -1; + if(pdev->bridge == nil) + return bno; + return BUSBNO(pdev->bridge->tbdf); +} + +static int +getirqs(void *d, uchar pmask[32], int *pflags) +{ + int i, n, m; + uchar *p; + + *pflags = 0; + memset(pmask, 0, 32); + if(amltag(d) != 'b') + return -1; + p = amlval(d); + if(amllen(d) >= 3 && (p[0] == 0x22 || p[0] == 0x23)){ /* p[1], p[2] are read below: need 3 bytes */ + pmask[0] = p[1]; + pmask[1] = p[2]; + if(amllen(d) >= 4 && p[0] == 0x23) /* p[3] is read: need 4 bytes */ + *pflags = p[3]; + return 0; + } + if(amllen(d) >= 5 && p[0] == 0x89){ + n = p[4]; /* count of 4-byte interrupt numbers following the 5-byte header */ + if(amllen(d) < 5+n*4) + return -1; + for(i=0; i<n; i++){ /* NOTE(review): loop header and the load below reconstructed from context — confirm */ + m = get32(p+5 + i*4); + if(m >= 0 && m < 256) + pmask[m/8] |= 1<<(m%8); + } + *pflags = p[3]; + return 0; + } + return -1; +} + +static uchar* +setirq(void *d, uint irq) +{ + uchar *p; + + if(amltag(d) != 'b') + return nil; + p = amlnew('b',
amllen(d)); + memmove(p, d, amllen(p)); + if(p[0] == 0x22 || p[0] == 0x23) + put16(p, 1< 0 || getconf("*nopcirouting") != nil) + return gsi; + + for(i=0; i<256; i++){ + gsi = lastirq++ & 0xFF; /* round robin */ + im = 1<<(gsi%8); + if(pm[gsi/8] & im){ + if((c = setirq(r, gsi)) == nil) + break; + if(amleval(amlwalk(link, "_SRS"), "b", c, nil) < 0) + break; + return gsi; + } + } + return -1; +} + +static int +enumprt(void *dot, void *) +{ + void *p, **a, **b; + int bno, dno, pin, gsi, flags, n, i; + + bno = pcibusno(dot); + if(bno < 0){ + DBG("enumprt: pci not found %V\n", dot); + return 1; + } + + /* evalulate _PRT method */ + p = nil; + if(amleval(dot, "", &p) < 0) + return 1; + if(amltag(p) != 'p') + return 1; + + amltake(p); + n = amllen(p); + a = amlval(p); + for(i=0; i>16; + pin = amlint(b[1]); + gsi = amlint(b[3]); + if(gsi == 0){ + print("acpi: gsi is zero ... setuplink\n"); + gsi = setuplink(b[2], &flags); + if(gsi <= 0) + continue; + } + addirq(gsi, BusPCI, bno, (dno<<2)|pin, 0); + } + amldrop(p); + return 1; +} + +static void +loadtbls(char *name, int all) +{ + int i; + Tbl *t; + + for(i = 0; i < ntblmap; i++){ + t = tblmap[i]; + if(memcmp(t->sig, name, 4) == 0){ + amlload(t->data, tbldlen(t)); + if(!all) + break; + } + } +} + +static long +readtbls(Chan*, void *v, long n, vlong o) +{ + int i, l, m; + uchar *p; + Tbl *t; + + maptables(); + + p = v; + for(i=0; n > 0 && i < ntblmap; i++){ + t = tblmap[i]; + l = get32(t->len); + if(o >= l){ + o -= l; + continue; + } + m = l - o; + if(m > n) + m = n; + memmove(p, (uchar*)t + o, m); + p += m; + n -= m; + o = 0; + } + return p - (uchar*)v; +} + +enum { + Lapicen = 1, +}; + +typedef struct Parsedat Parsedat; +struct Parsedat { + int maxmach; /* for the apic structure */ +}; + +static void +parseapic(Tbl *t, Parsedat *dat) +{ + uchar *p, *e; + int i, c, nmach, maxmach; + uintmem lapicbase; + + maxmach = dat->maxmach; + + /* set APIC mode */ + amleval(amlwalk(amlroot, "_PIC"), "i", 1, nil); + + p = t->data; + e = p 
+ tbldlen(t); + lapicbase = get32(p); + p += 8; + + nmach = 0; + for(; p < e; p += c){ + c = p[1]; + if(c < 2 || (p+c) > e) + break; + switch(*p){ + case 0x00: /* Processor Local APIC */ + if(p[4] & Lapicen && nmach < maxmach){ + lapicinit(p[3], lapicbase, nmach==0); + ++nmach; + } + break; + case 0x01: /* I/O APIC */ + ioapicinit(p[2], get32(p+8), get32(p+4)); + break; + case 0x02: /* Interrupt Source Override */ + addirq(get32(p+4), BusISA, 0, p[3], get16(p+8)); + break; + case 0x03: /* NMI Source */ + print("acpi: ignoring nmi source\n"); + break; + case 0x04: /* Local APIC NMI */ + DBG("acpi: lapic nmi %.2ux flags %.4ux lint# %d (ignored)\n", + p[2], (uint)get16(p+3), p[5]); + break; + case 0x05: /* Local APIC Address Override */ + case 0x06: /* I/O SAPIC */ + case 0x07: /* Local SAPIC */ + case 0x08: /* Platform Interrupt Sources */ + case 0x09: /* Processor Local x2APIC */ + case 0x0A: /* x2APIC NMI */ + case 0x0B: /* GIC */ + case 0x0C: /* GICD */ + print("acpi: ignoring entry: %.2ux\n", *p); + break; + } + } + + /* look for PCI interrupt mappings */ + amlenum(amlroot, "_PRT", enumprt, nil); + + /* add identity mapped legacy isa interrupts */ + for(i=0; i<16; i++) + addirq(i, BusISA, 0, i, 0); + + DBG("acpiinit: %d maches\n", nmach); +} + +static void +parsesrat(Tbl *t, Parsedat*) +{ + uchar *p, *e; + + e = t->data + tbldlen(t); + for(p = t->data + 12; p+2 <= e && p[1] >= 2 && p+p[1] <= e; p += p[1]){ /* stop on a zero-length or truncated entry, as parseapic does */ + switch(p[0]){ + case 0: /* local apic */ + if(get32(p+4)&1) + lapicsetdom(p[3], p[2] | p[9]<<8 | p[10]<<16 | p[11]<<24); /* domain: low byte at p[2], bits 8-31 little-endian at p[9..11] */ + break; + case 1: /* memory affinity */ + if(get32(p+28)&1) + memaffinity(get64(p+8), get64(p+16), get32(p+2), p[0]); + break; + case 2: /* x2apic */ + if(get32(p+12)&1) + lapicsetdom(get32(p+8), get32(p+4)); + break; + default: + print("acpi: SRAT type %.2ux unknown\n", p[0]); + break; + } + } +} + +static char* regnames[] = { + "mem", "io", "pcicfg", "embed", + "smb", "cmos", "pcibar", "ipmi", /* indexed by Gas.spc in Gfmt, which includes IpmiSpace; assumes IpmiSpace == 7 — confirm */ +}; + +static int +Gfmt(Fmt* f) +{ + Gas *g; + + g = va_arg(f->args, Gas*); +
switch(g->spc){ + case MemSpace: + case IoSpace: + case EbctlSpace: + case SmbusSpace: + case CmosSpace: + case PcibarSpace: + case IpmiSpace: + fmtprint(f, "[%s", regnames[g->spc]); + break; + case PcicfgSpace: + fmtprint(f, "[pci %T", (int)g->addr); + break; + case FixedhwSpace: + fmtprint(f, "[hw"); + break; + default: + fmtprint(f, "[%#ux", g->spc); + break; + } + fmtprint(f, " %#llux", g->addr); + if(g->off != 0) + fmtprint(f, "+%d", g->off); + fmtprint(f, " len %d", g->len); + if(g->accsz != 0) + fmtprint(f, " accsz %d", g->accsz); + return fmtprint(f, "]"); +} + +static void +gasget(Gas *gas, uchar *p) +{ + gas->spc = p[0]; + gas->len = p[1]; + gas->off = p[2]; + gas->accsz = p[3]; + gas->addr = get64(p+4); +} + +static int +loadfacs(uintmem pa) +{ + USED(pa); + return 0; +} + +static long +readfadt(Chan*, void *a, long n, vlong o) +{ + char *s, *p, *e; + Fadt *f; + + s = smalloc(READSTR); + if(waserror()){ + free(s); + nexterror(); + } + p = s; + e = s+READSTR; + f = &fadt; + + p = seprint(p, e, "facs %#ux\n", f->facs); + p = seprint(p, e, "dsdt %#ux\n", f->dsdt); + p = seprint(p, e, "pmprofile %#ux\n", f->pmprofile); + p = seprint(p, e, "sciint %d\n", f->sciint); + p = seprint(p, e, "smicmd %#ux\n", f->smicmd); + p = seprint(p, e, "acpienable %#ux\n", f->acpienable); + p = seprint(p, e, "acpidisable %#ux\n", f->acpidisable); + p = seprint(p, e, "s4biosreq %#ux\n", f->s4biosreq); + p = seprint(p, e, "pstatecnt %#ux\n", f->pstatecnt); + p = seprint(p, e, "pm1aevtblk %#ux\n", f->pm1aevtblk); + p = seprint(p, e, "pm1bevtblk %#ux\n", f->pm1bevtblk); + p = seprint(p, e, "pm1acntblk %#ux\n", f->pm1acntblk); + p = seprint(p, e, "pm1bcntblk %#ux\n", f->pm1bcntblk); + p = seprint(p, e, "pm2cntblk %#ux\n", f->pm2cntblk); + p = seprint(p, e, "pmtmrblk %#ux\n", f->pmtmrblk); + p = seprint(p, e, "gpe0blk %#ux\n", f->gpe0blk); + p = seprint(p, e, "gpe1blk %#ux\n", f->gpe1blk); + p = seprint(p, e, "pm1evtlen %#ux\n", f->pm1evtlen); + p = seprint(p, e, "pm1cntlen %#ux\n", 
f->pm1cntlen); + p = seprint(p, e, "pm2cntlen %#ux\n", f->pm2cntlen); + p = seprint(p, e, "pmtmrlen %#ux\n", f->pmtmrlen); + p = seprint(p, e, "gpe0blklen %#ux\n", f->gpe0blklen); + p = seprint(p, e, "gpe1blklen %#ux\n", f->gpe1blklen); + p = seprint(p, e, "gp1base %#ux\n", f->gp1base); + p = seprint(p, e, "cstcnt %#ux\n", f->cstcnt); + p = seprint(p, e, "plvl2lat %#ux\n", f->plvl2lat); + p = seprint(p, e, "plvl3lat %#ux\n", f->plvl3lat); + p = seprint(p, e, "flushsz %#ux\n", f->flushsz); + p = seprint(p, e, "flushstride %#ux\n", f->flushstride); + p = seprint(p, e, "dutyoff %#ux\n", f->dutyoff); + p = seprint(p, e, "dutywidth %#ux\n", f->dutywidth); + p = seprint(p, e, "dayalrm %#ux\n", f->dayalrm); + p = seprint(p, e, "monalrm %#ux\n", f->monalrm); + p = seprint(p, e, "century %#ux\n", f->century); + p = seprint(p, e, "iapcbootarch %#ux\n", f->iapcbootarch); + p = seprint(p, e, "flags %#ux\n", f->flags); + p = seprint(p, e, "resetreg %G\n", &f->resetreg); + if(f->rev >= 3){ + p = seprint(p, e, "resetval %#ux\n", f->resetval); + p = seprint(p, e, "xfacs %#llux\n", f->xfacs); + p = seprint(p, e, "xdsdt %#llux\n", f->xdsdt); + p = seprint(p, e, "xpm1aevtblk %G\n", &f->xpm1aevtblk); + p = seprint(p, e, "xpm1bevtblk %G\n", &f->xpm1bevtblk); + p = seprint(p, e, "xpm1acntblk %G\n", &f->xpm1acntblk); + p = seprint(p, e, "xpm1bcntblk %G\n", &f->xpm1bcntblk); + p = seprint(p, e, "xpm2cntblk %G\n", &f->xpm2cntblk); + p = seprint(p, e, "xpmtmrblk %G\n", &f->xpmtmrblk); + p = seprint(p, e, "xgpe0blk %G\n", &f->xgpe0blk); + p = seprint(p, e, "xgpe1blk %G\n", &f->xgpe1blk); + } + USED(p); + + n = readstr(o, a, n, s); + poperror(); + free(s); + return n; +} + +static void +parsefadt(Tbl *t, Parsedat*) +{ + uchar *p; + Fadt *f; + + p = (uchar*)t; + f = &fadt; + f->rev = t->rev; + f->facs = get32(p + 36); + f->dsdt = get32(p + 40); + f->pmprofile = p[45]; + f->sciint = get16(p+46); + f->smicmd = get32(p+48); + f->acpienable = p[52]; + f->acpidisable = p[53]; + f->s4biosreq = 
p[54]; + f->pstatecnt = p[55]; + f->pm1aevtblk = get32(p+56); + f->pm1bevtblk = get32(p+60); + f->pm1acntblk = get32(p+64); + f->pm1bcntblk = get32(p+68); + f->pm2cntblk = get32(p+72); + f->pmtmrblk = get32(p+76); + f->gpe0blk = get32(p+80); + f->gpe1blk = get32(p+84); + f->pm1evtlen = p[88]; + f->pm1cntlen = p[89]; + f->pm2cntlen = p[90]; + f->pmtmrlen = p[91]; + f->gpe0blklen = p[92]; + f->gpe1blklen = p[93]; + f->gp1base = p[94]; + f->cstcnt = p[95]; + f->plvl2lat = get16(p+96); + f->plvl3lat = get16(p+98); + f->flushsz = get16(p+100); + f->flushstride = get16(p+102); + f->dutyoff = p[104]; + f->dutywidth = p[105]; + f->dayalrm = p[106]; + f->monalrm = p[107]; + f->century = p[108]; + f->iapcbootarch = get16(p+109); + f->flags = get32(p+112); + gasget(&f->resetreg, p+116); + + if(f->rev >= 3){ + f->resetval = p[128]; + f->xfacs = get64(p+132); + f->xdsdt = get64(p+140); + gasget(&f->xpm1aevtblk, p+148); + gasget(&f->xpm1bevtblk, p+160); + gasget(&f->xpm1acntblk, p+172); + gasget(&f->xpm1bcntblk, p+184); + gasget(&f->xpm2cntblk, p+196); + gasget(&f->xpmtmrblk, p+208); + gasget(&f->xgpe0blk, p+220); + gasget(&f->xgpe1blk, p+232); + } + +// dumpfadt(f); + if(f->xfacs != 0) + loadfacs(f->xfacs); + else + loadfacs(f->facs); +} + +typedef struct Hpet Hpet; +struct Hpet { + uchar id[4]; + uchar addr[12]; /* gas */ + uchar seqno; + uchar minticks[2]; + uchar attr; /* Page Protection */ +}; + +static void +parsehpet(Tbl *t, Parsedat*) +{ + int minticks; + Hpet *h; + Gas g; + + h = (Hpet*)t->data; + gasget(&g, h->addr); + minticks = get16(h->minticks); + + DBG("acpi: hpet id %#ux addr %d %d %d %d %#p seqno %d ticks %d attr %#ux\n", + get32(h->id), g.spc, g.len, g.off, g.accsz, + g.addr, h->seqno, minticks, h->attr); + hpetinit(get32(h->id), h->seqno, g.addr, minticks); +} + +typedef struct Mcfg Mcfg; +struct Mcfg +{ + u64int base; /* base of enhanced configuration mechanism */ + u16int pciseg; /* pci segment group number */ + u8int start; /* first pci bus number covered 
by this */ + u8int end; /* last pci bus number covered */ + /* 4 reserved */ +}; +static Mcfg mcfg[8]; +static int nmcfg; + +static void +parsemcfg(Tbl *t, Parsedat*) +{ + uchar *p, *e; + Mcfg *mc; + + if(nmcfg == nelem(mcfg)) + return; + e = t->data + tbldlen(t); + p = t->data + 8; /* reserved */ +// mc->nbus = 0; + for(; p+16 <= e && nmcfg < nelem(mcfg); p += 16){ /* whole 16-byte entries only, while slots remain */ + mc = &mcfg[nmcfg++]; /* one slot per allocation entry, so later entries no longer overwrite earlier ones */ + mc->base = get64(p+0); + mc->pciseg = get16(p+8); + mc->start = p[10]; + mc->end = p[11]; +print("MCFG: %d-%d %d %#P\n", mc->start, mc->end, mc->pciseg, mc->base); + } +} + +uintmem +pcixcfgspace(int b) +{ + Mcfg *m; + int i; + + for(i = 0; i < nmcfg; i++){ + m = &mcfg[i]; + if(m->start <= b && b <= m->end) + return m->base; + } + return 0; /* no recorded range covers bus b */ +} + +enum { + Blegacy = 1<<0, + B8042kbd = 1<<1, + Bnovga = 1<<2, + Bnomsi = 1<<3, + Bnocmos = 1<<5, /* bit 4 is PCIe ASPM controls; CMOS RTC not present is bit 5 */ +}; + +static void +iapcbootarch(void) +{ + int i; + + i = fadt.iapcbootarch; + + ioconf.nolegacyprobe = !(i&Blegacy); + ioconf.noi8042kbd = !(i&B8042kbd); + ioconf.novga = i&Bnovga; + ioconf.nomsi = i&Bnomsi; + ioconf.nocmos = i&Bnocmos; +} + +typedef struct Ptab Ptab; +struct Ptab { + char *sig; + void (*parse)(Tbl*, Parsedat*); + int required; +}; + +static Ptab ptab[] = { + "APIC", parseapic, 1, + "SRAT", parsesrat, 0, + "FACP", parsefadt, 0, + "HPET", parsehpet, 0, + "MCFG", parsemcfg, 0, +}; + +static void +parsetables(Parsedat *dat) +{ + int i; + Tbl *t; + Ptab *p; + + print("acpi parse: "); + for(i = 0; i < nelem(ptab); i++){ + p = ptab + i; + if((t = acpigettbl(p->sig)) != nil){ + p->parse(t, dat); + print("%s ", p->sig); + }else if(p->required) + panic("acpi: parsetables: no %s table\n", p->sig); + } + print("\n"); +} + +int +checkpnpid(void *dot, char *pnpid) +{ + char *id; + void *p, *x; + + if(dot == nil) + return -1; + id = nil; + if((x = amlwalk(dot, "_HID")) != nil) + if((p = amlval(x)) != nil) + id = eisaid(p); + if(id == nil) + return -1; + return strcmp(id, pnpid); +} + +enum { + Present = 1<<0, + Decode = 1<<1, + Funct = 1<<3, + Staok = Present |
Funct, +}; + +int +staok(void *dot) +{ + int b; + void *p, *x; + + if((x = amlwalk(dot, "_STA")) == nil) + return 0; + if(amleval(x, "", &p) != 0) + return 0; + b = amlint(p); + return b & Staok; +} + +static int +evalini(void *dot) +{ + int b; + void *p, *x; + + b = staok(dot); + if((b & Present) == 0) + return -1; + if((x = amlwalk(dot, "_INI")) != nil){ + if(amleval(x, "", &p) == 0) + print("eval _INI → %V\n", p); + } + return 0; +} + +int +cfgpwerb(void) +{ + void *dot; + + dot = amlwalk(amlroot, "\\_SB_.PWRB"); + if(checkpnpid(dot, "PNP0C0C") != 0) + return 0; +// print("PWRB %V\n", dot); + if(evalini(dot) == -1){ + print("PWRB evalini fails\n"); + return 0; + } + return 1; +} + +static void +cfgsleep(void) +{ + char buf[16]; + uint *t, i; + void **v; + + /* default soft off values */ + t = acpicfg.sval[5]; + t[0] = 7; + t[1] = 0; + + /* look for the proper ones */ + for(i = 0; i < 6; i++){ + t = acpicfg.sval[i]; + snprint(buf, sizeof buf, "\\_S%d_", i); + v = amlval(amlwalk(amlroot, buf)); + if(v != nil && amltag(v) == 'p' && amllen(v) == 4){ + t[0] = amlint(v[0]); + t[1] = amlint(v[1]); + } + } +} + +void +acpiinit(int maxmach) +{ + int i; + Parsedat dat; + + print("acpiinit\n"); + fmtinstall('G', Gfmt); + maptables(); + amlinit(); + loadtbls("DSDT", 0); + loadtbls("SSDT", 1); + + memset(&dat, 0, sizeof dat); + dat.maxmach = maxmach; + parsetables(&dat); + if(fadt.smicmd != 0) + iapcbootarch(); + + cfgpwerb(); + cfgsleep(); + + /* free the AML interpreter */ + amlexit(); + + addarchfile("acpifadt", 0444, readfadt, nil); /* hack */ + addarchfile("acpitbls", 0444, readtbls, nil); + if(1||DBGFLG){ + print("acpi load: "); + for(i = 0; i < ntblmap; i++) + print("%.4s ", (char*)tblmap[i]->sig); + print("\n"); + } + lapicdump(); + iordtdump(); +} + +int +corecolor(int) +{ + return 0; +} diff -Nru /sys/src/9k/k10/acpi.h /sys/src/9k/k10/acpi.h --- /sys/src/9k/k10/acpi.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/acpi.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,129 @@ 
+typedef struct Fadt Fadt; +typedef struct Gas Gas; +typedef struct Tbl Tbl; + +typedef struct Acpicfg Acpicfg; + +/* + * Header for ACPI description tables + */ +struct Tbl { + uchar sig[4]; /* e.g. "FACP" */ + uchar len[4]; + uchar rev; + uchar csum; + uchar oemid[6]; + uchar oemtblid[8]; + uchar oemrev[4]; + uchar creatorid[4]; + uchar creatorrev[4]; + uchar data[]; +}; + +/* + * Generic address structure. + */ +struct Gas +{ + uchar spc; /* address space id */ + uchar len; /* register size in bits */ + uchar off; /* bit offset */ + uchar accsz; /* 1: byte; 2: word; 3: dword; 4: qword */ + u64int addr; /* address (or acpi encoded tbdf + reg) */ +}; + +/* + * Fixed ACPI description table. + */ +struct Fadt { + int rev; + + u32int facs; + u32int dsdt; + uchar pmprofile; + u16int sciint; + u32int smicmd; + uchar acpienable; + uchar acpidisable; + uchar s4biosreq; + uchar pstatecnt; + u32int pm1aevtblk; + u32int pm1bevtblk; + u32int pm1acntblk; + u32int pm1bcntblk; + u32int pm2cntblk; + u32int pmtmrblk; + u32int gpe0blk; + u32int gpe1blk; + uchar pm1evtlen; + uchar pm1cntlen; + uchar pm2cntlen; + uchar pmtmrlen; + uchar gpe0blklen; + uchar gpe1blklen; + uchar gp1base; + uchar cstcnt; + u16int plvl2lat; + u16int plvl3lat; + u16int flushsz; + u16int flushstride; + uchar dutyoff; + uchar dutywidth; + uchar dayalrm; + uchar monalrm; + uchar century; + u16int iapcbootarch; + u32int flags; + Gas resetreg; + uchar resetval; + u64int xfacs; + u64int xdsdt; + Gas xpm1aevtblk; + Gas xpm1bevtblk; + Gas xpm1acntblk; + Gas xpm1bcntblk; + Gas xpm2cntblk; + Gas xpmtmrblk; + Gas xgpe0blk; + Gas xgpe1blk; +}; +#pragma varargck type "G" Gas* + +struct Acpicfg { + uint sval[6][2]; /* p1a.ctl, p1b.ctl */ +}; + +Tbl* acpigettbl(void*); + +extern Fadt fadt; +extern Acpicfg acpicfg; + +extern void hpetinit(uint, uint, uintmem, int); + +enum{ + MemHotPlug= 1<<1, + MemNonVolatile= 1<<2, +}; +extern void memaffinity(u64int, u64int, u32int, int); + +/* + * ACPI 4.0 E820 AddressRange types 
(table 14-1) + */ +enum { + AddrsNone = 0, + AddrsMemory = 1, + AddrsReserved = 2, + AddrsACPI = 3, + AddrsNVS = 4, + AddrsUnusable = 5, + AddrsDisabled = 6, + + AddrsDEV = 9, /* our internal code */ + + AddrsNVDIMM = 0x5a, /* Viking NVDIMM */ + + /* extended attribute flags, not currently used */ + AddrsNonVolatile = 1<<1, + AddrsSlowAccess = 1<<2, + AddrsErrorLog = 1<<3, +}; diff -Nru /sys/src/9k/k10/amd64.h /sys/src/9k/k10/amd64.h --- /sys/src/9k/k10/amd64.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/amd64.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,209 @@ +enum { /* Cr0 */ + Pe = 0x00000001, /* Protected Mode Enable */ + Mp = 0x00000002, /* Monitor Coprocessor */ + Em = 0x00000004, /* Emulate Coprocessor */ + Ts = 0x00000008, /* Task Switched */ + Et = 0x00000010, /* Extension Type */ + Ne = 0x00000020, /* Numeric Error */ + Wp = 0x00010000, /* Write Protect */ + Am = 0x00040000, /* Alignment Mask */ + Nw = 0x20000000, /* Not Writethrough */ + Cd = 0x40000000, /* Cache Disable */ + Pg = 0x80000000, /* Paging Enable */ +}; + +enum { /* Cr3 */ + Pwt = 0x00000008, /* Page-Level Writethrough */ + Pcd = 0x00000010, /* Page-Level Cache Disable */ +}; + +enum { /* Cr4 */ + Vme = 0x00000001, /* Virtual-8086 Mode Extensions */ + Pvi = 0x00000002, /* Protected Mode Virtual Interrupts */ + Tsd = 0x00000004, /* Time-Stamp Disable */ + De = 0x00000008, /* Debugging Extensions */ + Pse = 0x00000010, /* Page-Size Extensions */ + Pae = 0x00000020, /* Physical Address Extension */ + Mce = 0x00000040, /* Machine Check Enable */ + Pge = 0x00000080, /* Page-Global Enable */ + Pce = 0x00000100, /* Performance Monitoring Counter Enable */ + Osfxsr = 0x00000200, /* FXSAVE/FXRSTOR Support */ + Osxmmexcpt = 0x00000400, /* Unmasked Exception Support */ +}; + +enum { /* cpuid fn 1 dx */ + Pat = 1<<16, /* page table attributes (memory type control */ +}; + +enum { /* Rflags */ + Cf = 0x00000001, /* Carry Flag */ + Pf = 0x00000004, /* Parity Flag */ + Af = 0x00000010, /* Auxiliary Flag */ + Zf = 
0x00000040, /* Zero Flag */ + Sf = 0x00000080, /* Sign Flag */ + Tf = 0x00000100, /* Trap Flag */ + If = 0x00000200, /* Interrupt Flag */ + Df = 0x00000400, /* Direction Flag */ + Of = 0x00000800, /* Overflow Flag */ + Iopl0 = 0x00000000, /* I/O Privilege Level */ + Iopl1 = 0x00001000, + Iopl2 = 0x00002000, + Iopl3 = 0x00003000, + Nt = 0x00004000, /* Nested Task */ + Rf = 0x00010000, /* Resume Flag */ + Vm = 0x00020000, /* Virtual-8086 Mode */ + Ac = 0x00040000, /* Alignment Check */ + Vif = 0x00080000, /* Virtual Interrupt Flag */ + Vip = 0x00100000, /* Virtual Interrupt Pending */ + Id = 0x00200000, /* ID Flag */ +}; + +enum { /* MSRs */ + PerfEvtbase = 0xc0010000, /* Performance Event Select */ + PerfCtrbase = 0xc0010004, /* Performance Counters */ + + Efer = 0xc0000080, /* Extended Feature Enable */ + Star = 0xc0000081, /* Legacy Target IP and [CS]S */ + Lstar = 0xc0000082, /* Long Mode Target IP */ + Cstar = 0xc0000083, /* Compatibility Target IP */ + Sfmask = 0xc0000084, /* SYSCALL Flags Mask */ + FSbase = 0xc0000100, /* 64-bit FS Base Address */ + GSbase = 0xc0000101, /* 64-bit GS Base Address */ + KernelGSbase = 0xc0000102, /* SWAPGS instruction */ +}; + +enum { /* Efer */ + Sce = 0x00000001, /* System Call Extension */ + Lme = 0x00000100, /* Long Mode Enable */ + Lma = 0x00000400, /* Long Mode Active */ + Nxe = 0x00000800, /* No-Execute Enable */ + Svme = 0x00001000, /* SVM Extension Enable */ + Ffxsr = 0x00004000, /* Fast FXSAVE/FXRSTOR */ +}; + +enum { /* PML4E/PDPE/PDE/PTE */ + PteP = 0x0000000000000001ull,/* Present */ + PteRW = 0x0000000000000002ull,/* Read/Write */ + PteU = 0x0000000000000004ull,/* User/Supervisor */ + PtePWT = 0x0000000000000008ull,/* Page-Level Write Through */ + PtePCD = 0x0000000000000010ull,/* Page Level Cache Disable */ + PteA = 0x0000000000000020ull,/* Accessed */ + PteD = 0x0000000000000040ull,/* Dirty */ + PtePS = 0x0000000000000080ull,/* Page Size */ + Pte4KPAT = PtePS, /* PTE PAT */ + PteG = 0x0000000000000100ull,/* Global 
*/ + Pte2MPAT = 0x0000000000001000ull,/* PDE PAT */ + Pte1GPAT = Pte2MPAT, /* PDPE PAT */ + PteNX = 0x8000000000000000ull,/* No Execute */ +}; + +enum { + PATUC = 0, /* uncachable */ + PATWC = 1, /* use write-combining buffers */ + PATWT = 4, /* write-through */ + PATWP = 5, /* write protect */ + PATWB = 6, /* write back */ + PATUCMINUS = 7, /* UC-; strongly uncacheable */ +}; + +enum { /* Exceptions */ + IdtDE = 0, /* Divide-by-Zero Error */ + IdtDB = 1, /* Debug */ + IdtNMI = 2, /* Non-Maskable-Interrupt */ + IdtBP = 3, /* Breakpoint */ + IdtOF = 4, /* Overflow */ + IdtBR = 5, /* Bound-Range */ + IdtUD = 6, /* Invalid-Opcode */ + IdtNM = 7, /* Device-Not-Available */ + IdtDF = 8, /* Double-Fault */ + Idt09 = 9, /* unsupported */ + IdtTS = 10, /* Invalid-TSS */ + IdtNP = 11, /* Segment-Not-Present */ + IdtSS = 12, /* Stack */ + IdtGP = 13, /* General-Protection */ + IdtPF = 14, /* Page-Fault */ + Idt0F = 15, /* reserved */ + IdtMF = 16, /* x87 FPE-Pending */ + IdtAC = 17, /* Alignment-Check */ + IdtMC = 18, /* Machine-Check */ + IdtXF = 19, /* SIMD Floating-Point */ +}; + +/* + * Vestigial Segmented Virtual Memory. 
+ */ +enum { /* Segment Descriptor */ + SdISTM = 0x0000000700000000ull,/* Interrupt Stack Table Mask */ + SdA = 0x0000010000000000ull,/* Accessed */ + SdR = 0x0000020000000000ull,/* Readable (Code) */ + SdW = 0x0000020000000000ull,/* Writeable (Data) */ + SdE = 0x0000040000000000ull,/* Expand Down */ + SdaTSS = 0x0000090000000000ull,/* Available TSS */ + SdbTSS = 0x00000b0000000000ull,/* Busy TSS */ + SdCG = 0x00000c0000000000ull,/* Call Gate */ + SdIG = 0x00000e0000000000ull,/* Interrupt Gate */ + SdTG = 0x00000f0000000000ull,/* Trap Gate */ + SdCODE = 0x0000080000000000ull,/* Code/Data */ + SdS = 0x0000100000000000ull,/* System/User */ + SdDPL0 = 0x0000000000000000ull,/* Descriptor Privilege Level */ + SdDPL1 = 0x0000200000000000ull, + SdDPL2 = 0x0000400000000000ull, + SdDPL3 = 0x0000600000000000ull, + SdP = 0x0000800000000000ull,/* Present */ + Sd4G = 0x000f00000000ffffull,/* 4G Limit */ + SdL = 0x0020000000000000ull,/* Long Attribute */ + SdD = 0x0040000000000000ull,/* Default Operand Size */ + SdG = 0x0080000000000000ull,/* Granularity */ +}; + +/* + * Performance Counter Configuration + */ +enum { /* Performance Event Selector */ + + PeHo = 0x0000020000000000ull,/* Host only */ + PeGo = 0x0000010000000000ull,/* Guest only */ + PeEvMskH = 0x0000000f00000000ull,/* Event mask H */ + PeCtMsk = 0x00000000ff000000ull,/* Counter mask */ + PeInMsk = 0x0000000000800000ull,/* Invert mask */ + PeCtEna = 0x0000000000400000ull,/* Counter enable */ + PeInEna = 0x0000000000100000ull,/* Interrupt enable */ + PePnCtl = 0x0000000000080000ull,/* Pin control */ + PeEdg = 0x0000000000040000ull,/* Edge detect */ + PeOS = 0x0000000000020000ull,/* OS mode */ + PeUsr = 0x0000000000010000ull,/* User mode */ + PeUnMsk = 0x000000000000ff00ull,/* Unit Mask */ + PeEvMskL = 0x00000000000000ffull,/* Event Mask L */ + + PeEvMsksh = 32ull, /* Event mask shift */ +}; + +enum { /* Segment Selector */ + SsRPL0 = 0x0000, /* Requestor Privilege Level */ + SsRPL1 = 0x0001, + SsRPL2 = 0x0002, + 
SsRPL3 = 0x0003, + SsTIGDT = 0x0000, /* GDT Table Indicator */ + SsTILDT = 0x0004, /* LDT Table Indicator */ + SsSIM = 0xfff8, /* Selector Index Mask */ +}; + +#define SSEL(si, tirpl) (((si)<<3)|(tirpl)) /* Segment Selector */ + +enum { + SiNULL = 0, /* NULL selector index */ + SiCS = 1, /* CS selector index */ + SiDS = 2, /* DS selector index */ + SiU32CS = 3, /* User CS selector index */ + SiUDS = 4, /* User DS selector index */ + SiUCS = 5, /* User CS selector index */ + SiFS = 6, /* FS selector index */ + SiGS = 7, /* GS selector index */ + SiTSS = 8, /* TSS selector index */ +}; + +/* + * Extern registers. + */ +#define RMACH R15 /* m-> */ +#define RUSER R14 /* up-> */ diff -Nru /sys/src/9k/k10/apic.h /sys/src/9k/k10/apic.h --- /sys/src/9k/k10/apic.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/apic.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,123 @@ +/* + * There are 2 flavours of APIC, Local APIC and IOAPIC, + * which don't necessarily share one APIC ID space. + * Each IOAPIC has a unique address, Local APICs are all + * at the same address as they can only be accessed by + * the local CPU. + */ +typedef struct IOapic IOapic; +typedef struct Lapic Lapic; + +struct IOapic { + int useable; /* en */ + + Lock; /* register access */ + u32int* addr; /* register base */ + uintmem paddr; /* register base */ + int nrdt; /* size of RDT */ + int gsib; /* global RDT index */ +}; + +struct Lapic { + int useable; /* en */ + int machno; + int dom; + u32int lvt[10]; + int nlvt; + int ver; /* unused */ + + vlong hz; /* APIC Timer frequency */ + vlong max; + vlong min; + vlong div; +}; + +enum { + Nbus = 256, + Napic = 254, /* xAPIC architectural limit */ + Nrdt = 128, +}; + +/* + * Common bits for + * IOAPIC Redirection Table Entry (RDT); + * APIC Local Vector Table Entry (LVT); + * APIC Interrupt Command Register (ICR). 
+ * [10:8] Message Type + * [11] Destination Mode (RW) + * [12] Delivery Status (RO) + * [13] Interrupt Input Pin Polarity (RW) + * [14] Remote IRR (RO) + * [15] Trigger Mode (RW) + * [16] Interrupt Mask + */ +enum { + MTf = 0x00000000, /* Fixed */ + MTlp = 0x00000100, /* Lowest Priority */ + MTsmi = 0x00000200, /* SMI */ + MTrr = 0x00000300, /* Remote Read */ + MTnmi = 0x00000400, /* NMI */ + MTir = 0x00000500, /* INIT/RESET */ + MTsipi = 0x00000600, /* Startup IPI */ + MTei = 0x00000700, /* ExtINT */ + + Pm = 0x00000000, /* Physical Mode */ + Lm = 0x00000800, /* Logical Mode */ + + Ds = 0x00001000, /* Delivery Status */ + IPhigh = 0x00000000, /* IIPP High */ + IPlow = 0x00002000, /* IIPP Low */ + Rirr = 0x00004000, /* Remote IRR Status */ + TMedge = 0x00000000, /* Trigger Mode Edge */ + TMlevel = 0x00008000, /* Trigger Mode Level */ + Im = 0x00010000, /* Interrupt Mask */ +}; + +#define l16get(p) (((p)[1]<<8)|(p)[0]) +#define l32get(p) (((u32int)l16get(p+2)<<16)|l16get(p)) +#define l64get(p) (((u64int)l32get(p+4)<<32)|l32get(p)) + +void apictimerenab(void); +int gsitoapicid(int, uint*); + +void ioapicdump(void); +IOapic* ioapicinit(int, int, uintmem); +void ioapicintrinit(int, int, int, int, int, u32int); +IOapic* ioapiclookup(uint); +void ioapiconline(void); +void iordtdump(void); + +void lapicdump(void); +int lapiceoi(int); +void lapicinit(int, uintmem, int); +void lapicipi(int); +int lapicisr(int); +Lapic* lapiclookup(uint); +int lapiconline(void); +void lapicpri(int); +void lapicsetdom(int, int); +void lapicsipi(int, uintmem); + +int pcimsienable(Pcidev*, uvlong); +int pcimsimask(Pcidev*, int); + +/* + * lapic.c + */ +extern int lapiceoi(int); +extern void lapicinit(int, uintptr, int); +extern int lapicisr(int); +extern int lapiconline(void); +extern void lapicsipi(int, uintptr); +extern void lapictimerdisable(void); +extern void lapictimerenable(void); +extern void lapictprput(int); + +/* + * ioapic.c + */ +extern IOapic* ioapicinit(int, int, uintmem); 
+extern void ioapicintrinit(int, int, int, int, int, u32int); +extern void ioapiconline(void); +extern int ioapicintrenable(Vctl*); +extern int ioapicintrdisable(int); diff -Nru /sys/src/9k/k10/arch.c /sys/src/9k/k10/arch.c --- /sys/src/9k/k10/arch.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/arch.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,78 @@ +/* + * EPISODE 12B + * How to recognise different types of trees from quite a long way away. + * NO. 1 + * THE LARCH + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +void +procrestore(Proc *p) +{ + uvlong t; + + if(p->kp) + return; + cycles(&t); + p->pcycles -= t; + + fpuprocrestore(p); +} + +/* + * Save the mach dependent part of the process state. + */ +void +procsave(Proc *p) +{ + uvlong t; + + cycles(&t); + p->pcycles += t; + + fpuprocsave(p); + + /* + */ + mmuflushtlb(m->pml4->pa); +} + +static void +linkproc(void) +{ + spllo(); + up->kpfun(up->kparg); + pexit("kproc dying", 0); +} + +void +kprocchild(Proc* p, void (*func)(void*), void* arg) +{ + /* + * gotolabel() needs a word on the stack in + * which to place the return PC used to jump + * to linkproc(). + */ + p->sched.pc = PTR2UINT(linkproc); + p->sched.sp = PTR2UINT(p->kstack+KSTACK-BY2SE); + p->sched.sp = STACKALIGN(p->sched.sp); + + p->kpfun = func; + p->kparg = arg; +} + +/* + * put the processor in the halt state if we've no processes to run. + * an interrupt will get us going again. + */ +void +idlehands(void) +{ + if(sys->nonline == 1) + halt(); +} diff -Nru /sys/src/9k/k10/archk10.c /sys/src/9k/k10/archk10.c --- /sys/src/9k/k10/archk10.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/archk10.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,373 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static int +cpuidinit(void) +{ + u32int eax, info[4]; + + /* + * Standard CPUID functions. 
+ * Functions 0 and 1 will be needed multiple times + * so cache the info now. + */ + if((m->ncpuinfos = cpuid(0, 0, m->cpuinfo[0])) == 0) + return 0; + m->ncpuinfos++; + + if(memcmp(&m->cpuinfo[0][1], "GenuntelineI", 12) == 0) + m->isintelcpu = 1; + cpuid(1, 0, m->cpuinfo[1]); + + /* + * Extended CPUID functions. + */ + if((eax = cpuid(0x80000000, 0, info)) >= 0x80000000) + m->ncpuinfoe = (eax & ~0x80000000) + 1; + + return 1; +} + +static int +cpuidinfo(u32int eax, u32int ecx, u32int info[4]) +{ + if(m->ncpuinfos == 0 && cpuidinit() == 0) + return 0; + + if(!(eax & 0x80000000)){ + if(eax >= m->ncpuinfos) + return 0; + } + else if(eax >= (0x80000000|m->ncpuinfoe)) + return 0; + + cpuid(eax, ecx, info); + + return 1; +} + +static vlong +cpuidhz(u32int info[2][4]) +{ + int f, r; + vlong hz; + u64int msr; + + if(memcmp(&info[0][1], "GenuntelineI", 12) == 0){ + switch(info[1][0] & 0x0fff3ff0){ + default: + return 0; + case 0x00000f30: /* Xeon (MP), Pentium [4D] */ + case 0x00000f40: /* Xeon (MP), Pentium [4D] */ + case 0x00000f60: /* Xeon 7100, 5000 or above */ + msr = rdmsr(0x2c); + r = (msr>>16) & 0x07; + switch(r){ + default: + return 0; + case 0: + hz = 266666666666ll; + break; + case 1: + hz = 133333333333ll; + break; + case 2: + hz = 200000000000ll; + break; + case 3: + hz = 166666666666ll; + break; + case 4: + hz = 333333333333ll; + break; + } + + /* + * Hz is *1000 at this point. + * Do the scaling then round it. + * The manual is conflicting about + * the size of the msr field. + */ + hz = (((hz*(msr>>24))/100)+5)/10; + break; + case 0x00000690: /* Pentium M, Celeron M */ + case 0x000006d0: /* Pentium M, Celeron M */ + hz = ((rdmsr(0x2a)>>22) & 0x1f)*100 * 1000000ll; + break; + case 0x000006e0: /* Core Duo */ + case 0x000006f0: /* Core 2 Duo/Quad/Extreme */ + case 0x00010670: /* Core 2 Extreme */ + /* + * Get the FSB frequemcy. + * If processor has Enhanced Intel Speedstep Technology + * then non-integer bus frequency ratios are possible. 
+ */ + if(info[1][2] & 0x00000080){ + msr = rdmsr(0x198); + r = (msr>>40) & 0x1f; + } + else{ + msr = 0; + r = rdmsr(0x2a) & 0x1f; + } + f = rdmsr(0xcd) & 0x07; + switch(f){ + default: + return 0; + case 5: + hz = 100000000000ll; + break; + case 1: + hz = 133333333333ll; + break; + case 3: + hz = 166666666666ll; + break; + case 2: + hz = 200000000000ll; + break; + case 0: + hz = 266666666666ll; + break; + case 4: + hz = 333333333333ll; + break; + case 6: + hz = 400000000000ll; + break; + } + + /* + * Hz is *1000 at this point. + * Do the scaling then round it. + */ + if(msr & 0x0000400000000000ll) + hz = hz*r + hz/2; + else + hz = hz*r; + hz = ((hz/100)+5)/10; + break; + } + DBG("cpuidhz: 0x2a: %#llux hz %lld\n", rdmsr(0x2a), hz); + } + else if(memcmp(&info[0][1], "AuthcAMDenti", 12) == 0){ + switch(info[1][0] & 0x0fff0ff0){ + default: + return 0; + case 0x00000f50: /* K8 */ + msr = rdmsr(0xc0010042); + if(msr == 0) + return 0; + hz = (800 + 200*((msr>>1) & 0x1f)) * 1000000ll; + break; + case 0x00100f90: /* K10 */ + case 0x00000620: /* QEMU64 */ + msr = rdmsr(0xc0010064); + r = (msr>>6) & 0x07; + hz = (((msr & 0x3f)+0x10)*100000000ll)/(1<ncpuinfos == 0 && cpuidinit() == 0) + return; + + for(i = 0; i < m->ncpuinfos; i++){ + cpuid(i, 0, info); + DBG("eax = %#8.8ux: %8.8ux %8.8ux %8.8ux %8.8ux\n", + i, info[0], info[1], info[2], info[3]); + } + for(i = 0; i < m->ncpuinfoe; i++){ + cpuid(0x80000000|i, 0, info); + DBG("eax = %#8.8ux: %8.8ux %8.8ux %8.8ux %8.8ux\n", + 0x80000000|i, info[0], info[1], info[2], info[3]); + } +} + +vlong +archhz(void) +{ + vlong hz; + u32int info[2][4]; + + if(DBGFLG && m->machno == 0) + cpuiddump(); + if(!cpuidinfo(0, 0, info[0]) || !cpuidinfo(1, 0, info[1])) + return 0; + + hz = cpuidhz(info); + if(hz != 0) + return hz; + else if(m->machno != 0) + return sys->machptr[0]->cpuhz; + + return i8254hz(info); +} + +void +archenable(void) +{ + if((m->cpuinfo[1][3] & (1<<3)) == 0){ + print("cpu%d: no monitor/mwait\n", m->machno); + }else + 
wrmsr(0x1a0, rdmsr(0x1a0) | (1<<18)); /* enable monitor/mwait */ +} + +int +archmmu(void) +{ + u32int info[4]; + + /* + * Should the check for m->machno != 0 be here + * or in the caller (mmuinit)? + * + * To do here: + * check and enable Pse; + * Pge; Nxe. + */ + + /* + * How many page sizes are there? + * Always have 4*KiB, but need to check + * configured correctly. + */ + assert(PGSZ == 4*KiB); + + m->pgszlg2[0] = 12; + m->pgszmask[0] = (1<<12)-1; + m->npgsz = 1; + if(m->ncpuinfos == 0 && cpuidinit() == 0) + return 1; + + /* + * Check the Pse bit in function 1 DX for 2*MiB support; + * if false, only 4*KiB is available. + */ + if(!(m->cpuinfo[1][3] & 0x00000008)) + return 1; + m->pgszlg2[1] = 21; + m->pgszmask[1] = (1<<21)-1; + m->npgsz = 2; + + /* + * Check the Page1GB bit in function 0x80000001 DX for 1*GiB support. + */ + if(cpuidinfo(0x80000001, 0, info) && (info[3] & 0x04000000)){ + m->pgszlg2[2] = 30; + m->pgszmask[2] = (1<<30)-1; + m->npgsz = 3; + } + + return m->npgsz; +} + +static int +fmtP(Fmt* f) +{ + uintmem pa; + + pa = va_arg(f->args, uintmem); + + if(f->flags & FmtSharp) + return fmtprint(f, "%#16.16llux", pa); + + return fmtprint(f, "%llud", pa); +} + +static int +fmtL(Fmt* f) +{ + Mpl pl; + + pl = va_arg(f->args, Mpl); + + return fmtprint(f, "%#16.16llux", pl); +} + +static int +fmtR(Fmt* f) +{ + u64int r; + + r = va_arg(f->args, u64int); + + return fmtprint(f, "%#16.16llux", r); +} + +void +archfmtinstall(void) +{ + /* + * Architecture-specific formatting. Not as neat as they + * could be (e.g. there's no defined type for a 'register': + * L - Mpl, mach priority level + * P - uintmem, physical address + * R - register + * With a little effort these routines could be written + * in a fairly architecturally-independent manner, relying + * on the compiler to optimise-away impossible conditions, + * and/or by exploiting the innards of the fmt library. 
+ */ + fmtinstall('P', fmtP); + + fmtinstall('L', fmtL); + fmtinstall('R', fmtR); +} + +void +archidle(void) +{ + halt(); +} + +void +microdelay(int microsecs) +{ + u64int r, t; + + r = rdtsc(); + for(t = r + m->cpumhz*microsecs; r < t; r = rdtsc()) + pause(); +} + +void +millidelay(int millisecs) +{ + u64int r, t; + + r = rdtsc(); + for(t = r + m->cpumhz*1000ull*millisecs; r < t; r = rdtsc()) + pause(); +} + +int +isdmaok(void *a, usize len, int range) +{ + uintmem pa; + + if(!iskaddr(a) || (char*)a < etext) + return 0; + pa = mmuphysaddr(PTR2UINT(a)); + if(pa == 0 || pa == ~(uintmem)0) + return 0; + return range > 32 || pa+len <= 0xFFFFFFFFULL; +} diff -Nru /sys/src/9k/k10/cga.c /sys/src/9k/k10/cga.c --- /sys/src/9k/k10/cga.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/cga.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,138 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum { + Black = 0x00, + Blue = 0x01, + Green = 0x02, + Cyan = 0x03, + Red = 0x04, + Magenta = 0x05, + Brown = 0x06, + Grey = 0x07, + + Bright = 0x08, + Blinking = 0x80, + + Attr = (Black<<4)|Grey, /* (background<<4)|foreground */ +}; + +enum { + Index = 0x3d4, + Data = Index+1, + + Width = 80*2, + Height = 25, + + Poststrlen = 0, + Postcodelen = 2, + Postlen = Poststrlen+Postcodelen, +}; + +#define CGA (BIOSSEG(0xb800)) + +static Lock cgalock; +static int cgapos; +static int cgainitdone; + +static int +cgaregr(int index) +{ + outb(Index, index); + return inb(Data) & 0xff; +} + +static void +cgaregw(int index, int data) +{ + outb(Index, index); + outb(Data, data); +} + +static void +cgacursor(void) +{ + uchar *cga; + + cgaregw(0x0e, (cgapos/2>>8) & 0xff); + cgaregw(0x0f, cgapos/2 & 0xff); + + cga = CGA; + cga[cgapos+1] = Attr; +} + +static void +cgaputc(int c) +{ + int i; + uchar *cga, *p; + + cga = CGA; + + if(c == '\n'){ + cgapos = cgapos/Width; + cgapos = (cgapos+1)*Width; + } + else if(c == '\t'){ + i = 8 - ((cgapos/2)&7); + while(i-- > 0) + 
cgaputc(' '); + } + else if(c == '\b'){ + if(cgapos >= 2) + cgapos -= 2; + cgaputc(' '); + cgapos -= 2; + } + else{ + cga[cgapos++] = c; + cga[cgapos++] = Attr; + } + if(cgapos >= (Width*Height)-Postlen*2){ + memmove(cga, &cga[Width], Width*(Height-1)); + p = &cga[Width*(Height-1)-Postlen*2]; + for(i = 0; i < Width/2; i++){ + *p++ = ' '; + *p++ = Attr; + } + cgapos -= Width; + } + cgacursor(); +} + +void +cgaconsputs(char* s, int n) +{ + ilock(&cgalock); + while(n-- > 0) + cgaputc(*s++); + iunlock(&cgalock); +} + +void +cgapost(int code) +{ + uchar *cga; + + static char hex[] = "0123456789ABCDEF"; + + cga = CGA; + cga[Width*Height-Postcodelen*2] = hex[(code>>4) & 0x0f]; + cga[Width*Height-Postcodelen*2+1] = Attr; + cga[Width*Height-Postcodelen*2+2] = hex[code & 0x0f]; + cga[Width*Height-Postcodelen*2+3] = Attr; +} + +void +cgainit(void) +{ + ilock(&cgalock); + cgapos = cgaregr(0x0e)<<8; + cgapos |= cgaregr(0x0f); + cgapos *= 2; + cgainitdone = 1; + iunlock(&cgalock); +} diff -Nru /sys/src/9k/k10/dat.h /sys/src/9k/k10/dat.h --- /sys/src/9k/k10/dat.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/dat.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,347 @@ +typedef struct Fxsave Fxsave; +typedef struct IOConf IOConf; +typedef struct ISAConf ISAConf; +typedef struct Label Label; +typedef struct Lock Lock; +typedef struct LockEntry LockEntry; +typedef struct MCPU MCPU; +typedef struct MFPU MFPU; +typedef struct MMMU MMMU; +typedef struct Mach Mach; +typedef u64int Mpl; +typedef Mpl Mreg; /* GAK */ +typedef struct Page Page; +typedef struct Ptpage Ptpage; +typedef struct Pcidev Pcidev; +typedef struct PFPU PFPU; +typedef struct PMMU PMMU; +typedef struct PNOTIFY PNOTIFY; +typedef struct PPAGE PPAGE; +typedef u64int PTE; +typedef struct Proc Proc; +typedef struct Sys Sys; +typedef u64int uintmem; /* horrible name */ +typedef struct Ureg Ureg; +typedef struct Vctl Vctl; + +#pragma incomplete Ureg + +#define MAXSYSARG 5 /* for mount(fd, afd, mpt, flag, arg) */ + +/* + * parameters for 
sysproc.c + */ +#define AOUT_MAGIC (S_MAGIC) + +/* + * machine dependent definitions used by ../port/portdat.h + */ +struct Lock +{ + LockEntry* head; + LockEntry* e; +}; + +struct Label +{ + uintptr sp; + uintptr pc; +}; + +struct Fxsave { + u16int fcw; /* x87 control word */ + u16int fsw; /* x87 status word */ + u8int ftw; /* x87 tag word */ + u8int zero; /* 0 */ + u16int fop; /* last x87 opcode */ + u64int rip; /* last x87 instruction pointer */ + u64int rdp; /* last x87 data pointer */ + u32int mxcsr; /* MMX control and status */ + u32int mxcsrmask; /* supported MMX feature bits */ + uchar st[128]; /* shared 64-bit media and x87 regs */ + uchar xmm[256]; /* 128-bit media regs */ + uchar ign[96]; /* reserved, ignored */ +}; + +/* + * FPU stuff in Proc + */ +struct PFPU { + int fpustate; + uchar fxsave[sizeof(Fxsave)+15]; + void* fpusave; +}; + +struct Ptpage +{ + PTE* pte; /* kernel-addressible page table entries */ + uintmem pa; /* physical address (from physalloc) */ + Ptpage* next; /* next in level's set, or free list */ + Ptpage* parent; /* parent page table page or page directory */ + uint ptoff; /* index of this table's entry in parent */ +}; + +/* + * MMU stuff in Proc + */ +struct PMMU +{ + Ptpage* mmuptp[4]; /* page table pages for each level */ + Ptpage* ptpfree; + int nptpbusy; +}; + +/* + * MMU stuff in Page + */ +struct PPAGE +{ + uchar nothing[]; +}; + +/* + * things saved in the Proc structure during a notify + */ +struct PNOTIFY +{ + void emptiness; +}; + +struct IOConf +{ + int nomsi; + int nolegacyprobe; /* acpi tells us. all negated in case acpi unavailable */ + int noi8042kbd; + int novga; + int nocmos; +}; +extern IOConf ioconf; + +#define MAXMDOM 8 /* maximum memory/cpu domains */ + +#include "../port/portdat.h" + +/* + * CPU stuff in Mach. 
+ */ +struct MCPU { + u32int cpuinfo[2][4]; /* CPUID instruction output E[ABCD]X */ + int ncpuinfos; /* number of standard entries */ + int ncpuinfoe; /* number of extended entries */ + int isintelcpu; /* */ +}; + +/* + * FPU stuff in Mach. + */ +struct MFPU { + u16int fcw; /* x87 control word */ + u32int mxcsr; /* MMX control and status */ + u32int mxcsrmask; /* supported MMX feature bits */ +}; + +/* + * MMU stuff in Mach. + */ +enum +{ + NPGSZ = 4 +}; + +struct MMMU +{ + Ptpage* pml4; /* pml4 for this processor */ + PTE* pmap; /* unused as of yet */ + Ptpage* ptpfree; /* per-mach free list */ + int nptpfree; + + uint pgszlg2[NPGSZ]; /* per Mach or per Sys? */ + uintmem pgszmask[NPGSZ]; + uint pgsz[NPGSZ]; + int npgsz; + + Ptpage pml4kludge; +}; + +/* + * Per processor information. + * + * The offsets of the first few elements may be known + * to low-level assembly code, so do not re-order: + * machno - no dependency, convention + * splpc - splhi, spllo, splx + * proc - syscallentry + */ +struct Mach +{ + int machno; /* physical id of processor */ + uintptr splpc; /* pc of last caller to splhi */ + Proc* proc; /* current process on this processor */ + + int apicno; + int online; + int mode; /* fold into online? 
GAK */ + + void* dbgreg; /* registers for debugging this processor */ + void* dbgsp; /* sp for debugging this processor */ + + MMMU; + + uintptr stack; + uchar* vsvm; + void* gdt; + void* tss; + + ulong ticks; /* of the clock since boot time */ + Label sched; /* scheduler wakeup */ + Lock alarmlock; /* access to alarm list */ + void* alarm; /* alarms bound to this clock */ + int inclockintr; + + Proc* readied; /* for runproc */ + ulong schedticks; /* next forced context switch */ + + int color; + + int tlbfault; + int tlbpurge; + int pfault; + int cs; + int syscall; + int load; + int intr; + int mmuflush; /* make current proc flush it's mmu state */ + int ilockdepth; + uintptr ilockpc; + Perf perf; /* performance counters */ + void (*perfintr)(Ureg*, void*); + + int lastintr; + + Lock apictimerlock; + uvlong cyclefreq; /* Frequency of user readable cycle counter */ + vlong cpuhz; + int cpumhz; + u64int rdtsc; + + LockEntry locks[8]; + + MFPU; + MCPU; +}; + +/* + * This is the low memory map, between 0x100000 and 0x110000. + * It is located there to allow fundamental datastructures to be + * created and used before knowing where free memory begins + * (e.g. there may be modules located after the kernel BSS end). + * The layout is known in the bootstrap code in l32p.s. + * It is logically two parts: the per processor data structures + * for the bootstrap processor (stack, Mach, vsvm, and page tables), + * and the global information about the system (syspage, ptrpage). + * Some of the elements must be aligned on page boundaries, hence + * the unions. 
+ */ +struct Sys { + uchar machstk[MACHSTKSZ]; + + PTE pml4[PTSZ/sizeof(PTE)]; /* */ + PTE pdp[PTSZ/sizeof(PTE)]; + PTE pd[PTSZ/sizeof(PTE)]; + PTE pt[PTSZ/sizeof(PTE)]; + + uchar vsvmpage[4*KiB]; + + union { + Mach mach; + uchar machpage[MACHSZ]; + }; + + union { + struct { + u64int pmstart; /* physical memory */ + u64int pmoccupied; /* how much is occupied */ + u64int pmunassigned; /* how much to keep back from page pool */ + u64int pmpaged; /* how much assigned to page pool */ + + uintptr vmstart; /* base address for malloc */ + uintptr vmunused; /* 1st unused va */ + uintptr vmunmapped; /* 1st unmapped va in KSEG0 */ + + u64int epoch; /* crude time synchronisation */ + int nmach; /* how many machs */ + int nonline; /* how many machs are online */ + uint ticks; /* since boot (type?) */ + uint copymode; /* 0=copy on write; 1=copy on reference */ + }; + uchar syspage[4*KiB]; + }; + + union { + Mach* machptr[MACHMAX]; + uchar ptrpage[4*KiB]; + }; + + uchar _57344_[2][4*KiB]; /* unused */ +}; + +extern Sys* sys; + +/* + * KMap + */ +typedef void KMap; +extern KMap* kmap(Page*); + +#define kunmap(k) +#define VA(k) (void*)(k) + +struct +{ + Lock; + int exiting; /* shutdown */ + int ispanic; /* shutdown in response to a panic */ + int thunderbirdsarego; /* F.A.B. */ +}active; + +/* + * a parsed plan9.ini line + */ +#define NISAOPT 8 + +struct ISAConf { + char *type; + uintptr port; + int irq; + ulong dma; + uintptr mem; + usize size; + ulong freq; + int tbdf; /* type+busno+devno+funcno */ + + int nopt; + char *opt[NISAOPT]; +}; + +/* + * The Mach structures must be available via the per-processor + * MMU information array machptr, mainly for disambiguation and access to + * the clock which is only maintained by the bootstrap processor (0). + */ +extern register Mach* m; /* R15 */ +extern register Proc* up; /* R14 */ + +/* + * Horrid. + */ +#ifdef _DBGC_ +#define DBGFLG (dbgflg[_DBGC_]) +#else +#define DBGFLG (0) +#endif /* _DBGC_ */ + +#define DBG(...) 
if(!DBGFLG){}else dbgprint(__VA_ARGS__) + +extern char dbgflg[256]; + +#define dbgprint print /* for now */ diff -Nru /sys/src/9k/k10/devacpi.c /sys/src/9k/k10/devacpi.c --- /sys/src/9k/k10/devacpi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/devacpi.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,558 @@ +/* + * ACPI 5.0 support. overly ornate. + * - split table parsing out from file server + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "acpi.h" +#include + +enum { + /* ACPI PM1 control */ + Pscien = 1<<0, /* Generate SCI and not SMI */ + Pbmrld = 1<<1, /* busmaster → C0 */ + Pgblrls = 1<<2, /* global release */ + + /* pm1 events */ + Etimer = 1<<0, + Ebme = 1<<4, + Eglobal = 1<<5, + Epowerbtn = 1<<8, /* power button pressed */ + Esleepbtn = 1<<9, + Ertc = 1<<10, + Epciewake = 1<<14, + Ewake = 1<<15, +}; + +typedef struct Aconf Aconf; +typedef struct Gpe Gpe; + +struct Aconf { + Lock; + int init; + void (*powerbutton)(void); + + u32int eventopen; + Queue *event; +}; + +struct Gpe { + uintptr stsio; /* port used for status */ + int stsbit; /* bit number */ + uintptr enio; /* port used for enable */ + int enbit; /* bit number */ + int nb; /* event number */ + char* obj; /* handler object */ + int id; /* id as supplied by user */ +}; + +enum { + CMgpe, /* gpe name id */ + CMpowerbut, + CMpower, + + Qdir = 0, + Qctl, + Qevent, +}; + +static Cmdtab ctls[] = { + {CMgpe, "gpe", 3}, + {CMpowerbut, "powerbutton", 2}, + {CMpower, "power", 2}, +}; + +static Dirtab acpidir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "acpictl", {Qctl}, 0, 0666, + "acpievent", {Qevent, 0, QTEXCL}, 0, DMEXCL|0440, +}; + +static Gpe* gpes; /* General purpose events */ +static int ngpe; +static Aconf aconf; + +static int +acpigen(Chan *c, char*, Dirtab *tab, int ntab, int i, Dir *dp) +{ + Qid qid; + + if(i == DEVDOTDOT){ + mkqid(&qid, Qdir, 0, QTDIR); + devdir(c, qid, ".", 0, eve, 0555, dp); + 
return 1; + } + i++; /* skip first element for . itself */ + if(tab==0 || i>=ntab) + return -1; + tab += i; + qid = tab->qid; + qid.path &= ~Qdir; + qid.vers = 0; + devdir(c, qid, tab->name, tab->length, eve, tab->perm, dp); + return 1; +} + +/* ra/rb are int not uintmem because inb/outb are in the i/o address space. */ +static uint +getbanked(int ra, int rb, int sz) +{ + uint r; + + r = 0; + switch(sz){ + case 1: + if(ra != 0) + r |= inb(ra); + if(rb != 0) + r |= inb(rb); + break; + case 2: + if(ra != 0) + r |= ins(ra); + if(rb != 0) + r |= ins(rb); + break; + case 4: + if(ra != 0) + r |= inl(ra); + if(rb != 0) + r |= inl(rb); + break; + default: + print("getbanked: wrong size\n"); + } + return r; +} + +static uint +setbanked(int ra, int rb, int sz, int v) +{ + uint r; + + r = -1; + switch(sz){ + case 1: + if(ra != 0) + outb(ra, v); + if(rb != 0) + outb(rb, v); + break; + case 2: + if(ra != 0) + outs(ra, v); + if(rb != 0) + outs(rb, v); + break; + case 4: + if(ra != 0) + outl(ra, v); + if(rb != 0) + outl(rb, v); + break; + default: + print("setbanked: wrong size\n"); + } + return r; +} + +/* + * we must read the register group *as a whole* + */ +static uint +getpm1ctl(void) +{ + return getbanked(fadt.pm1acntblk, fadt.pm1bcntblk, fadt.pm1cntlen); +} + +static uint +getpm1sts(void) +{ + return getbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen) & 0xffff; +} + +static uint +getpm1en(void) +{ + return getbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen)>>16; +} + +static void +setpm1en(uint v) +{ + u32int r; + + r = getbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen); + r &= 0xffff; + setbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen, r | v<<16); +} + +static void +setpm1sts(uint v) +{ + u32int r; + + DBG("acpi: setpm1sts %#ux\n", v); + + r = getbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen); + r &= 0xffff0000; + setbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen, r | v); +} + +static int +getgpeen(int n) +{ + return 
inb(gpes[n].enio) & 1<>3); + gpes[i].enbit = (n0 + i)&7; + gpes[i].enio = fadt.gpe0blk + ((n0 + i)>>3); + } + for(i = 0; i + n0 < ngpe; i++){ + gpes[i + n0].nb = fadt.gp1base + i; + gpes[i + n0].stsbit = i&7; + gpes[i + n0].stsio = fadt.gpe1blk + (i>>3); + gpes[i + n0].enbit = (n1 + i)&7; + gpes[i + n0].enio = fadt.gpe1blk + ((n1 + i)>>3); + } + for(i = 0; i < ngpe; i++){ + setgpeen(i, 0); + clrgpests(i); + } +} + +static void +acpiioalloc(uint addr, int len, char *name) +{ + char buf[32]; + + if(addr != 0){ + snprint(buf, sizeof buf, "acpi %s", name); + ioalloc(addr, len, 0, buf); + } +} + +static void +init(void) +{ + int i; + + aconf.powerbutton = acpipoweroff; + + /* should we use fadt->xpm* and fadt->xgpe* registers for 64 bits? */ + acpiioalloc(fadt.smicmd, 1, "scicmd"); + acpiioalloc(fadt.pm1aevtblk, fadt.pm1evtlen, "pm1aevt"); + acpiioalloc(fadt.pm1bevtblk, fadt.pm1evtlen, "pm1bevt"); + acpiioalloc(fadt.pm1acntblk, fadt.pm1cntlen, "pm1acnt"); + acpiioalloc(fadt.pm1bcntblk, fadt.pm1cntlen, "pm1bcnt"); + acpiioalloc(fadt.pm2cntblk, fadt.pm2cntlen, "pm2cnt"); + acpiioalloc(fadt.pmtmrblk, fadt.pmtmrlen, "pmtmr"); + acpiioalloc(fadt.gpe0blk, fadt.gpe0blklen, "gpe0"); + acpiioalloc(fadt.gpe1blk, fadt.gpe1blklen, "gpe1"); + + initgpes(); + + /* + * This starts ACPI, which requires we handle + * power mgmt events ourselves. 
+ */ + if(fadt.sciint == 0) + return; + if((getpm1ctl() & Pscien) == 0){ + outb(fadt.smicmd, fadt.acpienable); + for(i = 0;; i++){ + if(i == 10){ + print("acpi: failed to enable\n"); + outb(fadt.smicmd, fadt.acpidisable); + return; + } + if(getpm1ctl() & Pscien) + break; + } + } + + if(0){ + print("acpi: enable interrupt\n"); + setpm1sts(getpm1sts()); + setpm1en(Epowerbtn); + intrenable(fadt.sciint, acpiintr, 0, BUSUNKNOWN, "acpi"); + } +} + +static Chan* +acpiattach(char *spec) +{ + if(fadt.smicmd == 0) + error("no acpi"); + return devattach(L'α', spec); +} + +static Walkqid* +acpiwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, acpidir, nelem(acpidir), acpigen); +} + +static long +acpistat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, acpidir, nelem(acpidir), acpigen); +} + +static Chan* +acpiopen(Chan *c, int omode) +{ + c = devopen(c, omode, acpidir, nelem(acpidir), acpigen); + switch((uint)c->qid.path){ + case Qevent: + if(tas32(&aconf.eventopen) != 0){ + c->flag &= ~COPEN; + error(Einuse); + } + if(aconf.event == nil){ + aconf.event = qopen(8*1024, Qmsg, 0, 0); + if(aconf.event == nil){ + c->flag &= ~COPEN; + error(Enomem); + } + qnoblock(aconf.event, 1); + }else + qreopen(aconf.event); + break; + } + return c; +} + +static void +acpiclose(Chan *c) +{ + switch((uint)c->qid.path){ + case Qevent: + if(c->flag & COPEN){ + aconf.eventopen = 0; + qhangup(aconf.event, nil); + } + break; + } +} + +static long +acpiread(Chan *c, void *a, long n, vlong off) +{ + char *s, *p, *e, buf[256]; + int i; + long q; + + q = c->qid.path; + switch(q){ + case Qdir: + return devdirread(c, a, n, acpidir, nelem(acpidir), acpigen); + case Qctl: + p = buf; + e = buf + sizeof buf; + for(i = 0; i < nelem(pwrbuttab); i++) + if(pwrbuttab[i].f == aconf.powerbutton) + break; + if(i == nelem(pwrbuttab)) + s = "??"; + else + s = pwrbuttab[i].name; + p = seprint(p, e, "powerbutton %s\n", s); + p = seprint(p, e, "ngpe %d\n", ngpe); + USED(p); + 
return readstr(off, a, n, buf); + + case Qevent: + return qread(aconf.event, a, n); + } + error(Eperm); + return -1; +} + +static long +acpiwrite(Chan *c, void *a, long n, vlong) +{ + uint i; + Cmdtab *ct; + Cmdbuf *cb; + + if(c->qid.path != Qctl) + error(Eperm); + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, ctls, nelem(ctls)); + switch(ct->index){ + case CMgpe: + i = strtoul(cb->f[1], nil, 0); + if(i >= ngpe) + error("gpe out of range"); + kstrdup(&gpes[i].obj, cb->f[2]); + DBG("gpe %d %s\n", i, gpes[i].obj); + setgpeen(i, 1); + break; + case CMpowerbut: + for(i = 0; i < nelem(pwrbuttab); i++) + if(strcmp(cb->f[1], pwrbuttab[i].name) == 0){ + ilock(&aconf); + aconf.powerbutton = pwrbuttab[i].f; + iunlock(&aconf); + break; + } + if(i == nelem(pwrbuttab)) + error("unknown power button action"); + break; + case CMpower: + if(strcmp(cb->f[1], "off") == 0) + aconf.powerbutton(); + else + error("unknown power button command"); + break; + } + poperror(); + free(cb); + return n; +} + +Dev acpidevtab = { + L'α', + "acpi", + + devreset, + init, + devshutdown, + acpiattach, + acpiwalk, + acpistat, + acpiopen, + devcreate, + acpiclose, + acpiread, + devbread, + acpiwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/k10/devarch.c /sys/src/9k/k10/devarch.c --- /sys/src/9k/k10/devarch.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/devarch.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,594 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ureg.h" + +typedef struct IOMap IOMap; +struct IOMap +{ + IOMap *next; + int reserved; + char tag[13]; + ulong start; + ulong end; +}; + +static struct +{ + Lock; + IOMap *map; + IOMap *free; + IOMap maps[32]; // some initial free maps + + QLock ql; // lock for reading map +} iomap; + +enum { + Qdir = 0, + Qioalloc = 1, + Qiob, + Qiow, + Qiol, + Qbase, + + Qmax = 32, +}; + +typedef long Rdwrfn(Chan*, 
void*, long, vlong); + +static Rdwrfn *readfn[Qmax]; +static Rdwrfn *writefn[Qmax]; + +static Dirtab archdir[Qmax] = { + ".", { Qdir, 0, QTDIR }, 0, 0555, + "ioalloc", { Qioalloc, 0 }, 0, 0444, + "iob", { Qiob, 0 }, 0, 0660, + "iow", { Qiow, 0 }, 0, 0660, + "iol", { Qiol, 0 }, 0, 0660, +}; +Lock archwlock; /* the lock is only for changing archdir */ +int narchdir = Qbase; + +/* + * Add a file to the #P listing. Once added, you can't delete it. + * You can't add a file with the same name as one already there, + * and you get a pointer to the Dirtab entry so you can do things + * like change the Qid version. Changing the Qid path is disallowed. + */ +Dirtab* +addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn) +{ + int i; + Dirtab d; + Dirtab *dp; + + memset(&d, 0, sizeof d); + strcpy(d.name, name); + d.perm = perm; + + lock(&archwlock); + if(narchdir >= Qmax){ + unlock(&archwlock); + return nil; + } + + for(i=0; inext){ + map = *l; + if (map->start < 0x400) + continue; + i = map->start - port; + if(i > size) + break; + if(align > 0) + port = ((port+align-1)/align)*align; + else + port = map->end; + } + if(*l == nil){ + unlock(&iomap); + return -1; + } + map = iomap.free; + if(map == nil){ + print("ioalloc: out of maps"); + unlock(&iomap); + return port; + } + iomap.free = map->next; + map->next = *l; + map->start = port; + map->end = port + size; + map->reserved = 1; + strncpy(map->tag, tag, sizeof(map->tag)); + map->tag[sizeof(map->tag)-1] = 0; + *l = map; + + archdir[0].qid.vers++; + + unlock(&iomap); + return map->start; +} + +// +// alloc some io port space and remember who it was +// alloced to. if port < 0, find a free region. 
+// +int +ioalloc(int port, int size, int align, char *tag) +{ + IOMap *map, **l; + int i; + + lock(&iomap); + if(port < 0){ + // find a free port above 0x400 and below 0x1000 + port = 0x400; + for(l = &iomap.map; *l; l = &(*l)->next){ + map = *l; + if (map->start < 0x400) + continue; + i = map->start - port; + if(i > size) + break; + if(align > 0) + port = ((port+align-1)/align)*align; + else + port = map->end; + } + if(*l == nil){ + unlock(&iomap); + return -1; + } + } else { + // Only 64KB I/O space on the x86. + if((port+size) > 0x10000){ + unlock(&iomap); + return -1; + } + // see if the space clashes with previously allocated ports + for(l = &iomap.map; *l; l = &(*l)->next){ + map = *l; + if(map->end <= port) + continue; + if(map->reserved && map->start == port && map->end == port + size) { + map->reserved = 0; + unlock(&iomap); + return map->start; + } + if(map->start >= port+size) + break; + unlock(&iomap); + return -1; + } + } + map = iomap.free; + if(map == nil){ + print("ioalloc: out of maps"); + unlock(&iomap); + return port; + } + iomap.free = map->next; + map->next = *l; + map->start = port; + map->end = port + size; + strncpy(map->tag, tag, sizeof(map->tag)); + map->tag[sizeof(map->tag)-1] = 0; + *l = map; + + archdir[0].qid.vers++; + + unlock(&iomap); + return map->start; +} + +void +iofree(int port) +{ + IOMap *map, **l; + + lock(&iomap); + for(l = &iomap.map; *l; l = &(*l)->next){ + if((*l)->start == port){ + map = *l; + *l = map->next; + map->next = iomap.free; + iomap.free = map; + break; + } + if((*l)->start > port) + break; + } + archdir[0].qid.vers++; + unlock(&iomap); +} + +int +iounused(int start, int end) +{ + IOMap *map; + + for(map = iomap.map; map; map = map->next){ + if(start >= map->start && start < map->end + || start <= map->start && end > map->start) + return 0; + } + return 1; +} + +static void +checkport(int start, int end) +{ + /* standard vga regs are OK */ + if(start >= 0x2b0 && end <= 0x2df+1) + return; + if(start >= 0x3c0 && 
end <= 0x3da+1) + return; + + if(iounused(start, end)) + return; + error(Eperm); +} + +static Chan* +archattach(char* spec) +{ + return devattach('P', spec); +} + +Walkqid* +archwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, archdir, narchdir, devgen); +} + +static long +archstat(Chan* c, uchar* dp, long n) +{ + return devstat(c, dp, n, archdir, narchdir, devgen); +} + +static Chan* +archopen(Chan* c, int omode) +{ + return devopen(c, omode, archdir, narchdir, devgen); +} + +static void +archclose(Chan*) +{ +} + +enum +{ + Linelen= 31, +}; + +static long +archread(Chan *c, void *a, long n, vlong offset) +{ + char *buf, *p; + int port; + ushort *sp; + ulong *lp; + IOMap *map; + Rdwrfn *fn; + + switch((ulong)c->qid.path){ + + case Qdir: + return devdirread(c, a, n, archdir, narchdir, devgen); + + case Qiob: + port = offset; + checkport(offset, offset+n); + for(p = a; port < offset+n; port++) + *p++ = inb(port); + return n; + + case Qiow: + if(n & 1) + error(Ebadarg); + checkport(offset, offset+n); + sp = a; + for(port = offset; port < offset+n; port += 2) + *sp++ = ins(port); + return n; + + case Qiol: + if(n & 3) + error(Ebadarg); + checkport(offset, offset+n); + lp = a; + for(port = offset; port < offset+n; port += 4) + *lp++ = inl(port); + return n; + + case Qioalloc: + break; + + default: + if(c->qid.path < narchdir && (fn = readfn[c->qid.path])) + return fn(c, a, n, offset); + error(Eperm); + break; + } + + if((buf = malloc(n)) == nil) + error(Enomem); + p = buf; + n = n/Linelen; + offset = offset/Linelen; + + lock(&iomap); + for(map = iomap.map; n > 0 && map != nil; map = map->next){ + if(offset-- > 0) + continue; + sprint(p, "%#8lux %#8lux %-12.12s\n", map->start, map->end-1, map->tag); + p += Linelen; + n--; + } + unlock(&iomap); + + n = p - buf; + memmove(a, buf, n); + free(buf); + + return n; +} + +static long +archwrite(Chan *c, void *a, long n, vlong offset) +{ + char *p; + int port; + ushort *sp; + ulong *lp; + 
Rdwrfn *fn; + + switch((ulong)c->qid.path){ + + case Qiob: + p = a; + checkport(offset, offset+n); + for(port = offset; port < offset+n; port++) + outb(port, *p++); + return n; + + case Qiow: + if(n & 1) + error(Ebadarg); + checkport(offset, offset+n); + sp = a; + for(port = offset; port < offset+n; port += 2) + outs(port, *sp++); + return n; + + case Qiol: + if(n & 3) + error(Ebadarg); + checkport(offset, offset+n); + lp = a; + for(port = offset; port < offset+n; port += 4) + outl(port, *lp++); + return n; + + default: + if(c->qid.path < narchdir && (fn = writefn[c->qid.path])) + return fn(c, a, n, offset); + error(Eperm); + break; + } + return 0; +} + +Dev archdevtab = { + 'P', + "arch", + + devreset, + devinit, + devshutdown, + archattach, + archwalk, + archstat, + archopen, + devcreate, + archclose, + archread, + devbread, + archwrite, + devbwrite, + devremove, + devwstat, +}; + +/* + */ +void +nop(void) +{ +} + +void (*coherence)(void) = mfence; + +static long +cputyperead(Chan*, void *a, long n, vlong off) +{ + char str[32]; + + snprint(str, sizeof(str), "%s %ud\n", "AMD64", m->cpumhz); + return readstr(off, a, n, str); +} + +void +archinit(void) +{ + addarchfile("cputype", 0444, cputyperead, nil); +} + +void +archreset(void) +{ + int i; + + /* + * BUG: virtualbox does not reset with the code after this call, + * but this makes the kernel reset correctly everywhere. + */ + i8042reset(); + + /* + * And sometimes there is no keyboard... + * + * The reset register (0xcf9) is usually in one of the bridge + * chips. The actual location and sequence could be extracted from + * ACPI but why bother, this is the end of the line anyway. 
+ print("Takes a licking and keeps on ticking...\n"); + */ + *(ushort*)KADDR(0x472) = 0x1234; /* BIOS warm-boot flag */ + i = inb(0xcf9); /* ICHx reset control */ + i &= 0x06; + outb(0xcf9, i|0x02); /* SYS_RST */ + millidelay(1); + outb(0xcf9, i|0x06); /* RST_CPU transition */ + + for(;;) + pause(); +} + +/* + * return value and speed of timer + */ +uvlong +fastticks(uvlong* hz) +{ + if(hz != nil) + *hz = m->cpuhz; + return rdtsc(); +} + +ulong +µs(void) +{ + return fastticks2us(rdtsc()); +} + +/* + * set next timer interrupt + */ +void +timerset(uvlong x) +{ + extern void lapictimerset(uvlong); + + lapictimerset(x); +} + +void +cycles(uvlong* t) +{ + *t = rdtsc(); +} + +void +delay(int millisecs) +{ + u64int r, t; + + if(millisecs <= 0) + millisecs = 1; + r = rdtsc(); + for(t = r + m->cpumhz*1000ull*millisecs; r < t; r = rdtsc()) + ; +} + +/* + * performance measurement ticks. must be low overhead. + * doesn't have to count over a second. + */ +ulong +perfticks(void) +{ + uvlong x; + +// if(m->havetsc) + cycles(&x); +// else +// x = 0; + return x; +} diff -Nru /sys/src/9k/k10/devkbin.c /sys/src/9k/k10/devkbin.c --- /sys/src/9k/k10/devkbin.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/devkbin.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,119 @@ +/* + * keyboard scan code input from outside the kernel. + * to avoid duplication of keyboard map processing for usb. 
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +extern void kbdputsc(int, int); + +enum { + Qdir, + Qkbd, +}; + +Dirtab kbintab[] = { + ".", {Qdir, 0, QTDIR}, 0, 0555, + "kbin", {Qkbd, 0, QTEXCL}, 0, DMEXCL|0200, +}; + +static ulong kbinbusy; /* test and set whether /dev/kbin is open */ + +static Chan * +kbinattach(char *spec) +{ + return devattach(L'Ι', spec); +} + +static Walkqid* +kbinwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, kbintab, nelem(kbintab), devgen); +} + +static long +kbinstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, kbintab, nelem(kbintab), devgen); +} + +static Chan* +kbinopen(Chan *c, int omode) +{ + c = devopen(c, omode, kbintab, nelem(kbintab), devgen); + if(c->qid.path == Qkbd && + tas32(&kbinbusy) != 0){ + c->flag &= ~COPEN; + error(Einuse); + } + return c; +} + +static void +kbinclose(Chan *c) +{ + if((c->flag & COPEN) == 0) + return; + if(c->aux){ + free(c->aux); + c->aux = nil; + } + if(c->qid.path == Qkbd) + kbinbusy = 0; +} + +static long +kbinread(Chan *c, void *a, long n, vlong ) +{ + if(c->qid.type & QTDIR) + return devdirread(c, a, n, kbintab, nelem(kbintab), devgen); + return 0; +} + +static long +kbinwrite(Chan *c, void *a, long n, vlong) +{ + int i; + uchar *p; + + if(c->qid.type & QTDIR) + error(Eisdir); + switch((int)c->qid.path){ + case Qkbd: + p = a; + for(i = 0; i < n; i++) + kbdputsc(*p++, 1); /* external source */ + break; + default: + error(Egreg); + } + return n; +} + +Dev kbindevtab = { + L'Ι', + "kbin", + + devreset, + devinit, + devshutdown, + kbinattach, + kbinwalk, + kbinstat, + kbinopen, + devcreate, + kbinclose, + kbinread, +// devreadv, + devbread, + kbinwrite, +// devwritev, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/k10/etherif.h /sys/src/9k/k10/etherif.h --- /sys/src/9k/k10/etherif.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/etherif.h Wed Dec 9 
00:00:00 2015 @@ -0,0 +1,56 @@ +enum +{ + Eaddrlen = 6, + ETHERMINTU = 60, /* minimum transmit size */ + ETHERMAXTU = 1514, /* maximum transmit size */ + ETHERHDRSIZE = 14, /* size of an ethernet header */ + + MaxEther = 48, + Ntypes = 16, +}; + +typedef struct Ether Ether; +struct Ether { + ISAConf; /* hardware info */ + + int ctlrno; + int minmtu; + int maxmtu; + uchar ea[Eaddrlen]; + + void (*attach)(Ether*); /* filled in by reset routine */ + void (*detach)(Ether*); + void (*transmit)(Ether*); + void (*interrupt)(Ureg*, void*); + long (*ifstat)(Ether*, void*, long, ulong); + long (*ctl)(Ether*, void*, long); /* custom ctl messages */ + void (*power)(Ether*, int); /* power on/off */ + void (*shutdown)(Ether*); /* shutdown hardware before reboot */ + void *ctlr; + void *vector; + + int scan[Ntypes]; /* base station scanning interval */ + int nscan; /* number of base station scanners */ + + Netif; +}; + +typedef struct Etherpkt Etherpkt; +struct Etherpkt +{ + uchar d[Eaddrlen]; + uchar s[Eaddrlen]; + uchar type[2]; + uchar data[1500]; +}; + +extern Block* etheriq(Ether*, Block*, int); +extern void addethercard(char*, int(*)(Ether*)); +extern ulong ethercrc(uchar*, int); +extern int parseether(uchar*, char*); +extern int ethercfgmatch(Ether*, Pcidev*, uintmem); + +#define NEXT(x, l) (((x)+1)%(l)) +#define PREV(x, l) (((x) == 0) ? 
(l)-1: (x)-1) + +extern Ether* etherc2e(Chan*); diff -Nru /sys/src/9k/k10/fns.h /sys/src/9k/k10/fns.h --- /sys/src/9k/k10/fns.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/fns.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,217 @@ +#include "../port/portfns.h" + +void aamloop(int); +void acpiinit(int); +Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong)); +void archenable(void); +void archfmtinstall(void); +void archidle(void); +void archinit(void); +int archmmu(void); +void archreset(void); +vlong archhz(void); +int asmfree(uvlong, uvlong, int); +uvlong asmalloc(uvlong, uvlong, int, int); +void asminit(void); +void asmmapinit(u64int, u64int, int); +void asmmodinit(u32int, u32int, char*); +void cgaconsputs(char*, int); +void cgainit(void); +void cgapost(int); +void (*coherence)(void); +int corecolor(int); +u32int cpuid(u32int, u32int, u32int[4]); +int dbgprint(char*, ...); +void delay(int); +#define evenaddr(x) /* x86 doesn't care */ +int e820(void); +int fpudevprocio(Proc*, void*, long, uintptr, int); +void fpuinit(void); +void fpunoted(void); +void fpunotify(Ureg*); +void fpuprocrestore(Proc*); +void fpuprocsave(Proc*); +void fpusysprocsetup(Proc*); +void fpusysrfork(Ureg*); +void fpusysrforkchild(Proc*, Proc*); +char* getconf(char*); +void _halt(void); +void halt(void); +void hpetinit(uint, uint, uintmem, int); +int i8042auxcmd(int); +int i8042auxcmds(uchar*, int); +void i8042auxenable(void (*)(int, int)); +void i8042reset(void); +Uart* i8250console(char*); +void* i8250alloc(int, int, int); +vlong i8254hz(u32int[2][4]); +void idlehands(void); +void idthandlers(void); +int inb(int); +void insb(int, void*, int); +ushort ins(int); +void inss(int, void*, int); +ulong inl(int); +void insl(int, void*, int); +int intrdisable(void*); +void* intrenable(int, void (*)(Ureg*, void*), void*, int, char*); +void invlpg(uintptr); +void iofree(int); +void ioinit(void); +int iounused(int, int); +int ioalloc(int, int, int, char*); +int 
ioreserve(int, int, int, char*); +int iprint(char*, ...); +int isaconfig(char*, int, ISAConf*); +int isdmaok(void*, usize, int); +void kbdenable(void); +void kbdinit(void); +void kexit(Ureg*); +#define kmapinval() +void lfence(void); +void links(void); +int machdom(Mach*); +void machinit(void); +void mach0init(void); +void mapraminit(uvlong, uvlong); +void memdebug(void); +void meminit(void); +int memcolor(uintmem addr, uintmem *sizep); +void memmaprange(uintptr, uintmem, uintmem, PTE (*alloc)(usize), PTE); +void memreserve(uintmem, uintmem); +void mfence(void); +void mmudump(Proc*); +void mmuflushtlb(u64int); +void mmuinit(void); +#define mmucachectl(pg, why) USED(pg, why) /* x86 doesn't need it */ +u64int mmuphysaddr(uintptr); +int mmuwalk(uintptr, int, PTE**, u64int (*)(usize)); +int multiboot(u32int, u32int, int); +u32int mwait32(void*, u32int); +u64int mwait64(void*, u64int); +void ndnr(void); +uchar nvramread(int); +void nvramwrite(int, uchar); +void optionsinit(char*); +void outb(int, int); +void outsb(int, void*, int); +void outs(int, ushort); +void outss(int, void*, int); +void outl(int, ulong); +void outsl(int, void*, int); +void pause(void); +int pciscan(int, Pcidev**); +ulong pcibarsize(Pcidev*, int); +int pcicap(Pcidev*, int); +int pcicfgr8(Pcidev*, int); +int pcicfgr16(Pcidev*, int); +uint pcicfgr32(Pcidev*, int); +void pcicfgw8(Pcidev*, int, int); +void pcicfgw16(Pcidev*, int, int); +void pcicfgw32(Pcidev*, int, int); +void pciclrbme(Pcidev*); +void pciclrioe(Pcidev*); +void pciclrmwi(Pcidev*); +int pcigetpms(Pcidev*); +void pcihinv(Pcidev*); +uchar pciipin(Pcidev*, uchar); +Pcidev* pcimatch(Pcidev*, int, int); +Pcidev* pcimatchtbdf(int); +void pcireset(void); +void pcisetbme(Pcidev*); +void pcisetioe(Pcidev*); +void pcisetmwi(Pcidev*); +int pcisetpms(Pcidev*, int); +uintmem pcixcfgspace(int); +void* pcixcfgaddr(Pcidev*, int); +void printcpufreq(void); +int screenprint(char*, ...); /* debugging */ +void sfence(void); +void spldone(void); +void 
(*specialmem)(uintmem, uintmem, int); +u64int splhi(void); +u64int spllo(void); +void splx(u64int); +void splxpc(u64int); +void syncclock(void); +void* sysexecregs(uintptr, ulong, ulong); +uintptr sysexecstack(uintptr, int); +void sysprocsetup(Proc*); +void tssrsp0(u64int); +void trapenable(int, void (*)(Ureg*, void*), void*, char*); +void trapinit(void); +int userureg(Ureg*); +void* vmap(uintmem, usize); +void vsvminit(int); +void vunmap(void*, usize); + +extern Mreg cr0get(void); +extern void cr0put(Mreg); +extern Mreg cr2get(void); +extern Mreg cr3get(void); +extern void cr3put(Mreg); +extern Mreg cr4get(void); +extern void cr4put(Mreg); +extern void gdtget(void*); +extern void gdtput(int, u64int, u16int); +extern void idtput(int, u64int); +extern u64int rdmsr(u32int); +extern u64int rdtsc(void); +extern void trput(u64int); +extern void wrmsr(u32int, u64int); +int xaddb(void*); + +extern int islo(void); +extern void spldone(void); +extern Mreg splhi(void); +extern Mreg spllo(void); +extern void splx(Mreg); + +int cas32(void*, u32int, u32int); +int cas64(void*, u64int, u64int); +int tas32(void*); + +#define CASU(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) +#define CASV(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) +#define CASW(p, e, n) cas32((p), (e), (n)) +#define TAS(addr) tas32((addr)) + +void touser(uintptr); +void syscallentry(void); +void syscallreturn(void); +void sysrforkret(void); + +#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1])) + +#define dcflush(a, b) + +#define PTR2UINT(p) ((uintptr)(p)) +#define UINT2PTR(i) ((void*)(i)) + +void* KADDR(uintmem); +uintptr PADDR(void*); + +#define BIOSSEG(a) KADDR(((uint)(a))<<4) + +/* + * archk10.c + */ +extern void millidelay(int); + +/* + * i8259.c + */ +extern int i8259init(int); +extern int i8259irqdisable(int); +extern int i8259irqenable(int); +extern int i8259isr(int); + +/* + * sipi.c + */ +extern void sipi(void); + +void* basealloc(usize, uint, usize*); +void basefree(void*, 
usize); +void physallocinit(void); +void uartpush(void); diff -Nru /sys/src/9k/k10/fpu.c /sys/src/9k/k10/fpu.c --- /sys/src/9k/k10/fpu.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/fpu.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,475 @@ +/* + * SIMD Floating Point. + * Assembler support to get at the individual instructions + * is in l64fpu.s. + * There are opportunities to be lazier about saving and + * restoring the state and allocating the storage needed. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" +#include "ureg.h" + +enum { /* FCW, FSW and MXCSR */ + I = 0x00000001, /* Invalid-Operation */ + D = 0x00000002, /* Denormalized-Operand */ + Z = 0x00000004, /* Zero-Divide */ + O = 0x00000008, /* Overflow */ + U = 0x00000010, /* Underflow */ + P = 0x00000020, /* Precision */ +}; + +enum { /* FCW */ + PCs = 0x00000000, /* Precision Control -Single */ + PCd = 0x00000200, /* -Double */ + PCde = 0x00000300, /* -Double Extended */ + RCn = 0x00000000, /* Rounding Control -Nearest */ + RCd = 0x00000400, /* -Down */ + RCu = 0x00000800, /* -Up */ + RCz = 0x00000C00, /* -Toward Zero */ +}; + +enum { /* FSW */ + Sff = 0x00000040, /* Stack Fault Flag */ + Es = 0x00000080, /* Error Summary Status */ + C0 = 0x00000100, /* ZF - Condition Code Bits */ + C1 = 0x00000200, /* O/U# */ + C2 = 0x00000400, /* PF */ + C3 = 0x00004000, /* ZF */ + B = 0x00008000, /* Busy */ +}; + +enum { /* MXCSR */ + Daz = 0x00000040, /* Denormals are Zeros */ + Im = 0x00000080, /* I Mask */ + Dm = 0x00000100, /* D Mask */ + Zm = 0x00000200, /* Z Mask */ + Om = 0x00000400, /* O Mask */ + Um = 0x00000800, /* U Mask */ + Pm = 0x00001000, /* P Mask */ + Rn = 0x00000000, /* Round to Nearest */ + Rd = 0x00002000, /* Round Down */ + Ru = 0x00004000, /* Round Up */ + Rz = 0x00006000, /* Round toward Zero */ + Fz = 0x00008000, /* Flush to Zero for Um */ +}; + +enum { /* PFPU.state */ + Init = 0, /* The FPU has not been used */ + Busy = 1, /* The 
FPU is being used */ + Idle = 2, /* The FPU has been used */ + + Hold = 4, /* Handling an FPU note */ +}; + +extern void _clts(void); +extern void _fldcw(u16int); +extern void _fnclex(void); +extern void _fninit(void); +extern void _fxrstor(Fxsave*); +extern void _fxsave(Fxsave*); +extern void _fwait(void); +extern void _ldmxcsr(u32int); +extern void _stts(void); + +int +fpudevprocio(Proc* proc, void* a, long n, uintptr offset, int write) +{ + uchar *p; + + /* + * Called from procdevtab.read and procdevtab.write + * allow user process access to the FPU registers. + * This is the only FPU routine which is called directly + * from the port code; it would be nice to have dynamic + * creation of entries in the device file trees... + */ + if(offset >= sizeof(Fxsave)) + return 0; + if((p = proc->fpusave) == nil) + return 0; + switch(write){ + default: + if(offset+n > sizeof(Fxsave)) + n = sizeof(Fxsave) - offset; + memmove(p+offset, a, n); + break; + case 0: + if(offset+n > sizeof(Fxsave)) + n = sizeof(Fxsave) - offset; + memmove(a, p+offset, n); + break; + } + + return n; +} + +void +fpunotify(Ureg*) +{ + /* + * Called when a note is about to be delivered to a + * user process, usually at the end of a system call. + * Note handlers are not allowed to use the FPU so + * the state is marked (after saving if necessary) and + * checked in the Device Not Available handler. + */ + if(up->fpustate == Busy){ + _fxsave(up->fpusave); + _stts(); + up->fpustate = Idle; + } + up->fpustate |= Hold; +} + +void +fpunoted(void) +{ + /* + * Called from sysnoted() via the machine-dependent + * noted() routine. + * Clear the flag set above in fpunotify(). + */ + up->fpustate &= ~Hold; +} + +void +fpusysrfork(Ureg*) +{ + /* + * Called early in the non-interruptible path of + * sysrfork() via the machine-dependent syscall() routine. + * Save the state so that it can be easily copied + * to the child process later. 
+ */ + if(up->fpustate != Busy) + return; + + _fxsave(up->fpusave); + _stts(); + up->fpustate = Idle; +} + +void +fpusysrforkchild(Proc* child, Proc* parent) +{ + /* + * Called later in sysrfork() via the machine-dependent + * sysrforkchild() routine. + * Copy the parent FPU state to the child; the save pointer must be aligned inside the child's own fxsave buffer, not up's. + */ + child->fpustate = parent->fpustate; + child->fpusave = (void*)((PTR2UINT(child->fxsave) + 15) & ~15); + if(child->fpustate == Init) + return; + + memmove(child->fpusave, parent->fpusave, sizeof(Fxsave)); +} + +void +fpuprocsave(Proc* p) +{ + /* + * Called from sched() and sleep() via the machine-dependent + * procsave() routine. + * About to go in to the scheduler. + * If the process wasn't using the FPU + * there's nothing to do. + */ + if(p->fpustate != Busy) + return; + + /* + * The process is dead so clear and disable the FPU + * and set the state for whoever gets this proc struct + * next. + */ + if(p->state == Moribund){ + _clts(); + _fnclex(); + _stts(); + p->fpustate = Init; + return; + } + + /* + * Save the FPU state without handling pending + * unmasked exceptions and disable. Postnote() can't + * be called here as sleep() already has up->rlock, + * so the handling of pending exceptions is delayed + * until the process runs again and generates a + * Device Not Available exception fault to activate + * the FPU. + */ + _fxsave(p->fpusave); + _stts(); + p->fpustate = Idle; +} + +void +fpuprocrestore(Proc* p) +{ + /* + * The process has been rescheduled and is about to run. + * Nothing to do here right now. If the process tries to use + * the FPU again it will cause a Device Not Available + * exception and the state will then be restored. + */ + USED(p); +} + +void +fpusysprocsetup(Proc* p) +{ + /* + * Disable the FPU. + * Called from sysexec() via sysprocsetup() to + * set the FPU for the new process.
+ */ + if(p->fpustate != Init){ + _clts(); + _fnclex(); + _stts(); + p->fpustate = Init; + } +} + +static void +fpupostnote(void) +{ + ushort fsw; + Fxsave *fpusave; + char *m, n[ERRMAX]; + + /* + * The Sff bit is sticky, meaning it should be explicitly + * cleared or there's no way to tell if the exception was an + * invalid operation or a stack fault. + */ + fpusave = up->fpusave; + fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I); + if(fsw & I){ + if(fsw & Sff){ + if(fsw & C1) + m = "Stack Overflow"; + else + m = "Stack Underflow"; + } + else + m = "Invalid Operation"; + } + else if(fsw & D) + m = "Denormal Operand"; + else if(fsw & Z) + m = "Divide-By-Zero"; + else if(fsw & O) + m = "Numeric Overflow"; + else if(fsw & U) + m = "Numeric Underflow"; + else if(fsw & P) + m = "Precision"; + else + m = "Unknown"; + + snprint(n, sizeof(n), "sys: fp: %s Exception ipo=%#llux fsw=%#ux", + m, fpusave->rip, fsw); + postnote(up, 1, n, NDebug); +} + +static void +fpuxf(Ureg* ureg, void*) +{ + u32int mxcsr; + Fxsave *fpusave; + char *m, n[ERRMAX]; + + /* + * #XF - SIMD Floating Point Exception (Vector 18). + */ + + /* + * Save FPU state to check out the error. + */ + fpusave = up->fpusave; + _fxsave(fpusave); + _stts(); + up->fpustate = Idle; + + if(ureg->ip & KZERO) + panic("#MF: ip=%#p", ureg->ip); + + /* + * Notify the user process. + * The path here is similar to the x87 path described + * in fpupostnote above but without the fpupostnote() + * call. 
+ */ + mxcsr = fpusave->mxcsr; + if((mxcsr & (Im|I)) == I) + m = "Invalid Operation"; + else if((mxcsr & (Dm|D)) == D) + m = "Denormal Operand"; + else if((mxcsr & (Zm|Z)) == Z) + m = "Divide-By-Zero"; + else if((mxcsr & (Om|O)) == O) + m = "Numeric Overflow"; + else if((mxcsr & (Um|U)) == U) + m = "Numeric Underflow"; + else if((mxcsr & (Pm|P)) == P) + m = "Precision"; + else + m = "Unknown"; + + snprint(n, sizeof(n), "sys: fp: %s Exception mxcsr=%#ux", m, mxcsr); + postnote(up, 1, n, NDebug); +} + +static void +fpumf(Ureg* ureg, void*) +{ + Fxsave *fpusave; + + /* + * #MF - x87 Floating Point Exception Pending (Vector 16). + */ + + /* + * Save FPU state to check out the error. + */ + fpusave = up->fpusave; + _fxsave(fpusave); + _stts(); + up->fpustate = Idle; + + if(ureg->ip & KZERO) + panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip); + + /* + * Notify the user process. + * The path here is + * call trap->fpumf->fpupostnote->postnote + * return ->fpupostnote->fpumf->trap + * call notify->fpunotify + * return ->notify + * then either + * call pexit + * or + * return ->trap + * return ->user note handler + */ + fpupostnote(); +} + +static void +fpunm(Ureg* ureg, void*) +{ + Fxsave *fpusave; + + /* + * #NM - Device Not Available (Vector 7). + */ + if(up == nil) + panic("#NM: fpu in kernel: ip %#p\n", ureg->ip); + + /* + * Someone tried to use the FPU in a note handler. + * That's a no-no. + */ + if(up->fpustate & Hold){ + postnote(up, 1, "sys: floating point in note handler", NDebug); + return; + } + if(ureg->ip & KZERO) + panic("#NM: proc %d %s state %d ip %#p\n", + up->pid, up->text, up->fpustate, ureg->ip); + + switch(up->fpustate){ + case Busy: + default: + panic("#NM: state %d ip %#p\n", up->fpustate, ureg->ip); + break; + case Init: + /* + * A process tries to use the FPU for the + * first time and generates a 'device not available' + * exception. + * Turn the FPU on and initialise it for use. 
+ * Set the precision and mask the exceptions + * we don't care about from the generic Mach value. + */ + _clts(); + _fninit(); + _fwait(); + _fldcw(m->fcw); + _ldmxcsr(m->mxcsr); + up->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15); + up->fpustate = Busy; + break; + case Idle: + /* + * Before restoring the state, check for any pending + * exceptions, there's no way to restore the state without + * generating an unmasked exception. + */ + fpusave = up->fpusave; + if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I)){ + fpupostnote(); + break; + } + + /* + * Sff is sticky; it is a status-word (FSW) flag, so clear it in the saved fsw before restoring. + */ + fpusave->fsw &= ~Sff; + _clts(); + _fxrstor(fpusave); + up->fpustate = Busy; + break; + } +} + +void +fpuinit(void) +{ + u64int r; + Fxsave *fxsave; + uchar buf[sizeof(Fxsave)+15]; + + /* + * It's assumed there is an integrated FPU, so Em is cleared; + */ + r = cr0get(); + r &= ~(Ts|Em); + r |= Ne|Mp; + cr0put(r); + + r = cr4get(); + r |= Osxmmexcpt|Osfxsr; + cr4put(r); + + _fninit(); + fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15); + memset(fxsave, 0, sizeof(Fxsave)); + _fxsave(fxsave); + m->fcw = RCn|PCd|P|U|D; + if(fxsave->mxcsrmask == 0) + m->mxcsrmask = 0x0000FFBF; + else + m->mxcsrmask = fxsave->mxcsrmask; + m->mxcsr = (Rn|Pm|Um|Dm) & m->mxcsrmask; + _stts(); + + if(m->machno != 0) + return; + + /* + * Set up the exception handlers.
+ */ + trapenable(IdtNM, fpunm, 0, "#NM"); + trapenable(IdtMF, fpumf, 0, "#MF"); + trapenable(IdtXF, fpuxf, 0, "#XF"); +} diff -Nru /sys/src/9k/k10/hpet.c /sys/src/9k/k10/hpet.c --- /sys/src/9k/k10/hpet.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/hpet.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,101 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +typedef struct Hpet Hpet; +typedef struct Tn Tn; + +struct Hpet { /* Event Timer Block */ + u32int cap; /* General Capabilities */ + u32int period; /* Main Counter Tick Period */ + u32int _8_[2]; + u32int cnf; /* General Configuration */ + u32int _20_[3]; + u32int sts; /* General Interrupt Status */ + u32int _36_[51]; + u64int counter; /* Main Counter Value */ + u32int _248[2]; + Tn tn[]; /* Timers */ +}; + +struct Tn { /* Timer */ + u32int cnf; /* Configuration */ + u32int cap; /* Capabilities */ + u64int comparator; /* Comparator */ + u32int val; /* FSB Interrupt Value */ + u32int addr; /* FSB Interrupt Address */ + u32int _24_[2]; +}; + +static Hpet* etb[8]; /* Event Timer Blocks */ +static u64int zerostamp; +static u64int *stamper = &zerostamp; /* hpet counter used for time stamps, or 0 if no hpet */ +static u32int period; /* period of active hpet */ + +uvlong +hpetticks(uvlong*) +{ + return *stamper; +} + +uvlong +hpetticks2ns(uvlong ticks) +{ + return ticks*period / 1000 / 1000; +} + +uvlong +hpetticks2us(uvlong ticks) +{ + return hpetticks2ns(ticks) / 1000; +} + +/* + * called from acpi + */ +void +hpetinit(uint id, uint seqno, uintmem pa, int minticks) +{ + Tn *tn; + int i, n; + Hpet *hpet; + + print("hpet: id %#ux seqno %d pa %#P minticks %d\n", id, seqno, pa, minticks); + if(seqno >= nelem(etb)) + return; + if((hpet = vmap(pa, 1024)) == nil) /* HPET §3.2.4 */ + return; + memreserve(pa, 1024); + etb[seqno] = hpet; + + print("HPET: cap %#8.8ux period %#8.8ux\n", hpet->cap, hpet->period); + print("HPET: 
cnf %#8.8ux sts %#8.8ux\n",hpet->cnf, hpet->sts); + print("HPET: counter %#.16llux\n", hpet->counter); + + n = ((hpet->cap>>8) & 0x0F) + 1; + for(i = 0; i < n; i++){ + tn = &hpet->tn[i]; + DBG("Tn%d: cnf %#8.8ux cap %#8.8ux\n", i, tn->cnf, tn->cap); + DBG("Tn%d: comparator %#.16llux\n", i, tn->comparator); + DBG("Tn%d: val %#8.8ux addr %#8.8ux\n", i, tn->val, tn->addr); + USED(tn); + } + /* + * hpet->period is the number of femtoseconds per counter tick. + */ + + /* + * activate the first hpet as the source of time stamps + */ + if(seqno == 0){ + period = hpet->period; + stamper = &hpet->counter; + /* the timer block must be enabled to start the main counter for timestamping */ + hpet->cnf |= 1<<0; /* ENABLE_CNF lives in General Configuration, not in the capabilities word */ + } +} diff -Nru /sys/src/9k/k10/ht.c /sys/src/9k/k10/ht.c --- /sys/src/9k/k10/ht.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/ht.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,200 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" + +static void +htcapabilities(Pcidev* pci, int cp) +{ + u64int idr; + u32int command, r; + + /* + * Top 5 bits of command give type: + * 000xx slave or primary interface + * 001xx host or secondary interface + * 10000 interrupt discovery and configuration + * Other values don't concern this interface. + */ + r = pcicfgr32(pci, cp); + command = (r>>16) & 0xFFFF; + if((command & 0xE000) == 0x0000){ + DBG("HT: slave or primary interface\n"); + } + else if((command & 0xE000) == 0x2000){ + DBG("HT: host or secondary interface\n"); + } + else if((command & 0xF800) == 0x8000){ + /* + * The Interrupt and Discovery block uses + * an index and data scheme to access the + * registers. Index is a byte at +2, data is + * 32 bits at +4. + * The only interesting information is the 64-bit + * Interrupt Definition Register at offset 0x10.
+ */ + pcicfgw8(pci, cp+0x02, 0x11); + idr = pcicfgr32(pci, cp+0x04); + idr <<= 32; + pcicfgw8(pci, cp+0x02, 0x10); + idr |= (u32int)pcicfgr32(pci, cp+0x04); + DBG("HT: Interrupt and discovery block: idr %#16.16llux\n", idr); + } + else{ + DBG("HT: capability code %#ux\n", command>>11); + } +} + +void +htlink(void) +{ + int cp; + char *p; + Pcidev *pci; + u32int r, *rp; + + pci = nil; + while(pci = pcimatch(pci, 0, 0)){ + /* + * AMD-8111 Hypertransport I/O Hub + */ + if(pci->vid == 0x1022 && pci->did == 0x1100){ + DBG("HT: AMD-8111: tc %#8.8ux ic %#8.8ux\n", + pcicfgr32(pci, 0x68), pcicfgr32(pci, 0x6C)); + } + + /* + * AMD-8111 PCI Bridge + */ + if(pci->vid == 0x1022 && pci->did == 0x7460){ + pcicfgw32(pci, 0xF0, 1); + DBG("HT: AMD-8111: 0xF4: %#8.8ux\n", + pcicfgr32(pci, 0xF4)); + pcicfgw32(pci, 0xF0, 0x10); + DBG("HT: AMD-8111: 0x10: %#8.8ux\n", + pcicfgr32(pci, 0xF4)); + pcicfgw32(pci, 0xF0, 0x11); + DBG("HT: AMD-8111: 0x11: %#8.8ux\n", + pcicfgr32(pci, 0xF4)); + } + + /* + * AMD-8111 LPC Bridge + */ + if(pci->vid == 0x1022 && pci->did == 0x7468){ + r = pcicfgr32(pci, 0xA0); + DBG("HT: HPET @ %#ux\n", r); + if((rp = vmap(r & ~0x0F, 0x200)) != nil){ + DBG("HT: HPET00: %#8.8ux%8.8ux\n", + rp[4/4], rp[0/4]); + DBG("HT: HPET10: %#8.8ux%8.8ux\n", + rp[0x14/4], rp[0x10/4]); /* high dword first, like the other registers */ + DBG("HT: HPET20: %#8.8ux%8.8ux\n", + rp[0x24/4], rp[0x20/4]); + DBG("HT: HPETF0: %#8.8ux%8.8ux\n", + rp[0xF4/4], rp[0xF0/4]); + DBG("HT: HPET100: %#8.8ux%8.8ux\n", + rp[0x104/4], rp[0x100/4]); + DBG("HT: HPET120: %#8.8ux%8.8ux\n", + rp[0x124/4], rp[0x120/4]); + DBG("HT: HPET140: %#8.8ux%8.8ux\n", + rp[0x144/4], rp[0x140/4]); + vunmap(rp, 0x200); + } + } + + /* + * Check if there are extended capabilities implemented, + * (bit 4 in the status register). + * Find the capabilities pointer based on PCI header type. + * + * Make this more general (e.g. pcigetcap(pcidev, id, cp)) + * and merge back into PCI code.
+ */ + if(!(pcicfgr16(pci, PciPSR) & 0x0010)) + continue; + + switch(pcicfgr8(pci, PciHDT)){ + default: + continue; + case 0: /* all other */ + case 1: /* PCI to PCI bridge */ + cp = PciCP; + break; + } + + for(cp = pcicfgr8(pci, cp); cp != 0; cp = pcicfgr8(pci, cp+1)){ + /* + * Check for validity. + * Can't be in standard header and must be double + * word aligned. + */ + if(cp < 0x40 || (cp & ~0xFC)) + break; + r = pcicfgr32(pci, cp); + switch(r & 0xFF){ + default: + DBG("HT: %#4.4ux/%#4.4ux: unknown ID %d\n", + pci->vid, pci->did, r & 0xFF); + continue; + case 0x01: + p = "PMI"; + break; + case 0x02: + p = "AGP"; + break; + case 0x03: + p = "VPD"; + break; + case 0x04: + p = "Slot Identification"; + break; + case 0x05: + p = "MSI"; + break; + case 0x06: + p = "CPCI Hot Swap"; + break; + case 0x07: + p = "PCI-X"; + break; + case 0x08: + DBG("HT: %#4.4ux/%#4.4ux: HT\n", + pci->vid, pci->did); + htcapabilities(pci, cp); + continue; + case 0x09: + p = "Vendor Specific"; + break; + case 0x0A: + p = "Debug Port"; + break; + case 0x0B: + p = "CPCI Central Resource Control"; + break; + case 0x0C: + p = "PCI Hot-Plug"; + break; + case 0x0E: + p = "AGP 8x"; + break; + case 0x0F: + p = "Secure Device"; + break; + case 0x10: + p = "PCIe"; + break; + case 0x11: + p = "MSI-X"; + break; + case 0x12: + p = "SATA HBA"; + break; + } + DBG("HT: %#4.4ux/%#4.4ux: %s\n", pci->vid, pci->did, p); + } + } +} diff -Nru /sys/src/9k/k10/i8254.c /sys/src/9k/k10/i8254.c --- /sys/src/9k/k10/i8254.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/i8254.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,174 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * 8254 Programmable Interval Timer and compatibles. 
+ */ +enum { /* I/O ports */ + Timer1 = 0x40, + Timer2 = 0x48, /* Counter0 is watchdog (EISA) */ + + Counter0 = 0, /* Counter 0 Access Port */ + Counter1 = 1, /* Counter 1 Access Port */ + Counter2 = 2, /* Counter 2 Access Port */ + Control = 3, /* Timer Control Word */ +}; + +enum { /* Control */ + Bcd = 0x01, /* Binary/BCD countdown select */ + + Mode0 = 0x00, /* [3:1] interrupt on terminal count */ + Mode1 = 0x02, /* hardware re-triggerable one-shot */ + Mode2 = 0x04, /* rate generator */ + Mode3 = 0x06, /* square-wave generator */ + Mode4 = 0x08, /* sofware triggered strobe */ + Mode5 = 0x0A, /* hardware triggered strobe */ + + Clc = 0x00, /* [5:4] Counter Latch Command */ + RWlsb = 0x10, /* R/W LSB */ + RWmsb = 0x20, /* R/W MSB */ + RW16 = 0x30, /* R/W LSB then MSB */ + Cs0 = 0x00, /* [7:6] Counter 0 Select */ + Cs1 = 0x40, /* Counter 1 Select */ + Cs2 = 0x80, /* Counter 2 Select */ + + Rbc = 0xC0, /* Read-Back Command */ + RbCnt0 = 0x02, /* Select Counter 0 */ + RbCnt1 = 0x04, /* Select Counter 1 */ + RbCnt2 = 0x08, /* Select Counter 2 */ + RbS = 0x20, /* Read-Back Status */ + RbC = 0x10, /* Read-Back Count */ + RbCS = 0x00, /* Read-Back Count and Status */ + + RbNULL = 0x40, /* NULL-Count Flag */ + RbOUT = 0x80, /* OUT-pin */ +}; + +enum { + Osc = 1193182, /* 14.318180MHz/12 */ + Hz = 82, /* 2*41*14551 = 1193182 */ +}; + +static void +i8254set(int port, int hz) +{ + int counter, timeo; + + /* + * Initialise Counter0 to be the system clock if necessary, + * it's normally connected to IRQ0 on an interrupt controller. + * Use a periodic square wave (Mode3). + */ + counter = Osc/hz; + outb(port+Control, Cs0|RW16|Mode3); + outb(port+Counter0, counter); + outb(port+Counter0, counter>>8); + + /* + * Wait until the counting register has been loaded + * into the counting element. 
+ */ + for(timeo = 0; timeo < 100000; timeo++){ + outb(port+Control, Rbc|RbS|RbCnt0); + if(!(inb(port+Counter0) & RbNULL)) + break; + } +} + +vlong +i8254hz(u32int info[2][4]) +{ + u32int ax; + u64int a, b; + int aamcycles, incr, loops, x, y; + + /* + * Use the cpuid family info to get the + * cycles for the AAM instruction. + */ + ax = info[1][0] & 0x00000f00; + if(memcmp(&info[0][1], "GenuntelineI", 12) == 0){ + switch(ax){ + default: + return 0; + case 0x00000600: + case 0x00000f00: + aamcycles = 16; + break; + } + } + else if(memcmp(&info[0][1], "AuthcAMDenti", 12) == 0){ + switch(ax){ + default: + return 0; + case 0x00000600: + case 0x00000f00: + aamcycles = 11; + break; + } + } + else if(memcmp(&info[0][1], "CentaulsaurH", 12) == 0){ + switch(ax){ + default: + return 0; + case 0x00000600: + aamcycles = 23; + break; + } + } + else + return 0; + + i8254set(Timer1, Hz); + + /* + * Find biggest loop that doesn't wrap. + */ + SET(a, b); + incr = 16000000/(aamcycles*Hz*2); + x = 2000; + for(loops = incr; loops < 64*1024; loops += incr) { + /* + * Measure time for the loop + * + * MOVL loops,CX + * aaml1: + * AAM + * LOOP aaml1 + * + * The time for the loop should be independent of external + * cache and memory system since it fits in the execution + * prefetch buffer. + * The AAM instruction is not available in 64-bit mode. + */ + outb(Timer1+Control, Cs0|Clc); + + a = rdtsc(); + x = inb(Timer1+Counter0); + x |= inb(Timer1+Counter0)<<8; + aamloop(loops); + outb(Timer1+Control, Cs0|Clc); + b = rdtsc(); + + y = inb(Timer1+Counter0); + y |= inb(Timer1+Counter0)<<8; + x -= y; + + if(x < 0) + x += Osc/Hz; + + if(x > Osc/(3*Hz)) + break; + } + + /* + * Figure out clock frequency. 
+ */ + b = (b-a)<<1; + b *= Osc; + + return b/x; +} diff -Nru /sys/src/9k/k10/i8259.c /sys/src/9k/k10/i8259.c --- /sys/src/9k/k10/i8259.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/i8259.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,131 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" + +/* + * 8259 Interrupt Controller and compatibles. + */ +enum { /* I/O ports */ + Cntrl1 = 0x20, + Cntrl2 = 0xa0, + + Icw1 = 0, /* Initialisation Command Word 1 */ + Icw2 = 1, + Icw3 = 1, + Icw4 = 1, + + Ocw1 = 1, /* Operational Control Word 1 */ + Ocw2 = 0, + Ocw3 = 0, + + Imr = Ocw1, /* Interrupt Mask Register */ + Isr = Ocw3, /* In-Service Register */ + Irr = Ocw3, /* Interrupt Request Register */ + + Elcr1 = 0x4d0, /* Edge/Level Control Register */ + Elcr2 = 0x4d1, +}; + +enum { /* Icw1 */ + Ic4 = 0x01, /* there will be an Icw4 */ + Icw1sel = 0x10, /* Icw/Ocw select */ +}; + +enum { /* Icw3 */ + Cascaded = 0x04, /* Cntrl1 - Cascaded Mode Enable */ + SlaveIRQ2 = 0x02, /* Cntrl2 - Slave Identification Code */ +}; + +enum { /* Icw4 */ + Microprocessor = 0x01, /* 80x86-based system */ +}; + +enum { /* Ocw2 */ + Ocw2sel = 0x00, /* Ocw2 select */ + Eoi = 0x20, /* Non-spcific EOI command */ +}; + +enum { /* Ocw3 */ + Irrread = 0x02, /* Read IRQ register */ + Isrread = 0x03, /* Read IS register */ + Ocw3sel = 0x08, /* Ocw3 select */ +}; + +static Lock i8259lock; +static int i8259mask = ~0; /* mask of disabled interrupts */ +static int i8259elcr; /* mask of level interrupts */ + +int +i8259init(int vectorbase) +{ + int elcr; + + vectorbase &= ~0x07; + + ilock(&i8259lock); + + /* + * Boilerplate to initialise the pair of 8259 controllers, + * see one of the Intel bridge datasheets for details, + * e.g. 82371AB (PIIX4). The default settings are 80x86 mode, + * edge-sensitive detection, normal EOI, non-buffered and + * cascade mode. 
Cntrl1 is connected as the master and Cntrl2 + * as the slave; IRQ2 is used to cascade the two controllers. + */ + outb(Cntrl1+Icw1, Icw1sel|Ic4); + outb(Cntrl1+Icw2, vectorbase); + outb(Cntrl1+Icw3, Cascaded); + outb(Cntrl1+Icw4, Microprocessor); + + outb(Cntrl2+Icw1, Icw1sel|Ic4); + outb(Cntrl2+Icw2, vectorbase+8); + outb(Cntrl2+Icw3, SlaveIRQ2); + outb(Cntrl2+Icw4, Microprocessor); + + /* + * Set the interrupt masks, allowing interrupts + * to pass from Cntrl2 to Cntrl1 on IRQ2. + */ + i8259mask &= ~(1<<2); + outb(Cntrl2+Imr, (i8259mask>>8) & 0xff); + outb(Cntrl1+Imr, i8259mask & 0xff); + + outb(Cntrl1+Ocw2, Ocw2sel|Eoi); + outb(Cntrl2+Ocw2, Ocw2sel|Eoi); + + /* + * Set Ocw3 to return the ISR when read for i8259isr() + * (after initialisation status read is set to return the IRR). + * Read IRR first to possibly deassert an outstanding + * interrupt. + */ + inb(Cntrl1+Irr); + outb(Cntrl1+Ocw3, Ocw3sel|Isrread); + inb(Cntrl2+Irr); + outb(Cntrl2+Ocw3, Ocw3sel|Isrread); + + /* + * Check for Edge/Level Control register. + * This check may not work for all chipsets. + * First try a non-intrusive test - the bits for + * IRQs 13, 8, 2, 1 and 0 must be edge (0). If + * that's OK try a R/W test. 
+ */ + elcr = (inb(Elcr2)<<8)|inb(Elcr1); + if(!(elcr & 0x2107)){ + outb(Elcr1, 0); + if(inb(Elcr1) == 0){ + outb(Elcr1, 0x20); + if(inb(Elcr1) == 0x20) + i8259elcr = elcr; + outb(Elcr1, elcr & 0xff); + } + } + iunlock(&i8259lock); + + return vectorbase; +} diff -Nru /sys/src/9k/k10/init9.c /sys/src/9k/k10/init9.c --- /sys/src/9k/k10/init9.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/init9.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,7 @@ +extern void startboot(char*, char**); + +void +main(char* argv0) +{ + startboot(argv0, &argv0); +} diff -Nru /sys/src/9k/k10/intrmon.c /sys/src/9k/k10/intrmon.c --- /sys/src/9k/k10/intrmon.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/intrmon.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,81 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum { + Nalarm = 3*1000, /* alarm when interrupt has run for this long */ +}; + +typedef struct Mon Mon; +struct Mon { + ulong tk; /* tick of mach entry */ + ulong ltk; /* local tick we started tracking it */ +}; + +static void intrmon(Ureg*, Timer*); + +/* + * This code relies on sys->nmach being set properly. Do not call intrmoninit on + * any processor until *ncpu/nmach has been sorted. 
+ */ +void +intrmoninit(void) +{ + Timer *t; + + m->intrmon.mon = mallocz(sys->nmach * sizeof (Mon), 1); + if(m->intrmon.mon == nil) { + iprint("intrmoninit: mon alloc failure for cpu%d\n", m->machno); + return; + } + + t = malloc(sizeof(*t)); /* NOTE(review): result is not checked for nil, unlike mon above */ + t->tmode = Trelative; + t->tt = nil; + /* + * stagger out the timers by mach no to start in + * order to get one running per second + */ + t->tns = (vlong)m->machno*1000*1000*1000; /* widen first: the product exceeds 32 bits from machno 3 up */ + t->tf = intrmon; + timeradd(t); +} + +static void +intrmon(Ureg*, Timer *t) +{ + Mon *mon; + Mach *mp; + ulong tk; + int i; + Mpl s; + + s = splhi(); + + /* reset timer from startup to scaled out by # procs */ + t->tmode = Tperiodic; + t->tns = (vlong)sys->nmach*1000*1000*1000; /* widen first to avoid 32-bit overflow */ + + mon = m->intrmon.mon; /* my personal monitor array to protect others */ + for(i = 0; i < sys->nmach; i++){ + mp = sys->machptr[i]; + if(mp == nil || !mp->online) + continue; + tk = mp->intrmon.tk; + if(tk == 0 || tk != mon[i].tk){ + mon[i].tk = tk; + mon[i].ltk = m->ticks; + continue; + } + if(TK2MS(m->ticks - mon[i].ltk) < Nalarm) + continue; + /* Lazy avoid multiple cpus reporting the same condition.
*/ + mp->intrmon.tk++; + xdpanic("cpu%d intr watchdog(%d): tk %ld vno %d fn 0x%ulx\n", + i, m->machno, mp->intrmon.tk, mp->intrmon.vno, + mp->intrmon.fn); + } + splx(s); +} diff -Nru /sys/src/9k/k10/io.h /sys/src/9k/k10/io.h --- /sys/src/9k/k10/io.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/io.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,286 @@ +enum { + IrqCLOCK = 0, /* legacy external i8259 IRQs */ + IrqKBD = 1, + IrqUART1 = 3, + IrqUART0 = 4, + IrqPCMCIA = 5, + IrqFLOPPY = 6, + IrqLPT = 7, + IrqIRQ7 = 7, + IrqAUX = 12, /* PS/2 port */ + IrqIRQ13 = 13, /* coprocessor on 386 */ + IrqATA0 = 14, + IrqATA1 = 15, +}; + +enum { + IdtPIC = 32, /* external i8259 interrupts */ + + IdtLINT0 = 48, /* local APIC interrupts */ + IdtLINT1 = 49, + IdtTIMER = 50, + IdtERROR = 51, + IdtPMC = 52, + IdtTHS = 53, + + IdtIPI = 62, + IdtSPURIOUS = 63, /* end local APIC interrupts */ + IdtSYSCALL = 64, /* unused on AMD64 */ + IdtIOAPIC = 65, /* IOAPIC interrupts */ + + IdtMAX = 255, /* end IOAPIC interrupts */ +}; + +typedef struct Vctl { + Vctl* next; /* handlers on this vector */ + + int isintr; /* interrupt or fault/trap */ + int affinity; /* processor affinity (-1 for none) */ + + int irq; + void (*f)(Ureg*, void*); /* handler to call */ + void* a; /* argument to call it with */ + int tbdf; + char name[KNAMELEN]; /* of driver */ + char *type; + + int (*isr)(int); /* get isr bit for this irq */ + int (*eoi)(int); /* eoi */ + int (*mask)(Vctl*, int); /* interrupt enable returns masked vector */ + int vno; +} Vctl; + +enum { + BusCBUS = 0, /* Corollary CBUS */ + BusCBUSII, /* Corollary CBUS II */ + BusEISA, /* Extended ISA */ + BusFUTURE, /* IEEE Futurebus */ + BusINTERN, /* Internal bus */ + BusISA, /* Industry Standard Architecture */ + BusMBI, /* Multibus I */ + BusMBII, /* Multibus II */ + BusMCA, /* Micro Channel Architecture */ + BusMPI, /* MPI */ + BusMPSA, /* MPSA */ + BusNUBUS, /* Apple Macintosh NuBus */ + BusPCI, /* Peripheral Component Interconnect */ + BusPCMCIA, /* PC Memory 
Card International Association */ + BusTC, /* DEC TurboChannel */ + BusVL, /* VESA Local bus */ + BusVME, /* VMEbus */ + BusXPRESS, /* Express System Bus */ +}; + +#define MKBUS(t,b,d,f) (((t)<<24)|(((b)&0xFF)<<16)|(((d)&0x1F)<<11)|(((f)&0x07)<<8)) +#define BUSFNO(tbdf) (((tbdf)>>8)&0x07) +#define BUSDNO(tbdf) (((tbdf)>>11)&0x1F) +#define BUSBNO(tbdf) (((tbdf)>>16)&0xFF) +#define BUSTYPE(tbdf) ((tbdf)>>24) +#define BUSBDF(tbdf) ((tbdf)&0x00FFFF00) +#define BUSUNKNOWN (-1) + +enum { + MaxEISA = 16, + CfgEISA = 0xC80, +}; + +/* + * PCI support code. + */ +enum { /* type 0 and type 1 pre-defined header */ + PciVID = 0x00, /* vendor ID */ + PciDID = 0x02, /* device ID */ + PciPCR = 0x04, /* command */ + PciPSR = 0x06, /* status */ + PciRID = 0x08, /* revision ID */ + PciCCRp = 0x09, /* programming interface class code */ + PciCCRu = 0x0A, /* sub-class code */ + PciCCRb = 0x0B, /* base class code */ + PciCLS = 0x0C, /* cache line size */ + PciLTR = 0x0D, /* latency timer */ + PciHDT = 0x0E, /* header type */ + PciBST = 0x0F, /* BIST */ + + PciBAR0 = 0x10, /* base address */ + PciBAR1 = 0x14, + + PciCP = 0x34, /* capabilities pointer */ + + PciINTL = 0x3C, /* interrupt line */ + PciINTP = 0x3D, /* interrupt pin */ +}; + +/* ccrb (base class code) values; controller types */ +enum { + Pcibcpci1 = 0, /* pci 1.0; no class codes defined */ + Pcibcstore = 1, /* mass storage */ + Pcibcnet = 2, /* network */ + Pcibcdisp = 3, /* display */ + Pcibcmmedia = 4, /* multimedia */ + Pcibcmem = 5, /* memory */ + Pcibcbridge = 6, /* bridge */ + Pcibccomm = 7, /* simple comms (e.g., serial) */ + Pcibcbasesys = 8, /* base system */ + Pcibcinput = 9, /* input */ + Pcibcdock = 0xa, /* docking stations */ + Pcibcproc = 0xb, /* processors */ + Pcibcserial = 0xc, /* serial bus (e.g., USB) */ + Pcibcwireless = 0xd, /* wireless */ + Pcibcintell = 0xe, /* intelligent i/o */ + Pcibcsatcom = 0xf, /* satellite comms */ + Pcibccrypto = 0x10, /* encryption/decryption */ + Pcibcdacq = 0x11, /* data 
acquisition & signal proc. */ +}; + +/* ccru (sub-class code) values; common cases only */ +enum { + /* mass storage */ + Pciscscsi = 0, /* SCSI */ + Pciscide = 1, /* IDE (ATA) */ + Pciscsata = 6, /* SATA */ + + /* network */ + Pciscether = 0, /* Ethernet */ + + /* display */ + Pciscvga = 0, /* VGA */ + Pciscxga = 1, /* XGA */ + Pcisc3d = 2, /* 3D */ + + /* bridges */ + Pcischostpci = 0, /* host/pci */ + Pciscpcicpci = 1, /* pci/pci */ + + /* simple comms */ + Pciscserial = 0, /* 16450, etc. */ + Pciscmultiser = 1, /* multiport serial */ + + /* serial bus */ + Pciscusb = 3, /* USB */ +}; + +enum { /* type 0 pre-defined header */ + PciCIS = 0x28, /* cardbus CIS pointer */ + PciSVID = 0x2C, /* subsystem vendor ID */ + PciSID = 0x2E, /* subsystem ID */ + PciEBAR0 = 0x30, /* expansion ROM base address */ + PciMGNT = 0x3E, /* burst period length */ + PciMLT = 0x3F, /* maximum latency between bursts */ +}; + +enum { /* type 1 pre-defined header */ + PciPBN = 0x18, /* primary bus number */ + PciSBN = 0x19, /* secondary bus number */ + PciUBN = 0x1A, /* subordinate bus number */ + PciSLTR = 0x1B, /* secondary latency timer */ + PciIBR = 0x1C, /* I/O base */ + PciILR = 0x1D, /* I/O limit */ + PciSPSR = 0x1E, /* secondary status */ + PciMBR = 0x20, /* memory base */ + PciMLR = 0x22, /* memory limit */ + PciPMBR = 0x24, /* prefetchable memory base */ + PciPMLR = 0x26, /* prefetchable memory limit */ + PciPUBR = 0x28, /* prefetchable base upper 32 bits */ + PciPULR = 0x2C, /* prefetchable limit upper 32 bits */ + PciIUBR = 0x30, /* I/O base upper 16 bits */ + PciIULR = 0x32, /* I/O limit upper 16 bits */ + PciEBAR1 = 0x28, /* expansion ROM base address; NOTE(review): same value as PciPUBR, the bridge header appears to place this at 0x38 - confirm */ + PciBCR = 0x3E, /* bridge control register */ +}; + +enum { /* type 2 pre-defined header */ + PciCBExCA = 0x10, + PciCBSPSR = 0x16, + PciCBPBN = 0x18, /* primary bus number */ + PciCBSBN = 0x19, /* secondary bus number */ + PciCBUBN = 0x1A, /* subordinate bus number */ + PciCBSLTR = 0x1B, /* secondary latency timer */
+ PciCBMBR0 = 0x1C, + PciCBMLR0 = 0x20, + PciCBMBR1 = 0x24, + PciCBMLR1 = 0x28, + PciCBIBR0 = 0x2C, /* I/O base */ + PciCBILR0 = 0x30, /* I/O limit */ + PciCBIBR1 = 0x34, /* I/O base */ + PciCBILR1 = 0x38, /* I/O limit */ + PciCBSVID = 0x40, /* subsystem vendor ID */ + PciCBSID = 0x42, /* subsystem ID */ + PciCBLMBAR = 0x44, /* legacy mode base address */ +}; + +/* capabilities */ +enum { + PciCapPMG = 0x01, /* power management */ + PciCapAGP = 0x02, + PciCapVPD = 0x03, /* vital product data */ + PciCapSID = 0x04, /* slot id */ + PciCapMSI = 0x05, + PciCapCHS = 0x06, /* compact pci hot swap */ + PciCapPCIX = 0x07, + PciCapHTC = 0x08, /* hypertransport irq conf */ + PciCapVND = 0x09, /* vendor specific information */ + PciCapPCIe = 0x10, + PciCapMSIX = 0x11, + PciCapSATA = 0x12, + PciCapHSW = 0x0c, /* hot swap */ +}; + +typedef struct Pcisiz Pcisiz; +struct Pcisiz +{ + Pcidev* dev; + int siz; + int bar; +}; + +typedef struct Pcidev Pcidev; +struct Pcidev +{ + int tbdf; /* type+bus+device+function */ + ushort vid; /* vendor ID */ + ushort did; /* device ID */ + ushort svid; /* subsystem vid */ + ushort sdid; /* subsystem did */ + + ushort pcr; + + uchar rid; + uchar ccrp; + uchar ccru; + uchar ccrb; + uchar cls; + uchar ltr; + + struct { + ulong bar; /* base address */ + int size; + } mem[6]; + + struct { + ulong bar; + int size; + } rom; + uchar intl; /* interrupt line */ + + Pcidev* list; + Pcidev* link; /* next device on this bno */ + + Pcidev* bridge; /* down a bus */ + struct { + ulong bar; + int size; + } ioa, mema; + + int pmrb; /* power management register block */ + void* xcfg; /* PCIe configuration block */ +}; + +#define PCIWINDOW 0 +#define PCIWADDR64(va) (PADDR(va)+PCIWINDOW) +#define PCIWADDR32(va) ((ulong)PCIWADDR64(va)) +#define ISAWINDOW 0 +#define ISAWADDR(va) (PADDR(va)+ISAWINDOW) +#define PCIWADDRL(va) ((ulong)PCIWADDR64(va)) +#define PCIWADDRH(va) ((ulong)(PCIWADDR64(va)>>32)) + +#pragma varargck type "T" int diff -Nru /sys/src/9k/k10/ioapic.c 
/sys/src/9k/k10/ioapic.c --- /sys/src/9k/k10/ioapic.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/ioapic.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,595 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" +#include "io.h" +#include "adr.h" + +typedef struct Rbus Rbus; +typedef struct Rdt Rdt; + +struct Rbus { + Rbus *next; + int bustype; + int devno; + Rdt *rdt; +}; + +struct Rdt { + IOapic *apic; + int intin; + u32int lo; + + int ref; /* could map to multiple busses */ + int enabled; /* times enabled */ +}; + +enum { /* IOAPIC registers */ + Ioregsel = 0x00, /* indirect register address */ + Iowin = 0x04, /* indirect register data */ + Ioipa = 0x08, /* IRQ Pin Assertion */ + Ioeoi = 0x10, /* EOI */ + + IOapicid = 0x00, /* Identification */ + IOapicver = 0x01, /* Version */ + IOapicarb = 0x02, /* Arbitration */ + Ioabcfg = 0x03, /* Boot Configuration */ + Ioredtbl = 0x10, /* Redirection Table */ +}; + +static Rdt rdtarray[Nrdt]; +static int nrdtarray; +static int gsib; +static Rbus* rdtbus[Nbus]; +static Rdt* rdtvecno[IdtMAX+1]; +static int dfpolicy = 1; /* round-robin */ + +static Lock idtnolock; +static int idtno = IdtIOAPIC; + +static IOapic xioapic[Napic]; +static int isabusno = -1; + +/* BOTCH: no need for this concept; we've got the bustype */ +static void +ioapicisabus(int busno) +{ + if(isabusno != -1){ + if(busno == isabusno) + return; + print("ioapic: isabus redefined: %d ↛ %d\n", isabusno, busno); +// return; + } + DBG("ioapic: isa busno %d\n", busno); + isabusno = busno; +} + +IOapic* +ioapiclookup(uint id) +{ + IOapic *a; + + if(id >= nelem(xioapic)) /* id == nelem would index one past the table */ + return nil; + a = xioapic + id; + if(a->useable) + return a; + return nil; +} + +int +gsitoapicid(int gsi, uint *intin) +{ + int i; + IOapic *a; + + for(i=0; i<Napic; i++){ + a = xioapic + i; + if(!a->useable) + continue; + if(gsi >= a->gsib && gsi < a->gsib+a->nrdt){ + if(intin != nil) + *intin = gsi - a->gsib; + return a - xioapic; + } + } +// print("gsitoapicid: no ioapic found for
gsi %d\n", gsi); + return -1; +} + +static void +rtblget(IOapic* apic, int sel, u32int* hi, u32int* lo) +{ + sel = Ioredtbl + 2*sel; + + apic->addr[Ioregsel] = sel+1; + *hi = apic->addr[Iowin]; + apic->addr[Ioregsel] = sel; + *lo = apic->addr[Iowin]; +} + +static void +rtblput(IOapic* apic, int sel, u32int hi, u32int lo) +{ + sel = Ioredtbl + 2*sel; + + apic->addr[Ioregsel] = sel+1; + apic->addr[Iowin] = hi; + apic->addr[Ioregsel] = sel; + apic->addr[Iowin] = lo; +} + +Rdt* +rdtlookup(IOapic *apic, int intin) +{ + int i; + Rdt *r; + + for(i = 0; i < nrdtarray; i++){ + r = rdtarray + i; + if(apic == r->apic && intin == r->intin) + return r; + } + return nil; +} + +void +ioapicintrinit(int bustype, int busno, int apicno, int intin, int devno, u32int lo) +{ + Rbus *rbus; + Rdt *rdt; + IOapic *apic; + + if(busno >= Nbus || apicno >= Napic || nrdtarray >= Nrdt) + return; + + if(bustype == BusISA) + ioapicisabus(busno); + + apic = &xioapic[apicno]; + if(!apic->useable || intin >= apic->nrdt) + panic("ioapic: intrinit: usable %d nrdt %d: bus %d apic %d intin %d dev %d lo %.8ux\n", + apic->useable, apic->nrdt, busno, apicno, intin, devno, lo); + + rdt = rdtlookup(apic, intin); + if(rdt == nil){ + if(nrdtarray == nelem(rdtarray)){ + print("ioapic: intrinit: rdtarray too small\n"); + return; + } + rdt = &rdtarray[nrdtarray++]; + rdt->apic = apic; + rdt->intin = intin; + rdt->lo = lo; + }else{ + if(lo != rdt->lo){ + if(bustype == BusISA && intin < 16 && lo == (Im|IPhigh|TMedge)){ + DBG("override: isa %d %.8ux\n", intin, rdt->lo); + return; /* expected; default was overridden*/ + } + print("multiple irq botch type %d bus %d %d/%d/%d lo %.8ux vs %.8ux\n", + bustype, busno, apicno, intin, devno, lo, rdt->lo); + return; + } + DBG("dup rdt %d %d %d %d %.8ux\n", busno, apicno, intin, devno, lo); + } + rdt->ref++; + rbus = malloc(sizeof(*rbus)); + rbus->rdt = rdt; + rbus->bustype = bustype; + rbus->devno = devno; + rbus->next = rdtbus[busno]; + rdtbus[busno] = rbus; +} + +/* + * 
deal with ioapics at the same physical address. seen on + * certain supermicro atom systems. the hope is that only + * one will be used, and it will be the second one initialized. + * (the pc kernel ignores this issue.) it could be that mp and + * acpi have different numbering? + */ +static IOapic* +dupaddr(uintmem pa) +{ + int i; + IOapic *p; + + for(i = 0; i < nelem(xioapic); i++){ + p = xioapic + i; + if(p->paddr == pa) + return p; + } + return nil; +} + +IOapic* +ioapicinit(int id, int ibase, uintmem pa) +{ + IOapic *apic, *p; + + /* + * Mark the IOAPIC useable if it has a good ID + * and the registers can be mapped. + */ + if(id >= Napic) + return nil; + if((apic = xioapic+id)->useable) + return apic; + + if((p = dupaddr(pa)) != nil){ + print("ioapic%d: same pa as apic%ld\n", id, p-xioapic); + if(ibase != -1) + return nil; /* mp irqs reference mp apic#s */ + apic->addr = p->addr; + } + else{ + //adrmapck(pa, 1024, Ammio, Mfree, Cnone); /* not in adr? */ /* TO DO */ + if((apic->addr = vmap(pa, 1024)) == nil){ + print("ioapic%d: can't vmap %#P\n", id, pa); + return nil; + } + } + apic->useable = 1; + apic->paddr = pa; + + /* + * Initialise the I/O APIC. + * The MultiProcessor Specification says it is the + * responsibility of the O/S to set the APIC ID. 
+ */ + lock(apic); + apic->addr[Ioregsel] = IOapicver; + apic->nrdt = ((apic->addr[Iowin]>>16) & 0xff) + 1; + if(ibase != -1) + apic->gsib = ibase; + else{ + apic->gsib = gsib; + gsib += apic->nrdt; + } + apic->addr[Ioregsel] = IOapicid; + apic->addr[Iowin] = id<<24; + unlock(apic); + + return apic; +} + +void +iordtdump(void) +{ + int i; + Rbus *rbus; + Rdt *rdt; + + if(!DBGFLG) + return; + for(i = 0; i < Nbus; i++){ + if((rbus = rdtbus[i]) == nil) + continue; + print("iointr bus %d:\n", i); + for(; rbus != nil; rbus = rbus->next){ + rdt = rbus->rdt; + print(" apic %ld devno %#ux (%d %d) intin %d lo %#ux ref %d\n", + rdt->apic-xioapic, rbus->devno, rbus->devno>>2, + rbus->devno & 0x03, rdt->intin, rdt->lo, rdt->ref); + } + } +} + +void +ioapicdump(void) +{ + int i, n; + IOapic *apic; + u32int hi, lo; + + if(!DBGFLG) + return; + for(i = 0; i < Napic; i++){ + apic = &xioapic[i]; + if(!apic->useable || apic->addr == 0) + continue; + print("ioapic %d addr %#p nrdt %d ibase %d\n", + i, apic->addr, apic->nrdt, apic->gsib); + for(n = 0; n < apic->nrdt; n++){ + lock(apic); + rtblget(apic, n, &hi, &lo); + unlock(apic); + print(" rdt %2.2d %#8.8ux %#8.8ux\n", n, hi, lo); + } + } +} + +static char* +ioapicprint(char *p, char *e, IOapic *a, int i) +{ + char *s; + + s = "ioapic"; + p = seprint(p, e, "%-8s ", s); + p = seprint(p, e, "%8ux ", i); + p = seprint(p, e, "%6d ", a->gsib); + p = seprint(p, e, "%6d ", a->gsib+a->nrdt-1); + p = seprint(p, e, "%#P ", a->paddr); + p = seprint(p, e, "\n"); + return p; +} + +static long +ioapicread(Chan*, void *a, long n, vlong off) +{ + char *s, *e, *p; + long i, r; + + s = malloc(READSTR); + e = s+READSTR; + p = s; + + for(i = 0; i < nelem(xioapic); i++) + if(xioapic[i].useable) + p = ioapicprint(p, e, xioapic + i, i); + r = -1; + if(!waserror()){ + r = readstr(off, a, n, s); + poperror(); + } + free(s); + return r; +} + +void +ioapiconline(void) +{ + int i; + IOapic *apic; + + addarchfile("ioapic", 0444, ioapicread, nil); + for(apic = 
xioapic; apic < &xioapic[Napic]; apic++){ + if(!apic->useable || apic->addr == nil) + continue; + for(i = 0; i < apic->nrdt; i++){ + lock(apic); + rtblput(apic, i, 0, Im); + unlock(apic); + } + } + print("init ioapic dump\n"); + ioapicdump(); +} +/* ioapicintrdd: choose the destination local APIC for an interrupt according to dfpolicy; stores the APIC number shifted left by 24 in *hi, ORs Pm|MTf into *lo, and returns the APIC number. */ +static int +ioapicintrdd(u32int* hi, u32int* lo) +{ + Lapic *lapic; + Mach *mach; + int i; + static int df; + + /* + * Set delivery mode (lo) and destination field (hi) + * + * Currently, assign each interrupt to a different CPU + * using physical mode delivery. Using the topology + * (packages/cores/threads) could be helpful. + */ + switch(dfpolicy){ + case 0: /* always the APIC of sys->machptr[0] */ + i = sys->machptr[0]->apicno; + break; + default: /* round-robin */ + for(;;){ /* df is static, so the scan resumes where the previous call stopped; spins until an APIC with an online Mach is found */ + i = df; + if(++df >= Napic) + df = 0; + if((lapic = lapiclookup(i)) != nil && + (mach = sys->machptr[lapic->machno]) != nil && + mach->online) + break; + } + break; + } + *hi = i<<24; + *lo |= Pm|MTf; + return i; +} +/* nextvec: return the current idtno and, under idtnolock, advance it by 8 modulo IdtMAX, adding IdtIOAPIC when the result falls below IdtIOAPIC. */ +int +nextvec(void) +{ + uint vecno; + + lock(&idtnolock); + vecno = idtno; + idtno = (idtno+8) % IdtMAX; + if(idtno < IdtIOAPIC) + idtno += IdtIOAPIC; + unlock(&idtnolock); + + return vecno; +} +/* msimask: Vctl mask hook installed by intrenablemsi; looks the device up by v->tbdf and passes mask to pcimsimask. Returns -1 if no device matches. */ +static int +msimask(Vctl *v, int mask) +{ + Pcidev *p; + + p = pcimatchtbdf(v->tbdf); + if(p == nil) + return -1; + return pcimsimask(p, mask); +} +/* intrenablemsi: try to deliver p's interrupt as MSI. Takes a vector from nextvec, builds the 64-bit message from the hi/lo words filled in by ioapicintrdd and hands it to pcimsienable. Returns the vector, or -1 if pcimsienable fails. NOTE(review): the vector taken from nextvec is not handed back on failure. */ +static int +intrenablemsi(Vctl* v, Pcidev *p) +{ + uint vno, lo, hi; + uvlong msivec; + + vno = nextvec(); + + lo = IPlow | TMedge | vno; + v->affinity = ioapicintrdd(&hi, &lo); + + if(lo & Lm) + lo |= MTlp; + + msivec = (uvlong)hi<<32 | lo; + if(pcimsienable(p, msivec) == -1) + return -1; + v->isr = lapicisr; + v->eoi = lapiceoi; + v->vno = vno; + v->type = "msi"; + v->mask = msimask; + + DBG("msiirq: %T: enabling %.16llux %s irq %d vno %d\n", p->tbdf, msivec, v->name, v->irq, vno); + return vno; +} +/* disablemsi: mask MSI on p with pcimsimask(p, 1); returns -1 if p is nil. The Vctl argument is unused. */ +int +disablemsi(Vctl*, Pcidev *p) +{ + if(p == nil) + return -1; + return pcimsimask(p, 1); +} +/* ioapicintrenable: route the interrupt described by v and fill in v->isr, v->eoi, v->vno and v->type. Local APIC vectors are returned as is, PCI devices try MSI first, everything else goes through an IOAPIC redirection table entry. Returns the vector number, or -1 if no entry is found. */ +int +ioapicintrenable(Vctl* v) +{ + Rbus *rbus; + Rdt *rdt; + u32int hi, lo; + int bustype, busno, devno, vecno; + + if(v->tbdf ==
BUSUNKNOWN){ + if(v->irq >= IdtLINT0 && v->irq <= IdtMAX){ + if(v->irq != IdtSPURIOUS) + v->isr = lapiceoi; /* lapiceoi is installed as the isr hook here, not as eoi; NOTE(review): presumably so the EOI is sent before the handlers run - confirm against trap */ + v->type = "lapic"; + return v->irq; + } + else{ + /* + * Legacy ISA. + * Make a busno and devno using the + * ISA bus number and the irq. + */ + if(isabusno == -1) + panic("no ISA bus allocated"); + busno = isabusno; + devno = v->irq; + bustype = BusISA; + } + } + else if((bustype = BUSTYPE(v->tbdf)) == BusPCI){ + /* + * PCI. + * Make a devno from BUSDNO(tbdf) and pcidev->intp. + */ + Pcidev *pcidev; + + busno = BUSBNO(v->tbdf); + if((pcidev = pcimatchtbdf(v->tbdf)) == nil) + panic("no PCI dev for tbdf %T", v->tbdf); + if((vecno = intrenablemsi(v, pcidev)) != -1) /* MSI is tried first; the interrupt pin is used only if that fails */ + return vecno; + disablemsi(v, pcidev); + if((devno = pcicfgr8(pcidev, PciINTP)) == 0) + panic("no INTP for tbdf %T", v->tbdf); + devno = BUSDNO(v->tbdf)<<2|(devno-1); + DBG("ioapicintrenable: tbdf %T busno %d devno %d\n", + v->tbdf, busno, devno); + } + else{ + SET(busno, devno); + panic("unknown tbdf %T", v->tbdf); + } + rdt = nil; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno && rbus->bustype == bustype){ + rdt = rbus->rdt; + break; + } + if(rdt == nil){ + /* + * PCI devices defaulted to ISA (ACPI). + */ + if((busno = isabusno) == -1) + return -1; + devno = v->irq; + for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next) + if(rbus->devno == devno){ + rdt = rbus->rdt; + break; + } + DBG("isa: tbdf %T busno %d devno %d %#p\n", + v->tbdf, busno, devno, rdt); + } + if(rdt == nil) + return -1; + + /* + * Assume this is a low-frequency event so just lock + * the whole IOAPIC to initialise the RDT entry + * rather than putting a Lock in each entry.
+ */ + lock(rdt->apic); + DBG("%T: %ld/%d/%d (%d)\n", v->tbdf, rdt->apic - xioapic, rbus->devno, rdt->intin, devno); + if((rdt->lo & 0xff) == 0){ /* low byte of lo holds the vector; zero means none has been assigned yet */ + vecno = nextvec(); + rdt->lo |= vecno; + rdtvecno[vecno] = rdt; + }else + DBG("%T: mutiple irq bus %d dev %d\n", v->tbdf, busno, devno); + + rdt->enabled++; + lo = (rdt->lo & ~Im); /* the entry is written without Im; rdt->lo itself is left unchanged */ + v->affinity = ioapicintrdd(&hi, &lo); + rtblput(rdt->apic, rdt->intin, hi, lo); + vecno = lo & 0xff; + unlock(rdt->apic); + + DBG("busno %d devno %d hi %#.8ux lo %#.8ux vecno %d\n", + busno, devno, hi, lo, vecno); + v->isr = lapicisr; + v->eoi = lapiceoi; + v->vno = vecno; + v->type = "ioapic"; + + return vecno; +} +/* ioapicintrdisable: drop one enable reference on the redirection entry recorded for vecno in rdtvecno; when the count reaches zero the entry is rewritten with the stored rdt->lo (NOTE(review): presumably that still has Im set, which masks the input - confirm). Panics on an out-of-range or unassigned vecno; otherwise returns 0. */ +int +ioapicintrdisable(int vecno) +{ + Rdt *rdt; + + /* + * FOV. Oh dear. This isn't very good. + * Fortunately rdtvecno[vecno] is static + * once assigned. + * Must do better. + * + * What about any pending interrupts? + */ + if(vecno < 0 || vecno > IdtMAX){ + panic("ioapicintrdisable: vecno %d out of range", vecno); + return -1; + } + if((rdt = rdtvecno[vecno]) == nil){ + panic("ioapicintrdisable: vecno %d has no rdt", vecno); + return -1; + } + + lock(rdt->apic); + rdt->enabled--; + if(rdt->enabled == 0) + rtblput(rdt->apic, rdt->intin, 0, rdt->lo); + unlock(rdt->apic); + + return 0; +} diff -Nru /sys/src/9k/k10/k10cpu /sys/src/9k/k10/k10cpu --- /sys/src/9k/k10/k10cpu Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/k10cpu Wed Dec 9 00:00:00 2015 @@ -0,0 +1,197 @@ +dev +dev + root + cons uidgid + arch + env + pipe + proc + mnt +# pci pci smbus + pci pci + srv + dup + rtc + ssl + cap + kprof + + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsumamd64 + + uart + usb +# kbin + acpi hpet acpi lapic ioapic msi sipi + sd + mem + + +uart +dev + uarti8250 + uartpci pci + +ip +dev + il + tcp + udp + ipifc + icmp + icmp6 + +link +dev + ether8139 pci + ether8169 pci ethermii + ether82557 pci + ether82563 pci + ether82598 pci + etherigbe pci ethermii + ethermedium + loopbackmedium + netdevmedium + +
usbuhci + usbohci + usbehci usbehcipc + +# acpi hpet +# ht + +misc +dev + nocache + +sd +dev + sdata pci sdscsi + sdiahci pci sdscsi +# sdide pci sdscsi +# sdaoe sdscsi + +#boot cpu +# int cpuflag = 1; +#boot cpu boot $3 +# int cpuflag = 1; +# char* bootdisk = "$3"; +#boot rootdir $3 +# char* rootdir = "$3"; +#boot (bboot|romboot|dosboot) +# int cpuflag = 1; +# char* bootprog = $2; +#boot boot $3 +# char* bootdisk = "$3"; +# +boot cpu + tcp + il + +rootdir + bootk10cpu.out boot + /amd64/bin/auth/factotum factotum + /amd64/bin/ip/ipconfig ipconfig + +conf + int cpuserver = 1; + +# +#dbgflg +# chan 'c' +# apic 'A' +# acpi 'C' +# hpet 'H' +# ht 'H' +# ioapic 'I' +# mp 'M' +# pci 'P' +# arch 'V' +# +dbgflg + lapic 'A' + acpi 'C' + devacpi 'C' + hpet 'H' + ht 'H' + ioapic 'I' + mp 'M' + arch 'V' + archk10 'V' + sysproc 'E' + main 'x' + mmu 'm' + sipi 'y' + sysseg 's' + physalloc 'p' + qmalloc 'q' + +amd64 +dev + l32p + l64v + l64idt + l64syscall + l64fpu + l64cpuid + arch + archk10 + cga + options + fpu + i8254 + i8259 + kbd + main + map + memory + mmu + multiboot + qmalloc + random + syscall + trap + vsvm + +port + alarm + allocb + bud + chan + dev + devtab + edf + fault + image + latin1 + mcslock + net + page + parse + pgrp + portclock + print + proc + ps + physalloc + qio + qlock + rebootcmd + ref + rmap + segment + sysauth + syscallfmt + sysfile + sysproc + sysseg + systab + tod + +# +#dir +# pc -.I. +# +dir + 386 + ip + port + +lib + libc + libip + libsec + libaml diff -Nru /sys/src/9k/k10/l32p.s /sys/src/9k/k10/l32p.s --- /sys/src/9k/k10/l32p.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l32p.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,244 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $32 + +#define BLOCK Lx: JMP Lx +#define WAWN(x, r) MOVB $(x), r; MOVL $0x3F8, DX; OUTB + +#define pFARJMP32(s, o) BYTE $0xea; /* far jump to ptr32:16 */\ + LONG $o; WORD $s + +/* + * Enter here in 32-bit protected mode. Welcome to 1982. 
+ * Make sure the GDT is set as it should be: + * disable interrupts; + * load the GDT with the table in _gdt32p; + * load all the data segments + * load the code segment via a far jump. + */ +TEXT _protected<>(SB), 1, $-4 + CLI + BYTE $0xe9; LONG $0x00000058; /* JMP _endofheader */ + +_startofheader: + BYTE $0x90 /* NOP */ + BYTE $0x90 /* NOP */ + +TEXT _multibootheader<>(SB), 1, $-4 /* must be 4-byte aligned */ + LONG $0x1badb002 /* magic */ + LONG $0x00000003 /* flags */ + LONG $-(0x1badb002 + 0x00000003) /* checksum */ + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + +TEXT _gdtptr64p<>(SB), 1, $-4 + WORD $(2*8-1) + QUAD $_gdt64<>-KZERO(SB) + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +_endofheader: + MOVL AX, BP /* possible passed-in magic */ + + MOVL $_gdtptr32p<>-KZERO(SB), AX + MOVL (AX), GDTR + + MOVL $SSEL(SiDS, SsTIGDT|SsRPL0), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + pFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _warp64<>-KZERO(SB)) + +/* + * Make the basic page tables for CPU0 to map 0-4MiB physical + * to KZERO, and include an identity map for the switch from protected + * to paging mode. There's an assumption here that the creation and later + * removal of the identity map will not interfere with the KZERO mappings; + * the conditions for clearing the identity map are + * clear PML4 entry when (KZERO & 0x0000ff8000000000) != 0; + * clear PDP entry when (KZERO & 0x0000007fc0000000) != 0; + * don't clear PD entry when (KZERO & 0x000000003fe00000) == 0; + * the code below assumes these conditions are met. 
+ * + * Assume a recent processor with Page Size Extensions + * and use two 2MiB entries. + */ +/* + * The layout is described in dat.h: + * _protected: start of kernel text + * - 4*KiB unused + * - 4*KiB unused + * - 4*KiB ptrpage + * - 4*KiB syspage + * - MACHSZ m + * - 4*KiB vsvmpage for gdt, tss + * - PTSZ PT for PMAPADDR unused - assumes in KZERO PD + * - PTSZ PD + * - PTSZ PDP + * - PTSZ PML4 + * - MACHSTKSZ stack + */ + +/* + * Macros for accessing page table entries; change the + * C-style array-index macros into a page table byte offset + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _warp64<>(SB), 1, $-4 + MOVL $_protected<>-(MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ+KZERO)(SB), SI + + MOVL SI, DI + XORL AX, AX + MOVL $((MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ)>>2), CX + + CLD + REP; STOSL /* stack, P*, vsvm, m, sys */ + + MOVL SI, AX /* sys-KZERO */ + ADDL $(MACHSTKSZ), AX /* PML4 */ + MOVL AX, CR3 /* load the mmu */ + MOVL AX, DX + ADDL $(PTSZ|PteRW|PteP), DX /* PDP at PML4 + PTSZ */ + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + MOVL DX, PML4O(KZERO)(AX) /* PML4E for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PDP at PML4 + PTSZ */ + ADDL $PTSZ, DX /* PD at PML4 + 2*PTSZ */ + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL DX, PDPO(KZERO)(AX) /* PDPE for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + MOVL $(PtePS|PteRW|PteP), DX + MOVL DX, PDO(0)(AX) /* PDE for identity 0-2MiB */ + + MOVL AX, CX + ADDL $PDO(KZERO), CX /* first PDE for KZERO */ +memloop: + MOVL DX, 0(CX) /* PDE maps the physical address currently in DX */ + ADDL $PGLSZ(1), DX /* next large page */ + ADDL $8, CX /* next PDE */ + CMPL DX, $(10*MiB) /* keep going until DX reaches 10MiB */ + JLT memloop + + MOVL AX, DX /* PD at PML4 + 2*PTSZ */ + ADDL $(PTSZ|PteRW|PteP), DX /* PT at PML4 + 3*PTSZ */ + MOVL DX, PDO(PMAPADDR)(AX) /* PDE for PMAPADDR */ + +/* + * Enable and activate Long Mode.
From the manual: + * make sure Page Size Extensions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) /* selector index 3 is the Long mode CS entry of _gdt32p */ + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX /* DX stays 0 and is used as the zero source below */ + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* sys-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX + MOVQ AX, sys(SB) /* sys */ + + ADDQ $(MACHSTKSZ), AX /* PML4 and top of stack */ + MOVQ AX, SP /* set stack */ + +_zap0pml4: + CMPQ DX, $PML4O(KZERO) /* KZERO & 0x0000ff8000000000 */ + JEQ _zap0pdp + MOVQ DX, PML4O(0)(AX) /* zap identity map PML4E */ +_zap0pdp: + ADDQ $PTSZ, AX /* PDP at PML4 + PTSZ */ + CMPQ DX, $PDPO(KZERO) /* KZERO & 0x0000007fc0000000 */ + JEQ _zap0pd + MOVQ DX, PDPO(0)(AX) /* zap identity map PDPE */ +_zap0pd: + ADDQ $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + CMPQ DX, $PDO(KZERO) /* KZERO & 0x000000003fe00000 */ + JEQ _zap0done + MOVQ DX, PDO(0)(AX) /* zap identity map PDE */ +_zap0done: + + ADDQ $(MACHSTKSZ), SI /* PML4-KZERO */ + MOVQ SI, CR3 /* flush TLB */ + + ADDQ $(2*PTSZ+4*KiB), AX /* PD+PT+vsvm */ + MOVQ
AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + MOVLQZX BX, BX /* push multiboot args */ + PUSHQ BX /* multiboot info* */ + MOVLQZX RARG, RARG + PUSHQ RARG /* multiboot magic */ + + CALL main(SB) + +TEXT ndnr(SB), 1, $-4 /* no deposit, no return */ +_dnr: + STI + HLT + JMP _dnr /* do not resuscitate */ + diff -Nru /sys/src/9k/k10/l64cpuid.s /sys/src/9k/k10/l64cpuid.s --- /sys/src/9k/k10/l64cpuid.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l64cpuid.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,26 @@ +/* + * The CPUID instruction is always supported on the amd64. + */ +TEXT cpuid(SB), $-4 + MOVL RARG, AX /* function in AX */ + MOVLQZX cx+8(FP), CX /* iterator/index/etc. */ + + CPUID + + MOVQ info+16(FP), BP + MOVL AX, 0(BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) + RET + +/* + * Basic timing loop to determine CPU frequency. + * The AAM instruction is not available in 64-bit mode. + */ +TEXT aamloop(SB), 1, $-4 + MOVLQZX RARG, CX +aaml1: + XORQ AX, AX /* close enough */ + LOOP aaml1 + RET diff -Nru /sys/src/9k/k10/l64fpu.s /sys/src/9k/k10/l64fpu.s --- /sys/src/9k/k10/l64fpu.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l64fpu.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,46 @@ +/* + * SIMD Floating Point. + * Note: for x87 instructions which have both a 'wait' and 'nowait' version, + * 6a only knows the 'wait' mnemonic but does NOT insert the WAIT prefix byte + * (i.e. they act like their FNxxx variations) so WAIT instructions must be + * explicitly placed in the code if necessary. 
+ */ +TEXT _clts(SB), 1, $-4 + CLTS + RET + +TEXT _fldcw(SB), 1, $-4 /* Load x87 FPU Control Word */ + MOVQ RARG, cw+0(FP) + FLDCW cw+0(FP) + RET + +TEXT _fnclex(SB), 1, $-4 + FCLEX + RET + +TEXT _fninit(SB), 1, $-4 + FINIT /* no WAIT */ + RET + +TEXT _fxrstor(SB), 1, $-4 + FXRSTOR64 (RARG) + RET + +TEXT _fxsave(SB), 1, $-4 + FXSAVE64 (RARG) + RET + +TEXT _fwait(SB), 1, $-4 + WAIT + RET + +TEXT _ldmxcsr(SB), 1, $-4 /* Load MXCSR */ + MOVQ RARG, mxcsr+0(FP) + LDMXCSR mxcsr+0(FP) + RET + +TEXT _stts(SB), 1, $-4 + MOVQ CR0, AX + ORQ $8, AX /* Ts */ + MOVQ AX, CR0 + RET diff -Nru /sys/src/9k/k10/l64idt.s /sys/src/9k/k10/l64idt.s --- /sys/src/9k/k10/l64idt.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l64idt.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,341 @@ +/* + * Interrupt/exception handling. + */ +#include "amd64l.h" + +MODE $64 + +TEXT _intrp<>(SB), 1, $-4 /* no error code pushed */ + PUSHQ AX /* save AX */ + MOVQ 8(SP), AX /* idthandlers(SB) PC */ + JMP _intrcommon + +TEXT _intre<>(SB), 1, $-4 /* error code pushed */ + XCHGQ AX, (SP) +_intrcommon: + MOVBQZX (AX), AX + XCHGQ AX, (SP) + + SUBQ $24, SP /* R1[45], [DEFG]S */ + CMPW 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) /* old CS */ + JEQ _intrnested + + MOVQ RUSER, 0(SP) + MOVQ RMACH, 8(SP) + MOVW DS, 16(SP) + MOVW ES, 18(SP) + MOVW FS, 20(SP) + MOVW GS, 22(SP) + + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* up */ + +_intrnested: + PUSHQ R13 + PUSHQ R12 + PUSHQ R11 + PUSHQ R10 + PUSHQ R9 + PUSHQ R8 + PUSHQ BP + PUSHQ DI + PUSHQ SI + PUSHQ DX + PUSHQ CX + PUSHQ BX + PUSHQ AX + + MOVQ SP, RARG + PUSHQ SP + CALL trap(SB) + +TEXT _intrr<>(SB), 1, $-4 /* so ktrace can pop frame */ + POPQ AX + + POPQ AX + POPQ BX + POPQ CX + POPQ DX + POPQ SI + POPQ DI + POPQ BP + POPQ R8 + POPQ R9 + POPQ R10 + POPQ R11 + POPQ R12 + POPQ R13 + + CMPQ 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) + JEQ _iretnested + + SWAPGS + MOVW 22(SP), GS + MOVW 20(SP), FS + MOVW 18(SP), ES + MOVW 16(SP), DS + MOVQ 8(SP), 
RMACH + MOVQ 0(SP), RUSER + +_iretnested: + ADDQ $40, SP + IRETQ + +TEXT idthandlers(SB), 1, $-4 + CALL _intrp<>(SB); BYTE $IdtDE /* #DE Divide-by-Zero Error */ + CALL _intrp<>(SB); BYTE $IdtDB /* #DB Debug */ + CALL _intrp<>(SB); BYTE $IdtNMI /* #NMI Borked */ + CALL _intrp<>(SB); BYTE $IdtBP /* #BP Breakpoint */ + CALL _intrp<>(SB); BYTE $IdtOF /* #OF Overflow */ + CALL _intrp<>(SB); BYTE $IdtBR /* #BR Bound-Range */ + CALL _intrp<>(SB); BYTE $IdtUD /* #UD Invalid-Opcode */ + CALL _intrp<>(SB); BYTE $IdtNM /* #NM Device-Not-Available */ + CALL _intre<>(SB); BYTE $IdtDF /* #DF Double-Fault */ + CALL _intrp<>(SB); BYTE $Idt09 /* reserved */ + CALL _intre<>(SB); BYTE $IdtTS /* #TS Invalid-TSS */ + CALL _intre<>(SB); BYTE $IdtNP /* #NP Segment-Not-Present */ + CALL _intre<>(SB); BYTE $IdtSS /* #SS Stack */ + CALL _intre<>(SB); BYTE $IdtGP /* #GP General-Protection */ + CALL _intre<>(SB); BYTE $IdtPF /* #PF Page-Fault */ + CALL _intrp<>(SB); BYTE $Idt0F /* reserved */ + CALL _intrp<>(SB); BYTE $IdtMF /* #MF x87 FPE-Pending */ + CALL _intre<>(SB); BYTE $IdtAC /* #AC Alignment-Check */ + CALL _intrp<>(SB); BYTE $IdtMC /* #MC Machine-Check */ + CALL _intrp<>(SB); BYTE $IdtXF /* #XF SIMD Floating-Point */ + CALL _intrp<>(SB); BYTE $0x14 /* reserved */ + CALL _intrp<>(SB); BYTE $0x15 /* reserved */ + CALL _intrp<>(SB); BYTE $0x16 /* reserved */ + CALL _intrp<>(SB); BYTE $0x17 /* reserved */ + CALL _intrp<>(SB); BYTE $0x18 /* reserved */ + CALL _intrp<>(SB); BYTE $0x19 /* reserved */ + CALL _intrp<>(SB); BYTE $0x1a /* reserved */ + CALL _intrp<>(SB); BYTE $0x1b /* reserved */ + CALL _intrp<>(SB); BYTE $0x1c /* reserved */ + CALL _intrp<>(SB); BYTE $0x1d /* reserved */ + CALL _intrp<>(SB); BYTE $0x1e /* reserved */ + CALL _intrp<>(SB); BYTE $0x1f /* reserved */ + CALL _intrp<>(SB); BYTE $0x20 + CALL _intrp<>(SB); BYTE $0x21 + CALL _intrp<>(SB); BYTE $0x22 + CALL _intrp<>(SB); BYTE $0x23 + CALL _intrp<>(SB); BYTE $0x24 + CALL _intrp<>(SB); BYTE $0x25 + CALL _intrp<>(SB); BYTE 
$0x26 + CALL _intrp<>(SB); BYTE $0x27 + CALL _intrp<>(SB); BYTE $0x28 + CALL _intrp<>(SB); BYTE $0x29 + CALL _intrp<>(SB); BYTE $0x2a + CALL _intrp<>(SB); BYTE $0x2b + CALL _intrp<>(SB); BYTE $0x2c + CALL _intrp<>(SB); BYTE $0x2d + CALL _intrp<>(SB); BYTE $0x2e + CALL _intrp<>(SB); BYTE $0x2f + CALL _intrp<>(SB); BYTE $0x30 + CALL _intrp<>(SB); BYTE $0x31 + CALL _intrp<>(SB); BYTE $0x32 + CALL _intrp<>(SB); BYTE $0x33 + CALL _intrp<>(SB); BYTE $0x34 + CALL _intrp<>(SB); BYTE $0x35 + CALL _intrp<>(SB); BYTE $0x36 + CALL _intrp<>(SB); BYTE $0x37 + CALL _intrp<>(SB); BYTE $0x38 + CALL _intrp<>(SB); BYTE $0x39 + CALL _intrp<>(SB); BYTE $0x3a + CALL _intrp<>(SB); BYTE $0x3b + CALL _intrp<>(SB); BYTE $0x3c + CALL _intrp<>(SB); BYTE $0x3d + CALL _intrp<>(SB); BYTE $0x3e + CALL _intrp<>(SB); BYTE $0x3f + CALL _intrp<>(SB); BYTE $0x40 + CALL _intrp<>(SB); BYTE $0x41 + CALL _intrp<>(SB); BYTE $0x42 + CALL _intrp<>(SB); BYTE $0x43 + CALL _intrp<>(SB); BYTE $0x44 + CALL _intrp<>(SB); BYTE $0x45 + CALL _intrp<>(SB); BYTE $0x46 + CALL _intrp<>(SB); BYTE $0x47 + CALL _intrp<>(SB); BYTE $0x48 + CALL _intrp<>(SB); BYTE $0x49 + CALL _intrp<>(SB); BYTE $0x4a + CALL _intrp<>(SB); BYTE $0x4b + CALL _intrp<>(SB); BYTE $0x4c + CALL _intrp<>(SB); BYTE $0x4d + CALL _intrp<>(SB); BYTE $0x4e + CALL _intrp<>(SB); BYTE $0x4f + CALL _intrp<>(SB); BYTE $0x50 + CALL _intrp<>(SB); BYTE $0x51 + CALL _intrp<>(SB); BYTE $0x52 + CALL _intrp<>(SB); BYTE $0x53 + CALL _intrp<>(SB); BYTE $0x54 + CALL _intrp<>(SB); BYTE $0x55 + CALL _intrp<>(SB); BYTE $0x56 + CALL _intrp<>(SB); BYTE $0x57 + CALL _intrp<>(SB); BYTE $0x58 + CALL _intrp<>(SB); BYTE $0x59 + CALL _intrp<>(SB); BYTE $0x5a + CALL _intrp<>(SB); BYTE $0x5b + CALL _intrp<>(SB); BYTE $0x5c + CALL _intrp<>(SB); BYTE $0x5d + CALL _intrp<>(SB); BYTE $0x5e + CALL _intrp<>(SB); BYTE $0x5f + CALL _intrp<>(SB); BYTE $0x60 + CALL _intrp<>(SB); BYTE $0x61 + CALL _intrp<>(SB); BYTE $0x62 + CALL _intrp<>(SB); BYTE $0x63 + CALL _intrp<>(SB); BYTE $0x64 + CALL 
_intrp<>(SB); BYTE $0x65 + CALL _intrp<>(SB); BYTE $0x66 + CALL _intrp<>(SB); BYTE $0x67 + CALL _intrp<>(SB); BYTE $0x68 + CALL _intrp<>(SB); BYTE $0x69 + CALL _intrp<>(SB); BYTE $0x6a + CALL _intrp<>(SB); BYTE $0x6b + CALL _intrp<>(SB); BYTE $0x6c + CALL _intrp<>(SB); BYTE $0x6d + CALL _intrp<>(SB); BYTE $0x6e + CALL _intrp<>(SB); BYTE $0x6f + CALL _intrp<>(SB); BYTE $0x70 + CALL _intrp<>(SB); BYTE $0x71 + CALL _intrp<>(SB); BYTE $0x72 + CALL _intrp<>(SB); BYTE $0x73 + CALL _intrp<>(SB); BYTE $0x74 + CALL _intrp<>(SB); BYTE $0x75 + CALL _intrp<>(SB); BYTE $0x76 + CALL _intrp<>(SB); BYTE $0x77 + CALL _intrp<>(SB); BYTE $0x78 + CALL _intrp<>(SB); BYTE $0x79 + CALL _intrp<>(SB); BYTE $0x7a + CALL _intrp<>(SB); BYTE $0x7b + CALL _intrp<>(SB); BYTE $0x7c + CALL _intrp<>(SB); BYTE $0x7d + CALL _intrp<>(SB); BYTE $0x7e + CALL _intrp<>(SB); BYTE $0x7f + CALL _intrp<>(SB); BYTE $0x80 + CALL _intrp<>(SB); BYTE $0x81 + CALL _intrp<>(SB); BYTE $0x82 + CALL _intrp<>(SB); BYTE $0x83 + CALL _intrp<>(SB); BYTE $0x84 + CALL _intrp<>(SB); BYTE $0x85 + CALL _intrp<>(SB); BYTE $0x86 + CALL _intrp<>(SB); BYTE $0x87 + CALL _intrp<>(SB); BYTE $0x88 + CALL _intrp<>(SB); BYTE $0x89 + CALL _intrp<>(SB); BYTE $0x8a + CALL _intrp<>(SB); BYTE $0x8b + CALL _intrp<>(SB); BYTE $0x8c + CALL _intrp<>(SB); BYTE $0x8d + CALL _intrp<>(SB); BYTE $0x8e + CALL _intrp<>(SB); BYTE $0x8f + CALL _intrp<>(SB); BYTE $0x90 + CALL _intrp<>(SB); BYTE $0x91 + CALL _intrp<>(SB); BYTE $0x92 + CALL _intrp<>(SB); BYTE $0x93 + CALL _intrp<>(SB); BYTE $0x94 + CALL _intrp<>(SB); BYTE $0x95 + CALL _intrp<>(SB); BYTE $0x96 + CALL _intrp<>(SB); BYTE $0x97 + CALL _intrp<>(SB); BYTE $0x98 + CALL _intrp<>(SB); BYTE $0x99 + CALL _intrp<>(SB); BYTE $0x9a + CALL _intrp<>(SB); BYTE $0x9b + CALL _intrp<>(SB); BYTE $0x9c + CALL _intrp<>(SB); BYTE $0x9d + CALL _intrp<>(SB); BYTE $0x9e + CALL _intrp<>(SB); BYTE $0x9f + CALL _intrp<>(SB); BYTE $0xa0 + CALL _intrp<>(SB); BYTE $0xa1 + CALL _intrp<>(SB); BYTE $0xa2 + CALL _intrp<>(SB); 
BYTE $0xa3 + CALL _intrp<>(SB); BYTE $0xa4 + CALL _intrp<>(SB); BYTE $0xa5 + CALL _intrp<>(SB); BYTE $0xa6 + CALL _intrp<>(SB); BYTE $0xa7 + CALL _intrp<>(SB); BYTE $0xa8 + CALL _intrp<>(SB); BYTE $0xa9 + CALL _intrp<>(SB); BYTE $0xaa + CALL _intrp<>(SB); BYTE $0xab + CALL _intrp<>(SB); BYTE $0xac + CALL _intrp<>(SB); BYTE $0xad + CALL _intrp<>(SB); BYTE $0xae + CALL _intrp<>(SB); BYTE $0xaf + CALL _intrp<>(SB); BYTE $0xb0 + CALL _intrp<>(SB); BYTE $0xb1 + CALL _intrp<>(SB); BYTE $0xb2 + CALL _intrp<>(SB); BYTE $0xb3 + CALL _intrp<>(SB); BYTE $0xb4 + CALL _intrp<>(SB); BYTE $0xb5 + CALL _intrp<>(SB); BYTE $0xb6 + CALL _intrp<>(SB); BYTE $0xb7 + CALL _intrp<>(SB); BYTE $0xb8 + CALL _intrp<>(SB); BYTE $0xb9 + CALL _intrp<>(SB); BYTE $0xba + CALL _intrp<>(SB); BYTE $0xbb + CALL _intrp<>(SB); BYTE $0xbc + CALL _intrp<>(SB); BYTE $0xbd + CALL _intrp<>(SB); BYTE $0xbe + CALL _intrp<>(SB); BYTE $0xbf + CALL _intrp<>(SB); BYTE $0xc0 + CALL _intrp<>(SB); BYTE $0xc1 + CALL _intrp<>(SB); BYTE $0xc2 + CALL _intrp<>(SB); BYTE $0xc3 + CALL _intrp<>(SB); BYTE $0xc4 + CALL _intrp<>(SB); BYTE $0xc5 + CALL _intrp<>(SB); BYTE $0xc6 + CALL _intrp<>(SB); BYTE $0xc7 + CALL _intrp<>(SB); BYTE $0xc8 + CALL _intrp<>(SB); BYTE $0xc9 + CALL _intrp<>(SB); BYTE $0xca + CALL _intrp<>(SB); BYTE $0xcb + CALL _intrp<>(SB); BYTE $0xcc + CALL _intrp<>(SB); BYTE $0xcd /* the byte after each CALL is read through the return PC as the vector number, so it must match the stub's position in the table */ + CALL _intrp<>(SB); BYTE $0xce + CALL _intrp<>(SB); BYTE $0xcf + CALL _intrp<>(SB); BYTE $0xd0 + CALL _intrp<>(SB); BYTE $0xd1 + CALL _intrp<>(SB); BYTE $0xd2 + CALL _intrp<>(SB); BYTE $0xd3 + CALL _intrp<>(SB); BYTE $0xd4 + CALL _intrp<>(SB); BYTE $0xd5 + CALL _intrp<>(SB); BYTE $0xd6 + CALL _intrp<>(SB); BYTE $0xd7 + CALL _intrp<>(SB); BYTE $0xd8 + CALL _intrp<>(SB); BYTE $0xd9 + CALL _intrp<>(SB); BYTE $0xda + CALL _intrp<>(SB); BYTE $0xdb + CALL _intrp<>(SB); BYTE $0xdc + CALL _intrp<>(SB); BYTE $0xdd + CALL _intrp<>(SB); BYTE $0xde + CALL _intrp<>(SB); BYTE $0xdf + CALL _intrp<>(SB); BYTE $0xe0 + CALL _intrp<>(SB); BYTE $0xe1 +
CALL _intrp<>(SB); BYTE $0xe2 + CALL _intrp<>(SB); BYTE $0xe3 + CALL _intrp<>(SB); BYTE $0xe4 + CALL _intrp<>(SB); BYTE $0xe5 + CALL _intrp<>(SB); BYTE $0xe6 + CALL _intrp<>(SB); BYTE $0xe7 + CALL _intrp<>(SB); BYTE $0xe8 + CALL _intrp<>(SB); BYTE $0xe9 + CALL _intrp<>(SB); BYTE $0xea + CALL _intrp<>(SB); BYTE $0xeb + CALL _intrp<>(SB); BYTE $0xec + CALL _intrp<>(SB); BYTE $0xed + CALL _intrp<>(SB); BYTE $0xee + CALL _intrp<>(SB); BYTE $0xef + CALL _intrp<>(SB); BYTE $0xf0 + CALL _intrp<>(SB); BYTE $0xf1 + CALL _intrp<>(SB); BYTE $0xf2 + CALL _intrp<>(SB); BYTE $0xf3 + CALL _intrp<>(SB); BYTE $0xf4 + CALL _intrp<>(SB); BYTE $0xf5 + CALL _intrp<>(SB); BYTE $0xf6 + CALL _intrp<>(SB); BYTE $0xf7 + CALL _intrp<>(SB); BYTE $0xf8 + CALL _intrp<>(SB); BYTE $0xf9 + CALL _intrp<>(SB); BYTE $0xfa + CALL _intrp<>(SB); BYTE $0xfb + CALL _intrp<>(SB); BYTE $0xfc + CALL _intrp<>(SB); BYTE $0xfd + CALL _intrp<>(SB); BYTE $0xfe + CALL _intrp<>(SB); BYTE $0xff diff -Nru /sys/src/9k/k10/l64sipi.s /sys/src/9k/k10/l64sipi.s --- /sys/src/9k/k10/l64sipi.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l64sipi.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,225 @@ +/* + * Start-up request IPI handler. + * + * This code is executed on an application processor in response to receiving + * a Start-up IPI (SIPI) from another processor. + * This must be placed on a 4KiB boundary + * somewhere in the 1st MiB of conventional memory. However, + * due to some shortcuts below it's restricted further to within the 1st 64KiB. + * The AP starts in real-mode, with + * CS selector set to the startup memory address/16; + * CS base set to startup memory address; + * CS limit set to 64KiB; + * CPL and IP set to 0. + * Parameters are passed to this code via a vector in low memory + * indexed by the APIC number of the processor. The layout, size, + * and location have to be kept in sync with the setup in sipi.s. + */ +#include "mem.h" +#include "amd64l.h" + +/* + * Some machine instructions not handled well by [68][al]. 
+ * This is a messy piece of code, requiring instructions in real mode, + * protected mode (+long mode on amd64). The MODE pseudo-op of 6[al] handles + * the latter two OK, but 'MODE $16' is incomplete, e.g. it does + * not truncate operands appropriately, hence the ugly 'rMOVAX' macro. + * Fortunately, the only other instruction executed in real mode that + * could cause a problem (ORL) is encoded such that it will work OK. + */ +#define DELAY BYTE $0xeb; /* JMP .+2 */ \ + BYTE $0x00 +#define NOP BYTE $0x90 /* NOP */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \ + LONG $o; WORD $s + +#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \ + WORD $o; WORD $s; +#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \ + pFARJMP32(s, o) +#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \ + BYTE $0x01; BYTE $0x16; \ + WORD $gdtptr +#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \ + WORD $i; + +/* + * Real mode. Welcome to 1978. + * Load a basic GDT, turn on protected mode and make + * inter-segment jump to the protected mode code. 
+ */ +MODE $16 + +TEXT _real<>(SB), 1, $-4 + rFARJMP16(0, _endofheader<>-KZERO(SB)) /* */ + +_startofheader: + NOP; NOP; NOP + QUAD $0xa5a5a5a5a5a5a5a5 + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) /* includes long mode */ + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + QUAD $0x0000800000000000 /* DS */ + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +TEXT _endofheader<>(SB), 1, $-4 + MOVW CS, AX + MOVW AX, DS /* initialise DS */ + + rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */ + + MOVL CR0, AX + ORL $Pe, AX + MOVL AX, CR0 /* turn on protected mode */ + DELAY /* JMP .+2 */ + + rMOVAX (SSEL(SiDS, SsTIGDT|SsRPL0)) /* */ + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + rFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _protected<>-KZERO(SB)) + +/* + * Protected mode. Welcome to 1982. + * Get the local APIC ID from the memory mapped APIC + * and use it to locate the index to the parameter vector; + * load the PDB with the page table address from the + * information vector; + * make an identity map for the inter-segment jump below, + * using the stack space to hold a temporary PDP and PD; + * enable and activate long mode; + * make an inter-segment jump to the long mode code. + */ +MODE $32 + +/* + * Macros for accessing page table entries; must turn + * the C-style array-index macros into a page table byte + * offset. 
+ */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _protected<>(SB), 1, $-4 + MOVL $0xfee00000, BP /* apicbase */ + MOVL 0x20(BP), BP /* Id */ + SHRL $24, BP /* becomes RARG later */ + + MOVL BP, AX /* apicno */ + IMULL $32, AX /* [apicno]; each sipi entry is 32 bytes */ + MOVL $_real<>-KZERO(SB), BX + ADDL $4096, BX /* sipi; the vector starts 4096 bytes after _real */ + ADDL AX, BX /* sipi[apicno] */ + + MOVL 0(BX), SI /* sipi[apicno].pml4 */ + + MOVL SI, AX + MOVL AX, CR3 /* load the mmu */ + + MOVL AX, DX + SUBL $MACHSTKSZ, DX /* PDP for identity map */ + ADDL $(PteRW|PteP), DX + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + + SUBL $MACHSTKSZ, AX /* PDP for identity map */ + ADDL $PTSZ, DX + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL $(PtePS|PteRW|PteP), DX + ADDL $PTSZ, AX /* PD for identity map */ + MOVL DX, PDO(0)(AX) /* PDE for identity 0-[24]MiB */ + + +/* + * Enable and activate Long Mode. From the manual: + * make sure Page Size Extensions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) /* selector index 3 is the Long mode CS entry of _gdt32p */ + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT; + * zap the identity map; + * initialise the stack, RMACH, RUSER, + * and call the C startup code.
+ */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX /* DX is 0 from here on */ + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* sipi[apicno].pml4 */ + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 */ + MOVQ DX, PML4O(0)(AX) /* zap identity map */ + MOVQ SI, CR3 /* flush TLB */ + + ADDQ $KZERO, BX /* &sipi[apicno] */ + + MOVQ 8(BX), SP /* sipi[apicno].stack */ + + PUSHQ DX /* clear flags */ + POPFQ + MOVLQZX RARG, RARG /* APIC ID */ + PUSHQ RARG /* apicno */ + + MOVQ 16(BX), RMACH /* sipi[apicno].mach */ + MOVQ DX, RUSER + MOVQ 24(BX), AX /* sipi[apicno].pc */ + CALL* AX /* (*sipi[apicno].pc)(apicno) */ + +_ndnr: + JMP _ndnr diff -Nru /sys/src/9k/k10/l64syscall.s /sys/src/9k/k10/l64syscall.s --- /sys/src/9k/k10/l64syscall.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l64syscall.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,72 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $64 + +/* + */ +TEXT touser(SB), 1, $-4 + CLI + SWAPGS + MOVQ $SSEL(SiUDS, SsRPL3), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + + MOVQ $(UTZERO+0x28), CX /* ip */ + MOVQ $If, R11 /* flags */ + + MOVQ RARG, SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +/* + */ +TEXT syscallentry(SB), 1, $-4 + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* m->proc */ + MOVQ SP, R13 + MOVQ 16(RUSER), SP /* m->proc->kstack */ + ADDQ $KSTACK, SP + PUSHQ $SSEL(SiUDS, SsRPL3) /* old stack segment */ + PUSHQ R13 /* old sp */ + PUSHQ R11 /* old flags */ + PUSHQ $SSEL(SiUCS, SsRPL3) /* old code segment */ + PUSHQ CX /* old ip */ + + SUBQ $(18*8), SP /* unsaved registers */ + + MOVW $SSEL(SiUDS, SsRPL3), (15*8+0)(SP) + MOVW ES, (15*8+2)(SP) + MOVW FS, (15*8+4)(SP) + MOVW GS, (15*8+6)(SP) + + PUSHQ SP /* Ureg* */ + PUSHQ 
RARG /* system call number */ + CALL syscall(SB) + +TEXT syscallreturn(SB), 1, $-4 + MOVQ 16(SP), AX /* Ureg.ax */ + MOVQ (16+6*8)(SP), BP /* Ureg.bp */ +_syscallreturn: + ADDQ $(17*8), SP /* registers + arguments */ + + CLI + SWAPGS + MOVW 0(SP), DS + MOVW 2(SP), ES + MOVW 4(SP), FS + MOVW 6(SP), GS + + MOVQ 24(SP), CX /* ip */ + MOVQ 40(SP), R11 /* flags */ + + MOVQ 48(SP), SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +TEXT sysrforkret(SB), 1, $-4 + MOVQ $0, AX + JMP _syscallreturn diff -Nru /sys/src/9k/k10/l64v.s /sys/src/9k/k10/l64v.s --- /sys/src/9k/k10/l64v.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/l64v.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,416 @@ +#include "amd64l.h" + +MODE $64 + +/* + * Port I/O. + */ +TEXT inb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INB + RET + +TEXT insb(SB), 1, $-4 + MOVL RARG, DX + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSB + RET + +TEXT ins(SB), 1, $-4 + MOVL RARG, DX + XORL AX, AX + INW + RET + +TEXT inss(SB), 1, $-4 + MOVL RARG, DX + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSW + RET + +TEXT inl(SB), 1, $-4 + MOVL RARG, DX + INL + RET + +TEXT insl(SB), 1, $-4 + MOVL RARG, DX + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSL + RET + +TEXT outb(SB), 1, $-1 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL byte+8(FP), AX + OUTB + RET + +TEXT outsb(SB), 1, $-4 + MOVL RARG, DX + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSB + RET + +TEXT outs(SB), 1, $-4 + MOVL RARG, DX + MOVL short+8(FP), AX + OUTW + RET + +TEXT outss(SB), 1, $-4 + MOVL RARG, DX + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSW + RET + +TEXT outl(SB), 1, $-4 + MOVL RARG, DX + MOVL long+8(FP), AX + OUTL + RET + +TEXT outsl(SB), 1, $-4 + MOVL RARG, DX + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSL + RET + +/* + * Load/store segment descriptor tables: + * GDT - global descriptor table + * IDT - interrupt descriptor 
table + * TR - task register + * GDTR and LDTR take an m16:m64 argument, + * so shuffle the stack arguments to + * get it in the right format. + */ +TEXT gdtget(SB), 1, $-4 + MOVL GDTR, (RARG) /* Note: 10 bytes returned */ + RET + +TEXT gdtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + + MOVL (RARG), GDTR + + XORQ AX, AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + POPQ AX + MOVWQZX cs+16(FP), BX + PUSHQ BX + PUSHQ AX + RETFQ + +TEXT idtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + MOVL (RARG), IDTR + RET + +TEXT trput(SB), 1, $-4 + MOVW RARG, TASK + RET + +/* + * Read/write various system registers. + */ +TEXT cr0get(SB), 1, $-4 /* CR0 - processor control */ + MOVQ CR0, AX + RET + +TEXT cr0put(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR0 + RET + +TEXT cr2get(SB), 1, $-4 /* CR2 - #PF virtual address */ + MOVQ CR2, AX + RET + +TEXT cr3get(SB), 1, $-4 /* CR3 - pml4 base */ + MOVQ CR3, AX + RET + +TEXT cr3put(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR3 + RET + +TEXT cr4get(SB), 1, $-4 /* CR4 - processor extensions */ + MOVQ CR4, AX + RET + +TEXT cr4put(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR4 + RET + +TEXT rdtsc(SB), 1, $-4 /* time stamp counter */ + RDTSC + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT rdmsr(SB), 1, $-4 /* model-specific register */ + MOVL RARG, CX + RDMSR + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT wrmsr(SB), 1, $-4 + MOVL RARG, CX + MOVL lo+8(FP), AX + MOVL hi+12(FP), DX + WRMSR + RET + +/* + */ +TEXT invlpg(SB), 1, $-4 /* INVLPG va+0(FP) */ + MOVQ RARG, va+0(FP) + INVLPG va+0(FP) + RET + +TEXT wbinvd(SB), 1, $-4 + WBINVD + RET + +TEXT lfence(SB), 1, $-4 + LFENCE + RET + +TEXT mfence(SB), 1, $-4 + MFENCE + RET + +TEXT sfence(SB), 1, $-4 + SFENCE + RET + +/* + * Note: CLI and STI are not serialising instructions. 
+ * Is that assumed anywhere? + */ +TEXT splhi(SB), 1, $-4 +_splhi: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + + JZ alreadyhi + MOVQ (SP), BX + MOVQ BX, 8(RMACH) /* save PC in m->splpc */ + +alreadyhi: + CLI + RET + +TEXT spllo(SB), 1, $-4 +_spllo: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JNZ alreadylo + MOVQ $0, 8(RMACH) /* clear m->splpc */ + +alreadylo: + STI + RET + +TEXT splx(SB), 1, $-4 + TESTQ $If, RARG /* If - Interrupt Flag */ + JNZ _spllo + JMP _splhi + +TEXT spldone(SB), 1, $-4 + RET + +TEXT islo(SB), 1, $-4 + PUSHFQ + POPQ AX + ANDQ $If, AX /* If - Interrupt Flag */ + RET + +/* + * Test-And-Set + */ +TEXT tas32(SB), 1, $-4 + MOVL $0xdeaddead, AX + XCHGL AX, (RARG) /* lock->key */ + RET + +TEXT xaddb(SB), 1, $-4 /* int xaddb(void*) */ + MOVL $1, AX + LOCK; XADDB AX, (RARG) + RET + +TEXT ainc(SB), 1, $-4 /* int ainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX + RET + +TEXT adec(SB), 1, $-4 /* int adec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX + RET + +TEXT cas32(SB), 1, $-4 + MOVL exp+8(FP), AX + MOVL new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +TEXT cas64(SB), 1, $-4 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +TEXT xchg32(SB), 1, $-4 + MOVL v+8(FP), AX + LOCK; XCHGL AX, (RARG) + RET + +TEXT xchgm(SB), 1, $-4 + MOVQ v+8(FP), AX + LOCK; XCHGQ AX, (RARG) + RET + +TEXT gotolabel(SB), 1, $-4 + MOVQ 0(RARG), SP /* restore sp */ + MOVQ 8(RARG), AX /* put return pc on the stack */ + MOVQ AX, 0(SP) + MOVL $1, AX /* return 1 */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVQ SP, 0(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT pause(SB), 1, $-4 + PAUSE + RET + +TEXT halt(SB), 1, $-4 + CLI + CMPL nrdy(SB), $0 + JEQ _nothingready + STI + RET + 
+_nothingready: + STI + HLT + RET + +TEXT _halt(SB), 1, $-4 + HLT + RET + +#define MONITOR BYTE $0x0f; BYTE $0x01; BYTE $0xc8 +#define MWAIT BYTE $0x0f; BYTE $0x01; BYTE $0xc9 + +/* + * u32int mwait32(void*, u32int) + */ +TEXT mwait32(SB),1,$0 + MOVL val+8(FP), BX + + CMPL (RARG), BX /* changed yet? */ + JNE _mm32done + + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* extensions */ + XORQ DX, DX /* hints */ + MONITOR + + CMPL (RARG), BX /* changed yet? */ + JNE _mm32done + + /*XORQ CX, CX*/ /* extensions (different from monitor) */ + XORQ AX, AX /* hints */ + MWAIT +_mm32done: + MOVL (RARG), AX + RET + +/* + * u64int mwait64(void*, u64int) + */ +TEXT mwait64(SB),1,$0 + MOVQ val+8(FP), BX + + CMPQ (RARG), BX /* changed yet? */ + JNE _mm64done + + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* extensions */ + XORQ DX, DX /* hints */ + MONITOR + + CMPQ (RARG), BX /* changed yet? */ + JNE _mm64done + + /*XORQ CX, CX*/ /* extensions (different from monitor) */ + XORQ AX, AX /* hints */ + MWAIT +_mm64done: + MOVQ (RARG), AX + RET + +TEXT mul64fract(SB), 1, $-4 + MOVQ a+8(FP), AX + MULQ b+16(FP) /* a*b */ + SHRQ $32, AX:DX + MOVQ AX, (RARG) + RET + + +///* +// * Testing. 
+// */ +//TEXT ud2(SB), $-4 +// BYTE $0x0f; BYTE $0x0b +// RET +// diff -Nru /sys/src/9k/k10/lapic.c /sys/src/9k/k10/lapic.c --- /sys/src/9k/k10/lapic.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/lapic.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,468 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" +#include "io.h" +#include "adr.h" + +#undef DBG +#define DBG print + +enum { /* Local APIC registers */ + Id = 0x0020, /* Identification */ + Ver = 0x0030, /* Version */ + Tp = 0x0080, /* Task Priority */ + Ap = 0x0090, /* Arbitration Priority */ + Pp = 0x00a0, /* Processor Priority */ + Eoi = 0x00b0, /* EOI */ + Ld = 0x00d0, /* Logical Destination */ + Df = 0x00e0, /* Destination Format */ + Siv = 0x00f0, /* Spurious Interrupt Vector */ + Is = 0x0100, /* Interrupt Status (8) */ + Tm = 0x0180, /* Trigger Mode (8) */ + Ir = 0x0200, /* Interrupt Request (8) */ + Es = 0x0280, /* Error Status */ + Iclo = 0x0300, /* Interrupt Command */ + Ichi = 0x0310, /* Interrupt Command [63:32] */ + Lvt0 = 0x0320, /* Local Vector Table 0 */ + Lvt5 = 0x0330, /* Local Vector Table 5 */ + Lvt4 = 0x0340, /* Local Vector Table 4 */ + Lvt1 = 0x0350, /* Local Vector Table 1 */ + Lvt2 = 0x0360, /* Local Vector Table 2 */ + Lvt3 = 0x0370, /* Local Vector Table 3 */ + Tic = 0x0380, /* Timer Initial Count */ + Tcc = 0x0390, /* Timer Current Count */ + Tdc = 0x03e0, /* Timer Divide Configuration */ + + Tlvt = Lvt0, /* Timer */ + Lint0 = Lvt1, /* Local Interrupt 0 */ + Lint1 = Lvt2, /* Local Interrupt 1 */ + Elvt = Lvt3, /* Error */ + Pclvt = Lvt4, /* Performance Counter */ + Tslvt = Lvt5, /* Thermal Sensor */ +}; + +enum { /* Siv */ + Swen = 0x00000100, /* Software Enable */ + Fdis = 0x00000200, /* Focus Disable */ +}; + +enum { /* Iclo */ + Lassert = 0x00004000, /* Assert level */ + + DSnone = 0x00000000, /* Use Destination Field */ + DSself = 0x00040000, /* Self is only destination */ + DSallinc = 0x00080000, /* All including self */ 
+ DSallexc = 0x000c0000, /* All Excluding self */ +}; + +enum { /* Tlvt */ + Periodic = 0x00020000, /* Periodic Timer Mode */ +}; + +enum { /* Tdc */ + DivX2 = 0x00000000, /* Divide by 2 */ + DivX4 = 0x00000001, /* Divide by 4 */ + DivX8 = 0x00000002, /* Divide by 8 */ + DivX16 = 0x00000003, /* Divide by 16 */ + DivX32 = 0x00000008, /* Divide by 32 */ + DivX64 = 0x00000009, /* Divide by 64 */ + DivX128 = 0x0000000a, /* Divide by 128 */ + DivX1 = 0x0000000b, /* Divide by 1 */ +}; + +static u32int* lapicbase; + +static Lapic xlapic[Napic]; + +Lapic* +lapiclookup(uint id) +{ + Lapic *a; + + if(id >= nelem(xlapic)) + return nil; + a = xlapic + id; + if(a->useable) + return a; + return nil; +} + +static u32int +lapicrget(int r) +{ + return lapicbase[r/4]; +} + +static void +lapicrput(int r, u32int data) +{ + lapicbase[r/4] = data; +} + +int +lapiceoi(int vecno) +{ + lapicrput(Eoi, 0); + return vecno; +} + +int +lapicisr(int vecno) +{ + int isr; + + isr = lapicrget(Is + (vecno/32)*16); + + return isr & (1<<(vecno%32)); +} + +static char* +lapicprint(char *p, char *e, Lapic *a, int i) +{ + char *s; + + s = "proc"; + p = seprint(p, e, "%-8s ", s); + p = seprint(p, e, "%8ux ", i); +// p = seprint(p, e, "%.8ux ", a->dest); +// p = seprint(p, e, "%.8ux ", a->mask); +// p = seprint(p, e, "%c", a->flags & PcmpBP? 'b': ' '); +// p = seprint(p, e, "%c ", a->flags & PcmpEN? 
'e': ' '); +// p = seprint(p, e, "%8ux %8ux", a->lintr[0], a->lintr[1]); + p = seprint(p, e, "%12d\n", a->machno); + return p; +} + +static long +lapicread(Chan*, void *a, long n, vlong off) +{ + char *s, *e, *p; + long i, r; + + s = malloc(READSTR); + e = s+READSTR; + p = s; + + for(i = 0; i < nelem(xlapic); i++) + if(xlapic[i].useable) + p = lapicprint(p, e, xlapic + i, i); + r = -1; + if(!waserror()){ + r = readstr(off, a, n, s); + poperror(); + } + free(s); + return r; +} + +void +lapicinit(int lapicno, uintmem pa, int isbp) +{ + Lapic *apic; + + /* + * Mark the LAPIC useable if it has a good ID, and the registers can + * be mapped. There is x2LAPIC to be dealt with at some point. + */ + DBG("lapicinit: lapicno %d pa %#P isbp %d caller %#p\n", lapicno, pa, isbp, getcallerpc(&lapicno)); + addarchfile("lapic", 0444, lapicread, nil); + + if(lapicno >= Napic){ + panic("lapicinit%d: out of range", lapicno); + return; + } + if((apic = &xlapic[lapicno])->useable){ + print("lapicinit%d: already initialised\n", lapicno); + return; + } + if(lapicbase == nil){ + //adrmapck(pa, 1024, Ammio, Mfree, Cnone); + if((lapicbase = vmap(pa, 1024)) == nil){ + panic("lapicinit%d: can't map lapicbase %#P", lapicno, pa); + return; + } + DBG("lapicinit%d: lapicbase %#P -> %#p\n", lapicno, pa, lapicbase); + } + apic->useable = 1; + + /* + * Assign a machno to the processor associated with this + * LAPIC, it may not be an identity map. + * Machno 0 is always the bootstrap processor. 
+ */ + if(isbp){ + apic->machno = 0; + m->apicno = lapicno; + } + else + apic->machno = sys->nmach++; +} + +void +lapicsetdom(int lapicno, int dom) +{ + Lapic *apic; + + DBG("lapic%d: setdom: %d\n", lapicno, dom); + if(lapicno >= Napic){ + panic("lapic%d: lapicsetdom: apic out of range", lapicno); + return; + } + if((apic = &xlapic[lapicno])->useable) + apic->dom = dom; + else + print("lapic%d: lapicsetdom: apic not usable\n", lapicno); +} + +int +machdom(Mach *mp) +{ + return xlapic[mp->apicno].dom; +} + +static void +lapicdump0(Lapic *apic, int i) +{ + if(!apic->useable) + return; + DBG("lapic%d: machno %d lint0 %#8.8ux lint1 %#8.8ux\n", + i, apic->machno, apic->lvt[0], apic->lvt[1]); + DBG(" tslvt %#8.8ux pclvt %#8.8ux elvt %#8.8ux\n", + lapicrget(Tslvt), lapicrget(Pclvt), lapicrget(Elvt)); + DBG(" tlvt %#8.8ux lint0 %#8.8ux lint1 %#8.8ux siv %#8.8ux\n", + lapicrget(Tlvt), lapicrget(Lint0), + lapicrget(Lint1), lapicrget(Siv)); +} + +void +lapicdump(void) +{ + int i; + + if(!DBGFLG) + return; + + DBG("lapicbase %#p\n", lapicbase); + for(i = 0; i < Napic; i++) + lapicdump0(xlapic + i, i); +} + +int +lapiconline(void) +{ + Lapic *apic; + u64int tsc; + u32int dfr, ver; + int apicno, nlvt; + + if(lapicbase == nil) + panic("lapiconline: no lapic base"); + + if((apicno = ((lapicrget(Id)>>24) & 0xff)) >= Napic) + panic("lapic: id too large %d", apicno); + if(apicno != m->apicno){ + panic("lapic: %d != %d", m->apicno, apicno); + dfr = lapicrget(Id) & ~(0xff<<24); + dfr |= m->apicno<<24; + lapicrput(Id, dfr); + apicno = m->apicno; + } + apic = &xlapic[apicno]; + if(!apic->useable) + panic("lapiconline: lapic%d: unusable %d", apicno, apic->useable); + + /* + * Things that can only be done when on the processor + * owning the APIC, apicinit above runs on the bootstrap + * processor. 
+ */ + ver = lapicrget(Ver); + nlvt = ((ver>>16) & 0xff) + 1; + if(nlvt > nelem(apic->lvt)){ + print("lapiconline%d: nlvt %d > max (%d)\n", + apicno, nlvt, nelem(apic->lvt)); + nlvt = nelem(apic->lvt); + } + apic->nlvt = nlvt; + apic->ver = ver & 0xff; + + /* + * These don't really matter in Physical mode; + * set the defaults anyway. + */ +// if(memcmp(m->cpuinfo, "AuthenticAMD", 12) == 0) +// dfr = 0xf0000000; +// else + dfr = 0xffffffff; + lapicrput(Df, dfr); + lapicrput(Ld, 0x00000000); + + /* + * Disable interrupts until ready by setting the Task Priority + * register to 0xff. + */ + lapicrput(Tp, 0xff); + + /* + * Software-enable the APIC in the Spurious Interrupt Vector + * register and set the vector number. The vector number must have + * bits 3-0 0x0f unless the Extended Spurious Vector Enable bit + * is set in the HyperTransport Transaction Control register. + */ + lapicrput(Siv, Swen|IdtSPURIOUS); + + /* + * Acknowledge any outstanding interrupts. + */ + lapicrput(Eoi, 0); + + /* + * Use the TSC to determine the lapic timer frequency. + * It might be possible to snarf this from a chipset + * register instead. + */ + lapicrput(Tdc, DivX1); + lapicrput(Tlvt, Im); + tsc = rdtsc() + m->cpuhz/10; + lapicrput(Tic, 0xffffffff); + + while(rdtsc() < tsc) + ; + + apic->hz = (0xffffffff-lapicrget(Tcc))*10; + apic->max = apic->hz/HZ; + apic->min = apic->hz/(100*HZ); + apic->div = ((m->cpuhz/apic->max)+HZ/2)/HZ; + + if(m->machno == 0 || DBGFLG){ + print("lapic%d: hz %lld max %lld min %lld div %lld\n", apicno, + apic->hz, apic->max, apic->min, apic->div); + } + + /* + * Mask interrupts on Performance Counter overflow and + * Thermal Sensor if implemented, and on Lintr0 (Legacy INTR), + * and Lintr1 (Legacy NMI). + * Clear any Error Status (write followed by read) and enable + * the Error interrupt. 
+ */ + switch(apic->nlvt){ + case 7: + case 6: + lapicrput(Tslvt, Im); + /*FALLTHROUGH*/ + case 5: + lapicrput(Pclvt, Im); + /*FALLTHROUGH*/ + default: + break; + } + lapicrput(Lint1, apic->lvt[1]|Im|IdtLINT1); + lapicrput(Lint0, apic->lvt[0]|Im|IdtLINT0); + + lapicrput(Es, 0); + lapicrget(Es); + lapicrput(Elvt, IdtERROR); + + /* + * Reload the timer to de-synchronise the processors. + * When the caller is ready for the APIC to accept interrupts, + * it should call lapicpri to lower the task priority. + * + * The timer is enabled later by the core-specific startup + * i.e. don't start the timer unless the core needs it, + * to reduce the likelihood of at least one (spurious) interrupt + * from the timer when priority is lowered. + */ + microdelay((TK2MS(1)*1000/sys->nmach) * m->machno); + lapicrput(Tic, apic->max); + return 1; +} + +void +lapictimerenable(void) +{ + /* + * Perhaps apictimerenable/apictimerdisable should just + * clear/set Im in the existing settings of Tlvt, there may + * be a time when the timer is used in a different mode; + * if so will need to ensure the mode is set when the timer + * is initialised. + */ + lapicrput(Tlvt, Periodic|IdtTIMER); +} + +void +lapictimerdisable(void) +{ + lapicrput(Tlvt, Im|IdtTIMER); +} + +void +lapictimerset(uvlong next) +{ + Lapic *apic; + vlong period; + + apic = &xlapic[(lapicrget(Id)>>24) & 0xff]; + + ilock(&m->apictimerlock); + + period = apic->max; + if(next != 0){ + period = next - fastticks(nil); /* fastticks is just rdtsc() */ + period /= apic->div; + + if(period < apic->min) + period = apic->min; + else if(period > apic->max - apic->min) + period = apic->max; + } + lapicrput(Tic, period); + + iunlock(&m->apictimerlock); +} + +void +lapicsipi(int lapicno, uintmem pa) +{ + int i; + u32int crhi, crlo; + + /* + * SIPI - Start-up IPI. + * To do: checks on lapic validity. 
+ */ + crhi = lapicno<<24; + lapicrput(Ichi, crhi); + lapicrput(Iclo, DSnone|TMlevel|Lassert|MTir); + microdelay(200); + lapicrput(Iclo, DSnone|TMlevel|MTir); + delay(10); + + crlo = DSnone|TMedge|MTsipi|((u32int)pa/(4*KiB)); + for(i = 0; i < 2; i++){ + lapicrput(Ichi, crhi); + lapicrput(Iclo, crlo); + microdelay(200); + } +} + +void +lapicipi(int lapicno) +{ + lapicrput(Ichi, lapicno<<24); + lapicrput(Iclo, DSnone|TMedge|Lassert|MTf|IdtIPI); + while(lapicrget(Iclo) & Ds) + ; +} + +void +lapicpri(int pri) +{ + lapicrput(Tp, pri); +} diff -Nru /sys/src/9k/k10/main.c /sys/src/9k/k10/main.c --- /sys/src/9k/k10/main.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/main.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,518 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" +#include "apic.h" + +#include "init.h" + +#include <pool.h> + +extern void confoptions(void); /* XXX - must go */ +extern void confsetenv(void); /* XXX - must go */ + +static uintptr sp; /* XXX - must go - user stack of init proc */ + +Sys* sys = nil; +usize sizeofSys = sizeof(Sys); + +/* + * Option arguments from the command line. + * oargv[0] is the boot file. + * Optionsinit() is called from multiboot() to + * set it all up. + */ +static int oargc; +static char* oargv[20]; +static char oargb[128]; +static int oargblen; + +IOConf ioconf; +int procmax; + +char dbgflg[256]; +static int vflag = 0; + +void +optionsinit(char* s) +{ + oargblen = strecpy(oargb, oargb+sizeof(oargb), s) - oargb; + oargc = tokenize(oargb, oargv, nelem(oargv)-1); + oargv[oargc] = nil; +} + +static void +options(int argc, char* argv[]) +{ + char *p; + int n, o; + + /* + * Process flags. + * Flags [A-Za-z] may be optionally followed by + * an integer level between 1 and 127 inclusive + * (no space between flag and level). + * '--' ends flag processing. 
+ */ + while(--argc > 0 && (*++argv)[0] == '-' && (*argv)[1] != '-'){ + while(o = *++argv[0]){ + if(!(o >= 'A' && o <= 'Z') && !(o >= 'a' && o <= 'z')) + continue; + n = strtol(argv[0]+1, &p, 0); + if(p == argv[0]+1 || n < 1 || n > 127) + n = 1; + argv[0] = p-1; + dbgflg[o] = n; + } + } + vflag = dbgflg['v']; +} + +void +squidboy(int apicno) +{ + vlong hz; + + sys->machptr[m->machno] = m; + + /* + * Need something for initial delays + * until a timebase is worked out. + */ + m->cpuhz = sys->machptr[0]->cpuhz; + m->cyclefreq = m->cpuhz; + m->cpumhz = sys->machptr[0]->cpumhz; + m->perf.period = 1; + + DBG("Hello Squidboy %d %d\n", apicno, m->machno); + + //trapinit(); + vsvminit(MACHSTKSZ); + + /* + * Beware the Curse of The Non-Interruptable Were-Temporary. + */ + hz = archhz(); + if(hz == 0) + ndnr(); + m->cpuhz = hz; + m->cpumhz = hz/1000000ll; + + archenable(); + + mmuinit(); + if(!lapiconline()) + ndnr(); + + fpuinit(); + + /* + * Handshake with sipi to let it + * know the Startup IPI succeeded. + */ + m->splpc = 0; + + /* + * Handshake with main to proceed with initialisation. + */ + while(sys->epoch == 0) + ; + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + + DBG("mach %d is go %#p %#p %#p\n", m->machno, m, m->pml4->pte, &apicno); + switch(m->mode){ + default: +// vsvminit(MACHSTKSZ); + + timersinit(); + + /* + * Cannot allow interrupts while waiting for online. + * However, by taking the lowering of the APIC task priority + * out of apiconline something could be done here with + * MONITOR/MWAIT perhaps to drop the energy used by the + * idle core. 
+ */ + while(!m->online) + pause(); + lapictimerenable(); + lapicpri(0); + + print("mach%d: online color %d\n", m->machno, m->color); + schedinit(); + break; + } + panic("squidboy returns (type %d)", m->mode); +} + +#define D(c) if(0)outb(0x3f8, (c)) + +void +main(u32int ax, u32int bx) +{ + int i; + vlong hz; + char *p; + + memset(edata, 0, end - edata); + + /* + * ilock via i8250enable via i8250console + * needs m->machno, sys->machptr[] set, and + * also 'up' set to nil. + */ + cgapost(sizeof(uintptr)*8); + memset(m, 0, sizeof(Mach)); + m->machno = 0; + m->online = 1; + sys->machptr[m->machno] = &sys->mach; + m->stack = PTR2UINT(sys->machstk); + m->vsvm = sys->vsvmpage; + sys->nmach = 1; + sys->nonline = 1; + sys->copymode = 0; /* copy on write */ + up = nil; + + confoptions(); + asminit(); + multiboot(ax, bx, 0); + options(oargc, oargv); + p = getconf("*dbflags"); + if(p != nil){ + for(; *p != 0; p++) + if(*p >= 'a' && *p <= 'z' || *p >= 'A' && *p <= 'Z') + dbgflg[*p] = 1; + } + + /* + * Need something for initial delays + * until a timebase is worked out. + */ + m->cpuhz = 2000000000ll; + m->cpumhz = 2000; + + cgainit(); + i8250console("0"); + consputs = cgaconsputs; + + vsvminit(MACHSTKSZ); + + active.exiting = 0; + + fmtinit(); + print("\nPlan 9/64\n"); + if(vflag){ + print("&ax = %#p, ax = %#ux, bx = %#ux\n", &ax, ax, bx); + multiboot(ax, bx, vflag); + } + e820(); + + m->perf.period = 1; + if((hz = archhz()) != 0ll){ + m->cpuhz = hz; + m->cpumhz = hz/1000000ll; + } + + archenable(); + + /* + * Mmuinit before meminit because it + * makes mappings and + * flushes the TLB via m->pml4->pa. + */ + mmuinit(); + + ioinit(); + kbdinit(); + + meminit(); + archinit(); + physallocinit(); +D('a'); + mallocinit(); +D('b'); + memdebug(); + trapinit(); +D('c'); + + /* + * Printinit will cause the first malloc + * call to happen (printinit->qopen->malloc). 
+ * If the system dies here it's probably due + * to malloc not being initialised + * correctly, or the data segment is misaligned + * (it's amazing how far you can get with + * things like that completely broken). + */ + printinit(); +D('d'); + /* + * This is necessary with GRUB and QEMU. + * Without it an interrupt can occur at a weird vector, + * because the vector base is likely different, causing + * havoc. Do it before any APIC initialisation. + */ + i8259init(IdtPIC); +D('e'); + + acpiinit(MACHMAX); +D('f'); +// mpsinit(); +D('g'); + lapiconline(); + ioapiconline(); +D('h'); + intrenable(IdtTIMER, timerintr, 0, -1, "APIC timer"); + lapictimerenable(); + lapicpri(0); +D('i'); + + timersinit(); +D('j'); + kbdenable(); +D('k'); + fpuinit(); +D('l'); + p = getconf("*procmax"); + if(p != nil) + procmax = strtoull(p, nil, 0); + if(procmax == 0) + procmax = 2000; + psinit(procmax); +D('m'); + initimage(); +D('n'); + links(); +D('o'); + devtabreset(); +D('p'); + pageinit(); +D('r'); + + userinit(); +D('s'); + if(!dbgflg['S']) + sipi(); +D('t'); + + sys->epoch = rdtsc(); + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + +D('u'); + /* + * Release the hounds. + */ + for(i = 1; i < MACHMAX; i++){ + if(sys->machptr[i] == nil) + continue; + + ainc(&sys->nonline); + + sys->machptr[i]->color = corecolor(i); + if(sys->machptr[i]->color < 0) + sys->machptr[i]->color = 0; + sys->machptr[i]->online = 1; + } +D('v'); +prflush(); + schedinit(); +} + +void +init0(void) +{ + char buf[2*KNAMELEN]; + + up->nerrlab = 0; + +// if(consuart == nil) +// i8250console("0"); + spllo(); + + /* + * These are o.k. because rootinit is null. + * Then early kproc's will have a root and dot. 
+ */ + up->slash = namec("#/", Atodir, 0, 0); + pathclose(up->slash->path); + up->slash->path = newpath("/"); + up->dot = cclone(up->slash); + + devtabinit(); + + if(!waserror()){ + snprint(buf, sizeof(buf), "%s %s", "AMD64", conffile); + ksetenv("terminal", buf, 0); + ksetenv("cputype", "amd64", 0); + if(cpuserver) + ksetenv("service", "cpu", 0); + else + ksetenv("service", "terminal", 0); + confsetenv(); + poperror(); + } + kproc("alarm", alarmkproc, 0); + touser(sp); +} + +void +bootargs(uintptr base) +{ + int i; + ulong ssize; + char **av, *p; + + /* + * Push the boot args onto the stack. + * Make sure the validaddr check in syscall won't fail + * because there are fewer than the maximum number of + * args by subtracting sizeof(up->arg). + */ + i = oargblen+1; + p = UINT2PTR(STACKALIGN(base + PGSZ - sizeof(Tos) - sizeof(up->arg) - i)); + memmove(p, oargb, i); + + /* + * Now push argc and the argv pointers. + * This isn't strictly correct as the code jumped to by + * touser in init9.[cs] calls startboot (port/initcode.c) which + * expects arguments + * startboot(char* argv0, char* argv[]) + * not the usual (int argc, char* argv[]), but argv0 is + * unused so it doesn't matter (at the moment...). + */ + av = (char**)(p - (oargc+2)*sizeof(char*)); + ssize = base + PGSZ - PTR2UINT(av); + *av++ = (char*)oargc; + for(i = 0; i < oargc; i++) + *av++ = (oargv[i] - oargb) + (p - base) + (USTKTOP - PGSZ); + *av = nil; + + sp = USTKTOP - ssize; +} + +void +userinit(void) +{ + Proc *p; + Segment *s; + KMap *k; + Page *pg; + + p = newproc(); + p->pgrp = newpgrp(); + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + p->fgrp = dupfgrp(nil); + p->rgrp = newrgrp(); + p->procmode = 0640; + + kstrdup(&eve, ""); + kstrdup(&p->text, "*init*"); + kstrdup(&p->user, eve); + + /* + * Kernel Stack + * + * N.B. make sure there's enough space for syscall to check + * for valid args and + * space for gotolabel's return PC + * AMD64 stack must be quad-aligned. 
+ */ + p->sched.pc = PTR2UINT(init0); + p->sched.sp = PTR2UINT(p->kstack+KSTACK-sizeof(up->arg)-sizeof(uintptr)); + p->sched.sp = STACKALIGN(p->sched.sp); +D('0'); + /* + * User Stack + * + * Technically, newpage can't be called here because it + * should only be called when in a user context as it may + * try to sleep if there are no pages available, but that + * shouldn't be the case here. + */ + s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKTOP, nil, 0); +D('1'); + p->seg[SSEG] = s; + pg = newpage(1, s->pages->lg2pgsize, 0); + addpage(s->pages, s->pages->npages-1, pg); +D('2'); + k = kmap(pg); + bootargs(PTR2UINT(VA(k))); + kunmap(k); + + /* + * Text + */ + s = newseg(SG_TEXT, UTZERO, UTZERO+PGSZ, nil, 0); + s->flushme++; + p->seg[TSEG] = s; + pg = newpage(1, s->pages->lg2pgsize, 0); + mmucachectl(pg, PG_TXTFLUSH); + addpage(s->pages, 0, pg); + k = kmap(pg); + memmove(VA(k), initcode, sizeof initcode); + kunmap(k); + + ready(p); +} + +static void +fullstop(void) +{ + splhi(); + lapicpri(0xff); + /* i8259 was initialised as disabled */ + for(;;) + _halt(); +} + +static void +shutdown(int ispanic) +{ + int ms; + + if(!m->online) + fullstop(); + + active.ispanic = ispanic; + m->online = 0; + active.exiting = 1; + adec(&sys->nonline); + + iprint("cpu%d: exiting\n", m->machno); + /* wait for any other processors to shutdown */ + //spllo(); + prflush(); + for(ms = 10*1000; ms > 0; ms -= 2){ + delay(2); + if(sys->nonline == 0 && consactive() == 0) + break; + } + + if(active.ispanic){ + if(!cpuserver || getconf("*debug") || 1) + fullstop(); + delay(10000); + } + else + delay(1000); +} + +void +reboot(void*, void*, long) +{ + panic("reboot\n"); +} + +void +exit(int ispanic) +{ + shutdown(ispanic); + archreset(); +} diff -Nru /sys/src/9k/k10/map.c /sys/src/9k/k10/map.c --- /sys/src/9k/k10/map.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/map.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,51 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include 
"fns.h" + +/* + * Before mmuinit is done, we can't rely on sys->vmunmapped + * being set, so we use the static limit TMFM + */ + +void* +KADDR(uintmem pa) +{ + u8int* va; + + va = UINT2PTR(pa); + if(sys->vmunmapped != 0){ + if(pa < sys->vmunmapped-KSEG0) + return KSEG0+va; + }else if(pa < TMFM) + return KSEG0+va; + return KSEG2+va; +} + +uintmem +PADDR(void* va) +{ + uintmem pa; + + pa = PTR2UINT(va); + if(pa >= KSEG2 && pa < KSEG1) + return pa-KSEG2; + if(pa >= KSEG0 && pa < KSEG0+TMFM) + return pa-KSEG0; + if(pa > KSEG2) + return pa-KSEG2; + + panic("PADDR: va %#p pa %#P @ %#p\n", va, mmuphysaddr(PTR2UINT(va)), getcallerpc(&va)); + return 0; +} + +KMap* +kmap(Page* page) +{ + DBG("kmap(%#llux) @ %#p: %#p %#p\n", + page->pa, getcallerpc(&page), + page->pa, KADDR(page->pa)); + + return KADDR(page->pa); +} diff -Nru /sys/src/9k/k10/mem.h /sys/src/9k/k10/mem.h --- /sys/src/9k/k10/mem.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/mem.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,109 @@ +/* + * Memory and machine-specific definitions. Used in C and assembler. + */ +#define KiB 1024 /* Kibi 0x0000000000000400 */ +#define MiB 1048576 /* Mebi 0x0000000000100000 */ +#define GiB 1073741824 /* Gibi 0x0000000040000000 */ +#define TiB 1099511627776ll /* Tebi 0x0000010000000000 */ +#define PiB 1125899906842624ll /* Pebi 0x0004000000000000 */ +#define EiB 1152921504606846976ll /* Exbi 0x1000000000000000 */ + +#define HOWMANY(x, y) (((x)+((y)-1))/(y)) +#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) +#define ROUNDDN(x, y) (((x)/(y))*(y)) +#define MIN(a, b) ((a) < (b)? (a): (b)) +#define MAX(a, b) ((a) > (b)? 
(a): (b)) + +/* + * Sizes + */ +#define BI2BY 8 /* bits per byte */ +#define BY2V 8 /* bytes per double word */ +#define BY2SE 8 /* bytes per stack element */ +#define BLOCKALIGN 8 + +#define PGSZ (4*KiB) /* page size */ +#define PGSHFT 12 /* log(PGSZ) */ +#define PTSZ (4*KiB) /* page table page size */ +#define PTSHFT 9 /* */ + +#define MACHSZ (4*KiB) /* Mach+stack size */ +#define MACHMAX 32 /* max. number of cpus */ +#define MACHSTKSZ (6*(4*KiB)) /* Mach stack size */ + +#define KSTACK (16*1024) /* Size of Proc kernel stack */ +#define STACKALIGN(sp) ((sp) & ~(BY2SE-1)) /* bug: assure with alloc */ + +/* + * Time + */ +#define HZ (100) /* clock frequency */ +#define MS2HZ (1000/HZ) /* millisec per clock tick */ +#define TK2SEC(t) ((t)/HZ) /* ticks to seconds */ + +/* + * Address spaces + * + * (Kernel gets loaded at 1*MiB+64*KiB. + * mem from 0 to the end of kernel is not used for other things.) + * + * User is at low addresses; kernel vm starts at KZERO + * KSEG0 maps the first TMFM bytes, one to one,(i.e KZERO) + * KSEG1 maps the PML4 into itself. + * KSEG2 maps all remaining physical memory. (from TMFM up). + */ + +#define UTZERO (0+2*MiB) /* first address in user text */ +#define UTROUND(t) ROUNDUP((t), 2*MiB) /* first address beyond text for user data */ +#define USTKTOP 0x00007ffffffff000ull +#define USTKSIZE (16*1024*1024) /* size of user stack */ +#define TSTKTOP (USTKTOP-USTKSIZE) /* end of new stack in sysexec */ + +#define KSEG0 (0xfffffffff0000000ull) /* 256MB - this is confused */ +#define KSEG1 (0xffffff0000000000ull) /* 512GB - embedded PML4 */ +#define KSEG2 (0xfffffe0000000000ull) /* 1TB - KMAP */ +#define PMAPADDR (0xffffffffffe00000ull) /* unused as of yet (KMAP?) */ + +#define KZERO (0xfffffffff0000000ull) +#define KTZERO (KZERO+1*MiB+64*KiB) + +/* + * virtual MMU + */ +#define PTEPERTAB (256) +#define PTEMAPMEM (PTEPERTAB*PGSZ) +#define SEGMAPSIZE 1984 +#define SSEGMAPSIZE 16 + +/* + * This is the interface between fixfault and mmuput. 
+ * Should be in port. + */ +#define PTEVALID (1<<0) +#define PTEWRITE (1<<1) +#define PTERONLY (0<<1) +#define PTEUSER (1<<2) +#define PTEUNCACHED (1<<4) + +#define getpgcolor(a) 0 + +/* + * Hierarchical Page Tables. + * For example, traditional IA-32 paging structures have 2 levels, + * level 1 is the PD, and level 0 the PT pages; with IA-32e paging, + * level 3 is the PML4(!), level 2 the PDP, level 1 the PD, + * and level 0 the PT pages. The PTLX macro gives an index into the + * page-table page at level 'l' for the virtual address 'v'. + */ +#define PTLX(v, l) (((v)>>(((l)*PTSHFT)+PGSHFT)) & ((1<pmstart = ROUNDUP(PADDR(end), PGSZ); + rmapalloc(&rmapram, 0, sys->pmstart, 0); /* TO DO: should be empty, surely */ + rmapfree(&rmapunavail, 0, sys->pmstart); +} + +static PTE +asmwalkalloc(usize size) +{ + uintmem pa; + + assert(size == PTSZ && sys->vmunused+size <= sys->vmunmapped); + + if((pa = mmuphysaddr(sys->vmunused)) != ~(uintmem)0) + sys->vmunused += size; + + return pa; +} + +void +memmaprange(uintptr va, uintmem lo, uintmem hi, PTE (*alloc)(usize), PTE flags) +{ + uintmem mem, nextmem; + PTE *pte, f; + int i, l; + + if(alloc == nil) + alloc = asmwalkalloc; + /* Convert a range into pages */ + for(mem = lo; mem < hi; mem = nextmem){ + nextmem = (mem + PGLSZ(0)) & ~m->pgszmask[0]; + + /* Try large pages first */ + for(i = m->npgsz - 1; i >= 0; i--){ + if((mem & m->pgszmask[i]) != 0) + continue; + if(mem + PGLSZ(i) > hi) + continue; + /* This page fits entirely within the range. 
*/ + /* Mark it as usable */ + + if((l = mmuwalk(va, i, &pte, alloc)) < 0) + panic("meminit 3"); + + f = flags; + if(l > 0){ + if(f & Pte4KPAT) + f ^= Pte4KPAT | Pte2MPAT; /* it's the same for 1G */ + f |= PtePS; + } + *pte = mem|PteRW|PteP|f; + + nextmem = mem + PGLSZ(i); + va += PGLSZ(i); + npg[i]++; + break; + } + } +} + +/* + * Called after reading the physical memory maps (e820 or multiboot), + * and following mmuinit, which sets sys->vmstart/vmunmapped/vmunused, + * the boundaries of the initial, contiguous kernel virtual address space. + * + * Extend the page tables to complete the mapping of physical memory + * into the region beyond vmunmapped, claiming the memory from the + * physical memory map; and map the remaining chunks of + * physical memory into the region KSEG2. + */ +void +meminit(void) +{ + RMapel *asm; + uintptr va; + uintmem hi, lo, n; + + /* + * do we need a map, like vmap, for best use of mapping kmem? + * - in fact, a rewritten pdmap could do the job, no? + * have to assume up to vmend is contiguous. + * can't mmuphysaddr(sys->vmunmapped) because... 
+ */ + + /* assume already 2MiB aligned and 2MiB pages available */ + assert(m->pgszlg2[1] == 21); + assert((sys->vmunmapped & m->pgszmask[1]) == 0); + + print("vmunmapped %#llux\n", sys->vmunmapped); + + n = TMFM; + if(n > sys->pmoccupied) + n = sys->pmoccupied/2; + sys->pmunassigned = ROUNDDN(n, MiB); + + for(asm = rmapram.map; asm != nil; asm = asm->next){ + va = KSEG2+asm->addr; + lo = asm->addr; + hi = asm->addr+asm->size; + DBG("mem %#P %#P (%P) va %#p\n", lo, hi, hi-lo, va); + memmaprange(va, lo, hi, asmwalkalloc, 0); + } + + n = sys->pmoccupied; + if(n > 600*MiB) + n = 600*MiB; + ialloclimit(n/3); +} + +void +memdebug(void) +{ + if(DBGFLG || 1){ + rmapprint(&rmapram); + rmapprint(&rmapunavail); + print("k ptes:"); + for(int i = 0; i < nelem(npg); i++) + print(" %d", npg[i]); + print("\n"); + } +} + +void +memreserve(uintmem pa, uintmem size) +{ + rmapfree(&rmapunavail, pa, size); +} + +void +memaffinity(u64int base, u64int len, u32int dom, int flags) +{ + if(flags & MemNonVolatile){ + memreserve(base, len); + return; + } + DBG("mem affinity: %#16.16llux %#16.16llux -> %d\n", base, base+len-1, dom); + /* TO DO: add [base, base+len[ to dom table */ +} + +void* +basealloc(usize nb, uint align, usize *alloced) +{ + uintmem pa; + + if(align < CACHELINESZ) + align = CACHELINESZ; + nb = ROUNDUP(nb, CACHELINESZ); + pa = rmapalloc(&rmapram, 0, nb, align); + if(pa == 0) + return nil; + if(alloced != nil) + *alloced = nb; + return KADDR(pa); +} + +void +basefree(void *p, usize nb) +{ + uintmem pa; + + pa = PADDR(p); + rmapfree(&rmapram, pa, nb); +} + +int +e820(void) +{ + char *p, *s; + u64int base, len; + uint type; + int v; + + p = getconf("*e820"); + if(p == nil) + return 0; + v = 0; + for(s = p;;){ + if(*s == 0) + break; + type = strtoul(s, &s, 16); + if(*s != ' ') + break; + base = strtoull(s, &s, 16); + if(*s != ' ') + break; + len = strtoull(s, &s, 16) - base; + if(*s != ' ' && *s != 0 || len == 0) + break; + DBG("E820: %llux %llux %#ux\n", base, len, type); + 
asmmapinit(base, len, type); + v = 1; + } + return v; +} + +/* + * Notes: + * asmmapinit and asmmodinit called from multiboot or e820; + * subject to change; the numerology here is probably suspect. + * Multiboot defines the alignment of modules as 4096. + */ +void +asmmapinit(uintmem addr, uintmem size, int type) +{ + if(type == AddrsMemory){ + /* + * Adjust things for the peculiarities of this + * architecture. + */ + if(addr < 1*MiB || addr+size < sys->pmstart) + return; + if(addr < sys->pmstart){ + size -= sys->pmstart - addr; + addr = sys->pmstart; + } + rmapfree(&rmapram, addr, size); + sys->pmoccupied += size; + }else if(specialmem != nil) + specialmem(addr, size, type); + rmapfree(&rmapunavail, addr, size); +} + +void +asmmodinit(u32int start, u32int end, char*) +{ + if(start < sys->pmstart) + return; + end = ROUNDUP(end, 4096); + if(end > sys->pmstart){ + rmapalloc(&rmapram, sys->pmstart, end-sys->pmstart, 0); + sys->pmstart = end; + } +} diff -Nru /sys/src/9k/k10/mkfile /sys/src/9k/k10/mkfile --- /sys/src/9k/k10/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/mkfile Wed Dec 9 00:00:00 2015 @@ -0,0 +1,90 @@ +CONF=k10cpu +CONFLIST=k10cpu k8cpu k8root + +objtype=amd64 + $p$CONF.gz + +install:V: $p$CONF + cp $p$CONF /$objtype + #import lookout / /n/lookout && cp $p$CONF $p$CONF.gz /n/lookout/$objtype/ + +init.out: init9.$O initcode.$O /$objtype/lib/libc.a + $LD -l -R1 -s -o init.out init9.$O initcode.$O -lc + +l32p.$O: ${objtype}l.h +l64idt.$O: ${objtype}l.h +l64syscall.$O: ${objtype}l.h +l64v.$O: ${objtype}l.h +lsipi.$O: ${objtype}l.h + +${objtype}l.h: $objtype.h + rc ../mk/mkenum $objtype.h > $target + +lapic.$O: apic.h io.h +devarch.$O: ../port/error.h /$objtype/include/ureg.h +fpu.$O: amd64.h +fpu.$O: /$objtype/include/ureg.h +ioapic.$O: apic.h io.h +main.$O: /sys/include/pool.h init.h +memory.$O: amd64.h +mmu.$O: amd64.h +mp.$O: apic.h +sipi.$O: apic.h sipi.h +svm.$O: amd64.h +svm.$O: /$objtype/include/ureg.h +syscall.$O: ../port/error.h 
/sys/src/libc/9syscall/sys.h +syscall.$O: /sys/include/tos.h /$objtype/include/ureg.h +syscall.$O: amd64.h +trap.$O: ../port/error.h io.h +trap.$O: /sys/include/tos.h /$objtype/include/ureg.h +devacpi.$O acpi.$O: acpi.h /sys/include/aml.h + +devaoe.$O: ../port/error.h ../port/netif.h ../ip/ip.h +devaoe.$O: ../port/aoe.h etherif.h +devether.$O: ../port/error.h ../port/netif.h etherif.h +devrtc.$O: ../port/error.h +ether8169.$O: ../port/error.h ../port/ethermii.h ../port/netif.h +ether8169.$O: etherif.h +ether82557.$O: ../port/netif.h +ether82557.$O: etherif.h io.h +etherigbe.$O: ../port/error.h ../port/ethermii.h ../port/netif.h +etherigbe.$O: etherif.h io.h +etherm10g.$O: ../port/error.h ../port/netif.h +etherm10g.$O: etherif.h io.h +etherm10g.$O: ../386/etherm10g2k.i ../386/etherm10g4k.i +i8259.$O: io.h +kbd.$O: ../port/error.h io.h +pci.$O: io.h +sdaoe.$O: ../port/error.h ../port/netif.h ../port/sd.h +sdaoe.$O: ../port/aoe.h etherif.h io.h +sdiahci.$O: ../386/ahci.h +sdscsi.$O: ../port/error.h + +random.$O: ../port/error.h + +sipi.h: l64sipi.$O mkfile + $LD -o l64sipi.out -T0xfffffffff0003000 -R4 -l -s l64sipi.$O + {echo 'uchar sipihandler[]={' + xd -1x l64sipi.out | + sed -e 's/^[0-9a-f]+ //' \ + -e '1,2d' -e '3s/^ .. .. .. .. .. .. .. 
..//' \ + -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > $target diff -Nru /sys/src/9k/k10/mmu.c /sys/src/9k/k10/mmu.c --- /sys/src/9k/k10/mmu.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/mmu.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,803 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" + +#define PDMAP (0xffffffffff800000ull) +#define PDPX(v) PTLX((v), 2) +#define PDX(v) PTLX((v), 1) +#define PTX(v) PTLX((v), 0) + +#define VMAP (0xffffffffe0000000ull) +#define VMAPSZ (256*MiB) + +#define KSEG1PML4 (0xffff000000000000ull\ + |(PTLX(KSEG1, 3)<<(((3)*PTSHFT)+PGSHFT))\ + |(PTLX(KSEG1, 3)<<(((2)*PTSHFT)+PGSHFT))\ + |(PTLX(KSEG1, 3)<<(((1)*PTSHFT)+PGSHFT))\ + |(PTLX(KSEG1, 3)<<(((0)*PTSHFT)+PGSHFT))) + +#define KSEG1PTP(va, l) ((0xffff000000000000ull\ + |(KSEG1PML4<<((3-(l))*PTSHFT))\ + |(((va) & 0xffffffffffffull)>>(((l)+1)*PTSHFT))\ + & ~0xfffull)) + +static Lock vmaplock; +static Ptpage mach0pml4; +static struct{ + Lock; + Ptpage* next; +} ptpfreelist; +int ptpcount; + +void +mmuflushtlb(u64int) +{ + if(m->pml4->ptoff){ + memset(m->pml4->pte, 0, m->pml4->ptoff*sizeof(PTE)); + m->pml4->ptoff = 0; + } + cr3put(m->pml4->pa); +} + +void +mmuflush(void) +{ + int s; + + s = splhi(); + up->newtlb = 1; + mmuswitch(up); + splx(s); +} + +static void +mmuptpfree(Proc* proc, int clear) +{ + int l; + PTE *pte; + Ptpage **last, *page; + + for(l = 0; l < 4; l++){ + last = &proc->mmuptp[l]; + if(*last == nil) + continue; + for(; (page = *last) != nil; last = &page->next){ + if(l <= 2 && clear) + memset(page->pte, 0, PTSZ); + pte = page->parent->pte; + pte[page->ptoff] = 0; + proc->nptpbusy--; + } + *last = proc->ptpfree; + proc->ptpfree = proc->mmuptp[l]; + proc->mmuptp[l] = nil; + } + + m->pml4->ptoff = 0; +} + +static Ptpage* +mmuptpalloc(void) +{ + Ptpage *page; + uintmem pa; + + page = m->ptpfree; + if(page != nil){ + m->ptpfree = page->next; + m->nptpfree--; + }else{ + lock(&ptpfreelist); + page = 
ptpfreelist.next; + if(page != nil) + ptpfreelist.next = page->next; + unlock(&ptpfreelist); + } + if(page != nil){ + page->next = nil; +memset(page->pte, 0, PTSZ); + return page; + } + pa = physalloc(PTSZ); + if(pa == 0){ + physdump(); + panic("mmuptpalloc"); + } + DBG("ptp %#P\n", pa); + page = mallocz(sizeof(*page), 0); + if(page == nil) + panic("mmuptpalloc 2"); + page->pte = KADDR(pa); + page->pa = pa; + page->next = nil; + page->parent = nil; + page->ptoff = 0; + memset(page->pte, 0, PTSZ); + return page; +} + +void +mmuswitch(Proc* proc) +{ + PTE *pte; + Ptpage *page; + + if(proc->newtlb){ + mmuptpfree(proc, 1); + proc->newtlb = 0; + } + + if(m->pml4->ptoff){ + memset(m->pml4->pte, 0, m->pml4->ptoff*sizeof(PTE)); + m->pml4->ptoff = 0; + } + + /* install new page directory pointers in pml4 */ + pte = m->pml4->pte; + for(page = proc->mmuptp[3]; page != nil; page = page->next){ + pte[page->ptoff] = PPN(page->pa)|PteU|PteRW|PteP; + if(page->ptoff >= m->pml4->ptoff) + m->pml4->ptoff = page->ptoff+1; + page->parent = m->pml4; + } + + tssrsp0(STACKALIGN(PTR2UINT(proc->kstack+KSTACK))); + cr3put(m->pml4->pa); +} + +void +mmurelease(Proc* proc) +{ + Ptpage *page, **last; + + mmuptpfree(proc, 0); + + /* keep a few page tree pages per cpu */ + while((page = proc->ptpfree) != nil){ + page->parent = nil; + if(sys->nmach != 1 && m->nptpfree > 20) + break; + proc->ptpfree = page->next; + page->next = m->ptpfree; + m->ptpfree = page; + m->nptpfree++; + } + + if(proc->ptpfree != nil){ + /* add the rest to the global pool */ + for(last = &proc->ptpfree; (page = *last) != nil; last = &page->next) + page->parent = nil; + lock(&ptpfreelist); + *last = ptpfreelist.next; + ptpfreelist.next = proc->ptpfree; + proc->ptpfree = nil; + unlock(&ptpfreelist); + } + + if(proc->nptpbusy) + print("%ud: ptpbusy %s %d\n", proc->pid, proc->text, proc->nptpbusy); + proc->nptpbusy = 0; + + tssrsp0(STACKALIGN(m->stack+MACHSTKSZ)); + cr3put(m->pml4->pa); +} + +static PTE* +mmuptpget(uintptr va, 
int level) +{ + return (PTE*)KSEG1PTP(va, level); +} + +static Ptpage* +makeptp(Ptpage *parent, int l, PTE *ptp, int x) +{ + Ptpage *page; + PTE *pte; + + for(page = up->mmuptp[l]; page != nil; page = page->next) + if(page->parent == parent && page->ptoff == x) + return page; + pte = &ptp[x]; + if(up->ptpfree == nil){ + page = mmuptpalloc(); + } + else { + page = up->ptpfree; + up->ptpfree = page->next; + } + page->ptoff = x; + page->next = up->mmuptp[l]; + up->mmuptp[l] = page; + page->parent = parent; + *pte = page->pa|PteU|PteRW|PteP; + if(l == 3 && x >= m->pml4->ptoff) + m->pml4->ptoff = x+1; + up->nptpbusy++; + DBG("%d: pte put l%d %#p[%d] -> %#P pte %#p\n", up->pid, l, ptp, x, *pte, pte); + return page; +} + +void +mmuput(uintptr va, uintmem pa, Page *pg) +{ + Mpl pl; + int l, x, tl; + PTE *pte, *ptp; + Ptpage *prev; + uint attr; + static void mmuptpcheck(Proc*); + + tl = (pg->lg2size-12)/9; + pl = splhi(); +// mmuptpcheck(up); + for(l = 3; l != tl; l--){ + ptp = mmuptpget(va, l); + pte = &ptp[PTLX(va,l)]; + if(l == tl) + break; + if((*pte & PteP) == 0 || *pte & PtePS) + break; + } + if(l != tl){ + /* add missing intermediate level */ + prev = m->pml4; + for(l = 3; l > tl; l--){ + ptp = mmuptpget(va, l); + x = PTLX(va, l); + prev = makeptp(prev, l, ptp, x); + } + } + ptp = mmuptpget(va, tl); + pte = &ptp[PTLX(va, tl)]; + attr = 0; + if(tl > 0) + attr |= PtePS; + *pte = pa | attr | PteU; + DBG("%d: put pte %#p: %#p -> %#P\n", up->pid, pte, va, *pte); +//if(pa & PteRW) +// *pte |= PteNX; + splx(pl); + invlpg(va); /* only if old entry valid? */ +} + +static PTE +pdeget(uintptr va) +{ + PTE *pdp; + + if(va < 0xffffffffc0000000ull) + panic("pdeget(%#p)", va); + + pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096); + + return pdp[PDX(va)]; +} + +/* + * Add kernel mappings for va -> pa for a section of size bytes. + * Called only after the va range is known to be unoccupied. 
+ */ +static int +pdmap(uintmem pa, int attr, uintptr va, usize size) +{ + uintmem pae; + PTE *pd, *pde, *pt, *pte; + uintmem pdpa; + int pdx, pgsz; + + pd = (PTE*)(PDMAP+PDX(PDMAP)*4096); + + for(pae = pa + size; pa < pae; pa += pgsz){ + pdx = PDX(va); + pde = &pd[pdx]; + + /* + * Check if it can be mapped using a big page, + * i.e. is big enough and starts on a suitable boundary. + * Assume processor can do it. + */ + if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){ + assert(*pde == 0); + *pde = pa|attr|PtePS|PteP; + pgsz = PGLSZ(1); + } + else{ + pt = (PTE*)(PDMAP+pdx*PTSZ); + if(*pde == 0){ + pdpa = physalloc(PTSZ); + if(pdpa == 0) + panic("pdmap"); + *pde = pdpa|PteRW|PteP; +//print("*pde %#llux va %#p\n", *pde, va); + memset(pt, 0, PTSZ); + } + + pte = &pt[PTX(va)]; + assert(!(*pte & PteP)); + *pte = pa|attr|PteP; + pgsz = PGLSZ(0); + } + va += pgsz; + } + + return 0; +} + +static int +findhole(PTE* a, int n, int count) +{ + int have, i; + + have = 0; + for(i = 0; i < n; i++){ + if(a[i] == 0) + have++; + else + have = 0; + if(have >= count) + return i+1 - have; + } + + return -1; +} + +/* + * Look for free space in the vmap. + */ +static uintptr +vmapalloc(usize size) +{ + int i, n, o; + PTE *pd, *pt; + int pdsz, ptsz; + + pd = (PTE*)(PDMAP+PDX(PDMAP)*4096); + pd += PDX(VMAP); + pdsz = VMAPSZ/PGLSZ(1); + + /* + * Look directly in the PD entries if the size is + * larger than the range mapped by a single entry. + */ + if(size >= PGLSZ(1)){ + n = HOWMANY(size, PGLSZ(1)); + if((o = findhole(pd, pdsz, n)) != -1) + return VMAP + o*PGLSZ(1); + return 0; + } + + /* + * Size is smaller than that mapped by a single PD entry. + * Look for an already mapped PT page that has room. 
+ */ + n = HOWMANY(size, PGLSZ(0)); + ptsz = PGLSZ(0)/sizeof(PTE); + for(i = 0; i < pdsz; i++){ + if(!(pd[i] & PteP) || (pd[i] & PtePS)) + continue; + + pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096); + if((o = findhole(pt, ptsz, n)) != -1) + return VMAP + i*PGLSZ(1) + o*PGLSZ(0); + } + + /* + * Nothing suitable, start using a new PD entry. + */ + if((o = findhole(pd, pdsz, 1)) != -1) + return VMAP + o*PGLSZ(1); + + return 0; +} + +void* +vmap(uintptr pa, usize size) +{ + uintptr va; + usize o, sz; + + DBG("vmap(%#p, %lud)\n", pa, size); + + if(m->machno != 0) + panic("vmap"); + + /* + * This is incomplete; the checks are not comprehensive + * enough. + * Sometimes the request is for an already-mapped piece + * of low memory, in which case just return a good value + * and hope that a corresponding vunmap of the address + * will have the same address. + * To do this properly will require keeping track of the + * mappings; perhaps something like kmap, but kmap probably + * can't be used early enough for some of the uses. + */ + if(pa+size <= 1ull*MiB) + return KADDR(pa); + if(pa < 1ull*MiB) + return nil; + + /* + * Might be asking for less than a page. + * This should have a smaller granularity if + * the page size is large. + */ + o = pa & ((1<<PGSHFT)-1); + pa -= o; + sz = ROUNDUP(size+o, PGLSZ(0)); + + /* + * NOTE(review): everything from "<<PGSHFT" above to the "=>" in the DBG + * below was missing from this copy and has been reconstructed from the + * declarations of o, sz and va and from vmaplock, vmapalloc and pdmap + * in this file. The PtePCD|PteRW attributes and the use of ilock are + * assumptions; confirm against the original mmu.c. + */ + ilock(&vmaplock); + if((va = vmapalloc(sz)) == 0 || pdmap(pa, PtePCD|PteRW, va, sz) < 0){ + iunlock(&vmaplock); + return nil; + } + iunlock(&vmaplock); + + DBG("vmap(%#p, %lud) => %#p\n", pa+o, size, va+o); + + return UINT2PTR(va + o); +} + +void +vunmap(void* v, usize size) +{ + uintptr va; + + DBG("vunmap(%#p, %lud)\n", v, size); + + if(m->machno != 0) + panic("vunmap"); + + /* + * See the comments above in vmap. + */ + va = PTR2UINT(v); + if(va >= KZERO && va+size < KZERO+1ull*MiB) + return; + + /* + * Here will have to deal with releasing any + * resources used for the allocation (e.g. page table + * pages). 
+ */ + DBG("vunmap(%#p, %lud)\n", v, size); +} + +int +mmuwalk(uintptr va, int level, PTE** ret, u64int (*alloc)(usize)) +{ + int l; + Mpl pl; + uintmem pa; + PTE *pte, *ptp; + + DBG("mmuwalk%d: va %#p level %d\n", m->machno, va, level); + pte = nil; + pl = splhi(); + for(l = 3; l >= 0; l--){ + ptp = mmuptpget(va, l); + pte = &ptp[PTLX(va, l)]; + if(l == level) + break; + if(!(*pte & PteP)){ + if(alloc == nil) + return -1; + pa = alloc(PTSZ); + if(pa == ~(uintmem)0 || pa == 0) + return -1; +if(pa & 0xfffull) print("mmuwalk pa %#llux\n", pa); + *pte = pa|PteRW|PteP; + if((ptp = mmuptpget(va, l-1)) == nil) + panic("mmuwalk: mmuptpget(%#p, %d)", va, l-1); + memset(ptp, 0, PTSZ); + } + else if(*pte & PtePS) + break; + } + *ret = pte; + splx(pl); + + return l; +} + +u64int +mmuphysaddr(uintptr va) +{ + int l; + PTE *pte; + u64int mask, pa; + + /* + * Given a VA, find the PA. + * This is probably not the right interface, + * but will do as an experiment. Usual + * question, should va be void* or uintptr? 
+ */ + l = mmuwalk(va, 0, &pte, nil); + DBG("physaddr: va %#p l %d\n", va, l); + if(l < 0 || (*pte & PteP) == 0) + return ~(uintmem)0; + + mask = (1ull<<(((l)*PTSHFT)+PGSHFT))-1; + pa = (*pte & ~mask) + (va & mask); + + DBG("physaddr: l %d va %#p pa %#llux\n", l, va, pa); + + return pa; +} + +void +mmuinit(void) +{ + int l; + uchar *p; + PTE *pte; + Ptpage *page; + uintptr pml4; + u64int o, pa, r, sz; + + archmmu(); + DBG("mach%d: %#p npgsz %d\n", m->machno, m, m->npgsz); + if(m->machno != 0){ + /* + * GAK: Has to go when each mach is using + * its own page table + */ + p = UINT2PTR(m->stack); + p += MACHSTKSZ; + memmove(p, mach0pml4.pte, PTSZ); + m->pml4 = &m->pml4kludge; + m->pml4->pte = (PTE*)p; + m->pml4->pa = PADDR(p); + m->pml4->ptoff = mach0pml4.ptoff; /* # of user mappings in pml4 */ + if(m->pml4->ptoff){ + memset(p, 0, m->pml4->ptoff*sizeof(PTE)); + m->pml4->ptoff = 0; + } +pte = (PTE*)p; +pte[PTLX(KSEG1PML4, 3)] = m->pml4->pa|PteRW|PteP; + + r = rdmsr(Efer); + r |= Nxe; + wrmsr(Efer, r); + cr3put(m->pml4->pa); + DBG("mach%d: %#p pml4 %#p\n", m->machno, m, m->pml4); + return; + } + + page = &mach0pml4; + page->pa = cr3get(); + page->pte = sys->pml4; + + m->pml4 = page; + + r = rdmsr(Efer); + r |= Nxe; + wrmsr(Efer, r); + + /* + * Set up the various kernel memory allocator limits: + * pmstart/pmend bound the unused physical memory; + * vmstart/vmunmapped bound the total possible virtual memory + * used by the kernel in KSEG0; + * vmunused is the highest virtual address currently mapped + * and used by the kernel; + * vmunmapped is the highest virtual address currently + * mapped by the kernel. + * Vmunused can be bumped up to vmunmapped before more + * physical memory needs to be allocated and mapped. + * + * This is set up here so meminit can map appropriately. 
+ */ + o = sys->pmstart; + sz = ROUNDUP(o+128*KiB, 4*MiB) - o; /* add extra 128k for initial pt/pd allocations */ +print("mmuinit: rmapalloc: %#P pmstart=%#llux\n", o, sys->pmstart); + pa = rmapalloc(&rmapram, o, sz, 0); + if(pa != o) + panic("mmuinit: pa %#llux memstart %#llux\n", pa, o); + sys->pmstart += sz; + + sys->vmstart = KSEG0; + sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB); + sys->vmunmapped = sys->vmstart + o + sz; + + print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p\n", + sys->vmstart, sys->vmunused, sys->vmunmapped); + + /* + * Set up the map for PD entry access by inserting + * the relevant PDP entry into the PD. It's equivalent + * to PADDR(sys->pd)|PteRW|PteP. + * + * Change code that uses this to use the KSEG1PML4 + * map below. + */ + sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA); + print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]); + + assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP)); + + /* + * Set up the map for PTE access by inserting + * the relevant PML4 into itself. + * Note: outwith level 0, PteG is MBZ on AMD processors, + * is 'Reserved' on Intel processors, and the behaviour + * can be different. 
+ */ + pml4 = cr3get(); + sys->pml4[PTLX(KSEG1PML4, 3)] = pml4|PteRW|PteP; + cr3put(m->pml4->pa); + + if((l = mmuwalk(KZERO, 3, &pte, nil)) >= 0) + print("l %d %#p %llux\n", l, pte, *pte); + if((l = mmuwalk(KZERO, 2, &pte, nil)) >= 0) + print("l %d %#p %llux\n", l, pte, *pte); + if((l = mmuwalk(KZERO, 1, &pte, nil)) >= 0) + print("l %d %#p %llux\n", l, pte, *pte); + if((l = mmuwalk(KZERO, 0, &pte, nil)) >= 0) + print("l %d %#p %llux\n", l, pte, *pte); + + mmuphysaddr(PTR2UINT(end)); +} + +void +mmudump(Proc *p) +{ + Ptpage *ptp; + int i, l; + + for(l = 3; l >= 0; l--){ + for(ptp = p->mmuptp[l]; ptp != nil; ptp = ptp->next){ + print("pid %d level %d ptp %#p\n", p->pid, l, ptp); + for(i = 0; i < PTSZ/sizeof(PTE); i++) + if(ptp->pte[i]) + print("%.4d %#P\n", i, ptp->pte[i]); + } + } +} + +/* + * Double-check the user MMU. + * Error checking only. + */ +void +checkmmu(uintptr va, uintmem pa) +{ + uintmem mpa; + + mpa = mmuphysaddr(va); + if(mpa != ~(uintmem)0 && mpa != pa) + print("***%d %s: mmu mismatch va=%#p pa=%#P mmupa=%#P\n", + up->pid, up->text, va, pa, mpa); +} + +static void +tabs(int n) +{ + int i; + + for(i = 0; i < n; i++) + print(" "); +} + +void +dumpptepg(int lvl, uintmem pa) +{ + PTE *pte; + int tab, i; + + tab = 4 - lvl; + pte = UINT2PTR(KADDR(pa)); + for(i = 0; i < PTSZ/sizeof(PTE); i++) + if(pte[i] & PteP){ + tabs(tab); + print("l%d %#p[%#05x]: %#llux\n", lvl, pa, i, pte[i]); + + /* skip kernel mappings */ + if((pte[i]&PteU) == 0){ + tabs(tab+1); + print("...kern...\n"); + continue; + } + if(lvl > 2) + dumpptepg(lvl-1, PPN(pte[i])); + } +} + +void +dumpmmu(Proc *p) +{ + int i; + Ptpage *pt; + + print("proc %#p\n", p); + for(i = 3; i >= 0; i--){ + print("mmuptp[%d]:\n", i); + for(pt = p->mmuptp[i]; pt != nil; pt = pt->next) + print("\tpt %#p = va %#p pa %#P" + " ptoff %#ux next %#p parent %#p\n", + pt, pt->pte, pt->pa, pt->ptoff, pt->next, pt->parent); + } + print("pml4 %#P\n", m->pml4->pa); + if(0)dumpptepg(4, m->pml4->pa); +} + +void 
+dumpmmuwalk(uintmem addr) +{ + int l; + PTE *pte; + + if((l = mmuwalk(addr, 3, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(addr, 2, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(addr, 1, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(addr, 0, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); +} + +static void +mmuptpcheck(Proc *proc) +{ + int lvl, npgs, i; + Mpl pl; + Ptpage *lp, *p, **pgs, *fp; + enum{Tsize = 512}; + static Ptpage *pgtab[MACHMAX][Tsize]; + static uint idxtab[MACHMAX][Tsize]; + uint *idx; + + if(proc == nil) + return; + pl = splhi(); + pgs = pgtab[m->machno]; + idx = idxtab[m->machno]; + lp = m->pml4; + for(lvl = 3; lvl >= 1; lvl--){ + npgs = 0; + for(p = proc->mmuptp[lvl]; p != nil; p = p->next){ + for(fp = proc->ptpfree; fp != nil; fp = fp->next) + if(fp == p){ + dumpmmu(proc); + panic("ptpcheck: using free page"); + } + for(i = 0; i < npgs; i++){ + if(pgs[i] == p){ + dumpmmu(proc); + panic("ptpcheck: dup page"); + } + if(idx[i] == p->ptoff){ + dumpmmu(proc); + panic("ptcheck: dup daddr"); + } + } + if(npgs >= Tsize) + panic("ptpcheck: pgs is too small"); + idx[npgs] = p->ptoff; + pgs[npgs++] = p; + if(lvl == 3 && p->parent != lp){ + dumpmmu(proc); + panic("ptpcheck: wrong parent"); + } + } + + } + npgs = 0; + for(fp = proc->ptpfree; fp != nil; fp = fp->next){ + for(i = 0; i < npgs; i++) + if(pgs[i] == fp) + panic("ptpcheck: dup free page"); + pgs[npgs++] = fp; + } + splx(pl); +} diff -Nru /sys/src/9k/k10/mp.c /sys/src/9k/k10/mp.c --- /sys/src/9k/k10/mp.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/mp.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,492 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" +#include "apic.h" + +#undef DBG +#define DBG print +/* + * MultiProcessor 
Specification Version 1.[14]. + */ +typedef struct { /* MP Floating Pointer */ + u8int signature[4]; /* "_MP_" */ + u8int addr[4]; /* PCMP */ + u8int length; /* 1 */ + u8int revision; /* [14] */ + u8int checksum; + u8int feature[5]; +} _MP_; + +typedef struct { /* MP Configuration Table */ + u8int signature[4]; /* "PCMP" */ + u8int length[2]; + u8int revision; /* [14] */ + u8int checksum; + u8int string[20]; /* OEM + Product ID */ + u8int oaddr[4]; /* OEM table pointer */ + u8int olength[2]; /* OEM table length */ + u8int entry[2]; /* entry count */ + u8int apicpa[4]; /* local APIC address */ + u8int xlength[2]; /* extended table length */ + u8int xchecksum; /* extended table checksum */ + u8int reserved; + + u8int entries[]; +} PCMP; + +typedef struct { + char type[6]; + int polarity; /* default for this bus */ + int trigger; /* default for this bus */ +} Mpbus; + +static Mpbus mpbusdef[] = { + { "PCI ", IPlow, TMlevel, }, + { "ISA ", IPhigh, TMedge, }, +}; +static Mpbus* mpbus[Nbus]; +int mpisabusno = -1; + +static void +mpintrprint(char* s, u8int* p) +{ + char buf[128], *b, *e; + char format[] = " type %d flags %#ux bus %d IRQ %d APIC %d INTIN %d\n"; + + b = buf; + e = b + sizeof(buf); + b = seprint(b, e, "mpparse: intr:"); + if(s != nil) + b = seprint(b, e, " %s:", s); /* optional reason supplied by the caller */ + seprint(b, e, format, p[1], l16get(p+2), p[4], p[5], p[6], p[7]); + print("%s", buf); /* buf is already-formatted data: do not let print re-scan it for verbs */ +} + +static u32int +mpmkintr(u8int* p) +{ + u32int v; + Lapic *apic; + IOapic *ioapic; + int n, polarity, trigger; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. 
+ */ + if(mpbus[p[4]] == nil){ + mpintrprint("no source bus", p); + return 0; + } + if(p[6] != 0xff){ + if(Napic < 256 && p[6] >= Napic){ + mpintrprint("APIC ID out of range", p); + return 0; + } + switch(p[0]){ + default: + mpintrprint("INTIN botch", p); + return 0; + case 3: /* IOINTR */ + ioapic = ioapiclookup(p[6]); + if(ioapic == nil){ + mpintrprint("unuseable IO APIC", p); + return 0; + } + if(p[7] >= ioapic->nrdt){ + mpintrprint("IO INTIN out of range", p); + return 0; + } + break; + case 4: /* LINTR */ + apic = lapiclookup(p[6]); + if(apic == nil){ + mpintrprint("unuseable local APIC", p); + return 0; + } + if(p[7] >= nelem(apic->lvt)){ + mpintrprint("LOCAL INTIN out of range", p); + return 0; + } + break; + } + } + n = l16get(p+2); + if((polarity = (n & 0x03)) == 2 || (trigger = ((n>>2) & 0x03)) == 2){ + mpintrprint("invalid polarity/trigger", p); + return 0; + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(p[1]){ + default: + mpintrprint("invalid type", p); + return 0; + case 0: /* INT */ + switch(polarity){ + case 0: + v |= mpbus[p[4]]->polarity; + break; + case 1: + v |= IPhigh; + break; + case 3: + v |= IPlow; + break; + } + switch(trigger){ + case 0: + v |= mpbus[p[4]]->trigger; + break; + case 1: + v |= TMedge; + break; + case 3: + v |= TMlevel; + break; + } + break; + case 1: /* NMI */ + v |= TMedge|IPhigh|MTnmi; + break; + case 2: /* SMI */ + v |= TMedge|IPhigh|MTsmi; + break; + case 3: /* ExtINT */ + v |= TMedge|IPhigh|MTei; + break; + } + + return v; +} + +static void +mpparse(PCMP* pcmp) +{ + u32int lo; + u8int *e, *p; + int i, n, bustype; + Lapic *apic; + + p = pcmp->entries; + e = ((uchar*)pcmp)+l16get(pcmp->length); + while(p < e) switch(*p){ + default: + print("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e-p); + for(i = 0; p < e; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 0: /* processor */ + /* + * Initialise the APIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, the memory mapped address comes + * from the PCMP structure as the addess is local to the + * CPU and identical for all. Indicate whether this is + * the bootstrap processor (p[3] & 0x02). 
+ */ + DBG("mpparse: APIC %d pa %#ux useable %d\n", + p[1], l32get(pcmp->apicpa), p[3] & 0x01); + if(p[3] & 0x01) + lapicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02); + p += 20; + break; + case 1: /* bus */ + DBG("mpparse: bus: %d type %6.6s\n", p[1], (char*)p+2); + if(mpbus[p[1]] != nil){ + print("mpparse: bus %d already allocated\n", p[1]); + p += 8; + break; + } + for(i = 0; i < nelem(mpbusdef); i++){ + if(memcmp(p+2, mpbusdef[i].type, 6) != 0) + continue; + if(memcmp(p+2, "ISA ", 6) == 0){ + if(mpisabusno != -1){ + print("mpparse: bus %d already have ISA bus %d\n", + p[1], mpisabusno); + continue; + } + mpisabusno = p[1]; + } + mpbus[p[1]] = &mpbusdef[i]; + break; + } + if(mpbus[p[1]] == nil) + print("mpparse: bus %d type %6.6s unknown\n", + p[1], (char*)p+2); + + p += 8; + break; + case 2: /* IOAPIC */ + /* + * Initialise the IOAPIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, p[4-7] is the memory mapped address. + */ + DBG("mpparse: IOAPIC %d pa %#ux useable %d\n", + p[1], l32get(p+4), p[3] & 0x01); + if(p[3] & 0x01) + ioapicinit(p[1], -1, l32get(p+4)); + + p += 8; + break; + case 3: /* IOINTR */ + /* + * p[1] is the interrupt type; + * p[2-3] contains the polarity and trigger mode; + * p[4] is the source bus; + * p[5] is the IRQ on the source bus; + * p[6] is the destination APIC; + * p[7] is the INITIN pin on the destination APIC. + */ + if(p[6] == 0xff){ + mpintrprint("routed to all IOAPICs", p); + p += 8; + break; + } + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + /* + * Always present the device number in the style + * of a PCI Interrupt Assignment Entry. For the ISA + * bus the IRQ is the device number but unencoded. + * May need to handle other buses here in the future + * (but unlikely). 
+ */ + bustype = -1; + if(memcmp(mpbus[p[4]]->type, "PCI ", 6) == 0) + bustype = BusPCI; /* had devno = p[5]<<2 */ + else if(memcmp(mpbus[p[4]]->type, "ISA ", 6) == 0) + bustype = BusISA; + if(bustype != -1) + ioapicintrinit(bustype, p[4], p[6], p[7], p[5], lo); + + p += 8; + break; + case 4: /* LINTR */ + /* + * Format is the same as IOINTR above. + */ + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + /* + * mpmkintr checks the APIC and INTIN only when p[6] is not 0xff, so INTIN is bounded here for the all-LAPICs case. + */ + if(p[6] == 0xff){ + for(i = 0; i < Napic; i++){ + apic = lapiclookup(i); + if(apic != nil && p[7] < nelem(apic->lvt)) + apic->lvt[p[7]] = lo; + } + } + else{ + apic = lapiclookup(p[6]); + if(apic != nil) + apic->lvt[p[7]] = lo; + } + p += 8; + break; + } + + /* + * There's nothing of real interest in the extended table, + * should just move along, but check it for consistency. + */ + p = e; + e = p + l16get(pcmp->xlength); + while(p < e) switch(*p){ + default: + n = p[1] != 0 && p[1] <= e-p? p[1]: e-p; /* a zero length would never advance p; an over-long one would dump past e */ + print("mpparse: unknown extended entry %d length %d\n", *p, p[1]); + for(i = 0; i < n; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 128: + DBG("address space mapping\n"); + DBG(" bus %d type %d base %#llux length %#llux\n", + p[2], p[3], l64get(p+4), l64get(p+12)); + p += p[1] != 0? p[1]: e-p; /* zero length: skip the rest instead of looping forever */ + break; + case 129: + DBG("bus hierarchy descriptor\n"); + DBG(" bus %d sd %d parent bus %d\n", + p[2], p[3], p[4]); + p += p[1] != 0? p[1]: e-p; /* zero length: skip the rest instead of looping forever */ + break; + case 130: + DBG("compatibility bus address space modifier\n"); + DBG(" bus %d pr %d range list %d\n", + p[2], p[3], l32get(p+4)); + p += p[1] != 0? p[1]: e-p; /* zero length: skip the rest instead of looping forever */ + break; + } +} + +static int +sigchecksum(void* address, int length) +{ + u8int *p, sum; + + sum = 0; + for(p = address; length-- > 0; p++) + sum += *p; + + return sum; +} + +static void* +sigscan(u8int* address, int length, char* signature) +{ + u8int *e, *p; + int siglength; + + DBG("check for %s in system base memory @ %#p\n", signature, address); + + e = address+length; + siglength = 
strlen(signature); + for(p = address; p+siglength < e; p += 16){ + if(memcmp(p, signature, siglength)) + continue; + return p; + } + + return nil; +} + +static uintmem mptab[] = {0, 1024, 0x9fc00, 1024, 0xf0000, 0x10000}; + +static void* +sigsearch(char* signature) +{ + uintmem p; + int i; + u8int *bda; + void *r; + + /* + * Search for the data structure: + * 1) within the first KiB of the Extended BIOS Data Area (EBDA), or + * 2) within the last KiB of system base memory if the EBDA segment + * is undefined, or + * 3) within the BIOS ROM address space between 0xf0000 and 0xfffff + */ + + for(i = 0; i < nelem(mptab); i += 2) + if(r = sigscan(KADDR(mptab[i]), mptab[i+1], signature)) + return r; + bda = KADDR(0x400); + if((p = (bda[0x0F]<<8|bda[0x0E])<<4) != 0){ + if((r = sigscan(KADDR(p), 1024, signature)) != nil) + return r; + } + if((p = ((bda[0x14]<<8)|bda[0x13])*1024) != 0){ + if((r = sigscan(KADDR(p-1024), 1024, signature)) != nil) + return r; + } + return nil; +} + +void +mpsinit(void) +{ + u8int *p; + int i, n; + _MP_ *mp; + PCMP *pcmp; + + if((mp = sigsearch("_MP_")) == nil) + return; + if(DBGFLG){ + DBG("_MP_ @ %#p, addr %#ux length %ud rev %d", + mp, l32get(mp->addr), mp->length, mp->revision); + for(i = 0; i < sizeof(mp->feature); i++) + DBG(" %2.2#ux", mp->feature[i]); + DBG("\n"); + } + if(mp->revision != 1 && mp->revision != 4) + return; + if(sigchecksum(mp, mp->length*16) != 0) + return; + + if((pcmp = vmap(l32get(mp->addr), sizeof(PCMP))) == nil) + return; + if(pcmp->revision != 1 && pcmp->revision != 4){ + vunmap(pcmp, sizeof(PCMP)); + return; + } + n = l16get(pcmp->length) + l16get(pcmp->xlength); + vunmap(pcmp, sizeof(PCMP)); + if((pcmp = vmap(l32get(mp->addr), n)) == nil) + return; + if(sigchecksum(pcmp, l16get(pcmp->length)) != 0){ + vunmap(pcmp, n); + return; + } + if(DBGFLG){ + DBG("PCMP @ %#p length %#ux revision %d\n", + pcmp, l16get(pcmp->length), pcmp->revision); + DBG(" %20.20s oaddr %#ux olength %#ux\n", + (char*)pcmp->string, 
l32get(pcmp->oaddr), + l16get(pcmp->olength)); + DBG(" entry %d apicpa %#ux\n", + l16get(pcmp->entry), l32get(pcmp->apicpa)); + + DBG(" xlength %#ux xchecksum %#ux\n", + l16get(pcmp->xlength), pcmp->xchecksum); + } + if(pcmp->xchecksum != 0){ + p = ((u8int*)pcmp) + l16get(pcmp->length); + i = sigchecksum(p, l16get(pcmp->xlength)); + if(((i+pcmp->xchecksum) & 0xff) != 0){ + print("extended table checksums to %#ux\n", i); + vunmap(pcmp, n); + return; + } + } + + /* + * Parse the PCMP table and set up the datastructures + * for later interrupt enabling and application processor + * startup. + */ + mpparse(pcmp); + + lapicdump(); + iordtdump(); +} diff -Nru /sys/src/9k/k10/msi.c /sys/src/9k/k10/msi.c --- /sys/src/9k/k10/msi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/msi.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,120 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "apic.h" + +enum { + Dpcicap = 1<<0, + Dmsicap = 1<<1, + Dvec = 1<<2, + Debug = 0, +}; + +enum { + /* address */ + Msiabase = 0xfee00000u, + Msiadest = 1<<12, /* same as 63:56 of apic vector */ + Msiaedest = 1<<4, /* same as 55:48 of apic vector */ + Msialowpri = 1<<3, /* redirection hint */ + Msialogical = 1<<2, + + /* data */ + Msidlevel = 1<<15, + Msidassert = 1<<14, + Msidlogical = 1<<11, + Msidmode = 1<<8, /* 3 bits; delivery mode */ + Msidvector = 0xff<<0, +}; + +enum{ + /* msi capabilities */ + Vmask = 1<<8, + Cap64 = 1<<7, + Mmesgmsk = 7<<4, + Mmcap = 7<<1, + Msienable = 1<<0, +}; + +static int +msicap(Pcidev *p) +{ + int c; + + c = pcicap(p, PciCapMSI); + if(c == -1) + return 0; + return c; +} + +static int +blacklist(Pcidev *p) +{ + switch(p->vid<<16 | p->did){ + case 0x11ab<<16 | 0x6485: + return -1; + } + return 0; +} + +int +pcimsienable(Pcidev *p, uvlong vec) +{ + char *s; + uint c, f, d, datao, lopri, dmode, logical; + + if(ioconf.nomsi) + return -1; + + c = msicap(p); + if(c == 0) + return -1; + + f = pcicfgr16(p, c + 2) 
& ~Mmesgmsk; + + if(blacklist(p) != 0) + return -1; + datao = 8; + d = vec>>48; + lopri = (vec & 0x700) == MTlp; + logical = (vec & Lm) != 0; + pcicfgw32(p, c + 4, Msiabase | Msiaedest * d + | Msialowpri * lopri | Msialogical * logical); + if(f & Cap64){ + datao += 4; + pcicfgw32(p, c + 8, 0); + } + dmode = (vec >> 8) & 7; + pcicfgw16(p, c + datao, Msidassert | Msidlogical * logical + | Msidmode * dmode | (uint)vec & 0xff); + if(f & Vmask) + pcicfgw32(p, c + datao + 4, 0); + + /* leave vectors configured but disabled for debugging */ + if((s = getconf("*nomsi")) != nil && atoi(s) != 0) + return -1; + + pcicfgw16(p, c + 2, f); + return 0; +} + +int +pcimsimask(Pcidev *p, int mask) +{ + uint c, f; + + c = msicap(p); + if(c == 0) + return -1; + f = pcicfgr16(p, c + 2) & ~Msienable; + if(mask){ + pcicfgw16(p, c + 2, f & ~Msienable); +// pciclrbme(p); cheeze + }else{ + pcisetbme(p); + pcicfgw16(p, c + 2, f | Msienable); + } + return 0; +} diff -Nru /sys/src/9k/k10/multiboot.c /sys/src/9k/k10/multiboot.c --- /sys/src/9k/k10/multiboot.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/multiboot.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,147 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +typedef struct Mbi Mbi; +struct Mbi { + u32int flags; + u32int memlower; + u32int memupper; + u32int bootdevice; + u32int cmdline; + u32int modscount; + u32int modsaddr; + u32int syms[4]; + u32int mmaplength; + u32int mmapaddr; + u32int driveslength; + u32int drivesaddr; + u32int configtable; + u32int bootloadername; + u32int apmtable; + u32int vbe[6]; +}; + +enum { /* flags */ + Fmem = 0x00000001, /* mem* valid */ + Fbootdevice = 0x00000002, /* bootdevice valid */ + Fcmdline = 0x00000004, /* cmdline valid */ + Fmods = 0x00000008, /* mod* valid */ + Fsyms = 0x00000010, /* syms[] has a.out info */ + Felf = 0x00000020, /* syms[] has ELF info */ + Fmmap = 0x00000040, /* mmap* valid */ + Fdrives = 0x00000080, /* drives* valid */ + Fconfigtable = 
0x00000100, /* configtable* valid */ + Fbootloadername = 0x00000200, /* bootloadername* valid */ + Fapmtable = 0x00000400, /* apmtable* valid */ + Fvbe = 0x00000800, /* vbe[] valid */ +}; + +typedef struct Mod Mod; +struct Mod { + u32int modstart; + u32int modend; + u32int string; + u32int reserved; +}; + +typedef struct MMap MMap; +struct MMap { + u32int size; + u32int base[2]; + u32int length[2]; + u32int type; +}; + +int +multiboot(u32int magic, u32int pmbi, int vflag) +{ + char *p; + int i, n; + Mbi *mbi; + Mod *mod; + MMap *mmap; + u64int addr, len; + + if(vflag) + print("magic %#ux pmbi %#ux\n", magic, pmbi); + if(magic != 0x2badb002) + return -1; + + mbi = KADDR(pmbi); + if(vflag) + print("flags %#ux\n", mbi->flags); + if(mbi->flags & Fcmdline){ + p = KADDR(mbi->cmdline); + if(vflag) + print("cmdline <%s>\n", p); + else + optionsinit(p); + } + if(mbi->flags & Fmods){ + for(i = 0; i < mbi->modscount; i++){ + mod = KADDR(mbi->modsaddr + i*16); + if(mod->string != 0) + p = KADDR(mod->string); + else + p = ""; + if(vflag) + print("mod %#ux %#ux <%s>\n", + mod->modstart, mod->modend, p); + else + asmmodinit(mod->modstart, mod->modend, p); + } + } + if(mbi->flags & Fmmap){ + mmap = KADDR(mbi->mmapaddr); + n = 0; + while(n < mbi->mmaplength){ + addr = (((u64int)mmap->base[1])<<32)|mmap->base[0]; + len = (((u64int)mmap->length[1])<<32)|mmap->length[0]; + switch(mmap->type){ + default: + if(vflag) + print("type %ud", mmap->type); + break; + case 1: + if(vflag) + print("Memory"); + else + asmmapinit(addr, len, mmap->type); + break; + case 2: + if(vflag) + print("reserved"); + else + asmmapinit(addr, len, mmap->type); + break; + case 3: + if(vflag) + print("ACPI Reclaim Memory"); + else + asmmapinit(addr, len, mmap->type); + break; + case 4: + if(vflag) + print("ACPI NVS Memory"); + else + asmmapinit(addr, len, mmap->type); + break; + } + if(vflag) + print("\n\t%#16.16llux %#16.16llux (%llud)\n", + addr, addr+len, len); + + n += mmap->size+sizeof(mmap->size); + mmap = 
KADDR(mbi->mmapaddr+n); + } + } + if(vflag && (mbi->flags & Fbootloadername)){ + p = KADDR(mbi->bootloadername); + print("bootloadername <%s>\n", p); + } + + return 0; +} diff -Nru /sys/src/9k/k10/options.c /sys/src/9k/k10/options.c --- /sys/src/9k/k10/options.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/options.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,129 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* +Conf conf; +char *confname[1] = { + "console", +}; +char *confval[1] = { + "0 b115200", +}; +int nconf = nelem(confname); + */ + +/* + * Where configuration info is left for the loaded programme. + * This will turn into a structure as more is done by the boot loader + * (e.g. why parse the .ini file twice?). + * There are 3584 bytes available at CONFADDR. + */ +#define CONFADDR PTR2UINT(KADDR(0x0001200)) + +#define BOOTLINE ((char*)CONFADDR) +#define BOOTLINELEN 64 +#define BOOTARGS ((char*)(CONFADDR+BOOTLINELEN)) +#define BOOTARGSLEN (4096-0x200-BOOTLINELEN) +#define MAXCONF 64 + +char *confname[MAXCONF]; +char *confval[MAXCONF]; +int nconf; + +void +confoptions(void) +{ + long i, n; + char *cp, *line[MAXCONF], *p, *q; + + /* + * parse configuration args from dos file plan9.ini + */ + cp = BOOTARGS; /* where b.com leaves its config */ + cp[BOOTARGSLEN-1] = 0; + + /* + * Strip out '\r', change '\t' -> ' '. 
+ */ + p = cp; + for(q = cp; *q; q++){ + if(*q == '\r') + continue; + if(*q == '\t') + *q = ' '; + *p++ = *q; + } + *p = 0; + + n = getfields(cp, line, MAXCONF, 1, "\n"); + for(i = 0; i < n; i++){ + if(*line[i] == '#') + continue; + cp = strchr(line[i], '='); + if(cp == nil) + continue; + *cp++ = '\0'; + confname[nconf] = line[i]; + confval[nconf] = cp; + nconf++; + } +} + +char* +getconf(char *name) +{ + int i; + + for(i = 0; i < nconf; i++) + if(cistrcmp(confname[i], name) == 0) + return confval[i]; + return 0; +} + +void +confsetenv(void) +{ + int i; + + for(i = 0; i < nconf; i++){ + if(confname[i][0] != '*') + ksetenv(confname[i], confval[i], 0); + ksetenv(confname[i], confval[i], 1); + } +} + +int +isaconfig(char *class, int ctlrno, ISAConf *isa) +{ + char cc[32], *p; + int i; + + snprint(cc, sizeof cc, "%s%d", class, ctlrno); + p = getconf(cc); + if(p == nil) + return 0; + + isa->type = ""; + isa->nopt = tokenize(p, isa->opt, NISAOPT); + for(i = 0; i < isa->nopt; i++){ + p = isa->opt[i]; + if(cistrncmp(p, "type=", 5) == 0) + isa->type = p + 5; + else if(cistrncmp(p, "port=", 5) == 0) + isa->port = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "irq=", 4) == 0) + isa->irq = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "dma=", 4) == 0) + isa->dma = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "mem=", 4) == 0) + isa->mem = strtoul(p+4, &p, 0); + else if(cistrncmp(p, "size=", 5) == 0) + isa->size = strtoul(p+5, &p, 0); + else if(cistrncmp(p, "freq=", 5) == 0) + isa->freq = strtoul(p+5, &p, 0); + } + return 1; +} diff -Nru /sys/src/9k/k10/ptclbsumamd64.s /sys/src/9k/k10/ptclbsumamd64.s --- /sys/src/9k/k10/ptclbsumamd64.s Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/ptclbsumamd64.s Wed Dec 9 00:00:00 2015 @@ -0,0 +1,125 @@ +TEXT ptclbsum(SB), 1, $0 + MOVQ RARG, SI /* addr+0(FP) */ + MOVL len+8(FP), CX + + XORL AX, AX /* sum */ + + TESTL $1, SI /* byte aligned? 
*/ + MOVQ SI, DI + JEQ _2align + + DECL CX + JLT _return + + MOVB 0x00(SI), AH + INCQ SI + +_2align: + TESTL $2, SI /* word aligned? */ + JEQ _32loop + + CMPL CX, $2 /* less than 2 bytes? */ + JLT _1dreg + SUBL $2, CX + + XORL BX, BX + MOVW 0x00(SI), BX + ADDL BX, AX + ADCL $0, AX + LEAQ 2(SI), SI + +_32loop: + CMPL CX, $0x20 + JLT _8loop + + MOVL CX, BP + SHRL $5, BP + ANDL $0x1F, CX + +_32loopx: + MOVL 0x00(SI), BX + MOVL 0x1C(SI), DX + ADCL BX, AX + MOVL 0x04(SI), BX + ADCL DX, AX + MOVL 0x10(SI), DX + ADCL BX, AX + MOVL 0x08(SI), BX + ADCL DX, AX + MOVL 0x14(SI), DX + ADCL BX, AX + MOVL 0x0C(SI), BX + ADCL DX, AX + MOVL 0x18(SI), DX + ADCL BX, AX + LEAQ 0x20(SI), SI + ADCL DX, AX + + DECL BP + JNE _32loopx + + ADCL $0, AX + +_8loop: + CMPL CX, $0x08 + JLT _2loop + + MOVL CX, BP + SHRL $3, BP + ANDL $0x07, CX + +_8loopx: + MOVL 0x00(SI), BX + ADCL BX, AX + MOVL 0x04(SI), DX + ADCL DX, AX + + LEAQ 0x08(SI), SI + DECL BP + JNE _8loopx + + ADCL $0, AX + +_2loop: + CMPL CX, $0x02 + JLT _1dreg + + MOVL CX, BP + SHRL $1, BP + ANDL $0x01, CX + +_2loopx: + MOVWLZX 0x00(SI), BX + ADCL BX, AX + + LEAQ 0x02(SI), SI + DECL BP + JNE _2loopx + + ADCL $0, AX + +_1dreg: + TESTL $1, CX /* 1 byte left? 
*/ + JEQ _fold + + XORL BX, BX + MOVB 0x00(SI), BX + ADDL BX, AX + ADCL $0, AX + +_fold: + MOVL AX, BX + SHRL $16, BX + JEQ _swab + + ANDL $0xFFFF, AX + ADDL BX, AX + JMP _fold + +_swab: + TESTL $1, DI /* copy of original RARG */ + JNE _return + XCHGB AH, AL + +_return: + RET diff -Nru /sys/src/9k/k10/root/cpu.c /sys/src/9k/k10/root/cpu.c --- /sys/src/9k/k10/root/cpu.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/root/cpu.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1222 @@ +/* + * cpu.c - Make a connection to a cpu server + * + * Invoked by listen as 'cpu -R' + * by users as 'cpu [-h system] [-c cmd args ...]' + */ + +#include +#include +#include +#include +#include +#include + +#define Maxfdata 8192 +#define MaxStr 128 + +void remoteside(int); +void fatal(int, char*, ...); +void lclnoteproc(int); +void rmtnoteproc(void); +void catcher(void*, char*); +void usage(void); +void writestr(int, char*, char*, int); +int readstr(int, char*, int); +char *rexcall(int*, char*, char*); +int setamalg(char*); +char *keyspec = ""; + +int notechan; +int exportpid; +char *system; +int cflag; +int dbg; +char *user; +char *patternfile; +int Nflag; + +char *srvname = "ncpu"; +char *exportfs = "/bin/exportfs"; +char *ealgs = "rc4_256 sha1"; + +/* message size for exportfs; may be larger so we can do big graphics in CPU window */ +int msgsize = Maxfdata+IOHDRSZ; + +/* authentication mechanisms */ +static int netkeyauth(int); +static int netkeysrvauth(int, char*); +static int p9auth(int); +static int srvp9auth(int, char*); +static int noauth(int); +static int srvnoauth(int, char*); + +typedef struct AuthMethod AuthMethod; +struct AuthMethod { + char *name; /* name of method */ + int (*cf)(int); /* client side authentication */ + int (*sf)(int, char*); /* server side authentication */ +} authmethod[] = +{ + { "p9", p9auth, srvp9auth,}, + { "netkey", netkeyauth, netkeysrvauth,}, + { "none", noauth, srvnoauth,}, + { nil, nil} +}; +AuthMethod *am = authmethod; /* default is p9 */ + +char 
*p9authproto = "p9any"; + +int setam(char*); + +void +usage(void) +{ + fprint(2, "usage: cpu [-h system] [-u user] [-a authmethod] [-e 'crypt hash'] [-k keypattern] [-P patternfile] [-c cmd args ...]\n"); + exits("usage"); +} + +/* + * parse the options, dial the cpu server, send it the optional + * command and our working directory, start the note relay and + * then exec exportfs on the connection. -R and -O run the + * server half, remoteside(), instead. + */ +void +main(int argc, char **argv) +{ + char dat[MaxStr], buf[MaxStr], cmd[MaxStr], *p, *err; + int ac, fd, ms, data; + char *av[10]; + + /* see if we should use a larger message size */ + fd = open("/dev/draw", OREAD); + if(fd >= 0){ /* 0 is a valid descriptor; only negative means failure */ + ms = iounit(fd); + if(msgsize < ms+IOHDRSZ) + msgsize = ms+IOHDRSZ; + close(fd); + } + + user = getuser(); + if(user == nil) + fatal(1, "can't read user name"); + ARGBEGIN{ + case 'a': + p = EARGF(usage()); + if(setam(p) < 0) + fatal(0, "unknown auth method %s", p); + break; + case 'e': + ealgs = EARGF(usage()); + if(*ealgs == 0 || strcmp(ealgs, "clear") == 0) + ealgs = nil; + break; + case 'd': + dbg++; + break; + case 'f': + /* ignored but accepted for compatibility */ + break; + case 'O': + p9authproto = "p9sk2"; + remoteside(1); /* From listen */ + break; + case 'R': /* From listen */ + remoteside(0); + break; + case 'h': + system = EARGF(usage()); + break; + case 'c': + cflag++; + cmd[0] = '!'; + cmd[1] = '\0'; + while(p = ARGF()) { + /* cmd is a fixed MaxStr buffer; refuse rather than overflow it */ + if(strlen(cmd) + 1 + strlen(p) >= sizeof cmd) + fatal(0, "command too long"); + strcat(cmd, " "); + strcat(cmd, p); + } + break; + case 'k': + keyspec = smprint("%s %s", keyspec, EARGF(usage())); + break; + case 'P': + patternfile = EARGF(usage()); + break; + case 'u': + user = EARGF(usage()); + keyspec = smprint("%s user=%s", keyspec, user); + break; + case 'N': + Nflag = 1; + break; + default: + usage(); + }ARGEND; + + + if(argc != 0) + usage(); + + if(system == nil) { + p = getenv("cpu"); + if(p == 0) + fatal(0, "set $cpu"); + system = p; + } + + if(err = rexcall(&data, system, srvname)) + fatal(1, "%s: %s", err, system); + + /* Tell the remote side the command to execute and where our working directory is */ + if(cflag) + writestr(data, cmd, "command", 0); + if(getwd(dat, sizeof(dat)) == 0) + writestr(data, "NO", "dir", 0); + else + writestr(data, 
dat, "dir", 0); + + /* start up a process to pass along notes */ + lclnoteproc(data); + + /* + * Wait for the other end to execute and start our file service + * of /mnt/term + */ + if(readstr(data, buf, sizeof(buf)) < 0) + fatal(1, "waiting for FS: %r"); + if(strncmp("FS", buf, 2) != 0) { + print("remote cpu: %s", buf); + exits(buf); + } + + /* Begin serving the gnot namespace */ + close(0); + dup(data, 0); + close(data); + + sprint(buf, "%d", msgsize); + ac = 0; + av[ac++] = exportfs; + av[ac++] = "-m"; + av[ac++] = buf; + if(dbg) + av[ac++] = "-d"; + if(patternfile != nil){ + av[ac++] = "-P"; + av[ac++] = patternfile; + } + av[ac] = nil; + exec(exportfs, av); + fatal(1, "starting exportfs"); +} + +void +fatal(int syserr, char *fmt, ...) +{ + Fmt f; + char *str; + va_list arg; + + fmtstrinit(&f); + fmtprint(&f, "cpu: "); + va_start(arg, fmt); + fmtvprint(&f, fmt, arg); + va_end(arg); + if(syserr) + fmtprint(&f, ": %r"); + fmtprint(&f, "\n"); + str = fmtstrflush(&f); + write(2, str, strlen(str)); + exits(str); +} + +char *negstr = "negotiating authentication method"; + +char bug[256]; + +int +old9p(int fd) +{ + int p[2]; + + if(pipe(p) < 0) + fatal(1, "pipe"); + + switch(rfork(RFPROC|RFFDG|RFNAMEG)) { + case -1: + fatal(1, "rfork srvold9p"); + case 0: + if(fd != 1){ + dup(fd, 1); + close(fd); + } + if(p[0] != 0){ + dup(p[0], 0); + close(p[0]); + } + close(p[1]); + if(0){ + fd = open("/sys/log/cpu", OWRITE); + if(fd != 2){ + dup(fd, 2); + close(fd); + } + execl("/bin/srvold9p", "srvold9p", "-ds", nil); + } else + execl("/bin/srvold9p", "srvold9p", "-s", nil); + fatal(1, "exec srvold9p"); + default: + close(fd); + close(p[0]); + } + return p[1]; +} + +/* Invoked with stdin, stdout and stderr connected to the network connection */ +void +remoteside(int old) +{ + char user[MaxStr], home[MaxStr], buf[MaxStr], xdir[MaxStr], cmd[MaxStr]; + int i, n, fd, badchdir, gotcmd; + + rfork(RFENVG); + putenv("service", "cpu"); + fd = 0; + + /* negotiate authentication mechanism */ 
+ n = readstr(fd, cmd, sizeof(cmd)); + if(n < 0) + fatal(1, "authenticating"); + if(setamalg(cmd) < 0 || (strcmp(am->name, "none") == 0 && !Nflag)){ + writestr(fd, "unsupported auth method", nil, 0); + fatal(1, "bad auth method %s", cmd); + } else + writestr(fd, "", "", 1); + + fd = (*am->sf)(fd, user); + if(fd < 0) + fatal(1, "srvauth"); + + /* Set environment values for the user */ + putenv("user", user); + /* user can fill all of its MaxStr bytes, so bound the write into home */ + snprint(home, sizeof(home), "/usr/%s", user); + putenv("home", home); + + /* Now collect invoking cpu's current directory or possibly a command */ + gotcmd = 0; + if(readstr(fd, xdir, sizeof(xdir)) < 0) + fatal(1, "dir/cmd"); + if(xdir[0] == '!') { + strcpy(cmd, &xdir[1]); + gotcmd = 1; + if(readstr(fd, xdir, sizeof(xdir)) < 0) + fatal(1, "dir"); + } + + /* Establish the new process at the current working directory of the + * gnot */ + badchdir = 0; + if(strcmp(xdir, "NO") == 0) + chdir(home); + else if(chdir(xdir) < 0) { + badchdir = 1; + chdir(home); + } + + /* Start the gnot serving its namespace */ + writestr(fd, "FS", "FS", 0); + writestr(fd, "/", "exportfs dir", 0); + + n = read(fd, buf, sizeof(buf)); + if(n != 2 || buf[0] != 'O' || buf[1] != 'K') + exits("remote tree"); + + if(old) + fd = old9p(fd); + + /* make sure buffers are big by doing fversion explicitly; pick a huge number; other side will trim */ + strcpy(buf, VERSION9P); + if(fversion(fd, 64*1024, buf, sizeof buf) < 0) + exits("fversion failed"); + if(mount(fd, -1, "/mnt/term", MCREATE|MREPL, "") < 0) + exits("mount failed"); + + close(fd); + + /* the remote noteproc uses the mount so it must follow it */ + rmtnoteproc(); + + for(i = 0; i < 3; i++) + close(i); + + if(open("/mnt/term/dev/cons", OREAD) != 0) + exits("open stdin"); + if(open("/mnt/term/dev/cons", OWRITE) != 1) + exits("open stdout"); + dup(1, 2); + + if(badchdir) + print("cpu: failed to chdir to '%s'\n", xdir); + + if(gotcmd) + execl("/bin/rc", "rc", "-lc", cmd, nil); + else + execl("/bin/rc", "rc", "-li", nil); + fatal(1, "exec shell"); +} + 
+/* + * dial service on host, negotiate the authentication method (and + * ealgs, if any), then run the client side of the chosen method. + * returns 0 with *fd set to the resulting connection, or a string + * naming the step that failed. + */ +char* +rexcall(int *fd, char *host, char *service) +{ + char *na; + char dir[MaxStr]; + char err[ERRMAX]; + char msg[MaxStr]; + int n; + + na = netmkaddr(host, 0, service); + if((*fd = dial(na, 0, dir, 0)) < 0) + return "can't dial"; + + /* negotiate authentication mechanism */ + if(ealgs != nil) + snprint(msg, sizeof(msg), "%s %s", am->name, ealgs); + else + snprint(msg, sizeof(msg), "%s", am->name); + writestr(*fd, msg, negstr, 0); + n = readstr(*fd, err, sizeof err); + if(n < 0) + return negstr; + if(*err){ + /* err came from the peer: never use it as a format */ + werrstr("%s", err); + return negstr; + } + + /* authenticate */ + *fd = (*am->cf)(*fd); + if(*fd < 0) + return "can't authenticate"; + return 0; +} + +/* + * send str including its terminating NUL. unless ignore is set, + * a write error is fatal and thing names what was being sent. + */ +void +writestr(int fd, char *str, char *thing, int ignore) +{ + int l, n; + + l = strlen(str); + n = write(fd, str, l+1); + if(!ignore && n < 0) + fatal(1, "writing network: %s", thing); +} + +/* + * read a NUL-terminated string a byte at a time into str, which + * holds len bytes. returns 0 once the NUL has been read; -1 on + * read error, end of file, or if no NUL arrives within len bytes. + */ +int +readstr(int fd, char *str, int len) +{ + int n; + + while(len) { + n = read(fd, str, 1); + if(n <= 0) /* on eof nothing was stored, so *str must not be examined */ + return -1; + if(*str == '\0') + return 0; + str++; + len--; + } + return -1; +} + +static int +readln(char *buf, int n) +{ + int i; + char *p; + + n--; /* room for \0 */ + p = buf; + for(i=0; ichal, "challenge", 1); + if(readstr(fd, response, sizeof response) < 0) + return -1; + ch->resp = response; + ch->nresp = strlen(response); + if((ai = auth_response(ch)) != nil) + break; + } + auth_freechal(ch); + if(ai == nil) + return -1; + writestr(fd, "", "challenge", 1); + if(auth_chuid(ai, 0) < 0) + fatal(1, "newns"); + auth_freeAI(ai); + return fd; +} + +static void +mksecret(char *t, uchar *f) +{ + sprint(t, "%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux", + f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7], f[8], f[9]); +} + +/* + * plan9 authentication followed by rc4 encryption + */ +static int +p9auth(int fd) +{ + uchar key[16]; + uchar digest[SHA1dlen]; + char fromclientsecret[21]; + char fromserversecret[21]; + int i; + AuthInfo *ai; + + ai = auth_proxy(fd, auth_getkey, "proto=%q role=client %s", 
p9authproto, keyspec); + if(ai == nil) + return -1; + memmove(key+4, ai->secret, ai->nsecret); + if(ealgs == nil) + return fd; + + /* exchange random numbers */ + srand(truerand()); + for(i = 0; i < 4; i++) + key[i] = rand(); + if(write(fd, key, 4) != 4) + return -1; + if(readn(fd, key+12, 4) != 4) + return -1; + + /* scramble into two secrets */ + sha1(key, sizeof(key), digest, nil); + mksecret(fromclientsecret, digest); + mksecret(fromserversecret, digest+10); + + /* set up encryption */ + i = pushssl(fd, ealgs, fromclientsecret, fromserversecret, nil); + if(i < 0) + werrstr("can't establish ssl connection: %r"); + return i; +} + +/* + * contents of /dev/hostowner, or "none" if it cannot be read. + * the result lives in a static buffer. + */ +static char* +gethostowner(void) +{ + static char hostowner[64]; + int fd; + int n; + + if((fd = open("/dev/hostowner", OREAD)) < 0) + return "none"; + n = read(fd, hostowner, sizeof(hostowner)-1); + close(fd); + if(n <= 0) + strcpy(hostowner, "none"); + else + hostowner[n] = 0; + return hostowner; +} + +/* + * write the hmac_sha1 of the user name to (keyed by the text after + * the '@') to #¤/caphash, then write the whole "user@key" string + * to #¤/capuse. returns a negative value with errstr set on failure. + */ +static int +chuid(char* to) +{ + int fd, r; + char *cap, *p; + uchar hash[SHA1dlen]; + + if((fd = open("#¤/caphash", OWRITE)) < 0){ + werrstr("open #¤/caphash: %r"); + return -1; + } + + cap = smprint("%s@Why can't we all just get along?", to); + if(cap == nil){ + close(fd); + werrstr("chuid: out of memory"); + return -1; + } + p = strrchr(cap, '@'); + hmac_sha1((uchar*)cap, p-cap, (uchar*)p+1, strlen(p+1), hash, nil); + if(write(fd, hash, SHA1dlen) < 0){ + werrstr("write #¤/caphash: %r"); + close(fd); /* don't leak the descriptor on failure */ + free(cap); + return -1; + } + close(fd); + + if((fd = open("#¤/capuse", OWRITE)) < 0){ + free(cap); + werrstr("open #¤/capuse: %r"); + return -1; + } + r = write(fd, cap, strlen(cap)); + close(fd); + free(cap); + + if(r < 0) + werrstr("write #¤/capuse: %r"); + + return r; +} + +/* + * client side of the "none" method: send our user name and expect + * an empty reply; a non-empty reply is reported through errstr. + */ +static int +noauth(int fd) +{ + char response[32]; + + ealgs = nil; + writestr(fd, user, "noauth user", 1); + memset(response, 0, sizeof(response)); + if(readstr(fd, response, sizeof(response)) < 0 || response[0]){ + /* readstr can fail with the buffer full and unterminated */ + response[sizeof(response)-1] = 0; + werrstr("%s", response); + return -1; + } + return fd; +} + +/* + * server side of the "none" method: read the requested user name, + * refuse the host owner, then either write "none" to #c/user or + * call chuid(), and build the user's namespace with newns(). + */ +static int +srvnoauth(int fd, char *user) +{ + int ufd; + + if(readstr(fd, user, MaxStr) < 0) + return -1; + if(strcmp(user, 
gethostowner()) == 0){ + writestr(fd, "permission denied: hostowner", "hostowner", 0); + return -1; + } + writestr(fd, "", "", 1); + ealgs = nil; + if(strcmp(user, "none") == 0){ + if((ufd = open("#c/user", OWRITE)) < 0) + return -1; + if(write(ufd, "none", 4) < 0){ + close(ufd); /* don't leak the descriptor on failure */ + return -1; + } + close(ufd); + } + else + chuid(user); + newns(user, nil); + return fd; +} + +/* + * syslog the bytes of p as hex. the output is cut short at what + * fits in buf rather than running past its end. + */ +void +loghex(uchar *p, int n) +{ + char buf[100]; + int i; + + buf[0] = 0; + for(i = 0; i < n && 2*i+2 < sizeof(buf); i++) + sprint(buf+2*i, "%2.2ux", p[i]); + syslog(0, "cpu", "%s", buf); +} + +static int +srvp9auth(int fd, char *user) +{ + uchar key[16]; + uchar digest[SHA1dlen]; + char fromclientsecret[21]; + char fromserversecret[21]; + int i; + AuthInfo *ai; + + ai = auth_proxy(0, nil, "proto=%q role=server %s", p9authproto, keyspec); + if(ai == nil) + return -1; + if(auth_chuid(ai, nil) < 0) + return -1; + strecpy(user, user+MaxStr, ai->cuid); + memmove(key+4, ai->secret, ai->nsecret); + + if(ealgs == nil) + return fd; + + /* exchange random numbers */ + srand(truerand()); + for(i = 0; i < 4; i++) + key[i+12] = rand(); + if(readn(fd, key, 4) != 4) + return -1; + if(write(fd, key+12, 4) != 4) + return -1; + + /* scramble into two secrets */ + sha1(key, sizeof(key), digest, nil); + mksecret(fromclientsecret, digest); + mksecret(fromserversecret, digest+10); + + /* set up encryption */ + i = pushssl(fd, ealgs, fromserversecret, fromclientsecret, nil); + if(i < 0) + werrstr("can't establish ssl connection: %r"); + return i; +} + +/* + * set authentication mechanism + */ +int +setam(char *name) +{ + for(am = authmethod; am->name != nil; am++) + if(strcmp(am->name, name) == 0) + return 0; + am = authmethod; + return -1; +} + +/* + * set authentication mechanism and encryption/hash algs + */ +int +setamalg(char *s) +{ + ealgs = strchr(s, ' '); + if(ealgs != nil) + *ealgs++ = 0; + return setam(s); +} + +char *rmtnotefile = "/mnt/term/dev/cpunote"; + +/* + * loop reading /mnt/term/dev/cpunote looking for notes. + * The child returns to start the shell. 
+ */ +void +rmtnoteproc(void) +{ + int n, fd, pid, notepid; + char buf[256]; + + /* new proc returns to start shell */ + pid = rfork(RFPROC|RFFDG|RFNOTEG|RFNAMEG|RFMEM); + switch(pid){ + case -1: + syslog(0, "cpu", "cpu -R: can't start noteproc: %r"); + return; + case 0: + return; + } + + /* new proc reads notes from other side and posts them to shell */ + switch(notepid = rfork(RFPROC|RFFDG|RFMEM)){ + case -1: + syslog(0, "cpu", "cpu -R: can't start wait proc: %r"); + _exits(0); + case 0: + fd = open(rmtnotefile, OREAD); + if(fd < 0){ + syslog(0, "cpu", "cpu -R: can't open %s", rmtnotefile); + _exits(0); + } + + for(;;){ + n = read(fd, buf, sizeof(buf)-1); + if(n <= 0){ + postnote(PNGROUP, pid, "hangup"); + _exits(0); + } + buf[n] = 0; + postnote(PNGROUP, pid, buf); + } + } + + /* original proc waits for shell proc to die and kills note proc */ + for(;;){ + n = waitpid(); + if(n < 0 || n == pid) + break; + } + postnote(PNPROC, notepid, "kill"); + _exits(0); +} + +enum +{ + Qdir, + Qcpunote, + + Nfid = 32, +}; + +struct { + char *name; + Qid qid; + ulong perm; +} fstab[] = +{ + [Qdir] { ".", {Qdir, 0, QTDIR}, DMDIR|0555 }, + [Qcpunote] { "cpunote", {Qcpunote, 0}, 0444 }, +}; + +typedef struct Note Note; +struct Note +{ + Note *next; + char msg[ERRMAX]; +}; + +typedef struct Request Request; +struct Request +{ + Request *next; + Fcall f; +}; + +typedef struct Fid Fid; +struct Fid +{ + int fid; + int file; + int omode; +}; +Fid fids[Nfid]; + +struct { + Lock; + Note *nfirst, *nlast; + Request *rfirst, *rlast; +} nfs; + +int +fsreply(int fd, Fcall *f) +{ + uchar buf[IOHDRSZ+Maxfdata]; + int n; + + if(dbg) + fprint(2, "notefs: <-%F\n", f); + n = convS2M(f, buf, sizeof buf); + if(n > 0){ + if(write(fd, buf, n) != n){ + close(fd); + return -1; + } + } + return 0; +} + +/* match a note read request with a note, reply to the request */ +int +kick(int fd) +{ + Request *rp; + Note *np; + int rv; + + for(;;){ + lock(&nfs); + rp = nfs.rfirst; + np = nfs.nfirst; + if(rp == nil 
|| np == nil){ + unlock(&nfs); + break; + } + nfs.rfirst = rp->next; + nfs.nfirst = np->next; + unlock(&nfs); + + rp->f.type = Rread; + rp->f.count = strlen(np->msg); + rp->f.data = np->msg; + rv = fsreply(fd, &rp->f); + free(rp); + free(np); + if(rv < 0) + return -1; + } + return 0; +} + +void +flushreq(int tag) +{ + Request **l, *rp; + + lock(&nfs); + for(l = &nfs.rfirst; *l != nil; l = &(*l)->next){ + rp = *l; + if(rp->f.tag == tag){ + *l = rp->next; + unlock(&nfs); + free(rp); + return; + } + } + unlock(&nfs); +} + +Fid* +getfid(int fid) +{ + int i, freefid; + + freefid = -1; + for(i = 0; i < Nfid; i++){ + if(freefid < 0 && fids[i].file < 0) + freefid = i; + if(fids[i].fid == fid) + return &fids[i]; + } + if(freefid >= 0){ + fids[freefid].fid = fid; + return &fids[freefid]; + } + return nil; +} + +int +fsstat(int fd, Fid *fid, Fcall *f) +{ + Dir d; + uchar statbuf[256]; + + memset(&d, 0, sizeof(d)); + d.name = fstab[fid->file].name; + d.uid = user; + d.gid = user; + d.muid = user; + d.qid = fstab[fid->file].qid; + d.mode = fstab[fid->file].perm; + d.atime = d.mtime = time(0); + f->stat = statbuf; + f->nstat = convD2M(&d, statbuf, sizeof statbuf); + return fsreply(fd, f); +} + +int +fsread(int fd, Fid *fid, Fcall *f) +{ + Dir d; + uchar buf[256]; + Request *rp; + + switch(fid->file){ + default: + return -1; + case Qdir: + if(f->offset == 0 && f->count >0){ + memset(&d, 0, sizeof(d)); + d.name = fstab[Qcpunote].name; + d.uid = user; + d.gid = user; + d.muid = user; + d.qid = fstab[Qcpunote].qid; + d.mode = fstab[Qcpunote].perm; + d.atime = d.mtime = time(0); + f->count = convD2M(&d, buf, sizeof buf); + f->data = (char*)buf; + } else + f->count = 0; + return fsreply(fd, f); + case Qcpunote: + rp = mallocz(sizeof(*rp), 1); + if(rp == nil) + return -1; + rp->f = *f; + lock(&nfs); + if(nfs.rfirst == nil) + nfs.rfirst = rp; + else + nfs.rlast->next = rp; + nfs.rlast = rp; + unlock(&nfs); + return kick(fd);; + } +} + +char Eperm[] = "permission denied"; +char Enofile[] 
= "out of files"; +char Enotdir[] = "not a directory"; + +void +notefs(int fd) +{ + uchar buf[IOHDRSZ+Maxfdata]; + int i, n, ncpunote; + Fcall f; + Qid wqid[MAXWELEM]; + Fid *fid, *nfid; + int doreply; + + rfork(RFNOTEG); + fmtinstall('F', fcallfmt); + + for(n = 0; n < Nfid; n++){ + fids[n].file = -1; + fids[n].omode = -1; + } + + ncpunote = 0; + for(;;){ + n = read9pmsg(fd, buf, sizeof(buf)); + if(n <= 0){ + if(dbg) + fprint(2, "read9pmsg(%d) returns %d: %r\n", fd, n); + break; + } + if(convM2S(buf, n, &f) <= BIT16SZ) + break; + if(dbg) + fprint(2, "notefs: ->%F\n", &f); + doreply = 1; + fid = getfid(f.fid); + if(fid == nil){ +nofids: + f.type = Rerror; + f.ename = Enofile; + fsreply(fd, &f); + continue; + } + switch(f.type++){ + default: + f.type = Rerror; + f.ename = "unknown type"; + break; + case Tflush: + flushreq(f.oldtag); + break; + case Tversion: + if(f.msize > IOHDRSZ+Maxfdata) + f.msize = IOHDRSZ+Maxfdata; + break; + case Tauth: + f.type = Rerror; + f.ename = "authentication not required"; + break; + case Tattach: + f.qid = fstab[Qdir].qid; + fid->file = Qdir; + break; + case Twalk: + nfid = nil; + if(f.newfid != f.fid){ + nfid = getfid(f.newfid); + if(nfid == nil) + goto nofids; + nfid->file = fid->file; + fid = nfid; + } + for(i=0; ifile != Qdir){ + f.type = Rerror; + f.ename = Enotdir; + break; + } + if(strcmp(f.wname[i], "..") == 0){ + wqid[i] = fstab[Qdir].qid; + continue; + } + if(strcmp(f.wname[i], "cpunote") != 0){ + if(i == 0){ + f.type = Rerror; + f.ename = "file does not exist"; + } + break; + } + fid->file = Qcpunote; + wqid[i] = fstab[Qcpunote].qid; + } + if(nfid != nil && (f.type == Rerror || i < f.nwname)) + nfid ->file = -1; + if(f.type != Rerror){ + f.nwqid = i; + for(i=0; iomode = f.mode; + if(fid->file == Qcpunote) + ncpunote++; + f.qid = fstab[fid->file].qid; + f.iounit = 0; + break; + case Tread: + if(fsread(fd, fid, &f) < 0) + goto err; + doreply = 0; + break; + case Tclunk: + if(fid->omode != -1 && fid->file == Qcpunote){ + 
ncpunote--; + if(ncpunote == 0) /* remote side is done */ + goto err; + } + fid->file = -1; + fid->omode = -1; + break; + case Tstat: + if(fsstat(fd, fid, &f) < 0) + goto err; + doreply = 0; + break; + case Tcreate: + case Twrite: + case Tremove: + case Twstat: + f.type = Rerror; + f.ename = Eperm; + break; + } + if(doreply) + if(fsreply(fd, &f) < 0) + break; + } +err: + if(dbg) + fprint(2, "notefs exiting: %r\n"); + werrstr("success"); + postnote(PNGROUP, exportpid, "kill"); + if(dbg) + fprint(2, "postnote PNGROUP %d: %r\n", exportpid); + close(fd); +} + +char notebuf[ERRMAX]; + +/* + * note handler: save the note text in notebuf, truncated to fit, + * and continue. + */ +void +catcher(void*, char *text) +{ + int n; + + n = strlen(text); + if(n >= sizeof(notebuf)) + n = sizeof(notebuf)-1; + memmove(notebuf, text, n); + notebuf[n] = '\0'; + noted(NCONT); +} + +/* + * mount in /dev a note file for the remote side to read. + * the child mounts one end of a pipe on /dev and returns to the + * caller; the parent starts notefs() on the other end, queues each + * note it catches for it, and exits once the child is gone. + */ +void +lclnoteproc(int netfd) +{ + Waitmsg *w; + Note *np; + int pfd[2]; + int pid; + + if(pipe(pfd) < 0){ + fprint(2, "cpu: can't start note proc: pipe: %r\n"); + return; + } + + /* new proc mounts and returns to start exportfs */ + switch(pid = rfork(RFPROC|RFNAMEG|RFFDG|RFMEM)){ + default: + exportpid = pid; + break; + case -1: + fprint(2, "cpu: can't start note proc: rfork: %r\n"); + return; + case 0: + close(pfd[0]); + if(mount(pfd[1], -1, "/dev", MBEFORE, "") < 0) + fprint(2, "cpu: can't mount note proc: %r\n"); + close(pfd[1]); + return; + } + + close(netfd); + close(pfd[1]); + + /* new proc listens for note file system rpc's */ + switch(rfork(RFPROC|RFNAMEG|RFMEM)){ + case -1: + fprint(2, "cpu: can't start note proc: rfork1: %r\n"); + _exits(0); + case 0: + notefs(pfd[0]); + _exits(0); + } + + /* original proc waits for notes */ + notify(catcher); + w = nil; + for(;;) { + *notebuf = 0; + free(w); + w = wait(); + if(w == nil) { + if(*notebuf == 0) + break; + np = mallocz(sizeof(Note), 1); + if(np != nil){ + strcpy(np->msg, notebuf); + lock(&nfs); + if(nfs.nfirst == nil) + nfs.nfirst = np; + else + nfs.nlast->next = np; + 
nfs.nlast = np; + unlock(&nfs); + /* nfs is released here; kick() takes the lock itself */ + kick(pfd[0]); + } + } else if(w->pid == exportpid) + break; + } + + if(w == nil) + exits(nil); + exits(0); +/* exits(w->msg); */ +} diff -Nru /sys/src/9k/k10/root/k8root.namespace /sys/src/9k/k10/root/k8root.namespace --- /sys/src/9k/k10/root/k8root.namespace Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/root/k8root.namespace Wed Dec 9 00:00:00 2015 @@ -0,0 +1,29 @@ +# root +mount -a $rootsrv /root $rootspec +bind -a /root / +bind -c /root/mnt /mnt + +# kernel devices +bind #c /dev +bind #d /fd +bind -c #e /env +bind #p /proc +bind -c #s /srv + +# mount points + +# authentication +mount -a /srv/factotum /mnt + +# standard bin +bind /root/$cputype/bin /bin +bind -a /root/rc/bin /bin + +# networks +bind -a #I /net +bind -a #l0 /net +#mount -a /srv/cs /net +#mount -a /srv/dns /net + +bind -c /usr/$user/tmp /tmp +cd /usr/$user diff -Nru /sys/src/9k/k10/root/k8root.proto /sys/src/9k/k10/root/k8root.proto --- /sys/src/9k/k10/root/k8root.proto Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/root/k8root.proto Wed Dec 9 00:00:00 2015 @@ -0,0 +1,79 @@ +adm d775 sys sys /tmp/empty + nvram 400 sys sys ./root/nvram + timezone d775 sys sys /tmp/empty +lib d775 sys sys /tmp/empty + namespace 664 sys sys ./root/k8root.namespace + profile 664 sys sys ../root/profile +mnt d775 sys sys /tmp/empty + cons d775 sys sys /tmp/empty + cons d775 sys sys /tmp/empty + consctl d775 sys sys /tmp/empty + exportfs d775 sys sys /tmp/empty + 0 d775 sys sys /tmp/empty + 1 d775 sys sys /tmp/empty + 2 d775 sys sys /tmp/empty + 3 d775 sys sys /tmp/empty + 4 d775 sys sys /tmp/empty + 5 d775 sys sys /tmp/empty + 6 d775 sys sys /tmp/empty + 7 d775 sys sys /tmp/empty + 8 d775 sys sys /tmp/empty + 9 d775 sys sys /tmp/empty + 10 d775 sys sys /tmp/empty + 11 d775 sys sys /tmp/empty + 12 d775 sys sys /tmp/empty + 13 d775 sys sys /tmp/empty + 14 d775 sys sys /tmp/empty + 15 d775 sys sys /tmp/empty + 16 d775 sys sys /tmp/empty + keys d775 sys sys /tmp/empty + temp d775 
sys sys /tmp/empty + term d775 sys sys /tmp/empty +n d775 sys sys /tmp/empty + dump d775 sys sys /tmp/empty + fs d775 sys sys /tmp/empty + io d775 sys sys /tmp/empty + mnt d775 sys sys /tmp/empty +rc d775 sys sys /tmp/empty + bin d775 sys sys /tmp/empty + service d775 sys sys /tmp/empty + tcp23 775 sys sys ../root/tcp23 + tcp17007 775 sys sys /bin/service/tcp17007 + tcp17010 775 sys sys ./root/tcp17010 + lib d775 sys sys /tmp/empty + rcmain 775 sys sys +tmp d775 sys sys /tmp/empty +usr d775 sys sys /tmp/empty + glenda d775 sys sys /tmp/empty + lib d775 sys sys /tmp/empty + profile 664 sys sys ../root/profile + +amd64 d775 sys sys /tmp/empty + bin d775 sys sys /tmp/empty + # used by k8cpu.rc to get off the ground + bind 775 sys sys + echo 775 sys sys + rc 775 sys sys + sed 775 sys sys + srv 775 sys sys + telnet 775 sys sys + test 775 sys sys + + # used to provide standalone service + auth d775 sys sys /tmp/empty + factotum 775 sys sys + aux d775 sys sys /tmp/empty + listen 775 sys sys + ip d775 sys sys /tmp/empty + telnetd 775 sys sys + ndb d775 sys sys /tmp/empty + cs 775 sys sys + + cpu 775 sys sys ./root/6.cpu + exportfs 775 sys sys + + # used for debugging + + include d775 sys sys /tmp/empty + lib d775 sys sys /tmp/empty + mkfile 775 sys sys diff -Nru /sys/src/9k/k10/root/k8root.rc /sys/src/9k/k10/root/k8root.rc --- /sys/src/9k/k10/root/k8root.rc Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/root/k8root.rc Wed Dec 9 00:00:00 2015 @@ -0,0 +1,114 @@ +#!/boot/rc -m /boot/rcmain +flag x + + +cputype=amd64 +objtype=$cputype +service=cpu +authid=bootes +rootdir=/root +rootspec='' +rootsrv=boot + +beetroot=k8root.rr +authentication='nvram=/boot/adm/nvram auth/factotum -sfactotum -S' # -a ... 
+# test xyzip=(0 0 0 104.9.33) +# test fsaddr='tcp!135.$xyzip(4)^!564' +ip=(135.104.9.32 255.255.255.0 135.104.9.0 135.104.9.1) + +# +# Post the read-only filesystem in #s/$beetroot +# and mount it on /boot so the commands in /boot/$cputype/bin +# are available to create the namespace (namespaces like to +# mount #s/boot on / and that should not be the read-only +# filesystem). +# Must set hostowner to be that of the owner of the nvram file +# before paqfs starts otherwise factotum will not be able to +# open it. +# +/boot/echo -n sys > '#c/hostowner' +/boot/paqfs -p -S $beetroot -m /boot -q /boot/$beetroot +cd /boot/$cputype/bin +bind '#c' /dev +bind '#d' /fd +bind -c '#e' /env +bind '#p' /proc +bind -c '#s' /srv + +# +# Configure the networks. +# +bind -a '#I' /net +bind -a '#l0' /net + +if(~ $#ip 4 && ! ~ $ip(1) '10.-1.-1.-1'){ + i=`{sed '' /net/ipifc/clone} + echo bind ether /net/ether0 > /net/ipifc/$i/ctl + echo add $ip(1) $ip(2) $ip(3) > /net/ipifc/$i/ctl + echo add 0 0 $ip(4) >>/net/iproute + echo I am $ip(1)^, default route $ip(4) +} +i=`{sed '' /net/ipifc/clone} +echo bind loopback /dev/null > /net/ipifc/$i/ctl +echo add 127.0.0.1 255.0.0.0 127.0.0.0 > /net/ipifc/$i/ctl + +# +# Set up authentication if necessary. +# Factotum has to be allowed to mount on /mnt here because +# auth_proxy (called by mount) will look for it there. +# Normally, factotum will set '#c/hostowner'; if not, do it +# by hand. +# +if(! ~ $authentication '') + eval `{echo $authentication} +if(~ `{sed '' '#c/hostowner'} sys) + echo -n $authid > '#c/hostowner' + +# +# Attach to the remote filesystem and mount it. +# If this fails, set $root(dir|srv) and continue, +# there's enough in the read-only filesystem to run +# listen and telnet; at least cat /dev/kmesg might +# then give a clue as to the problem. +# Must check for the presence of expected files after +# the mount because srv/mount do not always return +# proper status. 
+# $rootsrv is used in /lib/namespace because the +# root might not be served from the usual #s/boot. +# +if(! ~ $fsaddr '' && ! eval srv -c -m $fsaddr $rootsrv $rootdir) + echo srv -c -m $fsaddr $rootsrv $rootdir fails: $status +if(! test -d $rootdir/$cputype){ + rootdir=/boot + rootspec='' + rootsrv=$beetroot +} +rootsrv='#s/'$rootsrv +echo root is on $rootdir, root is served from $rootsrv + +# +# Finish the namespace setup. +# +bind -a $rootdir / +bind -c -b $rootdir/mnt /mnt +bind $rootdir/$cputype/bin /bin +bind -a $rootdir/rc/bin /bin +cd / + +# +# Finish environment setup and start services. +# Listen is run trusted if there is no factotum running, +# as 'cpu -R' with no authentication needs to be able to +# open '#¤/caphash' in order to change the owner and +# that can only be done if running as '#c/hostowner'. +# +sysname=cpu-$ip(1) +prompt=($sysname'# ' ' ') +bind /boot/rc/bin/service /bin/service +if(test -d /mnt/factotum) + aux/listen -q tcp +if not + aux/listen -t /bin/service tcp + +flag x - +while(echo Hello Squidboy) + . 
-i '#d/0' diff -Nru /sys/src/9k/k10/root/mkfile /sys/src/9k/k10/root/mkfile --- /sys/src/9k/k10/root/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/root/mkfile Wed Dec 9 00:00:00 2015 @@ -0,0 +1,11 @@ +objtype=amd64 +ticks + +enum { + NCtlr = 4, + NCtlrdrv = 32, + NDrive = NCtlr*NCtlrdrv, + + Fahdrs = 4, + + Read = 0, + Write, + + Eesb = 1<<0, /* must have (Eesb & Emtype) == 0 */ + + /* pci space configuration */ + Pmap = 0x90, + Ppcs = 0x91, + + Nms = 256, + Mphywait = 2*1024/Nms - 1, + Midwait = 16*1024/Nms - 1, + Mcomrwait = 64*1024/Nms - 1, +}; + +enum { + Tesb, + Tsb600, + Tjmicron, + Tahci, + Tlast, +}; + +typedef struct Ctlrtype Ctlrtype; +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +struct Ctlrtype { + uint type; + uint maxdmaxfr; + uint flags; + char *name; +}; + +Ctlrtype cttab[Tlast] = { +[Tesb] Tesb, 8192, 0, "63xxesb", +[Tsb600] Tsb600, 256, 0, "sb600", +[Tjmicron] Tjmicron, 8192, 0, "jmicron", +[Tahci] Tahci, 8192, 0, "ahci", +}; + +enum { + Dnull = 0, + Dmissing = 1<<0, + Dnew = 1<<1, + Dready = 1<<2, + Derror = 1<<3, + Dreset = 1<<4, + Doffline = 1<<5, + Dportreset = 1<<6, + Dlast = 8, +}; + +static char *diskstates[Dlast] = { + "null", + "missing", + "new", + "ready", + "error", + "reset", + "offline", + "portreset", +}; + +extern SDifc sdahciifc; + +enum { + DMautoneg, + DMsatai, + DMsataii, + DMsataiii, + DMlast, +}; + +static char *modes[DMlast] = { + "auto", + "satai", + "sataii", + "sataiii", +}; + +typedef struct Htab Htab; +struct Htab { + uint bit; + char *name; +}; + +struct Drive { + Lock; + + Ctlr *ctlr; + SDunit *unit; + char name[10]; + Aport *port; + Aportm portm; + Aportc portc; /* redundant ptr to port and portm. */ + Ledport; + + ulong totick; + ulong lastseen; + uint wait; + uchar mode; + uchar state; + + /* + * ahci allows non-sequential ports. 
+ * to avoid this hassle, we let + * driveno ctlr*NCtlrdrv + unit + * portno nth available port + */ + uint driveno; + uint portno; +}; + +struct Ctlr { + Lock; + + Ctlrtype *type; + int enabled; + SDev *sdev; + Pcidev *pci; + + uchar *mmio; + u32int *lmmio; + Ahba *hba; + Aenc; + uint enctype; + + Drive rawdrive[NCtlrdrv]; + Drive* drive[NCtlrdrv]; + int ndrive; + uint pi; +}; + +static Ctlr iactlr[NCtlr]; +static SDev sdevs[NCtlr]; +static int niactlr; +static ushort olds[NCtlr*NCtlrdrv]; + +static Drive *iadrive[NDrive]; +static int niadrive; + +static int debug; +static int prid = 1; +static int datapi; +static int dled; + +static char stab[] = { +[0] 'i', 'm', +[8] 't', 'c', 'p', 'e', +[16] 'N', 'I', 'W', 'B', 'D', 'C', 'H', 'S', 'T', 'F', 'X' +}; + +static void +serrstr(u32int r, char *s, char *e) +{ + int i; + + e -= 3; + for(i = 0; i < nelem(stab) && s < e; i++) + if(r & (1<task, p->cmd, p->ci, p->isr); +} + +static void +esleep(int ms) +{ + if(waserror()) + return; + tsleep(&up->sleep, return0, 0, ms); + poperror(); +} + +typedef struct { + Aport *p; + int i; +} Asleep; + +static int +ahciclear(void *v) +{ + Asleep *s; + + s = v; + return (s->p->ci & s->i) == 0; +} + +static void +aesleep(Aportm *m, Asleep *a, int ms) +{ + if(waserror()) + return; + tsleep(m, ahciclear, a, ms); + poperror(); +} + +static int +ahciwait(Aportc *c, int ms) +{ + Aport *p; + Asleep as; + + p = c->p; + p->ci = 1; + as.p = p; + as.i = 1; + aesleep(c->m, &as, ms); + if((p->task & 1) == 0 && p->ci == 0) + return 0; + dreg("ahciwait fail/timeout ", c->p); + return -1; +} + +static void +mkalist(Aportm *m, uint flags, uchar *data, int len) +{ + Actab *t; + Alist *l; + Aprdt *p; + + t = m->ctab; + l = m->list; + l->flags = flags | 0x5; + l->len = 0; + l->ctab = Pciwaddrl(t); + l->ctabhi = Pciwaddrh(t); + if(data){ + l->flags |= 1<<16; + p = &t->prdt; + p->dba = Pciwaddrl(data); + p->dbahi = Pciwaddrh(data); + p->count = 1<<31 | len - 2 | 1; + } +} + +static int +settxmode(Aportc *pc, 
uchar f) +{ + uchar *c; + + c = pc->m->ctab->cfis; + if(txmodefis(pc->m, c, f) == -1) + return 0; + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, 3*1000); +} + +static void +asleep(int ms) +{ + if(up == nil) + delay(ms); + else + esleep(ms); +} + +static int +ahciportreset(Aportc *c, uint mode) +{ + int i; + u32int *cmd; + Aport *p; + + p = c->p; + cmd = &p->cmd; + *cmd &= ~(Afre|Ast); + for(i = 0; i < 500; i += 25){ + if((*cmd & Acr) == 0) + break; + asleep(25); + } + p->sctl = 3*Aipm | 0*Aspd | Adet; + delay(1); + p->sctl = 3*Aipm | mode*Aspd; + return 0; +} + +static int +ahciquiet(Aport *a) +{ + int i; + u32int *p; + + p = &a->cmd; + *p &= ~Ast; + for(i = 0; i < 500; i += 50){ + if((*p & Acr) == 0) + goto stop; + asleep(50); + } + return -1; +stop: + if((a->task & (ASdrq|ASbsy)) == 0){ + *p |= Ast; + return 0; + } + + *p |= Aclo; + for(i = 0; i < 500; i += 50){ + if((*p & Aclo) == 0) + goto stop1; + asleep(50); + } + return -1; +stop1: + /* extra check */ + dprint("ahci: clo clear %ux\n", a->task); + if(a->task & ASbsy) + return -1; + *p |= Afre | Ast; + return 0; +} + +static int +ahcicomreset(Aportc *pc) +{ + uchar *c; + + dreg("comreset ", pc->p); + if(ahciquiet(pc->p) == -1){ + dprint("ahci: ahciquiet failed\n"); + return -1; + } + dreg("comreset ", pc->p); + + c = pc->m->ctab->cfis; + nopfis(pc->m, c, 1); + mkalist(pc->m, Lclear | Lreset, 0, 0); + if(ahciwait(pc, 500) == -1){ + dprint("ahci: comreset1 failed\n"); + return -1; + } + microdelay(250); + dreg("comreset ", pc->p); + + nopfis(pc->m, c, 0); + mkalist(pc->m, Lwrite, 0, 0); + if(ahciwait(pc, 150) == -1){ + dprint("ahci: comreset2 failed\n"); + return -1; + } + dreg("comreset ", pc->p); + return 0; +} + +static int +ahciidle(Aport *port) +{ + int i, r; + u32int *p; + + p = &port->cmd; + if((*p & Arun) == 0) + return 0; + *p &= ~Ast; + r = 0; + for(i = 0; i < 500; i += 25){ + if((*p & Acr) == 0) + goto stop; + asleep(25); + } + r = -1; +stop: + if((*p & Afre) == 0) + return r; + *p &= ~Afre; + 
for(i = 0; i < 500; i += 25){ + if((*p & Afre) == 0) + return 0; + asleep(25); + } + return -1; +} + +/* + * §6.2.2.1 first part; comreset handled by reset disk. + * - remainder is handled by configdisk. + * - ahcirecover is a quick recovery from a failed command. + */ +static int +ahciswreset(Aportc *pc) +{ + int i; + + i = ahciidle(pc->p); + pc->p->cmd |= Afre; + if(i == -1) + return -1; + if(pc->p->task & (ASdrq|ASbsy)) + return -1; + return 0; +} + +static int +ahcirecover(Aportc *pc) +{ + ahciswreset(pc); + pc->p->cmd |= Ast; + if(settxmode(pc, pc->m->udma) == -1) + return -1; + return 0; +} + +static void +setupfis(Afis *f) +{ + f->base = mallocalign(0x100, 0x100, 0, 0); + f->d = f->base + 0; + f->p = f->base + 0x20; + f->r = f->base + 0x40; + f->u = f->base + 0x60; + f->devicebits = (u32int*)(f->base + 0x58); +} + +static void +ahciwakeup(Aportc *c, uint mode) +{ + ushort s; + + s = c->p->sstatus; + if((s & Isleepy) == 0) + return; + if((s & Smask) != Spresent){ + print("ahci: slumbering drive missing %.3ux\n", s); + return; + } + ahciportreset(c, mode); +// iprint("ahci: wake %.3ux -> %.3lux\n", s, c->p->sstatus); +} + +static int +ahciconfigdrive(Ahba *h, Aportc *c, int mode) +{ + Aportm *m; + Aport *p; + + p = c->p; + m = c->m; + + if(m->list == 0){ + setupfis(&m->fis); + m->list = mallocalign(sizeof *m->list, 1024, 0, 0); + m->ctab = mallocalign(sizeof *m->ctab, 128, 0, 0); + } + + p->list = Pciwaddrl(m->list); + p->listhi = Pciwaddrh(m->list); + p->fis = Pciwaddrl(m->fis.base); + p->fishi = Pciwaddrh(m->fis.base); + + p->cmd |= Afre; + + if((p->sstatus & Sbist) == 0 && (p->cmd & Apwr) != Apwr) + if((p->sstatus & Sphylink) == 0 && h->cap & Hss){ + dprint("ahci: spin up ... 
[%.3ux]\n", p->sstatus); + p->cmd |= Apwr; + for(int i = 0; i < 1400; i += 50){ + if(p->sstatus & (Sphylink | Sbist)) + break; + asleep(50); + } + } + + p->serror = SerrAll; + + if((p->sstatus & SSmask) == (Isleepy | Spresent)) + ahciwakeup(c, mode); + /* disable power managment sequence from book. */ + p->sctl = 3*Aipm | mode*Aspd | 0*Adet; + p->cmd &= ~Aalpe; + + p->cmd |= Ast; + p->ie = IEM; + + return 0; +} + +static void +setstate(Drive *d, int state) +{ + ilock(d); + d->state = state; + iunlock(d); +} + +static void +ahcienable(Ahba *h) +{ + h->ghc |= Hie; +} + +static void +ahcidisable(Ahba *h) +{ + h->ghc &= ~Hie; +} + +static int +countbits(u32int u) +{ + int i, n; + + n = 0; + for(i = 0; i < 32; i++) + if(u & (1<hba = (Ahba*)c->mmio; + u = h->cap; + if((u & Ham) == 0) + h->ghc |= Hae; + return countbits(h->pi); +} + +/* + * reset the hba and wait up to a second for it to finish. + * the hba clears Hhr when the reset is complete. only that + * bit is tested: the ahci spec makes Hae read-only and set + * on ahci-only controllers, so ghc need not read back as 0. + * returns 0 on success, -1 on timeout. + */ +static int +ahcihbareset(Ahba *h) +{ + int wait; + + h->ghc |= Hhr; + for(wait = 0; wait < 1000; wait += 100){ + if((h->ghc & Hhr) == 0) + return 0; + delay(100); + } + return -1; +} + +/* under development */ +static int +ahcibioshandoff(Ahba *h) +{ + int i, wait; + + if((h->cap2 & Boh) == 0) + return 0; + if((h->bios & Bos) == 0) + return 0; + + print("ahcibioshandoff: claim\n"); + h->bios |= Oos; + + wait = 25; + for(i = 0; i < wait; i++){ + delay(1); + if((h->bios & Bos) == 0) + break; + if(i < 25 && h->bios & Bb){ + print("ahcibioshandoff: busy\n"); + wait = 2000; + } + } + if(i == wait){ + print("ahcibioshandoff: timeout %.1ux\n", h->bios); + h->bios = Oos; + } + return 0; +} + +static char* +dstate(uint s) +{ + int i; + + for(i = 0; s; i++) + s >>= 1; + return diskstates[i]; +} + +static char* +tnam(Ctlr *c) +{ + return c->type->name; +} + +static char* +dnam(Drive *d) +{ + char *s; + + s = d->name; + if(d->unit && d->unit->name) + s = d->unit->name; + return s; +} + +static void +clearci(Aport *p) +{ + if(p->cmd & Ast){ + p->cmd &= ~Ast; + p->cmd |= Ast; + } +} + +static int +intel(Ctlr *c) +{ + return c->pci->vid == 0x8086; +} + +static int
+ignoreahdrs(Drive *d) +{ + return d->portm.feat & Datapi && d->ctlr->type->type == Tsb600; +} + +static void +updatedrive(Drive *d) +{ + u32int f, cause, serr, s0, pr, ewake; + Aport *p; + static u32int last; + + pr = 1; + ewake = 0; + f = 0; + p = d->port; + cause = p->isr; + if(d->ctlr->type->type == Tjmicron) + cause &= ~Aifs; + serr = p->serror; + p->isr = cause; + + if(p->ci == 0){ + f |= Fdone; + pr = 0; + }else if(cause & Adps) + pr = 0; + if(cause & Ifatal){ + ewake = 1; + dprint("%s: fatal\n", dnam(d)); + } + if(cause & Adhrs){ + if(p->task & 33){ + if(ignoreahdrs(d) && serr & ErrE) + f |= Fahdrs; + dprint("%s: Adhrs cause %ux serr %ux task %ux\n", + dnam(d), cause, serr, p->task); + f |= Ferror; + ewake = 1; + } + pr = 0; + } + if(p->task & 1 && last != cause) + dprint("%s: err ca %ux serr %ux task %ux sstat %.3ux\n", + dnam(d), cause, serr, p->task, p->sstatus); + if(pr) + dprint("%s: upd %ux ta %ux\n", dnam(d), cause, p->task); + + if(cause & (Aprcs|Aifs)){ + s0 = d->state; + switch(p->sstatus & Smask){ + case Smissing: + d->state = Dmissing; + break; + case Spresent: + if((p->sstatus & Imask) == Islumber) + d->state = Dnew; + else + d->state = Derror; + break; + case Sphylink: + /* power mgnt crap for suprise removal */ + p->ie |= Aprcs|Apcs; /* is this required? */ + d->state = Dreset; + break; + case Sbist: + d->state = Doffline; + break; + } + dprint("%s: %s → %s [Apcrs] %.3ux\n", dnam(d), dstate(s0), + dstate(d->state), p->sstatus); + if(s0 == Dready && d->state != Dready) + idprint("%s: pulled\n", dnam(d)); + if(d->state != Dready) + f |= Ferror; + if(d->state != Dready || p->ci) + ewake = 1; + } + p->serror = serr; + if(ewake) + clearci(p); + if(f){ + d->portm.flag = f; + wakeup(&d->portm); + } + last = cause; +} + +static void +pstatus(Drive *d, u32int s) +{ + /* + * bogus code because the first interrupt is currently dropped. + * likely my fault. serror is maybe cleared at the wrong time. 
+ */ + if(s) + d->lastseen = Ticks; + switch(s){ + default: + print("%s: pstatus: bad status %.3ux\n", dnam(d), s); + case Smissing: + d->state = Dmissing; + break; + case Spresent: + break; + case Sphylink: + d->wait = 0; + d->state = Dnew; + break; + case Sbist: + d->state = Doffline; + break; + } +} + +static int +configdrive(Drive *d) +{ + if(ahciconfigdrive(d->ctlr->hba, &d->portc, d->mode) == -1) + return -1; + ilock(d); + pstatus(d, d->port->sstatus & Smask); + iunlock(d); + return 0; +} + +static void +resetdisk(Drive *d) +{ + uint state, det, stat; + Aport *p; + + p = d->port; + det = p->sctl & 7; + stat = p->sstatus & Smask; + state = (p->cmd>>28) & 0xf; + dprint("%s: resetdisk: icc %ux det %.3ux sdet %.3ux\n", dnam(d), state, det, stat); + + ilock(d); + state = d->state; + if(d->state != Dready || d->state != Dnew) + d->portm.flag |= Ferror; + clearci(p); /* satisfy sleep condition. */ + wakeup(&d->portm); + d->state = Derror; + iunlock(d); + + if(stat != Sphylink){ + setstate(d, Dportreset); + return; + } + + qlock(&d->portm); + if(p->cmd&Ast && ahciswreset(&d->portc) == -1) + setstate(d, Dportreset); /* get a bigger stick. 
*/ + else{ + setstate(d, Dmissing); + configdrive(d); + } + dprint("%s: resetdisk: %s → %s\n", dnam(d), dstate(state), dstate(d->state)); + qunlock(&d->portm); +} + +static int +newdrive(Drive *d) +{ + Aportc *c; + Aportm *m; + + c = &d->portc; + m = &d->portm; + + qlock(c->m); + setfissig(m, c->p->sig); + qunlock(c->m); + + if(ataonline(d->unit, m) != 0) + goto lose; + m->atamaxxfr = 128; + if(d->portm.feat & Dllba) + m->atamaxxfr = d->ctlr->type->maxdmaxfr; + + setstate(d, Dready); + pronline(d->unit, m); + return 0; + +lose: + qlock(c->m); + idprint("%s: can't be initialized\n", dnam(d)); + setstate(d, Dnull); + qunlock(c->m); + return -1; +} + +static int +doportreset(Drive *d) +{ + int i; + + i = -1; + qlock(&d->portm); + if(ahciportreset(&d->portc, d->mode) == -1) + dprint("ahci: ahciportreset fails\n"); + else + i = 0; + qunlock(&d->portm); + dprint("ahci: portreset → %s [task %.4ux ss %.3ux]\n", + dstate(d->state), d->port->task, d->port->sstatus); + return i; +} + +static void +statechange(Drive *d) +{ + Aportm *m; + + m = &d->portm; + switch(d->state){ + case Dnull: + case Doffline: + if(d->unit) + if(d->unit->sectors != 0){ + m->sectors = 0; + m->drivechange = 1; + } + case Dready: + d->wait = 0; + } +} + +static uint +maxmode(Ctlr *c) +{ + return (c->hba->cap & 0xf*Hiss)/Hiss; +} + +static void +checkdrive(Drive *d, int i) +{ + ushort s, sig; + + ilock(d); + s = d->port->sstatus; + if(s) + d->lastseen = Ticks; + if(s != olds[i]){ + dprint("%s: status: %.3ux -> %.3ux: %s\n", + dnam(d), olds[i], s, dstate(d->state)); + olds[i] = s; + d->wait = 0; + } + switch(d->state){ + case Dnull: + case Dready: + break; + case Dmissing: + case Dnew: + switch(s & (Iactive|Smask)){ + case Spresent: + ahciwakeup(&d->portc, d->mode); + case Smissing: + break; + default: + dprint("%s: unknown status %.3ux\n", dnam(d), s); + /* fall through */ + case Iactive: /* active, no device */ + if(++d->wait&Mphywait) + break; +reset: + if(d->mode == 0) + d->mode = maxmode(d->ctlr); + 
else + d->mode--; + if(d->mode == DMautoneg){ + d->state = Dportreset; + goto portreset; + } + dprint("%s: reset; new mode %s\n", dnam(d), + modes[d->mode]); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Iactive | Sphylink: + if(d->unit == nil) + break; + if((++d->wait&Midwait) == 0){ + dprint("%s: slow reset %.3ux task=%ux; %d\n", + dnam(d), s, d->port->task, d->wait); + goto reset; + } + s = (uchar)d->port->task; + sig = d->port->sig >> 16; + if(s == 0x7f || s&ASbsy || + (sig != 0xeb14 && (s & ASdrdy) == 0)) + break; + iunlock(d); + newdrive(d); + ilock(d); + break; + } + break; + case Doffline: + if(d->wait++ & Mcomrwait) + break; + /* fallthrough */ + case Derror: + case Dreset: + dprint("%s: reset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Dportreset: +portreset: + if(d->wait++ & 0xff && (s & Iactive) == 0) + break; + dprint("%s: portreset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + d->portm.flag |= Ferror; + clearci(d->port); + wakeup(&d->portm); + if((s & Smask) == 0){ + d->state = Dmissing; + break; + } + iunlock(d); + doportreset(d); + ilock(d); + break; + } + statechange(d); + iunlock(d); +} + +static void +satakproc(void*) +{ + int i; + + for(;;){ + tsleep(&up->sleep, return0, 0, Nms); + for(i = 0; i < niadrive; i++) + checkdrive(iadrive[i], i); + } +} + +static void +iainterrupt(Ureg*, void *a) +{ + int i; + u32int cause, m; + Ctlr *c; + Drive *d; + + c = a; + ilock(c); + cause = c->hba->isr; + for(i = 0; cause; i++){ + m = 1 << i; + if((cause & m) == 0) + continue; + cause &= ~m; + d = c->rawdrive + i; + ilock(d); + if(d->port->isr && c->pi & m) + updatedrive(d); + c->hba->isr = m; + iunlock(d); + } + iunlock(c); +} + +/* + * reset the enclosure management logic of a controller. + * nothing is done for esb enclosures. otherwise Emrst is + * set and polled until the hba clears it; the poll gives up + * after about a second (1000 polls, 1ms apart) rather than + * spin forever on a wedged controller. + * returns 0 on success, -1 on timeout. + */ +static int +ahciencreset(Ctlr *c) +{ + int i; + Ahba *h; + + if(c->enctype == Eesb) + return 0; + h = c->hba; + h->emctl |= Emrst; + for(i = 0; i < 1000; i++){ + if((h->emctl & Emrst) == 0) + return 0; + delay(1); + } + print("%s: ahciencreset: timeout\n", tnam(c)); + return -1; +} + +/* + * from the standard:
(http://en.wikipedia.org/wiki/IBPI) + * rebuild is preferred as locate+fail; alternate 1hz fail + * we're going to assume no locate led. + */ +enum { + Ledsleep = 125, /* 8hz */ + + N0 = Ledon*Aled, + L0 = Ledon*Aled | Ledon*Locled, + L1 = Ledon*Aled | Ledoff*Locled, + R0 = Ledon*Aled | Ledon*Locled | Ledon*Errled, + R1 = Ledon*Aled | Ledoff*Errled, + S0 = Ledon*Aled | Ledon*Locled /*| Ledon*Errled*/, /* botch */ + S1 = Ledon*Aled | Ledoff*Errled, + P0 = Ledon*Aled | Ledon*Errled, + P1 = Ledon*Aled | Ledoff*Errled, + F0 = Ledon*Aled | Ledon*Errled, + C0 = Ledon*Aled | Ledon*Locled, + C1 = Ledon*Aled | Ledoff*Locled, + +}; + +//static ushort led3[Ibpilast*8] = { +//[Ibpinone*8] 0, 0, 0, 0, 0, 0, 0, 0, +//[Ibpinormal*8] N0, N0, N0, N0, N0, N0, N0, N0, +//[Ibpirebuild*8] R0, R0, R0, R0, R1, R1, R1, R1, +//[Ibpilocate*8] L0, L1, L0, L1, L0, L1, L0, L1, +//[Ibpispare*8] S0, S1, S0, S1, S1, S1, S1, S1, +//[Ibpipfa*8] P0, P1, P0, P1, P1, P1, P1, P1, /* first 1 sec */ +//[Ibpifail*8] F0, F0, F0, F0, F0, F0, F0, F0, +//[Ibpicritarray*8] C0, C0, C0, C0, C1, C1, C1, C1, +//[Ibpifailarray*8] C0, C1, C0, C1, C0, C1, C0, C1, +//}; + +static ushort led2[Ibpilast*8] = { +[Ibpinone*8] 0, 0, 0, 0, 0, 0, 0, 0, +[Ibpinormal*8] N0, N0, N0, N0, N0, N0, N0, N0, +[Ibpirebuild*8] R0, R0, R0, R0, R1, R1, R1, R1, +[Ibpilocate*8] L0, L0, L0, L0, L0, L0, L0, L0, +[Ibpispare*8] S0, S0, S0, S0, S1, S1, S1, S1, +[Ibpipfa*8] P0, P1, P0, P1, P1, P1, P1, P1, /* first 1 sec */ +[Ibpifail*8] F0, F0, F0, F0, F0, F0, F0, F0, +[Ibpicritarray*8] C0, C0, C0, C0, C1, C1, C1, C1, +[Ibpifailarray*8] C0, C1, C0, C1, C0, C1, C0, C1, +}; + +static int +ledstate(Drive *d, uint seq) +{ + ushort i; + Ledport *p; + + p = d; + if(p->led == Ibpipfa && seq%32 >= 8) + i = P1; + else + i = led2[8*p->led + seq%8]; + if(i != p->ledbits){ + p->ledbits = i; + ledprint("%s: led %,.011ub %ud\n", dnam(d), p->ledbits, seq%8); + return 1; + } + return 0; +} + +static int +blink(Drive *d, uint t) +{ + Ahba *h; + Ctlr *c; + Aledmsg 
msg; + + if(ledstate(d, t) == 0) + return 0; + c = d->ctlr; + h = c->hba; + /* ensure last message has been transmitted */ + while(h->emctl & Tmsg) + microdelay(1); + switch(c->enctype){ + default: + panic("%s: bad led type %d", dnam(d), c->enctype); + case Elmt: + memset(&msg, 0, sizeof msg); + msg.type = Mled; + msg.dsize = 0; + msg.msize = Ledmsz - 4; + msg.led[0] = d->ledbits; + msg.led[1] = d->ledbits>>8; + msg.pm = 0; + msg.hba = d->driveno; + memmove(c->enctx, &msg, Ledmsz); + break; + } + h->emctl |= Tmsg; + return 1; +} + +enum { + Esbdrv0 = 4, /* start pos in bits */ + Esbiota = 3, /* shift in bits */ + Esbact = 1, + Esbloc = 2, + Esberr = 4, +}; + +uint +esbbits(uint s) +{ + uint i, e; /* except after c */ + + e = 0; + for(i = 0; i < 3; i++) + e |= ((s>>3*i & 7) != 0)<ndrive; i++){ + d = c->drive[i]; + s |= ledstate(d, t); /* no port mapping */ + } + if(s == 0) + return 0; + memset(u, 0, sizeof u); + for(i = 0; i < c->ndrive; i++){ + d = c->drive[i]; + s = Esbdrv0 + Esbiota*i; + v = esbbits(d->ledbits) * (1ull << s%32); + u[s/32 + 0] |= v; + u[s/32 + 1] |= v>>32; + } + for(i = 0; i < c->encsz; i++) + c->enctx[i] = u[i]; + return 1; +} + +static long +ahciledr(SDunit *u, Chan *ch, void *a, long n, vlong off) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + return ledr(d, ch, a, n, off); +} + +static long +ahciledw(SDunit *u, Chan *ch, void *a, long n, vlong off) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + return ledw(d, ch, a, n, off); +} + +static void +ledkproc(void*) +{ + uchar map[NCtlr]; + uint i, j, t0, t1; + Ctlr *c; + Drive *d; + + j = 0; + memset(map, 0, sizeof map); + for(i = 0; i < niactlr; i++) + if(iactlr[i].enctype != 0){ + ahciencreset(iactlr + i); + map[i] = 1; + j++; + } + if(j == 0) + pexit("no work", 1); + for(i = 0; i < niadrive; i++){ + iadrive[i]->nled = 3; /* hardcoded */ + if(iadrive[i]->ctlr->enctype == Eesb) + iadrive[i]->nled = 3; + iadrive[i]->ledbits = -1; + } + for(i 
= 0; ; i++){ + t0 = Ticks; + for(j = 0; j < niadrive; ){ + c = iadrive[j]->ctlr; + if(map[c - iactlr] == 0) + j += c->ndrive; + else if(c->enctype == Eesb){ + blinkesb(c, i); + j += c->ndrive; + }else{ + d = iadrive[j++]; + blink(d, i); + } + } + t1 = Ticks; + esleep(Ledsleep - TK2MS(t1 - t0)); + } +} + +static int +iaverify(SDunit *u) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + ilock(c); + ilock(d); + if(d->unit == nil){ + d->unit = u; + if(c->enctype != 0) + sdaddfile(u, "led", 0644, eve, ahciledr, ahciledw); + } + iunlock(d); + iunlock(c); + checkdrive(d, d->driveno); /* c->d0 + d->driveno */ + return 1; +} + +static int +iaenable(SDev *s) +{ + char name[32]; + Ctlr *c; + static int once; + + c = s->ctlr; + ilock(c); + if(!c->enabled){ + if(once == 0) + kproc("iasata", satakproc, 0); + if(c->ndrive == 0) + panic("iaenable: zero s->ctlr->ndrive"); + pcisetbme(c->pci); + snprint(name, sizeof name, "%s (%s)", s->name, s->ifc->name); + intrenable(c->pci->intl, iainterrupt, c, c->pci->tbdf, name); + /* supposed to squelch leftover interrupts here. */ + ahcienable(c->hba); + c->enabled = 1; + if(++once == niactlr) + kproc("ialed", ledkproc, 0); + } + iunlock(c); + return 1; +} + +static int +iadisable(SDev *s) +{ + char name[32]; + Ctlr *c; + + c = s->ctlr; + c->enabled = 0; + ahcidisable(c->hba); + intrdisable(c->vector); + return 1; +} + +static int +iaonline(SDunit *u) +{ + int r; + Ctlr *c; + Drive *d; + Aportm *m; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + m = &d->portm; + r = 0; + + if(m->feat & Datapi && m->drivechange){ + r = scsionlinex(u, m) == SDok; + if(r > 0) + m->drivechange = 0; + return r; + } + + ilock(d); + if(m->drivechange){ + r = 2; + m->drivechange = 0; + /* devsd resets this after online is called; why? 
*/ + u->sectors = m->sectors; + u->secsize = m->secsize; + }else if(d->state == Dready) + r = 1; + iunlock(d); + return r; +} + +static Alist* +ahcibuildpkt(Aportm *m, SDreq *r, void *data, int n) +{ + uint flags; + uchar *c; + Actab *t; + Alist *l; + + l = m->list; + t = m->ctab; + c = t->cfis; + atapirwfis(m, c, r->cmd, r->clen, n); + flags = 1<<16 | Lpref | Latapi; + if(r->write != 0 && data) + flags |= Lwrite; + mkalist(m, flags, data, n); + return l; +} + +static Alist* +ahcibuildfis(Aportm *m, SDreq *r, void *data, uint n) +{ + uchar *c; + uint flags, dir; + Alist *l; + + l = m->list; + c = m->ctab->cfis; + if((r->ataproto & Pprotom) != Ppkt){ + memmove(c, r->cmd, r->clen); + flags = Lpref; + if(r->ataproto&Pout && n > 0) + flags |= Lwrite; + dir = r->ataproto&Pdatam; + if(dir == Pnd && n == 0) + flags |= Lwrite; + mkalist(m, flags, data, n); + }else{ + atapirwfis(m, c, r->cmd, r->clen, n); + flags = 1<<16 | Lpref | Latapi; + if(r->write && data) + flags |= Lwrite; + mkalist(m, flags, data, n); + } + return l; +} + +static int +isready(Drive *d) +{ + u32int s; + ulong δ; + + if(d->state & (Dreset | Dportreset /*| Dnew*/)) + return 1; + δ = TK2MS(Ticks - d->lastseen); + if(d->state == Dnull || δ > 10*1000){ + dprint("%s: last seen too long ago: %ld\n", dnam(d), δ); + return -1; + } + ilock(d); + s = d->port->sstatus; + iunlock(d); + if((s & Imask) == 0 && δ > 1500){ + dprint("%s: phy off %ldms\n", dnam(d), δ); + return -1; + } + if(d->state & (Dready | Dnew) && (s & Smask) == Sphylink) + return 0; + return 1; +} + +static int +waitready(Drive *d, int tk) +{ + int r; + + for(;;){ + r = isready(d); + if(r <= 0) + return r; + if(tk - Ticks - 10 < 1ul<<31) + return -1; + esleep(10); + } +} + +static int +io(Drive *d, uint proto, int totk, int interrupt) +{ + uint task, flag, rv; + Aport *p; + Asleep as; + + switch(waitready(d, totk)){ + case -1: + return SDeio; + case 1: + return SDretry; + } + + ilock(d); + d->portm.flag = 0; + iunlock(d); + p = d->port; + p->ci 
= 1; + + as.p = p; + as.i = 1; + d->totick = 0; + if(totk > 0) + d->totick = totk | 1; /* fix fencepost */ + + while(waserror()) + if(interrupt){ + d->port->ci = 0; + if(ahcicomreset(&d->portc) == -1) + setstate(d, Dreset); + return SDtimeout; + } + sleep(&d->portm, ahciclear, &as); + poperror(); + + ilock(d); + flag = d->portm.flag; + task = p->task; + iunlock(d); + + rv = SDok; + if(proto & Ppkt){ + rv = task >> 8 + 4 & 0xf; + flag &= ~Fahdrs; + flag |= Fdone; + }else if(task & (Efatal<<8) || task & (ASbsy|ASdrq) && d->state == Dready){ + p->ci = 0; + ahcirecover(&d->portc); + task = p->task; + flag &= ~Fdone; /* either an error or do-over */ + } + if(flag == 0){ + print("%s: retry\n", dnam(d)); + return SDretry; + } + if(flag & (Fahdrs | Ferror)){ + if((task & Eidnf) == 0) + print("%s: i/o error %ux\n", dnam(d), task); + return SDcheck; + } + return rv; +} + +static int +iariopkt(SDreq *r, Drive *d) +{ + int n, count, t, max, δ; + uchar *cmd; + + cmd = r->cmd; + aprint("%s: %.2ux %.2ux %c %d %p\n", dnam(d), cmd[0], cmd[2], + "rw"[r->write], r->dlen, r->data); + r->rlen = 0; + count = r->dlen; + max = 65536; + δ = r->timeout - Ticks; + + for(t = r->timeout; setreqto(r, t) != -1;){ + n = count; + if(n > max) + n = max; + qlock(&d->portm); + ahcibuildpkt(&d->portm, r, r->data, n); + r->status = io(d, Ppkt, r->timeout, 0); + qunlock(&d->portm); + switch(r->status){ + case SDeio: + return r->status = SDcheck; + case SDretry: + continue; + } +// aprint("%s: OK %.2ux :: %d :: %.4lux\n", dnam(d), r->cmd[0], r->status, d->port->task); + r->rlen = d->portm.list->len; + return SDok; + } + print("%s: atapi timeout %dms\n", dnam(d), TK2MS(δ)); + return r->status = SDcheck; +} + +static long +ahcibio(SDunit *u, int lun, int write, void *a, long count0, uvlong lba) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + if(d->portm.feat & Datapi) + return scsibiox(u, &d->portm, lun, write, a, count0, lba); + return atabio(u, &d->portm, lun, write, a, 
count0, lba); +} + +static int +iario(SDreq *r) +{ + Ctlr *c; + Drive *d; + SDunit *u; + + u = r->unit; + c = u->dev->ctlr; + d = c->drive[u->subno]; + if((d->state & (Dnew | Dready)) == 0) + return sdsetsense(r, SDcheck, 3, 0x04, 0x24); + if(r->timeout == 0) + r->timeout = totk(Ms2tk(600*1000)); + if(d->portm.feat & Datapi) + return iariopkt(r, d); + return atariosata(u, &d->portm, r); +} + +static uchar bogusrfis[16] = { +[Ftype] 0x34, +[Fioport] 0x40, +[Fstatus] 0x50, +[Fdev] 0xa0, +}; + +static void +sdr0(Drive *d) +{ + uchar *c; + + c = d->portm.fis.r; + memmove(c, bogusrfis, sizeof bogusrfis); + coherence(); +} + +static int +sdr(SDreq *r, Drive *d, int st) +{ + uchar *c; + uint t; + + if((r->ataproto & Pprotom) == Ppkt){ + t = d->port->task; + if(t & ASerr) + st = t >> 8 + 4 & 0xf; + } + c = d->portm.fis.r; + memmove(r->cmd, c, 16); + r->status = st; + if(st == SDcheck) + st = SDok; + return st; +} + +static int +fisreqchk(Sfis *f, SDreq *r) +{ + if((r->ataproto & Pprotom) == Ppkt) + return SDnostatus; + /* + * handle oob requests; + * restrict & sanitize commands + */ + if(r->clen != 16) + error(Eio); + if(r->cmd[0] == 0xf0){ + sigtofis(f, r->cmd); + r->status = SDok; + return SDok; + } + r->cmd[0] = 0x27; + r->cmd[1] = 0x80; + r->cmd[7] |= 0xa0; + return SDnostatus; +} + +static int +iaataio(SDreq *r) +{ + Ctlr *c; + Drive *d; + SDunit *u; + + u = r->unit; + c = u->dev->ctlr; + d = c->drive[u->subno]; + + if(r->timeout == 0) + r->timeout = totk(Ms2tk(600*1000)); + if((r->status = fisreqchk(&d->portm, r)) != SDnostatus) + return r->status; + r->rlen = 0; + sdr0(d); + + qlock(&d->portm); + ahcibuildfis(&d->portm, r, r->data, r->dlen); + r->status = io(d, r->ataproto & Pprotom, -1, 1); + qunlock(&d->portm); + if(r->status != SDok) + return r->status; + r->rlen = r->dlen; + if((r->ataproto & Pprotom) == Ppkt) + r->rlen = d->portm.list->len; + return sdr(r, d, r->status); +} + +/* configure drives 0-5 as ahci sata (c.f. 
errata) */ +static int +iaahcimode(Pcidev *p) +{ + uint u; + + u = pcicfgr16(p, 0x92); + dprint("ahci: %T: iaahcimode %.2ux %.4ux\n", p->tbdf, pcicfgr8(p, 0x91), u); + pcicfgw16(p, 0x92, u | 0xf); /* ports 0-15 (sic) */ + return 0; +} + +enum{ + Ghc = 0x04/4, /* global host control */ + Pi = 0x0c/4, /* ports implemented */ + Cmddec = 1<<15, /* enable command block decode */ + + /* Ghc bits */ + Ahcien = 1<<31, /* ahci enable */ +}; + +static void +iasetupahci(Ctlr *c) +{ + pcicfgw16(c->pci, 0x40, pcicfgr16(c->pci, 0x40) & ~Cmddec); + pcicfgw16(c->pci, 0x42, pcicfgr16(c->pci, 0x42) & ~Cmddec); + + c->lmmio[Ghc] |= Ahcien; + c->lmmio[Pi] = (1 << 6) - 1; /* 5 ports (supposedly ro pi reg) */ + + /* enable ahci mode; from ich9 datasheet */ + pcicfgw16(c->pci, 0x90, 1<<6 | 1<<5); +} + +static void +sbsetupahci(Pcidev *p) +{ + print("sbsetupahci: tweaking %.4ux ccru %.2ux ccrp %.2ux\n", + p->did, p->ccru, p->ccrp); + pcicfgw8(p, 0x40, pcicfgr8(p, 0x40) | 1); + pcicfgw8(p, PciCCRu, 6); + pcicfgw8(p, PciCCRp, 1); + p->ccru = 6; + p->ccrp = 1; +} + +static int +esbenc(Ctlr *c) +{ + c->encsz = 1; + c->enctx = (u32int*)(c->mmio + 0xa0); + c->enctype = Eesb; + c->enctx[0] = 0; + return 0; +} + +static int +ahciencinit(Ctlr *c) +{ + uint type, sz, o; + u32int *bar; + Ahba *h; + + h = c->hba; + if(c->type == Tesb) + return esbenc(c); + if((h->cap & Hems) == 0) + return -1; + type = h->emctl & Emtype; + switch(type){ + case Esgpio: + case Eses2: + case Esafte: + return -1; + case Elmt: + break; + default: + return -1; + } + + sz = h->emloc & 0xffff; + o = h->emloc>>16; + if(sz == 0 || o == 0) + return -1; + bar = c->lmmio; + ledprint("size = %#.4ux; loc = %#.4ux*4\n", sz, o); + + c->encsz = sz; + c->enctx = bar + o; + if((h->emctl & Xonly) == 0){ + if(h->emctl & Smb) + c->encrx = bar + o; + else + c->encrx = bar + o*2; + } + c->enctype = type; + return 0; +} + +static ushort itab[] = { + 0xfffc, 0x2680, Tesb, + 0xfffb, 0x27c1, Tahci, /* 82801g[bh]m */ + 0xffff, 0x2821, Tahci, /* 
82801h[roh] */ + 0xfffe, 0x2824, Tahci, /* 82801h[b] */ + 0xfeff, 0x2829, Tahci, /* ich8 */ + 0xfffe, 0x2922, Tahci, /* ich9 */ + 0xffff, 0x3a02, Tahci, /* 82801jd/do */ + 0xfefe, 0x3a22, Tahci, /* ich10, pch */ + 0xfff7, 0x3b28, Tahci, /* pchm */ + 0xfffe, 0x3b22, Tahci, /* pch */ +}; + +static int +didtype(Pcidev *p) +{ + int type, i; + + type = Tahci; + switch(p->vid){ + default: + return -1; + case 0x8086: + for(i = 0; i < nelem(itab); i += 3) + if((p->did & itab[i]) == itab[i+1]) + return itab[i+2]; + break; + case 0x1002: + if(p->ccru == 1 || p->ccrp != 1) + if(p->did == 0x4380 || p->did == 0x4390) + sbsetupahci(p); + type = Tsb600; + break; + case 0x1106: + /* + * unconfirmed report that the programming + * interface is set incorrectly. + */ + if(p->did == 0x3349) + return Tahci; + break; + case 0x10de: + case 0x1039: + case 0x1b4b: + case 0x11ab: + break; + case 0x197b: + case 0x10b9: + type = Tjmicron; + break; + } + if(p->ccrb == 1 && p->ccru == 6 && p->ccrp == 1) + return type; + return -1; +} + +static SDev* +iapnp(void) +{ + int i, n, nunit, type; + uintmem io; + Ctlr *c; + Drive *d; + Pcidev *p; + SDev *s; + static int done; + + if(done) + return nil; + done = 1; + memset(olds, 0xff, sizeof olds); + p = nil; +loop: + while((p = pcimatch(p, 0, 0)) != nil){ + if((type = didtype(p)) == -1) + continue; + if(p->mem[Abar].bar == 0) + continue; + if(niactlr == NCtlr){ + print("iapnp: %s: too many controllers\n", cttab[type].name); + break; + } + c = iactlr + niactlr; + s = sdevs + niactlr; + memset(c, 0, sizeof *c); + memset(s, 0, sizeof *s); + c->type = cttab + type; + io = p->mem[Abar].bar & ~(uintmem)0xf; + c->mmio = vmap(io, p->mem[Abar].size); + if(c->mmio == nil){ + print("%s: %T: address %#P in use\n", + tnam(c), p->tbdf, io); + continue; + } + c->lmmio = (u32int*)c->mmio; + c->pci = p; + + s->ifc = &sdahciifc; + s->idno = 'E'; + s->ctlr = c; + c->sdev = s; + + if(intel(c) && p->did != 0x2681) + iasetupahci(c); + ahcibioshandoff((Ahba*)c->mmio); +// 
ahcihbareset((Ahba*)c->mmio); + nunit = ahciconf(c); + c->pi = c->hba->pi; + if(0 && p->vid == 0x1002 && p->did == 0x4391){ + c->pi = 0x3f; /* noah's opteron */ + nunit = 6; + } + if(intel(c) && iaahcimode(p) == -1 || nunit < 1){ + vunmap(c->mmio, p->mem[Abar].size); + continue; + } + c->ndrive = s->nunit = nunit; + + /* map the drives -- they don't all need to be enabled. */ + memset(c->rawdrive, 0, sizeof c->rawdrive); + n = 0; + for(i = 0; i < NCtlrdrv; i++){ + d = c->rawdrive + i; + d->portno = i; + d->driveno = -1; + d->portm.tler = 5000; + d->portm.sectors = 0; + d->portm.serial[0] = ' '; + d->led = Ibpinormal; + d->ctlr = c; + if((c->pi & 1<name, sizeof d->name, "iahci%d.%d", niactlr, i); + d->port = (Aport*)(c->mmio + 0x80*i + 0x100); + d->portc.p = d->port; + d->portc.m = &d->portm; + d->driveno = n++; + c->drive[d->driveno] = d; + iadrive[niadrive + d->driveno] = d; + } + for(i = 0; i < n; i++) + if(ahciidle(c->drive[i]->port) == -1){ + print("%s: port %d wedged; abort\n", + tnam(c), i); + goto loop; + } + for(i = 0; i < n; i++){ + c->drive[i]->mode = DMautoneg; + configdrive(c->drive[i]); + } + ahciencinit(c); + + niadrive += n; + niactlr++; + sdadddevs(s); + i = (c->hba->cap >> 21) & 1; + print("#S/%s: %s: sata-%s with %d ports\n", s->name, + tnam(c), "I\0II" + i*2, nunit); + } + return nil; +} + +static Htab ctab[] = { + Aasp, "asp", + Aalpe , "alpe ", + Adlae, "dlae", + Aatapi, "atapi", + Apste, "pste", + Afbsc, "fbsc", + Aesp, "esp", + Acpd, "cpd", + Ampsp, "mpsp", + Ahpcp, "hpcp", + Apma, "pma", + Acps, "cps", + Acr, "cr", + Afr, "fr", + Ampss, "mpss", + Apod, "pod", + Asud, "sud", + Ast, "st", +}; + +static char* +capfmt(char *p, char *e, Htab *t, int n, u32int cap) +{ + uint i; + + *p = 0; + for(i = 0; i < n; i++) + if(cap & t[i].bit) + p = seprint(p, e, "%s ", t[i].name); + return p; +} + +static int +iarctl(SDunit *u, char *p, int l) +{ + char buf[32], *e, *op; + Aport *o; + Ctlr *c; + Drive *d; + + if((c = u->dev->ctlr) == nil) + return 0; + d 
= c->drive[u->subno]; + o = d->port; + + e = p+l; + op = p; + if(d->state == Dready) + p = sfisxrdctl(&d->portm, p, e); + else + p = seprint(p, e, "no disk present [%s]\n", dstate(d->state)); + serrstr(o->serror, buf, buf + sizeof buf - 1); + p = seprint(p, e, "reg\ttask %ux cmd %ux serr %ux %s ci %ux is %ux " + "sig %ux sstatus %.3ux\n", o->task, o->cmd, o->serror, buf, + o->ci, o->isr, o->sig, o->sstatus); + p = seprint(p, e, "cmd\t"); + p = capfmt(p, e, ctab, nelem(ctab), o->cmd); + p = seprint(p, e, "\n"); + p = seprint(p, e, "mode\t%s %s\n", modes[d->mode], modes[maxmode(c)]); + p = seprint(p, e, "geometry %llud %lud\n", u->sectors, u->secsize); + return p - op; +} + +static void +forcemode(Drive *d, char *mode) +{ + int i; + + for(i = 0; i < nelem(modes); i++) + if(strcmp(mode, modes[i]) == 0) + break; + if(i == nelem(modes)) + i = 0; + ilock(d); + d->mode = i; + iunlock(d); +} + +static void +forcestate(Drive *d, char *state) +{ + int i; + + for(i = 1; i < nelem(diskstates); i++) + if(strcmp(state, diskstates[i]) == 0) + break; + if(i == nelem(diskstates)) + error(Ebadctl); + setstate(d, 1 << i-1); +} + +static int +iawctl(SDunit *u, Cmdbuf *cmd) +{ + char **f; + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + f = cmd->f; + + if(strcmp(f[0], "mode") == 0) + forcemode(d, f[1]? f[1]: "satai"); + else if(strcmp(f[0], "state") == 0) + forcestate(d, f[1]? 
f[1]: "null"); + else + cmderror(cmd, Ebadctl); + return 0; +} + +static char * +portr(char *p, char *e, uint x) +{ + int i, a; + + p[0] = 0; + a = -1; + for(i = 0; i < 32; i++){ + if((x & (1< 0) + p = seprint(p, e, ", "); + p = seprint(p, e, "%d", a = i); + } + } + if(a != -1 && i - 1 != a) + p = seprint(p, e, "-%d", i - 1); + return p; +} + +static Htab htab[] = { + H64a, "64a", + Hncq, "ncq", + Hsntf, "ntf", + Hmps, "mps", + Hss, "ss", + Halp, "alp", + Hal, "led", + Hclo, "clo", + Ham, "am", + Hpm, "pm", + Hfbs, "fbs", + Hpmb, "pmb", + Hssc, "slum", + Hpsc, "pslum", + Hcccs, "coal", + Hems, "ems", + Hxs, "xs", +}; + +static Htab htab2[] = { + Apts, "apts", + Nvmp, "nvmp", + Boh, "boh", +}; + +static Htab emtab[] = { + Pm, "pm", + Alhd, "alhd", + Xonly, "xonly", + Smb, "smb", + Esgpio, "esgpio", + Eses2, "eses2", + Esafte, "esafte", + Elmt, "elmt", +}; + +static char* +iartopctl(SDev *s, char *p, char *e) +{ + char pr[25]; + u32int cap; + Ahba *h; + Ctlr *c; + + c = s->ctlr; + h = c->hba; + cap = h->cap; + p = seprint(p, e, "sd%c ahci %s port %#p: ", s->idno, tnam(c), h); + p = capfmt(p, e, htab, nelem(htab), cap); + p = capfmt(p, e, htab2, nelem(htab2), h->cap2); + p = capfmt(p, e, emtab, nelem(emtab), h->emctl); + portr(pr, pr + sizeof pr, h->pi); + return seprint(p, e, + "iss %d ncs %d np %d ghc %ux isr %ux pi %ux %s ver %ux\n", + (cap>>20) & 0xf, (cap>>8) & 0x1f, 1 + (cap & 0x1f), + h->ghc, h->isr, h->pi, pr, h->ver); +} + +static int +iawtopctl(SDev *, Cmdbuf *cmd) +{ + int *v; + char **f; + + f = cmd->f; + v = 0; + + if(strcmp(f[0], "debug") == 0) + v = &debug; + else if(strcmp(f[0], "idprint") == 0) + v = &prid; + else if(strcmp(f[0], "aprint") == 0) + v = &datapi; + else if(strcmp(f[0], "ledprint") == 0) + v = &dled; + else + cmderror(cmd, Ebadctl); + + switch(cmd->nf){ + default: + cmderror(cmd, Ebadarg); + case 1: + *v ^= 1; + return 0; + case 2: + *v = strcmp(f[1], "on") == 0; + return 0; + } +} + +SDifc sdahciifc = { + "ahci", + + iapnp, + nil, /* 
legacy */ + iaenable, + iadisable, + + iaverify, + iaonline, + iario, + iarctl, + iawctl, + + ahcibio, + nil, /* probe */ + nil, /* clear */ + iartopctl, + iawtopctl, + iaataio, +}; diff -Nru /sys/src/9k/k10/sipi.c /sys/src/9k/k10/sipi.c --- /sys/src/9k/k10/sipi.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/sipi.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,122 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" +#include "sipi.h" + +#define SIPIHANDLER (KZERO+0x3000) + +/* + * Parameters are passed to the bootstrap code via a vector + * in low memory indexed by the APIC number of the processor. + * The layout, size, and location have to be kept in sync + * with the handler code in l64sipi.s. + */ +typedef struct Sipi Sipi; +struct Sipi { + u32int pml4; + u32int _4_; + uintptr stack; + Mach* mach; + uintptr pc; +}; + +void +sipi(void) +{ + Lapic *apic; + Mach *mach; + Sipi *sipi; + int apicno, i, nproc; + u8int *sipiptr; + uintmem sipipa; + u8int *alloc, *p; + extern void squidboy(int); + + /* + * Move the startup code into place, + * must be aligned properly. + */ + sipipa = mmuphysaddr(SIPIHANDLER); + if((sipipa & (4*KiB - 1)) || sipipa > (1*MiB - 2*4*KiB)) + return; + sipiptr = UINT2PTR(SIPIHANDLER); + memmove(sipiptr, sipihandler, sizeof(sipihandler)); + memset(sipiptr+4*KiB, 0, sizeof(Sipi)*Napic); + + /* + * Notes: + * The Universal Startup Algorithm described in the MP Spec. 1.4. + * The data needed per-processor is the sum of the stack, page + * table pages, vsvm page and the Mach page. The layout is similar + * to that described in data.h for the bootstrap processor, but + * with any unused space elided. 
+ */ + nproc = 0; + for(apicno = 0; apicno < Napic; apicno++){ + apic = lapiclookup(apicno); + if(apic == nil || !apic->useable || apic->machno == 0) + continue; + if(++nproc >= MACHMAX){ + print("sipi: MACHMAX too small, need %d\n", nproc); + break; + } + sipi = &((Sipi*)(sipiptr+4*KiB))[apicno]; + + /* + * NOTE: for now, share the page tables with the + * bootstrap processor, until this code is worked out, + * so only the Mach and stack portions are used below. + */ + alloc = mallocalign(MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ, 4096, 0, 0); + if(alloc == nil) + continue; + memset(alloc, 0, MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ); + p = alloc+MACHSTKSZ; + + sipi->pml4 = cr3get(); + sipi->stack = PTR2UINT(p); + + p += 4*PTSZ+4*KiB; + + /* + * Committed. If the AP startup fails, can't safely + * release the resources, who knows what mischief + * the AP is up to. Perhaps should try to put it + * back into the INIT state? + */ + mach = (Mach*)p; + sipi->mach = mach; + mach->machno = apic->machno; /* NOT one-to-one... 
*/ + mach->splpc = PTR2UINT(squidboy); + sipi->pc = mach->splpc; + mach->apicno = apicno; + mach->stack = PTR2UINT(alloc); + mach->vsvm = alloc+MACHSTKSZ+4*PTSZ; + mach->pml4 = m->pml4; + + p = KADDR(0x467); + *p++ = sipipa; + *p++ = sipipa>>8; + *p++ = 0; + *p = 0; + + nvramwrite(0x0f, 0x0a); + lapicsipi(apicno, sipipa); + + for(i = 0; i < 1000; i++){ + if(mach->splpc == 0) + break; + millidelay(5); + } + nvramwrite(0x0f, 0x00); + + DBG("apicno%d: machno %d mach %#p (%#p) %dMHz\n", + apicno, mach->machno, + mach, sys->machptr[mach->machno], + mach->cpumhz); + } +} diff -Nru /sys/src/9k/k10/sipi.h /sys/src/9k/k10/sipi.h --- /sys/src/9k/k10/sipi.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/sipi.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,25 @@ +uchar sipihandler[]={ +0xea,0x58,0x30,0x00,0x00,0x90,0x90,0x90, +0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xff,0xff,0x00,0x00,0x00,0x9a,0xcf,0x00,0xff,0xff,0x00,0x00,0x00,0x92,0xcf,0x00, +0x00,0x00,0x00,0x00,0x00,0x98,0x20,0x00,0x1f,0x00,0x10,0x30,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x98,0x20,0x00,0x00,0x00, +0x00,0x00,0x00,0x80,0x00,0x00,0x17,0x00,0x36,0x30,0x00,0xf0,0xff,0xff,0xff,0xff, +0x8c,0xc8,0x8e,0xd8,0x0f,0x01,0x16,0x30,0x30,0x0f,0x20,0xc0,0x83,0xc8,0x01,0x0f, +0x22,0xc0,0xeb,0x00,0xb8,0x10,0x00,0x8e,0xd8,0x8e,0xc0,0x8e,0xe0,0x8e,0xe8,0x8e, +0xd0,0x66,0xea,0x81,0x30,0x00,0x00,0x08,0x00,0xbd,0x00,0x00,0xe0,0xfe,0x8b,0x6d, +0x20,0xc1,0xed,0x18,0x89,0xe8,0x6b,0xc0,0x20,0xbb,0x00,0x30,0x00,0x00,0x81,0xc3, +0x00,0x10,0x00,0x00,0x01,0xc3,0x8b,0x33,0x89,0xf0,0x0f,0x22,0xd8,0x89,0xc2,0x81, +0xea,0x00,0x60,0x00,0x00,0x83,0xc2,0x03,0x89,0x10,0x2d,0x00,0x60,0x00,0x00,0x81, +0xc2,0x00,0x10,0x00,0x00,0x89,0x10,0xba,0x83,0x00,0x00,0x00,0x05,0x00,0x10,0x00, +0x00,0x89,0x10,0x0f,0x20,0xe0,0x83,0xe0,0xef,0x0d,0xa0,0x00,0x00,0x00,0x0f,0x22, +0xe0,0xb9,0x80,0x00,0x00,0xc0,0x0f,0x32,0x0d,0x00,0x01,0x00,0x00,0x0f,0x30,0x0f, 
+0x20,0xc2,0x81,0xe2,0xf5,0xff,0xff,0x9f,0x81,0xca,0x00,0x00,0x01,0x80,0x0f,0x22, +0xc2,0xea,0x00,0x31,0x00,0x00,0x18,0x00,0x48,0xc7,0xc0,0x09,0x31,0x00,0xf0,0xff, +0xe0,0x48,0xc7,0xc0,0x4e,0x30,0x00,0xf0,0x0f,0x01,0x10,0x48,0x31,0xd2,0x8e,0xda, +0x8e,0xc2,0x8e,0xe2,0x8e,0xea,0x8e,0xd2,0x8b,0xf6,0x48,0x89,0xf0,0x48,0x05,0x00, +0x00,0x00,0xf0,0x48,0x89,0x10,0x0f,0x22,0xde,0x48,0x81,0xc3,0x00,0x00,0x00,0xf0, +0x48,0x8b,0x63,0x08,0x52,0x9d,0x8b,0xed,0x55,0x4c,0x8b,0x7b,0x10,0x49,0x89,0xd6, +0x48,0x8b,0x43,0x18,0xff,0xd0,0xeb,0xfe, + +}; diff -Nru /sys/src/9k/k10/smbus.h /sys/src/9k/k10/smbus.h --- /sys/src/9k/k10/smbus.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/smbus.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,75 @@ +typedef struct SMBus SMBus; +typedef struct SMdev SMdev; + +/* SMBus transactions */ +enum +{ + SMBquick, /* sends address only */ + + /* write */ + SMBsend, /* sends address and cmd */ + SMBbytewrite, /* sends address and cmd and 1 byte */ + SMBwordwrite, /* sends address and cmd and 2 bytes */ + + /* read */ + SMBrecv, /* sends address, recvs 1 byte */ + SMBbyteread, /* sends address and cmd, recv's byte */ + SMBwordread, /* sends address and cmd, recv's 2 bytes */ + + /* read or write is a function of bit 0 in the slave addr */ +// SMBquick= 0, + SMBbyte= 1, + SMBbytedata, + SMBworddata, + SMBprocess, + SMBblock, + SMBi2cread, + SMBblockprocess, +}; + +typedef struct Udid Udid; +struct Udid { + uchar cap; + uchar ver; + uchar vid[2]; + uchar did[2]; + uchar ifc[2]; + uchar svid[2]; + uchar sdid[2]; + uchar vsid[4]; +}; + +enum { + STsmb, + STi2c, +}; + +typedef struct Smbdev Smbdev; +struct Smbdev { // smbus relies on this structure format. 
+ Udid udid; + uchar addr; + uchar type; + ushort vid; + ushort did; +}; + +typedef struct SMBus SMBus; +struct SMBus { + QLock; /* mutex */ + Rendez r; /* rendezvous point for completion interrupts */ + void *arg; /* implementation dependent */ + ulong base; /* port or memory base of smbus */ + int busy; + Smbdev* (*smbmatch)(Smbdev*, int, int); + int (*transact)(SMBus*, int, int, int, uchar*, int); + void (*enumerate)(SMBus*); +}; + + +Smbdev* smbmatch(Smbdev*, int, int); +Smbdev* smbmatchaddr(int); +int smbrdbyte(Smbdev*, int); +int smbwrbyte(Smbdev*, int, int); +SMBus* smbus(void); +long smbctl(char*, long); +void smbreset(void); diff -Nru /sys/src/9k/k10/syscall.c /sys/src/9k/k10/syscall.c --- /sys/src/9k/k10/syscall.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/syscall.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,407 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/error.h" + +#include "/sys/src/libc/9syscall/sys.h" + +#include +#include + +#include "amd64.h" +#include "ureg.h" + +typedef struct { + uintptr ip; + Ureg* arg0; + char* arg1; + char msg[ERRMAX]; + Ureg* old; + Ureg ureg; +} NFrame; + +/* + * Return user to state before notify() + */ +static void +noted(Ureg* cur, uintptr arg0) +{ + NFrame *nf; + Note note; + Ureg *nur; + + qlock(&up->debug); + if(arg0 != NRSTR && !up->notified){ + qunlock(&up->debug); + pprint("suicide: call to noted when not notified\n"); + pexit("Suicide", 0); + } + up->notified = 0; + fpunoted(); + + nf = up->ureg; + + /* sanity clause */ + if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){ + qunlock(&up->debug); + pprint("suicide: bad ureg %#p in noted\n", nf); + pexit("Suicide", 0); + } + + /* + * Check the segment selectors are all valid. 
+ */ + nur = &nf->ureg; + if(nur->cs != SSEL(SiUCS, SsRPL3) || nur->ss != SSEL(SiUDS, SsRPL3) + || nur->ds != SSEL(SiUDS, SsRPL3) || nur->es != SSEL(SiUDS, SsRPL3) + || nur->fs != SSEL(SiUDS, SsRPL3) || nur->gs != SSEL(SiUDS, SsRPL3)){ + qunlock(&up->debug); + pprint("suicide: bad segment selector in noted\n"); + pexit("Suicide", 0); + } + + /* don't let user change system flags */ + nur->flags &= (Of|Df|Sf|Zf|Af|Pf|Cf); + nur->flags |= cur->flags & ~(Of|Df|Sf|Zf|Af|Pf|Cf); + + memmove(cur, nur, sizeof(Ureg)); + + switch((int)arg0){ + case NCONT: + case NRSTR: + if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted pc=%#p sp=%#p\n", + nur->ip, nur->sp); + pexit("Suicide", 0); + } + up->ureg = nf->old; + qunlock(&up->debug); + break; + case NSAVE: + if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted pc=%#p sp=%#p\n", + nur->ip, nur->sp); + pexit("Suicide", 0); + } + qunlock(&up->debug); + + splhi(); + nf->arg1 = nf->msg; + nf->arg0 = &nf->ureg; + cur->bp = PTR2UINT(nf->arg0); + nf->ip = 0; + cur->sp = PTR2UINT(nf); + break; + default: + memmove(¬e, &up->lastnote, sizeof(Note)); + qunlock(&up->debug); + pprint("suicide: bad arg %#p in noted: %s\n", arg0, note.msg); + pexit(note.msg, 0); + break; + case NDFLT: + memmove(¬e, &up->lastnote, sizeof(Note)); + qunlock(&up->debug); + if(note.flag == NDebug) + pprint("suicide: %s\n", note.msg); + pexit(note.msg, note.flag != NDebug); + break; + } +} + +/* + * Call user, if necessary, with note. + * Pass user the Ureg struct and the note on his stack. 
+ */ +int +notify(Ureg* ureg) +{ + int l; + Mreg s; + Note note; + uintptr sp; + NFrame *nf; + + if(up->procctl) + procctl(up); + if(up->nnote == 0) + return 0; + + fpunotify(ureg); + + s = spllo(); + qlock(&up->debug); + + up->notepending = 0; + up->notedeferred = 0; + memmove(¬e, &up->note[0], sizeof(Note)); + if(strncmp(note.msg, "sys:", 4) == 0){ + l = strlen(note.msg); + if(l > ERRMAX-sizeof(" pc=0x0123456789abcdef")) + l = ERRMAX-sizeof(" pc=0x0123456789abcdef"); + sprint(note.msg+l, " pc=%#p", ureg->ip); + } + + if(note.flag != NUser && (up->notified || up->notify == nil)){ + qunlock(&up->debug); + if(note.flag == NDebug) + pprint("suicide: %s\n", note.msg); + pexit(note.msg, note.flag != NDebug); + } + + if(up->notified){ + qunlock(&up->debug); + splhi(); + return 0; + } + + if(up->notify == nil){ + qunlock(&up->debug); + pexit(note.msg, note.flag != NDebug); + } + if(!okaddr(PTR2UINT(up->notify), sizeof(ureg->ip), 0)){ + qunlock(&up->debug); + pprint("suicide: bad function address %#p in notify\n", + up->notify); + pexit("Suicide", 0); + } + + sp = ureg->sp - sizeof(NFrame); + if(!okaddr(sp, sizeof(NFrame), 1)){ + qunlock(&up->debug); + pprint("suicide: bad stack address %#p in notify\n", sp); + pexit("Suicide", 0); + } + + nf = UINT2PTR(sp); + memmove(&nf->ureg, ureg, sizeof(Ureg)); + nf->old = up->ureg; + up->ureg = nf; + memmove(nf->msg, note.msg, ERRMAX); + nf->arg1 = nf->msg; + nf->arg0 = &nf->ureg; + ureg->bp = PTR2UINT(nf->arg0); + nf->ip = 0; + + ureg->sp = sp; + ureg->ip = PTR2UINT(up->notify); + up->notified = 1; + up->nnote--; + memmove(&up->lastnote, ¬e, sizeof(Note)); + memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note)); + + qunlock(&up->debug); + splx(s); + + return 1; +} + +void +syscall(int scallnr, Ureg* ureg) +{ + char *e; + uintptr sp; + int i, s; + char *str; + vlong startns, stopns; + Ar0 ar0; + static Ar0 zar0; + void (*pt)(Proc*, int, vlong, vlong); + + if(!userureg(ureg)) + panic("syscall: cs %#llux\n", ureg->cs); + + 
cycles(&up->kentry); + + m->syscall++; + up->insyscall = 1; + up->pc = ureg->ip; + up->dbgreg = ureg; + if(up->trace && (pt = proctrace) != nil) + pt(up, STrap, todget(nil), STrapSC|scallnr); + + if(up->procctl == Proc_tracesyscall){ + up->procctl = Proc_stopme; + procctl(up); + } + + up->scallnr = scallnr; + if(scallnr == RFORK) + fpusysrfork(ureg); + spllo(); + + startns = 0; + sp = ureg->sp; + up->nerrlab = 0; + ar0 = zar0; + if(!waserror()){ + if(scallnr >= nsyscall || systab[scallnr].f == nil){ + pprint("bad sys call number %d pc %#llux\n", + scallnr, ureg->ip); + postnote(up, 1, "sys: bad sys call", NDebug); + error(Ebadarg); + } + + if(sp < (USTKTOP-PGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE)) + validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0); + + memmove(up->arg, UINT2PTR(sp+BY2SE), sizeof(up->arg)); + up->psstate = systab[scallnr].n; + + if(up->syscallq != nil){ + qlock(&up->debug); + if(up->syscallq != nil){ + str = syscallfmt(scallnr, ureg->ip, (va_list)up->arg); + notedefer(); + if(!waserror()){ + qwrite(up->syscallq, str, strlen(str)); + poperror(); + } + noteallow(); + free(str); + } + qunlock(&up->debug); + startns = todget(nil); + } + systab[scallnr].f(&ar0, (va_list)up->arg); + poperror(); + } + else{ + /* failure: save the error buffer for errstr */ + e = up->syserrstr; + up->syserrstr = up->errstr; + up->errstr = e; + if(DBGFLG && up->pid == 1) + iprint("%s: syscall %s error %s\n", + up->text, systab[scallnr].n, up->syserrstr); + ar0 = systab[scallnr].r; + } + if(up->nerrlab){ + print("bad errstack [%d]: %d extra\n", scallnr, up->nerrlab); + for(i = 0; i < NERR; i++) + print("sp=%#p pc=%#p\n", + up->errlab[i].sp, up->errlab[i].pc); + panic("error stack"); + } + + /* + * Put return value in frame. + * Which element of Ar0 to use is based on specific + * knowledge of the architecture. 
+ */ + ureg->ax = ar0.p; + + if(up->syscallq != nil){ + stopns = todget(nil); + qlock(&up->debug); + if(up->syscallq != nil){ + str = sysretfmt(scallnr, (va_list)up->arg, &ar0, startns, stopns); + notedefer(); + if(!waserror()){ + qwrite(up->syscallq, str, strlen(str)); + poperror(); + } + noteallow(); + free(str); + } + qunlock(&up->debug); + } + + if(up->procctl == Proc_tracesyscall){ + up->procctl = Proc_stopme; + s = splhi(); + procctl(up); + splx(s); + } + + up->insyscall = 0; + up->psstate = 0; + + if(scallnr == NOTED) + noted(ureg, *(uintptr*)(sp+BY2SE)); + + splhi(); + if(scallnr != RFORK && scallnr != NSEC && (up->procctl || up->nnote)) + notify(ureg); + + /* if we delayed sched because we held a lock, sched now */ + if(up->delaysched){ + sched(); + splhi(); + } + kexit(ureg); +} + +uintptr +sysexecstack(uintptr stack, int argc) +{ + /* + * Given a current bottom-of-stack and a count + * of pointer arguments to be pushed onto it followed + * by an integer argument count, return a suitably + * aligned new bottom-of-stack which will satisfy any + * hardware stack-alignment contraints. + * Rounding the stack down to be aligned with the + * natural size of a pointer variable usually suffices, + * but some architectures impose further restrictions, + * e.g. 32-bit SPARC, where the stack must be 8-byte + * aligned although pointers and integers are 32-bits. + */ + USED(argc); + + return STACKALIGN(stack); +} + +void* +sysexecregs(uintptr entry, ulong ssize, ulong nargs) +{ + uintptr *sp; + Ureg *ureg; + + sp = (uintptr*)(USTKTOP - ssize); + *--sp = nargs; + + ureg = up->dbgreg; + ureg->sp = PTR2UINT(sp); + ureg->ip = entry; + ureg->type = 64; /* fiction for acid */ + + /* + * return the address of kernel/user shared data + * (e.g. 
clock stuff) + */ + return UINT2PTR(USTKTOP-sizeof(Tos)); +} + +void +sysprocsetup(Proc* p) +{ + fpusysprocsetup(p); +} + +void +sysrforkchild(Proc* child, Proc* parent) +{ + Ureg *cureg; + + /* + * Add 3*BY2SE to the stack to account for + * - the return PC + * - trap's arguments (syscallnr, ureg) + */ + child->sched.sp = PTR2UINT(child->kstack+KSTACK-(sizeof(Ureg)+3*BY2SE)); + child->sched.pc = PTR2UINT(sysrforkret); + + cureg = (Ureg*)(child->sched.sp+3*BY2SE); + memmove(cureg, parent->dbgreg, sizeof(Ureg)); + + /* Things from bottom of syscall which were never executed */ + child->psstate = 0; + child->insyscall = 0; + + fpusysrforkchild(child, parent); +} diff -Nru /sys/src/9k/k10/trap.c /sys/src/9k/k10/trap.c --- /sys/src/9k/k10/trap.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/trap.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,666 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include +#include +#include "ureg.h" + +#include "io.h" +#include "apic.h" + +#include "amd64.h" + +extern int notify(Ureg*); + +static void debugbpt(Ureg*, void*); +static void faultamd64(Ureg*, void*); +static void doublefault(Ureg*, void*); +static void unexpected(Ureg*, void*); +static void dumpstackwithureg(Ureg*); + +static Lock vctllock; +static Vctl *vctl[256]; + +enum +{ + Ntimevec = 20 /* number of time buckets for each intr */ +}; +ulong intrtimes[256][Ntimevec]; + +void* +intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name) +{ + int vno; + Vctl *v; + + if(f == nil){ + print("intrenable: nil handler for %d, tbdf %#ux for %s\n", + irq, tbdf, name); + return nil; + } + + v = malloc(sizeof(Vctl)); + v->isintr = 1; + v->irq = irq; + v->tbdf = tbdf; + v->f = f; + v->a = a; + strncpy(v->name, name, KNAMELEN-1); + v->name[KNAMELEN-1] = 0; + + ilock(&vctllock); + vno = ioapicintrenable(v); + if(vno == -1){ + iunlock(&vctllock); + print("intrenable: couldn't enable irq %d, tbdf %#ux for 
%s\n", + irq, tbdf, v->name); + free(v); + return nil; + } + if(vctl[vno]){ + if(vctl[v->vno]->isr != v->isr || vctl[v->vno]->eoi != v->eoi) + panic("intrenable: handler: %s %s %#p %#p %#p %#p", + vctl[v->vno]->name, v->name, + vctl[v->vno]->isr, v->isr, vctl[v->vno]->eoi, v->eoi); + } + v->vno = vno; + v->next = vctl[vno]; + vctl[vno] = v; + iunlock(&vctllock); + + if(v->mask != nil) + v->mask(v, 0); + + /* + * Return the assigned vector so intrdisable can find + * the handler; the IRQ is useless in the wondrefule world + * of the IOAPIC. + */ + return v; +} + +int +intrdisable(void* vector) +{ + Vctl *v, **vl; + + ilock(&vctllock); + v = vector; + for(vl = &vctl[v->vno]; *vl != nil; vl = &(*vl)->next) + if(*vl == v) + break; + if(*vl == nil) + panic("intrdisable: v %#p", v); + if(v->mask != nil) + v->mask(v, 1); + v->f(nil, v->a); + *vl = v->next; + ioapicintrdisable(v->vno); + iunlock(&vctllock); + + free(v); + + return 0; +} + +static long +irqallocread(Chan*, void *vbuf, long n, vlong offset) +{ + char *buf, *p, str[2*(11+1)+KNAMELEN+1+1]; + int ns, vno; + long oldn; + Vctl *v; + + if(n < 0 || offset < 0) + error(Ebadarg); + + oldn = n; + buf = vbuf; + for(vno=0; vnonext){ + ns = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name); + if(ns <= offset) /* if do not want this, skip entry */ + offset -= ns; + else{ + /* skip offset bytes */ + ns -= offset; + p = str+offset; + offset = 0; + + /* write at most max(n,ns) bytes */ + if(ns > n) + ns = n; + memmove(buf, p, ns); + n -= ns; + buf += ns; + + if(n == 0) + return oldn; + } + } + } + return oldn - n; +} + +void +trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name) +{ + Vctl *v; + + if(vno < 0 || vno >= 256) + panic("trapenable: vno %d\n", vno); + v = malloc(sizeof(Vctl)); + v->tbdf = BUSUNKNOWN; + v->f = f; + v->a = a; + strncpy(v->name, name, KNAMELEN); + v->name[KNAMELEN-1] = 0; + + ilock(&vctllock); + v->next = vctl[vno]; + vctl[vno] = v; + iunlock(&vctllock); +} + 
+static void +nmienable(void) +{ + int x; + + /* + * Hack: should be locked with NVRAM access. + */ + outb(0x70, 0x80); /* NMI latch clear */ + outb(0x70, 0); + + x = inb(0x61) & 0x07; /* Enable NMI */ + outb(0x61, 0x08|x); + outb(0x61, x); +} + +void +trapinit(void) +{ + /* + * Need to set BPT interrupt gate - here or in vsvminit? + */ + /* + * Special traps. + * Syscall() is called directly without going through trap(). + */ + trapenable(IdtBP, debugbpt, 0, "#BP"); + trapenable(IdtPF, faultamd64, 0, "#PF"); + trapenable(IdtDF, doublefault, 0, "#DF"); + trapenable(Idt0F, unexpected, 0, "#15"); + nmienable(); + + if(m->machno == 0) + addarchfile("irqalloc", 0444, irqallocread, nil); +} + +static char* excname[32] = { + "#DE", /* Divide-by-Zero Error */ + "#DB", /* Debug */ + "#NMI", /* Non-Maskable-Interrupt */ + "#BP", /* Breakpoint */ + "#OF", /* Overflow */ + "#BR", /* Bound-Range */ + "#UD", /* Invalid-Opcode */ + "#NM", /* Device-Not-Available */ + "#DF", /* Double-Fault */ + "#9 (reserved)", + "#TS", /* Invalid-TSS */ + "#NP", /* Segment-Not-Present */ + "#SS", /* Stack */ + "#GP", /* General-Protection */ + "#PF", /* Page-Fault */ + "#15 (reserved)", + "#MF", /* x87 FPE-Pending */ + "#AC", /* Alignment-Check */ + "#MC", /* Machine-Check */ + "#XF", /* SIMD Floating-Point */ + "#20 (reserved)", + "#21 (reserved)", + "#22 (reserved)", + "#23 (reserved)", + "#24 (reserved)", + "#25 (reserved)", + "#26 (reserved)", + "#27 (reserved)", + "#28 (reserved)", + "#29 (reserved)", + "#30 (reserved)", + "#31 (reserved)", +}; + +/* + * keep histogram of interrupt service times + */ +void +intrtime(Mach*, int vno) +{ + ulong diff; + ulong x; + + x = perfticks(); + diff = x - m->perf.intrts; + m->perf.intrts = x; + + m->perf.inintr += diff; + if(up == nil && m->perf.inidle > diff) + m->perf.inidle -= diff; + + diff /= m->cpumhz*100; // quantum = 100µsec + if(diff >= Ntimevec) + diff = Ntimevec-1; + intrtimes[vno][diff]++; +} + +/* go to user space */ +void +kexit(Ureg*) +{ 
+ uvlong t; + Tos *tos; + + /* precise time accounting, kernel exit */ + tos = (Tos*)(USTKTOP-sizeof(Tos)); + cycles(&t); + tos->kcycles += t - up->kentry; + tos->pcycles = up->pcycles; + tos->pid = up->pid; + tos->cyclefreq = m->cyclefreq; +} + +/* + * All traps come here. It is slower to have all traps call trap() + * rather than directly vectoring the handler. However, this avoids a + * lot of code duplication and possible bugs. The only exception is + * for a system call. + * Trap is called with interrupts disabled via interrupt-gates. + */ +void +trap(Ureg* ureg) +{ + int clockintr, vno, user; + void (*pt)(Proc*, int, vlong, vlong); + char buf[ERRMAX]; + Vctl *ctl, *v; + Proc *oup; + + m->perf.intrts = perfticks(); + user = userureg(ureg); + if(user){ + up->dbgreg = ureg; + cycles(&up->kentry); + } + + clockintr = 0; + + vno = ureg->type; + if(ctl = vctl[vno]){ + if(ctl->isintr){ + m->intr++; + if(vno >= IdtPIC && vno != IdtSYSCALL) + m->lastintr = ctl->irq; + + oup = up; + up = nil; + if(ctl->isr) + ctl->isr(vno); + for(v = ctl; v != nil; v = v->next){ + if(v->f) + v->f(ureg, v->a); + } + if(ctl->eoi) + ctl->eoi(vno); + up = oup; + + intrtime(m, vno); + + if(ctl->irq == IdtPIC+IrqCLOCK || ctl->irq == IdtTIMER){ + checkflushmmu(); + clockintr = 1; + } + + if(up && !clockintr) + preempted(); + }else{ + if(user && up->trace && (pt = proctrace) != nil){ + if(vno != IdtPF) + pt(up, STrap, 0, vno); + } + for(v = ctl; v != nil; v = v->next){ + if(v->f) + v->f(ureg, v->a); + } + } + } + else if(vno <= nelem(excname) && user){ + spllo(); + snprint(buf, sizeof(buf), "sys: trap: %s", excname[vno]); + postnote(up, 1, buf, NDebug); + } + else{ + if(vno == IdtNMI){ + if(active.ispanic){ + /* + * Use of m->dbgsp avoids stack confusion + * caused by writing the address of the SP to + * the top of the stack. 
+ */ + m->dbgreg = ureg; + m->dbgsp = &ureg->sp; + for(;;) + _halt(); + } + if(m->perfintr != nil){ + m->perfintr(ureg, nil); + nmienable(); + return; + } + nmienable(); + } + if(vno == 39){ + /* We get this one and didn't track it down yet: it's ok */ + iprint("vno %d: buggeration @ %#p...\n", vno, ureg->ip); + }else if(vno < nelem(excname)){ + dumpregs(ureg); + panic("%s pc %#p", excname[vno], ureg->ip); + }else + panic("unknown trap/intr: %d pc %#p\n", vno, ureg->ip); + } + splhi(); + + /* delaysched set because we held a lock or because our quantum ended */ + if(up && up->delaysched && clockintr){ + sched(); + splhi(); + } + + checkflushmmu(); + + if(user){ + if(up->procctl || up->nnote) + notify(ureg); + kexit(ureg); + } +} + +/* + * Dump general registers. + */ +static void +dumpgpr(Ureg* ureg) +{ + if(up != nil) + iprint("cpu%d: registers for %s %d [%#p]\n", + m->machno, up->text, up->pid, getcallerpc(&ureg)); + else + iprint("cpu%d: registers for kernel\n", m->machno); +if(1){ + iprint("ax\t%#16.16llux ", ureg->ax); + iprint("bx\t%#16.16llux\n", ureg->bx); + iprint("cx\t%#16.16llux ", ureg->cx); + iprint("dx\t%#16.16llux\n", ureg->dx); + iprint("di\t%#16.16llux ", ureg->di); + iprint("si\t%#16.16llux\n", ureg->si); + iprint("bp\t%#16.16llux ", ureg->bp); + iprint("r8\t%#16.16llux\n", ureg->r8); + iprint("r9\t%#16.16llux ", ureg->r9); + iprint("r10\t%#16.16llux\n", ureg->r10); + iprint("r11\t%#16.16llux ", ureg->r11); + iprint("r12\t%#16.16llux\n", ureg->r12); + iprint("r13\t%#16.16llux ", ureg->r13); + iprint("r14\t%#16.16llux\n", ureg->r14); + iprint("r15\t%#16.16llux\n", ureg->r15); +} + iprint("ds %#4.4ux es %#4.4ux fs %#4.4ux gs %#4.4ux\n", + ureg->ds, ureg->es, ureg->fs, ureg->gs); + iprint("type\t%#llux ", ureg->type); + iprint("error\t%#llux\n", ureg->error); + iprint("pc\t%#llux ", ureg->ip); + iprint("cs\t%#llux\n", ureg->cs); + iprint("flags\t%#llux\n", ureg->flags); + iprint("sp\t%#llux ", ureg->sp); + iprint("ss\t%#llux\n", ureg->ss); + 
iprint("type\t%#llux\n", ureg->type); + + iprint("m\t%#16.16p up\t%#16.16p\n", m, up); +} + +void +dumpregs(Ureg* ureg) +{ + iprint("dumpregs: %#p ", getcallerpc(&ureg)); + dumpgpr(ureg); + + /* + * Processor control registers. + * If machine check exception, time stamp counter, page size extensions + * or enhanced virtual 8086 mode extensions are supported, there is a + * CR4. If there is a CR4 and machine check extensions, read the machine + * check address and machine check type registers if RDMSR supported. + */ + iprint("cr0\t%#16.16llux\n", cr0get()); + iprint("cr2\t%#16.16llux\n", cr2get()); + iprint("cr3\t%#16.16llux\n", cr3get()); + +// archdumpregs(); +} + +/* + * Fill in enough of Ureg to get a stack trace, and call a function. + * Used by debugging interface rdb. + */ +void +callwithureg(void (*fn)(Ureg*)) +{ + Ureg ureg; + memset(&ureg, 0, sizeof(ureg)); + ureg.ip = getcallerpc(&fn); + ureg.sp = PTR2UINT(&fn); + fn(&ureg); +} + +static void +dumpstackwithureg(Ureg* ureg) +{ + uintptr l, v, i, estack; + extern ulong etext; + char *s; + int x; + + if((s = getconf("*nodumpstack")) != nil && atoi(s) != 0){ + iprint("dumpstack disabled\n"); + return; + } + + x = 0; + x += iprint("ktrace /kernel/path %#p %#p\n", ureg->ip, ureg->sp); + i = 0; + if(up != nil + && (uintptr)&l >= (uintptr)up->kstack + && (uintptr)&l <= (uintptr)up->kstack+KSTACK) + estack = (uintptr)up->kstack+KSTACK; + else if((uintptr)&l >= m->stack && (uintptr)&l <= m->stack+MACHSTKSZ) + estack = m->stack+MACHSTKSZ; + else{ + if(up != nil) + iprint("&up->kstack %#p &l %#p\n", up->kstack, &l); + else + iprint("&m %#p &l %#p\n", m, &l); + return; + } + x += iprint("estackx %#p\n", estack); + + for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ + v = *(uintptr*)l; + if((KTZERO < v && v < (uintptr)&etext) + || ((uintptr)&l < v && v < estack) || estack-l < 256){ + x += iprint("%#16.16p=%#16.16p ", l, v); + i++; + } + if(i == 2){ + i = 0; + x += iprint("\n"); + } + } + if(i) + iprint("\n"); +} 
+
+void
+dumpstack(void)
+{
+	callwithureg(dumpstackwithureg);
+}
+
+static void
+debugbpt(Ureg* ureg, void*)
+{
+	char buf[ERRMAX];
+
+	if(up == 0)
+		panic("kernel bpt");
+	/* restore pc to instruction that caused the trap */
+	ureg->ip--;
+	sprint(buf, "sys: breakpoint");
+	postnote(up, 1, buf, NDebug);
+}
+
+static void
+doublefault(Ureg*, void*)
+{
+	panic("double fault");
+}
+
+static void
+unexpected(Ureg* ureg, void*)
+{
+	iprint("unexpected trap %llud; ignoring\n", ureg->type);
+}
+
+void printpages(Pages*);
+
+static void
+faultamd64(Ureg* ureg, void*)
+{
+	u64int addr, arg;
+	int read, user, insyscall;
+	char buf[ERRMAX];
+	void (*pt)(Proc*, int, vlong, vlong);
+
+	addr = cr2get();	/* faulting address as reported by cr2get() */
+	user = userureg(ureg);
+//	if(!user && mmukmapsync(addr))
+//		return;
+
+	/*
+	 * There must be a user context.
+	 * If not, the usual problem is causing a fault during
+	 * initialisation before the system is fully up.
+	 */
+	if(up == nil){
+		panic("fault with up == nil; pc %#llux addr %#llux\n",
+			ureg->ip, addr);
+	}
+	read = !(ureg->error & 2);	/* bit 1 of the error code clear: treated as a read fault */
+
+	if(up->trace && (pt = proctrace) != nil){
+		if(read)
+			arg = STrapRPF | (addr&STrapMask);
+		else
+			arg = STrapWPF | (addr&STrapMask);
+		pt(up, STrap, 0, arg);
+	}
+
+	insyscall = up->insyscall;	/* saved here, restored on the way out */
+	up->insyscall = 1;
+if(iskaddr(addr)){
+	print("kaddr %#llux pc %#p\n", addr, ureg->ip); prflush();
+	dumpregs(ureg);
+}
+	if(fault(addr, read) < 0){
+		splhi();
+		if(!user){
+			dumpregs(ureg);
+			panic("fault: %#llux pc %#p\n", addr, ureg->ip);
+		}
+		sprint(buf, "sys: trap: fault %s addr=%#llux",
+			read? "read": "write", addr);
+		for(int i = 0; i < NSEG; i++){	/* diagnostic dump of every attached segment's pages */
+			if(up->seg[i] != nil)
+				printpages(up->seg[i]->pages);
+		}
+		//mmudump(up);
+		checkpages();
+		postnote(up, 1, buf, NDebug);
+		if(insyscall)
+			error(buf);
+	}
+	up->insyscall = insyscall;
+}
+
+/*
+ * return the userpc the last exception happened at
+ */
+uintptr
+userpc(Ureg* ureg)
+{
+	if(ureg == nil)
+		ureg = up->dbgreg;
+	return ureg->ip;
+}
+
+/* This routine must save the values of registers the user is not permitted
+ * to write from devproc and then restore the saved values before returning.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	u64int cs, flags, ss;
+	u16int ds, es, fs, gs;
+
+	ss = ureg->ss;
+	flags = ureg->flags;
+	cs = ureg->cs;
+	gs = ureg->gs;	/* each segment register is saved from its own field, */
+	fs = ureg->fs;	/* not from cs: the restores below would otherwise */
+	es = ureg->es;	/* overwrite ds/es/fs/gs with the code selector */
+	ds = ureg->ds;
+	memmove(pureg, uva, n);	/* presumably pureg aliases *ureg; verify against the devproc caller */
+	ureg->ds = ds;
+	ureg->es = es;
+	ureg->fs = fs;
+	ureg->gs = gs;
+	ureg->cs = cs;
+	ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00);	/* low byte from the new copy, bits 8-15 from the saved value */
+	ureg->ss = ss;
+}
+
+/* Give enough context in the ureg to produce a kernel stack for
+ * a sleeping process
+ */
+void
+setkernur(Ureg* ureg, Proc* p)
+{
+	ureg->ip = p->sched.pc;
+	ureg->sp = p->sched.sp+BY2SE;
+}
+
+uintptr
+dbgpc(Proc *p)
+{
+	Ureg *ureg;
+
+	ureg = p->dbgreg;
+	if(ureg == 0)
+		return 0;
+
+	return ureg->ip;
+}
diff -Nru /sys/src/9k/k10/usbehci.h /sys/src/9k/k10/usbehci.h
--- /sys/src/9k/k10/usbehci.h	Thu Jan  1 00:00:00 1970
+++ /sys/src/9k/k10/usbehci.h	Wed Dec  9 00:00:00 2015
@@ -0,0 +1,83 @@
+/* override default macros from ../port/usb.h */
+#undef dprint
+#undef ddprint
+#undef deprint
+#undef ddeprint
+#define dprint(...)	do if(ehcidebug)print(__VA_ARGS__); while(0)
+#define ddprint(...)	do if(ehcidebug>1)print(__VA_ARGS__); while(0)
+#define deprint(...)	do if(ehcidebug || ep->debug)print(__VA_ARGS__); while(0)
+#define ddeprint(...)
do if(ehcidebug>1 || ep->debug>1)print(__VA_ARGS__); while(0) + +typedef struct Ctlr Ctlr; +typedef struct Eopio Eopio; +typedef struct Isoio Isoio; +typedef struct Poll Poll; +typedef struct Qh Qh; +typedef struct Qtree Qtree; + +#pragma incomplete Ctlr; +#pragma incomplete Eopio; +#pragma incomplete Isoio; +#pragma incomplete Poll; +#pragma incomplete Qh; +#pragma incomplete Qtree; + +struct Poll +{ + Lock; + Rendez; + int must; + int does; +}; + +struct Ctlr +{ + Rendez; /* for waiting to async advance doorbell */ + Lock; /* for ilock. qh lists and basic ctlr I/O */ + QLock portlck; /* for port resets/enable... (and doorbell) */ + int active; /* in use or not */ + Pcidev* pcidev; + Ecapio* capio; /* Capability i/o regs */ + Eopio* opio; /* Operational i/o regs */ + + int nframes; /* 1024, 512, or 256 frames in the list */ + ulong* frames; /* periodic frame list (hw) */ + Qh* qhs; /* async Qh circular list for bulk/ctl */ + Qtree* tree; /* tree of Qhs for the periodic list */ + int ntree; /* number of dummy qhs in tree */ + Qh* intrqhs; /* list of (not dummy) qhs in tree */ + Isoio* iso; /* list of active Iso I/O */ + ulong load; + ulong isoload; + int nintr; /* number of interrupts attended */ + int ntdintr; /* number of intrs. with something to do */ + int nqhintr; /* number of async td intrs. */ + int nisointr; /* number of periodic td intrs. 
*/ + int nreqs; + Poll poll; +}; + +/* + * Operational registers (hw) + */ +struct Eopio +{ + u32int cmd; /* 00 command */ + u32int sts; /* 04 status */ + u32int intr; /* 08 interrupt enable */ + u32int frno; /* 0c frame index */ + u32int seg; /* 10 bits 63:32 of EHCI datastructs (unused) */ + u32int frbase; /* 14 frame list base addr, 4096-byte boundary */ + u32int link; /* 18 link for async list */ + uchar d2c[0x40-0x1c]; /* 1c dummy */ + u32int config; /* 40 1: all ports default-routed to this HC */ + u32int portsc[1]; /* 44 Port status and control, one per port */ +}; + +extern int ehcidebug; +extern Ecapio *ehcidebugcapio; +extern int ehcidebugport; + +void ehcilinkage(Hci *hp); +void ehcimeminit(Ctlr *ctlr); +void ehcirun(Ctlr *ctlr, int on); diff -Nru /sys/src/9k/k10/usbehcipc.c /sys/src/9k/k10/usbehcipc.c --- /sys/src/9k/k10/usbehcipc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/usbehcipc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,312 @@ +/* + * PC-specific code for + * USB Enhanced Host Controller Interface (EHCI) driver + * High speed USB 2.0. 
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" +#include "../port/portusbehci.h" +#include "usbehci.h" + +static Ctlr* ctlrs[Nhcis]; +static int maxehci = Nhcis; + +static int +ehciecap(Ctlr *ctlr, int cap) +{ + int i, off; + + off = (ctlr->capio->capparms >> Ceecpshift) & Ceecpmask; + for(i=0; i<48; i++){ + if(off < 0x40 || (off & 3) != 0) + break; + if(pcicfgr8(ctlr->pcidev, off) == cap) + return off; + off = pcicfgr8(ctlr->pcidev, off+1); + } + return -1; +} + +static void +getehci(Ctlr* ctlr) +{ + int i, off; + + off = ehciecap(ctlr, Clegacy); + if(off == -1) + return; + if(pcicfgr8(ctlr->pcidev, off+CLbiossem) != 0){ + dprint("ehci %#p: bios active, taking over...\n", ctlr->capio); + pcicfgw8(ctlr->pcidev, off+CLossem, 1); + for(i = 0; i < 100; i++){ + if(pcicfgr8(ctlr->pcidev, off+CLbiossem) == 0) + break; + delay(10); + } + if(i == 100) + print("ehci %#p: bios timed out\n", ctlr->capio); + } + pcicfgw32(ctlr->pcidev, off+CLcontrol, 0); /* no SMIs */ +} + +static void +ehcireset(Ctlr *ctlr) +{ + Eopio *opio; + int i; + + ilock(ctlr); + dprint("ehci %#p reset\n", ctlr->capio); + opio = ctlr->opio; + + /* + * reclaim from bios + */ + getehci(ctlr); + + /* + * route interrupts to other controllers until we're ready + */ + ehcirun(ctlr, 0); + opio->config = 0; + + /* clear high 32 bits of address signals if it's 64 bits capable. + * This is probably not needed but it does not hurt and others do it. 
+ */ + if((ctlr->capio->capparms & C64) != 0){ + dprint("ehci: 64 bits\n"); + opio->seg = 0; + coherence(); + } + + if(ehcidebugcapio != ctlr->capio){ + opio->cmd |= Chcreset; /* controller reset */ + coherence(); + for(i = 0; i < 100; i++){ + if((opio->cmd & Chcreset) == 0) + break; + delay(1); + } + if(i == 100) + print("ehci %#p controller reset timed out\n", ctlr->capio); + } + + /* requesting more interrupts per µframe may miss interrupts */ + opio->cmd &= ~Citcmask; + opio->cmd |= 1 << Citcshift; /* max of 1 intr. per 125 µs */ + coherence(); + switch(opio->cmd & Cflsmask){ + case Cfls1024: + ctlr->nframes = 1024; + break; + case Cfls512: + ctlr->nframes = 512; + break; + case Cfls256: + ctlr->nframes = 256; + break; + default: + panic("ehci: unknown fls %d", opio->cmd & Cflsmask); + } + dprint("ehci: %d frames\n", ctlr->nframes); + iunlock(ctlr); +} + +static void +setdebug(Hci*, int d) +{ + ehcidebug = d; +} + +static void +shutdown(Hci *hp) +{ + int i; + Ctlr *ctlr; + Eopio *opio; + + ctlr = hp->aux; + ilock(ctlr); + opio = ctlr->opio; + opio->cmd |= Chcreset; /* controller reset */ + coherence(); + for(i = 0; i < 100; i++){ + if((opio->cmd & Chcreset) == 0) + break; + delay(1); + } + if(i >= 100) + print("ehci %#p controller reset timed out\n", ctlr->capio); + delay(100); + ehcirun(ctlr, 0); + opio->frbase = 0; + iunlock(ctlr); +} + +static int +checkdev(Pcidev *p) +{ + char *conf, *s, dev[32]; + + conf = getconf("*badehci"); + if(conf == nil) + return 0; + snprint(dev, sizeof dev, "%.4ux/%.4ux", p->vid, p->did); + + s = strstr(conf, dev); + if(s != nil && (s[9] == 0 || s[9] == ' ')) + return -1; + return 0; +} + +static void +scanpci(void) +{ + int i; + uintmem io; + Ctlr *ctlr; + Pcidev *p; + Ecapio *capio; + static int already; + + if(already) + return; + already = 1; + i = 0; + for(p = nil; (p = pcimatch(p, 0, 0)) != nil; ) { + /* + * Find EHCI controllers (Programming Interface = 0x20). 
+ */ + if(p->ccrb != 0xc || p->ccru != 3 || p->ccrp != 0x20) + continue; + if(i == Nhcis){ + print("ehci: bug: more than %d controllers\n", Nhcis); + continue; + } + if(checkdev(p) == -1){ + print("usbehci: ignore %.4ux/%.4ux\n", p->vid, p->did); + continue; + } + io = p->mem[0].bar & ~(uintmem)0xf; + if(io == 0){ + print("usbehci: %x %x: failed to map registers\n", + p->vid, p->did); + continue; + } + if(p->intl == 0xff || p->intl == 0) { + print("usbehci: no irq assigned for port %#P\n", io); + continue; + } + dprint("usbehci: %#x %#x: port %#P size %#x irq %d\n", + p->vid, p->did, io, p->mem[0].size, p->intl); + capio = vmap(io, p->mem[0].size); + if(capio == nil){ + print("usbehci: can't vmap %#P\n", io); + continue; + } + + ctlr = malloc(sizeof(Ctlr)); + if (ctlr == nil) + panic("usbehci: out of memory"); + ctlr->pcidev = p; + ctlr->capio = capio; + ctlr->opio = (Eopio*)((uintptr)capio + (capio->cap & 0xff)); + pcisetbme(p); + pcisetpms(p, 0); + + /* + * currently, if we enable a second ehci controller on zt + * systems w x58m motherboard, we'll wedge solid after iunlock + * in init for the second one. + */ + if (i >= maxehci) { + print("usbehci: ignoring controllers after first %d, " + "at %#P\n", maxehci, io); + pciclrbme(p); + vunmap(capio, p->mem[0].size); + free(ctlr); + continue; + } + ctlrs[i++] = ctlr; + } +} + +static int +reset(Hci *hp) +{ + int i; + char *s; + Ctlr *ctlr; + Ecapio *capio; + Pcidev *p; + static Lock resetlck; + + s = getconf("*maxehci"); + if(s != nil){ + i = strtoul(s, &s, 0); + if(*s == 0) + maxehci = i; + } + if(maxehci == 0 || getconf("*nousbehci")) + return -1; + + ilock(&resetlck); + scanpci(); + + /* + * Any adapter matches if no hp->port is supplied, + * otherwise the ports must match. 
+ */ + ctlr = nil; + for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){ + ctlr = ctlrs[i]; + if(ctlr->active == 0) + if(hp->port == 0 || hp->port == (uintptr)ctlr->capio){ + ctlr->active = 1; + break; + } + } + iunlock(&resetlck); + if(i >= Nhcis || ctlrs[i] == nil) + return -1; + + p = ctlr->pcidev; + hp->aux = ctlr; + hp->port = (uintptr)ctlr->capio; + hp->irq = p->intl; + hp->tbdf = p->tbdf; + + capio = ctlr->capio; + hp->nports = capio->parms & Cnports; + + ddprint("echi: %s, ncc %ud npcc %ud\n", + capio->parms & 0x10000 ? "leds" : "no leds", + (capio->parms >> 12) & 0xf, (capio->parms >> 8) & 0xf); + ddprint("ehci: routing %s, %sport power ctl, %d ports\n", + capio->parms & 0x40 ? "explicit" : "automatic", + capio->parms & 0x10 ? "" : "no ", hp->nports); + + ehcireset(ctlr); + ehcimeminit(ctlr); + + /* + * Linkage to the generic HCI driver. + */ + ehcilinkage(hp); + hp->shutdown = shutdown; + hp->debug = setdebug; + return 0; +} + +void +usbehcilink(void) +{ + addhcitype("ehci", reset); +} diff -Nru /sys/src/9k/k10/usbohci.c /sys/src/9k/k10/usbohci.c --- /sys/src/9k/k10/usbohci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/usbohci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2591 @@ +/* + * USB Open Host Controller Interface (Ohci) driver + * + * BUGS: + * - not really 64-bit safe (Tds aren't necessarly in low memory) + * - Missing isochronous input streams. + * - Too many delays and ilocks. + * - bandwidth admission control must be done per-frame. + * - Buffering could be handled like in uhci, to avoid + * needed block allocation and avoid allocs for small Tds. + * - must warn of power overruns. 
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +#include "../port/usb.h" + +typedef struct Ctlio Ctlio; +typedef struct Ctlr Ctlr; +typedef struct Ed Ed; +typedef struct Edpool Edpool; +typedef struct Epx Epx; +typedef struct Hcca Hcca; +typedef struct Isoio Isoio; +typedef struct Ohci Ohci; +typedef struct Qio Qio; +typedef struct Qtree Qtree; +typedef struct Td Td; +typedef struct Tdpool Tdpool; + +enum +{ + Incr = 64, /* for Td and Ed pools */ + + Align = 0x20, /* OHCI only requires 0x10 */ + /* use always a power of 2 */ + + Abortdelay = 1, /* delay after cancelling Tds (ms) */ + Tdatomic = 8, /* max nb. of Tds per bulk I/O op. */ + Enabledelay = 100, /* waiting for a port to enable */ + + + /* Queue states (software) */ + Qidle = 0, + Qinstall, + Qrun, + Qdone, + Qclose, + Qfree, + + /* Ed control bits */ + Edmpsmask = 0x7ff, /* max packet size */ + Edmpsshift = 16, + Edlow = 1 << 13, /* low speed */ + Edskip = 1 << 14, /* skip this ed */ + Ediso = 1 << 15, /* iso Tds used */ + Edtddir = 0, /* get dir from td */ + Edin = 2 << 11, /* direction in */ + Edout = 1 << 11, /* direction out */ + Eddirmask = 3 << 11, /* direction bits */ + Edhalt = 1, /* halted (in head ptr) */ + Edtoggle = 2, /* toggle (in head ptr) 1 == data1 */ + + /* Td control bits */ + Tdround = 1<<18, /* (rounding) short packets ok */ + Tdtoksetup = 0<<19, /* setup packet */ + Tdtokin = 2<<19, /* in packet */ + Tdtokout = 1<<19, /* out packet */ + Tdtokmask = 3<<19, /* in/out/setup bits */ + Tdnoioc = 7<<21, /* intr. cnt. 
value for no interrupt */ + Tdusetog = 1<<25, /* use toggle from Td (1) or Ed (0) */ + Tddata1 = 1<<24, /* data toggle (1 == data1) */ + Tddata0 = 0<<24, + Tdfcmask = 7, /* frame count (iso) */ + Tdfcshift = 24, + Tdsfmask = 0xFFFF, /* starting frame (iso) */ + Tderrmask = 3, /* error counter */ + Tderrshift = 26, + Tdccmask = 0xf, /* condition code (status) */ + Tdccshift = 28, + Tdiccmask = 0xf, /* condition code (iso, offsets) */ + Tdiccshift = 12, + + Ntdframes = 0x10000, /* # of different iso frame numbers */ + + /* Td errors (condition code) */ + Tdok = 0, + Tdcrc = 1, + Tdbitstuff = 2, + Tdbadtog = 3, + Tdstalled = 4, + Tdtmout = 5, + Tdpidchk = 6, + Tdbadpid = 7, + Tddataovr = 8, + Tddataund = 9, + Tdbufovr = 0xC, + Tdbufund = 0xD, + Tdnotacc = 0xE, + + /* control register */ + Cple = 0x04, /* periodic list enable */ + Cie = 0x08, /* iso. list enable */ + Ccle = 0x10, /* ctl list enable */ + Cble = 0x20, /* bulk list enable */ + Cfsmask = 3 << 6, /* functional state... */ + Cfsreset = 0 << 6, + Cfsresume = 1 << 6, + Cfsoper = 2 << 6, + Cfssuspend = 3 << 6, + + /* command status */ + Sblf = 1 << 2, /* bulk list (load) flag */ + Sclf = 1 << 1, /* control list (load) flag */ + Shcr = 1 << 0, /* host controller reset */ + + /* intr enable */ + Mie = 1 << 31, + Oc = 1 << 30, + Rhsc = 1 << 6, + Fno = 1 << 5, + Ue = 1 << 4, + Rd = 1 << 3, + Sf = 1 << 2, + Wdh = 1 << 1, + So = 1 << 0, + + Fmaxpktmask = 0x7fff, + Fmaxpktshift = 16, + HcRhDescA_POTPGT_MASK = 0xff << 24, + HcRhDescA_POTPGT_SHIFT = 24, + + /* Rh status */ + Lps = 1 << 0, + Cgp = 1 << 0, + Oci = 1 << 1, + Psm = 1 << 8, + Nps = 1 << 9, + Drwe = 1 << 15, + Srwe = 1 << 15, + Lpsc = 1 << 16, + Ccic = 1 << 17, + Crwe = 1 << 31, + + /* port status */ + Ccs = 0x00001, /* current connect status */ + Pes = 0x00002, /* port enable status */ + Pss = 0x00004, /* port suspend status */ + Poci = 0x00008, /* over current indicator */ + Prs = 0x00010, /* port reset status */ + Pps = 0x00100, /* port power status */ + 
Lsda = 0x00200, /* low speed device attached */ + Csc = 0x10000, /* connect status change */ + Pesc = 0x20000, /* enable status change */ + Pssc = 0x40000, /* suspend status change */ + Ocic = 0x80000, /* over current ind. change */ + Prsc = 0x100000, /* reset status change */ + + /* port status write bits */ + Cpe = 0x001, /* clear port enable */ + Spe = 0x002, /* set port enable */ + Spr = 0x010, /* set port reset */ + Spp = 0x100, /* set port power */ + Cpp = 0x200, /* clear port power */ + +}; + +/* + * Endpoint descriptor. (first 4 words used by hardware) + */ +struct Ed { + u32int ctrl; + u32int tail; /* transfer descriptor */ + u32int head; + u32int nexted; + + Ed* next; /* sw; in free list or next in list */ + Td* tds; /* in use by current xfer; all for iso */ + Ep* ep; /* debug/align */ + Ed* inext; /* debug/align (dump interrupt eds). */ +}; + +/* + * Endpoint I/O state (software), per direction. + */ +struct Qio +{ + QLock; /* for the entire I/O process */ + Rendez; /* wait for completion */ + Ed* ed; /* to place Tds on it */ + int sched; /* queue number (intr/iso) */ + int toggle; /* Tddata0/Tddata1 */ + ulong usbid; /* device/endpoint address */ + int tok; /* Tdsetup, Tdtokin, Tdtokout */ + long iotime; /* last I/O time; to hold interrupt polls */ + int debug; /* for the endpoint */ + char* err; /* error status */ + int state; /* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */ + long bw; /* load (intr/iso) */ +}; + +struct Ctlio +{ + Qio; /* single Ed for all transfers */ + uchar* data; /* read from last ctl req. */ + int ndata; /* number of bytes read */ +}; + +struct Isoio +{ + Qio; + int nframes; /* number of frames for a full second */ + Td* atds; /* Tds avail for further I/O */ + int navail; /* number of avail Tds */ + u32int frno; /* next frame number avail for I/O */ + u32int left; /* remainder after rounding Hz to samples/ms */ + int nerrs; /* consecutive errors on iso I/O */ +}; + +/* + * Transfer descriptor. 
Size must be multiple of 32 + * First block is used by hardware (aligned to 32). + */ +struct Td +{ + u32int ctrl; + u32int cbp; /* current buffer pointer */ + u32int nexttd; + u32int be; + u16int offsets[8]; /* used by Iso Tds only */ + + Td* next; /* in free or Ed tds list */ + Td* anext; /* in avail td list (iso) */ + Ep* ep; /* using this Td for I/O */ + Qio* io; /* using this Td for I/O */ + Block* bp; /* data for this Td */ + ulong nbytes; /* bytes in this Td */ + u32int cbp0; /* initial value for cbp */ + int last; /* true for last Td in Qio */ +}; + +/* + * Host controller communication area (hardware) + */ +struct Hcca +{ + u32int intrtable[32]; + u16int framenumber; + u16int pad1; + u32int donehead; + uchar reserved[116]; +}; + +/* + * I/O registers + */ +struct Ohci +{ + /* control and status group */ + u32int revision; /*00*/ + u32int control; /*04*/ + u32int cmdsts; /*08*/ + u32int intrsts; /*0c*/ + u32int intrenable; /*10*/ + u32int intrdisable; /*14*/ + + /* memory pointer group */ + u32int hcca; /*18*/ + u32int periodcurred; /*1c*/ + u32int ctlheaded; /*20*/ + u32int ctlcurred; /*24*/ + u32int bulkheaded; /*28*/ + u32int bulkcurred; /*2c*/ + u32int donehead; /*30*/ + + /* frame counter group */ + u32int fminterval; /*34*/ + u32int fmremaining; /*38*/ + u32int fmnumber; /*3c*/ + u32int periodicstart; /*40*/ + u32int lsthreshold; /*44*/ + + /* root hub group */ + u32int rhdesca; /*48*/ + u32int rhdescb; /*4c*/ + u32int rhsts; /*50*/ + u32int rhportsts[15]; /*54*/ + u32int pad25[20]; /*90*/ + + /* unknown */ + u32int hostueaddr; /*e0*/ + u32int hostuests; /*e4*/ + u32int hosttimeoutctrl; /*e8*/ + u32int pad59; /*ec*/ + u32int pad60; /*f0*/ + u32int hostrevision; /*f4*/ + u32int pad62[2]; + /*100*/ +}; + +/* + * Endpoint tree (software) + */ +struct Qtree +{ + int nel; + int depth; + ulong* bw; + Ed** root; +}; + +struct Tdpool +{ + Lock; + Td* free; + int nalloc; + int ninuse; + int nfree; +}; + +struct Edpool +{ + Lock; + Ed* free; + int nalloc; + int 
ninuse; + int nfree; +}; + +struct Ctlr +{ + Lock; /* for ilock; lists and basic ctlr I/O */ + QLock resetl; /* lock controller during USB reset */ + int active; + Ctlr* next; + int nports; + + Ohci* ohci; /* base I/O address */ + Hcca* hcca; /* intr/done Td lists (used by hardware) */ + int overrun; /* sched. overrun */ + Ed* intrhd; /* list of intr. eds in tree */ + Qtree* tree; /* tree for t Ep i/o */ + int ntree; /* number of dummy Eds in tree */ + Pcidev* pcidev; +}; + +#define dqprint if(debug || io && io->debug)print +#define ddqprint if(debug>1 || (io && io->debug>1))print +#define diprint if(debug || iso && iso->debug)print +#define ddiprint if(debug>1 || (iso && iso->debug>1))print +#define TRUNC(x, sz) ((x) & ((sz)-1)) + +static int ohciinterrupts[Nttypes]; +static char* iosname[] = { "idle", "install", "run", "done", "close", "FREE" }; + +static int debug; +static Edpool edpool; +static Tdpool tdpool; +static Ctlr* ctlrs[Nhcis]; + +static char EnotWritten[] = "usb write unfinished"; +static char EnotRead[] = "usb read unfinished"; +static char Eunderrun[] = "usb endpoint underrun"; + +static QLock usbhstate; /* protects name space state */ + +static int schedendpt(Ctlr *ub, Ep *ep); +static void unschedendpt(Ctlr *ub, Ep *ep); +static long qtd(Ctlr*, Ep*, int, Block*, uchar*, uchar*, int, ulong); + +static char* errmsgs[] = +{ +[Tdcrc] "crc error", +[Tdbitstuff] "bit stuffing error", +[Tdbadtog] "bad toggle", +[Tdstalled] Estalled, +[Tdtmout] "timeout error", +[Tdpidchk] "pid check error", +[Tdbadpid] "bad pid", +[Tddataovr] "data overrun", +[Tddataund] "data underrun", +[Tdbufovr] "buffer overrun", +[Tdbufund] "buffer underrun", +[Tdnotacc] "not accessed" +}; + +static void* +pa2ptr(uintmem pa) +{ + if(pa == 0) + return nil; + else if(pa > 0xffffffff) + panic("usb: ohci: highmem pa %#P", pa); + return KADDR(pa); +} + +static uintmem +ptr2pa(void *p) +{ + uintmem pa; + + if(p == nil) + return 0; + pa = PADDR(p); + if(pa > 0xffffffff) + panic("usb: ohci: 
highmemptr %#p", p);
+	return pa;
+}
+
+static void
+waitSOF(Ctlr *ub)
+{
+	int frame = ub->hcca->framenumber & 0x3f;
+
+	do {
+		delay(2);
+	} while(frame == (ub->hcca->framenumber & 0x3f));	/* poll until the hcca frame number changes */
+}
+
+static char*
+errmsg(int err)
+{
+
+	if(err < nelem(errmsgs))
+		return errmsgs[err];
+	return nil;
+}
+
+static Ed*
+ctlhd(Ctlr *ctlr)
+{
+	return pa2ptr(ctlr->ohci->ctlheaded);
+}
+
+static Ed*
+bulkhd(Ctlr *ctlr)
+{
+	return pa2ptr(ctlr->ohci->bulkheaded);
+}
+
+static void
+edlinked(Ed *ed, Ed *next)
+{
+	if(ed == nil)	/* fail explicitly instead of printing and then writing through nil */
+		panic("edlinked: nil ed: pc %#p", getcallerpc(&ed));
+	ed->nexted = ptr2pa(next);
+	ed->next = next;
+}
+
+static void
+setctlhd(Ctlr *ctlr, Ed *ed)
+{
+	ctlr->ohci->ctlheaded = ptr2pa(ed);
+	if(ed != nil)
+		ctlr->ohci->cmdsts |= Sclf;	/* reload it on next pass */
+}
+
+static void
+setbulkhd(Ctlr *ctlr, Ed *ed)
+{
+	ctlr->ohci->bulkheaded = ptr2pa(ed);
+	if(ed != nil)
+		ctlr->ohci->cmdsts |= Sblf;	/* reload it on next pass */
+}
+
+static void
+unlinkctl(Ctlr *ctlr, Ed *ed)
+{
+	Ed *this, *prev;
+
+	ctlr->ohci->control &= ~Ccle;	/* control list off while it is edited */
+	waitSOF(ctlr);
+	this = ctlhd(ctlr);
+	ctlr->ohci->ctlcurred = 0;
+	prev = nil;
+	while(this != nil && this != ed){
+		prev = this;
+		this = this->next;
+	}
+	if(this == nil){
+		print("unlinkctl: not found\n");
+		ctlr->ohci->control |= Ccle;	/* list untouched: don't leave it disabled */
+		return;
+	}
+	if(prev == nil)
+		setctlhd(ctlr, this->next);
+	else
+		edlinked(prev, this->next);
+	ctlr->ohci->control |= Ccle;
+	edlinked(ed, nil);		/* wipe out next field */
+}
+
+static void
+unlinkbulk(Ctlr *ctlr, Ed *ed)
+{
+	Ed *this, *prev;
+
+	ctlr->ohci->control &= ~Cble;	/* bulk list off while it is edited */
+	waitSOF(ctlr);
+	this = bulkhd(ctlr);
+	ctlr->ohci->bulkcurred = 0;
+	prev = nil;
+	while(this != nil && this != ed){
+		prev = this;
+		this = this->next;
+	}
+	if(this == nil){
+		print("unlinkbulk: not found\n");
+		ctlr->ohci->control |= Cble;	/* list untouched: don't leave it disabled */
+		return;
+	}
+	if(prev == nil)
+		setbulkhd(ctlr, this->next);
+	else
+		edlinked(prev, this->next);
+	ctlr->ohci->control |= Cble;
+	edlinked(ed, nil);		/* wipe out next field */
+}
+
+static void
+edsetaddr(Ed *ed, ulong addr) +{ + ulong ctrl; + + ctrl = ed->ctrl & ~((Epmax<<7)|Devmax); + ctrl |= (addr & ((Epmax<<7)|Devmax)); + ed->ctrl = ctrl; +} + +static void +edsettog(Ed *ed, int c) +{ + if(c != 0) + ed->head |= Edtoggle; + else + ed->head &= ~Edtoggle; +} + +static int +edtoggle(Ed *ed) +{ + return ed->head & Edtoggle; +} + +static int +edhalted(Ed *ed) +{ + return ed->head & Edhalt; +} + +static int +edmaxpkt(Ed *ed) +{ + return (ed->ctrl >> Edmpsshift) & Edmpsmask; +} + +static void +edsetmaxpkt(Ed *ed, int m) +{ + ulong c; + + c = ed->ctrl & ~(Edmpsmask << Edmpsshift); + ed->ctrl = c | ((m&Edmpsmask) << Edmpsshift); +} + +static int +tderrs(Td *td) +{ + return (td->ctrl >> Tdccshift) & Tdccmask; +} + +static int +tdtok(Td *td) +{ + return td->ctrl & Tdtokmask; +} + +static void* +lomallocalign(usize sz, usize align) +{ + void *va; + + va = mallocalign(sz, align, 0, 0); + if(PADDR(va) > 0xffffffff) + panic("usb: ohci: lomallocalign: mallocalign gives high mem %#p", va); + return va; +} + +static Td* +tdalloc(void) +{ + uchar *mem; + int i, sz; + Td *td; + + lock(&tdpool); + if(tdpool.free == nil){ + ddprint("ohci: tdalloc %d Tds\n", Incr); + sz = ROUNDUP(sizeof *td, Align); + mem = lomallocalign(Incr*sz, Align); + if(mem == nil) + panic("tdalloc"); + for(i = 0; i < Incr; i++){ + td = (Td*)(mem + i*sz); + td->next = tdpool.free; + tdpool.free = td; + } + tdpool.nalloc += Incr; + tdpool.nfree += Incr; + } + tdpool.ninuse++; + tdpool.nfree--; + td = tdpool.free; + tdpool.free = td->next; + memset(td, 0, sizeof(Td)); + unlock(&tdpool); + + assert(((uintptr)td & 0xF) == 0); + return td; +} + +static void +tdfree(Td *td) +{ + if(td == nil) + return; + freeb(td->bp); + td->bp = nil; + lock(&tdpool); + if(td->nexttd == 0x77777777) + panic("ohci: tdfree: double free"); + memset(td, 7, sizeof(Td)); /* poison */ + td->next = tdpool.free; + tdpool.free = td; + tdpool.ninuse--; + tdpool.nfree++; + unlock(&tdpool); +} + +static Ed* +edalloc(void) +{ + uchar *mem; 
+ int i, sz; + Ed *ed; + + lock(&edpool); + if(edpool.free == nil){ + ddprint("ohci: edalloc %d Eds\n", Incr); + sz = ROUNDUP(sizeof *ed, Align); + mem = lomallocalign(Incr*sz, Align); + if(mem == nil) + panic("edalloc"); + for(i = 0; i < Incr; i++){ + ed = (Ed*)(mem + i*sz); + ed->next = edpool.free; + edpool.free = ed; + } + edpool.nalloc += Incr; + edpool.nfree += Incr; + } + edpool.ninuse++; + edpool.nfree--; + ed = edpool.free; + edpool.free = ed->next; + memset(ed, 0, sizeof(Ed)); + unlock(&edpool); + + return ed; +} + +static void +edfree(Ed *ed) +{ + Td *td, *next; + int i; + + if(ed == 0) + return; + i = 0; + for(td = ed->tds; td != nil; td = next){ + next = td->next; + tdfree(td); + if(i++ > 2000){ + print("ohci: bug: ed with more than 2000 tds\n"); + break; + } + } + lock(&edpool); + if(ed->nexted == 0x99999999) + panic("ohci: edfree: double free"); + memset(ed, 9, sizeof(Ed)); /* poison */ + ed->next = edpool.free; + edpool.free = ed; + edpool.ninuse--; + edpool.nfree++; + unlock(&edpool); + ddprint("edfree: ed %#p\n", ed); +} + +/* + * return smallest power of 2 >= n + */ +static int +flog2(int n) +{ + int i; + + for(i = 0; (1 << i) < n; i++) + ; + return i; +} + +/* + * return smallest power of 2 <= n + */ +static int +flog2lower(int n) +{ + int i; + + for(i = 0; (1 << (i + 1)) <= n; i++) + ; + return i; +} + +static int +pickschedq(Qtree *qt, int pollival, ulong bw, ulong limit) +{ + int i, j, d, upperb, q; + ulong best, worst, total; + + d = flog2lower(pollival); + if(d > qt->depth) + d = qt->depth; + q = -1; + worst = 0; + best = ~0; + upperb = (1 << (d+1)) - 1; + for(i = (1 << d) - 1; i < upperb; i++){ + total = qt->bw[0]; + for(j = i; j > 0; j = (j - 1) / 2) + total += qt->bw[j]; + if(total < best){ + best = total; + q = i; + } + if(total > worst) + worst = total; + } + if(worst + bw >= limit) + return -1; + return q; +} + +static int +schedq(Ctlr *ctlr, Qio *io, int pollival) +{ + int q; + Ed *ted; + + q = pickschedq(ctlr->tree, pollival, 
io->bw, ~0); + ddqprint("ohci: sched %#p q %d, ival %d, bw %ld\n", io, q, pollival, io->bw); + if(q < 0){ + print("ohci: no room for ed\n"); + return -1; + } + ctlr->tree->bw[q] += io->bw; + ted = ctlr->tree->root[q]; + io->sched = q; + edlinked(io->ed, ted->next); + edlinked(ted, io->ed); + io->ed->inext = ctlr->intrhd; + ctlr->intrhd = io->ed; + return 0; +} + +static void +unschedq(Ctlr *ctlr, Qio *qio) +{ + int q; + Ed *prev, *this, *next; + Ed **l; + + q = qio->sched; + if(q < 0) + return; + ctlr->tree->bw[q] -= qio->bw; + + prev = ctlr->tree->root[q]; + this = prev->next; + while(this != nil && this != qio->ed){ + prev = this; + this = this->next; + } + if(this == nil) + print("ohci: unschedq %d: not found\n", q); + else{ + next = this->next; + edlinked(prev, next); + } + waitSOF(ctlr); + for(l = &ctlr->intrhd; *l != nil; l = &(*l)->inext) + if(*l == qio->ed){ + *l = (*l)->inext; + return; + } + print("ohci: unschedq: ed %#p not found\n", qio->ed); +} + +static char* +seprinttdtok(char *s, char *e, int tok) +{ + switch(tok){ + case Tdtoksetup: + s = seprint(s, e, " setup"); + break; + case Tdtokin: + s = seprint(s, e, " in"); + break; + case Tdtokout: + s = seprint(s, e, " out"); + break; + } + return s; +} + + +static char* +seprinttd(char *s, char *e, Td *td, int iso) +{ + int i; + Block *bp; + + if(td == nil) + return seprint(s, e, "\n"); + s = seprint(s, e, "%#p ep %#p ctrl %#.8ux", td, td->ep, td->ctrl); + s = seprint(s, e, " cc=%#ux", (td->ctrl >> Tdccshift) & Tdccmask); + if(iso == 0){ + if((td->ctrl & Tdround) != 0) + s = seprint(s, e, " rnd"); + s = seprinttdtok(s, e, td->ctrl & Tdtokmask); + if((td->ctrl & Tdusetog) != 0) + s = seprint(s, e, " d%d", (td->ctrl & Tddata1) ? 
1 : 0); + else + s = seprint(s, e, " d-"); + s = seprint(s, e, " ec=%ud", (td->ctrl >> Tderrshift) & Tderrmask); + }else{ + s = seprint(s, e, " fc=%ud", (td->ctrl >> Tdfcshift) & Tdfcmask); + s = seprint(s, e, " sf=%ud", td->ctrl & Tdsfmask); + } + s = seprint(s, e, " cbp0 %#.8ux cbp %#.8ux next %#.8ux be %#.8ux %s", + td->cbp0, td->cbp, td->nexttd, td->be, td->last ? "last" : ""); + s = seprint(s, e, "\n\t\t%ld bytes", td->nbytes); + if((bp = td->bp) != nil){ + s = seprint(s, e, " rp %#p wp %#p ", bp->rp, bp->wp); + if(BLEN(bp) > 0) + s = seprintdata(s, e, bp->rp, bp->wp - bp->rp); + } + if(iso == 0) + return seprint(s, e, "\n"); + s = seprint(s, e, "\n\t\t"); + /* we use only offsets[0] */ + i = 0; + s = seprint(s, e, "[%d] %#ux cc=%#ux sz=%ud\n", i, td->offsets[i], + (td->offsets[i] >> Tdiccshift) & Tdiccmask, + td->offsets[i] & 0x7FF); + return s; +} + +static void +dumptd(Td *td, char *p, int iso) +{ + static char buf[512]; /* Too much */ + char *s; + + s = seprint(buf, buf+sizeof(buf), "%s: ", p); + s = seprinttd(s, buf+sizeof(buf), td, iso); + if(s > buf && s[-1] != '\n') + s[-1] = '\n'; + print("\t%s", buf); +} + +static void +dumptds(Td *td, char *p, int iso) +{ + int i; + + for(i = 0; td != nil; td = td->next){ + dumptd(td, p, iso); + if(td->last) + break; + if(tdtok(td) == Tdtokin && ++i > 2){ + print("\t\t...\n"); + break; + } + } +} + +static void +dumped(Ed *ed) +{ + char *buf, *s, *e; + + if(ed == nil){ + print("\n"); + return; + } + buf = malloc(512); + /* no waserror; may want to use from interrupt context */ + if(buf == nil) + return; + e = buf+512; + s = seprint(buf, e, "\ted %#p: ctrl %#.8ux", ed, ed->ctrl); + if((ed->ctrl & Edskip) != 0) + s = seprint(s, e, " skip"); + if((ed->ctrl & Ediso) != 0) + s = seprint(s, e, " iso"); + if((ed->ctrl & Edlow) != 0) + s = seprint(s, e, " low"); + s = seprint(s, e, " d%d", (ed->head & Edtoggle) ? 
1 : 0); + if((ed->ctrl & Eddirmask) == Edin) + s = seprint(s, e, " in"); + if((ed->ctrl & Eddirmask) == Edout) + s = seprint(s, e, " out"); + if(edhalted(ed)) + s = seprint(s, e, " hlt"); + s = seprint(s, e, " ep%ud.%ud", (ed->ctrl>>7)&Epmax, ed->ctrl&0x7f); + s = seprint(s, e, " maxpkt %ud", (ed->ctrl>>Edmpsshift)&Edmpsmask); + seprint(s, e, " tail %#.8ux head %#.8ux next %#.8ux\n", ed->tail, ed->head, ed->nexted); + print("%s", buf); + free(buf); + if(ed->tds != nil && (ed->ctrl & Ediso) == 0) + dumptds(ed->tds, "td", 0); +} + +static char* +seprintio(char *s, char *e, Qio *io, char *pref) +{ + s = seprint(s, e, "%s qio %#p ed %#p", pref, io, io->ed); + s = seprint(s, e, " tog %d iot %ld err %s id %#ulx", + io->toggle, io->iotime, io->err, io->usbid); + s = seprinttdtok(s, e, io->tok); + s = seprint(s, e, " %s\n", iosname[io->state]); + return s; +} + +static char* +seprintep(char* s, char* e, Ep *ep) +{ + Isoio *iso; + Qio *io; + Ctlio *cio; + + if(ep == nil) + return seprint(s, e, "\n"); + if(ep->aux == nil) + return seprint(s, e, "no mdep\n"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + s = seprintio(s, e, cio, "c"); + s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata); + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE) + s = seprintio(s, e, &io[OREAD], "r"); + if(ep->mode != OREAD) + s = seprintio(s, e, &io[OWRITE], "w"); + break; + case Tiso: + iso = ep->aux; + s = seprintio(s, e, iso, "w"); + s = seprint(s, e, "\tntds %d avail %d frno %ud left %ud next avail %#p\n", + iso->nframes, iso->navail, iso->frno, iso->left, iso->atds); + break; + } + return s; +} + +static char* +seprintctl(char *s, char *se, ulong ctl) +{ + s = seprint(s, se, "en="); + if((ctl&Cple) != 0) + s = seprint(s, se, "p"); + if((ctl&Cie) != 0) + s = seprint(s, se, "i"); + if((ctl&Ccle) != 0) + s = seprint(s, se, "c"); + if((ctl&Cble) != 0) + s = seprint(s, se, "b"); + switch(ctl & Cfsmask){ + case Cfsreset: + return seprint(s, se, " 
reset"); + case Cfsresume: + return seprint(s, se, " resume"); + case Cfsoper: + return seprint(s, se, " run"); + case Cfssuspend: + return seprint(s, se, " suspend"); + default: + return seprint(s, se, " ???"); + } +} + +static void +dump(Hci *hp) +{ + Ctlr *ctlr; + Ed *ed; + char cs[20]; + + ctlr = hp->aux; + ilock(ctlr); + seprintctl(cs, cs+sizeof(cs), ctlr->ohci->control); + print("ohci ctlr %#p: frno %#ux ctl %#ux %s sts %#ux intr %#ux\n", + ctlr, ctlr->hcca->framenumber, ctlr->ohci->control, cs, + ctlr->ohci->cmdsts, ctlr->ohci->intrsts); + print("ctlhd %#ux cur %#ux bulkhd %#ux cur %#ux done %#ux\n", + ctlr->ohci->ctlheaded, ctlr->ohci->ctlcurred, + ctlr->ohci->bulkheaded, ctlr->ohci->bulkcurred, + ctlr->ohci->donehead); + if(ctlhd(ctlr) != nil) + print("[ctl]\n"); + for(ed = ctlhd(ctlr); ed != nil; ed = ed->next) + dumped(ed); + if(bulkhd(ctlr) != nil) + print("[bulk]\n"); + for(ed = bulkhd(ctlr); ed != nil; ed = ed->next) + dumped(ed); + if(ctlr->intrhd != nil) + print("[intr]\n"); + for(ed = ctlr->intrhd; ed != nil; ed = ed->inext) + dumped(ed); + if(ctlr->tree->root[0]->next != nil) + print("[iso]"); + for(ed = ctlr->tree->root[0]->next; ed != nil; ed = ed->next) + dumped(ed); + print("%d eds in tree\n", ctlr->ntree); + iunlock(ctlr); + lock(&tdpool); + print("%d tds allocated = %d in use + %d free\n", + tdpool.nalloc, tdpool.ninuse, tdpool.nfree); + unlock(&tdpool); + lock(&edpool); + print("%d eds allocated = %d in use + %d free\n", + edpool.nalloc, edpool.ninuse, edpool.nfree); + unlock(&edpool); +} + +/* + * Compute size for the next iso Td and setup its + * descriptor for I/O according to the buffer size. 
 */
static void
isodtdinit(Ep *ep, Isoio *iso, Td *td)
{
	Block *bp;
	long size;
	int i;

	bp = td->bp;
	assert(bp != nil && BLEN(bp) == 0);
	/*
	 * samples for this interval; the remainder of the division
	 * is carried in iso->left and added in on the next call.
	 */
	size = (ep->hz+iso->left) * ep->pollival / 1000;
	iso->left = (ep->hz+iso->left) * ep->pollival % 1000;
	size *= ep->samplesz;
	if(size > ep->maxpkt){
		print("ohci: ep%d.%d: size > maxpkt\n",
			ep->dev->nb, ep->nb);
		print("size = %uld max = %ld\n", size, ep->maxpkt);
		size = ep->maxpkt;
	}
	td->nbytes = size;
	memset(bp->wp, 0, size);	/* in case we don't fill it on time */
	/* cbp holds the page of the buffer, offsets[0] the offset in it */
	td->cbp0 = td->cbp = ptr2pa(bp->rp) & ~0xFFF;
	td->ctrl = TRUNC(iso->frno, Ntdframes);
	td->offsets[0] = (ptr2pa(bp->rp) & 0xFFF);
	td->offsets[0] |= (Tdnotacc << Tdiccshift);
	/* in case the controller checks out the offsets... */
	for(i = 1; i < nelem(td->offsets); i++)
		td->offsets[i] = td->offsets[0];
	td->be = ptr2pa(bp->rp + size - 1);
	td->ctrl |= (0 << Tdfcshift);	/* frame count is 1 */

	/* frame number for the Td set up by the next call */
	iso->frno = TRUNC(iso->frno + ep->pollival, Ntdframes);
}

/*
 * start I/O on the dummy td and setup a new dummy to fill up.
 * The new dummy is the first Td of the iso->atds list; it is
 * linked after td and becomes the tail of the Ed.
 */
static void
isoadvance(Ep *ep, Isoio *iso, Td *td)
{
	Td *dtd;

	dtd = iso->atds;
	iso->atds = dtd->anext;
	iso->navail--;
	dtd->anext = nil;
	dtd->bp->wp = dtd->bp->rp;	/* empty its block */
	dtd->nexttd = 0;
	td->nexttd = ptr2pa(dtd);
	isodtdinit(ep, iso, dtd);
	iso->ed->tail = ptr2pa(dtd);
}

/*
 * Sleep condition for iso writers: true when the stream is
 * closed, has an error pending, or more than half of its Tds
 * are available.
 */
static int
isocanwrite(void *a)
{
	Isoio *iso;

	iso = a;
	return iso->state == Qclose || iso->err != nil ||
		iso->navail > iso->nframes / 2;
}

/*
 * Service a completed/failed Td from the done queue.
 * It may be of any transfer type.
 * The queue is not in completion order.
 * (It's actually in reverse completion order).
 *
 * When an error, a short packet, or a last Td is found
 * we awake the process waiting for the transfer.
 * Although later we will process other Tds completed
 * before, epio won't be able to touch the current Td
 * until interrupt returns and releases the lock on the
 * controller.
 */
static void
qhinterrupt(Ctlr *, Ep *ep, Qio *io, Td *td, int)
{
	Block *bp;
	int mode, err;
	Ed *ed;

	ed = io->ed;
	/* nothing to do unless a transfer is in progress */
	if(io->state != Qrun)
		return;
	if(tdtok(td) == Tdtokin)
		mode = OREAD;
	else
		mode = OWRITE;
	bp = td->bp;
	err = tderrs(td);

	switch(err){
	case Tddataovr:			/* Overrun is not an error */
		break;
	case Tdok:
		/* virtualbox doesn't always report underflow on short packets */
		if(td->cbp == 0)
			break;
		/* fall through */
	case Tddataund:
		/* short input packets are ok */
		if(mode == OREAD){
			if(td->cbp == 0)
				panic("ohci: short packet but cbp == 0");
			/*
			 * td->cbp and td->cbp0 are the real addresses
			 * corresponding to virtual addresses bp->wp and
			 * bp->rp respectively.
			 */
			bp->wp = bp->rp + (td->cbp - td->cbp0);
			if(bp->wp < bp->rp)
				panic("ohci: wp < rp");
			/*
			 * It's ok. clear error and flag as last in xfer.
			 * epio must ignore following Tds.
			 */
			td->last = 1;
			td->ctrl &= ~(Tdccmask << Tdccshift);
			break;
		}
		/* else fall; it's an error */
	case Tdcrc:
	case Tdbitstuff:
	case Tdbadtog:
	case Tdstalled:
	case Tdtmout:
	case Tdpidchk:
	case Tdbadpid:
		bp->wp = bp->rp;	/* no bytes in xfer. */
		io->err = errmsg(err);
		if(debug || ep->debug){
			print("tdinterrupt: failed err %d (%s)\n", err, io->err);
			dumptd(td, "failed", ed->ctrl & Ediso);
		}
		td->last = 1;
		break;
	default:
		panic("ohci: td cc %ud unknown", err);
	}

	if(td->last != 0){
		/*
		 * clear td list and halt flag.
		 * Only the toggle bit of head is kept; the Td list
		 * restarts at the tail (dummy) Td.
		 */
		ed->head = (ed->head & Edtoggle) | ed->tail;
		ed->tds = pa2ptr(ed->tail);
		io->state = Qdone;
		wakeup(io);
	}
}

/*
 * BUG: Iso input streams are not implemented.
 */
static void
isointerrupt(Ctlr *ctlr, Ep *ep, Qio *io, Td *td, int)
{
	Isoio *iso;
	Block *bp;
	Ed *ed;
	int err, isoerr;

	iso = ep->aux;
	ed = io->ed;
	if(io->state == Qclose)
		return;
	bp = td->bp;
	/*
	 * When we get more than half the frames consecutive errors
	 * we signal an actual error. Errors in the entire Td are
	 * more serious and are always signaled.
	 * Errors like overrun are not really errors. In fact, for
	 * output, errors cannot be really detected. The driver will
	 * hopefully notice I/O errors on input endpoints and detach the device.
	 */
	err = tderrs(td);
	isoerr = (td->offsets[0] >> Tdiccshift) & Tdiccmask;
	if(isoerr == Tdok || isoerr == Tdnotacc)
		iso->nerrs = 0;
	else if(iso->nerrs++ > iso->nframes/2)
		err = Tdstalled;
	if(err != Tdok && err != Tddataovr){
		bp->wp = bp->rp;
		io->err = errmsg(err);
		if(debug || ep->debug){
			print("ohci: isointerrupt: ep%d.%d: err %d (%s) frnum %#ux\n",
				ep->dev->nb, ep->nb,
				err, errmsg(err), ctlr->ohci->fmnumber);
			dumptd(td, "failed", ed->ctrl & Ediso);
		}
	}
	/* empty the Td and put it back on the list of available ones */
	td->bp->wp = td->bp->rp;
	td->nbytes = 0;
	td->anext = iso->atds;
	iso->atds = td;
	iso->navail++;
	/*
	 * If almost all Tds are avail the user is not doing I/O at the
	 * required rate. We put another Td in place to keep the polling rate.
	 */
	if(iso->err == nil && iso->navail > iso->nframes - 10)
		isoadvance(ep, iso, pa2ptr(iso->ed->tail));
	/*
	 * If there's enough buffering further I/O can be done.
	 */
	if(isocanwrite(iso))
		wakeup(iso);
}

/*
 * Interrupt handler.  Runs with the controller ilocked:
 * services every Td on the done queue (at most 1024 of them),
 * acknowledges the status bits read and reports scheduling
 * overruns, unrecoverable errors and any bit left unhandled.
 */
static void
interrupt(Ureg *, void *arg)
{
	Td *td, *ntd, *td0;
	Hci *hp;
	Ctlr *ctlr;
	u32int status, curred;
	int i, frno;

	hp = arg;
	ctlr = hp->aux;
	ilock(ctlr);
	status = ctlr->ohci->intrsts;
	status &= ctlr->ohci->intrenable;
	status &= Oc|Rhsc|Fno|Ue|Rd|Sf|Wdh|So;
	frno = TRUNC(ctlr->ohci->fmnumber, Ntdframes);
	if((status & Wdh) != 0){
		/* lsb of donehead has bit to flag other intrs.  */
		td = pa2ptr(ctlr->hcca->donehead & ~0xF);
	}else
		td = nil;
	td0 = td;

	for(i = 0; td != nil && i < 1024; i++){
		if(0)ddprint("ohci tdinterrupt: td %#p\n", td);
		/* fetch the link before the Td is handed to its handler */
		ntd = pa2ptr(td->nexttd & ~0xF);
		td->nexttd = 0;
		if(td->ep == nil || td->io == nil)
			panic("ohci: interrupt: ep %#p io %#p", td->ep, td->io);
		ohciinterrupts[td->ep->ttype]++;
		if(td->ep->ttype == Tiso)
			isointerrupt(ctlr, td->ep, td->io, td, frno);
		else
			qhinterrupt(ctlr, td->ep, td->io, td, frno);
		td = ntd;
	}
	if(i == 1024)
		print("ohci: bug: more than 1024 done Tds?\n");

	if(pa2ptr(ctlr->hcca->donehead & ~0xF) != td0)
		print("ohci: bug: donehead changed before ack\n");
	ctlr->hcca->donehead = 0;

	/* acknowledge what was read; the bits left in status get reported below */
	ctlr->ohci->intrsts = status;
	status &= ~Wdh;
	status &= ~Sf;
	if(status & So){
		print("ohci: sched overrun: too much load\n");
		ctlr->overrun++;
		status &= ~So;
	}
	if((status & Ue) != 0){
		curred = ctlr->ohci->periodcurred;
		print("ohci: unrecoverable error frame %#.8ux ed %#.8ux, "
			"ints %d %d %d %d\n",
			ctlr->ohci->fmnumber, curred,
			ohciinterrupts[Tctl], ohciinterrupts[Tintr],
			ohciinterrupts[Tbulk], ohciinterrupts[Tiso]);
		if(curred != 0)
			dumped(pa2ptr(curred));
		status &= ~Ue;
	}
	if(status != 0)
		print("ohci interrupt: unhandled sts %#.8ux\n", status);
	iunlock(ctlr);
}

/*
 * The old dummy Td is used to implement the new Td.
 * A new dummy is linked at the end of the old one and
 * returned, to link further Tds if needed.
+ */ +static Td* +epgettd(Ep *ep, Qio *io, Td **dtdp, int flags, void *a, int count) +{ + Td *td, *dtd; + Block *bp; + + if(count <= PGSZ) + bp = allocb(count); + else{ + if(count > 2*PGSZ) + panic("ohci: transfer > two pages"); + /* maximum of one physical page crossing allowed */ + bp = allocb(count+PGSZ); + bp->rp = (uchar*)ROUNDUP((uintptr)bp->rp, PGSZ); + bp->wp = bp->rp; + } + dtd = *dtdp; + td = dtd; + td->bp = bp; + if(count > 0){ + td->cbp0 = td->cbp = ptr2pa(bp->wp); + td->be = ptr2pa(bp->wp + count - 1); + if(a != nil){ + /* validaddr((uintptr)a, count, 0); DEBUG */ + memmove(bp->wp, a, count); + } + bp->wp += count; + } + td->nbytes = count; + td->ctrl = io->tok|Tdusetog|io->toggle|flags; + if(io->toggle == Tddata0) + io->toggle = Tddata1; + else + io->toggle = Tddata0; + assert(td->ep == ep); + td->io = io; + dtd = tdalloc(); /* new dummy */ + dtd->ep = ep; + td->nexttd = ptr2pa(dtd); + td->next = dtd; + *dtdp = dtd; + return td; +} + +/* + * Try to get them idle + */ +static void +aborttds(Qio *io) +{ + Ed *ed; + Td *td; + + ed = io->ed; + if(ed == nil) + return; + ed->ctrl |= Edskip; + for(td = ed->tds; td != nil; td = td->next) + if(td->bp != nil) + td->bp->wp = td->bp->rp; + ed->head = (ed->head&0xF) | ed->tail; + if((ed->ctrl & Ediso) == 0) + ed->tds = pa2ptr(ed->tail); +} + +static int +epiodone(void *a) +{ + Qio *io; + + io = a; + return io->state != Qrun; +} + +static void +epiowait(Ctlr *ctlr, Qio *io, int tmout, ulong) +{ + Ed *ed; + int timedout; + + ed = io->ed; + if(0)ddqprint("ohci io %#p sleep on ed %#p state %s\n", + io, ed, iosname[io->state]); + timedout = 0; + if(waserror()){ + dqprint("ohci io %#p ed %#p timed out\n", io, ed); + timedout++; + }else{ + if(tmout == 0) + sleep(io, epiodone, io); + else + tsleep(io, epiodone, io, tmout); + poperror(); + } + ilock(ctlr); + if(io->state == Qrun) + timedout = 1; + else if(io->state != Qdone && io->state != Qclose) + panic("epio: ed not done and not closed"); + if(timedout){ + aborttds(io); 
+ io->err = "request timed out"; + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + ilock(ctlr); + } + if(io->state != Qclose) + io->state = Qidle; + iunlock(ctlr); +} + +/* + * Non iso I/O. + * To make it work for control transfers, the caller may + * lock the Qio for the entire control transfer. + */ +static long +epio(Ep *ep, Qio *io, void *a, long count, int mustlock) +{ + Ed *ed; + Ctlr *ctlr; + char buf[80]; + char *err; + uchar *c; + Td *td, *ltd, *ntd, *td0; + int last, ntds, tmout; + long tot, n; + ulong load; + + ed = io->ed; + ctlr = ep->hp->aux; + io->debug = ep->debug; + tmout = ep->tmout; + ddeprint("ohci: %s ep%d.%d io %#p count %ld\n", + io->tok == Tdtokin ? "in" : "out", + ep->dev->nb, ep->nb, io, count); + if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("\t%s\n", buf); + } + if(mustlock){ + qlock(io); + if(waserror()){ + qunlock(io); + nexterror(); + } + } + io->err = nil; + ilock(ctlr); + if(io->state == Qclose){ /* Tds released by cancelio */ + iunlock(ctlr); + error(io->err ? 
io->err : Eio); + } + if(io->state != Qidle) + panic("epio: qio not idle"); + io->state = Qinstall; + + c = a; + ltd = td0 = ed->tds; + load = tot = 0; + do{ + n = 2*PGSZ; + if(count-tot < n) + n = count-tot; + if(c != nil && io->tok != Tdtokin) + td = epgettd(ep, io, <d, 0, c+tot, n); + else + td = epgettd(ep, io, <d, 0, nil, n); + tot += n; + load += ep->load; + }while(tot < count); + if(td0 == nil || ltd == nil || td0 == ltd) + panic("epio: no td"); + td->last = 1; + if(debug > 2 || ep->debug > 2) + dumptds(td0, "put td", ep->ttype == Tiso); + iunlock(ctlr); + + ilock(ctlr); + if(io->state != Qclose){ + io->iotime = TK2MS(sys->ticks); + io->state = Qrun; + ed->tail = ptr2pa(ltd); + if(ep->ttype == Tctl) + ctlr->ohci->cmdsts |= Sclf; + else if(ep->ttype == Tbulk) + ctlr->ohci->cmdsts |= Sblf; + } + iunlock(ctlr); + + epiowait(ctlr, io, tmout, load); + ilock(ctlr); + if(debug > 1 || ep->debug > 1) + dumptds(td0, "got td", 0); + iunlock(ctlr); + + tot = 0; + c = a; + ntds = last = 0; + for(td = td0; td != ltd; td = ntd){ + ntds++; + /* + * If the Td is flagged as last we must + * ignore any following Td. The block may + * seem to have bytes but interrupt has not seen + * those Tds through the done queue, and they are void. + */ + if(last == 0 && tderrs(td) == Tdok){ + n = BLEN(td->bp); + tot += n; + if(c != nil && tdtok(td) == Tdtokin && n > 0){ + memmove(c, td->bp->rp, n); + c += n; + } + } + last |= td->last; + ntd = td->next; + tdfree(td); + } + if(edtoggle(ed) == 0) + io->toggle = Tddata0; + else + io->toggle = Tddata1; + + err = io->err; + if(mustlock){ + qunlock(io); + poperror(); + } + ddeprint("ohci: io %#p: %d tds: return %ld err '%s'\n\n", + io, ntds, tot, err); + if(err != nil) + error(err); + if(tot < 0) + error(Eio); + return tot; +} + +/* + * halt condition was cleared on the endpoint. update our toggles. 
 */
static void
clrhalt(Ep *ep)
{
	Qio *io;

	ep->clrhalt = 0;
	switch(ep->ttype){
	case Tbulk:
	case Tintr:
		/* reset to data0 the toggle of each direction in use */
		io = ep->aux;
		if(ep->mode != OREAD){
			qlock(&io[OWRITE]);
			io[OWRITE].toggle = Tddata0;
			deprint("ep clrhalt for io %#p\n", io+OWRITE);
			qunlock(&io[OWRITE]);
		}
		if(ep->mode != OWRITE){
			qlock(&io[OREAD]);
			io[OREAD].toggle = Tddata0;
			deprint("ep clrhalt for io %#p\n", io+OREAD);
			qunlock(&io[OREAD]);
		}
		break;
	}
}

/*
 * Read from an endpoint.
 * Control endpoints return the data kept in the Ctlio (at most
 * cio->ndata bytes, which are then discarded); the following
 * read returns 0, and a read after that raises
 * "request expected".
 * Bulk and interrupt endpoints go through epio; interrupt reads
 * are first delayed so that at least half of ep->pollival has
 * gone by since the previous I/O.
 * Iso reads are not implemented and panic.
 */
static long
epread(Ep *ep, void *a, long count)
{
	Ctlio *cio;
	Qio *io;
	char buf[80];
	ulong delta;

	if(ep->aux == nil)
		panic("epread: not open");

	switch(ep->ttype){
	case Tctl:
		cio = ep->aux;
		qlock(cio);
		if(waserror()){
			qunlock(cio);
			nexterror();
		}
		ddeprint("epread ctl ndata %d\n", cio->ndata);
		if(cio->ndata < 0)
			error("request expected");
		else if(cio->ndata == 0){
			cio->ndata = -1;
			count = 0;
		}else{
			if(count > cio->ndata)
				count = cio->ndata;
			if(count > 0)
				memmove(a, cio->data, count);
			/* BUG for big transfers */
			free(cio->data);
			cio->data = nil;
			cio->ndata = 0;	/* signal EOF next time */
		}
		qunlock(cio);
		poperror();
		if(debug>1 || ep->debug){
			seprintdata(buf, buf+sizeof(buf), a, count);
			print("epread: %s\n", buf);
		}
		return count;
	case Tbulk:
		io = ep->aux;
		if(ep->clrhalt)
			clrhalt(ep);
		return epio(ep, &io[OREAD], a, count, 1);
	case Tintr:
		io = ep->aux;
		delta = TK2MS(sys->ticks) - io[OREAD].iotime + 1;
		if(delta < ep->pollival / 2)
			tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta);
		if(ep->clrhalt)
			clrhalt(ep);
		return epio(ep, &io[OREAD], a, count, 1);
	case Tiso:
		panic("ohci: iso read not implemented");
		break;
	default:
		panic("epread: bad ep ttype %d", ep->ttype);
	}
	return -1;
}

/*
 * Control transfers are one setup write (data0)
 * plus zero or more reads/writes (data1, data0, ...)
 * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform + * the entire transfer when the user writes the request, + * and keep any data read from the device for a later read. + * We call epio three times instead of placing all Tds at + * the same time because doing so leads to crc/tmout errors + * for some devices. + * Upon errors on the data phase we must still run the status + * phase or the device may cease responding in the future. + */ +static long +epctlio(Ep *ep, Ctlio *cio, void *a, long count) +{ + uchar *c; + long len; + + ddeprint("epctlio: cio %#p ep%d.%d count %ld\n", + cio, ep->dev->nb, ep->nb, count); + if(count < Rsetuplen) + error("short usb command"); + qlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + if(waserror()){ + qunlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + nexterror(); + } + + /* set the address if unset and out of configuration state */ + if(ep->dev->state != Dconfig && ep->dev->state != Dreset) + if(cio->usbid == 0){ + cio->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax); + edsetaddr(cio->ed, cio->usbid); + } + /* adjust maxpkt if the user has learned a different one */ + if(edmaxpkt(cio->ed) != ep->maxpkt) + edsetmaxpkt(cio->ed, ep->maxpkt); + c = a; + cio->tok = Tdtoksetup; + cio->toggle = Tddata0; + if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen) + error(Eio); + + a = c + Rsetuplen; + count -= Rsetuplen; + + cio->toggle = Tddata1; + if(c[Rtype] & Rd2h){ + cio->tok = Tdtokin; + len = GET2(c+Rcount); + if(len <= 0) + error("bad length in d2h request"); + if(len > Maxctllen) + error("d2h data too large to fit in ohci"); + a = cio->data = smalloc(len+1); + }else{ + cio->tok = Tdtokout; + len = count; + } + if(len > 0) + if(waserror()) + len = -1; + else{ + len = epio(ep, cio, a, len, 0); + poperror(); + } + if(c[Rtype] & Rd2h){ + count = Rsetuplen; + cio->ndata = len; + cio->tok = Tdtokout; + }else{ + if(len < 0) + count = -1; + else + count = Rsetuplen + len; + cio->tok = Tdtokin; + } + 
cio->toggle = Tddata1; + epio(ep, cio, nil, 0, 0); + qunlock(cio); + poperror(); + ddeprint("epctlio cio %#p return %ld\n", cio, count); + return count; +} + +/* + * Put new samples in the dummy Td. + * BUG: This does only a transfer per Td. We could do up to 8. + */ +static long +putsamples(Ctlr *ctlr, Ep *ep, Isoio *iso, uchar *b, long count) +{ + Td *td; + ulong n; + + td = pa2ptr(iso->ed->tail); + n = count; + if(n > td->nbytes - BLEN(td->bp)) + n = td->nbytes - BLEN(td->bp); + assert(td->bp->wp + n <= td->bp->lim); + memmove(td->bp->wp, b, n); + td->bp->wp += n; + if(BLEN(td->bp) == td->nbytes){ /* full Td: activate it */ + ilock(ctlr); + isoadvance(ep, iso, td); + iunlock(ctlr); + } + return n; +} + +static long +episowrite(Ep *ep, void *a, long count) +{ + long tot, nw; + char *err; + uchar *b; + Ctlr *ctlr; + Isoio *iso; + + ctlr = ep->hp->aux; + iso = ep->aux; + iso->debug = ep->debug; + + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + diprint("ohci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + b = a; + for(tot = 0; tot < count; tot += nw){ + while(isocanwrite(iso) == 0){ + iunlock(ctlr); + diprint("ohci: episowrite: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanwrite, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + err = iso->err; + iso->err = nil; + if(iso->state == Qclose || err != nil){ + iunlock(ctlr); + error(err ? 
err : Eio); + } + if(iso->state != Qrun) + panic("episowrite: iso not running"); + iunlock(ctlr); /* We could page fault here */ + nw = putsamples(ctlr, ep, iso, b+tot, count-tot); + ilock(ctlr); + } + if(iso->state != Qclose) + iso->state = Qdone; + iunlock(ctlr); + err = iso->err; /* in case it failed early */ + iso->err = nil; + qunlock(iso); + poperror(); + if(err != nil) + error(err); + diprint("ohci: episowrite: %#p %ld bytes\n", iso, tot); + return tot; +} + +static long +epwrite(Ep *ep, void *a, long count) +{ + Qio *io; + Ctlio *cio; + ulong delta; + uchar *b; + long tot, nw; + + if(ep->aux == nil) + panic("ohci: epwrite: not open"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + return epctlio(ep, cio, a, count); + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + /* + * Put at most Tdatomic Tds (512 bytes) at a time. + * Otherwise some devices produce babble errors. + */ + b = a; + assert(a != nil); + for(tot = 0; tot < count ; tot += nw){ + nw = count - tot; + if(nw > Tdatomic * ep->maxpkt) + nw = Tdatomic * ep->maxpkt; + nw = epio(ep, &io[OWRITE], b+tot, nw, 1); + } + return tot; + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OWRITE].iotime + 1; + if(delta < ep->pollival) + tsleep(&up->sleep, return0, 0, ep->pollival - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tiso: + return episowrite(ep, a, count); + default: + panic("ohci: epwrite: bad ep ttype %d", ep->ttype); + } + return -1; +} + +static Ed* +newed(Ctlr *ctlr, Ep *ep, Qio *io, char *) +{ + Ed *ed; + Td *td; + + ed = io->ed = edalloc(); /* no errors raised here, really */ + td = tdalloc(); + td->ep = ep; + td->io = io; + ed->tail = ptr2pa(td); + ed->head = ptr2pa(td); + ed->tds = td; + ed->ep = ep; + ed->ctrl = (ep->maxpkt & Edmpsmask) << Edmpsshift; + if(ep->ttype == Tiso) + ed->ctrl |= Ediso; + if(waserror()){ + edfree(ed); + io->ed = nil; + nexterror(); + } + /* For setup endpoints we start with the config 
address */ + if(ep->ttype != Tctl) + edsetaddr(io->ed, io->usbid); + if(ep->dev->speed == Lowspeed) + ed->ctrl |= Edlow; + switch(io->tok){ + case Tdtokin: + ed->ctrl |= Edin; + break; + case Tdtokout: + ed->ctrl |= Edout; + break; + default: + ed->ctrl |= Edtddir; /* Td will say */ + break; + } + + switch(ep->ttype){ + case Tctl: + ilock(ctlr); + edlinked(ed, ctlhd(ctlr)); + setctlhd(ctlr, ed); + iunlock(ctlr); + break; + case Tbulk: + ilock(ctlr); + edlinked(ed, bulkhd(ctlr)); + setbulkhd(ctlr, ed); + iunlock(ctlr); + break; + case Tintr: + case Tiso: + ilock(ctlr); + schedq(ctlr, io, ep->pollival); + iunlock(ctlr); + break; + default: + panic("ohci: newed: bad ttype"); + } + poperror(); + return ed; +} + +static void +isoopen(Ctlr *ctlr, Ep *ep) +{ + Td *td, *edtds; + Isoio *iso; + int i; + + iso = ep->aux; + iso->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax); + iso->bw = ep->hz * ep->samplesz; /* bytes/sec */ + if(ep->mode != OWRITE){ + print("ohci: bug: iso input streams not implemented\n"); + error("ohci iso input streams not implemented"); + }else + iso->tok = Tdtokout; + + iso->left = 0; + iso->nerrs = 0; + iso->frno = TRUNC(ctlr->ohci->fmnumber + 10, Ntdframes); + iso->nframes = 1000 / ep->pollival; + if(iso->nframes < 10){ + print("ohci: isoopen: less than 10 frames; using 10.\n"); + iso->nframes = 10; + } + iso->navail = iso->nframes; + iso->atds = edtds = nil; + for(i = 0; i < iso->nframes-1; i++){ /* -1 for dummy */ + td = tdalloc(); + td->ep = ep; + td->io = iso; + td->bp = allocb(ep->maxpkt); + td->anext = iso->atds; /* link as avail */ + iso->atds = td; + td->next = edtds; + edtds = td; + } + newed(ctlr, ep, iso, "iso"); /* allocates a dummy td */ + iso->ed->tds->bp = allocb(ep->maxpkt); /* but not its block */ + iso->ed->tds->next = edtds; + isodtdinit(ep, iso, iso->ed->tds); +} + +/* + * Allocate the endpoint and set it up for I/O + * in the controller. 
This must follow what's said + * in Ep regarding configuration, including perhaps + * the saved toggles (saved on a previous close of + * the endpoint data file by epclose). + */ +static void +epopen(Ep *ep) +{ + Ctlr *ctlr; + Qio *io; + Ctlio *cio; + u32int usbid; + + ctlr = ep->hp->aux; + deprint("ohci: epopen ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux != nil) + panic("ohci: epopen called with open ep"); + if(waserror()){ + free(ep->aux); + ep->aux = nil; + nexterror(); + } + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tiso: + ep->aux = smalloc(sizeof(Isoio)); + isoopen(ctlr, ep); + break; + case Tctl: + cio = ep->aux = smalloc(sizeof(Ctlio)); + cio->debug = ep->debug; + cio->ndata = -1; + cio->data = nil; + cio->tok = -1; /* invalid; Tds will say */ + if(ep->dev->isroot != 0 && ep->nb == 0) /* root hub */ + break; + newed(ctlr, ep, cio, "epc"); + break; + case Tbulk: + ep->pollival = 1; /* assume this; doesn't really matter */ + /* and fall... */ + case Tintr: + io = ep->aux = smalloc(sizeof(Qio)*2); + io[OREAD].debug = io[OWRITE].debug = ep->debug; + usbid = (ep->nb<<7)|(ep->dev->nb & Devmax); + if(ep->mode != OREAD){ + if(ep->toggle[OWRITE] != 0) + io[OWRITE].toggle = Tddata1; + else + io[OWRITE].toggle = Tddata0; + io[OWRITE].tok = Tdtokout; + io[OWRITE].usbid = usbid; + io[OWRITE].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + newed(ctlr, ep, io+OWRITE, "epw"); + } + if(ep->mode != OWRITE){ + if(ep->toggle[OREAD] != 0) + io[OREAD].toggle = Tddata1; + else + io[OREAD].toggle = Tddata0; + io[OREAD].tok = Tdtokin; + io[OREAD].usbid = usbid; + io[OREAD].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + newed(ctlr, ep, io+OREAD, "epr"); + } + break; + } + deprint("ohci: epopen done:\n"); + if(debug || ep->debug) + dump(ep->hp); + poperror(); +} + +static void +cancelio(Ep *ep, Qio *io) +{ + Ed *ed; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + + ilock(ctlr); + if(io == nil || io->state == Qclose){ + assert(io == nil || io->ed == 
nil); + iunlock(ctlr); + return; + } + ed = io->ed; + io->state = Qclose; + io->err = Eio; + aborttds(io); + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + + wakeup(io); + qlock(io); + /* wait for epio if running */ + qunlock(io); + + ilock(ctlr); + switch(ep->ttype){ + case Tctl: + unlinkctl(ctlr, ed); + break; + case Tbulk: + unlinkbulk(ctlr, ed); + break; + case Tintr: + case Tiso: + unschedq(ctlr, io); + break; + default: + panic("ohci cancelio: bad ttype"); + } + iunlock(ctlr); + edfree(io->ed); + io->ed = nil; +} + +static void +epclose(Ep *ep) +{ + Ctlio *cio; + Isoio *iso; + Qio *io; + + deprint("ohci: epclose ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux == nil) + panic("ohci: epclose called with closed ep"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + cancelio(ep, cio); + free(cio->data); + cio->data = nil; + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE){ + cancelio(ep, &io[OREAD]); + if(io[OREAD].toggle == Tddata1) + ep->toggle[OREAD] = 1; + } + if(ep->mode != OREAD){ + cancelio(ep, &io[OWRITE]); + if(io[OWRITE].toggle == Tddata1) + ep->toggle[OWRITE] = 1; + } + break; + case Tiso: + iso = ep->aux; + cancelio(ep, iso); + break; + default: + panic("epclose: bad ttype %d", ep->ttype); + } + + deprint("ohci: epclose ep%d.%d: done\n", ep->dev->nb, ep->nb); + free(ep->aux); + ep->aux = nil; +} + +static int +portreset(Hci *hp, int port, int on) +{ + Ctlr *ctlr; + Ohci *ohci; + + if(on == 0) + return 0; + + ctlr = hp->aux; + qlock(&ctlr->resetl); + if(waserror()){ + qunlock(&ctlr->resetl); + nexterror(); + } + ilock(ctlr); + ohci = ctlr->ohci; + ohci->rhportsts[port - 1] = Spp; + if((ohci->rhportsts[port - 1] & Ccs) == 0){ + iunlock(ctlr); + error("port not connected"); + } + ohci->rhportsts[port - 1] = Spr; + while((ohci->rhportsts[port - 1] & Prsc) == 0){ + iunlock(ctlr); + dprint("ohci: portreset, wait for reset complete\n"); + ilock(ctlr); + } + 
ohci->rhportsts[port - 1] = Prsc; + iunlock(ctlr); + poperror(); + qunlock(&ctlr->resetl); + return 0; +} + +static int +portenable(Hci *hp, int port, int on) +{ + Ctlr *ctlr; + + ctlr = hp->aux; + dprint("ohci: %#p port %d enable=%d\n", ctlr->ohci, port, on); + qlock(&ctlr->resetl); + if(waserror()){ + qunlock(&ctlr->resetl); + nexterror(); + } + ilock(ctlr); + if(on) + ctlr->ohci->rhportsts[port - 1] = Spe | Spp; + else + ctlr->ohci->rhportsts[port - 1] = Cpe; + iunlock(ctlr); + tsleep(&up->sleep, return0, 0, Enabledelay); + poperror(); + qunlock(&ctlr->resetl); + return 0; +} + +static int +portstatus(Hci *hp, int port) +{ + int v; + Ctlr *ub; + u32int ohcistatus; + + /* + * We must return status bits as a + * get port status hub request would do. + */ + ub = hp->aux; + ohcistatus = ub->ohci->rhportsts[port - 1]; + v = 0; + if(ohcistatus & Ccs) + v |= HPpresent; + if(ohcistatus & Pes) + v |= HPenable; + if(ohcistatus & Pss) + v |= HPsuspend; + if(ohcistatus & Prs) + v |= HPreset; + else { + /* port is not in reset; these potential writes are ok */ + if(ohcistatus & Csc){ + v |= HPstatuschg; + ub->ohci->rhportsts[port - 1] = Csc; + } + if(ohcistatus & Pesc){ + v |= HPchange; + ub->ohci->rhportsts[port - 1] = Pesc; + } + } + if(ohcistatus & Lsda) + v |= HPslow; + if(v & (HPstatuschg|HPchange)) + ddprint("ohci port %d sts %#ux hub sts %#x\n", port, ohcistatus, v); + return v; +} + +static void +dumpohci(Ctlr *ctlr) +{ + int i; + u32int *ohci; + + ohci = &ctlr->ohci->revision; + print("ohci registers: \n"); + for(i = 0; i < sizeof(Ohci)/sizeof *ohci; i++) + if(i < 3 || ohci[i] != 0) + print("\t[%#2.2x]\t%#8.8ux\n", i * 4, ohci[i]); + print("\n"); +} + +static void +init(Hci *hp) +{ + Ctlr *ctlr; + Ohci *ohci; + int i; + u32int ival, ctrl, fmi; + + ctlr = hp->aux; + dprint("ohci %#p init\n", ctlr->ohci); + ohci = ctlr->ohci; + + fmi = ctlr->ohci->fminterval; + ctlr->ohci->cmdsts = Shcr; /* reset the block */ + while(ctlr->ohci->cmdsts & Shcr) + delay(1); /* wait till 
reset complete, Ohci says 10us max. */ + ctlr->ohci->fminterval = fmi; + + /* + * now that soft reset is done we are in suspend state. + * Setup registers which take in suspend state + * (will only be here for 2ms). + */ + + ctlr->ohci->hcca = ptr2pa(ctlr->hcca); + setctlhd(ctlr, nil); + ctlr->ohci->ctlcurred = 0; + setbulkhd(ctlr, nil); + ctlr->ohci->bulkcurred = 0; + + ohci->intrenable = Mie | Wdh | Ue; + ohci->control |= Ccle | Cble | Cple | Cie | Cfsoper; + + /* set frame after operational */ + ohci->rhdesca = Nps; /* no power switching */ + if(ohci->rhdesca & Nps){ + dprint("ohci: ports are not power switched\n"); + }else{ + dprint("ohci: ports are power switched\n"); + ohci->rhdesca &= ~Psm; + ohci->rhsts &= ~Lpsc; + } + for(i = 0; i < ctlr->nports; i++) /* paranoia */ + ohci->rhportsts[i] = 0; /* this has no effect */ + delay(50); + + for(i = 0; i < ctlr->nports; i++){ + ohci->rhportsts[i] = Spp; + if((ohci->rhportsts[i] & Ccs) != 0) + ohci->rhportsts[i] |= Spr; + } + delay(100); + + ctrl = ohci->control; + if((ctrl & Cfsmask) != Cfsoper){ + ctrl = (ctrl & ~Cfsmask) | Cfsoper; + ohci->control = ctrl; + ohci->rhsts = Lpsc; + } + ival = ohci->fminterval & ~(Fmaxpktmask << Fmaxpktshift); + ohci->fminterval = ival | (5120 << Fmaxpktshift); + + if(debug > 1) + dumpohci(ctlr); +} + +static void +scanpci(void) +{ + uintmem pa; + void *va; + Ctlr *ctlr; + Pcidev *p; + int i; + static int already = 0; + + if(already) + return; + already = 1; + i = 0; + for(p = nil; p = pcimatch(p, 0, 0); ) { + /* + * Find Ohci controllers (Programming Interface = 0x10). 
+		 */
+		if(p->ccrb != 0xc || p->ccru != 3 || p->ccrp != 0x10)
+			continue;
+		pa = p->mem[0].bar & ~0x0F;
+		if(p->intl == 0xFF || p->intl == 0) {
+			print("usb: ohci: no irq assigned for port %#P\n", pa);
+			continue;
+		}
+		dprint("ohci: %.4ux/%.4ux port %#P size %#ux irq %d\n",
+			p->vid, p->did, pa, p->mem[0].size, p->intl);
+		if(i == Nhcis){
+			print("ohci: %T ignored; increase Nhcis\n", p->tbdf);
+			continue;
+		}
+		va = vmap(pa, p->mem[0].size);
+		if(va == nil){
+			print("ohci: failed to map registers\n");
+			continue;
+		}
+
+		/* the fields below are written right away: never do it through nil */
+		ctlr = malloc(sizeof(Ctlr));
+		if(ctlr == nil)
+			panic("ohci: out of memory");
+		ctlr->pcidev = p;
+		ctlr->ohci = va;
+		dprint("scanpci: ctlr %#p, ohci %#p\n", ctlr, ctlr->ohci);
+		pcisetbme(p);
+		pcisetpms(p, 0);
+
+		ctlrs[i++] = ctlr;
+	}
+}
+
+/*
+ * Hci debug hook: record the requested debug level in the
+ * driver's debug variable. The Hci argument is not used.
+ */
+static void
+usbdebug(Hci*, int d)
+{
+	debug = d;
+}
+
+/*
+ * build the periodic scheduling tree:
+ * framesize must be a multiple of the tree size
+ *
+ * The tree is a complete binary tree kept in an array of
+ * n = 2^(depth+1)-1 Eds, with depth = flog2(32); node i is linked
+ * (edlinked) to its parent (i-1)/2 and the root is linked to nil.
+ * Every node is flagged Edskip.
+ * Each of the 32 entries of hcca->intrtable is then pointed at one
+ * of the leaves (array indexes starting at n/2), using the entry
+ * index with its low depth bits reversed to pick the leaf.
+ * Panics if memory or Eds cannot be allocated.
+ */
+static void
+mkqhtree(Ctlr *ctlr)
+{
+	int i, n, d, o, leaf0, depth;
+	Ed **tree;
+	Qtree *qt;
+
+	depth = flog2(32);
+	n = (1 << (depth+1)) - 1;
+	qt = mallocz(sizeof(*qt), 1);
+	if(qt == nil)
+		panic("usb: can't allocate scheduling tree");
+	qt->nel = n;
+	qt->depth = depth;
+	/* n elements of what bw points to; not n pointers */
+	qt->bw = mallocz(n * sizeof(*qt->bw), 1);
+	qt->root = tree = mallocz(n * sizeof(Ed *), 1);
+	if(qt->bw == nil || qt->root == nil)
+		panic("usb: can't allocate scheduling tree");
+	for(i = 0; i < n; i++){
+		if((tree[i] = edalloc()) == nil)
+			panic("mkqhtree");
+		tree[i]->ctrl = (8 << Edmpsshift);	/* not needed */
+		tree[i]->ctrl |= Edskip;
+
+		if(i > 0)
+			edlinked(tree[i], tree[(i-1)/2]);
+		else
+			edlinked(tree[i], nil);
+	}
+	ctlr->ntree = i;
+	dprint("ohci: tree: %d endpoints allocated\n", i);
+
+	/* distribute leaves evenly round the frame list */
+	leaf0 = n / 2;
+	for(i = 0; i < 32; i++){
+		/* o is i with its low depth bits in reverse order */
+		o = 0;
+		for(d = 0; d < depth; d++){
+			o <<= 1;
+			if(i & (1 << d))
+				o |= 1;
+		}
+		if(leaf0 + o >= n){
+			print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n);
+			break;
+		}
+		ctlr->hcca->intrtable[i] = ptr2pa(tree[leaf0 + o]);
+	}
+	
ctlr->tree = qt;
+}
+
+/*
+ * Allocate the memory shared with the controller: force the
+ * Ed and Td pools to exist (by allocating and releasing one of
+ * each), allocate the Hcca with 256-byte alignment and build the
+ * periodic scheduling tree on it.
+ * Panics if there is no memory for the Hcca.
+ */
+static void
+ohcimeminit(Ctlr *ctlr)
+{
+	Hcca *hcca;
+
+	edfree(edalloc());	/* allocate pools now */
+	tdfree(tdalloc());
+
+	hcca = lomallocalign(sizeof(Hcca), 256);
+	if(hcca == nil)
+		panic("usbhreset: no memory for Hcca");
+	ctlr->hcca = hcca;
+
+	mkqhtree(ctlr);
+}
+
+/*
+ * Bring the controller to a known state: with the controller
+ * lock held, wait, clear the control register, wait again and
+ * write the legacy support register in PCI configuration space.
+ */
+static void
+ohcireset(Ctlr *ctlr)
+{
+	ilock(ctlr);
+	dprint("ohci %#p reset\n", ctlr->ohci);
+
+	/*
+	 * usually enter here in reset, wait till its through,
+	 * then do our own so we are on known timing conditions.
+	 * Is this needed?
+	 */
+	delay(100);
+	ctlr->ohci->control = 0;
+	delay(100);
+
+	/* legacy support register: turn off lunacy mode */
+	pcicfgw16(ctlr->pcidev, 0xc0, 0x2000);
+
+	iunlock(ctlr);
+}
+
+/*
+ * Hci shutdown hook: with the controller lock held, write the
+ * Mie, Wdh and Ue bits to the interrupt disable register and
+ * clear the control register.
+ */
+static void
+shutdown(Hci *hp)
+{
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+
+	ilock(ctlr);
+	ctlr->ohci->intrdisable = Mie | Wdh | Ue;
+	ctlr->ohci->control = 0;
+	delay(100);
+	iunlock(ctlr);
+}
+
+/*
+ * Entry point registered with addhcitype: bind hp to the first
+ * controller found by scanpci that is not yet active and matches
+ * hp->port (any controller matches when hp->port is 0), reset it,
+ * allocate its memory and fill in the Hci operations.
+ * Returns 0 on success and -1 if the driver is disabled by the
+ * *nousbohci configuration variable, no controller is available
+ * or the control register reads as all ones.
+ */
+static int
+reset(Hci *hp)
+{
+	int i;
+	Ctlr *ctlr;
+	Pcidev *p;
+	static Lock resetlck;
+
+	if(getconf("*nousbohci"))
+		return -1;
+	ilock(&resetlck);
+	scanpci();
+
+	/*
+	 * Any adapter matches if no hp->port is supplied,
+	 * otherwise the ports must match.
+	 */
+	ctlr = nil;
+	for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){
+		ctlr = ctlrs[i];
+		if(ctlr->active == 0)
+		if(hp->port == 0 || hp->port == (uintptr)ctlr->ohci){
+			ctlr->active = 1;
+			break;
+		}
+	}
+	iunlock(&resetlck);
+	/* check the index first: ctlrs[Nhcis] is past the end of the array */
+	if(i == Nhcis || ctlrs[i] == nil)
+		return -1;
+	if(ctlr->ohci->control == ~0)
+		return -1;
+
+
+	p = ctlr->pcidev;
+	hp->aux = ctlr;
+	hp->port = (uintptr)ctlr->ohci;
+	hp->irq = p->intl;
+	hp->tbdf = p->tbdf;
+	ctlr->nports = hp->nports = ctlr->ohci->rhdesca & 0xff;
+
+	ohcireset(ctlr);
+	ohcimeminit(ctlr);
+
+	/*
+	 * Linkage to the generic HCI driver.
+ */ + hp->init = init; + hp->dump = dump; + hp->interrupt = interrupt; + hp->epopen = epopen; + hp->epclose = epclose; + hp->epread = epread; + hp->epwrite = epwrite; + hp->seprintep = seprintep; + hp->portenable = portenable; + hp->portreset = portreset; + hp->portstatus = portstatus; + hp->shutdown = shutdown; + hp->debug = usbdebug; + hp->type = "ohci"; + return 0; +} + +void +usbohcilink(void) +{ + addhcitype("ohci", reset); +} diff -Nru /sys/src/9k/k10/usbuhci.c /sys/src/9k/k10/usbuhci.c --- /sys/src/9k/k10/usbuhci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/usbuhci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2321 @@ +/* + * USB Universal Host Controller Interface (sic) driver. + * + * The device does not support 64-bit addressing. + * + * BUGS: + * - Too many delays and ilocks. + * - bandwidth admission control must be done per-frame. + * - interrupt endpoints should go on a tree like [oe]hci. + * - must warn of power overruns. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" + +typedef struct Ctlio Ctlio; +typedef struct Ctlr Ctlr; +typedef struct Isoio Isoio; +typedef struct Qh Qh; +typedef struct Qhpool Qhpool; +typedef struct Qio Qio; +typedef struct Td Td; +typedef struct Tdpool Tdpool; + +enum +{ + Resetdelay = 100, /* delay after a controller reset (ms) */ + Enabledelay = 100, /* waiting for a port to enable */ + Abortdelay = 5, /* delay after cancelling Tds (ms) */ + Incr = 64, /* for Td and Qh pools */ + + Tdatomic = 8, /* max nb. of Tds per bulk I/O op. */ + + /* Queue states (software) */ + Qidle = 0, + Qinstall, + Qrun, + Qdone, + Qclose, + Qfree, + + /* + * HW constants + */ + + Nframes = 1024, /* 2ⁿ for xspanalloc; max 1024 */ + Align = 16, /* for data structures */ + + /* Size of small buffer kept within Tds. 
(software) */ + /* Keep as a multiple of Align to maintain alignment of Tds in pool */ + Tdndata = 1*Align, + + /* i/o space + * Some ports are short, some are long, some are byte. + * We use ins[bsl] and not vmap. + */ + Cmd = 0, + Crun = 0x01, + Chcreset = 0x02, /* host controller reset */ + Cgreset = 0x04, /* global reset */ + Cegsm = 0x08, /* enter global suspend */ + Cfgr = 0x10, /* forge global resume */ + Cdbg = 0x20, /* single step, debug */ + Cmaxp = 0x80, /* max packet */ + + Status = 2, + Susbintr = 0x01, /* interrupt */ + Seintr = 0x02, /* error interrupt */ + Sresume = 0x04, /* resume detect */ + Shserr = 0x08, /* host system error */ + Shcerr = 0x10, /* host controller error */ + Shalted = 0x20, /* controller halted */ + Sall = 0x3F, + + Usbintr = 4, + Itmout = 0x01, /* timeout or crc */ + Iresume = 0x02, /* resume interrupt enable */ + Ioc = 0x04, /* interrupt on complete */ + Ishort = 0x08, /* short packet interrupt */ + Iall = 0x0F, + Frnum = 6, + Flbaseadd = 8, + SOFmod = 0xC, /* start of frame modifier register */ + + Portsc0 = 0x10, + PSpresent = 0x0001, /* device present */ + PSstatuschg = 0x0002, /* PSpresent changed */ + PSenable = 0x0004, /* device enabled */ + PSchange = 0x0008, /* PSenable changed */ + PSresume = 0x0040, /* resume detected */ + PSreserved1 = 0x0080, /* always read as 1; reserved */ + PSslow = 0x0100, /* device has low speed */ + PSreset = 0x0200, /* port reset */ + PSsuspend = 0x1000, /* port suspended */ + + /* Transfer descriptor link */ + Tdterm = 0x1, /* nil (terminate) */ + Tdlinkqh = 0x2, /* link refers to a QH */ + Tdvf = 0x4, /* run linked Tds first (depth-first)*/ + + /* Transfer status bits */ + Tdbitstuff = 0x00020000, /* bit stuffing error */ + Tdcrcto = 0x00040000, /* crc or timeout error */ + Tdnak = 0x00080000, /* nak packet received */ + Tdbabble = 0x00100000, /* babble detected */ + Tddberr = 0x00200000, /* data buf. error */ + Tdstalled = 0x00400000, /* serious error to ep. 
*/ + Tdactive = 0x00800000, /* enabled/in use by hw */ + /* Transfer control bits */ + Tdioc = 0x01000000, /* interrupt on complete */ + Tdiso = 0x02000000, /* isochronous select */ + Tdlow = 0x04000000, /* low speed device */ + Tderr1 = 0x08000000, /* bit 0 of error counter */ + Tderr2 = 0x10000000, /* bit 1 of error counter */ + Tdspd = 0x20000000, /* short packet detect */ + + Tdlen = 0x000003FF, /* actual length field */ + + Tdfatalerr = Tdnak|Tdbabble|Tdstalled, /* hw retries others */ + Tderrors = Tdfatalerr|Tdbitstuff|Tdcrcto|Tddberr, + + /* Transfer descriptor token bits */ + Tddata0 = 0, + Tddata1 = 0x80000, /* data toggle (1==DATA1) */ + Tdtokin = 0x69, + Tdtokout = 0xE1, + Tdtoksetup = 0x2D, + + Tdmaxpkt = 0x800, /* max packet size */ + + /* Queue head bits */ + QHterm = 1<<0, /* nil (terminate) */ + QHlinkqh = 1<<1, /* link refers to a QH */ + QHvf = 1<<2, /* vertical first (depth first) */ +}; + +struct Ctlr +{ + Lock; /* for ilock. qh lists and basic ctlr I/O */ + QLock portlck; /* for port resets/enable... */ + Pcidev* pcidev; + int active; + int port; /* I/O address */ + Qh* qhs; /* list of Qhs for this controller */ + Qh* qh[Tmax]; /* Dummy Qhs to insert Qhs after */ + Isoio* iso; /* list of active iso I/O */ + u32int* frames; /* frame list (used by hw) */ + ulong load; /* max load for a single frame */ + ulong isoload; /* max iso load for a single frame */ + int nintr; /* number of interrupts attended */ + int ntdintr; /* number of intrs. with something to do */ + int nqhintr; /* number of intrs. for Qhs */ + int nisointr; /* number of intrs. 
for iso transfers */ +}; + +struct Qio +{ + QLock; /* for the entire I/O process */ + Rendez; /* wait for completion */ + Qh* qh; /* Td list (field const after init) */ + int usbid; /* usb address for endpoint/device */ + int toggle; /* Tddata0/Tddata1 */ + int tok; /* Tdtoksetup, Tdtokin, Tdtokout */ + ulong iotime; /* time of last I/O */ + int debug; /* debug flag from the endpoint */ + char* err; /* error string */ +}; + +struct Ctlio +{ + Qio; /* a single Qio for each RPC */ + uchar* data; /* read from last ctl req. */ + int ndata; /* number of bytes read */ +}; + +struct Isoio +{ + QLock; + Rendez; /* wait for space/completion/errors */ + int usbid; /* address used for device/endpoint */ + int tok; /* Tdtokin or Tdtokout */ + int state; /* Qrun -> Qdone -> Qrun... -> Qclose */ + int nframes; /* Nframes/ep->pollival */ + uchar* data; /* iso data buffers if not embedded */ + int td0frno; /* frame number for first Td */ + Td* tdu; /* next td for user I/O in tdps */ + Td* tdi; /* next td processed by interrupt */ + char* err; /* error string */ + int nerrs; /* nb of consecutive I/O errors */ + long nleft; /* number of bytes left from last write */ + int debug; /* debug flag from the endpoint */ + Isoio* next; /* in list of active Isoios */ + Td* tdps[Nframes]; /* pointer to Td used for i-th frame or nil */ +}; + +struct Tdpool +{ + Lock; + Td* free; + int nalloc; + int ninuse; + int nfree; +}; + +struct Qhpool +{ + Lock; + Qh* free; + int nalloc; + int ninuse; + int nfree; +}; + +/* + * HW data structures + */ + +/* + * Queue header (known by hw). + * 16-byte aligned. first two words used by hw. + * They are taken from the pool upon endpoint opening and + * queued after the dummy queue header for the endpoint type + * in the controller. Actual I/O happens as Tds are linked into it. + * The driver does I/O in lock-step. 
+ * The user builds a list of Tds and links it into the Qh, + * then the Qh goes from Qidle to Qrun and nobody touches it until + * it becomes Qdone at interrupt time. + * At that point the user collects the Tds and it goes Qidle. + * A premature cancel may set the state to Qclose and abort I/O. + * The Ctlr lock protects change of state for Qhs in use. + */ +struct Qh +{ + u32int link; /* link to next horiz. item (eg. Qh) */ + u32int elink; /* link to element (eg. Td; updated by hw) */ + + u32int state; /* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */ + Qio* io; /* for this queue */ + + Qh* next; /* in active or free list */ + Td* tds; /* Td list in this Qh (initially, elink) */ + char* tag; /* debug and align, mostly */ + ulong align; +}; + +/* + * Transfer descriptor. + * 16-byte aligned. first two words used by hw. Next 4 by sw. + * We keep an embedded buffer for small I/O transfers. + * They are taken from the pool when buffers are needed for I/O + * and linked at the Qh/Isoio for the endpoint and direction requiring it. + * The block keeps actual data. They are protected from races by + * the queue or the pool keeping it. The owner of the link to the Td + * is free to use it and can be the only one using it. 
+ */ +struct Td +{ + u32int link; /* Link to next Td or Qh */ + u32int csw; /* control and status word (updated by hw) */ + u32int token; /* endpt, device, pid */ + u32int buffer; /* buffer pointer */ + + Td* next; /* in qh or Isoio or free list */ + u32int ndata; /* bytes available/used at data */ + uchar* data; /* pointer to actual data */ + void* buff; /* allocated data, for large transfers */ + + uchar sbuff[Tdndata]; /* embedded buffer, for small transfers */ +}; + +#define INB(x) inb(ctlr->port+(x)) +#define INS(x) ins(ctlr->port+(x)) +#define INL(x) inl(ctlr->port+(x)) +#define OUTB(x, v) outb(ctlr->port+(x), (v)) +#define OUTS(x, v) outs(ctlr->port+(x), (v)) +#define OUTL(x, v) outl(ctlr->port+(x), (v)) +#define TRUNC(x, sz) ((x) & ((sz)-1)) +#define PTR(q) ((void*)KADDR((ulong)(q) & ~ (0xF|PCIWINDOW))) +#define QPTR(q) ((Qh*)PTR(q)) +#define TPTR(q) ((Td*)PTR(q)) +#define PORT(p) (Portsc0 + 2*(p)) +#define diprint if(debug || iso->debug)print +#define ddiprint if(debug>1 || iso->debug>1)print +#define dqprint if(debug || (qh->io && qh->io->debug))print +#define ddqprint if(debug>1 || (qh->io && qh->io->debug>1))print + +static Ctlr* ctlrs[Nhcis]; + +static Tdpool tdpool; +static Qhpool qhpool; +static int debug; + +static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" }; + +static void +uhcicmd(Ctlr *ctlr, int c) +{ + OUTS(Cmd, c); +} + +static void +uhcirun(Ctlr *ctlr, int on) +{ + int i; + + ddprint("uhci %#ux setting run to %d\n", ctlr->port, on); + + if(on) + uhcicmd(ctlr, INS(Cmd)|Crun); + else + uhcicmd(ctlr, INS(Cmd) & ~Crun); + for(i = 0; i < 100; i++) + if(on == 0 && (INS(Status) & Shalted) != 0) + break; + else if(on != 0 && (INS(Status) & Shalted) == 0) + break; + else + delay(1); + if(i == 100) + dprint("uhci %#x run cmd timed out\n", ctlr->port); + ddprint("uhci %#ux cmd %#ux sts %#ux\n", + ctlr->port, INS(Cmd), INS(Status)); +} + +static int +tdlen(Td *td) +{ + return (td->csw+1) & Tdlen; +} + +static int +maxtdlen(Td 
*td) +{ + return ((td->token>>21)+1) & (Tdmaxpkt-1); +} + +static int +tdtok(Td *td) +{ + return td->token & 0xFF; +} + +static char* +seprinttd(char *s, char *se, Td *td) +{ + s = seprint(s, se, "%#p link %#ux", td, td->link); + if((td->link & Tdvf) != 0) + s = seprint(s, se, "V"); + if((td->link & Tdterm) != 0) + s = seprint(s, se, "T"); + if((td->link & Tdlinkqh) != 0) + s = seprint(s, se, "Q"); + s = seprint(s, se, " csw %#ux ", td->csw); + if(td->csw & Tdactive) + s = seprint(s, se, "a"); + if(td->csw & Tdiso) + s = seprint(s, se, "I"); + if(td->csw & Tdioc) + s = seprint(s, se, "i"); + if(td->csw & Tdlow) + s = seprint(s, se, "l"); + if((td->csw & (Tderr1|Tderr2)) == 0) + s = seprint(s, se, "z"); + if(td->csw & Tderrors) + s = seprint(s, se, " err %#ux", td->csw & Tderrors); + if(td->csw & Tdstalled) + s = seprint(s, se, "s"); + if(td->csw & Tddberr) + s = seprint(s, se, "d"); + if(td->csw & Tdbabble) + s = seprint(s, se, "b"); + if(td->csw & Tdnak) + s = seprint(s, se, "n"); + if(td->csw & Tdcrcto) + s = seprint(s, se, "c"); + if(td->csw & Tdbitstuff) + s = seprint(s, se, "B"); + s = seprint(s, se, " stslen %d", tdlen(td)); + + s = seprint(s, se, " token %#ux", td->token); + if(td->token == 0) /* the BWS loopback Td, ignore rest */ + return s; + s = seprint(s, se, " maxlen %d", maxtdlen(td)); + if(td->token & Tddata1) + s = seprint(s, se, " d1"); + else + s = seprint(s, se, " d0"); + s = seprint(s, se, " id %#ux:", (td->token>>15) & Epmax); + s = seprint(s, se, "%#ux", (td->token>>8) & Devmax); + switch(tdtok(td)){ + case Tdtokin: + s = seprint(s, se, " in"); + break; + case Tdtokout: + s = seprint(s, se, " out"); + break; + case Tdtoksetup: + s = seprint(s, se, " setup"); + break; + default: + s = seprint(s, se, " BADPID"); + } + s = seprint(s, se, "\n\t buffer %#ux data %#p", td->buffer, td->data); + s = seprint(s, se, " ndata %ud sbuff %#p buff %#p", + td->ndata, td->sbuff, td->buff); + if(td->ndata > 0) + s = seprintdata(s, se, td->data, td->ndata); + 
return s; +} + +static void +isodump(Isoio *iso, int all) +{ + char buf[256]; + Td *td; + int i; + + print("iso %#p %s state %d nframes %d" + " td0 %#p tdu %#p tdi %#p data %#p\n", + iso, iso->tok == Tdtokin ? "in" : "out", + iso->state, iso->nframes, iso->tdps[iso->td0frno], + iso->tdu, iso->tdi, iso->data); + if(iso->err != nil) + print("\terr='%s'\n", iso->err); + if(all == 0){ + seprinttd(buf, buf+sizeof(buf), iso->tdu); + print("\ttdu %s\n", buf); + seprinttd(buf, buf+sizeof(buf), iso->tdi); + print("\ttdi %s\n", buf); + }else{ + td = iso->tdps[iso->td0frno]; + for(i = 0; i < iso->nframes; i++){ + seprinttd(buf, buf+sizeof(buf), td); + if(td == iso->tdi) + print("i->"); + if(td == iso->tdu) + print("u->"); + print("\t%s\n", buf); + td = td->next; + } + } +} + +static int +sameptr(void *p, ulong l) +{ + if(l & QHterm) + return p == nil; + return PTR(l) == p; +} + +static void +dumptd(Td *td, char *pref) +{ + char buf[256]; + char *s; + char *se; + int i; + + i = 0; + se = buf+sizeof(buf); + for(; td != nil; td = td->next){ + s = seprinttd(buf, se, td); + if(!sameptr(td->next, td->link)) + seprint(s, se, " next %#p != link %#ux %#p", + td->next, td->link, TPTR(td->link)); + print("%std %s\n", pref, buf); + if(i++ > 20){ + print("...more tds...\n"); + break; + } + } +} + +static void +qhdump(Qh *qh, char *pref) +{ + char buf[256]; + char *s; + char *se; + ulong td; + int i; + + s = buf; + se = buf+sizeof(buf); + s = seprint(s, se, "%sqh %s %#p state %s link %#ux", pref, + qh->tag, qh, qhsname[qh->state], qh->link); + if(!sameptr(qh->tds, qh->elink)) + s = seprint(s, se, " [tds %#p != elink %#ux %#p]", + qh->tds, qh->elink, TPTR(qh->elink)); + if(!sameptr(qh->next, qh->link)) + s = seprint(s, se, " [next %#p != link %#ux %#p]", + qh->next, qh->link, QPTR(qh->link)); + if((qh->link & Tdterm) != 0) + s = seprint(s, se, "T"); + if((qh->link & Tdlinkqh) != 0) + s = seprint(s, se, "Q"); + s = seprint(s, se, " elink %#ux", qh->elink); + if((qh->elink & Tdterm) != 0) + s 
= seprint(s, se, "T"); + if((qh->elink & Tdlinkqh) != 0) + s = seprint(s, se, "Q"); + s = seprint(s, se, " io %#p", qh->io); + if(qh->io != nil && qh->io->err != nil) + seprint(s, se, " err='%s'", qh->io->err); + print("%s\n", buf); + dumptd(qh->tds, "\t"); + if((qh->elink & QHterm) == 0){ + print("\thw tds:"); + i = 0; + for(td = qh->elink; (td & Tdterm) == 0; td = TPTR(td)->link){ + print(" %#ulx", td); + if(td == TPTR(td)->link) /* BWS Td */ + break; + if(i++ > 40){ + print("..."); + break; + } + } + print("\n"); + } +} + +static void +xdump(Ctlr *ctlr, int doilock) +{ + Isoio *iso; + Qh *qh; + int i; + + if(doilock){ + if(ctlr == ctlrs[0]){ + lock(&tdpool); + print("tds: alloc %d = inuse %d + free %d\n", + tdpool.nalloc, tdpool.ninuse, tdpool.nfree); + unlock(&tdpool); + lock(&qhpool); + print("qhs: alloc %d = inuse %d + free %d\n", + qhpool.nalloc, qhpool.ninuse, qhpool.nfree); + unlock(&qhpool); + } + ilock(ctlr); + } + print("uhci port %#x frames %#p nintr %d ntdintr %d", + ctlr->port, ctlr->frames, ctlr->nintr, ctlr->ntdintr); + print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr); + print("cmd %#ux sts %#ux fl %#lux ps1 %#ux ps2 %#ux frames[0] %#ux\n", + INS(Cmd), INS(Status), + INL(Flbaseadd), INS(PORT(0)), INS(PORT(1)), + ctlr->frames[0]); + for(iso = ctlr->iso; iso != nil; iso = iso->next) + isodump(iso, 1); + i = 0; + for(qh = ctlr->qhs; qh != nil; qh = qh->next){ + qhdump(qh, ""); + if(i++ > 20){ + print("qhloop\n"); + break; + } + } + print("\n"); + if(doilock) + iunlock(ctlr); +} + +static void +dump(Hci *hp) +{ + xdump(hp->aux, 1); +} + +static Td* +tdalloc(void) +{ + uchar *pool, *end; + int sz; + Td *td; + + lock(&tdpool); + if(tdpool.free == nil){ + ddprint("uhci: tdalloc %d Tds\n", Incr); + sz = ROUNDUP(sizeof *td, 16); + pool = mallocalign(Incr*sz, Align, 0, 0); + if(pool == nil) + panic("tdalloc"); + for(end = pool + Incr*sz; pool < end; pool += sz){ + td = (Td*)pool; + td->next = tdpool.free; + tdpool.free = td; + } + 
tdpool.nalloc += Incr; + tdpool.nfree += Incr; + } + td = tdpool.free; + tdpool.free = td->next; + tdpool.ninuse++; + tdpool.nfree--; + unlock(&tdpool); + + memset(td, 0, sizeof(Td)); + td->link = Tdterm; + assert(((uintptr)td & 0xF) == 0); + return td; +} + +static void +tdfree(Td *td) +{ + if(td == nil) + return; + free(td->buff); + td->buff = nil; + lock(&tdpool); + td->next = tdpool.free; + tdpool.free = td; + tdpool.ninuse--; + tdpool.nfree++; + unlock(&tdpool); +} + +static void +qhlinkqh(Qh* qh, Qh* next) +{ + if(next == nil) + qh->link = QHterm; + else{ + next->link = qh->link; + next->next = qh->next; + qh->link = PCIWADDR32(next)|QHlinkqh; + } + qh->next = next; +} + +static void +qhlinktd(Qh *qh, Td *td) +{ + qh->tds = td; + if(td == nil) + qh->elink = QHvf|QHterm; + else + qh->elink = PCIWADDR32(td); +} + +static void +tdlinktd(Td *td, Td *next) +{ + td->next = next; + if(next == nil) + td->link = Tdterm; + else + td->link = PCIWADDR32(next)|Tdvf; +} + +static Qh* +qhalloc(Ctlr *ctlr, Qh *prev, Qio *io, char *tag) +{ + uchar *pool, *end; + int sz; + Qh *qh; + + lock(&qhpool); + if(qhpool.free == nil){ + ddprint("uhci: qhalloc %d Qhs\n", Incr); + sz = ROUNDUP(sizeof(*qh), 16); + pool = mallocalign(Incr*sz, Align, 0, 0); + if(pool == nil) + panic("qhalloc"); + for(end = pool+Incr*sz; pool < end; pool += sz){ + qh = (Qh*)pool; + qh->next = qhpool.free; + qhpool.free = qh; + } + qhpool.nalloc += Incr; + qhpool.nfree += Incr; + } + qh = qhpool.free; + qhpool.free = qh->next; + qh->next = nil; + qh->link = QHterm; + qhpool.ninuse++; + qhpool.nfree--; + unlock(&qhpool); + + qh->tds = nil; + qh->elink = QHterm; + qh->state = Qidle; + qh->io = io; + qh->tag = nil; + kstrdup(&qh->tag, tag); + + if(prev != nil){ + coherence(); + ilock(ctlr); + qhlinkqh(prev, qh); + iunlock(ctlr); + } + + assert(((uintptr)qh & 0xF) == 0); + return qh; +} + +static void +qhfree(Ctlr *ctlr, Qh *qh) +{ + Td *td; + Td *ltd; + Qh *q; + + if(qh == nil) + return; + + ilock(ctlr); + for(q 
= ctlr->qhs; q != nil; q = q->next)
+		if(q->next == qh)
+			break;
+	if(q == nil)
+		panic("qhfree: nil q");
+	q->next = qh->next;
+	q->link = qh->link;
+	iunlock(ctlr);
+
+	for(td = qh->tds; td != nil; td = ltd){
+		ltd = td->next;
+		tdfree(td);
+	}
+	/*
+	 * The tag is a private copy made by kstrdup in qhalloc, and
+	 * qhalloc resets the pointer to nil before making a new copy:
+	 * it must be released here or it is lost.
+	 */
+	free(qh->tag);
+	lock(&qhpool);
+	qh->state = Qfree;	/* paranoia */
+	qh->next = qhpool.free;
+	qh->tag = nil;
+	qh->io = nil;
+	qhpool.free = qh;
+	qhpool.ninuse--;
+	qhpool.nfree++;
+	unlock(&qhpool);
+	ddprint("qhfree: qh %#p\n", qh);
+}
+
+/*
+ * Map the error bits of a Td status word to an error string.
+ * Returns "ok" for 0. When several bits are set the first match
+ * in the order tested below wins; anything else is Eio.
+ */
+static char*
+errmsg(int err)
+{
+	if(err == 0)
+		return "ok";
+	if(err & Tdcrcto)
+		return "crc/timeout error";
+	if(err & Tdbabble)
+		return "babble detected";
+	if(err & Tddberr)
+		return "db error";
+	if(err & Tdbitstuff)
+		return "bit stuffing error";
+	if(err & Tdstalled)
+		return Estalled;
+	return Eio;
+}
+
+/*
+ * Sleep condition for iso readers; a is the Isoio.
+ * True when the Isoio has been closed, or when it is running for
+ * input and tdi has moved away from tdu.
+ */
+static int
+isocanread(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	return iso->state == Qclose ||
+		(iso->state == Qrun &&
+		iso->tok == Tdtokin && iso->tdi != iso->tdu);
+}
+
+/*
+ * Sleep condition for iso writers; a is the Isoio.
+ * True when the Isoio has been closed, or when it is running for
+ * output and the Td after tdu is not tdi.
+ */
+static int
+isocanwrite(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	return iso->state == Qclose ||
+		(iso->state == Qrun &&
+		iso->tok == Tdtokout && iso->tdu->next != iso->tdi);
+}
+
+/*
+ * (Re)arm an iso Td for count bytes: record the count, build the
+ * token from count-1, the usb id and the direction token of the
+ * Isoio, and mark the Td active with interrupt on completion.
+ */
+static void
+tdisoinit(Isoio *iso, Td *td, long count)
+{
+	td->ndata = count;
+	td->token = ((count-1)<<21)| ((iso->usbid & 0x7FF)<<8) | iso->tok;
+	td->csw = Tderr1|Tdiso|Tdactive|Tdioc;
+}
+
+/*
+ * Process Iso i/o on interrupt. For writes update just error status.
+ * For reads update tds to reflect data and also error status.
+ * When tdi approaches tdu, advance tdu; data may be lost.
+ * (If nframes is << Nframes tdu might be far away but this avoids
+ * races regarding frno.)
+ * If we suffer errors for more than half the frames we stall.
+ */
+static void
+isointerrupt(Ctlr *ctlr, Isoio* iso)
+{
+	Td *tdi;
+	int err;
+	int i;
+	int nframes;
+
+	tdi = iso->tdi;
+	if((tdi->csw & Tdactive) != 0)		/* nothing new done */
+		return;
+	ctlr->nisointr++;
+	ddiprint("isointr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isointr: iso state");
+	if(debug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > 64)
+		nframes = 64;
+
+	/* walk the Tds that are no longer active, at most nframes of them */
+	for(i = 0; i < nframes && (tdi->csw & Tdactive) == 0; i++){
+		tdi->csw &= ~Tdioc;
+		err = tdi->csw & Tderrors;
+		if(err == 0)
+			iso->nerrs = 0;
+		else if(iso->nerrs++ > iso->nframes/2)
+			tdi->csw |= Tdstalled;	/* too many errors in a row */
+		if((tdi->csw & Tdstalled) != 0){
+			if(iso->err == nil){
+				iso->err = errmsg(err);
+				diprint("isointerrupt: tdi %#p error %#ux %s\n",
+					tdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			tdi->ndata = 0;
+		}else
+			tdi->ndata = tdlen(tdi);
+
+		/* tdi is about to reach tdu: recycle tdu and move it on */
+		if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){
+			memset(iso->tdu->data, 0, maxtdlen(iso->tdu));
+			tdisoinit(iso, iso->tdu, maxtdlen(iso->tdu));
+			iso->tdu = iso->tdu->next;
+			iso->nleft = 0;
+		}
+		tdi = tdi->next;
+	}
+	/* report what was actually processed, not the limit */
+	ddiprint("isointr: %d frames processed\n", i);
+	/* stopped by the limit: ask for an interrupt on the next Td */
+	if(i == nframes)
+		tdi->csw |= Tdioc;
+	iso->tdi = tdi;
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->tdi, iso->tdu);
+		wakeup(iso);
+	}
+
+}
+
+/*
+ * Process a Qh upon interrupt. There's one per ongoing user I/O.
+ * User process releases resources later, that is not done here.
+ * We may find in this order one or more Tds:
+ * - none/many non active and completed Tds
+ * - none/one (usually(!) not active) and failed Td
+ * - none/many active Tds.
+ * Upon errors the entire transfer is aborted and error reported.
+ * Otherwise, the transfer is complete only when all Tds are done or
+ * when a read with less than maxpkt is found.
+ * Use the software list and not qh->elink to avoid races. + * We could use qh->elink to see if there's something new or not. + */ +static void +qhinterrupt(Ctlr *ctlr, Qh *qh) +{ + Td *td; + int err; + + ctlr->nqhintr++; + if(qh->state != Qrun) + panic("qhinterrupt: qh state"); + if(qh->tds == nil) + panic("qhinterrupt: no tds"); + if((qh->tds->csw & Tdactive) == 0) + ddqprint("qhinterrupt port %#ux qh %#p p0 %#x p1 %#x\n", + ctlr->port, qh, INS(PORT(0)), INS(PORT(1))); + for(td = qh->tds; td != nil; td = td->next){ + if(td->csw & Tdactive) + return; + td->csw &= ~Tdioc; + if((td->csw & Tdstalled) != 0){ + err = td->csw & Tderrors; + /* just stalled is end of xfer but not an error */ + if(err != Tdstalled && qh->io->err == nil){ + qh->io->err = errmsg(td->csw & Tderrors); + dqprint("qhinterrupt: td %#p error %#ux %s\n", + td, err, qh->io->err); + dqprint("ctlr load %uld\n", ctlr->load); + } + break; + } + if((td->csw & Tdnak) != 0){ /* retransmit; not serious */ + td->csw &= ~Tdnak; + if(td->next == nil) + td->csw |= Tdioc; + } + td->ndata = tdlen(td); + if(td->ndata < maxtdlen(td)){ /* EOT */ + td = td->next; + break; + } + } + + /* + * Done. Make void the Tds not used (errors or EOT) and wakeup epio. 
+	 */
+	qh->elink = QHterm;
+	for(; td != nil; td = td->next)
+		td->ndata = 0;
+	qh->state = Qdone;
+	wakeup(qh->io);
+}
+
+/*
+ * Interrupt handler; a is the Hci.
+ * Runs with the controller lock held. Returns at once if none of
+ * the Sall bits is set in the status register (shared irq);
+ * otherwise writes the bits seen back to the status register
+ * (presumably to acknowledge them -- confirm against the UHCI spec)
+ * and then polls every iso stream in state Qrun or Qdone and every
+ * Qh in state Qrun. Qhs found in state Qclose get their Td list
+ * unlinked.
+ */
+static void
+interrupt(Ureg*, void *a)
+{
+	Hci *hp;
+	Ctlr *ctlr;
+	int frptr;
+	int frno;
+	Qh *qh;
+	Isoio *iso;
+	int sts;
+	int cmd;
+
+	hp = a;
+	ctlr = hp->aux;
+	ilock(ctlr);
+	ctlr->nintr++;
+	sts = INS(Status);
+	if((sts & Sall) == 0){		/* not for us; sharing irq */
+		iunlock(ctlr);
+		return;
+	}
+	OUTS(Status, sts & Sall);
+	cmd = INS(Cmd);
+	/* == binds tighter than &: the parentheses are required */
+	if((cmd & Crun) == 0){
+		print("uhci %#ux: not running: uhci bug?\n", ctlr->port);
+		/* BUG: should abort everything in this case */
+	}
+	if(debug > 1){
+		frptr = INL(Flbaseadd);
+		frno = INL(Frnum);
+		frno = TRUNC(frno, Nframes);
+		print("cmd %#ux sts %#ux frptr %#ux frno %d\n",
+			cmd, sts, frptr, frno);
+	}
+	ctlr->ntdintr++;
+	/*
+	 * Will we know in USB 3.0 who the interrupt was for?.
+	 * Do they still teach indexing in CS?
+	 * This is Intel's doing.
+	 */
+	for(iso = ctlr->iso; iso != nil; iso = iso->next)
+		if(iso->state == Qrun || iso->state == Qdone)
+			isointerrupt(ctlr, iso);
+	for(qh = ctlr->qhs; qh != nil; qh = qh->next)
+		if(qh->state == Qrun)
+			qhinterrupt(ctlr, qh);
+		else if(qh->state == Qclose)
+			qhlinktd(qh, nil);
+	iunlock(ctlr);
+}
+
+/*
+ * iso->tdu is the next place to put data. When it gets full
+ * it is activated and tdu advanced.
+ *
+ * Copies bytes from b into the Td buffers for as long as
+ * isocanwrite holds and fewer than count bytes have been taken.
+ * iso->nleft is the number of bytes already placed in the Td at
+ * tdu; a partially filled Td is left as it is for the next call.
+ * Returns the number of bytes taken from b.
+ */
+static long
+putsamples(Isoio *iso, uchar *b, long count)
+{
+	long tot;
+	long n;
+
+	for(tot = 0; isocanwrite(iso) && tot < count; tot += n){
+		n = count-tot;
+		if(n > maxtdlen(iso->tdu) - iso->nleft)
+			n = maxtdlen(iso->tdu) - iso->nleft;
+		memmove(iso->tdu->data+iso->nleft, b+tot, n);
+		iso->nleft += n;
+		if(iso->nleft == maxtdlen(iso->tdu)){
+			tdisoinit(iso, iso->tdu, iso->nleft);
+			iso->nleft = 0;
+			iso->tdu = iso->tdu->next;
+		}
+	}
+	return tot;
+}
+
+/*
+ * Queue data for writing and return error status from
+ * last writes done, to maintain buffered data.
+ */ +static long +episowrite(Ep *ep, Isoio *iso, void *a, long count) +{ + Ctlr *ctlr; + uchar *b; + int tot; + int nw; + char *err; + + iso->debug = ep->debug; + diprint("uhci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + + ctlr = ep->hp->aux; + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + b = a; + for(tot = 0; tot < count; tot += nw){ + while(isocanwrite(iso) == 0){ + iunlock(ctlr); + diprint("uhci: episowrite: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanwrite, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + err = iso->err; + iso->err = nil; + if(iso->state == Qclose || err != nil){ + iunlock(ctlr); + error(err ? err : Eio); + } + if(iso->state != Qrun) + panic("episowrite: iso not running"); + iunlock(ctlr); /* We could page fault here */ + nw = putsamples(iso, b+tot, count-tot); + ilock(ctlr); + } + if(iso->state != Qclose) + iso->state = Qdone; + iunlock(ctlr); + err = iso->err; /* in case it failed early */ + iso->err = nil; + qunlock(iso); + poperror(); + if(err != nil) + error(err); + diprint("uhci: episowrite: %#p %d bytes\n", iso, tot); + return tot; +} + +/* + * Available data is kept at tdu and following tds, up to tdi (excluded). + */ +static long +episoread(Ep *ep, Isoio *iso, void *a, int count) +{ + Ctlr *ctlr; + uchar *b; + int nr; + int tot; + Td *tdu; + + iso->debug = ep->debug; + diprint("uhci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + + b = a; + ctlr = ep->hp->aux; + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + iso->err = nil; + iso->nerrs = 0; + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? 
iso->err : Eio); + } + iso->state = Qrun; + while(isocanread(iso) == 0){ + iunlock(ctlr); + diprint("uhci: episoread: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanread, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qdone; + assert(iso->tdu != iso->tdi); + + for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){ + tdu = iso->tdu; + if(tdu->csw & Tdactive){ + diprint("uhci: episoread: %#p tdu active\n", iso); + break; + } + nr = tdu->ndata; + if(tot + nr > count) + nr = count - tot; + if(nr == 0) + print("uhci: ep%d.%d: too many polls\n", + ep->dev->nb, ep->nb); + else{ + iunlock(ctlr); /* We could page fault here */ + memmove(b+tot, tdu->data, nr); + ilock(ctlr); + if(nr < tdu->ndata) + memmove(tdu->data, tdu->data+nr, tdu->ndata - nr); + tdu->ndata -= nr; + } + if(tdu->ndata == 0){ + tdisoinit(iso, tdu, ep->maxpkt); + iso->tdu = tdu->next; + } + } + iunlock(ctlr); + qunlock(iso); + poperror(); + diprint("uhci: episoread: %#p %d bytes err '%s'\n", iso, tot, iso->err); + if(iso->err != nil) + error(iso->err); + return tot; +} + +static int +nexttoggle(int tog) +{ + if(tog == Tddata0) + return Tddata1; + else + return Tddata0; +} + +static Td* +epgettd(Ep *ep, Qio *io, int flags, void *a, int count) +{ + Td *td; + int tok; + + if(ep->maxpkt < count) + error("maxpkt too short"); + td = tdalloc(); + if(count <= Tdndata) + td->data = td->sbuff; + else + td->data = td->buff = smalloc(ep->maxpkt); + td->buffer = PCIWADDR32(td->data); + td->ndata = count; + if(a != nil && count > 0) + memmove(td->data, a, count); + td->csw = Tderr2|Tderr1|flags; + if(ep->dev->speed == Lowspeed) + td->csw |= Tdlow; + tok = io->tok | io->toggle; + io->toggle = nexttoggle(io->toggle); + td->token = ((count-1)<<21) | ((io->usbid&0x7FF)<<8) | tok; + + return td; +} + +/* + * Try to get them idle 
+ */ +static void +aborttds(Qh *qh) +{ + Td *td; + + qh->state = Qdone; + qh->elink = QHterm; + for(td = qh->tds; td != nil; td = td->next){ + if(td->csw & Tdactive) + td->ndata = 0; + td->csw &= ~(Tdactive|Tdioc); + } +} + +static int +epiodone(void *a) +{ + Qh *qh; + + qh = a; + return qh->state != Qrun; +} + +static void +epiowait(Ctlr *ctlr, Qio *io, int tmout, ulong load) +{ + Qh *qh; + int timedout; + + qh = io->qh; + ddqprint("uhci io %#p sleep on qh %#p state %ud\n", io, qh, qh->state); + timedout = 0; + if(waserror()){ + dqprint("uhci io %#p qh %#p timed out\n", io, qh); + timedout++; + }else{ + if(tmout == 0) + sleep(io, epiodone, qh); + else + tsleep(io, epiodone, qh, tmout); + poperror(); + } + ilock(ctlr); + if(qh->state == Qrun) + timedout = 1; + else if(qh->state != Qdone && qh->state != Qclose) + panic("epio: queue not done and not closed"); + if(timedout){ + aborttds(io->qh); + io->err = "request timed out"; + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + ilock(ctlr); + } + if(qh->state != Qclose) + qh->state = Qidle; + qhlinktd(qh, nil); + ctlr->load -= load; + iunlock(ctlr); +} + +/* + * Non iso I/O. + * To make it work for control transfers, the caller may + * lock the Qio for the entire control transfer. + */ +static long +epio(Ep *ep, Qio *io, void *a, long count, int mustlock) +{ + Td *td, *ltd, *td0, *ntd; + Ctlr *ctlr; + Qh* qh; + long n, tot; + char buf[128]; + uchar *c; + int saved, ntds, tmout; + ulong load; + char *err; + + qh = io->qh; + ctlr = ep->hp->aux; + io->debug = ep->debug; + tmout = ep->tmout; + ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n", + io->tok == Tdtokin ? 
"in" : "out", + ep->dev->nb, ep->nb, io, count, ctlr->load); + if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("uchi epio: user data: %s\n", buf); + } + if(mustlock){ + qlock(io); + if(waserror()){ + qunlock(io); + nexterror(); + } + } + io->err = nil; + ilock(ctlr); + if(qh->state == Qclose){ /* Tds released by cancelio */ + iunlock(ctlr); + error(io->err ? io->err : Eio); + } + if(qh->state != Qidle) + panic("epio: qh not idle"); + qh->state = Qinstall; + iunlock(ctlr); + + c = a; + td0 = ltd = nil; + load = tot = 0; + do{ + n = ep->maxpkt; + if(count-tot < n) + n = count-tot; + if(c != nil && io->tok != Tdtokin) + td = epgettd(ep, io, Tdactive, c+tot, n); + else + td = epgettd(ep, io, Tdactive|Tdspd, nil, n); + if(td0 == nil) + td0 = td; + else + tdlinktd(ltd, td); + ltd = td; + tot += n; + load += ep->load; + }while(tot < count); + if(td0 == nil || ltd == nil) + panic("epio: no td"); + + ltd->csw |= Tdioc; /* the last one interrupts */ + ddeprint("uhci: load %uld ctlr load %uld\n", load, ctlr->load); + ilock(ctlr); + if(qh->state != Qclose){ + io->iotime = TK2MS(sys->ticks); + qh->state = Qrun; + coherence(); + qhlinktd(qh, td0); + ctlr->load += load; + } + iunlock(ctlr); + + epiowait(ctlr, io, tmout, load); + + if(debug > 1 || ep->debug > 1) + dumptd(td0, "epio: got tds: "); + + tot = 0; + c = a; + saved = 0; + ntds = 0; + for(td = td0; td != nil; td = ntd){ + ntds++; + /* + * Use td tok, not io tok, because of setup packets. + * Also, if the Td was stalled or active (previous Td + * was a short packet), we must save the toggle as it is. 
+ */ + if(td->csw & (Tdstalled|Tdactive)){ + if(saved++ == 0) + io->toggle = td->token & Tddata1; + }else{ + tot += td->ndata; + if(c != nil && tdtok(td) == Tdtokin && td->ndata > 0){ + memmove(c, td->data, td->ndata); + c += td->ndata; + } + } + ntd = td->next; + tdfree(td); + } + err = io->err; + if(mustlock){ + qunlock(io); + poperror(); + } + ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n", + io, ntds, tot, err); + if(err != nil) + error(err); + if(tot < 0) + error(Eio); + return tot; +} + +/* + * halt condition was cleared on the endpoint. update our toggles. + */ +static void +clrhalt(Ep *ep) +{ + Qio *io; + + ep->clrhalt = 0; + switch(ep->ttype){ + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OREAD){ + qlock(&io[OWRITE]); + io[OWRITE].toggle = Tddata0; + deprint("ep clrhalt for io %#p\n", io+OWRITE); + qunlock(&io[OWRITE]); + } + if(ep->mode != OWRITE){ + qlock(&io[OREAD]); + io[OREAD].toggle = Tddata0; + deprint("ep clrhalt for io %#p\n", io+OREAD); + qunlock(&io[OREAD]); + } + break; + } +} + +static long +epread(Ep *ep, void *a, long count) +{ + Ctlio *cio; + Qio *io; + Isoio *iso; + char buf[160]; + ulong delta; + + ddeprint("uhci: epread\n"); + if(ep->aux == nil) + panic("epread: not open"); + + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + qlock(cio); + if(waserror()){ + qunlock(cio); + nexterror(); + } + ddeprint("epread ctl ndata %d\n", cio->ndata); + if(cio->ndata < 0) + error("request expected"); + else if(cio->ndata == 0){ + cio->ndata = -1; + count = 0; + }else{ + if(count > cio->ndata) + count = cio->ndata; + if(count > 0) + memmove(a, cio->data, count); + /* BUG for big transfers */ + free(cio->data); + cio->data = nil; + cio->ndata = 0; /* signal EOF next time */ + } + qunlock(cio); + poperror(); + if(debug>1 || ep->debug){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("epread: %s\n", buf); + } + return count; + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OREAD], a, 
count, 1); + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OREAD].iotime + 1; + if(delta < ep->pollival / 2) + tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OREAD], a, count, 1); + case Tiso: + iso = ep->aux; + return episoread(ep, iso, a, count); + default: + panic("epread: bad ep ttype %d", ep->ttype); + } + return -1; +} + +/* + * Control transfers are one setup write (data0) + * plus zero or more reads/writes (data1, data0, ...) + * plus a final write/read with data1 to ack. + * For both host to device and device to host we perform + * the entire transfer when the user writes the request, + * and keep any data read from the device for a later read. + * We call epio three times instead of placing all Tds at + * the same time because doing so leads to crc/tmout errors + * for some devices. + * Upon errors on the data phase we must still run the status + * phase or the device may cease responding in the future. 
+ */ +static long +epctlio(Ep *ep, Ctlio *cio, void *a, long count) +{ + uchar *c; + long len; + + ddeprint("epctlio: cio %#p ep%d.%d count %ld\n", + cio, ep->dev->nb, ep->nb, count); + if(count < Rsetuplen) + error("short usb comand"); + qlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + if(waserror()){ + qunlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + nexterror(); + } + + /* set the address if unset and out of configuration state */ + if(ep->dev->state != Dconfig && ep->dev->state != Dreset) + if(cio->usbid == 0) + cio->usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb&Devmax); + c = a; + cio->tok = Tdtoksetup; + cio->toggle = Tddata0; + if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen) + error(Eio); + a = c + Rsetuplen; + count -= Rsetuplen; + + cio->toggle = Tddata1; + if(c[Rtype] & Rd2h){ + cio->tok = Tdtokin; + len = GET2(c+Rcount); + if(len <= 0) + error("bad length in d2h request"); + if(len > Maxctllen) + error("d2h data too large to fit in uhci"); + a = cio->data = smalloc(len+1); + }else{ + cio->tok = Tdtokout; + len = count; + } + if(len > 0) + if(waserror()) + len = -1; + else{ + len = epio(ep, cio, a, len, 0); + poperror(); + } + if(c[Rtype] & Rd2h){ + count = Rsetuplen; + cio->ndata = len; + cio->tok = Tdtokout; + }else{ + if(len < 0) + count = -1; + else + count = Rsetuplen + len; + cio->tok = Tdtokin; + } + cio->toggle = Tddata1; + epio(ep, cio, nil, 0, 0); + qunlock(cio); + poperror(); + ddeprint("epctlio cio %#p return %ld\n", cio, count); + return count; +} + +static long +epwrite(Ep *ep, void *a, long count) +{ + Ctlio *cio; + Isoio *iso; + Qio *io; + ulong delta; + char *b; + int tot; + int nw; + + ddeprint("uhci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux == nil) + panic("uhci: epwrite: not open"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + return epctlio(ep, cio, a, count); + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + /* + * Put at most Tdatomic Tds (512 bytes) at a 
time. + * Otherwise some devices produce babble errors. + */ + b = a; + for(tot = 0; tot < count ; tot += nw){ + nw = count - tot; + if(nw > Tdatomic * ep->maxpkt) + nw = Tdatomic * ep->maxpkt; + nw = epio(ep, &io[OWRITE], b+tot, nw, 1); + } + return tot; + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OWRITE].iotime + 1; + if(delta < ep->pollival) + tsleep(&up->sleep, return0, 0, ep->pollival - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tiso: + iso = ep->aux; + return episowrite(ep, iso, a, count); + default: + panic("uhci: epwrite: bad ep ttype %d", ep->ttype); + } + return -1; +} + +static void +isoopen(Ep *ep) +{ + Ctlr *ctlr; + Isoio *iso; + int frno; + int i; + Td* td; + Td* ltd; + int size; + int left; + + if(ep->mode == ORDWR) + error("iso i/o is half-duplex"); + ctlr = ep->hp->aux; + iso = ep->aux; + iso->debug = ep->debug; + iso->next = nil; /* paranoia */ + if(ep->mode == OREAD) + iso->tok = Tdtokin; + else + iso->tok = Tdtokout; + iso->usbid = ((ep->nb & Epmax)<<7)|(ep->dev->nb & Devmax); + iso->state = Qidle; + iso->nframes = Nframes/ep->pollival; + if(iso->nframes < 3) + error("uhci isoopen bug"); /* we need at least 3 tds */ + + ilock(ctlr); + if(ctlr->load + ep->load > 800) + print("usb: uhci: bandwidth may be exceeded\n"); + ctlr->load += ep->load; + ctlr->isoload += ep->load; + dprint("uhci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload); + iunlock(ctlr); + + /* + * From here on this cannot raise errors + * unless we catch them and release here all memory allocated. 
+ */ + if(ep->maxpkt > Tdndata) + iso->data = smalloc(iso->nframes*ep->maxpkt); + ilock(ctlr); + frno = INS(Frnum) + 10; /* start 10ms ahead */ + frno = TRUNC(frno, Nframes); + iunlock(ctlr); + iso->td0frno = frno; + ltd = nil; + left = 0; + for(i = 0; i < iso->nframes; i++){ + td = iso->tdps[frno] = tdalloc(); + if(ep->mode == OREAD) + size = ep->maxpkt; + else{ + size = (ep->hz+left) * ep->pollival / 1000; + size *= ep->samplesz; + left = (ep->hz+left) * ep->pollival % 1000; + if(size > ep->maxpkt){ + print("uhci: ep%d.%d: size > maxpkt\n", + ep->dev->nb, ep->nb); + print("size = %d max = %ld\n", size, ep->maxpkt); + size = ep->maxpkt; + } + } + if(size > Tdndata) + td->data = iso->data + i * ep->maxpkt; + else + td->data = td->sbuff; + td->buffer = PCIWADDR32(td->data); + tdisoinit(iso, td, size); + if(ltd != nil) + ltd->next = td; + ltd = td; + frno = TRUNC(frno+ep->pollival, Nframes); + } + ltd->next = iso->tdps[iso->td0frno]; + iso->tdi = iso->tdps[iso->td0frno]; + iso->tdu = iso->tdi; /* read: right now; write: 1s ahead */ + ilock(ctlr); + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + iso->tdps[frno]->link = ctlr->frames[frno]; + frno = TRUNC(frno+ep->pollival, Nframes); + } + coherence(); + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + ctlr->frames[frno] = PCIWADDR32(iso->tdps[frno]); + frno = TRUNC(frno+ep->pollival, Nframes); + } + iso->next = ctlr->iso; + ctlr->iso = iso; + iso->state = Qdone; + iunlock(ctlr); + if(debug > 1 || iso->debug >1) + isodump(iso, 0); +} + +/* + * Allocate the endpoint and set it up for I/O + * in the controller. This must follow what's said + * in Ep regarding configuration, including perhaps + * the saved toggles (saved on a previous close of + * the endpoint data file by epclose). 
+ */
+static void
+epopen(Ep *ep)
+{
+	Ctlr *ctlr;
+	Qh *cqh;
+	Qio *io;
+	Ctlio *cio;
+	int usbid;
+
+	ctlr = ep->hp->aux;
+	deprint("uhci: epopen ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux != nil)
+		panic("uhci: epopen called with open ep");
+	if(waserror()){
+		free(ep->aux);
+		ep->aux = nil;
+		nexterror();
+	}
+	if(ep->maxpkt > Tdmaxpkt){
+		print("uhci: maxkpkt too large: using %d\n", Tdmaxpkt);
+		ep->maxpkt = Tdmaxpkt;
+	}
+	cqh = ctlr->qh[ep->ttype];
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tiso:
+		ep->aux = smalloc(sizeof(Isoio));
+		isoopen(ep);
+		break;
+	case Tctl:
+		cio = ep->aux = smalloc(sizeof(Ctlio));
+		cio->debug = ep->debug;
+		cio->ndata = -1;
+		cio->data = nil;
+		if(ep->dev->isroot != 0 && ep->nb == 0)	/* root hub */
+			break;
+		cio->qh = qhalloc(ctlr, cqh, cio, "epc");
+		break;
+	case Tbulk:
+	case Tintr:
+		io = ep->aux = smalloc(sizeof(Qio)*2);
+		io[OREAD].debug = io[OWRITE].debug = ep->debug;
+		usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb &Devmax);
+		if(ep->mode != OREAD){
+			if(ep->toggle[OWRITE] != 0)
+				io[OWRITE].toggle = Tddata1;
+			else
+				io[OWRITE].toggle = Tddata0;
+			io[OWRITE].tok = Tdtokout;
+			io[OWRITE].qh = qhalloc(ctlr, cqh, io+OWRITE, "epw");
+			io[OWRITE].usbid = usbid;
+		}
+		if(ep->mode != OWRITE){
+			if(ep->toggle[OREAD] != 0)
+				io[OREAD].toggle = Tddata1;
+			else
+				io[OREAD].toggle = Tddata0;
+			io[OREAD].tok = Tdtokin;
+			io[OREAD].qh = qhalloc(ctlr, cqh, io+OREAD, "epr");
+			io[OREAD].usbid = usbid;
+		}
+		break;
+	}
+	if(debug>1 || ep->debug)
+		dump(ep->hp);
+	deprint("uhci: epopen done\n");
+	poperror();
+}
+
+static void
+cancelio(Ctlr *ctlr, Qio *io)		/* abort io's tds and mark its qh Qclose */
+{
+	Qh *qh;
+
+	ilock(ctlr);
+	qh = io != nil? io->qh: nil;	/* test io before dereferencing it */
+	if(qh == nil || qh->state == Qclose){	/* nothing to cancel, or already closed */
+		iunlock(ctlr);
+		return;
+	}
+	dqprint("uhci: cancelio for qh %#p state %s\n",
+		qh, qhsname[qh->state]);
+	aborttds(qh);
+	qh->state = Qclose;
+	iunlock(ctlr);
+	if(!waserror()){		/* an error raised during the sleep is swallowed */
+		tsleep(&up->sleep, return0, 0, Abortdelay);
+		poperror();
+	}
+
+ wakeup(io); + qlock(io); + /* wait for epio if running */ + qunlock(io); + + qhfree(ctlr, qh); + io->qh = nil; +} + +static void +cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, ulong load) +{ + Isoio **il; + u32int *lp; + int i; + int frno; + Td *td; + + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + return; + } + if(iso->state != Qrun && iso->state != Qdone) + panic("bad iso state"); + iso->state = Qclose; + if(ctlr->isoload < load) + panic("uhci: low isoload"); + ctlr->isoload -= load; + ctlr->load -= load; + for(il = &ctlr->iso; *il != nil; il = &(*il)->next) + if(*il == iso) + break; + if(*il == nil) + panic("isocancel: not found"); + *il = iso->next; + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + td = iso->tdps[frno]; + td->csw &= ~(Tdioc|Tdactive); + for(lp=&ctlr->frames[frno]; !(*lp & Tdterm); + lp = &TPTR(*lp)->link) + if(TPTR(*lp) == td) + break; + if(*lp & Tdterm) + panic("cancelisoio: td not found"); + *lp = td->link; + frno = TRUNC(frno+pollival, Nframes); + } + iunlock(ctlr); + + /* + * wakeup anyone waiting for I/O and + * wait to be sure no I/O is in progress in the controller. + * and then wait to be sure episo-io is no longer running. 
+ */ + wakeup(iso); + diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso); + tsleep(&up->sleep, return0, 0, 5); + qlock(iso); + qunlock(iso); + diprint("cancelisoio iso %#p releasing iso\n", iso); + + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + tdfree(iso->tdps[frno]); + iso->tdps[frno] = nil; + frno = TRUNC(frno+pollival, Nframes); + } + free(iso->data); + iso->data = nil; +} + +static void +epclose(Ep *ep) +{ + Ctlr *ctlr; + Ctlio *cio; + Isoio *iso; + Qio *io; + + ctlr = ep->hp->aux; + deprint("uhci: epclose ep%d.%d\n", ep->dev->nb, ep->nb); + + if(ep->aux == nil) + panic("uhci: epclose called with closed ep"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + cancelio(ctlr, cio); + free(cio->data); + cio->data = nil; + break; + case Tbulk: + case Tintr: + io = ep->aux; + ep->toggle[OREAD] = ep->toggle[OWRITE] = 0; + if(ep->mode != OWRITE){ + cancelio(ctlr, &io[OREAD]); + if(io[OREAD].toggle == Tddata1) + ep->toggle[OREAD] = 1; + } + if(ep->mode != OREAD){ + cancelio(ctlr, &io[OWRITE]); + if(io[OWRITE].toggle == Tddata1) + ep->toggle[OWRITE] = 1; + } + break; + case Tiso: + iso = ep->aux; + cancelisoio(ctlr, iso, ep->pollival, ep->load); + break; + default: + panic("epclose: bad ttype %d", ep->ttype); + } + + free(ep->aux); + ep->aux = nil; + +} + +static char* +seprintep(char *s, char *e, Ep *ep) +{ + Ctlio *cio; + Qio *io; + Isoio *iso; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + ilock(ctlr); + if(ep->aux == nil){ + *s = 0; + iunlock(ctlr); + return s; + } + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + s = seprint(s,e,"cio %#p qh %#p" + " id %#x tog %#x tok %#x err %s\n", + cio, cio->qh, cio->usbid, cio->toggle, + cio->tok, cio->err); + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE) + s = seprint(s,e,"r: qh %#p id %#x tog %#x tok %#x err %s\n", + io[OREAD].qh, io[OREAD].usbid, io[OREAD].toggle, + io[OREAD].tok, io[OREAD].err); + if(ep->mode != OREAD) + s = seprint(s,e,"w: qh %#p id %#x tog %#x tok 
%#x err %s\n", + io[OWRITE].qh, io[OWRITE].usbid, io[OWRITE].toggle, + io[OWRITE].tok, io[OWRITE].err); + break; + case Tiso: + iso = ep->aux; + s = seprint(s,e,"iso %#p id %#x tok %#x tdu %#p tdi %#p err %s\n", + iso, iso->usbid, iso->tok, iso->tdu, iso->tdi, iso->err); + break; + } + iunlock(ctlr); + return s; +} + +static int +portenable(Hci *hp, int port, int on) +{ + int s; + int ioport; + Ctlr *ctlr; + + ctlr = hp->aux; + dprint("uhci: %#x port %d enable=%d\n", ctlr->port, port, on); + ioport = PORT(port-1); + qlock(&ctlr->portlck); + if(waserror()){ + qunlock(&ctlr->portlck); + nexterror(); + } + ilock(ctlr); + s = INS(ioport); + if(on) + OUTS(ioport, s | PSenable); + else + OUTS(ioport, s & ~PSenable); + microdelay(64); + iunlock(ctlr); + tsleep(&up->sleep, return0, 0, Enabledelay); + dprint("uhci %#ux port %d enable=%d: sts %#x\n", + ctlr->port, port, on, INS(ioport)); + qunlock(&ctlr->portlck); + poperror(); + return 0; +} + +static int +portreset(Hci *hp, int port, int on) +{ + int i, p; + Ctlr *ctlr; + + if(on == 0) + return 0; + ctlr = hp->aux; + dprint("uhci: %#ux port %d reset\n", ctlr->port, port); + p = PORT(port-1); + ilock(ctlr); + OUTS(p, PSreset); + delay(50); + OUTS(p, INS(p) & ~PSreset); + OUTS(p, INS(p) | PSenable); + microdelay(64); + for(i=0; i<1000 && (INS(p) & PSenable) == 0; i++) + ; + OUTS(p, (INS(p) & ~PSreset)|PSenable); + iunlock(ctlr); + dprint("uhci %#ux after port %d reset: sts %#x\n", + ctlr->port, port, INS(p)); + return 0; +} + +static int +portstatus(Hci *hp, int port) +{ + int s; + int r; + int ioport; + Ctlr *ctlr; + + ctlr = hp->aux; + ioport = PORT(port-1); + qlock(&ctlr->portlck); + if(waserror()){ + iunlock(ctlr); + qunlock(&ctlr->portlck); + nexterror(); + } + ilock(ctlr); + s = INS(ioport); + if(s & (PSstatuschg | PSchange)){ + OUTS(ioport, s); + ddprint("uhci %#ux port %d status %#x\n", ctlr->port, port, s); + } + iunlock(ctlr); + qunlock(&ctlr->portlck); + poperror(); + + /* + * We must return status bits as a + * 
get port status hub request would do. + */ + r = 0; + if(s & PSpresent) + r |= HPpresent; + if(s & PSenable) + r |= HPenable; + if(s & PSsuspend) + r |= HPsuspend; + if(s & PSreset) + r |= HPreset; + if(s & PSslow) + r |= HPslow; + if(s & PSstatuschg) + r |= HPstatuschg; + if(s & PSchange) + r |= HPchange; + return r; +} + +static void +scanpci(void) +{ + static int already = 0; + int io; + int i; + Ctlr *ctlr; + Pcidev *p; + + if(already) + return; + already = 1; + p = nil; + while(p = pcimatch(p, 0, 0)){ + /* + * Find UHCI controllers (Programming Interface = 0). + */ + if(p->ccrb != 0xc || p->ccru != 3) + continue; + switch(p->ccrp){ + case 0: + io = p->mem[4].bar & ~0x0F; + break; + default: + continue; + } + if(io == 0){ + print("usbuhci: %#x %#x: failed to map registers\n", + p->vid, p->did); + continue; + } + if(ioalloc(io, p->mem[4].size, 0, "usbuhci") < 0){ + print("usbuhci: port %#ux in use\n", io); + continue; + } + if(p->intl == 0xFF || p->intl == 0){ + print("usbuhci: no irq assigned for port %#ux\n", io); + continue; + } + + dprint("uhci: %#x %#x: port %#ux size %#x irq %d\n", + p->vid, p->did, io, p->mem[4].size, p->intl); + + ctlr = smalloc(sizeof(Ctlr)); + ctlr->pcidev = p; + ctlr->port = io; + for(i = 0; i < Nhcis; i++) + if(ctlrs[i] == nil){ + ctlrs[i] = ctlr; + break; + } + if(i == Nhcis) + print("uhci: bug: no more controllers\n"); + } +} + +static void +uhcimeminit(Ctlr *ctlr) +{ + Td* td; + Qh *qh; + int frsize; + int i; + + ctlr->qhs = ctlr->qh[Tctl] = qhalloc(ctlr, nil, nil, "CTL"); + ctlr->qh[Tintr] = qhalloc(ctlr, ctlr->qh[Tctl], nil, "INT"); + ctlr->qh[Tbulk] = qhalloc(ctlr, ctlr->qh[Tintr], nil, "BLK"); + + /* idle Td from dummy Qh at the end. looped back to itself */ + /* This is a workaround for PIIX4 errata 29773804.pdf */ + qh = qhalloc(ctlr, ctlr->qh[Tbulk], nil, "BWS"); + td = tdalloc(); + td->link = PCIWADDR32(td); + qhlinktd(qh, td); + + /* loop (hw only) from the last qh back to control xfers. 
+ * this may be done only for some of them. Disable until ehci comes. + */ + if(0) + qh->link = PCIWADDR32(ctlr->qhs); + + frsize = Nframes*sizeof(ulong); + ctlr->frames = mallocalign(frsize, frsize, 0, 0); + if(ctlr->frames == nil) + panic("uhci reset: no memory"); + + ctlr->iso = nil; + for(i = 0; i < Nframes; i++) + ctlr->frames[i] = PCIWADDR32(ctlr->qhs)|QHlinkqh; + OUTL(Flbaseadd, PCIWADDR32(ctlr->frames)); + OUTS(Frnum, 0); + dprint("uhci %#ux flb %#lux frno %#ux\n", ctlr->port, + INL(Flbaseadd), INS(Frnum)); +} + +static void +init(Hci *hp) +{ + Ctlr *ctlr; + int sts; + int i; + + ctlr = hp->aux; + dprint("uhci %#ux init\n", ctlr->port); + coherence(); + ilock(ctlr); + OUTS(Usbintr, Itmout|Iresume|Ioc|Ishort); + uhcirun(ctlr, 1); + dprint("uhci: init: cmd %#ux sts %#ux sof %#ux", + INS(Cmd), INS(Status), INS(SOFmod)); + dprint(" flb %#lux frno %#ux psc0 %#ux psc1 %#ux", + INL(Flbaseadd), INS(Frnum), INS(PORT(0)), INS(PORT(1))); + /* guess other ports */ + for(i = 2; i < 6; i++){ + sts = INS(PORT(i)); + if(sts != 0xFFFF && (sts & PSreserved1) == 1){ + dprint(" psc%d %#ux", i, sts); + hp->nports++; + }else + break; + } + for(i = 0; i < hp->nports; i++) + OUTS(PORT(i), 0); + iunlock(ctlr); +} + +static void +uhcireset(Ctlr *ctlr) +{ + int i; + int sof; + + ilock(ctlr); + dprint("uhci %#ux reset\n", ctlr->port); + + /* + * Turn off legacy mode. Some controllers won't + * interrupt us as expected otherwise. 
+ */ + uhcirun(ctlr, 0); + pcicfgw16(ctlr->pcidev, 0xc0, 0x2000); + + OUTS(Usbintr, 0); + sof = INB(SOFmod); + uhcicmd(ctlr, Cgreset); /* global reset */ + delay(Resetdelay); + uhcicmd(ctlr, 0); /* all halt */ + uhcicmd(ctlr, Chcreset); /* controller reset */ + for(i = 0; i < 100; i++){ + if((INS(Cmd) & Chcreset) == 0) + break; + delay(1); + } + if(i == 100) + print("uhci %#x controller reset timed out\n", ctlr->port); + OUTB(SOFmod, sof); + iunlock(ctlr); +} + +static void +setdebug(Hci*, int d) +{ + debug = d; +} + +static void +shutdown(Hci *hp) +{ + Ctlr *ctlr; + + ctlr = hp->aux; + + ilock(ctlr); + uhcirun(ctlr, 0); + delay(100); + iunlock(ctlr); +} + +static int +reset(Hci *hp) +{ + static Lock resetlck; + int i; + Ctlr *ctlr; + Pcidev *p; + + if(getconf("*nousbuhci")) + return -1; + + ilock(&resetlck); + scanpci(); + + /* + * Any adapter matches if no hp->port is supplied, + * otherwise the ports must match. + */ + ctlr = nil; + for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){ + ctlr = ctlrs[i]; + if(ctlr->active == 0) + if(hp->port == 0 || hp->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + iunlock(&resetlck); + if(ctlrs[i] == nil || i == Nhcis) + return -1; + + p = ctlr->pcidev; + hp->aux = ctlr; + hp->port = ctlr->port; + hp->irq = p->intl; + hp->tbdf = p->tbdf; + hp->nports = 2; /* default */ + + uhcireset(ctlr); + uhcimeminit(ctlr); + + /* + * Linkage to the generic HCI driver. 
+ */ + hp->init = init; + hp->dump = dump; + hp->interrupt = interrupt; + hp->epopen = epopen; + hp->epclose = epclose; + hp->epread = epread; + hp->epwrite = epwrite; + hp->seprintep = seprintep; + hp->portenable = portenable; + hp->portreset = portreset; + hp->portstatus = portstatus; + hp->shutdown = shutdown; + hp->debug = setdebug; + hp->type = "uhci"; + return 0; +} + +void +usbuhcilink(void) +{ + addhcitype("uhci", reset); +} diff -Nru /sys/src/9k/k10/vsvm.c /sys/src/9k/k10/vsvm.c --- /sys/src/9k/k10/vsvm.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/k10/vsvm.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,192 @@ +/* + * Vestigial Segmented Virtual Memory. + * To do: + * dynamic allocation and free of descriptors; + * IST should perhaps point to a different handler; + * user-level descriptors (if not dynamic). + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" +#include "ureg.h" + +typedef struct Gd Gd; +typedef u64int Sd; +typedef u16int Ss; +typedef struct Tss Tss; + +struct Gd { + Sd sd; + u64int hi; +}; + +struct Tss { + u32int _0_; + u32int rsp0[2]; + u32int rsp1[2]; + u32int rsp2[2]; + u32int _28_[2]; + u32int ist[14]; + u16int _92_[5]; + u16int iomap; +}; + +enum { + Ngdt = 16, /* max. entries in gdt */ + Nidt = 256, /* max. 
entries in idt */ +}; + +static Sd gdt64[Ngdt] = { + 0ull, /* NULL descriptor */ + SdL|SdP|SdDPL0|SdS|SdCODE, /* CS */ + SdG|SdD|SdP|SdDPL0|SdS|SdW, /* DS */ + SdG|SdD|SdP|SdDPL3|SdS|SdCODE|SdR|Sd4G, /* User CS 32-bit */ + SdG|SdD|SdP|SdDPL3|SdS|SdW|Sd4G, /* User DS */ + SdL|SdP|SdDPL3|SdS|SdCODE, /* User CS 64-bit */ + + 0ull, /* FS */ + 0ull, /* GS */ + + 0ull, /* TSS lower */ + 0ull, /* TSS upper */ +}; +static int ngdt64 = 10; + +static Gd idt64[Nidt]; + +static Sd +mksd(u64int base, u64int limit, u64int bits, u64int* upper) +{ + Sd sd; + + sd = bits; + sd |= (((limit & 0x00000000000f0000ull)>>16)<<48) + |(limit & 0x000000000000ffffull); + sd |= (((base & 0x00000000ff000000ull)>>24)<<56) + |(((base & 0x0000000000ff0000ull)>>16)<<32) + |((base & 0x000000000000ffffull)<<16); + if(upper != nil) + *upper = base>>32; + + return sd; +} + +static void +mkgd(Gd* gd, u64int offset, Ss ss, u64int bits, int ist) +{ + Sd sd; + + sd = bits; + sd |= (((offset & 0x00000000ffff0000ull)>>16)<<48) + |(offset & 0x000000000000ffffull); + sd |= ((ss & 0x000000000000ffffull)<<16); + sd |= (ist & (SdISTM>>32))<<32; + gd->sd = sd; + gd->hi = offset>>32; +} + +static void +idtinit(void) +{ + Gd *gd; + int ist, v; + u64int dpl; + uintptr offset; + + gd = idt64; + offset = PTR2UINT(idthandlers); + + for(v = 0; v < Nidt; v++){ + ist = 0; + dpl = SdP|SdDPL0|SdIG; + switch(v){ + default: + break; + case IdtBP: /* #BP */ + dpl = SdP|SdDPL3|SdIG; + break; + case IdtUD: /* #UD */ + case IdtDF: /* #DF */ + ist = 1; + break; + } + mkgd(gd, offset, SSEL(SiCS, SsTIGDT|SsRPL0), dpl, ist); + gd++; + offset += 6; + } +} + +void +tssrsp0(uintptr sp) +{ + Tss *tss; + + tss = m->tss; + tss->rsp0[0] = sp; + tss->rsp0[1] = sp>>32; +} + +static void +tssinit(uintptr sp) +{ + int ist; + Tss *tss; + + tss = m->tss; + memset(tss, 0, sizeof(Tss)); + + tssrsp0(sp); + + sp = PTR2UINT(m->vsvm+PGSZ); + for(ist = 0; ist < 14; ist += 2){ + tss->ist[ist] = sp; + tss->ist[ist+1] = sp>>32; + } + tss->iomap = 0xdfff; +} 
+ +void +vsvminit(int size) +{ + Sd *sd; + u64int r; + + if(m->machno == 0) + idtinit(); + + m->gdt = m->vsvm; + memmove(m->gdt, gdt64, sizeof(gdt64)); + m->tss = &m->vsvm[ROUNDUP(sizeof(gdt64), 16)]; + + sd = &((Sd*)m->gdt)[SiTSS]; + *sd = mksd(PTR2UINT(m->tss), sizeof(Tss)-1, SdP|SdDPL0|SdaTSS, sd+1); + + tssinit(m->stack+size); + + gdtput(sizeof(gdt64)-1, PTR2UINT(m->gdt), SSEL(SiCS, SsTIGDT|SsRPL0)); + idtput(sizeof(idt64)-1, PTR2UINT(idt64)); + trput(SSEL(SiTSS, SsTIGDT|SsRPL0)); + + wrmsr(FSbase, 0ull); + wrmsr(GSbase, PTR2UINT(&sys->machptr[m->machno])); + wrmsr(KernelGSbase, 0ull); + + r = rdmsr(Efer); + r |= Sce; + wrmsr(Efer, r); + r = ((u64int)SSEL(SiU32CS, SsRPL3))<<48; + r |= ((u64int)SSEL(SiCS, SsRPL0))<<32; + wrmsr(Star, r); + wrmsr(Lstar, PTR2UINT(syscallentry)); + wrmsr(Sfmask, If); +} + +int +userureg(Ureg* ureg) +{ + return ureg->cs == SSEL(SiUCS, SsRPL3); +} diff -Nru /sys/src/9k/mk/bootmkfile /sys/src/9k/mk/bootmkfile --- /sys/src/9k/mk/bootmkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/mk/bootmkfile Wed Dec 9 00:00:00 2015 @@ -0,0 +1,34 @@ +BOOTDIR=../boot +BOOTLIB=$BOOTDIR/libboot.a$O + +BOOTFILES=\ + bootauth.$O\ + aux.$O\ + boot.$O\ + bootcache.$O\ + bootip.$O\ + local.$O\ + localpaq.$O\ + embed.$O\ + settime.$O\ + sac.$O\ + paq.$O\ + printstub.$O\ + +$BOOTLIB(%.$O):N: %.$O + +$BOOTLIB: ${BOOTFILES:%=$BOOTLIB(%)} + names=`{membername $newprereq} + ar vu $BOOTLIB $names + rm $names + +$BOOTFILES: $BOOTDIR/boot.h + +%.$O: $BOOTDIR/%.c + $CC -I$BOOTDIR $CFLAGS $BOOTDIR/$stem.c + +boot$CONF.out: ../mk/parse $CONF print.$O $BOOTDIR/boot.c $BOOTLIB + awk -f ../mk/parse -- -mkbootconf $CONF > boot$CONF.c + $CC $CFLAGS boot$CONF.c + $CC $CFLAGS ../boot/printstub.c + $LD -o boot$CONF.out boot$CONF.$O $BOOTLIB printstub.$O diff -Nru /sys/src/9k/mk/mkenum /sys/src/9k/mk/mkenum --- /sys/src/9k/mk/mkenum Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/mk/mkenum Wed Dec 9 00:00:00 2015 @@ -0,0 +1,59 @@ +#!/bin/rc + +awk ' +BEGIN{ + oargc = 0; + for(argc = 1; 
argc < ARGC; argc++){ + if(ARGV[argc] !~ /^-.+/ || ARGV[argc] ~ /--/) + break; + if(ARGV[argc] != "-D") + oargv[ARGV[argc]] = oargc++; + else + DEBUG = 1; + ARGV[argc] = ""; + } +} + +/^enum([ \t]*{|$)/{ + inenum = 1; + if(DEBUG) + printf "inenum = 1\n"; + next; +} + +inenum && /^};$/{ + if(DEBUG) + printf "inenum = 0\n"; + inenum = 0; +} + +inenum && $0 ~ /^[ \t]+[_A-Za-z][_0-9A-Za-z]+[ \t]+=[ \t]+[0-9A-Z_a-z()<> ]+,/{ + tab = "\t"; + if(length($1) < 8) + sep = tab tab; + else + sep = tab; + split($3, a, ","); + printf "#define %s%s%s", $1, sep, a[1]; + if(match($0, /\/\*.*\*\/$/)){ + len = length(a[1]); + sep = ""; + while(len < 24){ + sep = sep tab; + len += 8; + } + printf "%s%s", sep, substr($0, RSTART); + } + printf "\n" +} + +!inenum && /^#(define|include) /{ + printf "%s\n", $0; +} + +/^$/{ + printf "\n"; +} + +END{ +}' $* diff -Nru /sys/src/9k/mk/mkroot /sys/src/9k/mk/mkroot --- /sys/src/9k/mk/mkroot Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/mk/mkroot Wed Dec 9 00:00:00 2015 @@ -0,0 +1,15 @@ +#!/bin/rc + +rfork e +echo mkroot $* +if(! ~ $#* 2){ + echo usage: mkroot path name >[1=2] # usage belongs on stderr, as in mkrootall + exit 1 +} +n=`{basename $1} +cp $1 $2.out +t=`{file $2.out} +if(~ $"t *executable*) + strip $2.out +aux/data2s $2 < $2.out > $2.root.s +echo mkroot $* done diff -Nru /sys/src/9k/mk/mkrootall /sys/src/9k/mk/mkrootall --- /sys/src/9k/mk/mkrootall Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/mk/mkrootall Wed Dec 9 00:00:00 2015 @@ -0,0 +1,31 @@ +#!/bin/rc + +rfork e +n=`{echo $#*^'%3' | hoc} +if(! ~ $n 0){ + echo 'usage: mkrootall [name cname file]...' >[1=2] + exit usage +} + +tmp=mkroot.$pid.out +fn sigexit { + rm -f $tmp +} + +allcname=() +while(! ~ $#* 0){ + name=$1 + cname=$2 + file=$3 + shift + shift + shift + allcname=($allcname $cname) + cp $file $tmp + t=`{file $tmp} + # do not strip venti - it uses its own symbols + if(~ $"t *executable* && ! 
~ $name venti) + strip $tmp + aux/data2s $cname < $tmp +} +exit 0 diff -Nru /sys/src/9k/mk/mkrr /sys/src/9k/mk/mkrr --- /sys/src/9k/mk/mkrr Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/mk/mkrr Wed Dec 9 00:00:00 2015 @@ -0,0 +1,45 @@ +#!/bin/rc + +rfork en + +switch($#*){ +case 1 + PROTO=$1.proto +case 2 + PROTO=$2 +case * + echo $0: usage: $0 conf [proto] + exit "usage" +} + +ramfs -S ramfs.$pid +mount -c /srv/ramfs.$pid /tmp +mkdir /tmp/mnt /tmp/empty + +# clean up files and procs on exit +fn sigexit { + echo sync>>/srv/flcons.$pid + unmount /tmp/mnt + unmount /tmp + echo halt>>/srv/flcons.$pid + rm -f /srv/*.$pid + kill ramfs fossil|rc +} + +{syscall seek 1 8388608 0; echo} >>/tmp/fldisk |[0=2] grep -v 'no error$' +fossil/flfmt -b 4096 -y /tmp/fldisk + +fossil/conf -w /tmp/fldisk < 0){ + if(/^[ \t]*$/ || /^#/) + continue; + + if(/^[^ \t]/){ + #section[$1] = 0; + tag = $1; + } + if(!tag) + continue; + sub(/^[ \t]*/, ""); + line[tag, section[tag]++] = $0; + } + + o = ""; + if(!oargc || ("-mkdevlist" in oargv)){ + s = mkdevlist(); + if(!("-mkdevlist" in oargv) || (oargc > 1)) + s = "DEVS=" s; + o = o s "\n"; + } + if((!oargc || ("-mkmach" in oargv)) && (objtype in section)){ + s = mkmach(); + if(!("-mkmach" in oargv) || (oargc > 1)) + s = "MACH=" s; + o = o s "\n"; + } + if((!oargc || ("-mklib" in oargv)) && ("lib" in section)){ + s = mklib(); + if(!("-mklib" in oargv) || (oargc > 1)) + s = "LIB=" s; + o = o s "\n"; + } + if((!oargc || ("-mkport" in oargv) ) && ("port" in section)){ + s = mkport(); + if(!("-mkport" in oargv) || (oargc > 1)) + s = "PORT=" s; + o = o s "\n"; + } + if("dbgflg" in section){ + for(i = 1; i < section["dbgflg"]; i++){ + n = split(line["dbgflg", i], a); + if(n < 2 || n > 4 || a[2] !~ /'[a-zA-Z]'/) + continue; + if(n > 2 && a[3] !~ /'[a-zA-Z]'/) + continue; + if(n == 4 && (a[4] < 1 || a[4] >= 128)) + continue; + dbgc[a[1]] = a[2]; + if(n == 4) + dbgflg[a[3]] = a[4]; + else if(n == 3) + dbgflg[a[3]] = 1; + } + } + if((!oargc || ("-mkrules" in 
oargv)) && ("dir" in section)){ + o = o mkrules(".", exists, a, c, "-I."); + for(i = 1; i < section["dir"]; i++){ + n = split(line["dir", i], a); + dir = "../" a[1]; + if(n == 1) + a[2] = "-I."; + s = a[2]; + o = o mkrules(dir, exists, a, c, s); + l = listolate(a, "|"); + if(l != ""){ + o = o "^(" l ")\\.$O:R: " dir "/\\1.s\n"; + o = o "\t$AS $AFLAGS " s " " dir "/$stem1.s\n"; + } + l = listolate(c, "|"); + if(l != ""){ + o = o "^(" l ")\\.$O:R: " dir "/\\1.c\n"; + o = o "\t$CC $CFLAGS " s " " dir "/$stem1.c\n"; + } + } + } + if((!oargc || ("-mkrootrules" in oargv)) && ("rootdir" in section)){ + mkrootrules(name, cname, src); + s = ARGV[argc] ".root.s:D:"; + for(i = 1; i < section["rootdir"]; i++) + s = s " " src[i]; + s = s "\n\t../mk/mkrootall\\\n"; + for(i = 1; i < section["rootdir"]; i++) + s = s "\t\t" name[i] " " cname[i] " " src[i] "\\\n"; + s = s "\t>$target\n"; + if(section["rootdir"] > 1) + o = o s; + } + if((!oargc || ("-mkrrrules" in oargv)) && ("rr" in section)){ + n = split(line["rr", 0], a); + if(n == 1) + a[2] = ARGV[argc] ".proto"; + s = "$CONF.rr:\t../mk/mkrr $CONF " a[2] "\n"; + s = s "\t../mk/mkrr $CONF " a[2] "\n"; + for(i = 1; i < section["rr"]; i++) + s = s "$CONF.rr:\t" line["rr", i] "\n"; + o = o s; + } + if("-mkdevc" in oargv) + o = o mkdevc(); + if("-mkerrstr" in oargv) + o = o mkerrstr(); + if("-mksystab" in oargv) + o = o mksystab(); + if("-mkbootconf" in oargv) + o = o mkbootconf(); + + # + # to do: + # bootmkfile + # mkrootall (can it be done at all?) 
+ # + printf "%s", o; # o is generated text, never a format string + + exit 0; +} + +function mkbootconf( a, n, s, t, u, c, d, p, r){ + s = "#include <u.h>\n"; + s = s "#include <libc.h>\n\n"; + s = s "#include \"../boot/boot.h\"\n\n"; + s = s "Method method[] = {\n"; + + c = "0"; + d = "#S/sdC0/"; + p = "boot"; + r = "/root"; + + for(i = 0; i < section["boot"]; i++){ # NOTE: start at 0 + n = split(line["boot", i], a); + if(a[1] == "boot"){ + if(a[2] == "cpu"){ + c = "1"; + if(n == 4 && a[3] == "boot") + d = a[4]; + } + else if(a[2] == "rootdir" && n == 3) + r = a[3]; + else if(a[2] ~ /^(bboot|dosboot|romboot)$/){ + c = "1"; + p = a[2]; + } + else if(a[2] == "boot" && n == 3) + d = a[3]; + continue; + } + s = s "\t{ \"" a[1] "\", config" a[1] ", connect" a[1] ", "; + t = "nil"; + if(n > 1){ + u = line["boot", i]; + if(sub(/^[_A-Za-z][_A-Za-z0-9]*[ \t]*/, "", u)){ + if(match(u, /^".*"$/)) + u = substr(u, RSTART+1, RLENGTH-2); + t = "\"" u "\""; + } + } + s = s t ", },\n"; + } + s = s "\t{ nil },\n};\n\n"; + s = s "int cpuflag = " c ";\n"; + s = s "char* rootdir = \"" r "\";\n"; + s = s "char* bootdisk = \"" d "\";\n"; + s = s "extern void " p "(int, char**);\n\n"; + s = s "void\nmain(int argc, char **argv)\n"; + s = s "{\n\t" p "(argc, argv);\n}\n" + + t = "int (*cfs)(int) = 0;\n"; + for(i = 1; i < section["rootdir"]; i++){ + if(!split(line["rootdir", i], a) || a[1] !~ /\/bin\/cfs$/) # test the rootdir entry, not a stale input field + continue; + t = "int (*cfs)(int) = cache;\n"; + break; + } + s = s t; + + return s; +} + +function mksystab( a, i, f, n, s, t){ + s = "#include \"u.h\"\n"; + s = s "#include \"../port/lib.h\"\n"; + s = s "#include \"mem.h\"\n"; + s = s "#include \"dat.h\"\n"; + s = s "#include \"fns.h\"\n\n"; + s = s "#include \"/sys/src/libc/9syscall/sys.h\"\n\n"; + + t = ""; + while((getline < "/sys/src/libc/9syscall/sys.h") > 0){ # getline yields -1 on error; stop then too + if($1 != "#define" || NF != 3) + continue; + + if(substr($2,1,1) == "_") + continue; # deprecated + + f = "sys" tolower($2); + if($2 == "SYSR1") + f = "sysr1"; + if($2 == "RENDEZVOUS") + n = "Rendez"; + else if($2 == "BRK_") + n = "Brk"; + else + n = substr($2, 1, 1) 
tolower(substr($2, 2)); + + s = s "extern void " f "(Ar0*, va_list);\n"; + t = t "\t[" $2 "]\t"; + if(length($2) < 6) + t = t "\t"; + t = t "{ \"" n "\", " f ", "; + # + # The following should really be defined properly in the + # manual and code, but changing Plan 9 now is too awkward. + # It will matter more when sizeof(long) != sizeof(int). + # + # if($2 ~ "^(FVERSION|STAT|FSTAT|WSTAT|FWSTAT|AWAIT)$") + # t = t "{ .u = 0 } },\n"; + # + # if($2 ~ "^(BIND|_MOUNT|MOUNT)$") + # t = t "{ .l = -1 } },\n"; + # + # The "^(...)$" are to ensure only exact matches are made. + # + if($2 ~ "^(EXEC|SEGBRK|SEGATTACH|RENDEZVOUS)$") + t = t "{ .v = (void*)-1 } },\n"; + else if($2 ~ "^(ALARM|_READ|_WRITE|PREAD|PWRITE)$") + t = t "{ .l = -1 } },\n"; + else + t = t "{ .i = -1 } },\n"; + } + if("syscall" in section){ + for(i = 1; i < section["syscall"]; i++){ + if(split(line["syscall", i], a) != 8) + continue; + if(line["syscall", i] !~ /#define.*{ \.[ilpuv] = .* }$/) + continue; + + f = "sys" tolower(a[2]); + n = substr(a[2], 1, 1) tolower(substr(a[2], 2)); + + s = s "\nSyscall " f ";\n"; + t = t a[1] " " a[2] "\t" a[3] "\n\t[" a[2] "]\t"; + if(length(a[2]) < 6) + t = t "\t"; + split(line["syscall", i], a, "{"); + t = t "{ \"" n "\", " f ", {" a[2] " },\n"; + } + } + s = s "struct {\n\tchar*\tn;\n\tvoid (*f)(Ar0*, va_list);\n\tAr0\tr;\n}"; + s = s " systab[] = {\n" t "};\n\nint nsyscall = nelem(systab);\n"; + + return s; +} + +function mkerrstr( a, s){ + FS="[ \t;]+"; + while((getline < "../port/error.h") > 0){ # getline yields -1 on error; stop then too + split($0, a, /\/\* | \*\//); + s = s $2 " " $3 " = \"" a[2] "\";\n"; + } + FS=" "; + + return s; +} + +function mkdevc( a, d, i, m, n, s, t, u, name, cname){ + s = "#include \"u.h\"\n"; + s = s "#include \"../port/lib.h\"\n"; + s = s "#include \"mem.h\"\n"; + s = s "#include \"dat.h\"\n"; + s = s "#include \"fns.h\"\n"; + s = s "#include \"../port/error.h\"\n\n"; + s = s "#include \"io.h\"\n\n"; + + t = ""; + for(i = 1; i < section["dev"]; i++){ + split(line["dev", i], a); + s = 
s "extern Dev " a[1] "devtab;\n"; + t = t "\t&" a[1] "devtab,\n"; + d[a[1]]++; + } + s = s "Dev* devtab[] = {\n" t "\tnil,\n};\n\n"; + + mkrootrules(name, cname, m); + t = ""; + for(i = 1; i < section["rootdir"]; i++){ + s = s "extern uchar " cname[i] "code[];\n"; + s = s "extern usize " cname[i] "len;\n"; + t = t "\taddbootfile(\"" name[i] "\", " cname[i] "code, " cname[i] "len);\n"; + } + for(i = 1; i < section["link"]; i++){ + split(line["link", i], a); + s = s "extern void " a[1] "link(void);\n"; + t = t "\t" a[1] "link();\n"; + } + s = s "void\nlinks(void)\n{\n" t "}\n\n"; + + if("ip" in d && "ip" in section){ + t = ""; + s = s "#include \"../ip/ip.h\"\n"; + for(i = 1; i < section["ip"]; i++){ + split(line["ip", i], a); + s = s "extern void " a[1] "init(Fs*);\n"; + t = t "\t" a[1] "init,\n"; + } + s = s "void (*ipprotoinit[])(Fs*) = {\n" t "\tnil,\n};\n\n"; + } + + if("sd" in d && "sd" in section){ + t = ""; + s = s "#include \"../port/sd.h\"\n"; + for(i = 1; i < section["sd"]; i++){ + split(line["sd", i], a); + s = s "extern SDifc " a[1] "ifc;\n"; + t = t "\t&" a[1] "ifc,\n"; + } + s = s "SDifc* sdifc[] = {\n" t "\tnil,\n};\n\n"; + } + + if("rd" in d && "rd" in section){ + t = ""; + s = s "#include \"../386/fis.h\"\n"; + s = s "#include \"../port/iofilter.h\"\n"; + s = s "#include \"../cor/rd.h\"\n"; + for(i = 1; i < section["rd"]; i++){ + split(line["rd", i], a); + s = s "extern Rdifc " a[1] "ifc;\n"; + t = t "\t&" a[1] "ifc,\n"; + } + s = s "Rdifc* rdifc[] = {\n" t "\tnil,\n};\n\n"; + } + + if("uart" in d && "uart" in section){ + t = ""; + for(i = 1; i < section["uart"]; i++){ + split(line["uart", i], a); + a[1] = substr(a[1], 5, length(a[1])-4) "physuart"; + s = s "extern PhysUart " a[1] ";\n"; + t = t "\t&" a[1] ",\n"; + } + s = s "PhysUart* physuart[] = {\n" t "\tnil,\n};\n\n"; + } + + t = ""; + n = 0; + if("physseg" in section){ + for(i = 1; i < section["physseg"]; i++){ + u = line["physseg", i]; + if(u ~ /^\.[_A-Za-z][_A-Za-z0-9]*/) + t = t "\t"; + t = 
t "\t" u "\n"; + if(sub(/.*\.pgalloc.*=[^_A-Za-z]*/, "", u)){ + if(match(u, /^[_A-Za-z][_A-Za-z0-9]*/)){ + u = substr(u, RSTART, RLENGTH); + s = s "extern Page *(*" u ")(Segment*, uintptr);\n"; + } + } + else if(sub(/.*\.pgfree.*=[^_A-Za-z]*/, "", u)){ + if(match(u, /^[_A-Za-z][_A-Za-z0-9]*/)){ + u = substr(u, RSTART, RLENGTH); + s = s "extern void (*" u ")(Page*);\n"; + } + } + if(match(u, /}/)) + n++; + } + } + s = s "Physseg physseg[" n+8 "] = {\n"; + s = s "\t{\t.attr\t= SG_SHARED,\n"; + s = s "\t\t.name\t= \"shared\",\n"; + s = s "\t\t.size\t= SEGMAXSIZE,\n\t},\n"; + s = s "\t{\t.attr\t= SG_BSS,\n"; + s = s "\t\t.name\t= \"memory\",\n"; + s = s "\t\t.size\t= SEGMAXSIZE,\n\t},\n"; + s = s t "};\nint nphysseg = " n+8 ";\n\n"; + + s = s "char dbgflg[256]"; + t = ""; + for(u in dbgflg) + t = t "\t[" u "]\t" dbgflg[u] ",\n"; + if(t != "") + s = s " = {\n" t "}"; + s = s ";\n\n"; + + for(i in m) + delete m[i]; + + for(i = 1; i < section["misc"]; i++){ + split(line["misc", i], a); + m[a[1]] = line["misc", i]; + } + + if(!("rdb" in m)){ # m holds the misc entries gathered just above; emit the stub only when rdb is absent + s = s "void\n"; + s = s "rdb(void)\n"; + s = s "{\n"; + s = s "\tsplhi();\n"; + s = s "\tiprint(\"rdb...not installed\\n\");\n"; + s = s "\tfor(;;);\n"; + s = s "}\n\n"; + } + if(objtype == "power"){ + for(i = 1; i < section[objtype]; i++){ + split(line[objtype, i], a); + m[a[1]] = line[objtype, i]; + } + if(!("cnksyscall" in m)){ + s = s "void\n"; + s = s "cnksyscall(Ureg*)\n"; + s = s "{\n"; + s = s "\tpanic(\"cnkemu...not installed\\n\");\n"; + s = s "\tfor(;;);\n"; + s = s "}\n\n"; + s = s "void*\n"; + s = s "cnksysexecregs(uintptr, ulong, ulong)\n"; + s = s "{\n"; + s = s "\tpanic(\"cnkemu...not installed\\n\");\n"; + s = s "\tfor(;;);\n"; + s = s "}\n\n"; + } + } + if("conf" in section){ + for(i = 1; i < section["conf"]; i++) + s = s line["conf", i] "\n"; + s = s "\n"; + } + t = "."; + while("pwd" | getline > 0){ + if($0 ~ /^\//) + t = $0; + } + s = s "char* conffile = \"" t "/" ARGV[argc] "\";\n"; + s = s "ulong kerndate = 
KERNDATE;\n"; + + return s; +} + +function mkrootrules(name, cname, src, a, i, n){ + for(i = 1; i < section["rootdir"]; i++){ + n = split(line["rootdir", i], a); + if(n >= 2) + name[i] = a[2]; + else + name[i] = a[1]; + sub(/.*\//, "", name[i]); + cname[i] = a[1]; + gsub(/[^a-zA-Z0-9_]/, "_", cname[i]); + src[i] = a[1]; + } +} + +function mkrules(dir, exists, ameta, cmeta, flags, f, i, s, t){ + for(i in ameta) + delete ameta[i]; + for(i in cmeta) + delete cmeta[i]; + + s = ""; + while("cd " dir "; /bin/ls *.[cs]" | getline > 0){ + if($0 !~ /^[A-Za-z0-9]*\.[cs]$/) + continue; + f = $0; + if(!sub(/\.[cs]$/, "")) + continue; + if($0 in exists) + continue; + exists[$0] = dir; + if(f ~ /\.c$/){ + if(!($0 in dbgc)){ + cmeta[$0]++; + continue; + } + t = "$CC $CFLAGS " flags; + } + else{ + if(!($0 in dbgc)){ + ameta[$0]++; + continue; + } + t = "$AS $AFLAGS " flags; + } + s = s $0 ".$O:\t" dir "/" f "\n"; + s = s "\t" t " -D'_DBGC_='" dbgc[$0] "'' " dir "/" f "\n"; + } + return s; +} + +function mkport( array){ + arrayify(array, "port", "", ".$O", 1); + + return listolate(array, " "); +} + +function mklib( array){ + arrayify(array, "lib", "/$objtype/lib/", ".a", 1); + + return listolate(array," "); +} + +function mkmach( a, i, s){ + s = ""; + for(i = 1; i < section[objtype]; i++){ + if(!split(line[objtype, i], a)) + continue; + if(s == "") + s = a[1] ".$O"; + else + s = s " " a[1] ".$O"; + } + + return s; +} + +function mkdevlist( a, array, i, j, n, s){ + for(s in section){ + if(line[s, 0] !~ /[ \t]\+dev[^_A-Za-z0-9]*/) + continue; + if(s == "dev") + arrayify(array, s, "dev", ".$O", 1); + else if(s == objtype) + arrayify(array, s, "", ".$O", 0); + else + arrayify(array, s, "", ".$O", 1); + } + + return listolate(array, " "); +} + +function listolate(array, sep, a, s){ + s = ""; + for(a in array){ + if(s == "") + s = a; + else + s = a sep s; + } + + return s; +} + +function arrayify(array, tag, prefix, suffix, one, a, i, j, n){ + for(i = 1; i < section[tag]; i++){ + n = 
split(line[tag, i], a); + if(one) + array[prefix a[1] suffix]++; + for(j = 2; j <= n; j++){ + if(a[j] ~ /[+=-].*/) # skip option tokens; index by j, not by input field $j + continue; + array[a[j] suffix]++; + } + } +} diff -Nru /sys/src/9k/mk/portmkfile /sys/src/9k/mk/portmkfile --- /sys/src/9k/mk/portmkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/mk/portmkfile Wed Dec 9 00:00:00 2015 @@ -0,0 +1,133 @@ +%.$O: %.s + $AS $AFLAGS $stem.s + +%.$O: %.c + $CC $CFLAGS $stem.c + +%.m: %.$O + $LD -o $target -uX -l $prereq + +%.acid: %.c + $CC $CFLAGS -a $stem.c >$stem.acid + +%.acid: ../ip/%.c + $CC $CFLAGS -a -I. ../ip/$stem.c >$stem.acid + +%.acid: ../port/%.c + $CC $CFLAGS -a -I. ../port/$stem.c >$stem.acid + +%.db: main.$O + $CC -s$stem main.c | dbfmt > $stem.db + +%.$O: /$objtype/include/u.h +%.$O: ../port/lib.h +%.$O: mem.h +%.$O: dat.h ../port/portdat.h +%.$O: fns.h ../port/portfns.h + +alloc.$O: /sys/include/pool.h +chan.$O: ../port/error.h +dev.$O: ../port/error.h +devcap.$O: ../port/error.h +devcap.$O: /sys/include/libsec.h +devcons.$O: /sys/include/authsrv.h /sys/include/pool.h +devdup.$O: ../port/error.h +devenv.$O: ../port/error.h +devkprof.$O: ../port/error.h +devmnt.$O: ../port/error.h +devpipe.$O: ../port/error.h +devprobe.$O: ../port/netif.h probe.h +devproc.$O: ../port/error.h ../port/edf.h +devproc.$O: /sys/include/tos.h /sys/include/trace.h /$objtype/include/ureg.h +devsd.$O: ../port/error.h ../port/sd.h +devsrv.$O: ../port/error.h +devssl.$O: ../port/error.h +devssl.$O: /sys/include/libsec.h +devtab.$O: ../port/error.h +devtls.$O: ../port/error.h +devtls.$O: /sys/include/libsec.h +devuart.$O: ../port/error.h +devwd.$O: ../port/error.h +edf.$O: ../port/error.h ../port/edf.h +edf.$O: /sys/include/trace.h +ethermii.$O: ../port/ethermii.h ../port/netif.h +fault.$O: ../port/error.h +image.$O: ../port/error.h +initcode.$O: /sys/include/libc.h +latin1.$O: ../port/latin1.h +netif.$O: ../port/error.h ../port/netif.h +parse.$O: ../port/error.h +pgrp.$O: ../port/error.h +portclock.$O: /$objtype/include/ureg.h 
+proc.$O: ../port/error.h ../port/edf.h errstr.h +proc.$O: /sys/include/trace.h +qio.$O: ../port/error.h +rdb.$O: /$objtype/include/ureg.h +rebootcmd.$O: ../port/error.h +rebootcmd.$O: /sys/include/a.out.h +segment.$O: ../port/error.h +swap.$O: ../port/error.h +sysauth.$O: ../port/error.h +sysauth.$O: /sys/include/authsrv.h +sysfile.$O: ../port/error.h +sysproc.$O: ../port/error.h ../port/edf.h +sysproc.$O: /sys/include/a.out.h +sysseg.$O: ../port/error.h +taslock.$O: ../port/edf.h + +../port/latin1.h: /lib/keyboard + aux/mklatinkbd /lib/keyboard > $target + +../port/systab.c: ../mk/parse /sys/src/libc/9syscall/sys.h + awk -f ../mk/parse -- -mksystab /sys/src/libc/9syscall/sys.h $CONF > $target + +systab.$O: ../port/systab.c + $CC $CFLAGS -I. ../port/systab.c + +errstr.h: ../mk/parse ../port/error.h + awk -f ../mk/parse -- -mkerrstr > $target + +init.h: init.out + {echo 'uchar initcode[]={' + xd -1x $prereq | sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > init.h + +$CONF.$O: $CONF.c + $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c + +$CONF.c: ../mk/parse $CONF + awk -f ../mk/parse -- -mkdevc $CONF > $CONF.c + {echo 'uchar configfile[]={' + xd -1x $CONF | + sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo 0, + echo '};'} >> $CONF.c + +./root/$O.%: ./root/%.c + @{cd ./root; mk $O.$stem} + +../root/$O.%: ../root/%.c + @{cd ../root; mk $O.$stem} + +all:V: + for(i in $CONFLIST) + mk 'CONF='$i + +installall:V: + for(i in $CONFLIST) + mk 'CONF='$i install + +%.clean:V: + rm -f $stem.c [9bz]$stem [9bz]$stem.gz boot$stem.* [9bz]$stem.elf + +clean:V: + rm -f *.[$OS] *.root.[cs] *.out *.m *.acid errstr.h init.h $objtype^l.h + for(i in $CONFLIST) + mk $i.clean + @{cd ../root; mk clean} + if(test -d ./root) @{cd ./root; mk clean}; status='' + +nuke:V: clean + rm -f ../boot/libboot.a$O *.elf *.rr + @{cd ../root; mk clean nuke} + if(test -d ./root) @{cd ./root; mk clean nuke}; status='' diff -Nru /sys/src/9k/port/alarm.c 
/sys/src/9k/port/alarm.c --- /sys/src/9k/port/alarm.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/alarm.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,103 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static Alarms alarms; +static Rendez alarmr; + +void +alarmkproc(void*) +{ + Proc *rp; + ulong now; + + for(;;){ + now = sys->ticks; + qlock(&alarms); + while((rp = alarms.head) && tickscmp(now, rp->alarm) >= 0){ + if(rp->alarm != 0L){ + if(canqlock(&rp->debug)){ + if(!waserror()){ + postnote(rp, 0, "alarm", NUser); + poperror(); + } + qunlock(&rp->debug); + rp->alarm = 0L; + }else + break; + } + alarms.head = rp->palarm; + } + qunlock(&alarms); + + sleep(&alarmr, return0, 0); + } +} + +/* + * called every clock tick + */ +void +checkalarms(void) +{ + Proc *p; + ulong now; + + p = alarms.head; + now = sys->ticks; + + if(p != nil && tickscmp(now, p->alarm) >= 0) + wakeup(&alarmr); +} + +ulong +procalarm(ulong time) +{ + Proc **l, *f; + ulong when, old; + + if(up->alarm) + old = tk2ms(up->alarm - sys->ticks); + else + old = 0; + if(time == 0) { + up->alarm = 0; + return old; + } + when = ms2tk(time)+sys->ticks; + if(when == 0) + when = 1; + + qlock(&alarms); + l = &alarms.head; + for(f = *l; f; f = f->palarm) { + if(up == f){ + *l = f->palarm; + break; + } + l = &f->palarm; + } + + up->palarm = 0; + if(alarms.head) { + l = &alarms.head; + for(f = *l; f; f = f->palarm) { + if(tickscmp(f->alarm, when) >= 0) { + up->palarm = f; + *l = up; + goto done; + } + l = &f->palarm; + } + *l = up; + } + else + alarms.head = up; +done: + up->alarm = when; + qunlock(&alarms); + + return old; +} diff -Nru /sys/src/9k/port/allocb.c /sys/src/9k/port/allocb.c --- /sys/src/9k/port/allocb.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/allocb.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,205 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum +{ + Hdrspc = 64, /* leave room for high-level headers */ 
+ Bdead = 0x51494F42, /* "QIOB" */ +}; + +struct +{ + Lock; + ulong bytes; + ulong limit; + +} ialloc; + +static Block* +_allocb(int size, int align) +{ + Block *b; + uchar *p; + int n; + + if(align <= 0) + align = BLOCKALIGN; + n = align + ROUNDUP(size+Hdrspc, align) + sizeof(Block); + if((p = malloc(n)) == nil) + return nil; + + b = (Block*)(p + n - sizeof(Block)); /* block at end of allocated space */ + b->base = p; + b->next = nil; + b->list = nil; + b->free = 0; + b->flag = 0; + + /* align base and bounds of data */ + b->lim = (uchar*)(PTR2UINT(b) & ~(align-1)); + + /* align start of writable data, leaving space below for added headers */ + b->rp = b->lim - ROUNDUP(size, align); + b->wp = b->rp; + + if(b->rp < b->base || b->lim - b->rp < size) + panic("_allocb"); + + return b; +} + +Block* +allocb(int size) +{ + Block *b; + + /* + * Check in a process and wait until successful. + * Can still error out of here, though. + */ + if(up == nil) + panic("allocb without up: %#p\n", getcallerpc(&size)); + if((b = _allocb(size, 0)) == nil){ + mallocsummary(); + panic("allocb: no memory for %d bytes\n", size); + } + setmalloctag(b->base, getcallerpc(&size)); + + return b; +} + +Block* +allocbalign(int size, int align) +{ + Block *b; + + /* + * Check in a process and wait until successful. + * Can still error out of here, though. 
+ */ + if(up == nil) + panic("allocbalign without up: %#p\n", getcallerpc(&size)); + if((b = _allocb(size, align)) == nil){ + mallocsummary(); + panic("allocbalign: no memory for %d bytes\n", size); + } + setmalloctag(b->base, getcallerpc(&size)); + + return b; +} + +void +ialloclimit(ulong limit) +{ + ialloc.limit = limit; +} + +Block* +iallocb(int size) +{ + Block *b; + static int m1, m2, mp; + + if(ialloc.bytes > ialloc.limit){ + if((m1++%10000)==0){ + if(mp++ > 1000){ + active.exiting = 1; + exit(0); + } + iprint("iallocb: limited %lud/%lud\n", + ialloc.bytes, ialloc.limit); + } + return nil; + } + + if((b = _allocb(size, 0)) == nil){ + if((m2++%10000)==0){ + if(mp++ > 1000){ + active.exiting = 1; + exit(0); + } + iprint("iallocb: no memory %lud/%lud\n", + ialloc.bytes, ialloc.limit); + } + return nil; + } + b->flag = BINTR; + setmalloctag(b->base, getcallerpc(&size)); + + ilock(&ialloc); + ialloc.bytes += b->lim - b->base; + iunlock(&ialloc); + + return b; +} + +void +freeb(Block *b) +{ + void *dead = (void*)Bdead; + uchar *p; + + if(b == nil) + return; + + /* + * drivers which perform non cache coherent DMA manage their own buffer + * pool of uncached buffers and provide their own free routine. 
+ */ + if(b->free) { + b->free(b); + return; + } + if(b->flag & BINTR) { + ilock(&ialloc); + ialloc.bytes -= b->lim - b->base; + iunlock(&ialloc); + } + + p = b->base; + + /* poison the block in case someone is still holding onto it */ + b->next = dead; + b->rp = dead; + b->wp = dead; + b->lim = dead; + b->base = dead; + + free(p); +} + +void +checkb(Block *b, char *msg) +{ + void *dead = (void*)Bdead; + + if(b == dead) + panic("checkb b %s %#p", msg, b); + if(b->base == dead || b->lim == dead || b->next == dead + || b->rp == dead || b->wp == dead){ + print("checkb: base %#p lim %#p next %#p\n", + b->base, b->lim, b->next); + print("checkb: rp %#p wp %#p\n", b->rp, b->wp); + panic("checkb dead: %s\n", msg); + } + + if(b->base > b->lim) + panic("checkb 0 %s %#p %#p", msg, b->base, b->lim); + if(b->rp < b->base) + panic("checkb 1 %s %#p %#p", msg, b->base, b->rp); + if(b->wp < b->base) + panic("checkb 2 %s %#p %#p", msg, b->base, b->wp); + if(b->rp > b->lim) + panic("checkb 3 %s %#p %#p", msg, b->rp, b->lim); + if(b->wp > b->lim) + panic("checkb 4 %s %#p %#p", msg, b->wp, b->lim); +} + +void +iallocsummary(void) +{ + print("ialloc %lud/%lud\n", ialloc.bytes, ialloc.limit); +} diff -Nru /sys/src/9k/port/aoe.h /sys/src/9k/port/aoe.h --- /sys/src/9k/port/aoe.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/aoe.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,157 @@ +/* + * Copyright © 2011 Coraid, Inc. + * All rights reserved. 
+ */ + +typedef struct Aoehdr Aoehdr; +typedef struct Aoeata Aoeata; +typedef struct Aoeqc Aoeqc; +typedef struct Mdir Mdir; +typedef struct Aoemask Aoemask; +typedef struct Aoesrr Aoesrr; +typedef struct Aoekrr Aoekrr; +typedef struct Kresp Kresp; +typedef struct Kreg Kreg; +typedef struct Kset Kset; +typedef struct Kreplace Kreplace; + +enum +{ + ACata, ACconfig, ACmask, ACresrel, ACkresrel, + + AQCread= 0, AQCtest, AQCprefix, AQCset, AQCfset, AQCtar, + + ETAOE= 0x88a2, + Aoever= 1, + + AEcmd= 1, AEarg, AEdev, AEcfg, AEver, AEres, + + AFerr= 1<<2, + AFrsp= 1<<3, + + AAFwrite= 1, + AAFext= 1<<6, + + AKstat = 0, AKreg, AKset, AKreplace, AKreset, + + Aoesectsz = 512, + Szaoeata = 24+12, + Szaoeqc = 24+8, + + /* mask commands */ + Mread= 0, + Medit, + + /* mask directives */ + MDnop= 0, + MDadd, + MDdel, + + /* mask errors */ + MEunspec= 1, + MEbaddir, + MEfull, + + /* Keyed-RR Rflags */ + KRnopreempt = 1<<0, +}; + +struct Aoehdr +{ + uchar dst[6]; + uchar src[6]; + uchar type[2]; + uchar verflags; + uchar error; + uchar major[2]; + uchar minor; + uchar cmd; + uchar tag[4]; +}; + +struct Aoeata +{ + Aoehdr; + uchar aflags; + uchar errfeat; + uchar scnt; + uchar cmdstat; + uchar lba[6]; + uchar res[2]; +}; + +struct Aoeqc +{ + Aoehdr; + uchar bufcnt[2]; + uchar fwver[2]; + uchar scnt; + uchar verccmd; + uchar cslen[2]; +}; + +// mask directive +struct Mdir { + uchar res; + uchar cmd; + uchar mac[6]; +}; + +struct Aoemask { + Aoehdr; + uchar rid; + uchar cmd; + uchar merror; + uchar nmacs; +// struct Mdir m[0]; +}; + +struct Aoesrr { + Aoehdr; + uchar rcmd; + uchar nmacs; +// uchar mac[6][nmacs]; +}; + +struct Aoekrr { + Aoehdr; + uchar rcmd; +}; + +struct Kresp { + Aoehdr; + uchar rcmd; + uchar rtype; + uchar nkeys[2]; + uchar res[4]; + uchar gencnt[4]; + uchar owner[8]; + uchar keys[1]; +}; + +struct Kreg { + Aoehdr; + uchar rcmd; + uchar nmacs; + uchar res[2]; + uchar key[8]; + uchar macs[1]; +}; + +struct Kset { + Aoehdr; + uchar rcmd; + uchar rtype; + uchar 
res[2]; + uchar key[8]; +}; + +struct Kreplace { + Aoehdr; + uchar rcmd; + uchar rtype; + uchar rflags; + uchar res; + uchar targkey[8]; + uchar replkey[8]; +}; diff -Nru /sys/src/9k/port/bud.c /sys/src/9k/port/bud.c --- /sys/src/9k/port/bud.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/bud.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,360 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * - locks + * - could instead coalesce free items on demand (cf. Wulf) + * - or lazy buddy (cf. Barkley) + */ + +enum{ + MinK= PGSHFT, /* default minimum size (one page) */ + Nbits= sizeof(uintmem)*8, + MaxK= Nbits-1, /* last usable k (largest block is 2^k) */ + + Busy= 0x80, /* bit set in byte map if block busy (low order bits are block size, 0=unavailable) */ +}; + +//#define usize uintmem + +typedef struct Blk Blk; +struct Blk{ + Blk* forw; /* free list */ + Blk* back; +}; + +typedef struct Bfree Bfree; +struct Bfree{ + Blk; /* header */ + Lock; + uint avail; +}; + +struct Bpool{ + Lock lk; /* TO DO: localise lock (need CAS update of pool->kofb) (also see Johnson & Davis 1992) */ + Bfree blist[Nbits]; /* increasing powers of two */ + uchar* kofb; /* k(block_index) with top bit set if busy */ + uint mink; + uint maxk; + uint maxb; /* limit to block index, in minbsize blocks (pool size) */ + Blk* blocks; /* free list pointers */ + uintmem base; + uintmem minbsize; + uintmem limit; +}; + +static uchar lg2table[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +#define BI(a) ((a)>>pool->mink) +#define IB(x) ((uintmem)(x)<mink) + +static int +lg2ceil(uintmem m) +{ + uint n, h; + int r; + + r = (m & (m-1)) != 0; /* not a power of two => round up */ + n = (uint)m; + if(sizeof(uintmem)>sizeof(uint)){ + h = (u64int)m>>32; + if(h != 0){ + n = h; + r += 32; + } + } + if((n>>8) == 0) + return lg2table[n] + r; + if((n>>16) == 0) + return 8 + lg2table[n>>8] + r; + if((n>>24) == 0) + return 16 + lg2table[n>>16] + r; + return 24 + lg2table[n>>24] + r; +} + +Bpool* +bpoolcreate(uint mink, uint maxk, uintmem base, uintmem top, void* (*alloc)(usize, int)) +{ + int k; + Blk *b; + Bpool *pool; + + if(mink == 0) + mink = MinK; + if(maxk > MaxK) + panic("bpoolcreate"); + pool = alloc(sizeof(Bpool), 1); + if(pool == nil) + panic("bpoolcreate alloc"); + pool->mink = mink; + pool->maxk = maxk; + pool->base = base; + pool->limit = top; + pool->maxb = BI(top-base); + pool->minbsize = (uintmem)1<blocks = alloc((pool->maxb+1)*sizeof(*pool->blocks), 0); + if(pool->blocks == nil) + panic("bpoolinit: can't allocate %ud blocks", pool->maxb+1); + for(k = 0; k < nelem(pool->blist); k++){ + b = &pool->blist[k]; + b->forw = b->back = b; + } + pool->kofb = alloc((pool->maxb+1)*sizeof(*pool->kofb), 1); + if(pool->kofb == nil) + panic("physinit: can't allocate %ud kofb", pool->maxb+1); + print("pool %#p space base %#P top=%#P maxb=%#ux (%d)\n", pool, base, top, pool->maxb, pool->maxb); + return pool; +} + +uintmem +bpoolalloc(Bpool *pool, usize size) +{ + int j, k; + Blk *b, *b2; + uintmem a, a2; + uint bi; + + k = lg2ceil(size); + if(k < pool->mink) + k = pool->mink; + if(k > pool->maxk) + return 0; + DBG("%#p size=%#P k=%d\n", pool, (uintmem)size, k); + lock(&pool->lk); + for(j 
= k;;){ + b = pool->blist[j].forw; + if(b != &pool->blist[j]) + break; + if(++j > pool->maxk){ + unlock(&pool->lk); + return 0; /* out of space */ + } + } + if(b == nil) + panic("physalloc: nil"); + /* set busy state */ + bi = b - pool->blocks; + if(pool->kofb[bi] & Busy || b->forw == nil || b->back == nil) + panic("physalloc: inval k=%d j=%d %#p %d %#ux %#p %#p", k, j, b, bi, pool->kofb[bi], b->forw, b->back); + pool->kofb[bi] = k | Busy; + pool->blist[j].avail--; + b->forw->back = b->back; + b->back->forw = b->forw; + a = IB(bi); + while(j != k){ + /* split */ + j--; + a2 = a+((uintmem)1<kofb=%#ux\n", a, a2, j, (uintmem)1<kofb[bi]); + if(pool->kofb[bi] & Busy){ + if(pool->kofb[bi] & ~Busy) + panic("bal: busy block %#llux k=%d\n", a, pool->kofb[bi] & ~Busy); + } + pool->kofb[bi] = j; /* new size, not busy */ + b2 = &pool->blocks[bi]; + b2->forw = &pool->blist[j]; + b2->back = pool->blist[j].back; + pool->blist[j].back = b2; + b2->back->forw = b2; + pool->blist[j].avail++; + } + unlock(&pool->lk); + return a + pool->base; +} + +void +bpoolfree(Bpool *pool, uintmem a, usize size) +{ + int k; + Blk *b, *b2; + uintmem a2; + uint bi, bi2; + + k = lg2ceil(size); /* could look it up in pool->kofb */ + if(k < pool->mink) + return; + if(k > pool->maxk) + k = pool->maxk; + DBG("%#p free %#llux %#P k%d\n", pool, a, (uintmem)size, k); + if(a < pool->base) + panic("bpoolfree"); + a -= pool->base; + bi = BI(a); + lock(&pool->lk); + if(pool->kofb[bi] != 0 && pool->kofb[bi] != (Busy|k)){ + unlock(&pool->lk); + panic("balfree: busy %#llux odd k k=%d kofb=%#ux\n", a, k, pool->kofb[bi]); + } + for(; k != pool->maxk; k++){ + pool->kofb[bi] = Busy; + a2 = a ^ ((uintmem)1<blocks[bi2]; + if(bi2 >= pool->maxb || pool->kofb[bi2] != k) + break; + /* valid, not busy or empty, size k */ + DBG("combine %#llux %#llux %d %#llux\n", a, a2, k, (uintmem)1<back->forw = b2->forw; + b2->forw->back = b2->back; + pool->kofb[bi2] = Busy; + pool->blist[k].avail--; + if(a2 < a){ + a = a2; + bi = bi2; + } 
+ } + pool->kofb[bi] = k; /* sets size and resets Busy */ + b = &pool->blocks[bi]; + b->forw = &pool->blist[k]; + b->back = pool->blist[k].back; + pool->blist[k].back = b; + b->back->forw = b; + pool->blist[k].avail++; + unlock(&pool->lk); +} + +void +bpoolallocrange(Bpool *pool, usize *low, usize *high) +{ + *low = (usize)1<mink; + *high = (usize)1<maxk; +} + +static void +ibpoolfree(Bpool *pool, uintmem base, usize size) +{ + bpoolfree(pool, base+pool->base, size); +} + +void +bpoolinitfree(Bpool *pool, uintmem base, uintmem lim) +{ + uintmem m, size; + int i; + + /* chop limit to min block alignment */ + if(base >= pool->limit) + return; + if(pool->base > base) + base = pool->base; + if(lim > pool->limit) + lim = pool->limit; + base -= pool->base; + lim -= pool->base; + lim &= ~(pool->minbsize-1); + if(BI(lim) > pool->maxb){ + print("physinitfree: address space too large"); + lim = IB(pool->maxb); + } + + /* round base to min block alignment */ + base = (base + pool->minbsize-1) & ~(pool->minbsize-1); + + size = lim - base; + if(size < pool->minbsize) + return; + DBG("bpoolinitfree %#p %#P-%#P [%#P]\n", pool, pool->base+base, pool->base+lim, size); + + /* move up from base in largest blocks that remain aligned */ + for(i=pool->mink; imaxk; i++){ + m = (uintmem)1 << i; + if(base & m){ + if(size < m) + break; + if(base & (m-1)){ + print(" ** error: %#P %#P\n", base, m); + return; + } + ibpoolfree(pool, base, m); + base += m; + size -= m; + } + } + + /* largest chunks, aligned */ + m = (uintmem)1<maxk; + while(size >= m){ + if(base & (m-1)){ + print(" ** error: %#P %#P\n", base, m); + return; + } + ibpoolfree(pool, base, m); + base += m; + size -= m; + } + + /* free remaining chunks, decreasing alignment */ + for(; size >= pool->minbsize; m >>= 1){ + if(size & m){ + DBG("\t%#P %#P\n", base, m); + if(base & (m-1)){ + print(" ** error: %#P %#P\n", base, m); + return; + } + ibpoolfree(pool, base, m); + base += m; + size &= ~m; + } + } +} + +char* 
+seprintbpoolstats(Bpool *pool, char *s, char *e) +{ + Bfree *b; + int i; + + lock(&pool->lk); + for(i = 0; i < nelem(pool->blist); i++){ + b = &pool->blist[i]; + if(b->avail != 0) + s = seprint(s, e, "%ud %ulldK blocks avail\n", + b->avail, (1ull<lk); + return s; +} + +void +bpooldump(Bpool *pool) +{ + uintmem a; + uint bi; + int i, k; + Blk *b; + + for(i=0; iblist); i++){ + b = pool->blist[i].forw; + if(b != &pool->blist[i]){ + print("%d ", i); + for(; b != &pool->blist[i]; b = b->forw){ + bi = b-pool->blocks; + a = IB(bi); + k = pool->kofb[bi]; + print(" [%#llux %d %#ux b=%#llux]", a, k, 1<cache); + e->cache = nil; + lock(&ecache); + e->next = ecache.head; + ecache.head = e; + ecache.free++; + unlock(&ecache); +} + +static Extent* +extentalloc(void) +{ + Extent *e; + int i; + + lock(&ecache); + if(ecache.head == nil){ + e = malloc(NEXTENT*sizeof(Extent)); + if(e == nil){ + unlock(&ecache); + return nil; + } + for(i = 0; i < NEXTENT; i++){ + e->next = ecache.head; + ecache.head = e; + e++; + } + ecache.free += NEXTENT; + ecache.total += NEXTENT; + } + + e = ecache.head; + ecache.head = e->next; + memset(e, 0, sizeof(Extent)); + ecache.free--; + unlock(&ecache); + + return e; +} + +void +cinit(void) +{ + int i; + Mntcache *mc; + + if((cache.head = malloc(sizeof(Mntcache)*NFILE)) == nil) + panic("cinit: no memory"); + mc = cache.head; + + /* a good algorithm to set maxcache would be nice */ + + for(i = 0; i < NFILE-1; i++) { + mc->next = mc+1; + mc->prev = mc-1; + mc++; + } + + cache.tail = mc; + cache.tail->next = 0; + cache.head->prev = 0; +} + +void +cnodata(Mntcache *mc) +{ + Extent *e, *n; + + /* + * Invalidate all extent data + * Image lru will waste the pages + */ + for(e = mc->list; e; e = n) { + n = e->next; + extentfree(e); + } + mc->list = 0; +} + +void +ctail(Mntcache *mc) +{ + /* Unlink and send to the tail */ + if(mc->prev) + mc->prev->next = mc->next; + else + cache.head = mc->next; + if(mc->next) + mc->next->prev = mc->prev; + else + cache.tail = 
mc->prev; + + if(cache.tail) { + mc->prev = cache.tail; + cache.tail->next = mc; + mc->next = 0; + cache.tail = mc; + } + else { + cache.head = mc; + cache.tail = mc; + mc->prev = 0; + mc->next = 0; + } +} + +void +copen(Chan *c) +{ + int h; + Extent *e, *next; + Mntcache *mc, *f, **l; + + /* directories aren't cacheable and append-only files confuse us */ + if(c->qid.type&(QTDIR|QTAPPEND)) + return; + + h = c->qid.path%NHASH; + lock(&cache); + for(mc = cache.hash[h]; mc != nil; mc = mc->hash) { + if(mc->qid.path == c->qid.path) + if(mc->qid.type == c->qid.type) + if(mc->devno == c->devno && mc->dev == c->dev) { + c->mc = mc; + ctail(mc); + unlock(&cache); + + /* File was updated, invalidate cache */ + if(mc->qid.vers != c->qid.vers) { + mc->qid.vers = c->qid.vers; + qlock(mc); + cnodata(mc); + qunlock(mc); + } + return; + } + } + + /* LRU the cache headers */ + mc = cache.head; + l = &cache.hash[mc->qid.path%NHASH]; + for(f = *l; f; f = f->hash) { + if(f == mc) { + *l = mc->hash; + break; + } + l = &f->hash; + } + + mc->qid = c->qid; + mc->devno = c->devno; + mc->dev = c->dev; + + l = &cache.hash[h]; + mc->hash = *l; + *l = mc; + ctail(mc); + + qlock(mc); + c->mc = mc; + e = mc->list; + mc->list = 0; + unlock(&cache); + + while(e) { + next = e->next; + extentfree(e); + e = next; + } + qunlock(mc); +} + +static int +cdev(Mntcache *mc, Chan *c) +{ + if(mc->qid.path != c->qid.path) + return 0; + if(mc->qid.type != c->qid.type) + return 0; + if(mc->devno != c->devno) + return 0; + if(mc->dev != c->dev) + return 0; + if(mc->qid.vers != c->qid.vers) + return 0; + return 1; +} + +int +cread(Chan *c, uchar *buf, int len, vlong off) +{ + char *p; + Mntcache *mc; + Extent *e, **t; + int o, l, total; + ulong offset; + + if(off+len > maxcache) + return 0; + + mc = c->mc; + if(mc == nil) + return 0; + + qlock(mc); + if(cdev(mc, c) == 0) { + qunlock(mc); + return 0; + } + + offset = off; + t = &mc->list; + for(e = *t; e; e = e->next) { + if(offset >= e->start && offset < 
e->start+e->len) + break; + t = &e->next; + } + + if(e == 0) { + qunlock(mc); + return 0; + } + + total = 0; + while(len) { + p = e->cache; + if(p == nil){ + *t = e->next; + extentfree(e); + qunlock(mc); + return total; + } + + o = offset - e->start; + l = len; + if(l > e->len-o) + l = e->len-o; + + if(waserror()) { + qunlock(mc); + nexterror(); + } + + memmove(buf, p+o, l); + + poperror(); + + buf += l; + len -= l; + offset += l; + total += l; + t = &e->next; + e = e->next; + if(e == 0 || e->start != offset) + break; + } + + qunlock(mc); + return total; +} + +Extent* +cchain(uchar *buf, ulong offset, int len, Extent **tail) +{ + int l; + char *p; + Extent *e, *start, **t; + + start = 0; + *tail = 0; + t = &start; + while(len) { + e = extentalloc(); + if(e == 0) + break; + + p = mallocz(PGSZ, 0); + if(p == nil){ + extentfree(e); + break; + } + l = len; + if(l > PGSZ) + l = PGSZ; + + e->cache = p; + e->start = offset; + e->len = l; + + memmove(p, buf, l); + + buf += l; + offset += l; + len -= l; + + *t = e; + *tail = e; + t = &e->next; + } + + return start; +} + +int +cpgmove(Extent *e, uchar *buf, int boff, int len) +{ + if(e->cache == nil){ + /* shouldn't happen */ + print("CACHE: cpgmove %#p %d %d nil\n", e, boff, len); + return 0; + } + + memmove(e->cache+boff, buf, len); + + return 1; +} + +void +cupdate(Chan *c, uchar *buf, int len, vlong off) +{ + Mntcache *mc; + Extent *tail; + Extent *e, *f, *p; + int o, ee, eblock; + ulong offset; + + if(off > maxcache || len == 0) + return; + + mc = c->mc; + if(mc == nil) + return; + qlock(mc); + if(cdev(mc, c) == 0) { + qunlock(mc); + return; + } + + /* + * Find the insertion point + */ + offset = off; + p = 0; + for(f = mc->list; f; f = f->next) { + if(f->start > offset) + break; + p = f; + } + + /* trim if there is a successor */ + eblock = offset+len; + if(f != 0 && eblock > f->start) { + len -= (eblock - f->start); + if(len <= 0) { + qunlock(mc); + return; + } + } + + if(p == 0) { /* at the head */ + e = cchain(buf, 
offset, len, &tail); + if(e != 0) { + mc->list = e; + tail->next = f; + } + qunlock(mc); + return; + } + + /* trim to the predecessor */ + ee = p->start+p->len; + if(offset < ee) { + o = ee - offset; + len -= o; + if(len <= 0) { + qunlock(mc); + return; + } + buf += o; + offset += o; + } + + /* try and pack data into the predecessor */ + if(offset == ee && p->len < PGSZ) { + o = len; + if(o > PGSZ - p->len) + o = PGSZ - p->len; + if(cpgmove(p, buf, p->len, o)) { + p->len += o; + buf += o; + len -= o; + offset += o; + if(len <= 0) { +if(f && p->start + p->len > f->start) print("CACHE: p->start=%uld p->len=%d f->start=%uld\n", p->start, p->len, f->start); + qunlock(mc); + return; + } + } + } + + e = cchain(buf, offset, len, &tail); + if(e != 0) { + p->next = e; + tail->next = f; + } + qunlock(mc); +} + +void +cwrite(Chan* c, uchar *buf, int len, vlong off) +{ + int o, eo; + Mntcache *mc; + ulong eblock, ee; + Extent *p, *f, *e, *tail; + ulong offset; + + if(off > maxcache || len == 0) + return; + + mc = c->mc; + if(mc == nil) + return; + + qlock(mc); + if(cdev(mc, c) == 0) { + qunlock(mc); + return; + } + + offset = off; + mc->qid.vers++; + c->qid.vers++; + + p = 0; + for(f = mc->list; f; f = f->next) { + if(f->start >= offset) + break; + p = f; + } + + if(p != 0) { + ee = p->start+p->len; + eo = offset - p->start; + /* pack in predecessor if there is space */ + if(offset <= ee && eo < PGSZ) { + o = len; + if(o > PGSZ - eo) + o = PGSZ - eo; + if(cpgmove(p, buf, eo, o)) { + if(eo+o > p->len) + p->len = eo+o; + buf += o; + len -= o; + offset += o; + } + } + } + + /* free the overlap -- it's a rare case */ + eblock = offset+len; + while(f && f->start < eblock) { + e = f->next; + extentfree(f); + f = e; + } + + /* link the block (if any) into the middle */ + e = cchain(buf, offset, len, &tail); + if(e != 0) { + tail->next = f; + f = e; + } + + if(p == 0) + mc->list = f; + else + p->next = f; + qunlock(mc); +} diff -Nru /sys/src/9k/port/chan.c /sys/src/9k/port/chan.c --- 
/sys/src/9k/port/chan.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/chan.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1742 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + PATHSLOP = 20, + PATHMSLOP = 20, +}; + +struct +{ + Lock; + int fid; + Chan *free; + Chan *list; +}chanalloc; + +typedef struct Elemlist Elemlist; + +struct Elemlist +{ + char *aname; /* original name */ + char *name; /* copy of name, so '/' can be overwritten */ + int nelems; + char **elems; + int *off; + int mustbedir; + int nerror; + int prefix; +}; + +char* +chanpath(Chan *c) +{ + if(c == nil) + return ""; + if(c->path == nil) + return ""; + if(c->path->s == nil) + return ""; + return c->path->s; +} + +int +isdotdot(char *p) +{ + return p[0]=='.' && p[1]=='.' && p[2]=='\0'; +} + +/* + * Rather than strncpy, which zeros the rest of the buffer, kstrcpy + * truncates if necessary, always zero terminates, does not zero fill, + * and puts ... at the end of the string if it's too long. Usually used to + * save a string in up->genbuf; + */ +void +kstrcpy(char *s, char *t, int ns) +{ + int nt; + + nt = strlen(t); + if(nt+1 <= ns){ + memmove(s, t, nt+1); + return; + } + /* too long */ + if(ns < 4){ + /* but very short! */ + strncpy(s, t, ns); + return; + } + /* truncate with ... 
at character boundary (very rare case) */ + memmove(s, t, ns-4); + ns -= 4; + s[ns] = '\0'; + /* look for first byte of UTF-8 sequence by skipping continuation bytes */ + while(ns>0 && (s[--ns]&0xC0)==0x80) + ; + strcpy(s+ns, "..."); +} + +int +emptystr(char *s) +{ + if(s == nil) + return 1; + if(s[0] == '\0') + return 1; + return 0; +} + +/* + * Atomically replace *p with copy of s + */ +void +kstrdup(char **p, char *s) +{ + int n; + char *t, *prev; + + n = strlen(s)+1; + /* if it's a user, we can wait for memory; if not, something's very wrong */ + if(up){ + t = smalloc(n); + setmalloctag(t, getcallerpc(&p)); + }else{ + t = malloc(n); + if(t == nil) + panic("kstrdup: no memory"); + } + memmove(t, s, n); + prev = *p; + *p = t; + free(prev); +} + +Chan* +newchan(void) +{ + Chan *c; + + lock(&chanalloc); + c = chanalloc.free; + if(c != 0) + chanalloc.free = c->next; + unlock(&chanalloc); + + if(c == nil){ + c = smalloc(sizeof(Chan)); + lock(&chanalloc); + c->fid = ++chanalloc.fid; + c->link = chanalloc.list; + chanalloc.list = c; + unlock(&chanalloc); + } + + c->dev = nil; + c->flag = 0; + c->ref = 1; + c->devno = 0; + c->offset = 0; + c->devoffset = 0; + c->iounit = 0; + c->umh = 0; + c->uri = 0; + c->dri = 0; + c->aux = 0; + c->mchan = 0; + c->mc = 0; + c->mux = 0; + memset(&c->mqid, 0, sizeof(c->mqid)); + c->path = 0; + c->ismtpt = 0; + + return c; +} + +Ref npath; + +Path* +newpath(char *s) +{ + int i; + Path *p; + + p = smalloc(sizeof(Path)); + i = strlen(s); + p->len = i; + p->alen = i+PATHSLOP; + p->s = smalloc(p->alen); + memmove(p->s, s, i+1); + p->ref = 1; + incref(&npath); + + /* + * Cannot use newpath for arbitrary names because the mtpt + * array will not be populated correctly. The names #/ and / are + * allowed, but other names with / in them draw warnings. 
+ */ + if(strchr(s, '/') && strcmp(s, "#/") != 0 && strcmp(s, "/") != 0) + print("newpath: %s from %#p\n", s, getcallerpc(&s)); + + p->mlen = 1; + p->malen = PATHMSLOP; + p->mtpt = smalloc(p->malen*sizeof p->mtpt[0]); + return p; +} + +static Path* +copypath(Path *p) +{ + int i; + Path *pp; + + pp = smalloc(sizeof(Path)); + pp->ref = 1; + incref(&npath); + DBG("copypath %s %#p => %#p\n", p->s, p, pp); + + pp->len = p->len; + pp->alen = p->alen; + pp->s = smalloc(p->alen); + memmove(pp->s, p->s, p->len+1); + + pp->mlen = p->mlen; + pp->malen = p->malen; + pp->mtpt = smalloc(p->malen*sizeof pp->mtpt[0]); + for(i=0; imlen; i++){ + pp->mtpt[i] = p->mtpt[i]; + if(pp->mtpt[i]) + incref(pp->mtpt[i]); + } + + return pp; +} + +void +pathclose(Path *p) +{ + int i; + + if(p == nil) + return; +//XXX + DBG("pathclose %#p %s ref=%d =>", p, p->s, p->ref); + for(i=0; imlen; i++) + DBG(" %#p", p->mtpt[i]); + DBG("\n"); + + if(decref(p)) + return; + decref(&npath); + free(p->s); + for(i=0; imlen; i++) + if(p->mtpt[i]) + cclose(p->mtpt[i]); + free(p->mtpt); + free(p); +} + +/* + * In place, rewrite name to compress multiple /, eliminate ., and process .. + * (Really only called to remove a trailing .. that has been added. + * Otherwise would need to update n->mtpt as well.) + */ +static void +fixdotdotname(Path *p) +{ + char *r; + + if(p->s[0] == '#'){ + r = strchr(p->s, '/'); + if(r == nil) + return; + cleanname(r); + + /* + * The correct name is #i rather than #i/, + * but the correct name of #/ is #/. + */ + if(strcmp(r, "/")==0 && p->s[1] != '/') + *r = '\0'; + }else + cleanname(p->s); + p->len = strlen(p->s); +} + +static Path* +uniquepath(Path *p) +{ + Path *new; + + if(p->ref > 1){ + /* copy on write */ + new = copypath(p); + pathclose(p); + p = new; + } + return p; +} + +static Path* +addelem(Path *p, char *s, Chan *from) +{ + char *t; + int a, i; + Chan *c, **tt; + + if(s[0]=='.' 
&& s[1]=='\0') + return p; + + p = uniquepath(p); + + i = strlen(s); + if(p->len+1+i+1 > p->alen){ + a = p->len+1+i+1 + PATHSLOP; + t = smalloc(a); + memmove(t, p->s, p->len+1); + free(p->s); + p->s = t; + p->alen = a; + } + /* don't insert extra slash if one is present */ + if(p->len>0 && p->s[p->len-1]!='/' && s[0]!='/') + p->s[p->len++] = '/'; + memmove(p->s+p->len, s, i+1); + p->len += i; + if(isdotdot(s)){ + fixdotdotname(p); + DBG("addelem %s .. => rm %#p\n", p->s, p->mtpt[p->mlen-1]); + if(p->mlen>1 && (c = p->mtpt[--p->mlen])){ + p->mtpt[p->mlen] = nil; + cclose(c); + } + }else{ + if(p->mlen >= p->malen){ + p->malen = p->mlen+1+PATHMSLOP; + tt = smalloc(p->malen*sizeof tt[0]); + memmove(tt, p->mtpt, p->mlen*sizeof tt[0]); + free(p->mtpt); + p->mtpt = tt; + } + DBG("addelem %s %s => add %#p\n", p->s, s, from); + p->mtpt[p->mlen++] = from; + if(from) + incref(from); + } + return p; +} + +void +chanfree(Chan *c) +{ + c->flag = CFREE; + + if(c->dirrock != nil){ + free(c->dirrock); + c->dirrock = 0; + c->nrock = 0; + c->mrock = 0; + } + if(c->umh != nil){ + putmhead(c->umh); + c->umh = nil; + } + if(c->umc != nil){ + cclose(c->umc); + c->umc = nil; + } + if(c->mux != nil){ + muxclose(c->mux); + c->mux = nil; + } + if(c->mchan != nil){ + cclose(c->mchan); + c->mchan = nil; + } + + if(c->dev != nil){ //XDYNX + //devtabdecr(c->dev); + c->dev = nil; + } + + pathclose(c->path); + c->path = nil; + + lock(&chanalloc); + c->next = chanalloc.free; + chanalloc.free = c; + unlock(&chanalloc); +} + +void +cclose(Chan *c) +{ + if(c->flag&CFREE) + panic("cclose %#p", getcallerpc(&c)); + + DBG("cclose %#p name=%s ref=%d\n", c, c->path->s, c->ref); + if(decref(c)) + return; + + if(!waserror()){ + if(c->dev != nil) //XDYNX + c->dev->close(c); + poperror(); + } + chanfree(c); +} + +/* + * Queue a chan to be closed by one of the clunk procs. 
+ */ +struct { + Chan *head; + Chan *tail; + Lock l; + QLock q; + Rendez r; + int active; + int running; +} clunkq; + +static void closeproc(void*); + +void +ccloseq(Chan *c) +{ + int seq; + + if(c->flag&CFREE) + panic("ccloseq %#p", getcallerpc(&c)); + + DBG("ccloseq %#p name=%s ref=%d\n", c, c->path->s, c->ref); + + if(decref(c)) + return; + + lock(&clunkq.l); + c->next = nil; + if(clunkq.head != nil) + clunkq.tail->next = c; + else + clunkq.head = c; + clunkq.tail = c; + unlock(&clunkq.l); + + if(!wakeup(&clunkq.r)){ + seq = ainc(&clunkq.active); + if(seq < 10) + kproc("closeproc", closeproc, (void*)seq); + else + adec(&clunkq.active); + } +} + +static int +clunkwork(void*) +{ + return clunkq.head != nil; +} + +static void +closeproc(void *a) +{ + Chan *c; + int seq; + + seq = (uintptr)a; + for(;;){ + ainc(&clunkq.running); + qlock(&clunkq.q); + while(clunkq.head == nil){ + if(!waserror()){ + tsleep(&clunkq.r, clunkwork, nil, 5000); + poperror(); + } + } + lock(&clunkq.l); + c = clunkq.head; + clunkq.head = c->next; + unlock(&clunkq.l); + qunlock(&clunkq.q); + if(!waserror()){ + if(c->dev != nil) //XDYNX + c->dev->close(c); + poperror(); + } + chanfree(c); + if(adec(&clunkq.running) >= 5 && seq > 5){ + adec(&clunkq.active); + pexit("", 1); + } + } +} + +/* + * Make sure we have the only copy of c. (Copy on write.) 
+ */
+Chan*
+cunique(Chan *c)
+{
+	Chan *nc;
+
+	if(c->ref != 1){	/* shared: trade our reference for a private clone */
+		nc = cclone(c);
+		cclose(c);
+		c = nc;
+	}
+
+	return c;
+}
+/*
+ * Report whether two qids have the same path and the same version.
+ * The type field is not compared.
+ */
+int
+eqqid(Qid a, Qid b)
+{
+	return a.path == b.path && a.vers == b.vers;
+}
+/*
+ * Report whether a and b agree on qid.path, dev->dc and devno.
+ * qid.vers must agree as well unless skipvers is non-zero.
+ * Both chans must have a non-nil dev.
+ */
+static int
+eqchan(Chan *a, Chan *b, int skipvers)
+{
+	if(a->qid.path != b->qid.path)
+		return 0;
+	if(!skipvers && a->qid.vers != b->qid.vers)
+		return 0;
+	if(a->dev->dc != b->dev->dc)
+		return 0;
+	if(a->devno != b->devno)
+		return 0;
+	return 1;
+}
+/*
+ * The same comparison as eqchan, with the right-hand side given as
+ * separate dc, devno and qid values instead of a second chan.
+ */
+int
+eqchanddq(Chan *c, int dc, uint devno, Qid qid, int skipvers)
+{
+	if(c->qid.path != qid.path)
+		return 0;
+	if(!skipvers && c->qid.vers != qid.vers)
+		return 0;
+	if(c->dev->dc != dc)
+		return 0;
+	if(c->devno != devno)
+		return 0;
+	return 1;
+}
+/*
+ * Allocate a mount head for the chan `from'.  The head starts with
+ * one reference and takes its own reference to from.
+ */
+Mhead*
+newmhead(Chan *from)
+{
+	Mhead *mh;
+
+	mh = smalloc(sizeof(Mhead));
+	mh->ref = 1;
+	mh->from = from;
+	incref(from);
+	return mh;
+}
+/*
+ * Mount *newp onto old in the current process's name space
+ * (up->pgrp).  flag&MORDER selects the placement (MREPL, MAFTER,
+ * or in front of the existing mounts otherwise); MCREATE in flag
+ * is recorded on the new mount.  Returns the new mount's mountid.
+ * Raises Emount when old and new disagree about QTDIR, when a
+ * non-directory is mounted with an order other than MREPL, or for
+ * the MCREATE-onto-union case described in the body.
+ */
+int
+cmount(Chan **newp, Chan *old, int flag, char *spec)
+{
+	int order, flg;
+	Chan *new;
+	Mhead *mhead, **l, *mh;
+	Mount *nm, *f, *um, **h;
+	Pgrp *pg;
+
+	if(QTDIR & (old->qid.type^(*newp)->qid.type))
+		error(Emount);
+
+	if(old->umh)
+		print("cmount: unexpected umh, caller %#p\n", getcallerpc(&newp));
+
+	order = flag&MORDER;
+
+	if(!(old->qid.type & QTDIR) && order != MREPL)
+		error(Emount);
+
+	new = *newp;
+	mh = new->umh;
+
+	/*
+	 * Not allowed to bind when the old directory is itself a union.
+	 * (Maybe it should be allowed, but I don't see what the semantics
+	 * would be.)
+	 *
+	 * We need to check mh->mount->next to tell unions apart from
+	 * simple mount points, so that things like
+	 *	mount -c fd /root
+	 *	bind -c /root /
+	 * work.
+	 *
+	 * The check of mount->mflag allows things like
+	 *	mount fd /root
+	 *	bind -c /root /
+	 *
+	 * This is far more complicated than it should be, but I don't
+	 * see an easier way at the moment.
+	 */
+	if((flag&MCREATE) && mh && mh->mount
+	&& (mh->mount->next || !(mh->mount->mflag&MCREATE)))
+		error(Emount);
+
+	pg = up->pgrp;
+	wlock(&pg->ns);		/* held while the hash chain is searched and extended */
+
+	l = &MOUNTH(pg, old->qid);
+	for(mhead = *l; mhead; mhead = mhead->hash){
+		if(eqchan(mhead->from, old, 1))
+			break;
+		l = &mhead->hash;	/* l trails the scan so a new head can be linked at the end */
+	}
+
+	if(mhead == nil){
+		/*
+		 * nothing mounted here yet.  create a mount
+		 * head and add to the hash table.
+		 */
+		mhead = newmhead(old);
+		*l = mhead;
+
+		/*
+		 * if this is a union mount, add the old
+		 * node to the mount chain.
+		 */
+		if(order != MREPL)
+			mhead->mount = newmount(mhead, old, 0, 0);
+	}
+	wlock(&mhead->lock);	/* taken before pg->ns is dropped */
+	if(waserror()){
+		wunlock(&mhead->lock);
+		nexterror();
+	}
+	wunlock(&pg->ns);
+
+	nm = newmount(mhead, new, flag, spec);
+	if(mh != nil && mh->mount != nil){
+		/*
+		 * copy a union when binding it onto a directory
+		 */
+		flg = order;
+		if(order == MREPL)
+			flg = MAFTER;
+		h = &nm->next;
+		um = mh->mount;
+		for(um = um->next; um; um = um->next){	/* the first entry is skipped */
+			f = newmount(mhead, um->to, flg, um->spec);
+			*h = f;
+			h = &f->next;
+		}
+	}
+
+	if(mhead->mount && order == MREPL){	/* MREPL: drop whatever was mounted before */
+		mountfree(mhead->mount);
+		mhead->mount = 0;
+	}
+
+	if(flag & MCREATE)
+		nm->mflag |= MCREATE;
+
+	if(mhead->mount && order == MAFTER){	/* append nm after the existing mounts */
+		for(f = mhead->mount; f->next; f = f->next)
+			;
+		f->next = nm;
+	}else{	/* nm and anything chained to it go in front of the existing mounts */
+		for(f = nm; f->next; f = f->next)
+			;
+		f->next = mhead->mount;
+		mhead->mount = nm;
+	}
+
+	wunlock(&mhead->lock);
+	poperror();
+	return nm->mountid;
+}
+/*
+ * Undo a mount in the current process's name space (up->pgrp).
+ * With mounted == 0, everything mounted on mnt is removed;
+ * otherwise only the first entry matching mounted (directly or
+ * through its mchan) is removed.
+ * Raises Eunmount if nothing is mounted on mnt, and Eunion if
+ * mounted is not among the things mounted there.
+ */
+void
+cunmount(Chan *mnt, Chan *mounted)
+{
+	Pgrp *pg;
+	Mhead *mh, **l;
+	Mount *f, **p;
+
+	if(mnt->umh)	/* should not happen */
+		print("cunmount newp extra umh %#p has %#p\n", mnt, mnt->umh);
+
+	/*
+	 * It _can_ happen that mounted->umh is non-nil,
+	 * because mounted is the result of namec(Aopen)
+	 * (see sysfile.c:/^sysunmount).
+	 * If we open a union directory, it will have a umh.
+	 * Although surprising, this is okay, since the
+	 * cclose will take care of freeing the umh.
+	 */
+
+	pg = up->pgrp;
+	wlock(&pg->ns);
+
+	l = &MOUNTH(pg, mnt->qid);
+	for(mh = *l; mh; mh = mh->hash){
+		if(eqchan(mh->from, mnt, 1))
+			break;
+		l = &mh->hash;
+	}
+
+	if(mh == 0){
+		wunlock(&pg->ns);
+		error(Eunmount);
+	}
+
+	wlock(&mh->lock);
+	if(mounted == 0){	/* remove the whole mount head */
+		*l = mh->hash;
+		wunlock(&pg->ns);
+		mountfree(mh->mount);
+		mh->mount = nil;
+		cclose(mh->from);
+		wunlock(&mh->lock);
+		putmhead(mh);
+		return;
+	}
+
+	p = &mh->mount;
+	for(f = *p; f; f = f->next){
+		/* BUG: Needs to be 2 pass */
+		if(eqchan(f->to, mounted, 1) ||
+		  (f->to->mchan && eqchan(f->to->mchan, mounted, 1))){
+			*p = f->next;
+			f->next = 0;
+			mountfree(f);
+			if(mh->mount == nil){	/* that was the last entry: unhash the head too */
+				*l = mh->hash;
+				cclose(mh->from);
+				wunlock(&mh->lock);
+				wunlock(&pg->ns);
+				putmhead(mh);
+				return;
+			}
+			wunlock(&mh->lock);
+			wunlock(&pg->ns);
+			return;
+		}
+		p = &f->next;
+	}
+	wunlock(&mh->lock);
+	wunlock(&pg->ns);
+	error(Eunion);
+}
+/*
+ * Return a clone of c, obtained by asking c's device to walk zero
+ * names.  The clone shares c's path and takes a reference to it.
+ * Raises "clone failed" if the device's walk returns nil.
+ */
+Chan*
+cclone(Chan *c)
+{
+	Chan *nc;
+	Walkqid *wq;
+
+	wq = c->dev->walk(c, nil, nil, 0);		//XDYNX?
+	if(wq == nil)
+		error("clone failed");
+	nc = wq->clone;
+	free(wq);
+	nc->path = c->path;
+	if(c->path)
+		incref(c->path);
+	return nc;
+}
+
+/*
+ * Look in the current name space for a mount head whose from chan
+ * matches (dc, devno, qid), ignoring the qid version.
+ * If one is found: when mp is non-nil, *mp is replaced by a new
+ * reference to the head (any old *mp is put); *cp is replaced by a
+ * new reference to the first mounted chan (any old *cp is closed);
+ * and 1 is returned.  Otherwise 0 is returned and *cp and *mp are
+ * left alone.
+ * also used by sysfile.c:/^mountfix
+ */
+int
+findmount(Chan **cp, Mhead **mp, int dc, uint devno, Qid qid)
+{
+	Pgrp *pg;
+	Mhead *mh;
+
+	pg = up->pgrp;
+	rlock(&pg->ns);
+	for(mh = MOUNTH(pg, qid); mh; mh = mh->hash){
+		rlock(&mh->lock);
+		if(mh->from == nil){
+			print("mh %#p: mh->from nil\n", mh);
+			runlock(&mh->lock);
+			continue;
+		}
+		if(eqchanddq(mh->from, dc, devno, qid, 1)){
+			runlock(&pg->ns);	/* mh->lock is still held for reading */
+			if(mp != nil){
+				incref(mh);
+				if(*mp != nil)
+					putmhead(*mp);
+				*mp = mh;
+			}
+			if(*cp != nil)
+				cclose(*cp);
+			incref(mh->mount->to);	/* NOTE(review): assumes mh->mount is non-nil here */
+			*cp = mh->mount->to;
+			runlock(&mh->lock);
+			return 1;
+		}
+		runlock(&mh->lock);
+	}
+
+	runlock(&pg->ns);
+	return 0;
+}
+
+/*
+ * Calls findmount but also updates path.
+ */ +static int +domount(Chan **cp, Mhead **mp, Path **path) +{ + Chan **lc; + Path *p; + + if(findmount(cp, mp, (*cp)->dev->dc, (*cp)->devno, (*cp)->qid) == 0) + return 0; + + if(path){ + p = *path; + p = uniquepath(p); + if(p->mlen <= 0) + print("domount: path %s has mlen==%d\n", p->s, p->mlen); + else{ + lc = &p->mtpt[p->mlen-1]; + DBG("domount %#p %s => add %#p (was %#p)\n", + p, p->s, (*mp)->from, p->mtpt[p->mlen-1]); + incref((*mp)->from); + if(*lc) + cclose(*lc); + *lc = (*mp)->from; + } + *path = p; + } + return 1; +} + +/* + * If c is the right-hand-side of a mount point, returns the left hand side. + * Changes name to reflect the fact that we've uncrossed the mountpoint, + * so name had better be ours to change! + */ +static Chan* +undomount(Chan *c, Path *path) +{ + Chan *nc; + + if(path->ref != 1 || path->mlen == 0) + print("undomount: path %s ref %d mlen %d caller %#p\n", + path->s, path->ref, path->mlen, getcallerpc(&c)); + + if(path->mlen>0 && (nc=path->mtpt[path->mlen-1]) != nil){ + DBG("undomount %#p %s => remove %p\n", path, path->s, nc); + cclose(c); + path->mtpt[path->mlen-1] = nil; + c = nc; + } + return c; +} + +/* + * Call dev walk but catch errors. + */ +static Walkqid* +ewalk(Chan *c, Chan *nc, char **name, int nname) +{ + Walkqid *wq; + + if(waserror()) + return nil; + wq = c->dev->walk(c, nc, name, nname); + poperror(); + return wq; +} + +/* + * Either walks all the way or not at all. No partial results in *cp. + * *nerror is the number of names to display in an error message. + */ +static char Edoesnotexist[] = "does not exist"; +int +walk(Chan **cp, char **names, int nnames, int nomount, int *nerror) +{ + int dc, devno, didmount, dotdot, i, n, nhave, ntry; + Chan *c, *nc, *mtpt; + Path *path; + Mhead *mh, *nmh; + Mount *f; + Walkqid *wq; + + c = *cp; + incref(c); + path = c->path; + incref(path); + mh = nil; + + /* + * While we haven't gotten all the way down the path: + * 1. step through a mount point, if any + * 2. 
send a walk request for initial dotdot or initial prefix without dotdot + * 3. move to the first mountpoint along the way. + * 4. repeat. + * + * Each time through the loop: + * + * If didmount==0, c is on the undomount side of the mount point. + * If didmount==1, c is on the domount side of the mount point. + * Either way, c's full path is path. + */ + didmount = 0; + for(nhave=0; nhaveqid.type & QTDIR)){ + if(nerror) + *nerror = nhave; + pathclose(path); + cclose(c); + strcpy(up->errstr, Enotdir); + if(mh != nil) + putmhead(mh); + return -1; + } + ntry = nnames - nhave; + if(ntry > MAXWELEM) + ntry = MAXWELEM; + dotdot = 0; + for(i=0; idev->dc; + devno = c->devno; + + if((wq = ewalk(c, nil, names+nhave, ntry)) == nil){ + /* try a union mount, if any */ + if(mh && !nomount){ + /* + * mh->mount->to == c, so start at mh->mount->next + */ + rlock(&mh->lock); + if(mh->mount != nil){ + for(f = mh->mount->next; f != nil; f = f->next){ + if((wq = ewalk(f->to, nil, names+nhave, ntry)) != nil){ + dc = f->to->dev->dc; + devno = f->to->devno; + break; + } + } + } + runlock(&mh->lock); + } + if(wq == nil){ + cclose(c); + pathclose(path); + if(nerror) + *nerror = nhave+1; + if(mh != nil) + putmhead(mh); + return -1; + } + } + + nmh = nil; + didmount = 0; + if(dotdot){ + assert(wq->nqid == 1); + assert(wq->clone != nil); + + path = addelem(path, "..", nil); + nc = undomount(wq->clone, path); + n = 1; + }else{ + nc = nil; + if(!nomount){ + for(i=0; inqid && iqid[i])){ + didmount = 1; + break; + } + } + } + if(nc == nil){ /* no mount points along path */ + if(wq->clone == nil){ + cclose(c); + pathclose(path); + if(wq->nqid==0 || (wq->qid[wq->nqid-1].type & QTDIR)){ + if(nerror) + *nerror = nhave+wq->nqid+1; + strcpy(up->errstr, Edoesnotexist); + }else{ + if(nerror) + *nerror = nhave+wq->nqid; + strcpy(up->errstr, Enotdir); + } + free(wq); + if(mh != nil) + putmhead(mh); + return -1; + } + n = wq->nqid; + nc = wq->clone; + }else{ /* stopped early, at a mount point */ + didmount = 
1; + if(wq->clone != nil){ + cclose(wq->clone); + wq->clone = nil; + } + n = i+1; + } + for(i=0; ifrom; + path = addelem(path, names[nhave+i], mtpt); + } + } + cclose(c); + c = nc; + putmhead(mh); + mh = nmh; + free(wq); + } + + putmhead(mh); + + c = cunique(c); + + if(c->umh != nil){ //BUG + print("walk umh\n"); + putmhead(c->umh); + c->umh = nil; + } + + pathclose(c->path); + c->path = path; + + cclose(*cp); + *cp = c; + if(nerror) + *nerror = nhave; + return 0; +} + +/* + * c is a mounted non-creatable directory. find a creatable one. + */ +Chan* +createdir(Chan *c, Mhead *mh) +{ + Chan *nc; + Mount *f; + + rlock(&mh->lock); + if(waserror()){ + runlock(&mh->lock); + nexterror(); + } + for(f = mh->mount; f; f = f->next){ + if(f->mflag&MCREATE){ + nc = cclone(f->to); + runlock(&mh->lock); + poperror(); + cclose(c); + return nc; + } + } + error(Enocreate); + return 0; +} + +static void +saveregisters(void) +{ +} + +static void +growparse(Elemlist *e) +{ + char **new; + int *inew; + enum { Delta = 8 }; + + if(e->nelems % Delta == 0){ + new = smalloc((e->nelems+Delta) * sizeof(char*)); + memmove(new, e->elems, e->nelems*sizeof(char*)); + free(e->elems); + e->elems = new; + inew = smalloc((e->nelems+Delta+1) * sizeof(int)); + memmove(inew, e->off, (e->nelems+1)*sizeof(int)); + free(e->off); + e->off = inew; + } +} + +/* + * The name is known to be valid. + * Copy the name so slashes can be overwritten. + * An empty string will set nelem=0. + * A path ending in / or /. or /.//./ etc. will have + * e.mustbedir = 1, so that we correctly + * reject, e.g., "/adm/users/." when /adm/users is a file + * rather than a directory. 
+ */
+static void
+parsename(char *aname, Elemlist *e)
+{
+	char *name, *slash;
+
+	kstrdup(&e->name, aname);
+	name = e->name;
+	e->nelems = 0;
+	e->elems = nil;
+	e->off = smalloc(sizeof(int));
+	e->off[0] = skipslash(name) - name;
+	for(;;){
+		name = skipslash(name);
+		if(*name == '\0'){
+			/* nothing after the last slash (or an empty name) */
+			e->off[e->nelems] = name+strlen(name) - e->name;
+			e->mustbedir = 1;
+			break;
+		}
+		growparse(e);
+		e->elems[e->nelems++] = name;
+		slash = utfrune(name, '/');
+		if(slash == nil){
+			e->off[e->nelems] = name+strlen(name) - e->name;
+			e->mustbedir = 0;
+			break;
+		}
+		e->off[e->nelems] = slash - e->name;
+		/* terminate this element in place and step past the slash */
+		*slash++ = '\0';
+		name = slash;
+	}
+
+	if(DBGFLG > 1){
+		int i;
+
+		DBG("parsename %s:", e->name);
+		for(i=0; i<=e->nelems; i++)
+			DBG(" %d", e->off[i]);
+		DBG("\n");
+	}
+}
+
+/*
+ * Return a pointer to the last byte equal to c among the n bytes
+ * starting at va, or nil if there is none (including n <= 0).
+ * Every byte is examined, the first one included, and no pointer
+ * below va is ever formed.
+ */
+static void*
+memrchr(void *va, int c, long n)
+{
+	uchar *a, *e;
+
+	a = va;
+	for(e = a+n; e > a; )
+		if(*--e == c)
+			return e;
+	return nil;
+}
+
+static void
+namelenerror(char *aname, int len, char *err)
+{
+	char *ename, *name, *next;
+	int i, errlen;
+
+	/*
+	 * If the name is short enough, just use the whole thing.
+	 */
+	errlen = strlen(err);
+	if(len < ERRMAX/3 || len+errlen < 2*ERRMAX/3)
+		snprint(up->genbuf, sizeof up->genbuf, "%.*s",
+			utfnlen(aname, len), aname);
+	else{
+		/*
+		 * Print a suffix of the name, but try to get a little info.
+		 */
+		ename = aname+len;
+		next = ename;
+		do{
+			name = next;
+			next = memrchr(aname, '/', name-aname);
+			if(next == nil)
+				next = aname;
+			len = ename-next;
+		}while(len < ERRMAX/3 || len + errlen < 2*ERRMAX/3);
+
+		/*
+		 * If the name is ridiculously long, chop it.
+ */ + if(name == ename){ + name = ename-ERRMAX/4; + if(name <= aname) + panic("bad math in namelenerror"); + /* NOTE(review): the loop below looks truncated in this copy of the patch; check it against the original source */ + /* walk out of current UTF sequence */ + for(i=0; (*name&0xC0)==0x80 && igenbuf, sizeof up->genbuf, "...%.*s", + utfnlen(name, ename-name), name); + } + snprint(up->errstr, ERRMAX, "%#q %s", up->genbuf, err); + nexterror(); +} + +/* raise err for the whole NUL-terminated name; see namelenerror */ +void +nameerror(char *name, char *err) +{ + namelenerror(name, strlen(name), err); +} + +/* + * Turn a name into a channel. + * &name[0] is known to be a valid address. It may be a kernel address. + * + * Opening with amode Aopen, Acreate, Aremove, or Aaccess guarantees + * that the result will be the only reference to that particular fid. + * This is necessary since we might pass the result to + * devtab[]->remove(). + * + * Opening Atodir or Amount does not guarantee this. + * + * Under certain circumstances, opening Aaccess will cause + * an unnecessary clone in order to get a cunique Chan so it + * can attach the correct name. Sysstat and sys_stat need the + * correct name so they can rewrite the stat info. + */ +Chan* +namec(char *aname, int amode, int omode, int perm) +{ + int len, n, nomount; + Chan *c, *cnew; + Path *path; + Elemlist e; + Rune r; + Mhead *mh; + char *createerr, tmperrbuf[ERRMAX]; + char *name; + Dev *dev; + + if(aname[0] == '\0') + error("empty file name"); + /* from here on aname is a private copy, freed on error and before the normal return */ + aname = validnamedup(aname, 1); + if(waserror()){ + free(aname); + nexterror(); + } + DBG("namec %s %d %d\n", aname, amode, omode); + name = aname; + + /* + * Find the starting off point (the current slash, the root of + * a device tree, or the current dot) as well as the name to + * evaluate starting there.
+ */ + nomount = 0; + switch(name[0]){ + case '/': + c = up->slash; + incref(c); + break; + + case '#': + nomount = 1; + n = 0; + /* #*name[!spec]: look the device up by name and rewrite it as #c[spec] in genbuf */ + if(name[1] == '*'){ + name += 2; + while(*name != '\0' && *name != '/' && *name != '!'){ + if(n >= sizeof(up->genbuf)-1) + error(Efilename); + up->genbuf[n++] = *name++; + } + up->genbuf[n] = '\0'; + dev = devbyname(up->genbuf); + if(dev == nil) + error(Ebadsharp); + r = dev->dc; + up->genbuf[0] = '#'; + n = 1+runetochar(up->genbuf+1, &r); + if(*name == '!'){ + name++; + while(*name != '\0' && *name != '/'){ + if(n >= sizeof(up->genbuf)-1) + error(Efilename); + up->genbuf[n++] = *name++; + } + } + up->genbuf[n] = '\0'; + }else{ + /* #cspec: copy up to the next slash (but always at least two bytes) into genbuf */ + n = 0; + while(*name != '\0' && (*name != '/' || n < 2)){ + if(n >= sizeof(up->genbuf)-1) + error(Efilename); + up->genbuf[n++] = *name++; + } + up->genbuf[n] = '\0'; + n = chartorune(&r, up->genbuf+1)+1; + } + /* actually / is caught by parsing earlier */ + if(utfrune("M", r)) + error(Enoattach); + /* + * noattach is sandboxing. + * + * the OK exceptions are: + * | it only gives access to pipes you create + * d this process's file descriptors + * e this process's environment + * the iffy exceptions are: + * c time and pid, but also cons and consctl + * p control of your own processes (and unfortunately + * any others left unprotected) + */ + if(up->pgrp->noattach && utfrune("|decp", r)==nil) + error(Enoattach); + dev = devtabget(r, 1); //XDYNX + if(dev == nil) + error(Ebadsharp); + //if(waserror()){ + // devtabdecr(dev); + // nexterror(); + //} + c = dev->attach(up->genbuf+n); + //poperror(); + //devtabdecr(dev); + break; + + default: + c = up->dot; + incref(c); + break; + } + + e.aname = aname; + e.prefix = name - aname; + e.name = nil; + e.elems = nil; + e.off = nil; + e.nelems = 0; + e.nerror = 0; + if(waserror()){ + cclose(c); + free(e.name); + free(e.elems); + /* + * Prepare nice error, showing first e.nerror elements of name.
+ */ + /* NOTE(review): e.off does not appear to be freed on the e.nerror == 0 path below; confirm */ + if(e.nerror == 0) + nexterror(); + /* save the message: namelenerror rewrites up->errstr */ + strcpy(tmperrbuf, up->errstr); + if(e.off[e.nerror]==0) + print("nerror=%d but off=%d\n", + e.nerror, e.off[e.nerror]); + if(DBGFLG > 0){ + DBG("showing %d+%d/%d (of %d) of %s (%d %d)\n", + e.prefix, e.off[e.nerror], e.nerror, + e.nelems, aname, e.off[0], e.off[1]); + } + len = e.prefix+e.off[e.nerror]; + free(e.off); + namelenerror(aname, len, tmperrbuf); + } + + /* + * Build a list of elements in the name. + */ + parsename(name, &e); + + /* + * On create, .... + */ + if(amode == Acreate){ + /* perm must have DMDIR if last element is / or /. */ + if(e.mustbedir && !(perm&DMDIR)){ + e.nerror = e.nelems; + error("create without DMDIR"); + } + + /* don't try to walk the last path element just yet. */ + if(e.nelems == 0) + error(Eexist); + e.nelems--; + } + + /* + * On failure make sure e.nerror is a usable index before jumping + * to the handler above, which reads e.off[e.nerror]. + */ + if(walk(&c, e.elems, e.nelems, nomount, &e.nerror) < 0){ + if(e.nerror < 0 || e.nerror > e.nelems){ + print("namec %s walk error nerror=%d\n", aname, e.nerror); + e.nerror = 0; + } + nexterror(); + } + + if(e.mustbedir && !(c->qid.type & QTDIR)) + error(Enotdir); + + if(amode == Aopen && (omode&3) == OEXEC && (c->qid.type & QTDIR)) + error("cannot exec directory"); + + switch(amode){ + case Abind: + /* no need to maintain path - cannot dotdot an Abind */ + mh = nil; + if(!nomount) + domount(&c, &mh, nil); + if(c->umh != nil) + putmhead(c->umh); + c->umh = mh; + break; + + case Aaccess: + case Aremove: + case Aopen: + Open: + /* save&update the name; domount might change c */ + path = c->path; + incref(path); + if(waserror()){ + pathclose(path); + nexterror(); + } + + mh = nil; + if(!nomount) + domount(&c, &mh, &path); + + /* our own copy to open or remove */ + c = cunique(c); + + /* now it's our copy anyway, we can put the name back */ + poperror(); + pathclose(c->path); + c->path = path; + + /* record whether c is on a mount point */ + c->ismtpt = mh!=nil; + + switch(amode){ + case Aaccess: + case Aremove: + putmhead(mh); + break; + + case Aopen: + case Acreate:
+if(c->umh != nil){ + print("cunique umh Open\n"); + putmhead(c->umh); + c->umh = nil; +} + /* only save the mount head if it's a multiple element union */ + if(mh && mh->mount && mh->mount->next) + c->umh = mh; + else + putmhead(mh); + + /* save registers else error() in open has wrong value of c saved */ + saveregisters(); + + /* NOTE(review): this tests the whole of omode, unlike the (omode&3) == OEXEC test earlier in namec; confirm that is intended */ + if(omode == OEXEC) + c->flag &= ~CCACHE; + + +//open: //XDYNX +// get dev +// open +// if no error and read/write +// then fill in c->dev and +// don't put + c = c->dev->open(c, omode&~OCEXEC); + + if(omode & OCEXEC) + c->flag |= CCEXEC; + if(omode & ORCLOSE) + c->flag |= CRCLOSE; + break; + } + break; + + case Atodir: + /* + * Directories (e.g. for cd) are left before the mount point, + * so one may mount on / or . and see the effect. + */ + if(!(c->qid.type & QTDIR)) + error(Enotdir); + break; + + case Amount: + /* + * When mounting on an already mounted upon directory, + * one wants subsequent mounts to be attached to the + * original directory, not the replacement. Don't domount. + */ + break; + + case Acreate: + /* + * We've already walked all but the last element. + * If the last exists, try to open it OTRUNC. + * If omode&OEXCL is set, just give up. + */ + e.nelems++; + e.nerror++; + if(walk(&c, e.elems+e.nelems-1, 1, nomount, nil) == 0){ + if(omode&OEXCL) + error(Eexist); + omode |= OTRUNC; + goto Open; + } + + /* + * The semantics of the create(2) system call are that if the + * file exists and can be written, it is to be opened with truncation. + * On the other hand, the create(5) message fails if the file exists. + * If we get two create(2) calls happening simultaneously, + * they might both get here and send create(5) messages, but only + * one of the messages will succeed. To provide the expected create(2) + * semantics, the call with the failed message needs to try the above + * walk again, opening for truncation.
This correctly solves the + * create/create race, in the sense that any observable outcome can + * be explained as one happening before the other. + * The create/create race is quite common. For example, it happens + * when two rc subshells simultaneously update the same + * environment variable. + * + * The implementation still admits a create/create/remove race: + * (A) walk to file, fails + * (B) walk to file, fails + * (A) create file, succeeds, returns + * (B) create file, fails + * (A) remove file, succeeds, returns + * (B) walk to file, return failure. + * + * This is hardly as common as the create/create race, and is really + * not too much worse than what might happen if (B) got a hold of a + * file descriptor and then the file was removed -- either way (B) can't do + * anything with the result of the create call. So we don't care about this race. + * + * Applications that care about more fine-grained decision of the races + * can use the OEXCL flag to get at the underlying create(5) semantics; + * by default we provide the common case. + * + * We need to stay behind the mount point in case we + * need to do the first walk again (should the create fail). + * + * We also need to cross the mount point and find the directory + * in the union in which we should be creating. + * + * The channel staying behind is c, the one moving forward is cnew. + */ + mh = nil; + cnew = nil; /* is this assignment necessary? */ + if(!waserror()){ /* try create */ + /* if findmount reports a mount on c, createdir supplies the channel to create in; otherwise create in c itself */ + if(!nomount && findmount(&cnew, &mh, c->dev->dc, c->devno, c->qid)) + cnew = createdir(cnew, mh); + else{ + cnew = c; + incref(cnew); + } + + /* + * We need our own copy of the Chan because we're + * about to send a create, which will move it. Once we have + * our own copy, we can fix the name, which might be wrong + * if findmount gave us a new Chan. + */ + cnew = cunique(cnew); + pathclose(cnew->path); + cnew->path = c->path; + incref(cnew->path); + +//create: //XDYNX +// like open regarding read/write?
+ + cnew->dev->create(cnew, e.elems[e.nelems-1], omode&~(OEXCL|OCEXEC), perm); + poperror(); + if(omode & OCEXEC) + cnew->flag |= CCEXEC; + if(omode & ORCLOSE) + cnew->flag |= CRCLOSE; + if(mh) + putmhead(mh); + cclose(c); + c = cnew; + c->path = addelem(c->path, e.elems[e.nelems-1], nil); + break; + } + /* create failed */ + cclose(cnew); + if(mh) + putmhead(mh); + if(omode & OEXCL) + nexterror(); + /* save error: point errstr at a scratch buffer so the retry walk cannot overwrite the create error */ + createerr = up->errstr; + up->errstr = tmperrbuf; + /* note: we depend that walk does not error */ + if(walk(&c, e.elems+e.nelems-1, 1, nomount, nil) < 0){ + up->errstr = createerr; + error(createerr); /* report true error */ + } + up->errstr = createerr; + omode |= OTRUNC; + goto Open; + + default: + panic("unknown namec access %d", amode); + } + + /* place final element in genbuf for e.g. exec */ + if(e.nelems > 0) + kstrcpy(up->genbuf, e.elems[e.nelems-1], sizeof up->genbuf); + else + kstrcpy(up->genbuf, ".", sizeof up->genbuf); + free(e.name); + free(e.elems); + free(e.off); + poperror(); /* e c */ + free(aname); + poperror(); /* aname */ + + return c; +} + +/* + * name is valid. skip leading / and ./ as much as possible + */ +char* +skipslash(char *name) +{ + while(name[0]=='/' || (name[0]=='.' && (name[1]==0 || name[1]=='/'))) + name++; + return name; +} + +/* + * Bytes that validname0 rejects: the control characters 0x00-0x1f, 0x7f, + * and the slash (which validname0 lets through when slashok is set). + */ +char isfrog[256]={ + /*NUL*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*BKS*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*DLE*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*CAN*/ 1, 1, 1, 1, 1, 1, 1, 1, + ['/'] 1, + [0x7f] 1, +}; + +/* + * Check that the name + * a) is in valid memory. + * b) is shorter than 2^16 bytes, so it can fit in a 9P string field. + * c) contains no frogs. + * The first byte is known to be addressable by the requester, so the + * routine works for kernel and user memory both. + * The parameter slashok flags whether a slash character is an error + * or a valid character. + * + * The parameter dup flags whether the string should be copied + * out of user space before being scanned the second time.
+ * (Otherwise a malicious thread could remove the NUL, causing us + * to access unchecked addresses.) + */ +static char* +validname0(char *aname, int slashok, int dup, uintptr pc) +{ + char *ename, *name, *s; + int c, n; + Rune r; + + name = aname; + if(!iskaddr(name)){ + if(!dup) + print("warning: validname* called from %#p with user pointer", pc); + ename = vmemchr(name, 0, (1<<16)); + }else + ename = memchr(name, 0, (1<<16)); + + if(ename==nil || ename-name>=(1<<16)) + error("name too long"); + + /* when dup is set, copy first so the scan below runs on the private copy */ + s = nil; + if(dup){ + n = ename-name; + s = smalloc(n+1); + memmove(s, name, n); + s[n] = 0; + aname = s; + name = s; + setmalloctag(s, pc); + } + + while(*name){ + /* all characters above '~' are ok */ + c = *(uchar*)name; + if(c >= Runeself) + name += chartorune(&r, name); + else{ + if(isfrog[c]) + if(!slashok || c!='/'){ + snprint(up->genbuf, sizeof(up->genbuf), "%s: %q", Ebadchar, aname); + free(s); + error(up->genbuf); + } + name++; + } + } + return s; +} + +/* check aname in place; raises an error if it is too long or contains a rejected character */ +void +validname(char *aname, int slashok) +{ + validname0(aname, slashok, 0, getcallerpc(&aname)); +} + +/* like validname, but checks and returns a newly allocated copy of aname */ +char* +validnamedup(char *aname, int slashok) +{ + return validname0(aname, slashok, 1, getcallerpc(&aname)); +} + +/* raise Enotdir unless c is a directory */ +void +isdir(Chan *c) +{ + if(c->qid.type & QTDIR) + return; + error(Enotdir); +} + +/* + * This is necessary because there are many + * pointers to the top of a given mount list: + * + * - the mhead in the namespace hash table + * - the mhead in chans returned from findmount: + * used in namec and then by unionread. + * - the mhead in chans returned from createdir: + * used in the open/create race protect, which is gone. + * + * The RWlock in the Mhead protects the mount list it contains. + * The mount list is deleted when we cunmount. + * The RWlock ensures that nothing is using the mount list at that time. + * + * It is okay to replace c->mh with whatever you want as + * long as you are sure you have a unique reference to it. + * + * This comment might belong somewhere else.
+ */ +void +putmhead(Mhead *mh) +{ + if(mh && decref(mh) == 0){ + /* presumably poisons the pointer so that a stale use of the freed Mhead stands out */ + mh->mount = (Mount*)0xCafeBeef; + free(mh); + } +} + diff -Nru /sys/src/9k/port/dev.c /sys/src/9k/port/dev.c --- /sys/src/9k/port/dev.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/dev.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,495 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +extern ulong kerndate; + +/* fill in all three fields of *q */ +void +mkqid(Qid *q, vlong path, ulong vers, int type) +{ + q->type = type; + q->vers = vers; + q->path = path; +} + +/* + * Fill in *db for a device file. The strings n and user are stored by + * pointer, not copied; gid is always eve and muid is user. + */ +void +devdir(Chan *c, Qid qid, char *n, vlong length, char *user, long perm, Dir *db) +{ + db->name = n; + if(c->flag&CMSG) + qid.type |= QTMOUNT; + db->qid = qid; + /* + * When called via devwalk c->dev is nil + * until the walk succeeds. + */ + if(c->dev != nil) + db->type = c->dev->dc; + else + db->type = -1; + db->dev = c->devno; + db->mode = perm; + db->mode |= qid.type << 24; + db->atime = seconds(); + db->mtime = kerndate; + db->length = length; + db->uid = user; + db->gid = eve; + db->muid = user; +} + +/* + * (here, Devgen is the prototype; devgen is the function in dev.c.) + * + * a Devgen is expected to return the directory entry for ".." + * if you pass it s==DEVDOTDOT (-1). otherwise... + * + * there are two contradictory rules. + * + * (i) if c is a directory, a Devgen is expected to list its children + * as you iterate s. + * + * (ii) whether or not c is a directory, a Devgen is expected to list + * its siblings as you iterate s. + * + * devgen always returns the list of children in the root + * directory. thus it follows (i) when c is the root and (ii) otherwise. + * many other Devgens follow (i) when c is a directory and (ii) otherwise. + * + * devwalk assumes (i). it knows that devgen breaks (i) + * for children that are themselves directories, and explicitly catches them. + * + * devstat assumes (ii).
if the Devgen in question follows (i) + * for this particular c, devstat will not find the necessary info. + * with our particular Devgen functions, this happens only for + * directories, so devstat makes something up, assuming + * c->name, c->qid, eve, DMDIR|0555. + * + * devdirread assumes (i). the callers have to make sure + * that the Devgen satisfies (i) for the chan being read. + */ +/* + * the zeroth element of the table MUST be the directory itself for .. +*/ +int +devgen(Chan *c, char *name, Dirtab *tab, int ntab, int i, Dir *dp) +{ + if(tab == 0) + return -1; + if(i == DEVDOTDOT){ + /* nothing */ + }else if(name){ + /* NOTE(review): the lookup loop below looks truncated in this copy of the patch; check it against the original source */ + for(i=1; i= ntab) + return -1; + tab += i; + } + devdir(c, tab->qid, tab->name, tab->length, eve, tab->perm, dp); + return 1; +} + +/* the next three functions have empty bodies */ +void +devreset(void) +{ +} + +void +devinit(void) +{ +} + +void +devshutdown(void) +{ +} + +Chan* +devattach(int dc, char *spec) +{ + Chan *c; + char *buf; + int n; + + /* + * There are no error checks here because + * this can only be called from the driver of dc + * which pretty much guarantees devtabget will + * succeed. + */ + c = newchan(); + mkqid(&c->qid, 0, 0, QTDIR); + c->dev = devtabget(dc, 0); + if(spec == nil) + spec = ""; + /* room for the sharp sign, one rune of at most UTFmax bytes, spec and the NUL */ + n = 1+UTFmax+strlen(spec)+1; + buf = smalloc(n); + snprint(buf, n, "#%C%s", dc, spec); + c->path = newpath(buf); + free(buf); + return c; +} + + +/* + * Make a new Chan carrying the devno, mode, qid, offset, aux, mqid and mc of c. + * Panics if c is already open. + */ +Chan* +devclone(Chan *c) +{ + Chan *nc; + + if(c->flag & COPEN){ + panic("devclone: file of type %C already open\n", + c->dev != nil? c->dev->dc: -1); + } + + nc = newchan(); + + /* + * The caller fills dev in if and when necessary.
+ nc->dev = nil; //XDYNXX + */ + nc->devno = c->devno; + nc->mode = c->mode; + nc->qid = c->qid; + nc->offset = c->offset; + nc->umh = nil; + nc->aux = c->aux; + nc->mqid = c->mqid; + nc->mc = c->mc; + return nc; +} + +/* + * Generic walk: step nc (or a clone of c when nc is nil) through the given names, + * using gen to look each element up and recording each qid reached in the result. + * Returns nil if an error is raised while walking. + */ +Walkqid* +devwalk(Chan *c, Chan *nc, char **name, int nname, Dirtab *tab, int ntab, Devgen *gen) +{ + int i, j, alloc; + Walkqid *wq; + char *n; + Dir dir; + + if(nname > 0) + isdir(c); + + alloc = 0; + /* Walkqid presumably declares room for one Qid already, hence nname-1 more */ + wq = smalloc(sizeof(Walkqid)+(nname-1)*sizeof(Qid)); + if(waserror()){ + if(alloc && wq->clone!=nil) + cclose(wq->clone); + free(wq); + return nil; + } + if(nc == nil){ + nc = devclone(c); + /* + * nc->dev remains nil for now. //XDYNX + */ + alloc = 1; + } + wq->clone = nc; + + /* NOTE(review): the loop header below looks truncated in this copy of the patch; check it against the original source */ + for(j=0; jqid.type & QTDIR)){ + if(j==0) + error(Enotdir); + goto Done; + } + n = name[j]; + if(strcmp(n, ".") == 0){ + Accept: + wq->qid[wq->nqid++] = nc->qid; + continue; + } + if(strcmp(n, "..") == 0){ + /* + * Use c->dev->name in the error because + * nc->dev should be nil here. + */ + if((*gen)(nc, nil, tab, ntab, DEVDOTDOT, &dir) != 1){ + print("devgen walk .. in dev%s %#llux broken\n", + c->dev->name, nc->qid.path); + error("broken devgen"); + } + nc->qid = dir.qid; + goto Accept; + } + /* + * Ugly problem: If we're using devgen, make sure we're + * walking the directory itself, represented by the first + * entry in the table, and not trying to step into a sub- + * directory of the table, e.g. /net/net. Devgen itself + * should take care of the problem, but it doesn't have + * the necessary information (that we're doing a walk). + */ + if(gen==devgen && nc->qid.path!=tab[0].qid.path) + goto Notfound; + for(i=0;; i++) { + switch((*gen)(nc, n, tab, ntab, i, &dir)){ + case -1: + Notfound: + if(j == 0) + error(Enonexist); + kstrcpy(up->errstr, Enonexist, ERRMAX); + goto Done; + case 0: + continue; + case 1: + if(strcmp(n, dir.name) == 0){ + nc->qid = dir.qid; + goto Accept; + } + continue; + } + } + } + /* + * We processed at least one name, so will return some data.
+ * If we didn't process all nname entries succesfully, we drop + * the cloned channel and return just the Qids of the walks. + */ +Done: + poperror(); + if(wq->nqid < nname){ + if(alloc) + cclose(wq->clone); + wq->clone = nil; + }else if(wq->clone){ + /* attach cloned channel to same device */ +//what goes here: //XDYNX +// ->dev must be nil because can't walk an open chan, right? +// what about ref count on dev? + wq->clone->dev = c->dev; + //if(wq->clone->dev) //XDYNX + // devtabincr(wq->clone->dev); + } + return wq; +} + +/* + * Generic stat: iterate gen until an entry whose qid.path matches c is found + * and marshal it into db. If gen runs out and c is a directory, an entry is + * made up from the last element of the path of c. Returns the convD2M length. + */ +long +devstat(Chan *c, uchar *db, long n, Dirtab *tab, int ntab, Devgen *gen) +{ + int i; + Dir dir; + char *p, *elem; + + for(i=0;; i++){ + switch((*gen)(c, nil, tab, ntab, i, &dir)){ + case -1: + if(c->qid.type & QTDIR){ + if(c->path == nil) + elem = "???"; + else if(strcmp(c->path->s, "/") == 0) + elem = "/"; + else + for(elem=p=c->path->s; *p; p++) + if(*p == '/') + elem = p+1; + devdir(c, c->qid, elem, 0, eve, DMDIR|0555, &dir); + n = convD2M(&dir, db, n); + if(n == 0) + error(Ebadarg); + return n; + } + + error(Enonexist); + case 0: + break; + case 1: + if(c->qid.path == dir.qid.path) { + if(c->flag&CMSG) + dir.mode |= DMMOUNT; + n = convD2M(&dir, db, n); + if(n == 0) + error(Ebadarg); + return n; + } + break; + } + } +} + +/* + * Generic directory read: marshal successive gen entries (indexed by c->dri) + * into d and return the number of bytes produced. + * NOTE(review): the loop header below looks truncated in this copy of the + * patch; check it against the original source. + */ +long +devdirread(Chan *c, char *d, long n, Dirtab *tab, int ntab, Devgen *gen) +{ + long m, dsz; + Dir dir; + + for(m=0; mdri++) { + switch((*gen)(c, nil, tab, ntab, c->dri, &dir)){ + case -1: + return m; + + case 0: + break; + + case 1: + dsz = convD2M(&dir, (uchar*)d, n-m); + if(dsz <= BIT16SZ){ /* <= not < because this isn't stat; read is stuck */ + if(m == 0) + error(Eshort); + return m; + } + m += dsz; + d += dsz; + break; + } + } + + return m; +} + +/* + * error(Eperm) if open permission not granted for up->user.
+ */ +void +devpermcheck(char *fileuid, int perm, int omode) +{ + int t; + static int access[] = { 0400, 0200, 0600, 0100 }; + + /* shift the owner, group or other bits of perm up into the owner position */ + if(strcmp(up->user, fileuid) == 0) + perm <<= 0; + else if(ingroup(up->user, eve)) + perm <<= 3; + else + perm <<= 6; + + /* access[] is indexed by the low two bits of omode; every bit it names must be set in perm */ + t = access[omode&3]; + if((t&perm) != t) + error(Eperm); +} + +/* + * Generic open: if gen yields an entry matching the qid.path of c, check + * permission against it; then mark c open. Directories may only be opened OREAD. + */ +Chan* +devopen(Chan *c, int omode, Dirtab *tab, int ntab, Devgen *gen) +{ + int i; + Dir dir; + + for(i=0;; i++) { + switch((*gen)(c, nil, tab, ntab, i, &dir)){ + case -1: + goto Return; + case 0: + break; + case 1: + if(c->qid.path == dir.qid.path) { + devpermcheck(dir.uid, dir.mode, omode); + goto Return; + } + break; + } + } +Return: + c->offset = 0; + if((c->qid.type & QTDIR) && omode!=OREAD) + error(Eperm); + c->mode = openmode(omode); + c->flag |= COPEN; + return c; +} + +/* generic create: always raises Eperm */ +void +devcreate(Chan*, char*, int, int) +{ + error(Eperm); +} + +/* read up to n bytes at offset through c->dev->read into a newly allocated Block */ +Block* +devbread(Chan *c, long n, vlong offset) +{ + Block *bp; + + bp = allocb(n); + if(bp == 0) + error(Enomem); + if(waserror()) { + freeb(bp); + nexterror(); + } + bp->wp += c->dev->read(c, bp->wp, n, offset); + poperror(); + return bp; +} + +/* write the contents of bp through c->dev->write; bp is freed whether or not the write raises an error */ +long +devbwrite(Chan *c, Block *bp, vlong offset) +{ + long n; + + if(waserror()) { + freeb(bp); + nexterror(); + } + n = c->dev->write(c, bp->rp, BLEN(bp), offset); + poperror(); + freeb(bp); + + return n; +} + +/* generic remove: always raises Eperm */ +void +devremove(Chan*) +{ + error(Eperm); +} + +/* generic wstat: always raises Eperm */ +long +devwstat(Chan*, uchar*, long) +{ + error(Eperm); + return 0; +} + +/* generic power: always raises Eperm */ +void +devpower(int) +{ + error(Eperm); +} + +/* generic config: always raises Eperm */ +int +devconfig(int, char *, DevConf *) +{ + error(Eperm); + return 0; +} + +/* + * TO DO: this is not the usual meaning of scatter/gather + */ +static long +deviov(Chan *c, IOchunk *io, long nio, vlong offset, int mode) +{ + long i, n, len; + long (*fn)(Chan*, void*, long, vlong); + + fn = mode == OWRITE?
c->dev->write: c->dev->read; + len = 0; + for(i = 0; i < nio; i++) { + n = fn(c, io[i].addr, io[i].len, offset); + /* a short transfer ends the loop and its bytes are not counted in len */ + if(n != io[i].len) + break; + offset += n; + len += n; + } + return len; +} + +/* read each chunk of io in turn, starting at offset; the unnamed long parameter is unused */ +long +devreadv(Chan *c, IOchunk *io, long nio, long, vlong offset) +{ + return deviov(c, io, nio, offset, OREAD); +} + +/* write each chunk of io in turn, starting at offset; the unnamed long parameter is unused */ +long +devwritev(Chan *c, IOchunk *io, long nio, long, vlong offset) +{ + return deviov(c, io, nio, offset, OWRITE); +} diff -Nru /sys/src/9k/port/devaoe.c /sys/src/9k/port/devaoe.c --- /sys/src/9k/port/devaoe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devaoe.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2573 @@ +/* + * copyright © 2010 CORAID + * coraid, inc. all rights reserved + * aoe storage initiator + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" +#include "../port/netif.h" +#include "etherif.h" +#include "../ip/ip.h" +#include "../port/aoe.h" + +#define Ticks sys->ticks + + +#pragma varargck argpos eventlog 1 + +#define dprint(...) if(debug) eventlog(__VA_ARGS__); else USED(debug); +#define uprint(...)
snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__); + +enum { + Maxunits = 0xff, + Maxframes = 128, + Maxmtu = 100000, + Ndevlink = 6, + Nea = 6, + Nnetlink = 6, +}; + +/* + * qid.path layout as read by the extractors below: bits 0-3 file type, + * bits 4-11 unit, bits 12-15 link. + * NOTE(review): Q3 places l at bit 8, which overlaps the unit field and is + * not where L() reads it from; confirm which of the two is intended. + */ +#define TYPE(q) ((ulong)(q).path & 0xf) +#define UNIT(q) (((ulong)(q).path>>4) & 0xff) +#define L(q) (((ulong)(q).path>>12) & 0xf) +#define QID(u, t) ((u)<<4 | (t)) +#define Q3(l, u, t) ((l)<<8 | QID(u, t)) +#define UP(d) ((d)->flag & Dup) + +#define Ms2tk(t) (((t)*HZ)/1000) +#define Tk2ms(t) (((t)*1000)/HZ) + +enum { + Qzero, + Qtopdir = 1, + Qtopbase, + Qtopctl = Qtopbase, + Qtoplog, + Qtopend, + + Qunitdir, + Qunitbase, + Qctl = Qunitbase, + Qdata, + Qconfig, + Qident, + + Qdevlinkdir, + Qdevlinkbase, + Qdevlink = Qdevlinkbase, + Qdevlinkend, + + Qtopfiles = Qtopend-Qtopbase, + Qdevlinkfiles = Qdevlinkend-Qdevlinkbase, + + Eventlen = 256, + Nevents = 64, + + Fread = 0, + Fwrite, + Tfree = -1, + Tmgmt, + + /* round trip bounds, timeouts, in ticks */ + Rtmax = Ms2tk(320), + Rtmin = Ms2tk(20), + Srbtimeout = 45*HZ, + + Dbcnt = 1024, + + Crd = 0x20, + Crdext = 0x24, + Cwr = 0x30, + Cwrext = 0x34, + Cid = 0xec, +}; + +enum { + Read, + Write, +}; + +/* + * unified set of flags + * a Netlink + Aoedev must both be jumbo capable + * to send jumbograms to that interface. + */ +enum { + /* sync with ahci.h */ + Dllba = 1<<0, + Dsmart = 1<<1, + Dpower = 1<<2, + Dnop = 1<<3, + Datapi = 1<<4, + Datapi16= 1<<5, + + /* aoe specific */ + Dup = 1<<6, + Djumbo = 1<<7, +}; + +/* names for the flag bits above, in bit order: entry i names bit 1<<i */ +static char *flagname[] = { + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", + + "up", + "jumbo", +}; + +typedef struct { + uchar flag; + uchar lostjumbo; + int datamtu; + + Chan *cc; + Chan *dc; + Chan *mtu; /* open early to prevent bind issues.
*/ + char path[Maxpath]; + uchar ea[Eaddrlen]; +} Netlink; + +/* one path to a target: a Netlink plus up to Nea destination addresses, with per-path counters */ +typedef struct { + Netlink *nl; + int nea; + ulong eaidx; + uchar eatab[Nea][Eaddrlen]; + int datamtu; + ulong npkt; + ulong resent; + uchar flag; + + ulong rttavg; + ulong mintimer; +} Devlink; + +/* a queued request; the embedded Rendez is what srberror wakes */ +typedef struct Srb Srb; +struct Srb { + Rendez; + Srb *next; + ulong ticksent; + ulong len; + vlong sector; + short write; + short nout; + char *error; + void *dp; + void *data; +}; + +/* one outstanding packet; tag is Tfree while the slot is unused */ +typedef struct { + int tag; + ulong bcnt; + ulong dlen; + vlong lba; + ulong ticksent; + int nhdr; + uchar hdr[ETHERMINTU]; + void *dp; + Devlink *dl; + Netlink *nl; + int eaidx; + Srb *srb; +} Frame; + +typedef struct Aoedev Aoedev; +struct Aoedev { + QLock; + Aoedev *next; + + ulong vers; + + int ndl; + ulong dlidx; + Devlink *dl; + Devlink dltab[Ndevlink]; + + ushort fwver; + uchar flag; + int nopen; + int major; + int minor; + int unit; + int lasttag; + int nframes; /* BOTCH: need to track diff b/w maxframes & nframes */ + Frame *frames; + vlong bsize; + vlong realbsize; + + uint maxbcnt; + uint maxmtu; + ulong lostjumbo; + ushort nout; + ushort maxout; + ulong lastwadj; + Srb *head; + Srb *tail; + Srb *inprocess; + + /* magic numbers 'R' us */ + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + int nconfig; + uchar config[1024]; + uchar ident[512]; +}; + +#pragma varargck type "æ" Aoedev* + +/* + * Event log: a ring of Nevents slots of Eventlen bytes each. The first byte + * of a slot holds the message length (0 when the slot is empty). + */ +static struct { + Lock; + QLock; + Rendez; + char buf[Eventlen*Nevents]; + char *rp; + char *wp; +} events; + +/* list of known devices; readers take the RWlock with rlock */ +static struct { + RWlock; + int nd; + Aoedev *d; +} devs; + +static struct { + Lock; + int reader[Nnetlink]; /* reader is running. */ + Rendez rendez[Nnetlink]; /* confirm exit.
*/ + Netlink nl[Nnetlink]; +} netlinks; + +extern Dev aoedevtab; +static struct{ + Ref; + Lock; +} units; +static int drivevers; +static int debug; +static int autodiscover = 1; +static int rediscover; +extern char Enotup[] = "aoe device is down"; +extern char Echange[] = "media or partition has changed"; + +/* + * Allocate an Srb with sz bytes of data space directly after it. + * NOTE(review): the malloc result is used without a nil check. + */ +static Srb* +srballoc(ulong sz) +{ + Srb *srb; + + srb = malloc(sizeof *srb+sz); + srb->dp = srb->data = srb+1; + srb->ticksent = Ticks; + return srb; +} + +/* + * Allocate an Srb whose data pointers refer to the buffer db supplied by the caller. + * NOTE(review): the malloc result is used without a nil check. + */ +static Srb* +srbkalloc(void *db, ulong) +{ + Srb *srb; + + srb = malloc(sizeof *srb); + srb->dp = srb->data = db; + srb->ticksent = Ticks; + return srb; +} + +#define srbfree(srb) free(srb) + +/* record s as the error of the request, drop its outstanding count and wake whoever sleeps on it */ +static void +srberror(Srb *srb, char *s) +{ + srb->error = s; + srb->nout--; + wakeup(srb); +} + +/* release frame f and, if a request was attached to it, fail that request with s */ +static void +frameerror(Aoedev *d, Frame *f, char *s) +{ + Srb *srb; + + srb = f->srb; + if(f->tag == Tfree) + return; + f->srb = nil; + f->tag = Tfree; /* don't get fooled by way-slow responses */ + if(!srb) + return; + srberror(srb, s); + d->nout--; +} + +/* format major.minor into up->genbuf and return that buffer, so the result only lasts until genbuf is reused */ +static char* +unitname(Aoedev *d) +{ + uprint("%d.%d", d->major, d->minor); + return up->genbuf; +} + +/* + * Copy at most n bytes of the oldest logged message to a and consume it. + * Returns the number of bytes copied, 0 if the log is empty. + * NOTE(review): the length byte is read through a plain char pointer; + * confirm that lengths above 127 are handled as intended. + */ +static long +eventlogread(void *a, long n) +{ + int len; + char *p, *buf; + + buf = smalloc(Eventlen); + qlock(&events); + lock(&events); + p = events.rp; + len = *p; + if(len == 0){ + n = 0; + unlock(&events); + } else { + if(n > len) + n = len; + /* can't move directly into pageable space with events lock held */ + memmove(buf, p+1, n); + *p = 0; + events.rp = p += Eventlen; + if(p >= events.buf + sizeof events.buf) + events.rp = events.buf; + unlock(&events); + + /* the concern here is page faults in memmove below */ + if(waserror()){ + free(buf); + qunlock(&events); + nexterror(); + } + memmove(a, buf, n); + poperror(); + } + free(buf); + qunlock(&events); + return n; +} + +static int +eventlog(char *fmt, ...)
+{ + int dragrp, n; + char *p; + va_list arg; + + lock(&events); + p = events.wp; + /* a non-zero length byte means this slot still holds an unread message, so the read pointer is moved past it below */ + dragrp = *p++; + va_start(arg, fmt); + n = vsnprint(p, Eventlen-1, fmt, arg); + *--p = n; + p = events.wp += Eventlen; + if(p >= events.buf + sizeof events.buf) + p = events.wp = events.buf; + if(dragrp) + events.rp = p; + unlock(&events); + wakeup(&events); + return n; +} + +/* + * NOTE(review): the pointer differences below are byte counts, but the + * wrap-around branch adds Nevents (a slot count) before the division by + * Eventlen; confirm the units. + */ +static int +eventcount(void) +{ + int n; + + lock(&events); + if(*events.rp == 0) + n = 0; + else if(events.wp < events.rp) + n = Nevents - (events.rp - events.wp); + else + n = events.wp - events.rp; + unlock(&events); + return n/Eventlen; +} + +/* ticks elapsed since the low 16 bits of tag were taken from Ticks, modulo 2^16 */ +static int +tsince(int tag) +{ + int n; + + n = Ticks & 0xffff; + n -= tag & 0xffff; + if(n < 0) + n += 1<<16; + return n; +} + +/* build a tag from an incrementing counter (high bits) and the current tick (low 16 bits), skipping the values Tfree and Tmgmt */ +static int +newtag(Aoedev *d) +{ + int t; + + do { + t = ++d->lasttag << 16; + t |= Ticks & 0xffff; + } while (t == Tfree || t == Tmgmt); + return t; +} + +/* mark d down, fail every frame with Enotup and log err */ +static void +downdev(Aoedev *d, char *err) +{ + Frame *f, *e; + + d->flag &= ~Dup; + f = d->frames; + e = f + d->nframes; + for(; f < e; f->tag = Tfree, f->srb = nil, f++) + frameerror(d, f, Enotup); + d->inprocess = nil; + eventlog("%æ: removed; %s\n", d, err); +} + +/* build a Block holding the header of f followed by its data; the Block is at least ETHERMINTU bytes long */ +static Block* +allocfb(Frame *f) +{ + int len; + Block *b; + + len = f->nhdr + f->dlen; + if(len < ETHERMINTU) + len = ETHERMINTU; + b = allocb(len); + memmove(b->wp, f->hdr, f->nhdr); + if(f->dlen) + memmove(b->wp + f->nhdr, f->dp, f->dlen); + b->wp += len; + return b; +} + +/* store the low 48 bits of lba into a->lba, least significant byte first */ +static void +putlba(Aoeata *a, vlong lba) +{ + uchar *c; + + c = a->lba; + c[0] = lba; + c[1] = lba >> 8; + c[2] = lba >> 16; + c[3] = lba >> 24; + c[4] = lba >> 32; + c[5] = lba >> 40; +} + +/* round-robin over the devlinks of d starting at dlidx; return the first one that is up, or 0 if none is */ +static Devlink* +pickdevlink(Aoedev *d) +{ + ulong i, n; + Devlink *l; + + for(i = 0; i < d->ndl; i++){ + n = d->dlidx++ % d->ndl; + l = d->dl + n; + if(l && l->flag & Dup) + return l; + } + return 0; +} + +/* next round-robin index into l->eatab, or -1 if l is nil or has no addresses */ +static int +pickea(Devlink *l) +{ + if(l == 0) + return -1; + if(l->nea == 0) + return -1; + return l->eaidx++ % l->nea; +} + +static int +hset(Aoedev *d, Frame
*f, Aoehdr *h, int cmd) +{ + int i; + Devlink *l; + + if(f->srb) + if((long)(Ticks-f->srb->ticksent) > Srbtimeout){ + eventlog("%æ: srb timeout\n", d); + frameerror(d, f, Etimedout); + return -1; + } + /* choose the devlink and destination address for this transmission */ + l = pickdevlink(d); + i = pickea(l); + if(i == -1){ + downdev(d, "resend fails; no netlink/ea"); + return -1; + } + memmove(h->dst, l->eatab[i], Eaddrlen); + memmove(h->src, l->nl->ea, sizeof h->src); + hnputs(h->type, ETAOE); + h->verflags = Aoever << 4; + h->error = 0; + hnputs(h->major, d->major); + h->minor = d->minor; + h->cmd = cmd; + + /* every call assigns the frame a fresh tag, which is also the return value */ + hnputl(h->tag, f->tag = newtag(d)); + f->dl = l; + f->nl = l->nl; + f->eaidx = i; + f->ticksent = Ticks; + + return f->tag; +} + +/* + * Retransmit frame f with a freshly built header. Returns 0 once the frame + * has been handed to bwrite on the netlink data channel, -1 if hset fails + * or the write raises an error. + */ +static int +resend(Aoedev *d, Frame *f) +{ + ulong n; + Aoeata *a; + + a = (Aoeata*)f->hdr; + if(hset(d, f, a, a->cmd) == -1) + return -1; + n = f->bcnt; + if(n > d->maxbcnt){ + n = d->maxbcnt; /* mtu mismatch (jumbo fail?) */ + if(f->dlen > n) + f->dlen = n; + } + a->scnt = n / Aoesectsz; + f->dl->resent++; + f->dl->npkt++; + if(waserror()) + /* should remove the netlink */ + return -1; + f->nl->dc->dev->bwrite(f->nl->dc, allocfb(f), 0); + poperror(); + return 0; +} + +/* send an ACconfig query for major.minor to the all-ones address on every netlink whose cc is set */ +static void +discover(int major, int minor) +{ + Aoehdr *h; + Block *b; + Netlink *nl, *e; + + nl = netlinks.nl; + e = nl + nelem(netlinks.nl); + for(; nl < e; nl++){ + if(nl->cc == nil) + continue; + b = allocb(ETHERMINTU); + if(waserror()){ + freeb(b); + nexterror(); + } + b->wp = b->rp + ETHERMINTU; + memset(b->rp, 0, ETHERMINTU); + h = (Aoehdr*)b->rp; + memset(h->dst, 0xff, sizeof h->dst); + memmove(h->src, nl->ea, sizeof h->src); + hnputs(h->type, ETAOE); + h->verflags = Aoever << 4; + hnputs(h->major, major); + h->minor = minor; + h->cmd = ACconfig; + poperror(); + nl->dc->dev->bwrite(nl->dc, b, 0); + } +} + +/* + * Check all frames on device and resend any frames that have been + * outstanding for 200% of the device round trip time average.
+ */ +static void +aoesweepproc(void*) +{ + ulong i, tx, timeout, nbc; + vlong starttick; + enum { Nms = 100, Nbcms = 30*1000, }; + uchar *ea; + Aoeata *a; + Aoedev *d; + Devlink *l; + Frame *f, *e; + + nbc = Nbcms/Nms; +loop: + /* once every Nbcms/Nms passes, rebroadcast a discovery if rediscover is set */ + if(nbc-- == 0){ + if(rediscover && !waserror()){ + discover(0xffff, 0xff); + poperror(); + } + nbc = Nbcms/Nms; + } + starttick = Ticks; + rlock(&devs); + for(d = devs.d; d; d = d->next){ + if(!canqlock(d)) + continue; + if(!UP(d)){ + qunlock(d); + continue; + } + tx = 0; + f = d->frames; + e = f + d->nframes; + for (; f < e; f++){ + if(f->tag == Tfree) + continue; + l = f->dl; + /* resend once the frame has been out for twice the average round trip of its devlink */ + timeout = l->rttavg << 1; + i = tsince(f->tag); + if(i < timeout) + continue; + if(d->nout == d->maxout){ + if(d->maxout > 1) + d->maxout--; + d->lastwadj = Ticks; + } + a = (Aoeata*)f->hdr; + /* + * Too many timeouts of large frames on this netlink: fall back to Dbcnt-sized transfers. + * NOTE(review): nl->lostjumbo is declared uchar, so the comparison cannot + * succeed once 2*nframes exceeds 255; confirm. + */ + if(a->scnt > Dbcnt / Aoesectsz && + ++f->nl->lostjumbo > (d->nframes << 1)){ + ea = f->dl->eatab[f->eaidx]; + eventlog("%æ: jumbo failure on %s:%E; lba%lld\n", + d, f->nl->path, ea, f->lba); + d->maxbcnt = Dbcnt; + d->flag &= ~Djumbo; + } + resend(d, f); + if(tx++ == 0){ + if((l->rttavg <<= 1) > Rtmax) + l->rttavg = Rtmax; + eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg)); + } + } + if(d->nout == d->maxout && d->maxout < d->nframes && + TK2MS(Ticks-d->lastwadj) > 10*1000){ + d->maxout++; + d->lastwadj = Ticks; + } + qunlock(d); + } + runlock(&devs); + /* NOTE(review): i is a ulong, so a pass that took longer than Nms makes this wrap to a large value instead of going negative; confirm */ + i = Nms - TK2MS(Ticks - starttick); + if(i > 0) + tsleep(&up->sleep, return0, 0, i); + goto loop; +} + +/* format routine for an Aoedev*: prints aoe followed by major.minor */ +static int +fmtæ(Fmt *f) +{ + char buf[16]; + Aoedev *d; + + d = va_arg(f->args, Aoedev*); + snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor); + return fmtstrcpy(f, buf); +} + +static void netbind(char *path); + +/* call netbind for each interface named in the aoeif configuration variable, ignoring any that fail */ +static void +aoecfg(void) +{ + int n, i; + char *p, *f[32], buf[24]; + + if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1) + return; + /* goo!
*/ + for(i = 0; i < n; i++){ + p = f[i]; + if(strncmp(p, "ether", 5) == 0) + snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]); + else if(strncmp(p, "#l", 2) == 0) + snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]); + else + continue; + if(!waserror()){ + netbind(buf); + poperror(); + } + } +} + +static void +aoeinit(void) +{ + static int init; + static QLock l; + + if(!canqlock(&l)) + return; + if(init == 0){ + fmtinstall(L'æ', fmtæ); + events.rp = events.wp = events.buf; + kproc("aoesweep", aoesweepproc, nil); + aoecfg(); + init = 1; + } + qunlock(&l); +} + +static Chan* +aoeattach(char *spec) +{ + Chan *c; + + if(*spec) + error(Enonexist); + aoeinit(); + c = devattach(L'æ', spec); + mkqid(&c->qid, Qzero, 0, QTDIR); + return c; +} + +static Aoedev* +unitseq(ulong unit) +{ + int i; + Aoedev *d; + + i = 0; + rlock(&devs); + for(d = devs.d; d; d = d->next) + if(i++ == unit) + break; + runlock(&devs); + return d; +} + +static Aoedev* +unit2dev(ulong unit) +{ + Aoedev *d; + + rlock(&devs); + for(d = devs.d; d; d = d->next) + if(d->unit == unit){ + runlock(&devs); + return d; + } + runlock(&devs); + error("unit lookup failure"); + return nil; +} + +static int +unitgen(Chan *c, ulong type, Dir *dp) +{ + int perm, t; + ulong vers; + vlong size; + char *p; + Aoedev *d; + Qid q; + + d = unit2dev(UNIT(c->qid)); + perm = 0644; + size = 0; + vers = d->vers; + t = QTFILE; + + switch(type){ + default: + return -1; + case Qctl: + p = "ctl"; + break; + case Qdata: + p = "data"; + perm = 0640; + if(UP(d)) + size = d->bsize; + break; + case Qconfig: + p = "config"; + if(UP(d)) + size = d->nconfig; + break; + case Qident: + p = "ident"; + if(UP(d)) + size = sizeof d->ident; + break; + case Qdevlinkdir: + p = "devlink"; + t = QTDIR; + perm = 0555; + break; + } + mkqid(&q, QID(UNIT(c->qid), type), vers, t); + devdir(c, q, p, size, eve, perm, dp); + return 1; +} + +static int +topgen(Chan *c, ulong type, Dir *d) +{ + int perm; + vlong size; + char *p; + Qid q; + + perm = 0444; + 
size = 0; + switch(type){ + default: + return -1; + case Qtopctl: + p = "ctl"; + perm = 0644; + break; + case Qtoplog: + p = "log"; + size = eventcount(); + break; + } + mkqid(&q, type, 0, QTFILE); + devdir(c, q, p, size, eve, perm, d); + return 1; +} + +static int +aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp) +{ + int i; + Aoedev *d; + Qid q; + + if(c->qid.path == 0){ + switch(s){ + case DEVDOTDOT: + q.path = 0; + q.type = QTDIR; + devdir(c, q, "#æ", 0, eve, 0555, dp); + break; + case 0: + q.path = Qtopdir; + q.type = QTDIR; + devdir(c, q, "aoe", 0, eve, 0555, dp); + break; + default: + return -1; + } + return 1; + } + + switch(TYPE(c->qid)){ + default: + return -1; + case Qtopdir: + if(s == DEVDOTDOT){ + mkqid(&q, Qzero, 0, QTDIR); + devdir(c, q, "aoe", 0, eve, 0555, dp); + return 1; + } + if(s < Qtopfiles) + return topgen(c, Qtopbase + s, dp); + s -= Qtopfiles; + if((d = unitseq(s)) == 0) + return -1; + mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR); + devdir(c, q, unitname(d), 0, eve, 0555, dp); + return 1; + case Qtopctl: + case Qtoplog: + return topgen(c, TYPE(c->qid), dp); + case Qunitdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, Qtopdir), 0, QTDIR); + uprint("%uld", UNIT(c->qid)); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + return unitgen(c, Qunitbase+s, dp); + case Qctl: + case Qdata: + case Qconfig: + case Qident: + return unitgen(c, TYPE(c->qid), dp); + case Qdevlinkdir: + i = UNIT(c->qid); + if(s == DEVDOTDOT){ + mkqid(&q, QID(i, Qunitdir), 0, QTDIR); + devdir(c, q, "devlink", 0, eve, 0555, dp); + return 1; + } + if(i >= units.ref) + return -1; + d = unit2dev(i); + if(s >= d->ndl) + return -1; + uprint("%d", s); + mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE); + devdir(c, q, up->genbuf, 0, eve, 0755, dp); + return 1; + case Qdevlink: + uprint("%d", s); + mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE); + devdir(c, q, up->genbuf, 0, eve, 0755, dp); + return 1; + } +} + +static Walkqid* +aoewalk(Chan *c, Chan *nc, char **name, 
int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, aoegen); +} + +static long +aoestat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, aoegen); +} + +static Chan* +aoeopen(Chan *c, int omode) +{ + Aoedev *d; + + if(TYPE(c->qid) != Qdata) + return devopen(c, omode, 0, 0, aoegen); + + d = unit2dev(UNIT(c->qid)); + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + if(!UP(d)) + error(Enotup); + c = devopen(c, omode, 0, 0, aoegen); + d->nopen++; + poperror(); + qunlock(d); + return c; +} + +static void +aoeclose(Chan *c) +{ + Aoedev *d; + + if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0) + return; + + d = unit2dev(UNIT(c->qid)); + qlock(d); + if(--d->nopen == 0 && !waserror()){ + discover(d->major, d->minor); + poperror(); + } + qunlock(d); +} + +static void +atarw(Aoedev *d, Frame *f) +{ + ulong bcnt; + char extbit, writebit; + Aoeata *ah; + Srb *srb; + + extbit = 0x4; + writebit = 0x10; + + srb = d->inprocess; + bcnt = d->maxbcnt; + if(bcnt > srb->len) + bcnt = srb->len; + f->nhdr = Szaoeata; + memset(f->hdr, 0, f->nhdr); + ah = (Aoeata*)f->hdr; + if(hset(d, f, ah, ACata) == -1) + return; + f->dp = srb->dp; + f->bcnt = bcnt; + f->lba = srb->sector; + f->srb = srb; + + ah->scnt = bcnt / Aoesectsz; + putlba(ah, f->lba); + if(d->flag & Dllba) + ah->aflags |= AAFext; + else { + extbit = 0; + ah->lba[3] &= 0x0f; + ah->lba[3] |= 0xe0; /* LBA bit+obsolete 0xa0 */ + } + if(srb->write){ + ah->aflags |= AAFwrite; + f->dlen = bcnt; + }else{ + writebit = 0; + f->dlen = 0; + } + ah->cmdstat = 0x20 | writebit | extbit; + + /* mark tracking fields and load out */ + srb->nout++; + srb->dp = (uchar*)srb->dp + bcnt; + srb->len -= bcnt; + srb->sector += bcnt / Aoesectsz; + if(srb->len == 0) + d->inprocess = nil; + d->nout++; + f->dl->npkt++; + if(waserror()){ + f->tag = Tfree; + d->inprocess = nil; + nexterror(); + } + f->nl->dc->dev->bwrite(f->nl->dc, allocfb(f), 0); + poperror(); +} + +static char* +aoeerror(Aoehdr *h) +{ + int n; + static char 
*errs[] = { + "aoe protocol error: unknown", + "aoe protocol error: bad command code", + "aoe protocol error: bad argument param", + "aoe protocol error: device unavailable", + "aoe protocol error: config string present", + "aoe protocol error: unsupported version", + }; + + if((h->verflags & AFerr) == 0) + return 0; + n = h->error; + if(n > nelem(errs)) + n = 0; + return errs[n]; +} + +static void +rtupdate(Devlink *l, int rtt) +{ + int n; + + n = rtt; + if(rtt < 0){ + n = -rtt; + if(n < Rtmin) + n = Rtmin; + else if(n > Rtmax) + n = Rtmax; + l->mintimer += (n - l->mintimer) >> 1; + } else if(n < l->mintimer) + n = l->mintimer; + else if(n > Rtmax) + n = Rtmax; + + /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */ + n -= l->rttavg; + l->rttavg += n >> 2; +} + +static int +srbready(void *v) +{ + Srb *s; + + s = v; + return s->error || (!s->nout && !s->len); +} + +static Frame* +getframe(Aoedev *d, int tag) +{ + Frame *f, *e; + + f = d->frames; + e = f + d->nframes; + for(; f < e; f++) + if(f->tag == tag) + return f; + return nil; +} + +static Frame* +freeframe(Aoedev *d) +{ + if(d->nout < d->maxout) + return getframe(d, Tfree); + return nil; +} + +static void +work(Aoedev *d) +{ + Frame *f; + + while(f = freeframe(d)) { + if(d->inprocess == nil){ + if(d->head == nil) + return; + d->inprocess = d->head; + d->head = d->head->next; + if(d->head == nil) + d->tail = nil; + } + atarw(d, f); + } +} + +static void +strategy(Aoedev *d, Srb *srb) +{ + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + srb->next = nil; + if(d->tail) + d->tail->next = srb; + d->tail = srb; + if(d->head == nil) + d->head = srb; + work(d); + poperror(); + qunlock(d); + + while(waserror()) + ; + sleep(srb, srbready, srb); + poperror(); +} + +static long +rw(Aoedev *d, int write, uchar *db, long len, uvlong off) +{ + long n, nlen, copy; + enum { Srbsz = 1<<19, }; + Srb *srb; + + if((off|len) & (Aoesectsz-1)) + error("offset and length must be sector 
multiple.\n"); + if(off >= d->bsize) + return 0; + if(off + len > d->bsize) + len = d->bsize - off; + copy = 0; + if(isdmaok(db, len, 32)){ + srb = srbkalloc(db, len); + copy = 1; + }else + srb = srballoc(Srbsz <= len? Srbsz: len); + if(waserror()){ + srbfree(srb); + nexterror(); + } + srb->write = write; + for(nlen = len; nlen; nlen -= n){ + if(!UP(d)) + error(Eio); + srb->sector = off / Aoesectsz; + srb->dp = srb->data; + n = nlen; + if(n > Srbsz) + n = Srbsz; + srb->len = n; + if(write && !copy) + memmove(srb->data, db, n); + strategy(d, srb); + if(srb->error) + error(srb->error); + if(!write && !copy) + memmove(db, srb->data, n); + db += n; + off += n; + } + poperror(); + srbfree(srb); + return len; +} + +static long +readmem(ulong off, void *dst, long n, void *src, long size) +{ + if(off >= size) + return 0; + if(off + n > size) + n = size - off; + memmove(dst, (uchar*)src + off, n); + return n; +} + +static char* +pflag(char *s, char *e, uchar f) +{ + uchar i; + + for(i = 0; i < nelem(flagname); i++) + if(f & 1 << i) + s = seprint(s, e, "%s ", flagname[i]); + return seprint(s, e, "\n"); +} + +static int +pstat(Aoedev *d, char *db, int len, int off) +{ + int i; + char *state, *s, *p, *e; + + s = p = malloc(1024); + e = p + 1024; + + state = "down"; + if(UP(d)) + state = "up"; + + p = seprint(p, e, + "state: %s\n" "nopen: %d\n" "nout: %d\n" + "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d [maxmtu %d]\n" + "fw: %.4ux\n" + "model: %s\n" "serial: %s\n" "firmware: %s\n", + state, d->nopen, d->nout, + d->maxout, d->nframes, d->maxbcnt, d->maxmtu, + d->fwver, + d->model, d->serial, d->firmware); + p = seprint(p, e, "flag: "); + p = pflag(p, e, d->flag); + + if(p - s < len) + len = p - s; + i = readstr(off, db, len, s); + free(s); + return i; +} + +static long +unitread(Chan *c, void *db, long len, vlong off) +{ + Aoedev *d; + + d = unit2dev(UNIT(c->qid)); + if(d->vers != c->qid.vers) + error(Echange); + switch(TYPE(c->qid)){ + default: + error(Ebadarg); + case Qctl: + 
return pstat(d, db, len, off);
+	case Qdata:
+		return rw(d, Read, db, len, off);
+	case Qconfig:
+		if(!UP(d))
+			error(Enotup);
+		return readmem(off, db, len, d->config, d->nconfig);
+	case Qident:
+		if(!UP(d))
+			error(Enotup);
+		return readmem(off, db, len, d->ident, sizeof d->ident);
+	}
+}
+
+/*
+ * format the state of one devlink of the unit named by c->qid as
+ * text and copy the part selected by off/len into db.
+ * the devlink index is L(c->qid); an index at or past d->ndl reads
+ * as empty (returns 0).  the "nl ..." lines report fields of the
+ * netlink (l->nl) behind the devlink.
+ */
+static int
+devlinkread(Chan *c, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Aoedev *d;
+	Devlink *l;
+
+	d = unit2dev(UNIT(c->qid));
+	i = L(c->qid);
+	if(i >= d->ndl)
+		return 0;
+	l = d->dl + i;
+
+	s = p = malloc(1024);
+	e = s + 1024;
+
+	p = seprint(p, e, "addr: ");
+	for(i = 0; i < l->nea; i++)
+		p = seprint(p, e, "%E ", l->eatab[i]);
+	p = seprint(p, e, "\n");
+	p = seprint(p, e, "npkt: %uld\n", l->npkt);
+	p = seprint(p, e, "resent: %uld\n", l->resent);
+	p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
+	p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
+	p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
+
+	p = seprint(p, e, "nl path: %s\n", l->nl->path);
+	p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
+	/* the netlink's own flag, not a repeat of the devlink flag above */
+	p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->nl->flag);
+	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
+	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
+
+	/* never hand readstr more than was actually formatted */
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/*
+ * format the driver-wide settings (debug, autodiscover, rediscover)
+ * and one group of lines per netlink slot whose cc is set.
+ */
+static long
+topctlread(Chan *, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Netlink *n;
+
+	s = p = malloc(1024);
+	e = s + 1024;
+
+	p = seprint(p, e, "debug: %d\n", debug);
+	p = seprint(p, e, "autodiscover: %d\n", autodiscover);
+	p = seprint(p, e, "rediscover: %d\n", rediscover);
+
+	for(i = 0; i < Nnetlink; i++){
+		n = netlinks.nl+i;
+		if(n->cc == 0)
+			continue;
+		p = seprint(p, e, "if%d path: %s\n", i, n->path);
+		p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
+		p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
+		p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
+		p = seprint(p, e, "if%d datamtu: %d\n", i, 
n->datamtu); + } + + if(p - s < len) + len = p - s; + i = readstr(off, db, len, s); + free(s); + return i; +} + +static long +aoeread(Chan *c, void *db, long n, vlong off) +{ + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qzero: + case Qtopdir: + case Qunitdir: + case Qdevlinkdir: + return devdirread(c, db, n, 0, 0, aoegen); + case Qtopctl: + return topctlread(c, db, n, off); + case Qtoplog: + return eventlogread(db, n); + case Qctl: + case Qdata: + case Qconfig: + case Qident: + return unitread(c, db, n, off); + case Qdevlink: + return devlinkread(c, db, n, off); + } +} + +static long +configwrite(Aoedev *d, void *db, long len) +{ + char *s; + Aoeqc *ch; + Frame *f; + Srb *srb; + + if(!UP(d)) + error(Enotup); + if(len > sizeof d->config) + error(Etoobig); + srb = srballoc(len); + s = malloc(len); + memmove(s, db, len); + if(waserror()){ + srbfree(srb); + free(s); + nexterror(); + } + for (;;) { + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + f = freeframe(d); + if(f != nil) + break; + poperror(); + qunlock(d); + if(waserror()) + nexterror(); + tsleep(&up->sleep, return0, 0, 100); + poperror(); + } + f->nhdr = Szaoeqc; + memset(f->hdr, 0, f->nhdr); + ch = (Aoeqc*)f->hdr; + if(hset(d, f, ch, ACconfig) == -1) + return 0; + f->srb = srb; + f->dp = s; + ch->verccmd = AQCfset; + hnputs(ch->cslen, len); + d->nout++; + srb->nout++; + f->dl->npkt++; + f->dlen = len; + /* + * these refer to qlock & waserror in the above for loop. + * there's still the first waserror outstanding. 
+ */ + poperror(); + qunlock(d); + + f->nl->dc->dev->bwrite(f->nl->dc, allocfb(f), 0); + sleep(srb, srbready, srb); + if(srb->error) + error(srb->error); + + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + memmove(d->config, s, len); + d->nconfig = len; + poperror(); + qunlock(d); + + poperror(); /* pop first waserror */ + + srbfree(srb); + memmove(db, s, len); + free(s); + return len; +} + +static int +getmtu(Chan *m) +{ + int n, mtu; + char buf[36]; + + mtu = 1514; + if(m == nil || waserror()) + return mtu; + n = m->dev->read(m, buf, sizeof buf - 1, 0); + poperror(); + if(n > 12){ + buf[n] = 0; + mtu = strtoul(buf + 12, 0, 0); + } + return mtu; +} + +static int +devmaxdata(Aoedev *d) +{ + int i, m, mtu; + Devlink *l; + Netlink *n; + + mtu = 100000; + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + n = l->nl; + if((l->flag & Dup) == 0 || (n->flag & Dup) == 0) + continue; + m = getmtu(n->mtu); + if(m > l->datamtu) + m = l->datamtu; + if(m < mtu) + mtu = m; + } + if(mtu == 100000) + mtu = 1514; + mtu -= Szaoeata; + mtu -= mtu % Aoesectsz; + return mtu; +} + +static int +toggle(char *s, int init) +{ + if(s == nil) + return init ^ 1; + return strcmp(s, "on") == 0; +} + +static void ataident(Aoedev*); + +static long +unitctlwrite(Aoedev *d, void *db, long n) +{ + uint maxbcnt, m; + uvlong bsize; + enum { + Failio, + Ident, + Jumbo, + Maxbno, + Mtu, + Setsize, + }; + Cmdbuf *cb; + Cmdtab *ct; + static Cmdtab cmds[] = { + {Failio, "failio", 1 }, + {Ident, "identify", 1 }, + {Jumbo, "jumbo", 0 }, + {Maxbno, "maxbno", 0 }, + {Mtu, "mtu", 0 }, + {Setsize, "setsize", 0 }, + }; + + cb = parsecmd(db, n); + qlock(d); + if(waserror()){ + qunlock(d); + free(cb); + nexterror(); + } + ct = lookupcmd(cb, cmds, nelem(cmds)); + switch(ct->index){ + case Failio: + downdev(d, "i/o failure"); + break; + case Ident: + ataident(d); + break; + case Jumbo: + m = 0; + if(d->flag & Djumbo) + m = 1; + toggle(cb->f[1], m); + if(m) + d->flag |= Djumbo; + else + d->flag &= ~Djumbo; 
+ break; + case Maxbno: + case Mtu: + maxbcnt = devmaxdata(d); + if(cb->nf > 2) + error(Ecmdargs); + if(cb->nf == 2){ + m = strtoul(cb->f[1], 0, 0); + if(ct->index == Maxbno) + m *= Aoesectsz; + else{ + m -= Szaoeata; + m &= ~(Aoesectsz-1); + } + if(m == 0 || m > maxbcnt) + cmderror(cb, "invalid mtu"); + maxbcnt = m; + d->maxmtu = m; + } else + d->maxmtu = Maxmtu; + d->maxbcnt = maxbcnt; + break; + case Setsize: + bsize = d->realbsize; + if(cb->nf > 2) + error(Ecmdargs); + if(cb->nf == 2){ + bsize = strtoull(cb->f[1], 0, 0); + if(bsize % Aoesectsz) + cmderror(cb, "drive size must be sector aligned"); + } + d->bsize = bsize; + break; + default: + cmderror(cb, "unknown aoe control message"); + } + poperror(); + qunlock(d); + free(cb); + return n; +} + +static long +unitwrite(Chan *c, void *db, long n, vlong off) +{ + long rv; + char *buf; + Aoedev *d; + + d = unit2dev(UNIT(c->qid)); + switch(TYPE(c->qid)){ + default: + error(Ebadarg); + case Qctl: + return unitctlwrite(d, db, n); + case Qident: + error(Eperm); + case Qdata: + return rw(d, Write, db, n, off); + case Qconfig: + if(off + n > sizeof d->config) + error(Etoobig); + buf = malloc(sizeof d->config); + if(waserror()){ + free(buf); + nexterror(); + } + memmove(buf, d->config, d->nconfig); + memmove(buf + off, db, n); + rv = configwrite(d, buf, n + off); + poperror(); + free(buf); + return rv; + } +} + +static Netlink* +addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea) +{ + Netlink *nl, *e; + + lock(&netlinks); + if(waserror()){ + unlock(&netlinks); + nexterror(); + } + nl = netlinks.nl; + e = nl + nelem(netlinks.nl); + for(; nl < e && nl->cc; nl++) + continue; + if(nl == e) + error("out of netlink structures"); + nl->cc = cc; + nl->dc = dc; + nl->mtu = mtu; + strncpy(nl->path, path, sizeof nl->path); + memmove(nl->ea, ea, sizeof nl->ea); + poperror(); + nl->flag |= Dup; + unlock(&netlinks); + return nl; +} + +static int +newunit(void) +{ + int x; + + lock(&units); + if(units.ref == Maxunits) + x = 
-1; + else + x = units.ref++; + unlock(&units); + return x; +} + +static int +dropunit(void) +{ + int x; + + lock(&units); + x = --units.ref; + unlock(&units); + return x; +} + +/* + * always allocate max frames. maxout may change. + */ +static Aoedev* +newdev(long major, long minor, int n) +{ + Aoedev *d; + Frame *f, *e; + + d = malloc(sizeof *d); + f = malloc(sizeof *f*Maxframes); + if(!d || !f) { + free(d); + free(f); + error("aoe device allocation failure"); + } + d->nframes = n; + d->frames = f; + for (e = f + Maxframes; f < e; f++) + f->tag = Tfree; + d->maxout = n; + d->major = major; + d->minor = minor; + d->maxbcnt = Dbcnt; + d->flag = Djumbo; + d->maxmtu = Maxmtu; + d->unit = newunit(); /* bzzt. inaccurate if units removed */ + if(d->unit == -1){ + free(d); + free(d->frames); + error("too many units"); + } + d->dl = d->dltab; + return d; +} + +static Aoedev* +mm2dev(int major, int minor) +{ + Aoedev *d; + + rlock(&devs); + for(d = devs.d; d; d = d->next) + if(d->major == major && d->minor == minor){ + runlock(&devs); + return d; + } + runlock(&devs); + eventlog("mm2dev: %d.%d not found\n", major, minor); + return nil; +} + +/* Find the device in our list. 
If not known, add it */ +static Aoedev* +getdev(long major, long minor, int n) +{ + Aoedev *d; + + if(major == 0xffff || minor == 0xff) + return 0; + wlock(&devs); + if(waserror()){ + wunlock(&devs); + nexterror(); + } + for(d = devs.d; d; d = d->next) + if(d->major == major && d->minor == minor) + break; + if(d == nil) { + d = newdev(major, minor, n); + d->next = devs.d; + devs.d = d; + } + poperror(); + wunlock(&devs); + return d; +} + +static ushort +gbit16(void *a) +{ + uchar *i; + + i = a; + return i[1] << 8 | i[0]; +} + +static ulong +gbit32(void *a) +{ + ulong j; + uchar *i; + + i = a; + j = i[3] << 24; + j |= i[2] << 16; + j |= i[1] << 8; + j |= i[0]; + return j; +} + +static uvlong +gbit64(void *a) +{ + uchar *i; + + i = a; + return (uvlong)gbit32(i+4) << 32 | gbit32(a); +} + +static void +ataident(Aoedev *d) +{ + Aoeata *a; + Block *b; + Frame *f; + + f = freeframe(d); + if(f == nil) + return; + f->nhdr = Szaoeata; + memset(f->hdr, 0, f->nhdr); + a = (Aoeata*)f->hdr; + if(hset(d, f, a, ACata) == -1) + return; + f->srb = srbkalloc(0, 0); + a->cmdstat = Cid; /* ata 6, page 110 */ + a->scnt = 1; + a->lba[3] = 0xa0; + d->nout++; + f->dl->npkt++; + f->bcnt = 512; + f->dlen = 0; + b = allocfb(f); + f->nl->dc->dev->bwrite(f->nl->dc, b, 0); +} + +static int +newdlea(Devlink *l, uchar *ea) +{ + int i; + uchar *t; + + for(i = 0; i < Nea; i++){ + t = l->eatab[i]; + if(i == l->nea){ + memmove(t, ea, Eaddrlen); + return l->nea++; + } + if(memcmp(t, ea, Eaddrlen) == 0) + return i; + } + return -1; +} + +static Devlink* +newdevlink(Aoedev *d, Netlink *n, Aoeqc *c) +{ + int i; + Devlink *l; + + for(i = 0; i < Ndevlink; i++){ + l = d->dl + i; + if(i == d->ndl){ + d->ndl++; + newdlea(l, c->src); + l->datamtu = c->scnt*Aoesectsz; + l->nl = n; + l->flag |= Dup; + l->mintimer = Rtmin; + l->rttavg = Rtmax; + return l; + } + if(l->nl == n){ + newdlea(l, c->src); + l->datamtu = c->scnt*Aoesectsz; + l->flag |= Dup; + return l; + } + } + eventlog("%æ: out of links: %s:%E to %E\n", 
d, n->path, n->ea, c->src); + return 0; +} + +static void +errrsp(Block *b, char *s) +{ + int n; + Aoedev *d; + Aoehdr *h; + Frame *f; + + h = (Aoehdr*)b->rp; + n = nhgetl(h->tag); + if(n == Tmgmt || n == Tfree) + return; + d = mm2dev(nhgets(h->major), h->minor); + if(d == 0) + return; + if(f = getframe(d, n)) + frameerror(d, f, s); +} + +static void +qcfgrsp(Block *b, Netlink *nl) +{ + int major, cmd, cslen, blen; + unsigned n; + Aoedev *d; + Aoeqc *ch; + Devlink *l; + Frame *f; + + ch = (Aoeqc*)b->rp; + major = nhgets(ch->major); + n = nhgetl(ch->tag); + if(n != Tmgmt){ + d = mm2dev(major, ch->minor); + if(d == nil) + return; + qlock(d); + f = getframe(d, n); + if(f == nil){ + qunlock(d); + eventlog("%æ: unknown response tag %ux\n", d, n); + return; + } + cslen = nhgets(ch->cslen); + blen = BLEN(b) - Szaoeqc; + if(cslen < blen) + eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n", + d, n, cslen, blen); + if(cslen > blen){ + eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n", + d, n, cslen, blen); + cslen = blen; + } + memmove(f->dp, ch + 1, cslen); + f->srb->nout--; + wakeup(f->srb); + d->nout--; + f->srb = nil; + f->tag = Tfree; + qunlock(d); + return; + } + + cmd = ch->verccmd & 0xf; + if(cmd != 0){ + eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd); + return; + } + n = nhgets(ch->bufcnt); + if(n > Maxframes) + n = Maxframes; + + if(waserror()){ + eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr); + return; + } + d = getdev(major, ch->minor, n); + poperror(); + if(d == 0) + return; + + qlock(d); + *up->errstr = 0; + if(waserror()){ + qunlock(d); + eventlog("%æ: %s\n", d, up->errstr); + nexterror(); + } + + l = newdevlink(d, nl, ch); /* add this interface. 
*/ + + d->fwver = nhgets(ch->fwver); + n = nhgets(ch->cslen); + if(n > sizeof d->config) + n = sizeof d->config; + d->nconfig = n; + memmove(d->config, ch + 1, n); + + /* manually set mtu may be reset lower if conditions warrant */ + if(l){ + n = devmaxdata(d); + if(!(d->flag & Djumbo)) + n = Dbcnt; + if(n > d->maxmtu) + n = d->maxmtu; + if(n != d->maxbcnt){ + eventlog("%æ: setting %d byte mtu on %s:%E\n", + d, n, nl->path, nl->ea); + d->maxbcnt = n; + } + } + if(d->nopen == 0) + ataident(d); + poperror(); + qunlock(d); +} + +static void +idmove(char *p, ushort *a, unsigned n) +{ + int i; + char *op, *e; + + op = p; + for(i = 0; i < n / 2; i++){ + *p++ = a[i] >> 8; + *p++ = a[i]; + } + *p = 0; + while(p > op && *--p == ' ') + *p = 0; + e = p; + p = op; + while(*p == ' ') + p++; + memmove(op, p, n - (e - p)); +} + +static vlong +aoeidentify(Aoedev *d, ushort *id) +{ + int i; + vlong s; + + d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup); + + i = gbit16(id+83) | gbit16(id+86); + if(i & (1<<10)){ + d->flag |= Dllba; + s = gbit64(id+100); + }else + s = gbit32(id+60); + + i = gbit16(id+83); + if((i>>14) == 1) { + if(i & (1<<3)) + d->flag |= Dpower; + i = gbit16(id+82); + if(i & 1) + d->flag |= Dsmart; + if(i & (1<<14)) + d->flag |= Dnop; + } +// eventlog("%æ up\n", d); + d->flag |= Dup; + memmove(d->ident, id, sizeof d->ident); + return s; +} + +static void +newvers(Aoedev *d) +{ + d->vers = ainc(&drivevers); +} + +static int +identify(Aoedev *d, ushort *id) +{ + vlong osectors, s; + uchar oserial[21]; + + s = aoeidentify(d, id); + if(s == -1) + return -1; + osectors = d->realbsize; + memmove(oserial, d->serial, sizeof d->serial); + + idmove(d->serial, id+10, 20); + idmove(d->firmware, id+23, 8); + idmove(d->model, id+27, 40); + + s *= Aoesectsz; + if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){ + d->bsize = s; + d->realbsize = s; +// d->mediachange = 1; + newvers(d); + } + return 0; +} + +static void +atarsp(Block *b) +{ + unsigned n; + short major; + 
Aoeata *ahin, *ahout; + Aoedev *d; + Frame *f; + Srb *srb; + + ahin = (Aoeata*)b->rp; + major = nhgets(ahin->major); + d = mm2dev(major, ahin->minor); + if(d == nil) + return; + qlock(d); + if(waserror()){ + qunlock(d); + nexterror(); + } + n = nhgetl(ahin->tag); + f = getframe(d, n); + if(f == nil){ + dprint("%æ: unexpected response; tag %ux\n", d, n); + goto bail; + } + rtupdate(f->dl, tsince(f->tag)); + ahout = (Aoeata*)f->hdr; + srb = f->srb; + + if(ahin->cmdstat & 0xa9){ + eventlog("%æ: ata error cmd %.2ux stat %.2ux\n", + d, ahout->cmdstat, ahin->cmdstat); + if(srb) + srb->error = Eio; + } else { + n = ahout->scnt * Aoesectsz; + switch(ahout->cmdstat){ + case Crd: + case Crdext: + if(BLEN(b) - Szaoeata < n){ + eventlog("%æ: runt read blen %ld expect %d\n", + d, BLEN(b), n); + goto bail; + } + memmove(f->dp, b->rp + Szaoeata, n); + case Cwr: + case Cwrext: + if(n > Dbcnt) + f->nl->lostjumbo = 0; + if(f->bcnt -= n){ + f->lba += n / Aoesectsz; + f->dp = (uchar*)f->dp + n; + resend(d, f); + goto bail; + } + break; + case Cid: + if(BLEN(b) - Szaoeata < 512){ + eventlog("%æ: runt identify blen %ld expect %d\n", + d, BLEN(b), n); + goto bail; + } + identify(d, (ushort*)(b->rp + Szaoeata)); + break; + default: + eventlog("%æ: unknown ata command %.2ux \n", + d, ahout->cmdstat); + } + } + + if(srb && --srb->nout == 0 && srb->len == 0) + wakeup(srb); + f->srb = nil; + f->tag = Tfree; + d->nout--; + + work(d); +bail: + poperror(); + qunlock(d); +} + +static void +netrdaoeproc(void *v) +{ + int idx; + char name[Maxpath+1], *s; + Aoehdr *h; + Block *b; + Netlink *nl; + + nl = (Netlink*)v; + idx = nl - netlinks.nl; + netlinks.reader[idx] = 1; + kstrcpy(name, nl->path, Maxpath); + + if(waserror()){ + eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr); + netlinks.reader[idx] = 0; + wakeup(netlinks.rendez + idx); + pexit(up->errstr, 1); + } + if(autodiscover) + discover(0xffff, 0xff); + for (;;) { + if(!(nl->flag & Dup)) + error("netlink is down"); + if(nl->dc == nil) + 
panic("netrdaoe: nl->dc == nil"); + b = nl->dc->dev->bread(nl->dc, 1<<16, 0); + if(b == nil) + error("network read"); + h = (Aoehdr*)b->rp; + if(h->verflags & AFrsp) + if(s = aoeerror(h)){ + eventlog("%s: %s\n", nl->path, s); + errrsp(b, s); + }else if(h->cmd == ACata) + atarsp(b); + else if(h->cmd == ACconfig) + qcfgrsp(b, nl); + else if((h->cmd & 0xf0) == 0){ + eventlog("%s: unknown cmd %d\n", + nl->path, h->cmd); + errrsp(b, "unknown command"); + } + freeb(b); + } +} + +static void +getaddr(char *path, uchar *ea) +{ + int n; + char buf[2*Eaddrlen+1]; + Chan *c; + + uprint("%s/addr", path); + c = namec(up->genbuf, Aopen, OREAD, 0); + if(waserror()) { + cclose(c); + nexterror(); + } + if(c == nil) + panic("æ: getaddr: c == nil"); + n = c->dev->read(c, buf, sizeof buf-1, 0); + poperror(); + cclose(c); + buf[n] = 0; + if(parseether(ea, buf) < 0) + error("invalid mac"); +} + +static void +netbind(char *path) +{ + char addr[Maxpath]; + uchar ea[2*Eaddrlen+1]; + Chan *dc, *cc, *mtu; + Netlink *nl; + + snprint(addr, sizeof addr, "%s!0x%x", path, ETAOE); + dc = chandial(addr, nil, nil, &cc); + snprint(addr, sizeof addr, "%s/mtu", path); + if(waserror()) + mtu = nil; + else { + mtu = namec(addr, Aopen, OREAD, 0); + poperror(); + } + + if(waserror()){ + cclose(dc); + cclose(cc); + if(mtu) + cclose(mtu); + nexterror(); + } + if(dc == nil || cc == nil) + error(Enonexist); + getaddr(path, ea); + nl = addnet(path, cc, dc, mtu, ea); + snprint(addr, sizeof addr, "netrdaoe@%s", path); + kproc(addr, netrdaoeproc, nl); + poperror(); +} + +static int +unbound(void *v) +{ + return *(int*)v != 0; +} + +static void +netunbind(char *path) +{ + int i, idx; + Aoedev *d, *p, *next; + Chan *dc, *cc; + Devlink *l; + Frame *f; + Netlink *n, *e; + + n = netlinks.nl; + e = n + nelem(netlinks.nl); + + lock(&netlinks); + for(; n < e; n++) + if(n->dc && strcmp(n->path, path) == 0) + break; + unlock(&netlinks); + if(n == e) + error("device not bound"); + + /* + * hunt down devices using this 
interface; disable + * this also terminates the reader. + */ + idx = n - netlinks.nl; + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + if(l->nl == n) + l->flag &= ~Dup; + } + qunlock(d); + } + n->flag &= ~Dup; + wunlock(&devs); + + /* confirm reader is down. */ + while(waserror()) + ; + sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx); + poperror(); + + /* reschedule packets. */ + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->nframes; i++){ + f = d->frames + i; + if(f->tag != Tfree && f->nl == n) + resend(d, f); + } + qunlock(d); + } + wunlock(&devs); + + /* squeeze devlink pool. (we assert nobody is using them now) */ + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + if(l->nl == n) + memmove(l, l + 1, sizeof *l * (--d->ndl - i)); + } + qunlock(d); + } + wunlock(&devs); + + /* close device link. */ + lock(&netlinks); + dc = n->dc; + cc = n->cc; + if(n->mtu) + cclose(n->mtu); + memset(n, 0, sizeof *n); + unlock(&netlinks); + + cclose(dc); + cclose(cc); + + /* squeeze orphan devices */ + wlock(&devs); + for(p = d = devs.d; d; d = next){ + next = d->next; + if(d->ndl > 0){ + p = d; + continue; + } + qlock(d); + downdev(d, "orphan"); + qunlock(d); + if(p != devs.d) + p->next = next; + else{ + devs.d = next; + p = devs.d; + } + free(d->frames); + free(d); + dropunit(); + } + wunlock(&devs); +} + +static void +strtoss(char *f, ushort *shelf, ushort *slot) +{ + ulong sh; + char *s; + + *shelf = 0xffff; + *slot = 0xff; + if(!f) + return; + *shelf = sh = strtol(f, &s, 0); + if(s == f || sh > 0xffff) + error("bad shelf"); + f = s; + if(*f++ == '.'){ + *slot = strtol(f, &s, 0); + if(s == f || *slot > 0xff) + error("bad shelf"); + }else + *slot = 0xff; +} + +static void +discoverstr(char *f) +{ + ushort shelf, slot; + + strtoss(f, &shelf, &slot); + discover(shelf, slot); +} + +static void +removedev(Aoedev 
*d) +{ + int i; + Aoedev *p; + + wlock(&devs); + p = 0; + if(d != devs.d) + for(p = devs.d; p; p = p->next) + if(p->next == d) + break; + qlock(d); + d->flag &= ~Dup; + newvers(d); + d->ndl = 0; + qunlock(d); + for(i = 0; i < d->nframes; i++) + frameerror(d, d->frames+i, Enotup); + + if(p) + p->next = d->next; + else + devs.d = d->next; + free(d->frames); + free(d); + dropunit(); + wunlock(&devs); +} + + +static void +aoeremove(Chan *c) +{ + switch(TYPE(c->qid)){ + default: + case Qzero: + case Qtopdir: + case Qtoplog: + case Qtopctl: + case Qctl: + case Qdata: + case Qconfig: + case Qident: + error(Eperm); + case Qunitdir: + removedev(unit2dev(UNIT(c->qid))); + break; + } +} + +static void +removestr(char *f) +{ + ushort shelf, slot; + Aoedev *d; + + strtoss(f, &shelf, &slot); + wlock(&devs); + for(d = devs.d; d; d = d->next) + if(shelf == d->major && slot == d->minor){ + wunlock(&devs); /* BOTCH */ + removedev(d); + return; + } + wunlock(&devs); + error("device not bound"); +} + +static long +topctlwrite(void *db, long n) +{ + enum { + Autodiscover, + Bind, + Debug, + Discover, + Closewait, + Rediscover, + Remove, + Unbind, + }; + char *f; + Cmdbuf *cb; + Cmdtab *ct; + static Cmdtab cmds[] = { + { Autodiscover, "autodiscover", 0 }, + { Bind, "bind", 2 }, + { Debug, "debug", 0 }, + { Discover, "discover", 0 }, + { Rediscover, "rediscover", 0 }, + { Remove, "remove", 2 }, + { Unbind, "unbind", 2 }, + }; + + cb = parsecmd(db, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, cmds, nelem(cmds)); + f = cb->f[1]; + switch(ct->index){ + case Autodiscover: + autodiscover = toggle(f, autodiscover); + break; + case Bind: + netbind(f); + break; + case Debug: + debug = toggle(f, debug); + break; + case Discover: + discoverstr(f); + break; + case Rediscover: + rediscover = toggle(f, rediscover); + break; + case Remove: + removestr(f); /* depricated */ + break; + case Unbind: + netunbind(f); + break; + default: + cmderror(cb, "unknown aoe control 
message"); + } + poperror(); + free(cb); + return n; +} + +static long +aoewrite(Chan *c, void *db, long n, vlong off) +{ + switch(TYPE(c->qid)){ + default: + case Qzero: + case Qtopdir: + case Qunitdir: + case Qtoplog: + error(Eperm); + case Qtopctl: + return topctlwrite(db, n); + case Qctl: + case Qdata: + case Qconfig: + case Qident: + return unitwrite(c, db, n, off); + } +} + +Dev aoedevtab = { + L'æ', + "aoe", + + devreset, + devinit, + devshutdown, + aoeattach, + aoewalk, + aoestat, + aoeopen, + devcreate, + aoeclose, + aoeread, + devbread, + aoewrite, + devbwrite, + aoeremove, + devwstat, + devpower, + devconfig, +}; diff -Nru /sys/src/9k/port/devcap.c /sys/src/9k/port/devcap.c --- /sys/src/9k/port/devcap.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devcap.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,286 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +enum +{ + Hashlen= SHA1dlen, + Maxhash= 256, +}; + +/* + * if a process knows cap->cap, it can change user + * to capabilty->user. 
+ */ +typedef struct Caphash Caphash; +struct Caphash +{ + Caphash *next; + char hash[Hashlen]; + ulong ticks; +}; + +struct +{ + QLock; + Caphash *first; + int nhash; +} capalloc; + +enum +{ + Qdir, + Qhash, + Quse, +}; + +/* caphash must be last */ +Dirtab capdir[] = +{ + ".", {Qdir,0,QTDIR}, 0, DMDIR|0500, + "capuse", {Quse}, 0, 0222, + "caphash", {Qhash}, 0, 0200, +}; +int ncapdir = nelem(capdir); + +static Chan* +capattach(char *spec) +{ + return devattach(L'¤', spec); +} + +static Walkqid* +capwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, capdir, ncapdir, devgen); +} + +static void +capremove(Chan *c) +{ + if(iseve() && c->qid.path == Qhash) + ncapdir = nelem(capdir)-1; /* shrink the count handed to devwalk/devstat/devdirread so the last entry, caphash, is left out */ + else + error(Eperm); +} + + +static long +capstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, capdir, ncapdir, devgen); +} + +/* + * mark the channel open: the directory only for OREAD, caphash only when iseve() + */ +static Chan* +capopen(Chan *c, int omode) +{ + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Ebadarg); + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; + } + + switch((ulong)c->qid.path){ + case Qhash: + if(!iseve()) + error(Eperm); + break; + } + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +/* +static char* +hashstr(uchar *hash) +{ + static char buf[2*Hashlen+1]; + int i; + + for(i = 0; i < Hashlen; i++) + sprint(buf+2*i, "%2.2ux", hash[i]); + buf[2*Hashlen] = 0; + return buf; +} + */ + +static Caphash* +remcap(uchar *hash) +{ + Caphash *t, **l; + + qlock(&capalloc); + + /* find the matching capability; it is unlinked below and returned (nil when none matches) */ + for(l = &capalloc.first; *l != nil;){ + t = *l; + if(memcmp(hash, t->hash, Hashlen) == 0) + break; + l = &t->next; + } + t = *l; + if(t != nil){ + capalloc.nhash--; + *l = t->next; + } + qunlock(&capalloc); + + return t; +} + +/* add a capability, throwing out any old ones */ +static void +addcap(uchar *hash) +{ + Caphash *p, *t, **l; + + p = smalloc(sizeof *p); + memmove(p->hash, hash, 
Hashlen); + p->next = nil; + p->ticks = m->ticks; + + qlock(&capalloc); + + /* trim extras */ + while(capalloc.nhash >= Maxhash){ + t = capalloc.first; + if(t == nil) + panic("addcap"); + capalloc.first = t->next; + free(t); + capalloc.nhash--; + } + + /* add new one */ + for(l = &capalloc.first; *l != nil; l = &(*l)->next) + ; + *l = p; + capalloc.nhash++; + + qunlock(&capalloc); +} + +static void +capclose(Chan*) +{ +} + +static long +capread(Chan *c, void *va, long n, vlong) +{ + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, va, n, capdir, ncapdir, devgen); + + default: + error(Eperm); + break; + } + return n; +} + +static long +capwrite(Chan *c, void *va, long n, vlong) +{ + Caphash *p; + char *cp; + uchar hash[Hashlen]; + char *key, *from, *to; + char err[256]; + + switch((ulong)c->qid.path){ + case Qhash: + if(!iseve()) + error(Eperm); + if(n < Hashlen) + error(Eshort); + memmove(hash, va, Hashlen); + addcap(hash); + break; + + case Quse: + /* copy key to avoid a fault in hmac_xx */ + cp = nil; + if(waserror()){ + free(cp); + nexterror(); + } + cp = smalloc(n+1); + memmove(cp, va, n); + cp[n] = 0; + + from = cp; + key = strrchr(cp, '@'); + if(key == nil) + error(Eshort); + *key++ = 0; + + hmac_sha1((uchar*)from, strlen(from), (uchar*)key, strlen(key), hash, nil); + + p = remcap(hash); + if(p == nil){ + snprint(err, sizeof err, "invalid capability %s@%s", from, key); + error(err); + } + + /* if a from user is supplied, make sure it matches */ + to = strchr(from, '@'); + if(to == nil){ + to = from; + } else { + *to++ = 0; + if(strcmp(from, up->user) != 0) + error("capability must match user"); + } + + /* set user id */ + kstrdup(&up->user, to); + up->basepri = PriNormal; + + free(p); + free(cp); + poperror(); + break; + + default: + error(Eperm); + break; + } + + return n; +} + +Dev capdevtab = { + L'¤', + "cap", + + devreset, + devinit, + devshutdown, + capattach, + capwalk, + capstat, + capopen, + devcreate, + capclose, + capread, + 
devbread, + capwrite, + devbwrite, + capremove, + devwstat +}; diff -Nru /sys/src/9k/port/devcons.c /sys/src/9k/port/devcons.c --- /sys/src/9k/port/devcons.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devcons.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1420 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +enum +{ + Nconsdevs = 64, /* max number of consoles */ + + /* Consdev flags */ + Ciprint = 2, /* call this fn from iprint */ + Cntorn = 4, /* change \n to \r\n */ +}; + +typedef struct Consdev Consdev; + +struct Consdev +{ + Chan* c; /* external file */ + Queue* q; /* i/o queue, if any */ + void (*fn)(char*, int); /* i/o function when no queue */ + int flags; +}; + +void (*consdebug)(void) = nil; +void (*consputs)(char*, int) = nil; + +static void kmesgputs(char *, int); +static void kprintputs(char*, int); + +static Lock consdevslock; +static int nconsdevs = 3; +static Consdev consdevs[Nconsdevs] = /* keep this order */ +{ + {nil, nil, kmesgputs, 0}, /* kmesg */ + {nil, nil, kprintputs, Ciprint}, /* kprint */ + {nil, nil, uartputs, Ciprint|Cntorn}, /* serial */ +}; + +static int nkbdqs; +static int nkbdprocs; +static Queue* kbdqs[Nconsdevs]; +static int kbdprocs[Nconsdevs]; +static Queue* kbdq; /* unprocessed console input */ +static Queue* lineq; /* processed console input */ +static Queue* serialoq; /* serial console output */ +static Queue* kprintoq; /* console output, for /dev/kprint */ +static ulong kprintinuse; /* test and set whether /dev/kprint is open */ + +int panicking; + +static struct +{ + QLock; + + int raw; /* true if we shouldn't process input */ + Ref ctl; /* number of opens to the control file */ + int x; /* index into line */ + char line[1024]; /* current input line */ + + int count; + int ctlpoff; + + /* + * A place to save up characters at interrupt time + * before dumping them in the queue + */ + Lock lockputc; + char istage[1024]; + char *iw; + char *ir; 
+ char *ie; +} kbd = { + .iw = kbd.istage, + .ir = kbd.istage, + .ie = kbd.istage + sizeof(kbd.istage), +}; + +char *sysname; +vlong fasthz; + +static void seedrand(void); +static int readtime(ulong, char*, int); +static int readbintime(char*, int); +static int writetime(char*, int); +static int writebintime(char*, int); + +enum +{ + CMhalt, + CMreboot, + CMpanic, +}; + +Cmdtab rebootmsg[] = +{ + CMhalt, "halt", 1, + CMreboot, "reboot", 0, + CMpanic, "panic", 0, +}; + +/* To keep the rest of the kernel unaware of new consdevs for now */ +static void +kprintputs(char *s, int n) +{ + if(consputs != nil) + consputs(s, n); +} + +int +addconsdev(Queue *q, void (*fn)(char*,int), int i, int flags) +{ + Consdev *c; + + ilock(&consdevslock); + if(i < 0) + i = nconsdevs; /* negative index: append a new console */ + else + flags |= consdevs[i].flags; /* existing slot: keep the flags it already had */ + if(nconsdevs == Nconsdevs) + panic("Nconsdevs too small"); + c = &consdevs[i]; + c->flags = flags; + c->q = q; + c->fn = fn; + if(i == nconsdevs) + nconsdevs++; + iunlock(&consdevslock); + return i; +} + +void +delconsdevs(void) +{ + nconsdevs = 2; /* throw away serial consoles and kprint */ + consdevs[1].q = nil; +} + +static void +conskbdqproc(void *a) +{ + char buf[64]; + Queue *q; + int nr; + + q = a; + while((nr = qread(q, buf, sizeof(buf))) > 0) + qwrite(kbdq, buf, nr); + pexit("hangup", 1); +} + +static void +kickkbdq(void) +{ + int i; + + /* NOTE(review): nothing in this function updates nkbdprocs; confirm it is maintained elsewhere */ if(up != nil && nkbdqs > 1 && nkbdprocs != nkbdqs){ + lock(&consdevslock); + if(nkbdprocs == nkbdqs){ + unlock(&consdevslock); + return; + } + for(i = 0; i < nkbdqs; i++) + if(kbdprocs[i] == 0){ + kbdprocs[i] = 1; + kproc("conskbdq", conskbdqproc, kbdqs[i]); + } + unlock(&consdevslock); + } +} + +int +addkbdq(Queue *q, int i) +{ + int n; + + ilock(&consdevslock); + if(i < 0) + i = nkbdqs++; + if(nkbdqs == Nconsdevs) + panic("Nconsdevs too small"); + kbdqs[i] = q; + n = nkbdqs; + iunlock(&consdevslock); + switch(n){ + case 1: + /* if there's just one, pull directly from it. 
*/ + kbdq = q; + break; + case 2: + /* later we'll merge bytes from all kbdqs into a single kbdq */ + kbdq = qopen(4*1024, 0, 0, 0); + if(kbdq == nil) + panic("no kbdq"); + /* fall through: with two or more queues, kickkbdq is called */ + default: + kickkbdq(); + } + return i; +} + +void +printinit(void) +{ + lineq = qopen(2*1024, 0, nil, nil); + if(lineq == nil) + panic("printinit"); + qnoblock(lineq, 1); +} + +int +consactive(void) +{ + int i; + Queue *q; + + for(i = 0; i < nconsdevs; i++) + if((q = consdevs[i].q) != nil && qlen(q) > 0) /* some console queue still holds bytes */ + return 1; + return 0; +} + +void +prflush(void) +{ +// ulong now; + +// now = m->ticks; + while(consactive()) + uartpush(); +// if(m->ticks - now >= 30*HZ) +// break; +} + +/* + * Log console output so it can be retrieved via /dev/kmesg. + * This is good for catching boot-time messages after the fact. + */ +struct { + Lock lk; + char buf[16384]; + uint n; +} kmesg; + +static void +kmesgputs(char *str, int n) +{ + uint nn, d; + + ilock(&kmesg.lk); + /* take the tail of huge writes */ + if(n > sizeof kmesg.buf){ + d = n - sizeof kmesg.buf; + str += d; + n -= d; + } + + /* slide the buffer down to make room */ + nn = kmesg.n; + if(nn + n >= sizeof kmesg.buf){ + d = nn + n - sizeof kmesg.buf; + if(d) + memmove(kmesg.buf, kmesg.buf+d, sizeof kmesg.buf-d); + nn -= d; + } + + /* copy the data in */ + memmove(kmesg.buf+nn, str, n); + nn += n; + kmesg.n = nn; + iunlock(&kmesg.lk); +} + +static void +consdevputs(Consdev *c, char *s, int n, int usewrite) +{ + Chan *cc; + Queue *q; + + /* preference order: the external Chan (only when usewrite), then an open queue, then fn */ if((cc = c->c) != nil && usewrite) + cc->dev->write(cc, s, n, 0); + else if((q = c->q) != nil && !qisclosed(q)) + if(usewrite) + qwrite(q, s, n); + else + qiwrite(q, s, n); + else if(c->fn != nil) + c->fn(s, n); +} + +/* + * Print a string on the console. Convert \n to \r\n for serial + * line consoles. Locking of the queues is left up to the screen + * or uart code. Multi-line messages to serial consoles may get + * interspersed with other messages. 
+ */ +static void +putstrn0(char *str, int n, int usewrite) +{ + Consdev *c; + char *s, *t; + int i, len, m; + + if(!islo()) + usewrite = 0; /* usewrite is honoured only when islo() is true */ + + for(i = 0; i < nconsdevs; i++){ + c = &consdevs[i]; + len = n; + s = str; + while(len > 0){ + t = nil; + if((c->flags&Cntorn) && !kbd.raw) + t = memchr(s, '\n', len); + if(t != nil && !kbd.raw){ + m = t-s; /* bytes before the newline; the newline itself is replaced by "\r\n" */ + consdevputs(c, s, m, usewrite); + consdevputs(c, "\r\n", 2, usewrite); + len -= m+1; + s = t+1; + }else{ + consdevputs(c, s, len, usewrite); + break; + } + } + } +} + +void +putstrn(char *str, int n) +{ + putstrn0(str, n, 0); +} + +int +print(char *fmt, ...) +{ + int n; + va_list arg; + char buf[PRINTSIZE]; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + putstrn(buf, n); +if(strchr(buf, '\n') != nil)prflush(); + + return n; +} + +/* + * Want to interlock iprints to avoid interlaced output on + * multiprocessor, but don't want to deadlock if one processor + * dies during print and another has something important to say. + * Make a good faith effort. + */ +static Lock iprintlock; + +static int +iprintcanlock(Lock *l) +{ + int i; + + for(i=0; i<1000; i++){ /* up to 1000 attempts with microdelay(100) between them */ + if(canlock(l)) + return 1; + if(ownlock(l)) + return 0; + microdelay(100); + } + return 0; +} + +int +iprint(char *fmt, ...) +{ + Mreg s; + int i, n, locked, mlocked; + va_list arg; + char buf[PRINTSIZE]; + + s = splhi(); + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + mlocked = malloclocked(); + locked = iprintcanlock(&iprintlock); /* output proceeds even when the lock was not obtained */ + for(i = 0; i < nconsdevs; i++){ + if((consdevs[i].flags&Ciprint) != 0){ + if(consdevs[i].q != nil && !mlocked) + qiwrite(consdevs[i].q, buf, n); + else + consdevs[i].fn(buf, n); /* NOTE(review): fn is called without a nil check; confirm every Ciprint console sets fn */ + } + } + if(locked) + unlock(&iprintlock); + splx(s); + + return n; +} + +#pragma profile 0 +void +panic(char *fmt, ...) 
+{ + int n; + va_list arg; + char buf[PRINTSIZE]; + + if(panicking) + exit(1); /* already panicking: do not recurse */ + + panicking = 1; + + if(malloclocked()) + consdevs[2].q = nil; /* force direct uart output */ + consdevs[1].q = nil; /* don't try to write to /dev/kprint */ + + splhi(); + strcpy(buf, "panic: "); + va_start(arg, fmt); + n = vseprint(buf+strlen(buf), buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + iprint("%s\n", buf); + if(consdebug) + (*consdebug)(); + prflush(); +// splx(s); + buf[n] = '\n'; + putstrn(buf, n+1); + dumpstack(); + prflush(); +// dump(); + + exit(1); +} + +#pragma profile 1 +/* libmp at least contains a few calls to sysfatal; simulate with panic */ +void +sysfatal(char *fmt, ...) +{ + char err[256]; + va_list arg; + + va_start(arg, fmt); + vseprint(err, err + sizeof err, fmt, arg); + va_end(arg); + panic("sysfatal: %s", err); +} + +void +_assert(char *fmt) +{ + panic("assert failed at %#p: %s", getcallerpc(&fmt), fmt); +} + +int +pprint(char *fmt, ...) +{ + int n; + Chan *c; + va_list arg; + char buf[2*PRINTSIZE]; + + if(up == nil || up->fgrp == nil) + return 0; + + c = up->fgrp->fd[2]; /* the current process's file descriptor 2 */ + if(c==0 || (c->mode!=OWRITE && c->mode!=ORDWR)) + return 0; + n = snprint(buf, sizeof buf, "%s %d: ", up->text, up->pid); + va_start(arg, fmt); + n = vseprint(buf+n, buf+sizeof(buf), fmt, arg) - buf; /* total length, "text pid: " prefix included */ + va_end(arg); + + if(waserror()) + return 0; + c->dev->write(c, buf, n, c->offset); + poperror(); + + lock(c); + c->offset += n; + unlock(c); + + return n; +} + +static void +echo(char *buf, int n) +{ + Mreg s; + static int ctrlt, pid; + char *e, *p; + + if(n == 0) + return; + + e = buf+n; + for(p = buf; p < e; p++){ + switch(*p){ + case 0x10: /* ^P */ + if(cpuserver && !kbd.ctlpoff){ + active.exiting = 1; + return; + } + break; + case 0x14: /* ^T */ + ctrlt++; + if(ctrlt > 2) + ctrlt = 2; + continue; + } + + if(ctrlt != 2) + continue; + + /* ^T escapes: reached only for the character following two ^T */ + ctrlt = 0; + switch(*p){ + case 'S': + s = splhi(); + dumpstack(); + procdump(); + splx(s); + return; + case 's': + dumpstack(); 
+ return; + case 'x': + ixsummary(); + mallocsummary(); +// memorysummary(); + return; + case 'd': + if(consdebug == nil) + consdebug = rdb; + else + consdebug = nil; + print("consdebug now %#p\n", consdebug); + return; + case 'D': + if(consdebug == nil) + consdebug = rdb; + consdebug(); + return; + case 'p': + s = spllo(); + procdump(); + splx(s); + return; + case 'q': + scheddump(); + return; + case 'k': + killbig("^t ^t k"); + return; + case 'r': + exit(0); + return; + } + } + + if(kbdq != nil) + qproduce(kbdq, buf, n); + if(kbd.raw == 0) + putstrn(buf, n); +} + +/* + * Called by a uart interrupt for console input. + * + * turn '\r' into '\n' before putting it into the queue. + */ +int +kbdcr2nl(Queue*, int ch) +{ + char *next; + + ilock(&kbd.lockputc); /* just a mutex */ + if(ch == '\r' && !kbd.raw) + ch = '\n'; + next = kbd.iw+1; + if(next >= kbd.ie) + next = kbd.istage; + if(next != kbd.ir){ /* room left in the istage ring; otherwise ch is dropped */ + *kbd.iw = ch; + kbd.iw = next; + } + iunlock(&kbd.lockputc); + return 0; +} + +/* + * Put character, possibly a rune, into read queue at interrupt time. + * Called at interrupt time to process a character. + */ +int +kbdputc(Queue*, int ch) +{ + int i, n; + char buf[UTFmax]; + Rune r; + char *next; + + if(kbd.ir == nil) + return 0; /* in case we're not inited yet */ + + ilock(&kbd.lockputc); /* just a mutex */ + r = ch; + n = runetochar(buf, &r); + for(i = 0; i < n; i++){ + next = kbd.iw+1; + if(next >= kbd.ie) + next = kbd.istage; + if(next == kbd.ir) + break; /* ring full: the remaining bytes of this rune are dropped */ + *kbd.iw = buf[i]; + kbd.iw = next; + } + iunlock(&kbd.lockputc); + return 0; +} + +/* + * we save up input characters till clock time to reduce + * per character interrupt overhead. 
+ */ +static void +kbdputcclock(void) +{ + char *iw; + + /* this amortizes cost of qproduce */ + if(kbd.iw != kbd.ir){ + iw = kbd.iw; + if(iw < kbd.ir){ /* writer has wrapped: echo ir..ie first, then restart at istage */ + echo(kbd.ir, kbd.ie-kbd.ir); + kbd.ir = kbd.istage; + } + if(kbd.ir != iw){ + echo(kbd.ir, iw-kbd.ir); + kbd.ir = iw; + } + } +} + +enum{ + Qdir, + Qbintime, + Qconfig, + Qcons, + Qconsctl, + Qcputime, + Qdrivers, + Qkmesg, + Qkprint, + Qhostdomain, + Qhostowner, + Qnull, + Qosversion, + Qpgrpid, + Qpid, + Qppid, + Qrandom, + Qreboot, + Qswap, + Qsysname, + Qsysstat, + Qtime, + Quser, + Qusers, + Qzero, +}; + +enum +{ + VLNUMSIZE= 22, +}; + +static Dirtab consdir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "bintime", {Qbintime}, 24, 0664, + "config", {Qconfig}, 0, 0444, + "cons", {Qcons}, 0, 0660, + "consctl", {Qconsctl}, 0, 0220, + "cputime", {Qcputime}, 6*NUMSIZE, 0444, + "drivers", {Qdrivers}, 0, 0444, + "hostdomain", {Qhostdomain}, DOMLEN, 0664, + "hostowner", {Qhostowner}, 0, 0664, + "kmesg", {Qkmesg}, 0, 0440, + "kprint", {Qkprint, 0, QTEXCL}, 0, DMEXCL|0440, + "null", {Qnull}, 0, 0666, + "osversion", {Qosversion}, 0, 0444, + "pgrpid", {Qpgrpid}, NUMSIZE, 0444, + "pid", {Qpid}, NUMSIZE, 0444, + "ppid", {Qppid}, NUMSIZE, 0444, + "random", {Qrandom}, 0, 0444, + "reboot", {Qreboot}, 0, 0664, + "swap", {Qswap}, 0, 0664, + "sysname", {Qsysname}, 0, 0664, + "sysstat", {Qsysstat}, 0, 0666, + "time", {Qtime}, NUMSIZE+3*VLNUMSIZE, 0664, + "user", {Quser}, 0, 0666, + "users", {Qusers}, 0, 0644, + "zero", {Qzero}, 0, 0444, +}; + +int +readnum(ulong off, char *buf, ulong n, ulong val, int size) +{ + char tmp[64]; + + snprint(tmp, sizeof(tmp), "%*lud", size-1, val); + tmp[size-1] = ' '; /* the field is size bytes: the number right-justified in size-1 columns, then a blank; assumes size <= sizeof tmp */ + if(off >= size) + return 0; + if(off+n > size) + n = size-off; + memmove(buf, tmp+off, n); + return n; +} + +long +readstr(long offset, char *buf, long n, char *str) +{ + long size; + + size = strlen(str); + if(offset >= size) + return 0; + if(offset+n > size) + n = size-offset; /* clamp to what remains of str; the terminating NUL is never copied */ + memmove(buf, str+offset, n); + return n; +} + +static 
void +consinit(void) +{ + todinit(); + randominit(); + /* + * at 115200 baud, the 1024 char buffer takes 56 ms to process, + * processing it every 22 ms should be fine + */ + addclock0link(kbdputcclock, 22); + kickkbdq(); +} + +static Chan* +consattach(char *spec) +{ + return devattach('c', spec); +} + +static Walkqid* +conswalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name,nname, consdir, nelem(consdir), devgen); +} + +static long +consstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, consdir, nelem(consdir), devgen); +} + +static Chan* +consopen(Chan *c, int omode) +{ + c->aux = nil; + c = devopen(c, omode, consdir, nelem(consdir), devgen); + switch((ulong)c->qid.path){ + case Qconsctl: + incref(&kbd.ctl); + break; + + case Qkprint: + if(TAS(&kprintinuse) != 0){ + c->flag &= ~COPEN; + error(Einuse); + } + if(kprintoq == nil){ + kprintoq = qopen(8*1024, Qcoalesce, 0, 0); + if(kprintoq == nil){ + c->flag &= ~COPEN; + error(Enomem); + } + qnoblock(kprintoq, 1); + consdevs[1].q = kprintoq; + }else + qreopen(kprintoq); + c->iounit = qiomaxatomic; + break; + } + return c; +} + +static void +consclose(Chan *c) +{ + switch((ulong)c->qid.path){ + /* last close of control file turns off raw */ + case Qconsctl: + if(c->flag&COPEN){ + if(decref(&kbd.ctl) == 0) + kbd.raw = 0; + } + break; + + /* close of kprint allows other opens */ + case Qkprint: + if(c->flag & COPEN){ + kprintinuse = 0; + qhangup(kprintoq, nil); + } + break; + } +} + +static long +consread(Chan *c, void *buf, long n, vlong off) +{ + ulong l; + Mach *mp; + char *b, *bp, *s, ch; + char tmp[6*NUMSIZE+1]; /* must be >= 6*NUMSIZE (Qcputime) */ + int i, id, send; + long offset; + extern char configfile[]; + + if(n <= 0) + return n; + + offset = off; + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, buf, n, consdir, nelem(consdir), devgen); + + case Qcons: + qlock(&kbd); + if(waserror()) { + qunlock(&kbd); + nexterror(); + } + while(!qcanread(lineq)){ + 
+ if(qread(kbdq, &ch, 1) == 0) + continue; + send = 0; + if(ch == 0){ + /* flush output on rawoff -> rawon */ + if(kbd.x > 0) + send = !qcanread(kbdq); + }else if(kbd.raw){ + kbd.line[kbd.x++] = ch; + send = !qcanread(kbdq); + }else{ + switch(ch){ + case '\b': + i = kbd.x; + b = kbd.line; + if(i > 0){ + i--; + while(i > 0 && (b[i-1]&0xc0) == 0x80) + i--; + kbd.x = i; + } + break; + case 0x15: /* ^U */ + kbd.x = 0; + print("\b^U\n"); + break; + case '\n': + case 0x04: /* ^D */ + send = 1; /* falls through: the newline is stored below, ^D is not */ + default: + if(ch != 0x04) + kbd.line[kbd.x++] = ch; + break; + } + } + if(send || kbd.x == sizeof kbd.line){ /* pass the line on when it is complete or the line buffer is full */ + qwrite(lineq, kbd.line, kbd.x); + kbd.x = 0; + } + } + n = qread(lineq, buf, n); + qunlock(&kbd); + poperror(); + return n; + + case Qcputime: + /* easiest to format in a separate buffer and copy out */ + for(i=0; i<6; i++){ + l = up->time[i]; + if(i == TReal) + l = sys->ticks - l; + l = TK2MS(l); + readnum(0, tmp+NUMSIZE*i, NUMSIZE, l, NUMSIZE); + } + tmp[sizeof(tmp)-1] = 0; + return readstr(offset, buf, n, tmp); + + case Qkmesg: + /* + * This is unlocked to avoid tying up a process + * that's writing to the buffer. kmesg.n never + * gets smaller, so worst case the reader will + * see a slurred buffer. 
+ */ + if(off >= kmesg.n) + n = 0; + else{ + if(off+n > kmesg.n) + n = kmesg.n - off; + memmove(buf, kmesg.buf+off, n); + } + return n; + + case Qkprint: + return qread(kprintoq, buf, n); + + case Qpgrpid: + return readnum(offset, buf, n, up->pgrp->pgrpid, NUMSIZE); + + case Qpid: + return readnum(offset, buf, n, up->pid, NUMSIZE); + + case Qppid: + return readnum(offset, buf, n, up->parentpid, NUMSIZE); + + case Qtime: + return readtime(offset, buf, n); + + case Qbintime: + return readbintime(buf, n); + + case Qhostowner: + return readstr(offset, buf, n, eve); + + case Qhostdomain: + return readstr(offset, buf, n, hostdomain); + + case Quser: + return readstr(offset, buf, n, up->user); + + case Qusers: + b = usersread(); + if(!waserror()){ /* NOTE(review): an error raised inside readstr is swallowed here and the unmodified n is returned; confirm that is intended */ + n = readstr(offset, buf, n, b); + poperror(); + } + free(b); + return n; + + case Qnull: + return 0; + + case Qconfig: + return readstr((ulong)offset, buf, n, configfile); + + case Qsysstat: + b = smalloc(MACHMAX*(NUMSIZE*11+1) + 1); /* +1 for NUL */ + bp = b; + for(id = 0; id < MACHMAX; id++){ /* each online cpu contributes ten NUMSIZE-wide fields and a newline */ + if((mp = sys->machptr[id]) == nil || !mp->online) + continue; + readnum(0, bp, NUMSIZE, id, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->cs, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->intr, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->syscall, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->pfault, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->tlbfault, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->tlbpurge, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, mp->load, NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, + (mp->perf.avg_inidle*100)/mp->perf.period, /* NOTE(review): divides by perf.period; confirm it cannot be zero for an online cpu */ + NUMSIZE); + bp += NUMSIZE; + readnum(0, bp, NUMSIZE, + (mp->perf.avg_inintr*100)/mp->perf.period, + NUMSIZE); + bp += NUMSIZE; + *bp++ = '\n'; + } + if(waserror()){ + free(b); + nexterror(); + } + n = readstr(offset, buf, n, b); + free(b); + poperror(); + return n; + + case Qswap: + bp = 
smalloc(READSTR); + if(waserror()){ + free(bp); + nexterror(); + } + s = seprintpagestats(bp, bp+READSTR); + s = seprintphysstats(s, bp+READSTR); + l = s - bp; /* length of the page and phys stats text */ + b = buf; + i = readstr(offset, b, n, bp); + poperror(); + free(bp); + b += i; + n -= i; + if(offset > l) /* the malloc summary follows that text, so make offset relative to its start */ + offset -= l; + else + offset = 0; + + return i + mallocreadsummary(c, b, n, offset); + + case Qsysname: + if(sysname == nil) + return 0; + return readstr(offset, buf, n, sysname); + + case Qrandom: + return randomread(buf, n); + + case Qdrivers: + return devtabread(c, buf, n, off); + + case Qzero: + memset(buf, 0, n); + return n; + + case Qosversion: + return readstr(offset, buf, n, "2000"); + + default: + print("consread %#llux\n", c->qid.path); + error(Egreg); + } + return -1; /* never reached */ +} + +static long +conswrite(Chan *c, void *va, long n, vlong off) +{ + char buf[256], ch; + long l, bp; + char *a; + Mach *mp; + int i; + ulong offset; + Cmdbuf *cb; + Cmdtab *ct; + + a = va; + offset = off; + + switch((ulong)c->qid.path){ + case Qcons: + /* + * Can't page fault in putstrn, so copy the data locally. 
+ */ + l = n; + while(l > 0){ + bp = l; + if(bp > sizeof buf) + bp = sizeof buf; + memmove(buf, a, bp); + putstrn0(buf, bp, 1); + a += bp; + l -= bp; + } + break; + + case Qconsctl: + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + strncpy(buf, a, n); + buf[n] = 0; + for(a = buf; a;){ + if(strncmp(a, "rawon", 5) == 0){ + kbd.raw = 1; + /* clumsy hack - wake up reader */ + ch = 0; + qwrite(kbdq, &ch, 1); + } + else if(strncmp(a, "rawoff", 6) == 0) + kbd.raw = 0; + else if(strncmp(a, "ctlpon", 6) == 0) + kbd.ctlpoff = 0; + else if(strncmp(a, "ctlpoff", 7) == 0) + kbd.ctlpoff = 1; + if(a = strchr(a, ' ')) /* step to the next space-separated word; a becomes nil after the last one, ending the loop */ + a++; + } + break; + + case Qtime: + if(!iseve()) + error(Eperm); + return writetime(a, n); + + case Qbintime: + if(!iseve()) + error(Eperm); + return writebintime(a, n); + + case Qhostowner: + return hostownerwrite(a, n); + + case Qhostdomain: + return hostdomainwrite(a, n); + + case Quser: + return userwrite(a, n); + + case Qusers: + return userswrite(a, n); + + case Qnull: + break; + + case Qconfig: + error(Eperm); + break; + + case Qreboot: + if(!iseve()) + error(Eperm); + cb = parsecmd(a, n); + + if(waserror()) { + free(cb); + nexterror(); + } + ct = lookupcmd(cb, rebootmsg, nelem(rebootmsg)); + switch(ct->index) { + case CMhalt: + reboot(nil, 0, 0); + break; + case CMreboot: + rebootcmd(cb->nf-1, cb->f+1); + break; + case CMpanic: + *(ulong*)0=0; /* deliberate store through address 0 before the explicit panic */ + panic("/dev/reboot"); + } + poperror(); + free(cb); + break; + + case Qsysstat: + for(i = 0; i < MACHMAX; i++){ /* any write zeroes these per-cpu counters; the data written is ignored */ + if((mp = sys->machptr[i]) == nil || !mp->online) + continue; + mp->cs = 0; + mp->intr = 0; + mp->syscall = 0; + mp->pfault = 0; + mp->tlbfault = 0; + mp->tlbpurge = 0; + } + break; + + case Qswap: + /* no more */ + break; + + case Qsysname: + if(offset != 0) + error(Ebadarg); + if(n <= 0 || n >= sizeof buf) + error(Ebadarg); + strncpy(buf, a, n); + buf[n] = 0; + if(buf[n-1] == '\n') + buf[n-1] = 0; + kstrdup(&sysname, buf); + break; + + default: + print("conswrite: %#llux\n", c->qid.path); + error(Egreg); 
+ } + return n; +} + +Dev consdevtab = { + 'c', + "cons", + + devreset, + consinit, + devshutdown, + consattach, + conswalk, + consstat, + consopen, + devcreate, + consclose, + consread, + devbread, + conswrite, + devbwrite, + devremove, + devwstat, +}; + +static ulong randn; + +static void +seedrand(void) +{ + if(!waserror()){ + randomread((void*)&randn, sizeof(randn)); + poperror(); + } +} + +int +nrand(int n) +{ + if(randn == 0) + seedrand(); + randn = randn*1103515245 + 12345 + sys->ticks; + return (randn>>16) % n; +} + +int +rand(void) +{ + nrand(1); + return randn; +} + +static uvlong uvorder = 0x0001020304050607ULL; /* NOTE(review): indexing through these bytes takes the external bytes most-significant first on both little- and big-endian hosts, despite the "le" names; confirm against cons(3) */ + +static uchar* +le2vlong(vlong *to, uchar *f) +{ + uchar *t, *o; + int i; + + t = (uchar*)to; + o = (uchar*)&uvorder; + for(i = 0; i < sizeof(vlong); i++) + t[o[i]] = f[i]; + return f+sizeof(vlong); +} + +static uchar* +vlong2le(uchar *t, vlong from) +{ + uchar *f, *o; + int i; + + f = (uchar*)&from; + o = (uchar*)&uvorder; + for(i = 0; i < sizeof(vlong); i++) + t[i] = f[o[i]]; + return t+sizeof(vlong); +} + +static long order = 0x00010203; + +static uchar* /* store sizeof(long) external bytes from f into *to using the order[] byte map; returns the first unread byte of f */ +le2long(long *to, uchar *f) +{ + uchar *t, *o; + int i; + + t = (uchar*)to; + o = (uchar*)&order; + for(i = 0; i < sizeof(long); i++) + t[o[i]] = f[i]; + return f+sizeof(long); +} + +static uchar* /* inverse of le2long: write from into t using the order[] byte map; returns the byte after the ones written */ +long2le(uchar *t, long from) +{ + uchar *f, *o; + int i; + + f = (uchar*)&from; + o = (uchar*)&order; + for(i = 0; i < sizeof(long); i++) + t[i] = f[o[i]]; + return t+sizeof(long); +} + +char *Ebadtimectl = "bad time control"; + +/* + * like the old #c/time but with added info. 
Return + * + * secs nanosecs fastticks fasthz + */ +static int +readtime(ulong off, char *buf, int n) +{ + vlong nsec, ticks; + long sec; + char str[7*NUMSIZE]; + + nsec = todget(&ticks); + if(fasthz == 0LL) + fastticks((uvlong*)&fasthz); + sec = nsec/1000000000ULL; + snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud ", + NUMSIZE-1, sec, + VLNUMSIZE-1, nsec, + VLNUMSIZE-1, ticks, + VLNUMSIZE-1, fasthz); + return readstr(off, buf, n, str); +} + +/* + * set the time in seconds + */ +static int +writetime(char *buf, int n) +{ + char b[13]; + long i; + vlong now; + + if(n >= sizeof(b)) + error(Ebadtimectl); + strncpy(b, buf, n); + b[n] = 0; + i = strtol(b, 0, 0); + if(i <= 0) + error(Ebadtimectl); + now = i*1000000000LL; + todset(now, 0, 0); + return n; +} + +/* + * read binary time info. all numbers are little endian. + * ticks and nsec are syncronized. + */ +static int +readbintime(char *buf, int n) +{ + int i; + vlong nsec, ticks; + uchar *b = (uchar*)buf; + + i = 0; + if(fasthz == 0LL) + fastticks((uvlong*)&fasthz); + nsec = todget(&ticks); + if(n >= 3*sizeof(uvlong)){ + vlong2le(b+2*sizeof(uvlong), fasthz); + i += sizeof(uvlong); + } + if(n >= 2*sizeof(uvlong)){ + vlong2le(b+sizeof(uvlong), ticks); + i += sizeof(uvlong); + } + if(n >= 8){ + vlong2le(b, nsec); + i += sizeof(vlong); + } + return i; +} + +/* + * set any of the following + * - time in nsec + * - nsec trim applied over some seconds + * - clock frequency + */ +static int +writebintime(char *buf, int n) +{ + uchar *p; + vlong delta; + long period; + + n--; + p = (uchar*)buf + 1; + switch(*buf){ + case 'n': + if(n < sizeof(vlong)) + error(Ebadtimectl); + le2vlong(&delta, p); + todset(delta, 0, 0); + break; + case 'd': + if(n < sizeof(vlong)+sizeof(long)) + error(Ebadtimectl); + p = le2vlong(&delta, p); + le2long(&period, p); + todset(-1, delta, period); + break; + case 'f': + if(n < sizeof(uvlong)) + error(Ebadtimectl); + le2vlong(&fasthz, p); + todsetfreq(fasthz); + break; + } + return n; +} diff 
-Nru /sys/src/9k/port/devdup.c /sys/src/9k/port/devdup.c --- /sys/src/9k/port/devdup.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devdup.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,145 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* Qid is (2*fd + (file is ctl))+1 */ + +static int +dupgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) +{ + Fgrp *fgrp = up->fgrp; + Chan *f; + static int perm[] = { 0400, 0200, 0600, 0 }; + int p; + Qid q; + + if(s == DEVDOTDOT){ + devdir(c, c->qid, ".", 0, eve, DMDIR|0555, dp); + return 1; + } + if(s == 0) + return 0; + s--; /* from here s == 2*fd + (1 for the ctl file) */ + if(s/2 > fgrp->maxfd) + return -1; + if((f=fgrp->fd[s/2]) == nil) + return 0; + if(s & 1){ + p = 0400; + sprint(up->genbuf, "%dctl", s/2); + }else{ + p = perm[f->mode&3]; /* permissions follow the low two bits of the open mode */ + sprint(up->genbuf, "%d", s/2); + } + mkqid(&q, s+1, 0, QTFILE); + devdir(c, q, up->genbuf, 0, eve, p, dp); + return 1; +} + +static Chan* +dupattach(char *spec) +{ + return devattach('d', spec); +} + +static Walkqid* +dupwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, (Dirtab *)0, 0, dupgen); +} + +static long +dupstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, (Dirtab *)0, 0L, dupgen); +} + +static Chan* +dupopen(Chan *c, int omode) +{ + Chan *f; + int fd, twicefd; + + if(c->qid.type & QTDIR){ + if(omode != 0) + error(Eisdir); + c->mode = 0; + c->flag |= COPEN; + c->offset = 0; + return c; + } + if(c->qid.type & QTAUTH) + error(Eperm); + twicefd = c->qid.path - 1; + fd = twicefd/2; + if((twicefd & 1)){ + /* ctl file */ + f = c; + f->mode = openmode(omode); + f->flag |= COPEN; + f->offset = 0; + }else{ + /* fd file: return the channel fdtochan yields for fd and drop c */ + f = fdtochan(fd, openmode(omode), 0, 1); + cclose(c); + } + if(omode & OCEXEC) + f->flag |= CCEXEC; + return f; +} + +static void +dupclose(Chan*) +{ +} + +static long +dupread(Chan *c, void *va, long n, vlong off) +{ + char buf[256]; + int fd, twicefd; + + if(c->qid.type & QTDIR) + return 
devdirread(c, va, n, (Dirtab *)0, 0L, dupgen); + twicefd = c->qid.path - 1; + fd = twicefd/2; + if(twicefd & 1){ + c = fdtochan(fd, -1, 0, 1); + procfdprint(c, fd, 0, buf, sizeof buf); + cclose(c); + return readstr(off, va, n, buf); + } + panic("dupread"); + return 0; +} + +static long +dupwrite(Chan*, void*, long, vlong) +{ + error(Eperm); + return 0; /* not reached */ +} + +Dev dupdevtab = { + 'd', + "dup", + + devreset, + devinit, + devshutdown, + dupattach, + dupwalk, + dupstat, + dupopen, + devcreate, + dupclose, + dupread, + devbread, + dupwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devenv.c /sys/src/9k/port/devenv.c --- /sys/src/9k/port/devenv.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devenv.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,439 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + Maxenvsize = 16300, +}; + +static Egrp *envgrp(Chan *c); +static int envwriteable(Chan *c); + +static Egrp confegrp; /* global environment group containing the kernel configuration */ + +static Evalue* +envlookup(Egrp *eg, char *name, ulong qidpath) +{ + Evalue *e; + int i; + + for(i=0; inent; i++){ + e = eg->ent[i]; + if(e->qid.path == qidpath || (name && e->name[0]==name[0] && strcmp(e->name, name) == 0)) + return e; + } + return nil; +} + +static int +envgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp) +{ + Egrp *eg; + Evalue *e; + + if(s == DEVDOTDOT){ + devdir(c, c->qid, "#e", 0, eve, DMDIR|0775, dp); + return 1; + } + + eg = envgrp(c); + rlock(eg); + e = 0; + if(name) + e = envlookup(eg, name, -1); + else if(s < eg->nent) + e = eg->ent[s]; + + if(e == 0) { + runlock(eg); + return -1; + } + + /* make sure name string continues to exist after we release lock */ + kstrcpy(up->genbuf, e->name, sizeof up->genbuf); + devdir(c, e->qid, up->genbuf, e->len, eve, 0666, dp); + runlock(eg); + return 1; +} + +static Chan* +envattach(char *spec) +{ + Chan *c; 
+ Egrp *egrp = nil; + + if(spec && *spec) { + if(strcmp(spec, "c") == 0) + egrp = &confegrp; + if(egrp == nil) + error(Ebadarg); + } + + c = devattach('e', spec); + c->aux = egrp; + return c; +} + +static Walkqid* +envwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, envgen); +} + +static long +envstat(Chan *c, uchar *db, long n) +{ + if(c->qid.type & QTDIR) + c->qid.vers = envgrp(c)->vers; + return devstat(c, db, n, 0, 0, envgen); +} + +static Chan* +envopen(Chan *c, int omode) +{ + Egrp *eg; + Evalue *e; + int trunc; + + eg = envgrp(c); + if(c->qid.type & QTDIR) { + if(omode != OREAD) + error(Eperm); + } + else { + trunc = omode & OTRUNC; + if(omode != OREAD && !envwriteable(c)) + error(Eperm); + if(trunc) + wlock(eg); + else + rlock(eg); + e = envlookup(eg, nil, c->qid.path); + if(e == 0) { + if(trunc) + wunlock(eg); + else + runlock(eg); + error(Enonexist); + } + if(trunc && e->value) { + e->qid.vers++; + free(e->value); + e->value = 0; + e->len = 0; + } + if(trunc) + wunlock(eg); + else + runlock(eg); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +envcreate(Chan *c, char *name, int omode, int) +{ + Egrp *eg; + Evalue *e; + Evalue **ent; + + if(c->qid.type != QTDIR) + error(Eperm); + + omode = openmode(omode); + eg = envgrp(c); + + wlock(eg); + if(waserror()) { + wunlock(eg); + nexterror(); + } + + if(envlookup(eg, name, -1)) + error(Eexist); + + e = smalloc(sizeof(Evalue)); + e->name = smalloc(strlen(name)+1); + strcpy(e->name, name); + + if(eg->nent == eg->ment){ + eg->ment += 32; + ent = smalloc(sizeof(eg->ent[0])*eg->ment); + if(eg->nent) + memmove(ent, eg->ent, sizeof(eg->ent[0])*eg->nent); + free(eg->ent); + eg->ent = ent; + } + e->qid.path = ++eg->path; + e->qid.vers = 0; + eg->vers++; + eg->ent[eg->nent++] = e; + c->qid = e->qid; + + wunlock(eg); + poperror(); + + c->offset = 0; + c->mode = omode; + c->flag |= COPEN; +} + +static void +envremove(Chan *c) +{ 
+ int i; + Egrp *eg; + Evalue *e; + + if(c->qid.type & QTDIR) + error(Eperm); + + eg = envgrp(c); + wlock(eg); + e = 0; + for(i=0; inent; i++){ + if(eg->ent[i]->qid.path == c->qid.path){ + e = eg->ent[i]; + eg->nent--; + eg->ent[i] = eg->ent[eg->nent]; + eg->vers++; + break; + } + } + wunlock(eg); + if(e == 0) + error(Enonexist); + free(e->name); + if(e->value) + free(e->value); + free(e); +} + +static void +envclose(Chan *c) +{ + /* + * cclose can't fail, so errors from remove will be ignored. + * since permissions aren't checked, + * envremove can't not remove it if its there. + */ + if(c->flag & CRCLOSE) + envremove(c); +} + +static long +envread(Chan *c, void *a, long n, vlong off) +{ + Egrp *eg; + Evalue *e; + long offset; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, envgen); + + eg = envgrp(c); + rlock(eg); + e = envlookup(eg, nil, c->qid.path); + if(e == 0) { + runlock(eg); + error(Enonexist); + } + + offset = off; + if(offset > e->len) /* protects against overflow converting vlong to long */ + n = 0; + else if(offset + n > e->len) + n = e->len - offset; + if(n <= 0) + n = 0; + else + memmove(a, e->value+offset, n); + runlock(eg); + return n; +} + +static long +envwrite(Chan *c, void *a, long n, vlong off) +{ + char *s; + Egrp *eg; + Evalue *e; + long len, offset; + + if(n <= 0) + return 0; + offset = off; + if(offset > Maxenvsize || n > (Maxenvsize - offset)) + error(Etoobig); + + eg = envgrp(c); + wlock(eg); + e = envlookup(eg, nil, c->qid.path); + if(e == 0) { + wunlock(eg); + error(Enonexist); + } + + len = offset+n; + if(len > e->len) { + s = smalloc(len); + if(e->value){ + memmove(s, e->value, e->len); + free(e->value); + } + e->value = s; + e->len = len; + } + memmove(e->value+offset, a, n); + e->qid.vers++; + eg->vers++; + wunlock(eg); + return n; +} + +Dev envdevtab = { + 'e', + "env", + + devreset, + devinit, + devshutdown, + envattach, + envwalk, + envstat, + envopen, + envcreate, + envclose, + envread, + devbread, + envwrite, + 
devbwrite, + envremove, + devwstat, +}; + +void +envcpy(Egrp *to, Egrp *from) +{ + int i; + Evalue *ne, *e; + + rlock(from); + to->ment = (from->nent+31)&~31; + to->ent = smalloc(to->ment*sizeof(to->ent[0])); + for(i=0; inent; i++){ + e = from->ent[i]; + ne = smalloc(sizeof(Evalue)); + ne->name = smalloc(strlen(e->name)+1); + strcpy(ne->name, e->name); + if(e->value){ + ne->value = smalloc(e->len); + memmove(ne->value, e->value, e->len); + ne->len = e->len; + } + ne->qid.path = ++to->path; + to->ent[i] = ne; + } + to->nent = from->nent; + runlock(from); +} + +void +closeegrp(Egrp *eg) +{ + int i; + Evalue *e; + + if(decref(eg) == 0){ + for(i=0; inent; i++){ + e = eg->ent[i]; + free(e->name); + if(e->value) + free(e->value); + free(e); + } + free(eg->ent); + free(eg); + } +} + +static Egrp* +envgrp(Chan *c) +{ + if(c->aux == nil) + return up->egrp; + return c->aux; +} + +static int +envwriteable(Chan *c) +{ + return isevegroup() || c->aux == nil; +} + +/* + * to let the kernel set environment variables + */ +void +ksetenv(char *ename, char *eval, int conf) +{ + Chan *c; + char buf[2*KNAMELEN]; + + snprint(buf, sizeof(buf), "#e%s/%s", conf?"c":"", ename); + c = namec(buf, Acreate, OWRITE, 0600); + c->dev->write(c, eval, strlen(eval), 0); + cclose(c); +} + +/* + * Return a copy of configuration environment as a sequence of strings. + * The strings alternate between name and value. 
A zero length name string + * indicates the end of the list + */ +char * +getconfenv(void) +{ + Egrp *eg = &confegrp; + Evalue *e; + char *p, *q; + int i, n; + + rlock(eg); + if(waserror()) { + runlock(eg); + nexterror(); + } + + /* determine size */ + n = 0; + for(i=0; inent; i++){ + e = eg->ent[i]; + n += strlen(e->name) + e->len + 2; + } + p = malloc(n + 1); + if(p == nil) + error(Enomem); + q = p; + for(i=0; inent; i++){ + e = eg->ent[i]; + strcpy(q, e->name); + q += strlen(q) + 1; + memmove(q, e->value, e->len); + q[e->len] = 0; + /* move up to the first null */ + q += strlen(q) + 1; + } + *q = 0; + + poperror(); + runlock(eg); + return p; +} diff -Nru /sys/src/9k/port/devfs.c /sys/src/9k/port/devfs.c --- /sys/src/9k/port/devfs.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devfs.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1285 @@ +/* + * File system devices. + * Follows device config in Ken's file server. + * Builds mirrors, concatenations, interleavings, and partitions + * of devices out of other (inner) devices. + * It is ok if inner devices are provided by this driver. + * + * Built files are grouped on different directories + * (called trees, and used to represent disks). + * The "#k/fs" tree is always available and never goes away. + * Configuration changes happen only while no I/O is in progress. + * + * Default sector size is one byte unless changed by the "disk" ctl. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +enum +{ + Fnone, + Fmirror, /* mirror of others */ + Fcat, /* catenation of others */ + Finter, /* interleaving of others */ + Fpart, /* part of other */ + Fclear, /* start over */ + Fdel, /* delete a configure device */ + Fdisk, /* set default tree and sector sz*/ + + Sectorsz = 1, + Blksize = 8*1024, /* for Finter only */ + + Incr = 5, /* Increments for the dev array */ + + /* + * All qids are decorated with the tree number. 
+ * #k/fs is tree number 0, is automatically added and + * its first qid is for the ctl file. It never goes away. + */ + Qtop = 0, /* #k */ + Qdir, /* directory (#k/fs) */ + Qctl, /* ctl, only for #k/fs/ctl */ + Qfirst, /* first qid assigned for device */ + + Iswrite = 0, + Isread, + + Optional = 0, + Mustexist, + + /* tunable parameters */ + Maxconf = 4*1024, /* max length for config */ + Ndevs = 32, /* max. inner devs per command */ + Ntrees = 128, /* max. number of trees */ + Maxretries = 3, /* max. retries of i/o errors */ + Retrypause = 5000, /* ms. to pause between retries */ +}; + +typedef struct Inner Inner; +typedef struct Fsdev Fsdev; +typedef struct Tree Tree; + +struct Inner +{ + char *iname; /* inner device name */ + vlong isize; /* size of inner device */ + Chan *idev; /* inner device */ +}; + +struct Fsdev +{ + Ref; /* one per Chan doing I/O */ + int gone; /* true if removed */ + int vers; /* qid version for this device */ + int type; /* Fnone, Fmirror, ... */ + char *name; /* name for this fsdev */ + Tree* tree; /* where the device is kept */ + vlong size; /* min(inner[X].isize) */ + vlong start; /* start address (for Fpart) */ + uint ndevs; /* number of inner devices */ + int perm; /* minimum of inner device perms */ + Inner *inner[Ndevs]; /* inner devices */ +}; + +struct Tree +{ + char *name; /* name for #k/ */ + Fsdev **devs; /* devices in dir. */ + uint ndevs; /* number of devices */ + uint nadevs; /* number of allocated devices in devs */ +}; + +#define dprint if(debug)print + +extern Dev fsdevtab; /* forward */ + +static RWlock lck; /* r: use devices; w: change config */ +static Tree fstree; /* The main "fs" tree. 
Never goes away */ +static Tree *trees[Ntrees]; /* internal representation of config */ +static int ntrees; /* max number of trees */ +static int qidvers; + +static char *disk; /* default tree name used */ +static char *source; /* default inner device used */ +static int sectorsz = Sectorsz; /* default sector size */ + +static char confstr[Maxconf]; /* textual configuration */ + +static int debug; + +static char cfgstr[] = "fsdev:\n"; + +static Qid tqid = {Qtop, 0, QTDIR}; +static Qid cqid = {Qctl, 0, 0}; + +static char* tnames[] = { + [Fmirror] "mirror", + [Fcat] "cat", + [Finter] "inter", + [Fpart] "part", +}; + +static Cmdtab configs[] = { + Fmirror,"mirror", 0, + Fcat, "cat", 0, + Finter, "inter", 0, + Fpart, "part", 0, + Fclear, "clear", 1, + Fdel, "del", 2, + Fdisk, "disk", 0, +}; + +static char Egone[] = "device is gone"; /* file has been removed */ + +static char* +seprintdev(char *s, char *e, Fsdev *mp) +{ + int i; + + if(mp == nil) + return seprint(s, e, ""); + if(mp->type < 0 || mp->type >= nelem(tnames) || tnames[mp->type] == nil) + return seprint(s, e, "bad device type %d\n", mp->type); + + s = strecpy(s, e, tnames[mp->type]); + if(mp->tree != &fstree) + s = seprint(s, e, " %s/%s", mp->tree->name, mp->name); + else + s = seprint(s, e, " %s", mp->name); + for(i = 0; i < mp->ndevs; i++) + s = seprint(s, e, " %s", mp->inner[i]->iname); + switch(mp->type){ + case Fmirror: + case Fcat: + case Finter: + s = strecpy(s, e, "\n"); + break; + case Fpart: + s = seprint(s, e, " %ulld %ulld\n", mp->start, mp->size); + break; + default: + panic("#k: seprintdev bug"); + } + return s; +} + +static vlong +mkpath(int tree, int devno) +{ + return (tree&0xFFFF)<<16 | devno&0xFFFF; +} + +static int +path2treeno(int q) +{ + return q>>16 & 0xFFFF; +} + +static int +path2devno(int q) +{ + return q & 0xFFFF; +} + +static Tree* +gettree(int i, int mustexist) +{ + dprint("gettree %d\n", i); + if(i < 0) + panic("#k: bug: bad tree index %d in gettree", i); + if(i >= ntrees || 
trees[i] == nil) + if(mustexist) + error(Enonexist); + else + return nil; + return trees[i]; +} + +static Fsdev* +getdev(Tree *t, int i, int mustexist) +{ + dprint("getdev %d\n", i); + if(i < 0) + panic("#k: bug: bad dev index %d in getdev", i); + if(i >= t->nadevs || t->devs[i] == nil) + if(mustexist) + error(Enonexist); + else + return nil; + return t->devs[i]; +} + +static Fsdev* +path2dev(int q) +{ + Tree *t; + + dprint("path2dev %ux\n", q); + t = gettree(path2treeno(q), Mustexist); + return getdev(t, path2devno(q) - Qfirst, Mustexist); +} + +static Tree* +treealloc(char *name) +{ + int i; + Tree *t; + + dprint("treealloc %s\n", name); + for(i = 0; i < nelem(trees); i++) + if(trees[i] == nil) + break; + if(i == nelem(trees)) + return nil; + t = trees[i] = mallocz(sizeof(Tree), 1); + if(t == nil) + return nil; + if(i == ntrees) + ntrees++; + kstrdup(&t->name, name); + return t; +} + +static Tree* +lookuptree(char *name) +{ + int i; + + dprint("lookuptree %s\n", name); + for(i = 0; i < ntrees; i++) + if(trees[i] != nil && strcmp(trees[i]->name, name) == 0) + return trees[i]; + return nil; +} + +static Fsdev* +devalloc(Tree *t, char *name) +{ + int i, ndevs; + Fsdev *mp, **devs; + + dprint("devalloc %s %s\n", t->name, name); + mp = mallocz(sizeof(Fsdev), 1); + if(mp == nil) + return nil; + for(i = 0; i < t->nadevs; i++) + if(t->devs[i] == nil) + break; + if(i >= t->nadevs){ + if(t->nadevs % Incr == 0){ + ndevs = t->nadevs + Incr; + devs = realloc(t->devs, ndevs * sizeof(Fsdev*)); + if(devs == nil){ + free(mp); + return nil; + } + t->devs = devs; + } + t->devs[t->nadevs] = nil; + t->nadevs++; + } + kstrdup(&mp->name, name); + mp->vers = ++qidvers; + mp->tree = t; + t->devs[i] = mp; + t->ndevs++; + return mp; +} + +static void +deltree(Tree *t) +{ + int i; + + dprint("deltree %s\n", t->name); + for(i = 0; i < ntrees; i++) + if(trees[i] == t){ + if(i > 0){ /* "fs" never goes away */ + free(t->name); + free(t->devs); + free(t); + trees[i] = nil; + } + return; + } + 
panic("#k: deltree: bug: tree not found"); +} + +/* + * A device is gone and we know that all its users are gone. + * A tree is gone when all its devices are gone ("fs" is never gone). + * Must close devices outside locks, so we could nest our own devices. + */ +static void +mdeldev(Fsdev *mp) +{ + int i; + Inner *in; + Tree *t; + + dprint("deldev %s gone %d ref %d\n", mp->name, mp->gone, mp->ref); + + mp->gone = 1; + mp->vers = ++qidvers; + + wlock(&lck); + t = mp->tree; + for(i = 0; i < t->nadevs; i++) + if(t->devs[i] == mp){ + t->devs[i] = nil; + t->ndevs--; + if(t->ndevs == 0) + deltree(t); + break; + } + wunlock(&lck); + + free(mp->name); + for(i = 0; i < mp->ndevs; i++){ + in = mp->inner[i]; + if(in->idev != nil) + cclose(in->idev); + free(in->iname); + free(in); + } + if(debug) + memset(mp, 9, sizeof *mp); /* poison */ + free(mp); +} + +/* + * Delete one or all devices in one or all trees. + */ +static void +mdelctl(char *tname, char *dname) +{ + int i, alldevs, alltrees, some; + Fsdev *mp; + Tree *t; + + dprint("delctl %s\n", dname); + alldevs = strcmp(dname, "*") == 0; + alltrees = strcmp(tname, "*") == 0; + some = 0; +Again: + wlock(&lck); + for(i = 0; i < ntrees; i++){ + t = trees[i]; + if(t == nil) + continue; + if(alltrees == 0 && strcmp(t->name, tname) != 0) + continue; + for(i = 0; i < t->nadevs; i++){ + mp = t->devs[i]; + if(t->devs[i] == nil) + continue; + if(alldevs == 0 && strcmp(mp->name, dname) != 0) + continue; + /* + * Careful: must close outside locks and that + * may change the file tree we are looking at. 
+ */ + some++; + mp->gone = 1; + if(mp->ref == 0){ + incref(mp); /* keep it there */ + wunlock(&lck); + mdeldev(mp); + goto Again; /* tree can change */ + } + } + } + wunlock(&lck); + if(some == 0 && alltrees == 0) + error(Enonexist); +} + +static void +setdsize(Fsdev* mp, vlong *ilen) +{ + int i; + vlong inlen; + Inner *in; + + dprint("setdsize %s\n", mp->name); + for (i = 0; i < mp->ndevs; i++){ + in = mp->inner[i]; + in->isize = ilen[i]; + inlen = in->isize; + switch(mp->type){ + case Finter: + /* truncate to multiple of Blksize */ + inlen &= ~(Blksize-1); + in->isize = inlen; + /* fall through */ + case Fmirror: + /* use size of smallest inner device */ + if (mp->size == 0 || mp->size > inlen) + mp->size = inlen; + break; + case Fcat: + mp->size += inlen; + break; + case Fpart: + if(mp->start > inlen) + error("partition starts after device end"); + if(inlen < mp->start + mp->size){ + print("#k: %s: partition truncated from " + "%lld to %lld bytes\n", mp->name, + mp->size, inlen - mp->start); + mp->size = inlen - mp->start; + } + break; + } + } + if(mp->type == Finter) + mp->size *= mp->ndevs; +} + +static void +validdevname(Tree *t, char *dname) +{ + int i; + + for(i = 0; i < t->nadevs; i++) + if(t->devs[i] != nil && strcmp(t->devs[i]->name, dname) == 0) + error(Eexist); +} + +static void +parseconfig(char *a, long n, Cmdbuf **cbp, Cmdtab **ctp) +{ + Cmdbuf *cb; + Cmdtab *ct; + + *cbp = cb = parsecmd(a, n); + *ctp = ct = lookupcmd(cb, configs, nelem(configs)); + + cb->f++; /* skip command */ + cb->nf--; + switch(ct->index){ + case Fmirror: + case Fcat: + case Finter: + if(cb->nf < 2) + error("too few arguments for ctl"); + if(cb->nf - 1 > Ndevs) + error("too many devices in ctl"); + break; + case Fdisk: + if(cb->nf < 1 || cb->nf > 3) + error("ctl usage: disk name [sz dev]"); + break; + case Fpart: + if(cb->nf != 4 && (cb->nf != 3 || source == nil)) + error("ctl usage: part new [file] off len"); + break; + } +} + +static void +parsename(char *name, char *disk, 
char **tree, char **dev) +{ + char *slash; + + slash = strchr(name, '/'); + if(slash == nil){ + if(disk != nil) + *tree = disk; + else + *tree = "fs"; + *dev = name; + }else{ + *tree = name; + *slash++ = 0; + *dev = slash; + } + validname(*tree, 0); + validname(*dev, 0); +} + +static int +getattrs(Chan *c, vlong *lenp, int *permp) +{ + uchar buf[128]; /* old DIRLEN plus a little should be plenty */ + Dir d; + long l; + + *lenp = 0; + *permp = 0; + l = c->dev->stat(c, buf, sizeof buf); + if (l >= 0 && convM2D(buf, l, &d, nil) > 0) { + *lenp = d.length; + *permp = d.mode & 0777; + } + return l; +} + +/* + * Process a single line of configuration, + * often of the form "cmd newname idev0 idev1". + * locking is tricky, because we need a write lock to + * add/remove devices yet adding/removing them may lead + * to calls to this driver that require a read lock (when + * inner devices are also provided by us). + */ +static void +mconfig(char* a, long n) +{ + int i; + int *iperm; + vlong size, start; + vlong *ilen; + char *tname, *dname, *fakef[4]; + Chan **idev; + Cmdbuf *cb; + Cmdtab *ct; + Fsdev *mp; + Inner *inprv; + Tree *t; + + /* ignore comments & empty lines */ + if (*a == '\0' || *a == '#' || *a == '\n') + return; + + dprint("mconfig\n"); + size = 0; + start = 0; + mp = nil; + cb = nil; + idev = nil; + ilen = nil; + iperm = nil; + + if(waserror()){ + free(cb); + nexterror(); + } + + parseconfig(a, n, &cb, &ct); + switch (ct->index) { + case Fdisk: + kstrdup(&disk, cb->f[0]); + if(cb->nf >= 2) + sectorsz = strtoul(cb->f[1], 0, 0); + else + sectorsz = Sectorsz; + if(cb->nf == 3) + kstrdup(&source, cb->f[2]); + else{ + free(source); + source = nil; + } + poperror(); + free(cb); + return; + case Fclear: + poperror(); + free(cb); + mdelctl("*", "*"); /* del everything */ + return; + case Fpart: + if(cb->nf == 3){ + /* + * got a request in the format of sd(3), + * pretend we got one in our format. + * later we change end to be len. 
+ */ + fakef[0] = cb->f[0]; + fakef[1] = source; + fakef[2] = cb->f[1]; + fakef[3] = cb->f[2]; + cb->f = fakef; + cb->nf = 4; + } + start = strtoll(cb->f[2], nil, 10); + size = strtoll(cb->f[3], nil, 10); + if(cb->f == fakef) + size -= start; /* it was end */ + cb->nf -= 2; + break; + } + parsename(cb->f[0], disk, &tname, &dname); + for(i = 1; i < cb->nf; i++) + validname(cb->f[i], 1); + + if(ct->index == Fdel){ + mdelctl(tname, dname); + poperror(); + free(cb); + return; + } + + /* + * Open all inner devices while we have only a read lock. + */ + poperror(); + rlock(&lck); + if(waserror()){ + runlock(&lck); +Fail: + for(i = 1; i < cb->nf; i++) + if(idev != nil && idev[i-1] != nil) + cclose(idev[i-1]); + if(mp != nil) + mdeldev(mp); + free(idev); + free(ilen); + free(iperm); + free(cb); + nexterror(); + } + /* record names, lengths and perms of all named files */ + idev = smalloc(sizeof(Chan*) * Ndevs); + ilen = smalloc(sizeof(vlong) * Ndevs); + iperm = smalloc(sizeof(int) * Ndevs); + for(i = 1; i < cb->nf; i++){ + idev[i-1] = namec(cb->f[i], Aopen, ORDWR, 0); + getattrs(idev[i-1], &ilen[i-1], &iperm[i-1]); + } + poperror(); + runlock(&lck); + + /* + * Get a write lock and add the device if we can. 
+ */ + wlock(&lck); + if(waserror()){ + wunlock(&lck); + goto Fail; + } + + t = lookuptree(tname); + if(t != nil) + validdevname(t, dname); + else + t = treealloc(tname); + if(t == nil) + error("no more trees"); + mp = devalloc(t, dname); + if(mp == nil){ + if(t->ndevs == 0) /* it was created for us */ + deltree(t); /* but we will not mdeldev() */ + error(Enomem); + } + + /* construct mp from iname, idev and iperm arrays */ + mp->type = ct->index; + if(mp->type == Fpart){ + mp->start = start * sectorsz; + mp->size = size * sectorsz; + } + mp->perm = 0666; + for(i = 1; i < cb->nf; i++){ + inprv = mp->inner[i-1] = mallocz(sizeof(Inner), 1); + if(inprv == nil) + error(Enomem); + mp->ndevs++; + kstrdup(&inprv->iname, cb->f[i]); + inprv->idev = idev[i-1]; + idev[i-1] = nil; + /* use the most restrictive of the inner permissions */ + mp->perm &= iperm[i-1]; + } + setdsize(mp, ilen); + + poperror(); + wunlock(&lck); + free(idev); + free(ilen); + free(iperm); + free(cb); +} + +static void +rdconf(void) +{ + int mustrd; + char *c, *e, *p, *s; + Chan *cc; + static int configed; + + /* only read config file once */ + if (configed) + return; + configed = 1; + + dprint("rdconf\n"); + /* add the std "fs" tree */ + trees[0] = &fstree; + ntrees++; + fstree.name = "fs"; + + /* identify the config file */ + s = getconf("fsconfig"); + if (s == nil){ + mustrd = 0; + s = "/dev/sdC0/fscfg"; + } else + mustrd = 1; + + /* read it */ + cc = nil; + c = nil; + if (waserror()){ + if (cc != nil) + cclose(cc); + if (c) + free(c); + if (!mustrd) + return; + nexterror(); + } + cc = namec(s, Aopen, OREAD, 0); + cc->dev->read(cc, confstr, sizeof confstr, 0); + cclose(cc); + cc = nil; + + /* validate, copy and erase config; mconfig will repopulate confstr */ + if (strncmp(confstr, cfgstr, sizeof cfgstr - 1) != 0) + error("bad #k config, first line must be: 'fsdev:\\n'"); + kstrdup(&c, confstr + sizeof cfgstr - 1); + memset(confstr, 0, sizeof confstr); + + /* process config copy one line at a time */ 
+ for (p = c; p != nil && *p != '\0'; p = e){ + e = strchr(p, '\n'); + if (e == nil) + e = p + strlen(p); + else + e++; + mconfig(p, e - p); + } + USED(cc); /* until now, can be used in waserror clause */ + poperror(); +} + +static int +mgen(Chan *c, char*, Dirtab*, int, int i, Dir *dp) +{ + int treeno; + Fsdev *mp; + Qid qid; + Tree *t; + + dprint("mgen %#ullx %d\n", c->qid.path, i); + qid.type = QTDIR; + qid.vers = 0; + if(c->qid.path == Qtop){ + if(i == DEVDOTDOT){ + devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp); + return 1; + } + t = gettree(i, Optional); + if(t == nil){ + dprint("no\n"); + return -1; + } + qid.path = mkpath(i, Qdir); + devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp); + return 1; + } + + treeno = path2treeno(c->qid.path); + t = gettree(treeno, Optional); + if(t == nil){ + dprint("no\n"); + return -1; + } + if((c->qid.type & QTDIR) != 0){ + if(i == DEVDOTDOT){ + devdir(c, tqid, "#k", 0, eve, DMDIR|0775, dp); + return 1; + } + if(treeno == 0){ + /* take care of #k/fs/ctl */ + if(i == 0){ + devdir(c, cqid, "ctl", 0, eve, 0664, dp); + return 1; + } + i--; + } + mp = getdev(t, i, Optional); + if(mp == nil){ + dprint("no\n"); + return -1; + } + qid.type = QTFILE; + qid.vers = mp->vers; + qid.path = mkpath(treeno, Qfirst+i); + devdir(c, qid, mp->name, mp->size, eve, mp->perm, dp); + return 1; + } + + if(i == DEVDOTDOT){ + qid.path = mkpath(treeno, Qdir); + devdir(c, qid, t->name, 0, eve, DMDIR|0775, dp); + return 1; + } + dprint("no\n"); + return -1; +} + +static Chan* +mattach(char *spec) +{ + dprint("mattach\n"); + return devattach(fsdevtab.dc, spec); +} + +static Walkqid* +mwalk(Chan *c, Chan *nc, char **name, int nname) +{ + Walkqid *wq; + + rdconf(); + + dprint("mwalk %llux\n", c->qid.path); + rlock(&lck); + if(waserror()){ + runlock(&lck); + nexterror(); + } + wq = devwalk(c, nc, name, nname, 0, 0, mgen); + poperror(); + runlock(&lck); + return wq; +} + +static long +mstat(Chan *c, uchar *db, long n) +{ + int p; + Dir d; + Fsdev *mp; + Qid q; + 
Tree *t; + + dprint("mstat %llux\n", c->qid.path); + rlock(&lck); + if(waserror()){ + runlock(&lck); + nexterror(); + } + p = c->qid.path; + memset(&d, 0, sizeof d); + switch(p){ + case Qtop: + devdir(c, tqid, "#k", 0, eve, DMDIR|0775, &d); + break; + case Qctl: + devdir(c, cqid, "ctl", 0, eve, 0664, &d); + break; + default: + t = gettree(path2treeno(p), Mustexist); + if(c->qid.type & QTDIR) + devdir(c, c->qid, t->name, 0, eve, DMDIR|0775, &d); + else{ + mp = getdev(t, path2devno(p) - Qfirst, Mustexist); + q = c->qid; + q.vers = mp->vers; + devdir(c, q, mp->name, mp->size, eve, mp->perm, &d); + } + } + n = convD2M(&d, db, n); + if (n == 0) + error(Ebadarg); + poperror(); + runlock(&lck); + return n; +} + +static Chan* +mopen(Chan *c, int omode) +{ + int q; + Fsdev *mp; + + dprint("mopen %llux\n", c->qid.path); + if((c->qid.type & QTDIR) && omode != OREAD) + error(Eperm); + if(c->qid.path != Qctl && (c->qid.type&QTDIR) == 0){ + rlock(&lck); + if(waserror()){ + runlock(&lck); + nexterror(); + } + q = c->qid.path; + mp = path2dev(q); + if(mp->gone) + error(Egone); + devpermcheck(eve, mp->perm, omode); + incref(mp); + poperror(); + runlock(&lck); + } + /* + * Our mgen does not return the info for the qid + * but only for its children. Don't use devopen here. 
+ */ + c->offset = 0; + c->mode = openmode(omode & ~OTRUNC); + c->flag |= COPEN; + return c; +} + +static void +mclose(Chan *c) +{ + int mustdel, q; + Fsdev *mp; + + dprint("mclose %llux\n", c->qid.path); + if(c->qid.type & QTDIR || !(c->flag & COPEN)) + return; + rlock(&lck); + if(waserror()){ + runlock(&lck); + nexterror(); + } + mustdel = 0; + mp = nil; + q = c->qid.path; + if(q == Qctl){ + free(disk); + disk = nil; /* restore defaults */ + free(source); + source = nil; + sectorsz = Sectorsz; + }else{ + mp = path2dev(q); + if(mp->gone != 0 && mp->ref == 1) + mustdel = 1; + else + decref(mp); + } + poperror(); + runlock(&lck); + if(mustdel) + mdeldev(mp); +} + +static long +io(Fsdev *mp, Inner *in, int isread, void *a, long l, vlong off) +{ + long wl; + Chan *mc; + + mc = in->idev; + if(mc == nil) + error(Egone); + if (waserror()) { + print("#k: %s: byte %,lld count %ld (of #k/%s): %s error: %s\n", + in->iname, off, l, mp->name, (isread? "read": "write"), + (up && up->errstr? up->errstr: "")); + nexterror(); + } + if (isread) + wl = mc->dev->read(mc, a, l, off); + else + wl = mc->dev->write(mc, a, l, off); + poperror(); + return wl; +} + +/* NB: a transfer could span multiple inner devices */ +static long +catio(Fsdev *mp, int isread, void *a, long n, vlong off) +{ + int i; + long l, res; + Inner *in; + + if(debug) + print("catio %d %p %ld %lld\n", isread, a, n, off); + res = n; + for (i = 0; n > 0 && i < mp->ndevs; i++){ + in = mp->inner[i]; + if (off >= in->isize){ + off -= in->isize; + continue; /* not there yet */ + } + if (off + n > in->isize) + l = in->isize - off; + else + l = n; + if(debug) + print("\tdev %d %p %ld %lld\n", i, a, l, off); + + if (io(mp, in, isread, a, l, off) != l) + error(Eio); + + a = (char*)a + l; + off = 0; + n -= l; + } + if(debug) + print("\tres %ld\n", res - n); + return res - n; +} + +static long +interio(Fsdev *mp, int isread, void *a, long n, vlong off) +{ + int i; + long boff, res, l, wl, wsz; + vlong woff, blk, mblk; + + blk = 
off / Blksize; + boff = off % Blksize; + wsz = Blksize - boff; + res = n; + while(n > 0){ + mblk = blk / mp->ndevs; + i = blk % mp->ndevs; + woff = mblk*Blksize + boff; + if (n > wsz) + l = wsz; + else + l = n; + + wl = io(mp, mp->inner[i], isread, a, l, woff); + if (wl != l) + error(Eio); + + blk++; + boff = 0; + wsz = Blksize; + a = (char*)a + l; + n -= l; + } + return res; +} + +static char* +seprintconf(char *s, char *e) +{ + int i, j; + Tree *t; + + *s = 0; + for(i = 0; i < ntrees; i++){ + t = trees[i]; + if(t != nil) + for(j = 0; j < t->nadevs; j++) + if(t->devs[j] != nil) + s = seprintdev(s, e, t->devs[j]); + } + return s; +} + +static long +mread(Chan *c, void *a, long n, vlong off) +{ + int i, retry; + long l, res; + Fsdev *mp; + Tree *t; + + dprint("mread %llux\n", c->qid.path); + rlock(&lck); + if(waserror()){ + runlock(&lck); + nexterror(); + } + res = -1; + if(c->qid.type & QTDIR){ + res = devdirread(c, a, n, 0, 0, mgen); + goto Done; + } + if(c->qid.path == Qctl){ + seprintconf(confstr, confstr + sizeof(confstr)); + res = readstr((long)off, a, n, confstr); + goto Done; + } + + t = gettree(path2treeno(c->qid.path), Mustexist); + mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist); + + if(off >= mp->size){ + res = 0; + goto Done; + } + if(off + n > mp->size) + n = mp->size - off; + if(n == 0){ + res = 0; + goto Done; + } + + switch(mp->type){ + case Fcat: + res = catio(mp, Isread, a, n, off); + break; + case Finter: + res = interio(mp, Isread, a, n, off); + break; + case Fpart: + res = io(mp, mp->inner[0], Isread, a, n, mp->start + off); + break; + case Fmirror: + retry = 0; + do { + if (retry > 0) { + print("#k/%s: retry %d read for byte %,lld " + "count %ld: %s\n", mp->name, retry, off, + n, (up && up->errstr? up->errstr: "")); + /* + * pause before retrying in case it's due to + * a transient bus or controller problem. 
+ */ + tsleep(&up->sleep, return0, 0, Retrypause); + } + for (i = 0; i < mp->ndevs; i++){ + if (waserror()) + continue; + l = io(mp, mp->inner[i], Isread, a, n, off); + poperror(); + if (l >= 0){ + res = l; + break; /* read a good copy */ + } + } + } while (i == mp->ndevs && ++retry <= Maxretries); + if (retry > Maxretries) { + /* no mirror had a good copy of the block */ + print("#k/%s: byte %,lld count %ld: CAN'T READ " + "from mirror: %s\n", mp->name, off, n, + (up && up->errstr? up->errstr: "")); + error(Eio); + } else if (retry > 0) + print("#k/%s: byte %,lld count %ld: retry read OK " + "from mirror: %s\n", mp->name, off, n, + (up && up->errstr? up->errstr: "")); + break; + } +Done: + poperror(); + runlock(&lck); + return res; +} + +static long +mwrite(Chan *c, void *a, long n, vlong off) +{ + int i, allbad, anybad, retry; + long l, res; + Fsdev *mp; + Tree *t; + + dprint("mwrite %llux\n", c->qid.path); + if (c->qid.type & QTDIR) + error(Eisdir); + if (c->qid.path == Qctl){ + mconfig(a, n); + return n; + } + + rlock(&lck); + if(waserror()){ + runlock(&lck); + nexterror(); + } + + t = gettree(path2treeno(c->qid.path), Mustexist); + mp = getdev(t, path2devno(c->qid.path) - Qfirst, Mustexist); + + if(off >= mp->size){ + res = 0; + goto Done; + } + if(off + n > mp->size) + n = mp->size - off; + if(n == 0){ + res = 0; + goto Done; + } + res = n; + switch(mp->type){ + case Fcat: + res = catio(mp, Iswrite, a, n, off); + break; + case Finter: + res = interio(mp, Iswrite, a, n, off); + break; + case Fpart: + res = io(mp, mp->inner[0], Iswrite, a, n, mp->start + off); + if (res != n) + error(Eio); + break; + case Fmirror: + retry = 0; + do { + if (retry > 0) { + print("#k/%s: retry %d write for byte %,lld " + "count %ld: %s\n", mp->name, retry, off, + n, (up && up->errstr? up->errstr: "")); + /* + * pause before retrying in case it's due to + * a transient bus or controller problem. 
+ */ + tsleep(&up->sleep, return0, 0, Retrypause); + } + allbad = 1; + anybad = 0; + for (i = mp->ndevs - 1; i >= 0; i--){ + if (waserror()) { + anybad = 1; + continue; + } + l = io(mp, mp->inner[i], Iswrite, a, n, off); + poperror(); + if (l == n) + allbad = 0; /* wrote a good copy */ + else + anybad = 1; + } + } while (anybad && ++retry <= Maxretries); + if (allbad) { + /* no mirror took a good copy of the block */ + print("#k/%s: byte %,lld count %ld: CAN'T WRITE " + "to mirror: %s\n", mp->name, off, n, + (up && up->errstr? up->errstr: "")); + error(Eio); + } else if (retry > 0) + print("#k/%s: byte %,lld count %ld: retry wrote OK " + "to mirror: %s\n", mp->name, off, n, + (up && up->errstr? up->errstr: "")); + + break; + } +Done: + poperror(); + runlock(&lck); + return res; +} + +Dev fsdevtab = { + 'k', + "fs", + + devreset, + devinit, + devshutdown, + mattach, + mwalk, + mstat, + mopen, + devcreate, + mclose, + mread, + devbread, + mwrite, + devbwrite, + devremove, + devwstat, + devpower, + devconfig, +}; diff -Nru /sys/src/9k/port/devkprof.c /sys/src/9k/port/devkprof.c --- /sys/src/9k/port/devkprof.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devkprof.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,190 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + + +#define LRES 3 /* log of PC resolution */ +#define SZ 4 /* sizeof of count cell; well known as 4 */ + +struct +{ + uintptr minpc; + uintptr maxpc; + int nbuf; + int time; + ulong *buf; +}kprof; + +enum{ + Kprofdirqid, + Kprofdataqid, + Kprofctlqid, +}; +Dirtab kproftab[]={ + ".", {Kprofdirqid, 0, QTDIR},0, DMDIR|0550, + "kpdata", {Kprofdataqid}, 0, 0600, + "kpctl", {Kprofctlqid}, 0, 0600, +}; + +static void +_kproftimer(uintptr pc) +{ + if(kprof.time == 0) + return; + /* + * if the pc is coming out of spllo or splx, + * use the pc saved when we went splhi. 
+ */ + if(pc>=PTR2UINT(spllo) && pc<=PTR2UINT(spldone)) + pc = m->splpc; + + kprof.buf[0] += TK2MS(1); + if(kprof.minpc<=pc && pc>= LRES; + kprof.buf[pc] += TK2MS(1); + }else + kprof.buf[1] += TK2MS(1); +} + +static void +kprofinit(void) +{ + if(SZ != sizeof kprof.buf[0]) + panic("kprof size"); + kproftimer = _kproftimer; +} + +static Chan* +kprofattach(char *spec) +{ + ulong n; + + /* allocate when first used */ + kprof.minpc = KTZERO; + kprof.maxpc = PTR2UINT(etext); + kprof.nbuf = (kprof.maxpc-kprof.minpc) >> LRES; + n = kprof.nbuf*SZ; + if(kprof.buf == 0) { + kprof.buf = malloc(n); + if(kprof.buf == 0) + error(Enomem); + } + kproftab[1].length = n; + return devattach('K', spec); +} + +static Walkqid* +kprofwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, kproftab, nelem(kproftab), devgen); +} + +static long +kprofstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, kproftab, nelem(kproftab), devgen); +} + +static Chan* +kprofopen(Chan *c, int omode) +{ + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Eperm); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +kprofclose(Chan*) +{ +} + +static long +kprofread(Chan *c, void *va, long n, vlong off) +{ + ulong end; + ulong w, *bp; + uchar *a, *ea; + ulong offset = off; + + switch((int)c->qid.path){ + case Kprofdirqid: + return devdirread(c, va, n, kproftab, nelem(kproftab), devgen); + + case Kprofdataqid: + end = kprof.nbuf*SZ; + if(offset & (SZ-1)) + error(Ebadarg); + if(offset >= end){ + n = 0; + break; + } + if(offset+n > end) + n = end-offset; + n &= ~(SZ-1); + a = va; + ea = a + n; + bp = kprof.buf + offset/SZ; + while(a < ea){ + w = *bp++; + *a++ = w>>24; + *a++ = w>>16; + *a++ = w>>8; + *a++ = w>>0; + } + break; + + default: + n = 0; + break; + } + return n; +} + +static long +kprofwrite(Chan *c, void *a, long n, vlong) +{ + switch((int)(c->qid.path)){ + case Kprofctlqid: + if(strncmp(a, "startclr", 8) == 
0){ + memset((char *)kprof.buf, 0, kprof.nbuf*SZ); + kprof.time = 1; + }else if(strncmp(a, "start", 5) == 0) + kprof.time = 1; + else if(strncmp(a, "stop", 4) == 0) + kprof.time = 0; + break; + default: + error(Ebadusefd); + } + return n; +} + +Dev kprofdevtab = { + 'K', + "kprof", + + devreset, + kprofinit, + devshutdown, + kprofattach, + kprofwalk, + kprofstat, + kprofopen, + devcreate, + kprofclose, + kprofread, + devbread, + kprofwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devmem.c /sys/src/9k/port/devmem.c --- /sys/src/9k/port/devmem.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devmem.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,347 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +typedef struct Mprof Mprof; +typedef struct Mevents Mevents; +typedef struct Mevent Mevent; + +enum +{ + Qdir, + Qctl, + Qevent, + Qprof, + Qsum, +}; + +static +Dirtab memdir[] = +{ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "memctl", {Qctl}, 0, 0664, + "memevent", {Qevent}, 0, 0444, + "memprof", {Qprof}, 0, 0444, + "memsum", {Qsum}, 0, 0444, +}; + +enum +{ + Nevent = 10000, + BucketLg2= 15, /* for 512k kernel, allows allocation every 16 bytes */ + Nbucket= 1< 0, alloc; < 0, free */ +}; + +static struct Mevents +{ + Lock; + Ref; + Rendez r; + Mevent events[Nevent]; + uint rd; + uint wr; + int want; + ulong lost; +} memevents; + +static Ref monitoring; + +extern void setmemprof(void (*)(void*, ulong, usize, int)); /* qmalloc.c */ + +static void +aadd(int *addr, int delta) +{ + int value; + + do + value = *addr; + while(!CASW(addr, value, value+delta)); +} + +static int +isnonempty(void *v) +{ + Mevents *evs; + + evs = v; + return evs->rd != evs->wr; +} + +static int +isnotfull(Mevents *evs) +{ + return (evs->wr - evs->rd) < Nevent; +} + +static void +addmemevent(void *a, ulong tag, usize nb, int w) +{ + Mevents *evs; + Mevent e; + int empty; + + e.tag = tag; + e.koff = (uintptr)a 
- KZERO; + if(nb > MaxInt) + nb = MaxInt; + e.size = w < 0? -nb: nb; + + evs = &memevents; + ilock(evs); + if(isnotfull(evs)){ + empty = evs->rd == evs->wr; + evs->events[evs->wr++%nelem(evs->events)] = e; + }else{ + evs->lost++; + empty = 0; + } + iunlock(evs); + if(empty) + wakeup(&evs->r); +} + +static void +mprofmonitor(void *a, ulong tag, usize nb, int w) +{ + Mprof *p; + + if(memevents.ref != 0) + addmemevent(a, tag, nb, w); + p = memprof.bucket; + for(int i = 0; i < nelem(memprof.bucket)-1; i++, p++){ + if(p->tag == 0 || p->tag == tag) + break; + } + if(p->tag != 0 && p->tag != tag) + p->tag = ~0; + else + p->tag = tag; + if(w == 2) /* tag change */ + return; + if(w < 0){ + aadd(&p->na, -1); + aadd(&p->busy, -nb); + }else{ + aadd(&p->na, 1); + aadd(&p->busy, nb); + } +} + +static void +mput4(uchar *m, ulong v) +{ + m[0] = v>>24; + m[1] = v>>16; + m[2] = v>>8; + m[3] = v; +} + +static void +memprofinit(void) +{ + incref(&monitoring); + setmemprof(mprofmonitor); +} + +static Chan* +memattach(char *spec) +{ + return devattach('%', spec); +} + +static Walkqid* +memwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, memdir, nelem(memdir), devgen); +} + +static long +memstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, memdir, nelem(memdir), devgen); +} + +static Chan* +memopen(Chan *c, int omode) +{ + Mevents *evs; + + c = devopen(c, omode, memdir, nelem(memdir), devgen); + switch((ulong)c->qid.path){ + case Qevent: + evs = &memevents; + if(incref(evs) != 1){ + decref(evs); + c->flag &= ~COPEN; + error(Einuse); + } + evs->want = 0; + evs->lost = 0; + incref(&monitoring); + setmemprof(mprofmonitor); + break; + case Qprof: + break; + } + return c; +} + +static void +memclose(Chan *c) +{ + if((c->flag & COPEN) == 0) + return; + switch((ulong)c->qid.path) { + case Qevent: + if(decref(&monitoring) == 0) + setmemprof(nil); + decref(&memevents); + break; + case Qprof: + break; + } + +} + +static long +memread(Chan *c, 
void *va, long count, vlong offset) +{ + uchar *a; + int i; + Mevent *pe; + Mevents *evs; + Mprof *p; + + if(c->qid.type & QTDIR) + return devdirread(c, va, count, memdir, nelem(memdir), devgen); + + switch((ulong)c->qid.path) { + default: + error(Egreg); + case Qctl: + return 0; + case Qsum: + return mallocreadsummary(c, va, count, offset); + case Qevent: + evs = &memevents; + while(!isnonempty(evs)){ + evs->want = 1; + sleep(&evs->r, isnonempty, evs); + } + a = va; + do{ + if((count -= Evreclen) < 0) + break; + pe = &evs->events[evs->rd%nelem(evs->events)]; + mput4(a+0, 0); + mput4(a+4, pe->tag); + mput4(a+8, pe->koff); + mput4(a+12, pe->size); + a += Evreclen; + }while(++evs->rd != evs->wr); + return a-(uchar*)va; + case Qprof: + a = va; + for(i = offset/Profreclen; i < nelem(memprof.bucket); i++){ + p = &memprof.bucket[i]; + if((p->tag|p->na|p->busy) != 0){ + if((count -= Profreclen) < 0) + break; + mput4(a+0, p->tag); + mput4(a+4, p->na); + mput4(a+8, p->busy); + a += Profreclen; + } + } + return a-(uchar*)va; + } +} + +static long +memwrite(Chan *c, void *a, long n, vlong) +{ + Cmdbuf *cb; + + if(c->qid.type & QTDIR) + error(Eperm); + + switch((ulong)c->qid.path) { + default: + error(Egreg); + case Qctl: + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf == 1 && strcmp(cb->f[0], "start") == 0){ + if(incref(&monitoring) == 1) + setmemprof(mprofmonitor); + }else if(cb->nf == 1 && strcmp(cb->f[0], "stop") == 0){ + if(decref(&monitoring) == 0) + setmemprof(nil); + }else + cmderror(cb, "unknown command"); + poperror(); + free(cb); + break; + } + return n; +} + +Dev memdevtab = { + '%', + "mem", + + devreset, + memprofinit, //devinit, + devshutdown, + memattach, + memwalk, + memstat, + memopen, + devcreate, + memclose, + memread, + devbread, + memwrite, + devbwrite, + devremove, + devwstat +}; diff -Nru /sys/src/9k/port/devmem1.c /sys/src/9k/port/devmem1.c --- /sys/src/9k/port/devmem1.c Thu Jan 1 00:00:00 1970 +++ 
/sys/src/9k/port/devmem1.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,372 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +typedef struct Mprof Mprof; +typedef struct Mevents Mevents; +typedef struct Mevent Mevent; + +enum +{ + Qdir, + Qctl, + Qevent, + Qprof, + Qsum, +}; + +static +Dirtab memdir[] = +{ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "memctl", {Qctl}, 0, 0664, + "memevent", {Qevent}, 0, 0444, + "memprof", {Qprof}, 0, 0444, + "memsum", {Qsum}, 0, 0444, +}; + +enum +{ + Nevent = 10000, + BucketLg2= 15, /* for 512k kernel, allows allocation every 16 bytes */ + Nbucket= 1< 0, alloc; < 0, free */ +}; + +static struct Mevents +{ + Lock; + Ref; + Rendez r; + Mevent events[Nevent]; + uint rd; + uint wr; + int want; + ulong lost; +} memevents; + +static Ref monitoring; + +extern void setmemprof(void (*)(void*, ulong, usize, int)); /* qmalloc.c */ + +static void +aadd(int *addr, int delta) +{ + int value; + + do + value = *addr; + while(!CASW(addr, value, value+delta)); +} + +static int +isnonempty(void *v) +{ + Mevents *evs; + + evs = v; + return evs->rd != evs->wr; +} + +static int +isnotfull(Mevents *evs) +{ + return (evs->wr - evs->rd) < Nevent; +} + +static void +addmemevent(void *a, ulong tag, usize nb, int w) +{ + Mevents *evs; + Mevent e; + int empty; + + e.tag = tag; + e.koff = (uintptr)a - KZERO; + if(nb > MaxInt) + nb = MaxInt; + e.size = w < 0? 
-nb: nb; + + evs = &memevents; + ilock(evs); + if(isnotfull(evs)){ + empty = evs->rd == evs->wr; + evs->events[evs->wr++] = e; + }else{ + evs->lost++; + empty = 0; + } + iunlock(evs); + if(empty) + wakeup(&evs->r); +} + +static void +mprofmonitor(void *a, ulong tag, usize nb, int w) +{ + Mprof *p; + uint n; + + if(memevents.ref != 0) + addmemevent(a, tag, nb, w); + n = ((tag-(KTZERO&0xFFFFFFFF))/(512*KiB/Nbucket))&BucketMask; + if(n > Nbucket) + n = Nbucket-1; + for(;;){ + p = &memprof.bucket[n]; + if(p->tag == tag || p->tag == ~0) + break; + n = p->ovfl; + if(n == 0){ + if(w < 0) + return; + ilock(&memprof.ovlk); + if(p->tag != 0 && p->tag != tag){ + n = p->ovfl; + if(n != 0){ + iunlock(&memprof.ovlk); + /* follow the overflow chain */ + continue; + } + /* need an overflow entry */ + n = memprof.novfl; + if(n < Noverflow) + memprof.novfl++; + else + tag = ~0; + n += Nbucket; + p->ovfl = n; + p = &memprof.bucket[n]; + } + p->tag = tag; + iunlock(&memprof.ovlk); + break; + } + } + if(w < 0){ + aadd(&p->na, -1); + aadd(&p->busy, -nb); + }else{ + aadd(&p->na, 1); + aadd(&p->busy, nb); + } +} + +static void +mput4(uchar *m, ulong v) +{ + m[0] = v>>24; + m[1] = v>>16; + m[2] = v>>8; + m[3] = v; +} + +static void +memprofinit(void) +{ + incref(&monitoring); + setmemprof(mprofmonitor); +} + +static Chan* +memattach(char *spec) +{ + return devattach('%', spec); +} + +static Walkqid* +memwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, memdir, nelem(memdir), devgen); +} + +static long +memstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, memdir, nelem(memdir), devgen); +} + +static Chan* +memopen(Chan *c, int omode) +{ + Mevents *evs; + + c = devopen(c, omode, memdir, nelem(memdir), devgen); + switch((ulong)c->qid.path){ + case Qevent: + evs = &memevents; + if(incref(evs) != 1){ + decref(evs); + c->flag &= ~COPEN; + error(Einuse); + } + evs->rd = evs->wr = 0; + evs->want = 0; + evs->lost = 0; + incref(&monitoring); + 
setmemprof(mprofmonitor); + break; + case Qprof: + break; + } + return c; +} + +static void +memclose(Chan *c) +{ + if((c->flag & COPEN) == 0) + return; + switch((ulong)c->qid.path) { + case Qevent: + if(decref(&monitoring) == 0) + setmemprof(nil); + decref(&memevents); + break; + case Qprof: + break; + } + +} + +static long +memread(Chan *c, void *va, long count, vlong offset) +{ + uchar *a; + int i; + Mevent *pe; + Mevents *evs; + Mprof *p; + + if(c->qid.type & QTDIR) + return devdirread(c, va, count, memdir, nelem(memdir), devgen); + + switch((ulong)c->qid.path) { + default: + error(Egreg); + case Qctl: + return 0; + case Qsum: + return mallocreadsummary(c, va, count, offset); + case Qevent: + evs = &memevents; + while(!isnonempty(evs)){ + evs->want = 1; + sleep(&evs->r, isnonempty, evs); + } + a = va; + do{ + if((count -= Evreclen) < 0) + break; + pe = &evs->events[evs->rd]; + mput4(a+0, 0); + mput4(a+4, pe->tag); + mput4(a+8, pe->koff); + mput4(a+12, pe->size); + a += Evreclen; + }while(++evs->rd != evs->wr); + return a-(uchar*)va; + case Qprof: + a = va; + for(i = offset/Profreclen; i < nelem(memprof.bucket); i++){ + p = &memprof.bucket[i]; + if((p->tag|p->na|p->busy) != 0){ + if((count -= Profreclen) < 0) + break; + mput4(a+0, p->tag); + mput4(a+4, p->na); + mput4(a+8, p->busy); + a += Profreclen; + } + } + return a-(uchar*)va; + } +} + +static long +memwrite(Chan *c, void *a, long n, vlong) +{ + Cmdbuf *cb; + + if(c->qid.type & QTDIR) + error(Eperm); + + switch((ulong)c->qid.path) { + default: + error(Egreg); + case Qctl: + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf == 1 && strcmp(cb->f[0], "start") == 0){ + if(incref(&monitoring) == 1) + setmemprof(mprofmonitor); + }else if(cb->nf == 1 && strcmp(cb->f[0], "stop") == 0){ + if(decref(&monitoring) == 0) + setmemprof(nil); + }else + cmderror(cb, "unknown command"); + poperror(); + free(cb); + break; + } + return n; +} + +Dev memdevtab = { + '%', + "mem", + + devreset, + 
memprofinit, //devinit, + devshutdown, + memattach, + memwalk, + memstat, + memopen, + devcreate, + memclose, + memread, + devbread, + memwrite, + devbwrite, + devremove, + devwstat +}; diff -Nru /sys/src/9k/port/devmnt.c /sys/src/9k/port/devmnt.c --- /sys/src/9k/port/devmnt.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devmnt.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1203 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * References are managed as follows: + * The channel to the server - a network connection or pipe - has one + * reference for every Chan open on the server. The server channel has + * c->mux set to the Mnt used for muxing control to that server. Mnts + * have no reference count; they go away when c goes away. + * Each channel derived from the mount point has mchan set to c, + * and increfs/decrefs mchan to manage references on the server + * connection. + */ + +#define MAXRPC (IOHDRSZ+8192) + +struct Mntrpc +{ + Chan* c; /* Channel for whom we are working */ + Mntrpc* list; /* Free/pending list */ + Fcall request; /* Outgoing file system protocol message */ + Fcall reply; /* Incoming reply */ + Mnt* m; /* Mount device during rpc */ + Rendez r; /* Place to hang out */ + uchar* rpc; /* I/O Data buffer */ + uint rpclen; /* len of buffer */ + Block *b; /* reply blocks */ + char done; /* Rpc completed */ + uvlong stime; /* start time for mnt statistics */ + ulong reqlen; /* request length for mnt statistics */ + ulong replen; /* reply length for mnt statistics */ + Mntrpc* flushed; /* message this one flushes */ +}; + +enum +{ + TAGSHIFT = 5, /* ulong has to be 32 bits */ + TAGMASK = (1<>TAGSHIFT, +}; + +struct Mntalloc +{ + Lock; + Mnt* list; /* Mount devices in use */ + Mnt* mntfree; /* Free list */ + Mntrpc* rpcfree; + int nrpcfree; + int nrpcused; + uint id; + ulong tagmask[NMASK]; +}mntalloc; + +Mnt* mntchk(Chan*); +void mntdirfix(uchar*, Chan*); +Mntrpc* 
mntflushalloc(Mntrpc*, ulong); +void mntflushfree(Mnt*, Mntrpc*); +void mntfree(Mntrpc*); +void mntgate(Mnt*); +void mntpntfree(Mnt*); +void mntqrm(Mnt*, Mntrpc*); +Mntrpc* mntralloc(Chan*, ulong); +long mntrdwr(int, Chan*, void*, long, vlong); +int mntrpcread(Mnt*, Mntrpc*); +void mountio(Mnt*, Mntrpc*); +void mountmux(Mnt*, Mntrpc*); +void mountrpc(Mnt*, Mntrpc*); +int rpcattn(void*); +Chan* mntchan(void); + +char Esbadstat[] = "invalid directory entry received from server"; +char Enoversion[] = "version not established for mount channel"; + + +void (*mntstats)(int, Chan*, uvlong, ulong); + +static void +mntreset(void) +{ + mntalloc.id = 1; + mntalloc.tagmask[0] = 1; /* don't allow 0 as a tag */ + mntalloc.tagmask[NMASK-1] = 0x80000000UL; /* don't allow NOTAG */ + fmtinstall('F', fcallfmt); + fmtinstall('D', dirfmt); +/* We can't install %M since eipfmt does and is used in the kernel [sape] */ + + cinit(); +} + +/* + * Version is not multiplexed: message sent only once per connection. 
+ */ +usize +mntversion(Chan *c, u32int msize, char *version, usize returnlen) +{ + Fcall f; + uchar *msg; + Mnt *mnt; + char *v; + long l, n; + usize k; + vlong oo; + char buf[128]; + + qlock(&c->umqlock); /* make sure no one else does this until we've established ourselves */ + if(waserror()){ + qunlock(&c->umqlock); + nexterror(); + } + + /* defaults */ + if(msize == 0) + msize = MAXRPC; + if(msize > c->iounit && c->iounit != 0) + msize = c->iounit; + v = version; + if(v == nil || v[0] == '\0') + v = VERSION9P; + + /* validity */ + if(msize < 0) + error("bad iounit in version call"); + if(strncmp(v, VERSION9P, strlen(VERSION9P)) != 0) + error("bad 9P version specification"); + + mnt = c->mux; + + if(mnt != nil){ + qunlock(&c->umqlock); + poperror(); + + strecpy(buf, buf+sizeof buf, mnt->version); + k = strlen(buf); + if(strncmp(buf, v, k) != 0){ + snprint(buf, sizeof buf, "incompatible 9P versions %s %s", mnt->version, v); + error(buf); + } + if(returnlen != 0){ + if(returnlen < k) + error(Eshort); + memmove(version, buf, k); + } + return k; + } + + f.type = Tversion; + f.tag = NOTAG; + f.msize = msize; + f.version = v; + msg = malloc(8192+IOHDRSZ); + if(msg == nil) + exhausted("version memory"); + if(waserror()){ + free(msg); + nexterror(); + } + k = convS2M(&f, msg, 8192+IOHDRSZ); + if(k == 0) + error("bad fversion conversion on send"); + + lock(c); + oo = c->offset; + c->offset += k; + unlock(c); + + l = c->dev->write(c, msg, k, oo); + + if(l < k){ + lock(c); + c->offset -= k - l; + unlock(c); + error("short write in fversion"); + } + + /* message sent; receive and decode reply */ + n = c->dev->read(c, msg, 8192+IOHDRSZ, c->offset); + if(n <= 0) + error("EOF receiving fversion reply"); + + lock(c); + c->offset += n; + unlock(c); + + l = convM2S(msg, n, &f); + if(l != n) + error("bad fversion conversion on reply"); + if(f.type != Rversion){ + if(f.type == Rerror) + error(f.ename); + error("unexpected reply type in fversion"); + } + if(f.msize > msize) + 
error("server tries to increase msize in fversion"); + if(f.msize<256 || f.msize>1024*1024) + error("nonsense value of msize in fversion"); + k = strlen(f.version); + if(strncmp(f.version, v, k) != 0) + error("bad 9P version returned from server"); + + /* now build Mnt associated with this connection */ + lock(&mntalloc); + mnt = mntalloc.mntfree; + if(mnt != nil) + mntalloc.mntfree = mnt->list; + else { + mnt = malloc(sizeof(Mnt)); + if(mnt == nil) { + unlock(&mntalloc); + exhausted("mount devices"); + } + } + mnt->list = mntalloc.list; + mntalloc.list = mnt; + mnt->version = nil; + kstrdup(&mnt->version, f.version); + mnt->id = mntalloc.id++; + mnt->q = qopen(10*MAXRPC, 0, nil, nil); + mnt->msize = f.msize; + unlock(&mntalloc); + + if(returnlen != 0){ + if(returnlen < k) + error(Eshort); + memmove(version, f.version, k); + } + + poperror(); /* msg */ + free(msg); + + lock(mnt); + mnt->queue = 0; + mnt->rip = 0; + + c->flag |= CMSG; + c->mux = mnt; + mnt->c = c; + unlock(mnt); + + poperror(); /* c */ + qunlock(&c->umqlock); + + return k; +} + +Chan* +mntauth(Chan *c, char *spec) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = c->mux; + + if(mnt == nil){ + mntversion(c, MAXRPC, VERSION9P, 0); + mnt = c->mux; + if(mnt == nil) + error(Enoversion); + } + + c = mntchan(); + if(waserror()) { + /* Close must not be called since it will + * call mnt recursively + */ + chanfree(c); + nexterror(); + } + + r = mntralloc(0, mnt->msize); + + if(waserror()) { + mntfree(r); + nexterror(); + } + + r->request.type = Tauth; + r->request.afid = c->fid; + r->request.uname = up->user; + r->request.aname = spec; + mountrpc(mnt, r); + + c->qid = r->reply.aqid; + c->mchan = mnt->c; + incref(mnt->c); + c->mqid = c->qid; + c->mode = ORDWR; + + poperror(); /* r */ + mntfree(r); + + poperror(); /* c */ + + return c; + +} + +static Chan* +mntattach(char *muxattach) +{ + Mnt *mnt; + Chan *c; + Mntrpc *r; + struct bogus{ + Chan *chan; + Chan *authchan; + char *spec; + int flags; + }bogus; + + bogus = 
*((struct bogus *)muxattach); + c = bogus.chan; + + mnt = c->mux; + + if(mnt == nil){ + mntversion(c, 0, nil, 0); + mnt = c->mux; + if(mnt == nil) + error(Enoversion); + } + + c = mntchan(); + if(waserror()) { + /* Close must not be called since it will + * call mnt recursively + */ + chanfree(c); + nexterror(); + } + + r = mntralloc(0, mnt->msize); + + if(waserror()) { + mntfree(r); + nexterror(); + } + + r->request.type = Tattach; + r->request.fid = c->fid; + if(bogus.authchan == nil) + r->request.afid = NOFID; + else + r->request.afid = bogus.authchan->fid; + r->request.uname = up->user; + r->request.aname = bogus.spec; + mountrpc(mnt, r); + + c->qid = r->reply.qid; + c->mchan = mnt->c; + incref(mnt->c); + c->mqid = c->qid; + + poperror(); /* r */ + mntfree(r); + + poperror(); /* c */ + + if(bogus.flags & MCACHE) + c->flag |= CCACHE; + return c; +} + +Chan* +mntchan(void) +{ + Chan *c; + + c = devattach('M', 0); + lock(&mntalloc); + c->devno = mntalloc.id++; + unlock(&mntalloc); + + if(c->mchan) + panic("mntchan non-zero %#p", c->mchan); + return c; +} + +static Walkqid* +mntwalk(Chan *c, Chan *nc, char **name, int nname) +{ + int i, alloc; + Mnt *mnt; + Mntrpc *r; + Walkqid *wq; + + if(nc != nil) + print("mntwalk: nc != nil\n"); + if(nname > MAXWELEM) + error("devmnt: too many name elements"); + alloc = 0; + wq = smalloc(sizeof(Walkqid)+(nname-1)*sizeof(Qid)); + if(waserror()){ + if(alloc && wq->clone!=nil) + cclose(wq->clone); + free(wq); + return nil; + } + + alloc = 0; + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(nc == nil){ + nc = devclone(c); + /* + * Until the other side accepts this fid, + * we can't mntclose it. + * nc->dev remains nil for now. 
+ */ + alloc = 1; + } + wq->clone = nc; + + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = Twalk; + r->request.fid = c->fid; + r->request.newfid = nc->fid; + r->request.nwname = nname; + memmove(r->request.wname, name, nname*sizeof(char*)); + + mountrpc(mnt, r); + + if(r->reply.nwqid > nname) + error("too many QIDs returned by walk"); + if(r->reply.nwqid < nname){ + if(alloc) + cclose(nc); + wq->clone = nil; + if(r->reply.nwqid == 0){ + free(wq); + wq = nil; + goto Return; + } + } + + /* move new fid onto mnt device and update its qid */ + if(wq->clone != nil){ + if(wq->clone != c){ + wq->clone->dev = c->dev; + //if(wq->clone->dev != nil) //XDYNX + // devtabincr(wq->clone->dev); + wq->clone->mchan = c->mchan; + incref(c->mchan); + } + if(r->reply.nwqid > 0) + wq->clone->qid = r->reply.wqid[r->reply.nwqid-1]; + } + wq->nqid = r->reply.nwqid; + for(i=0; inqid; i++) + wq->qid[i] = r->reply.wqid[i]; + + Return: + poperror(); + mntfree(r); + poperror(); + return wq; +} + +static long +mntstat(Chan *c, uchar *dp, long n) +{ + Mnt *mnt; + Mntrpc *r; + usize nstat; + + if(n < BIT16SZ) + error(Eshortstat); + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = Tstat; + r->request.fid = c->fid; + mountrpc(mnt, r); + + if(r->reply.nstat > n){ + nstat = BIT16SZ; + PBIT16(dp, r->reply.nstat-2); + }else{ + nstat = r->reply.nstat; + memmove(dp, r->reply.stat, nstat); + validstat(dp, nstat); + mntdirfix(dp, c); + } + poperror(); + mntfree(r); + + return nstat; +} + +static Chan* +mntopencreate(int type, Chan *c, char *name, int omode, int perm) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = type; + r->request.fid = c->fid; + r->request.mode = omode; + if(type == Tcreate){ + r->request.perm = perm; + r->request.name = name; + } + mountrpc(mnt, r); + + c->qid = r->reply.qid; + c->offset = 
0; + c->mode = openmode(omode); + c->iounit = r->reply.iounit; + if(c->iounit == 0 || c->iounit > mnt->msize-IOHDRSZ) + c->iounit = mnt->msize-IOHDRSZ; + c->flag |= COPEN; + poperror(); + mntfree(r); + + if(c->flag & CCACHE) + copen(c); + + return c; +} + +static Chan* +mntopen(Chan *c, int omode) +{ + return mntopencreate(Topen, c, nil, omode, 0); +} + +static void +mntcreate(Chan *c, char *name, int omode, int perm) +{ + mntopencreate(Tcreate, c, name, omode, perm); +} + +static void +mntclunk(Chan *c, int t) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()){ + mntfree(r); + nexterror(); + } + + r->request.type = t; + r->request.fid = c->fid; + mountrpc(mnt, r); + mntfree(r); + poperror(); +} + +void +muxclose(Mnt *mnt) +{ + Mntrpc *q, *r; + + for(q = mnt->queue; q; q = r) { + r = q->list; + mntfree(q); + } + mnt->id = 0; + free(mnt->version); + mnt->version = nil; + mntpntfree(mnt); +} + +void +mntpntfree(Mnt *mnt) +{ + Mnt *f, **l; + Queue *q; + + lock(&mntalloc); + l = &mntalloc.list; + for(f = *l; f; f = f->list) { + if(f == mnt) { + *l = mnt->list; + break; + } + l = &f->list; + } + mnt->list = mntalloc.mntfree; + mntalloc.mntfree = mnt; + q = mnt->q; + unlock(&mntalloc); + + qfree(q); +} + +static void +mntclose(Chan *c) +{ + mntclunk(c, Tclunk); +} + +static void +mntremove(Chan *c) +{ + mntclunk(c, Tremove); +} + +static long +mntwstat(Chan *c, uchar *dp, long n) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = Twstat; + r->request.fid = c->fid; + r->request.nstat = n; + r->request.stat = dp; + mountrpc(mnt, r); + poperror(); + mntfree(r); + return n; +} + +static long +mntread(Chan *c, void *buf, long n, vlong off) +{ + uchar *p, *e; + int nc, cache, isdir; + usize dirlen; + + isdir = 0; + cache = c->flag & CCACHE; + if(c->qid.type & QTDIR) { + cache = 0; + isdir = 1; + } + + p = buf; + if(cache) { + nc = 
cread(c, buf, n, off); + if(nc > 0) { + n -= nc; + if(n == 0) + return nc; + p += nc; + off += nc; + } + n = mntrdwr(Tread, c, p, n, off); + cupdate(c, p, n, off); + return n + nc; + } + + n = mntrdwr(Tread, c, buf, n, off); + if(isdir) { + for(e = &p[n]; p+BIT16SZ < e; p += dirlen){ + dirlen = BIT16SZ+GBIT16(p); + if(p+dirlen > e) + break; + validstat(p, dirlen); + mntdirfix(p, c); + } + if(p != e) + error(Esbadstat); + } + return n; +} + +static long +mntwrite(Chan *c, void *buf, long n, vlong off) +{ + return mntrdwr(Twrite, c, buf, n, off); +} + +long +mntrdwr(int type, Chan *c, void *buf, long n, vlong off) +{ + Mnt *mnt; + Mntrpc *r; + char *uba; + int cache; + ulong cnt, nr, nreq; + + mnt = mntchk(c); + uba = buf; + cnt = 0; + cache = c->flag & CCACHE; + if(c->qid.type & QTDIR) + cache = 0; + for(;;) { + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = type; + r->request.fid = c->fid; + r->request.offset = off; + r->request.data = uba; + nr = n; + if(nr > mnt->msize-IOHDRSZ) + nr = mnt->msize-IOHDRSZ; + r->request.count = nr; + mountrpc(mnt, r); + nreq = r->request.count; + nr = r->reply.count; + if(nr > nreq) + nr = nreq; + + if(type == Tread) + r->b = bl2mem((uchar*)uba, r->b, nr); + else if(cache) + cwrite(c, (uchar*)uba, nr, off); + + poperror(); + mntfree(r); + off += nr; + uba += nr; + cnt += nr; + n -= nr; + if(nr != nreq || n == 0 || up->nnote) + break; + } + return cnt; +} + +void +mountrpc(Mnt *mnt, Mntrpc *r) +{ + char *sn, *cn; + int t; + + r->reply.tag = 0; + r->reply.type = Tmax; /* can't ever be a valid message type */ + + mountio(mnt, r); + + t = r->reply.type; + switch(t) { + case Rerror: + error(r->reply.ename); + case Rflush: + error(Eintr); + default: + if(t == r->request.type+1) + break; + sn = "?"; + if(mnt->c->path != nil) + sn = mnt->c->path->s; + cn = "?"; + if(r->c != nil && r->c->path != nil) + cn = r->c->path->s; + print("mnt: proc %s %d: mismatch from %s %s rep %#p tag %d fid 
%d T%d R%d rp %d\n", + up->text, up->pid, sn, cn, + r, r->request.tag, r->request.fid, r->request.type, + r->reply.type, r->reply.tag); + error(Emountrpc); + } +} + +void +mountio(Mnt *mnt, Mntrpc *r) +{ + int n; + + while(waserror()) { + if(mnt->rip == up) + mntgate(mnt); + if(strcmp(up->errstr, Eintr) != 0){ + mntflushfree(mnt, r); + nexterror(); + } + r = mntflushalloc(r, mnt->msize); + } + + lock(mnt); + r->m = mnt; + r->list = mnt->queue; + mnt->queue = r; + unlock(mnt); + + /* Transmit a file system rpc */ + if(mnt->msize == 0) + panic("msize"); + n = convS2M(&r->request, r->rpc, mnt->msize); + if(n < 0) + panic("bad message type in mountio"); + if(mnt->c->dev->write(mnt->c, r->rpc, n, 0) != n) + error(Emountrpc); + r->stime = fastticks(nil); + r->reqlen = n; + + /* Gate readers onto the mount point one at a time */ + for(;;) { + lock(mnt); + if(mnt->rip == 0) + break; + unlock(mnt); + sleep(&r->r, rpcattn, r); + if(r->done){ + poperror(); + mntflushfree(mnt, r); + return; + } + } + mnt->rip = up; + unlock(mnt); + while(r->done == 0) { + if(mntrpcread(mnt, r) < 0) + error(Emountrpc); + mountmux(mnt, r); + } + mntgate(mnt); + poperror(); + mntflushfree(mnt, r); +} + +static int +doread(Mnt *mnt, int len) +{ + Block *b; + + while(qlen(mnt->q) < len){ + b = mnt->c->dev->bread(mnt->c, mnt->msize, 0); + if(b == nil) + return -1; + if(blocklen(b) == 0){ + freeblist(b); + return -1; + } + qaddlist(mnt->q, b); + } + return 0; +} + +int +mntrpcread(Mnt *mnt, Mntrpc *r) +{ + int i, t, len, hlen; + Block *b, **l, *nb; + + r->reply.type = 0; + r->reply.tag = 0; + + /* read at least length, type, and tag and pullup to a single block */ + if(doread(mnt, BIT32SZ+BIT8SZ+BIT16SZ) < 0) + return -1; + nb = pullupqueue(mnt->q, BIT32SZ+BIT8SZ+BIT16SZ); + + /* read in the rest of the message, avoid ridiculous (for now) message sizes */ + len = GBIT32(nb->rp); + if(len > mnt->msize){ + qdiscard(mnt->q, qlen(mnt->q)); + return -1; + } + if(doread(mnt, len) < 0) + return -1; + + /* 
pullup the header (i.e. everything except data) */ + t = nb->rp[BIT32SZ]; + switch(t){ + case Rread: + hlen = BIT32SZ+BIT8SZ+BIT16SZ+BIT32SZ; + break; + default: + hlen = len; + break; + } + nb = pullupqueue(mnt->q, hlen); + + if(convM2S(nb->rp, len, &r->reply) <= 0){ + /* bad message, dump it */ + print("mntrpcread: convM2S failed\n"); + qdiscard(mnt->q, len); + return -1; + } + + /* hang the data off of the fcall struct */ + l = &r->b; + *l = nil; + do { + b = qremove(mnt->q); + if(hlen > 0){ + b->rp += hlen; + len -= hlen; + hlen = 0; + } + i = BLEN(b); + if(i <= len){ + len -= i; + *l = b; + l = &(b->next); + } else { + /* split block and put unused bit back */ + nb = allocb(i-len); + memmove(nb->wp, b->rp+len, i-len); + b->wp = b->rp+len; + nb->wp += i-len; + qputback(mnt->q, nb); + *l = b; + return 0; + } + }while(len > 0); + + return 0; +} + +void +mntgate(Mnt *mnt) +{ + Mntrpc *q; + + lock(mnt); + mnt->rip = 0; + for(q = mnt->queue; q; q = q->list) { + if(q->done == 0) + if(wakeup(&q->r)) + break; + } + unlock(mnt); +} + +void +mountmux(Mnt *mnt, Mntrpc *r) +{ + Mntrpc **l, *q; + + lock(mnt); + l = &mnt->queue; + for(q = *l; q; q = q->list) { + /* look for a reply to a message */ + if(q->request.tag == r->reply.tag) { + *l = q->list; + if(q != r) { + /* + * Completed someone else. + * Trade pointers to receive buffer. 
+ */ + q->reply = r->reply; + q->b = r->b; + r->b = nil; + } + q->done = 1; + if(mntstats != nil) + (*mntstats)(q->request.type, + mnt->c, q->stime, + q->reqlen + r->replen); + if(q != r) + wakeup(&q->r); + unlock(mnt); + return; + } + l = &q->list; + } + unlock(mnt); + print("unexpected reply tag %ud; type %d\n", r->reply.tag, r->reply.type); +} + +/* + * Create a new flush request and chain the previous + * requests from it + */ +Mntrpc* +mntflushalloc(Mntrpc *r, ulong iounit) +{ + Mntrpc *fr; + + fr = mntralloc(0, iounit); + + fr->request.type = Tflush; + if(r->request.type == Tflush) + fr->request.oldtag = r->request.oldtag; + else + fr->request.oldtag = r->request.tag; + fr->flushed = r; + + return fr; +} + +/* + * Free a chain of flushes. Remove each unanswered + * flush and the original message from the unanswered + * request queue. Mark the original message as done + * and if it hasn't been answered set the reply to to + * Rflush. + */ +void +mntflushfree(Mnt *mnt, Mntrpc *r) +{ + Mntrpc *fr; + + while(r){ + fr = r->flushed; + if(!r->done){ + r->reply.type = Rflush; + mntqrm(mnt, r); + } + if(fr) + mntfree(r); + r = fr; + } +} + +int +alloctag(void) +{ + int i, j; + ulong v; + + for(i = 0; i < NMASK; i++){ + v = mntalloc.tagmask[i]; + if(v == ~0UL) + continue; + for(j = 0; j < 1<>TAGSHIFT] &= ~(1<<(t&TAGMASK)); +} + +Mntrpc* +mntralloc(Chan *c, ulong msize) +{ + Mntrpc *new; + + lock(&mntalloc); + new = mntalloc.rpcfree; + if(new == nil){ + new = malloc(sizeof(Mntrpc)); + if(new == nil) { + unlock(&mntalloc); + exhausted("mount rpc header"); + } + /* + * The header is split from the data buffer as + * mountmux may swap the buffer with another header. 
+ */ + new->rpc = mallocz(msize, 0); + if(new->rpc == nil){ + free(new); + unlock(&mntalloc); + exhausted("mount rpc buffer"); + } + new->rpclen = msize; + new->request.tag = alloctag(); + } + else { + mntalloc.rpcfree = new->list; + mntalloc.nrpcfree--; + if(new->rpclen < msize){ + free(new->rpc); + new->rpc = mallocz(msize, 0); + if(new->rpc == nil){ + free(new); + mntalloc.nrpcused--; + unlock(&mntalloc); + exhausted("mount rpc buffer"); + } + new->rpclen = msize; + } + } + mntalloc.nrpcused++; + unlock(&mntalloc); + new->c = c; + new->done = 0; + new->flushed = nil; + new->b = nil; + return new; +} + +void +mntfree(Mntrpc *r) +{ + if(r->b != nil) + freeblist(r->b); + lock(&mntalloc); + if(mntalloc.nrpcfree >= 10){ + free(r->rpc); + freetag(r->request.tag); + free(r); + } + else{ + r->list = mntalloc.rpcfree; + mntalloc.rpcfree = r; + mntalloc.nrpcfree++; + } + mntalloc.nrpcused--; + unlock(&mntalloc); +} + +void +mntqrm(Mnt *mnt, Mntrpc *r) +{ + Mntrpc **l, *f; + + lock(mnt); + r->done = 1; + + l = &mnt->queue; + for(f = *l; f; f = f->list) { + if(f == r) { + *l = r->list; + break; + } + l = &f->list; + } + unlock(mnt); +} + +Mnt* +mntchk(Chan *c) +{ + Mnt *mnt; + + /* This routine is mostly vestiges of prior lives; now it's just sanity checking */ + + if(c->mchan == nil) + panic("mntchk 1: nil mchan c %s\n", chanpath(c)); + + mnt = c->mchan->mux; + + if(mnt == nil) + print("mntchk 2: nil mux c %s c->mchan %s \n", chanpath(c), chanpath(c->mchan)); + + /* + * Was it closed and reused (was error(Eshutdown); now, it cannot happen) + */ + if(mnt->id == 0 || mnt->id >= c->devno) + panic("mntchk 3: can't happen"); + + return mnt; +} + +/* + * Rewrite channel type and dev for in-flight data to + * reflect local values. These entries are known to be + * the first two in the Dir encoding after the count. 
+ */ +void +mntdirfix(uchar *dirbuf, Chan *c) +{ + uint r; + + r = c->dev->dc; + dirbuf += BIT16SZ; /* skip count */ + PBIT16(dirbuf, r); + dirbuf += BIT16SZ; + PBIT32(dirbuf, c->devno); +} + +int +rpcattn(void *v) +{ + Mntrpc *r; + + r = v; + return r->done || r->m->rip == 0; +} + +Dev mntdevtab = { + 'M', + "mnt", + + mntreset, + devinit, + devshutdown, + mntattach, + mntwalk, + mntstat, + mntopen, + mntcreate, + mntclose, + mntread, + devbread, + mntwrite, + devbwrite, + mntremove, + mntwstat, +}; diff -Nru /sys/src/9k/port/devpci.c /sys/src/9k/port/devpci.c --- /sys/src/9k/port/devpci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devpci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,261 @@ +/* + * access to PCI configuration space (devpnp.c without PNP) + * + * TODO + * - extend PCI raw access to configuration space (writes, byte/short access?) + * - implement PCI access to memory/io space/BIOS ROM + * - use c->aux instead of performing lookup on each read/write? + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +#define DPRINT if(0) print +#define XPRINT if(1) print + +enum { + Qtopdir = 0, + + Qpcidir, + Qpcictl, + Qpciraw, +}; + +#define TYPE(q) ((ulong)(q).path & 0x0F) +#define QID(c, t) (((c)<<4)|(t)) + +static Dirtab topdir[] = { + ".", { Qtopdir, 0, QTDIR }, 0, 0555, + "pci", { Qpcidir, 0, QTDIR }, 0, 0555, +}; + +extern Dev pcidevtab; + +static int +pcigen2(Chan *c, int t, int tbdf, Dir *dp) +{ + Qid q; + + q = (Qid){BUSBDF(tbdf)|t, 0, 0}; + switch(t) { + case Qpcictl: + snprint(up->genbuf, sizeof(up->genbuf), "%d.%d.%dctl", BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + devdir(c, q, up->genbuf, 0, eve, 0444, dp); + return 1; + case Qpciraw: + snprint(up->genbuf, sizeof(up->genbuf), "%d.%d.%draw", BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + devdir(c, q, up->genbuf, 128, eve, 0444, dp); + return 1; + } + return -1; +} + +static int +pcigen(Chan *c, char *, Dirtab*, 
int, int s, Dir *dp) +{ + Qid q; + Pcidev *p; + int tbdf; + + switch(TYPE(c->qid)){ + case Qtopdir: + if(s == DEVDOTDOT){ + q = (Qid){QID(0, Qtopdir), 0, QTDIR}; + snprint(up->genbuf, sizeof(up->genbuf), "#%C", pcidevtab.dc); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + return devgen(c, nil, topdir, nelem(topdir), s, dp); + case Qpcidir: + if(s == DEVDOTDOT){ + q = (Qid){QID(0, Qtopdir), 0, QTDIR}; + snprint(up->genbuf, sizeof(up->genbuf), "#%C", pcidevtab.dc); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + p = pcimatch(nil, 0, 0); + while(s >= 2 && p != nil) { + p = pcimatch(p, 0, 0); + s -= 2; + } + if(p == nil) + return -1; + return pcigen2(c, s+Qpcictl, p->tbdf, dp); + case Qpcictl: + case Qpciraw: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + return -1; + return pcigen2(c, TYPE(c->qid), tbdf, dp); + default: + break; + } + return -1; +} + +static Chan* +pciattach(char *spec) +{ + return devattach(pcidevtab.dc, spec); +} + +Walkqid* +pciwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, pcigen); +} + +static long +pcistat(Chan* c, uchar* dp, long n) +{ + return devstat(c, dp, n, nil, 0, pcigen); +} + +static Chan* +pciopen(Chan *c, int omode) +{ + return devopen(c, omode, nil, 0, pcigen); +} + +static void +pciclose(Chan*) +{ +} + +static long +pciread(Chan *c, void *va, long n, vlong offset) +{ + ulong x; + Pcidev *p; + char buf[256], *ebuf, *w; + char *a = va; + int i, tbdf, r; + + switch(TYPE(c->qid)){ + case Qtopdir: + case Qpcidir: + return devdirread(c, a, n, nil, 0, pcigen); + case Qpcictl: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + error(Egreg); + ebuf = buf+sizeof buf-1; /* -1 for newline */ + w = seprint(buf, ebuf, "%.2x.%.2x.%.2x %.4x/%.4x %3d", + p->ccrb, p->ccru, p->ccrp, p->vid, p->did, p->intl); + for(i=0; imem); i++){ + if(p->mem[i].size == 0) + 
continue; + w = seprint(w, ebuf, " %d:%.8lux %d", i, p->mem[i].bar, p->mem[i].size); + } + *w++ = '\n'; + *w = '\0'; + return readstr(offset, a, n, buf); + case Qpciraw: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + error(Egreg); + if(offset > 256) + return 0; + if(n+offset > 256) + n = 256-offset; + r = offset; + if(!(r & 3) && n == 4){ + x = pcicfgr32(p, r); + PBIT32(a, x); + return 4; + } + if(!(r & 1) && n == 2){ + x = pcicfgr16(p, r); + PBIT16(a, x); + return 2; + } + for(i = 0; i < n; i++){ + x = pcicfgr8(p, r); + PBIT8(a, x); + a++; + r++; + } + return i; + default: + error(Egreg); + } + return n; +} + +static long +pciwrite(Chan *c, void *va, long n, vlong offset) +{ + char buf[256]; + Pcidev *p; + ulong x; + uchar *a; + int i, r, tbdf; + + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + strncpy(buf, va, n); + buf[n] = 0; + + switch(TYPE(c->qid)){ + case Qpciraw: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + error(Egreg); + if(offset > 256) + return 0; + if(n+offset > 256) + n = 256-offset; + a = va; + r = offset; + if(!(r & 3) && n == 4){ + x = GBIT32(a); + pcicfgw32(p, r, x); + return 4; + } + if(!(r & 1) && n == 2){ + x = GBIT16(a); + pcicfgw16(p, r, x); + return 2; + } + for(i = 0; i < n; i++){ + x = GBIT8(a); + pcicfgw8(p, r, x); + a++; + r++; + } + return i; + default: + error(Egreg); + } + return n; +} + + +Dev pcidevtab = { + '$', + "pci", + + devreset, + devinit, + devshutdown, + pciattach, + pciwalk, + pcistat, + pciopen, + devcreate, + pciclose, + pciread, + devbread, + pciwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devpipe.c /sys/src/9k/port/devpipe.c --- /sys/src/9k/port/devpipe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devpipe.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,384 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + 
+typedef struct Pipe Pipe;
+/*
+ * One pipe.  The embedded QLock guards ref and qref.
+ *	ref	set to 1 by pipeattach, incremented by pipewalk for
+ *		each cloned channel, decremented by pipeclose
+ *	q[0]	read through "data", written through "data1"
+ *	q[1]	read through "data1", written through "data"
+ *	qref[i]	number of opens of data file i
+ */
+struct Pipe
+{
+	QLock;
+	Pipe *next;
+	int ref;
+	ulong path;
+	Queue *q[2];
+	int qref[2];
+};
+
+/* source of pipe path numbers; the lock covers path */
+struct
+{
+	Lock;
+	ulong path;
+} pipealloc;
+
+enum
+{
+	Qdir,
+	Qdata0,
+	Qdata1,
+
+	PIPEQSIZE = 256*KiB,	/* limit given to qopen for each queue */
+};
+
+Dirtab pipedir[] =
+{
+	".", {Qdir,0,QTDIR}, 0, DMDIR|0500,
+	"data", {Qdata0}, 0, 0600,
+	"data1", {Qdata1}, 0, 0600,
+};
+#define NPIPEDIR 3
+
+/* qid.path: low 5 bits are the file type, the remaining bits the pipe id */
+#define PIPETYPE(x) (((unsigned)x)&0x1f)
+#define PIPEID(x) ((((unsigned)x))>>5)
+#define PIPEQID(i, t) ((((unsigned)i)<<5)|(t))
+
+/*
+ * create a pipe: allocate the Pipe and both of its queues, give it
+ * a fresh path number and hang it off the root channel's aux.
+ *
+ * NOTE(review): exhausted() is called after devattach has already
+ * produced c; whether c is reclaimed on that path is not visible
+ * here -- confirm.
+ */
+static Chan*
+pipeattach(char *spec)
+{
+	Pipe *p;
+	Chan *c;
+
+	c = devattach('|', spec);
+	p = malloc(sizeof(Pipe));
+	if(p == 0)
+		exhausted("memory");
+	p->ref = 1;
+
+	p->q[0] = qopen(PIPEQSIZE, 0, 0, 0);
+	if(p->q[0] == 0){
+		free(p);
+		exhausted("memory");
+	}
+	p->q[1] = qopen(PIPEQSIZE, 0, 0, 0);
+	if(p->q[1] == 0){
+		free(p->q[0]);
+		free(p);
+		exhausted("memory");
+	}
+
+	lock(&pipealloc);
+	p->path = ++pipealloc.path;
+	unlock(&pipealloc);
+
+	mkqid(&c->qid, PIPEQID(2*p->path, Qdir), 0, QTDIR);
+	c->aux = p;
+	c->devno = 0;
+	return c;
+}
+
+/*
+ * Directory generator for a pipe.  Entry i is tab[i+1], so "." is
+ * never listed.  The length reported for a data file is the number
+ * of bytes queued in the queue that file reads from.
+ * Returns 1 when *dp was filled and -1 past the end of tab.
+ */
+static int
+pipegen(Chan *c, char*, Dirtab *tab, int ntab, int i, Dir *dp)
+{
+	Qid q;
+	int len;
+	Pipe *p;
+
+	if(i == DEVDOTDOT){
+		devdir(c, c->qid, "#|", 0, eve, DMDIR|0555, dp);
+		return 1;
+	}
+	i++;	/* skip . */
+	if(tab==0 || i>=ntab)
+		return -1;
+
+	tab += i;
+	p = c->aux;
+	switch((ulong)tab->qid.path){
+	case Qdata0:
+		len = qlen(p->q[0]);
+		break;
+	case Qdata1:
+		len = qlen(p->q[1]);
+		break;
+	default:
+		len = tab->length;
+		break;
+	}
+	/* keep the pipe id of c, substitute the file type from the table */
+	mkqid(&q, PIPEQID(PIPEID(c->qid.path), tab->qid.path), 0, QTFILE);
+	devdir(c, q, tab->name, len, eve, tab->perm, dp);
+	return 1;
+}
+
+
+/*
+ * Walk within a pipe.  When devwalk produced a clone distinct from
+ * c the Pipe gains a reference for it; if c was already open (not
+ * expected, hence the print) the matching qref is bumped as well.
+ */
+static Walkqid*
+pipewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	Walkqid *wq;
+	Pipe *p;
+
+	wq = devwalk(c, nc, name, nname, pipedir, NPIPEDIR, pipegen);
+	if(wq != nil && wq->clone != nil && wq->clone != c){
+		p = c->aux;
+		qlock(p);
+		p->ref++;
+		if(c->flag & COPEN){
+			print("channel open in pipewalk\n");
+			switch(PIPETYPE(c->qid.path)){
+			case Qdata0:
+				p->qref[0]++;
+				break;
+			case Qdata1:
+				p->qref[1]++;
+				break;
+			}
+		}
+		qunlock(p);
+	}
+	return wq;
+}
+
+/*
+ * Stat the file c refers to, packing the result into db (n bytes).
+ * Data files report the current length of their read queue.
+ * Raises Eshortstat if convD2M could not fit the entry.
+ */
+static long
+pipestat(Chan *c, uchar *db, long n)
+{
+	Pipe *p;
+	Dir dir;
+
+	p = c->aux;
+
+	switch(PIPETYPE(c->qid.path)){
+	case Qdir:
+		devdir(c, c->qid, ".", 0, eve, DMDIR|0555, &dir);
+		break;
+	case Qdata0:
+		devdir(c, c->qid, "data", qlen(p->q[0]), eve, 0600, &dir);
+		break;
+	case Qdata1:
+		devdir(c, c->qid, "data1", qlen(p->q[1]), eve, 0600, &dir);
+		break;
+	default:
+		panic("pipestat");
+	}
+	n = convD2M(&dir, db, n);
+	if(n < BIT16SZ)
+		error(Eshortstat);
+	return n;
+}
+
+/*
+ * open a pipe file.  The directory may only be opened OREAD.
+ * Opening a data file counts one more open of that side in qref;
+ * the queues themselves were already made by pipeattach.
+ */
+static Chan*
+pipeopen(Chan *c, int omode)
+{
+	Pipe *p;
+
+	if(c->qid.type & QTDIR){
+		if(omode != OREAD)
+			error(Ebadarg);
+		c->mode = omode;
+		c->flag |= COPEN;
+		c->offset = 0;
+		return c;
+	}
+
+	p = c->aux;
+	qlock(p);
+	switch(PIPETYPE(c->qid.path)){
+	case Qdata0:
+		p->qref[0]++;
+		break;
+	case Qdata1:
+		p->qref[1]++;
+		break;
+	}
+	qunlock(p);
+
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	c->iounit = qiomaxatomic;
+	return c;
+}
+
+/*
+ * Close a channel on a pipe: drop the open count of its side if it
+ * was open, then drop the channel's reference on the Pipe.
+ */
+static void
+pipeclose(Chan *c)
+{
+	Pipe *p;
+
+	p = c->aux;
+	qlock(p);
+
+	if(c->flag & COPEN){
+		/*
+		 * closing either side hangs up the stream
+		 */
+		switch(PIPETYPE(c->qid.path)){
+		case Qdata0:
+			p->qref[0]--;
+			if(p->qref[0] == 0){
+				qhangup(p->q[1], 0);
+				qclose(p->q[0]);
+			}
+			break;
+		case Qdata1:
+			p->qref[1]--;
+			if(p->qref[1] == 0){
+				qhangup(p->q[0], 0);
+				qclose(p->q[1]);
+			}
+			break;
+		}
+	}
+
+
+	/*
+	 * if both sides are closed, they are reusable
+	 */
+	if(p->qref[0] == 0 && p->qref[1] == 0){
+		qreopen(p->q[0]);
+		qreopen(p->q[1]);
+	}
+
+	/*
+	 * free the structure on last close
+	 */
+	p->ref--;
+	if(p->ref == 0){
+		/* unlock first: the QLock lives inside the Pipe being freed */
+		qunlock(p);
+		free(p->q[0]);
+		free(p->q[1]);
+		free(p);
+	} else
+		qunlock(p);
+}
+
+/*
+ * Read the directory, or up to n bytes from the queue belonging to
+ * the data file (q[0] for "data", q[1] for "data1").
+ */
+static long
+piperead(Chan *c, void *va, long n, vlong)
+{
+	Pipe *p;
+
+	p = c->aux;
+
+	switch(PIPETYPE(c->qid.path)){
+	case Qdir:
+		return devdirread(c, va, n, pipedir, NPIPEDIR, pipegen);
+	case Qdata0:
+		return qread(p->q[0], va, n);
+	case Qdata1:
+		return qread(p->q[1], va, n);
+	default:
+		panic("piperead");
+	}
+	return -1;	/* not reached */
+}
+
+/*
+ * Block-read variant of piperead for the data files; anything else
+ * is passed to devbread.
+ */
+static Block*
+pipebread(Chan *c, long n, vlong offset)
+{
+	Pipe *p;
+
+	p = c->aux;
+
+	switch(PIPETYPE(c->qid.path)){
+	case Qdata0:
+		return qbread(p->q[0], n);
+	case Qdata1:
+		return qbread(p->q[1], n);
+	}
+
+	return devbread(c, n, offset);
+}
+
+/*
+ * a write to a closed pipe causes a note to be sent to
+ * the process.
+ */ +static long +pipewrite(Chan *c, void *va, long n, vlong) +{ + Pipe *p; + + if(!islo()) + print("pipewrite hi %#p\n", getcallerpc(&c)); + if(waserror()) { + /* avoid notes when pipe is a mounted queue */ + if((c->flag & CMSG) == 0) + postnote(up, 1, "sys: write on closed pipe", NUser); + nexterror(); + } + + p = c->aux; + + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + n = qwrite(p->q[1], va, n); + break; + + case Qdata1: + n = qwrite(p->q[0], va, n); + break; + + default: + panic("pipewrite"); + } + + poperror(); + return n; +} + +static long +pipebwrite(Chan *c, Block *bp, vlong) +{ + long n; + Pipe *p; + + if(waserror()) { + /* avoid notes when pipe is a mounted queue */ + if((c->flag & CMSG) == 0) + postnote(up, 1, "sys: write on closed pipe", NUser); + nexterror(); + } + + p = c->aux; + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + n = qbwrite(p->q[1], bp); + break; + + case Qdata1: + n = qbwrite(p->q[0], bp); + break; + + default: + n = 0; + panic("pipebwrite"); + } + + poperror(); + return n; +} + +Dev pipedevtab = { + '|', + "pipe", + + devreset, + devinit, + devshutdown, + pipeattach, + pipewalk, + pipestat, + pipeopen, + devcreate, + pipeclose, + piperead, + pipebread, + pipewrite, + pipebwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devprobe.c /sys/src/9k/port/devprobe.c --- /sys/src/9k/port/devprobe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devprobe.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,400 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "probe.h" + +enum { + Qdir, + Qctl, + Qdata, +}; + +enum { + ProbeEntry = 1, + ProbeExit +}; + +/* fix me make this programmable */ +enum { + defaultlogsize = 1024, + printsize = 64, +}; + +typedef struct Probelog Probelog; +struct Probelog { + uvlong ticks; + /* yeah, waste a whole int on something stupid but ... 
*/ + int info; + ulong pc; + /* these are different depending on type */ + long dat[4]; +}; + +static Rendez probesleep; +static QLock probeslk; +static Probe *probes; +static Lock loglk; +static Probelog *probelog = nil; +/* probe indices. These are just unsigned longs. You mask them + * to get an index. This makes fifo empty/full etc. trivial. + */ +static ulong pw = 0, pr = 0; +static int probesactive = 0; +static unsigned long logsize = defaultlogsize, logmask = defaultlogsize - 1; + +static char eventname[] = { + [ProbeEntry] = 'E', + [ProbeExit] = 'X' +}; + +static Dirtab probedir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "probectl", {Qctl}, 0, 0664, + "probe", {Qdata}, 0, 0440, +}; + +char hex[] = { + '0', + '1', + '2', + '3', + '4', + '5', + '6', + '7', + '8', + '9', + 'A', + 'B', + 'C', + 'D', + 'E', + 'F', +}; + +/* big-endian ... */ +void +hex32(ulong l, char *c) +{ + int i; + for(i = 8; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex64(uvlong l, char *c) +{ + hex32(l>>32, c); + hex32(l, &c[8]); +} +static int +lognonempty(void *) +{ + return pw - pr; +} + +static int +logfull(void) +{ + return (pw - pr) >= logsize; +} + +static ulong +idx(ulong f) +{ + return f & logmask; +} + +/* can return NULL, meaning, no record for you */ +static struct Probelog * +newpl(void) +{ + ulong index; + + if (logfull()){ + wakeup(&probesleep); + return nil; + } + + ilock(&loglk); + index = pw++; + iunlock(&loglk); + + return &probelog[idx(index)]; + +} + +static void +probeentry(Probe *p) +{ + struct Probelog *pl; +//print("probeentry %p p %p func %p argp %p\n", &p, p, p->func, p->argp); + pl = newpl(); + if (! 
pl) + return; + cycles(&pl->ticks); + pl->pc = (ulong)p->func; + pl->dat[0] = p->argp[0]; + pl->dat[1] = p->argp[1]; + pl->dat[2] = p->argp[2]; + pl->dat[3] = p->argp[3]; + pl->info = ProbeEntry; +} + +static void +probeexit(Probe *p) +{ +//print("probeexit %p p %p func %p argp %p\n", &p, p, p->func, p->argp); + struct Probelog *pl; + pl = newpl(); + if (! pl) + return; + cycles(&pl->ticks); + pl->pc = (ulong)p->func; + pl->dat[0] = p->rval; + pl->info = ProbeExit; +} + +static Chan* +probeattach(char *spec) +{ + return devattach('+', spec); +} + +static Walkqid* +probewalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, probedir, nelem(probedir), devgen); +} + +static long +probestat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, probedir, nelem(probedir), devgen); +} + +static Chan* +probeopen(Chan *c, int omode) +{ + /* if there is no probelog, allocate one. Open always fails + * if the basic alloc fails. You can resize it later. + */ + if (! probelog) + probelog = malloc(sizeof(*probelog)*logsize); + /* I guess malloc doesn't toss an error */ + if (! 
probelog) + error("probelog malloc failed"); + + c = devopen(c, omode, probedir, nelem(probedir), devgen); + return c; +} + +static void +probeclose(Chan *) +{ +} + +static long +proberead(Chan *c, void *a, long n, vlong offset) +{ + char *buf; + char *cp = a; + struct Probelog *pl; + Probe *p; + int i; + static QLock gate; + if(c->qid.type == QTDIR) + return devdirread(c, a, n, probedir, nelem(probedir), devgen); + switch((ulong)c->qid.path){ + default: + error("proberead: bad qid"); + case Qctl: + buf = malloc(READSTR); + i = 0; + qlock(&probeslk); + i += snprint(buf + i, READSTR - i, "logsize %lud\n", logsize); + for(p = probes; p != nil; p = p->next) + i += snprint(buf + i, READSTR - i, "probe %p new %s\n", + p->func, p->name); + + for(p = probes; p != nil; p = p->next) + if (p->enabled) + i += snprint(buf + i, READSTR - i, "probe %s on\n", + p->name); + i += snprint(buf + i, READSTR - i, "#probehits %lud, in queue %lud\n", + pw, pw-pr); + snprint(buf + i, READSTR - i, "#probelog %p\n", probelog); + qunlock(&probeslk); + n = readstr(offset, a, n, buf); + free(buf); + break; + case Qdata: + qlock(&gate); + if(waserror()){ + qunlock(&gate); + nexterror(); + } + while(!lognonempty(nil)) + tsleep(&probesleep, lognonempty, nil, 5000); + i = 0; + while(lognonempty((void *)0)){ + int j; + pl = probelog + idx(pr); + + if ((i + printsize) >= n) + break; + /* simple format */ + cp[0] = eventname[pl->info]; + cp ++; + *cp++ = ' '; + hex32(pl->pc, cp); + cp[8] = ' '; + cp += 9; + hex64(pl->ticks, cp); + cp[16] = ' '; + cp += 17; + for(j = 0; j < 4; j++){ + hex32(pl->dat[j], cp); + cp[8] = ' '; + cp += 9; + } + /* adjust for extra skip above */ + cp--; + *cp++ = '\n'; + pr++; + i += printsize; + } + poperror(); + qunlock(&gate); + n = i; + break; + } + return n; +} + +static long +probewrite(Chan *c, void *a, long n, vlong) +{ + char *tok[5]; + char *ep, *s = nil; + Probe *p, **pp; + int ntok; + + qlock(&probeslk); + if(waserror()){ + qunlock(&probeslk); + if(s != nil) 
free(s); + nexterror(); + } + switch((ulong)c->qid.path){ + default: + error("proberead: bad qid"); + case Qctl: + s = malloc(n + 1); + memmove(s, a, n); + s[n] = 0; + ntok = tokenize(s, tok, nelem(tok)); + if(!strcmp(tok[0], "probe")){ /* 'probe' ktextaddr 'on'|'off'|'mk'|'del' [name] */ + if(ntok < 3) + error("devprobe: usage: 'probe' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name]"); + for(pp = &probes; *pp != nil; pp = &(*pp)->next) + if(!strcmp(tok[1], (*pp)->name)) + break; + p = *pp; + if(!strcmp(tok[2], "new")){ + ulong addr; + void *func; + addr = strtoul(tok[1], &ep, 0); + func = (void*)addr; + if(*ep) + error("devprobe: address not in recognized format"); + // if(addr < ((ulong) start) || addr > ((ulong) end)) + // error("devprobe: address out of bounds"); + if(p != nil) + error("devprobe: %#p already has probe"); + p = mkprobe(func, probeentry, probeexit); + p->next = probes; + if(ntok < 4) + snprint(p->name, sizeof p->name, "%p", func); + else + strncpy(p->name, tok[3], sizeof p->name); + probes = p; + } else if(!strcmp(tok[2], "on")){ + if(p == nil) + error("devprobe: probe not found"); + if(!p->enabled) + probeinstall(p); +print("probeinstall in devprobe\n"); + probesactive++; + } else if(!strcmp(tok[2], "off")){ + if(p == nil) + error("devprobe: probe not found"); + if(p->enabled) + probeuninstall(p); + probesactive--; + } else if(!strcmp(tok[2], "del")){ + if(p == nil) + error("devprobe: probe not found"); + if(p->enabled) + probeuninstall(p); + probesactive--; + *pp = p->next; + freeprobe(p); + } else if(!strcmp(tok[2], "mv")){ + if(p == nil) + error("devprobe: probe not found"); + if(ntok < 4) + error("devprobe: rename without new name?"); + strncpy(p->name, tok[3], sizeof p->name); + } + } else if(!strcmp(tok[0], "size")){ + int l, size; + struct Probelog *newprobelog; + l = strtoul(tok[1], &ep, 0); + if(*ep) + error("devprobe: size not in recognized format"); + size = 1 << l; + /* sort of foolish. Alloc new probe first, then free old. 
*/ + /* and too bad if there are unread probes */ + newprobelog = malloc(sizeof(*newprobelog)*size); + /* does malloc throw waserror? I don't know */ + free(probelog); + probelog = newprobelog; + logsize = size; + pr = pw = 0; + } else { + error("devprobe: usage: 'probe' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name] or: 'size' buffersize (power of 2)"); + } + free(s); + break; + } + poperror(); + qunlock(&probeslk); + return n; +} + +Dev probedevtab = { + '+', + "probe", + devreset, + devinit, + devshutdown, + probeattach, + probewalk, + probestat, + probeopen, + devcreate, + probeclose, + proberead, + devbread, + probewrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devproc.c /sys/src/9k/port/devproc.c --- /sys/src/9k/port/devproc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devproc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1604 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include "tos.h" +#include +#include "ureg.h" + +enum +{ + Qdir, + Qtrace, + Qargs, + Qctl, + Qfd, + Qfpregs, + Qkregs, + Qmem, + Qnote, + Qnoteid, + Qnotepg, + Qns, + Qproc, + Qregs, + Qsegment, + Qstatus, + Qtext, + Qwait, + Qprofile, + Qsyscall, +}; + +enum +{ + CMclose, + CMclosefiles, + CMfixedpri, + CMhang, + CMkill, + CMnohang, + CMnoswap, + CMpri, + CMprivate, + CMprofile, + CMstart, + CMstartstop, + CMstartsyscall, + CMstop, + CMwaitstop, + CMwired, + CMtrace, + /* real time */ + CMperiod, + CMdeadline, + CMcost, + CMsporadic, + CMdeadlinenotes, + CMadmit, + CMextra, + CMexpel, + CMevent, +}; + +enum{ + Nevents = 0x4000, + Emask = Nevents - 1, +}; + +#define STATSIZE (2*KNAMELEN+12+9*12) +/* + * Status, fd, and ns are left fully readable (0444) because of their use in debugging, + * particularly on shared servers. 
+ * Arguably, ns and fd shouldn't be readable; if you'd prefer, change them to 0000
+ *
+ * Columns are name, qid, length, permissions.  procgen replaces a
+ * permission of 0000 by the process's procmode; for any other value
+ * it ors in the read bits of procmode.
+ */
+Dirtab procdir[] =
+{
+	"args", {Qargs}, 0, 0660,
+	"ctl", {Qctl}, 0, 0000,
+	"fd", {Qfd}, 0, 0444,
+	"fpregs", {Qfpregs}, 0, 0000,
+	"kregs", {Qkregs}, sizeof(Ureg), 0400,
+	"mem", {Qmem}, 0, 0000,
+	"note", {Qnote}, 0, 0000,
+	"noteid", {Qnoteid}, 0, 0664,
+	"notepg", {Qnotepg}, 0, 0000,
+	"ns", {Qns}, 0, 0444,
+	"proc", {Qproc}, 0, 0400,
+	"regs", {Qregs}, sizeof(Ureg), 0000,
+	"segment", {Qsegment}, 0, 0444,
+	"status", {Qstatus}, STATSIZE, 0444,
+	"text", {Qtext}, 0, 0000,
+	"wait", {Qwait}, 0, 0400,
+	"profile", {Qprofile}, 0, 0400,
+	"syscall", {Qsyscall}, 0, 0400,
+};
+
+/*
+ * Commands accepted on the ctl file: CM index, command word, and a
+ * count.  NOTE(review): the count is presumably the number of
+ * fields the command line must have, with 0 meaning "any" --
+ * confirm against the Cmdtab definition.
+ */
+static
+Cmdtab proccmd[] = {
+	CMclose, "close", 2,
+	CMclosefiles, "closefiles", 1,
+	CMfixedpri, "fixedpri", 2,
+	CMhang, "hang", 1,
+	CMnohang, "nohang", 1,
+	CMnoswap, "noswap", 1,
+	CMkill, "kill", 1,
+	CMpri, "pri", 2,
+	CMprivate, "private", 1,
+	CMprofile, "profile", 1,
+	CMstart, "start", 1,
+	CMstartstop, "startstop", 1,
+	CMstartsyscall, "startsyscall", 1,
+	CMstop, "stop", 1,
+	CMwaitstop, "waitstop", 1,
+	CMwired, "wired", 2,
+	CMtrace, "trace", 0,
+	CMperiod, "period", 2,
+	CMdeadline, "deadline", 2,
+	CMcost, "cost", 2,
+	CMsporadic, "sporadic", 1,
+	CMdeadlinenotes, "deadlinenotes", 1,
+	CMadmit, "admit", 1,
+	CMextra, "extra", 1,
+	CMexpel, "expel", 1,
+	CMevent, "event", 1,
+};
+
+/* Segment type from portdat.h */
+static char *sname[]={ "Text", "Data", "Bss", "Stack", "Shared", "Phys", };
+
+/*
+ * Qids are, in path:
+ *	 5 bits of file type (qids above), the mask used by QID()
+ *	26 bits of process slot number + 1, the mask used by SLOT()
+ *	     in vers,
+ *	32 bits of pid, for consistency checking
+ * If notepg, c->pgrpid.path is pgrp slot, .vers is noteid.
+ */ +#define QSHIFT 5 /* location in qid of proc slot # */ + +#define QID(q) ((((ulong)(q).path)&0x0000001F)>>0) +#define SLOT(q) (((((ulong)(q).path)&0x07FFFFFE0)>>QSHIFT)-1) +#define PID(q) ((q).vers) +#define NOTEID(q) ((q).vers) + +static void procctlreq(Proc*, char*, int); +static int procctlmemio(Proc*, uintptr, int, void*, int); +static Chan* proctext(Chan*, Proc*); +static int procstopped(void*); +static void mntscan(Mntwalk*, Proc*); + +static Traceevent *tevents; +static Lock tlock; +static int topens; +static int tproduced, tconsumed; + +static void +profclock(Ureg *ur, Timer *) +{ + Tos *tos; + + if(up == nil || up->state != Running) + return; + + /* user profiling clock */ + if(userureg(ur)){ + tos = (Tos*)(USTKTOP-sizeof(Tos)); + tos->clock += TK2MS(1); + segclock(userpc(ur)); + } +} + +static int +procgen(Chan *c, char *name, Dirtab *tab, int, int s, Dir *dp) +{ + Qid qid; + Proc *p; + char *ename; + Segment *q; + int pid; + ulong path, perm, len; + + if(s == DEVDOTDOT){ + mkqid(&qid, Qdir, 0, QTDIR); + devdir(c, qid, "#p", 0, eve, 0555, dp); + return 1; + } + + if(c->qid.path == Qdir){ + if(s == 0){ + strcpy(up->genbuf, "trace"); + mkqid(&qid, Qtrace, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + + if(name != nil){ + /* ignore s and use name to find pid */ + pid = strtol(name, &ename, 10); + if(pid<=0 || ename[0]!='\0') + return -1; + s = psindex(pid); + if(s < 0) + return -1; + } + else if(--s >= procalloc.nproc) + return -1; + + if((p = psincref(s)) == nil || (pid = p->pid) == 0) + return 0; + sprint(up->genbuf, "%d", pid); + /* + * String comparison is done in devwalk so + * name must match its formatted pid. 
+ */ + if(name != nil && strcmp(name, up->genbuf) != 0) + return -1; + mkqid(&qid, (s+1)<genbuf, 0, p->user, DMDIR|0555, dp); + psdecref(p); + return 1; + } + if(c->qid.path == Qtrace){ + strcpy(up->genbuf, "trace"); + mkqid(&qid, Qtrace, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + if(s >= nelem(procdir)) + return -1; + if(tab) + panic("procgen"); + + tab = &procdir[s]; + path = c->qid.path&~(((1<qid))) == nil) + return -1; + perm = tab->perm; + if(perm == 0) + perm = p->procmode; + else /* just copy read bits */ + perm |= p->procmode & 0444; + + len = tab->length; + switch(QID(c->qid)) { + case Qwait: + len = p->nwait; /* incorrect size, but >0 means there's something to read */ + break; + case Qprofile: + q = p->seg[TSEG]; + if(q && q->profile) { + len = (q->top-q->base)>>LRESPROF; + len *= sizeof(*q->profile); + } + break; + } + + mkqid(&qid, path|tab->qid.path, c->qid.vers, QTFILE); + devdir(c, qid, tab->name, len, p->user, perm, dp); + psdecref(p); + return 1; +} + +static void +_proctrace(Proc* p, Tevent etype, vlong ts, vlong) +{ + Traceevent *te; + + if (p->trace == 0 || topens == 0 || + tproduced - tconsumed >= Nevents) + return; + + te = &tevents[tproduced&Emask]; + te->pid = p->pid; + te->etype = etype; + if (ts == 0) + te->time = todget(nil); + else + te->time = ts; + tproduced++; +} + +static void +procinit(void) +{ + if(procalloc.nproc >= (1<<(16-QSHIFT))-1) + print("warning: too many procs for devproc\n"); + addclock0link((void (*)(void))profclock, 113); /* Relative prime to HZ */ +} + +static Chan* +procattach(char *spec) +{ + return devattach('p', spec); +} + +static Walkqid* +procwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, procgen); +} + +static long +procstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, procgen); +} + +/* + * none can't read or write state on other + * processes. 
This is to contain access of + * servers running as none should they be + * subverted by, for example, a stack attack. + */ +static void +nonone(Proc *p) +{ + if(p == up) + return; + if(strcmp(up->user, "none") != 0) + return; + if(isevegroup()) + return; + error(Eperm); +} + +static Chan* +procopen(Chan *c, int omode) +{ + Proc *p; + Pgrp *pg; + Chan *tc; + int pid; + + if(c->qid.type & QTDIR) + return devopen(c, omode, 0, 0, procgen); + + if(QID(c->qid) == Qtrace){ + if (omode != OREAD) + error(Eperm); + lock(&tlock); + if (waserror()){ + unlock(&tlock); + nexterror(); + } + if (topens > 0) + error("already open"); + topens++; + if (tevents == nil){ + tevents = (Traceevent*)malloc(sizeof(Traceevent) * Nevents); + if(tevents == nil) + error(Enomem); + tproduced = tconsumed = 0; + } + proctrace = _proctrace; + poperror(); + unlock(&tlock); + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; + } + + if((p = psincref(SLOT(c->qid))) == nil) + error(Eprocdied); + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + pid = PID(c->qid); + if(p->pid != pid) + error(Eprocdied); + + omode = openmode(omode); + + switch(QID(c->qid)){ + case Qtext: + if(omode != OREAD) + error(Eperm); + tc = proctext(c, p); + tc->offset = 0; + poperror(); + qunlock(&p->debug); + psdecref(p); + return tc; + + case Qproc: + case Qkregs: + case Qsegment: + case Qprofile: + case Qfd: + if(omode != OREAD) + error(Eperm); + break; + + case Qnote: + if(p->privatemem) + error(Eperm); + break; + + case Qmem: + case Qctl: + if(p->privatemem) + error(Eperm); + nonone(p); + break; + + case Qargs: + case Qnoteid: + case Qstatus: + case Qwait: + case Qregs: + case Qfpregs: + nonone(p); + break; + + case Qsyscall: + nonone(p); + if(p->syscallq != nil) + error(Einuse); + c->aux = qopen(1024, 1, nil, nil); + p->syscallq = c->aux; + break; + + case Qns: + if(omode != OREAD) + error(Eperm); + c->aux = malloc(sizeof(Mntwalk)); + break; + + case 
Qnotepg: + nonone(p); + pg = p->pgrp; + if(pg == nil) + error(Eprocdied); + if(omode!=OWRITE || pg->pgrpid == 1) + error(Eperm); + c->pgrpid.path = pg->pgrpid+1; + c->pgrpid.vers = p->noteid; + break; + + default: + poperror(); + qunlock(&p->debug); + psdecref(p); + pprint("procopen %#llux\n", c->qid.path); + error(Egreg); + } + + /* Affix pid to qid */ + if(p->state != Dead) + c->qid.vers = p->pid; + + /* make sure the process slot didn't get reallocated while we were playing */ + coherence(); + if(p->pid != pid) + error(Eprocdied); + + tc = devopen(c, omode, 0, 0, procgen); + poperror(); + qunlock(&p->debug); + psdecref(p); + + return tc; +} + +static long +procwstat(Chan *c, uchar *db, long n) +{ + Proc *p; + Dir *d; + + if(c->qid.type & QTDIR) + error(Eperm); + + if(QID(c->qid) == Qtrace) + return devwstat(c, db, n); + + if((p = psincref(SLOT(c->qid))) == nil) + error(Eprocdied); + nonone(p); + d = nil; + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + free(d); + nexterror(); + } + + if(p->pid != PID(c->qid)) + error(Eprocdied); + + if(strcmp(up->user, p->user) != 0 && strcmp(up->user, eve) != 0) + error(Eperm); + + d = smalloc(sizeof(Dir)+n); + n = convM2D(db, n, &d[0], (char*)&d[1]); + if(n == 0) + error(Eshortstat); + if(!emptystr(d->uid) && strcmp(d->uid, p->user) != 0){ + if(strcmp(up->user, eve) != 0) + error(Eperm); + else + kstrdup(&p->user, d->uid); + } + if(d->mode != ~0UL) + p->procmode = d->mode&0777; + + poperror(); + qunlock(&p->debug); + psdecref(p); + free(d); + + return n; +} + + +static long +procoffset(long offset, char *va, int *np) +{ + if(offset > 0) { + offset -= *np; + if(offset < 0) { + memmove(va, va+*np+offset, -offset); + *np = -offset; + } + else + *np = 0; + } + return offset; +} + +static int +procqidwidth(Chan *c) +{ + char buf[32]; + + return sprint(buf, "%lud", c->qid.vers); +} + +int +procfdprint(Chan *c, int fd, int w, char *s, int ns) +{ + int n; + + if(w == 0) + w = procqidwidth(c); + n = 
snprint(s, ns, "%3d %.2s %C %4ud (%.16llux %*lud %.2ux) %5ld %8lld %s\n", + fd, + &"r w rw"[(c->mode&3)<<1], + c->dev->dc, c->devno, + c->qid.path, w, c->qid.vers, c->qid.type, + c->iounit, c->offset, c->path->s); + return n; +} + +static int +procfds(Proc *p, char *va, int count, long offset) +{ + Fgrp *f; + Chan *c; + char buf[256]; + int n, i, w, ww; + char *a; + + /* print to buf to avoid holding fgrp lock while writing to user space */ + if(count > sizeof buf) + count = sizeof buf; + a = buf; + + qlock(&p->debug); + f = p->fgrp; + if(f == nil){ + qunlock(&p->debug); + return 0; + } + lock(f); + if(waserror()){ + unlock(f); + qunlock(&p->debug); + nexterror(); + } + + n = readstr(0, a, count, p->dot->path->s); + n += snprint(a+n, count-n, "\n"); + offset = procoffset(offset, a, &n); + /* compute width of qid.path */ + w = 0; + for(i = 0; i <= f->maxfd; i++) { + c = f->fd[i]; + if(c == nil) + continue; + ww = procqidwidth(c); + if(ww > w) + w = ww; + } + for(i = 0; i <= f->maxfd; i++) { + c = f->fd[i]; + if(c == nil) + continue; + n += procfdprint(c, i, w, a+n, count-n); + offset = procoffset(offset, a, &n); + } + poperror(); + unlock(f); + qunlock(&p->debug); + + /* copy result to user space, now that locks are released */ + memmove(va, buf, n); + + return n; +} + +static void +procclose(Chan * c) +{ + Proc *p; + + switch(QID(c->qid)){ + case Qtrace: + lock(&tlock); + if(topens > 0) + topens--; + if(topens == 0) + proctrace = nil; + unlock(&tlock); + break; + case Qns: + if(c->aux != nil) + free(c->aux); + break; + case Qsyscall: + if((p = psincref(SLOT(c->qid))) != nil){ + qlock(&p->debug); + if(p->pid == PID(c->qid)) + p->syscallq = nil; + qunlock(&p->debug); + psdecref(p); + } + if(c->aux != nil) + qfree(c->aux); + break; + } +} + +static void +int2flag(int flag, char *s) +{ + if(flag == 0){ + *s = '\0'; + return; + } + *s++ = '-'; + if(flag & MAFTER) + *s++ = 'a'; + if(flag & MBEFORE) + *s++ = 'b'; + if(flag & MCREATE) + *s++ = 'c'; + if(flag & MCACHE) + 
*s++ = 'C'; + *s = '\0'; +} + +static int +procargs(Proc *p, char *buf, int nbuf) +{ + int j, k, m; + char *a; + int n; + + a = p->args; + if(p->setargs){ + snprint(buf, nbuf, "%s [%s]", p->text, p->args); + return strlen(buf); + } + n = p->nargs; + for(j = 0; j < nbuf - 1; j += m){ + if(n <= 0) + break; + if(j != 0) + buf[j++] = ' '; + m = snprint(buf+j, nbuf-j, "%q", a); + k = strlen(a) + 1; + a += k; + n -= k; + } + return j; +} + +static int +eventsavailable(void *) +{ + return tproduced > tconsumed; +} + +static long +procread(Chan *c, void *va, long n, vlong off) +{ + Proc *p; + long l, r; + Waitq *wq; + Ureg kur; + uchar *rptr; + Mntwalk *mw; + Segment *sg, *s; + int i, j, navail, ne, pid, rsize; + char flag[10], *sps, *srv, statbuf[NSEG*64]; + uintptr offset; + uintmem paddr, plimit, psize; + uvlong u; + + if(c->qid.type & QTDIR) + return devdirread(c, va, n, 0, 0, procgen); + + offset = off; + + if(QID(c->qid) == Qtrace){ + if(!eventsavailable(nil)) + return 0; + + rptr = va; + navail = tproduced - tconsumed; + if(navail > n / sizeof(Traceevent)) + navail = n / sizeof(Traceevent); + while(navail > 0) { + if((tconsumed & Emask) + navail > Nevents) + ne = Nevents - (tconsumed & Emask); + else + ne = navail; + i = ne * sizeof(Traceevent); + memmove(rptr, &tevents[tconsumed & Emask], i); + + tconsumed += ne; + rptr += i; + navail -= ne; + } + return rptr - (uchar*)va; + } + + if((p = psincref(SLOT(c->qid))) == nil) + error(Eprocdied); + if(p->pid != PID(c->qid)){ + psdecref(p); + error(Eprocdied); + } + + switch(QID(c->qid)){ + default: + psdecref(p); + break; + case Qargs: + qlock(&p->debug); + j = procargs(p, up->genbuf, sizeof up->genbuf); + qunlock(&p->debug); + psdecref(p); + if(offset >= j) + return 0; + if(offset+n > j) + n = j-offset; + memmove(va, &up->genbuf[offset], n); + return n; + + case Qsyscall: + return qread(c->aux, va, n); + + case Qmem: + if(!iskaddr(offset) + || (offset >= USTKTOP-USTKSIZE && offset < USTKTOP)){ + r = procctlmemio(p, 
offset, n, va, 1); + psdecref(p); + return r; + } + + if(!iseve()){ + psdecref(p); + error(Eperm); + } + + /* validate kernel addresses */ + if(offset < PTR2UINT(end)) { + if(offset+n > PTR2UINT(end)) + n = PTR2UINT(end) - offset; + memmove(va, UINT2PTR(offset), n); + psdecref(p); + return n; + } + paddr = PADDR(UINT2PTR(offset)); + if(!isrmapped(&rmapram, paddr, &psize)){ + psdecref(p); + error(Ebadarg); + } + plimit = paddr + psize; + /* plimit-1 because plimit might be zero (address space top) */ + if(paddr+n >= plimit-1) + n = plimit - paddr; + memmove(va, UINT2PTR(offset), n); + psdecref(p); + return n; + + case Qprofile: + s = p->seg[TSEG]; + if(s == 0 || s->profile == 0) + error("profile is off"); + i = (s->top-s->base)>>LRESPROF; + i *= sizeof(*s->profile); + if(offset >= i){ + psdecref(p); + return 0; + } + if(offset+n > i) + n = i - offset; + memmove(va, ((char*)s->profile)+offset, n); + psdecref(p); + return n; + + case Qnote: + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + if(p->pid != PID(c->qid)) + error(Eprocdied); + if(n < 1) /* must accept at least the '\0' */ + error(Etoosmall); + if(p->nnote == 0) + n = 0; + else { + i = strlen(p->note[0].msg) + 1; + if(i > n) + i = n; + rptr = va; + memmove(rptr, p->note[0].msg, i); + rptr[i-1] = '\0'; + p->nnote--; + memmove(p->note, p->note+1, p->nnote*sizeof(Note)); + n = i; + } + if(p->nnote == 0) + p->notepending = 0; + poperror(); + qunlock(&p->debug); + psdecref(p); + return n; + + case Qproc: + if(offset >= sizeof(Proc)){ + psdecref(p); + return 0; + } + if(offset+n > sizeof(Proc)) + n = sizeof(Proc) - offset; + memmove(va, ((char*)p)+offset, n); + psdecref(p); + return n; + + case Qregs: + rptr = (uchar*)p->dbgreg; + rsize = sizeof(Ureg); + regread: + if(rptr == 0){ + psdecref(p); + error(Enoreg); + } + if(offset >= rsize){ + psdecref(p); + return 0; + } + if(offset+n > rsize) + n = rsize - offset; + memmove(va, rptr+offset, n); + psdecref(p); + return n; 
+ + case Qkregs: + memset(&kur, 0, sizeof(Ureg)); + setkernur(&kur, p); + rptr = (uchar*)&kur; + rsize = sizeof(Ureg); + goto regread; + + case Qfpregs: + r = fpudevprocio(p, va, n, offset, 0); + psdecref(p); + return r; + + case Qstatus: + if(offset >= STATSIZE){ + psdecref(p); + return 0; + } + if(offset+n > STATSIZE) + n = STATSIZE - offset; + + sps = p->psstate; + if(sps == 0) + sps = statename[p->state]; + memset(statbuf, ' ', sizeof statbuf); + sprint(statbuf, "%-*.*s%-*.*s%-12.11s", + KNAMELEN, KNAMELEN-1, p->text, + KNAMELEN, KNAMELEN-1, p->user, + sps); + j = 2*KNAMELEN + 12; + + for(i = 0; i < 6; i++) { + l = p->time[i]; + if(i == TReal) + l = sys->ticks - l; + l = TK2MS(l); + readnum(0, statbuf+j+NUMSIZE*i, NUMSIZE, l, NUMSIZE); + } + /* ignore stack, which is mostly non-existent */ + u = 0; + for(i=1; iseg[i]; + if(s) + u += s->top - s->base; + } + readnum(0, statbuf+j+NUMSIZE*6, NUMSIZE, u>>10, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*7, NUMSIZE, p->basepri, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*8, NUMSIZE, p->priority, NUMSIZE); + memmove(va, statbuf+offset, n); + psdecref(p); + return n; + + case Qsegment: + j = 0; + for(i = 0; i < NSEG; i++) { + sg = p->seg[i]; + if(sg == 0) + continue; + j += sprint(statbuf+j, "%-6s %c%c %p %p %4d\n", + sname[sg->type&SG_TYPE], + sg->type&SG_RONLY ? 'R' : ' ', + sg->profile ? 
'P' : ' ', + sg->base, sg->top, sg->ref); + } + psdecref(p); + if(offset >= j) + return 0; + if(offset+n > j) + n = j-offset; + if(n == 0 && offset == 0) + exhausted("segments"); + memmove(va, &statbuf[offset], n); + return n; + + case Qwait: + if(!canqlock(&p->qwaitr)){ + psdecref(p); + error(Einuse); + } + + if(waserror()) { + qunlock(&p->qwaitr); + psdecref(p); + nexterror(); + } + + lock(&p->exl); + if(up == p && p->nchild == 0 && p->waitq == 0) { + unlock(&p->exl); + error(Enochild); + } + pid = p->pid; + while(p->waitq == 0) { + unlock(&p->exl); + sleep(&p->waitr, haswaitq, p); + if(p->pid != pid) + error(Eprocdied); + lock(&p->exl); + } + wq = p->waitq; + p->waitq = wq->next; + p->nwait--; + unlock(&p->exl); + + poperror(); + qunlock(&p->qwaitr); + psdecref(p); + n = snprint(va, n, "%d %lud %lud %lud %q", + wq->w.pid, + wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal], + wq->w.msg); + free(wq); + return n; + + case Qns: + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + if(p->pgrp == nil || p->pid != PID(c->qid)) + error(Eprocdied); + mw = c->aux; + if(mw->cddone){ + poperror(); + qunlock(&p->debug); + psdecref(p); + return 0; + } + mntscan(mw, p); + if(mw->mh == 0){ + mw->cddone = 1; + i = snprint(va, n, "cd %s\n", p->dot->path->s); + poperror(); + qunlock(&p->debug); + psdecref(p); + return i; + } + int2flag(mw->cm->mflag, flag); + if(strcmp(mw->cm->to->path->s, "#M") == 0){ + srv = srvname(mw->cm->to->mchan); + i = snprint(va, n, "mount %s %s %s %s\n", flag, + srv==nil? mw->cm->to->mchan->path->s : srv, + mw->mh->from->path->s, mw->cm->spec? 
mw->cm->spec : ""); + free(srv); + }else + i = snprint(va, n, "bind %s %s %s\n", flag, + mw->cm->to->path->s, mw->mh->from->path->s); + poperror(); + qunlock(&p->debug); + psdecref(p); + return i; + + case Qnoteid: + r = readnum(offset, va, n, p->noteid, NUMSIZE); + psdecref(p); + return r; + case Qfd: + r = procfds(p, va, n, offset); + psdecref(p); + return r; + } + error(Egreg); + return 0; /* not reached */ +} + +static void +mntscan(Mntwalk *mw, Proc *p) +{ + Pgrp *pg; + Mount *t; + Mhead *f; + int best, i, last, nxt; + + pg = p->pgrp; + rlock(&pg->ns); + + nxt = 0; + best = (int)(~0U>>1); /* largest 2's complement int */ + + last = 0; + if(mw->mh) + last = mw->cm->mountid; + + for(i = 0; i < MNTHASH; i++) { + for(f = pg->mnthash[i]; f; f = f->hash) { + for(t = f->mount; t; t = t->next) { + if(mw->mh == 0 || + (t->mountid > last && t->mountid < best)) { + mw->cm = t; + mw->mh = f; + best = mw->cm->mountid; + nxt = 1; + } + } + } + } + if(nxt == 0) + mw->mh = 0; + + runlock(&pg->ns); +} + +static long +procwrite(Chan *c, void *va, long n, vlong off) +{ + Proc *p, *t; + int i, id, l; + char *args, buf[ERRMAX]; + uintptr offset; + + if(c->qid.type & QTDIR) + error(Eisdir); + + /* Use the remembered noteid in the channel rather + * than the process pgrpid + */ + if(QID(c->qid) == Qnotepg) { + pgrpnote(NOTEID(c->pgrpid), va, n, NUser); + return n; + } + + if((p = psincref(SLOT(c->qid))) == nil) + error(Eprocdied); + + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + if(p->pid != PID(c->qid)) + error(Eprocdied); + + offset = off; + + switch(QID(c->qid)){ + case Qargs: + if(n == 0) + error(Eshort); + if(n >= ERRMAX) + error(Etoobig); + memmove(buf, va, n); + args = malloc(n+1); + if(args == nil) + error(Enomem); + memmove(args, buf, n); + l = n; + if(args[l-1] != 0) + args[l++] = 0; + free(p->args); + p->nargs = l; + p->args = args; + p->setargs = 1; + break; + + case Qmem: + if(p->state != Stopped) + error(Ebadctl); + + n 
= procctlmemio(p, offset, n, va, 0); + break; + + case Qregs: + if(offset >= sizeof(Ureg)) + n = 0; + else if(offset+n > sizeof(Ureg)) + n = sizeof(Ureg) - offset; + if(p->dbgreg == 0) + error(Enoreg); + setregisters(p->dbgreg, (char*)(p->dbgreg)+offset, va, n); + break; + + case Qfpregs: + n = fpudevprocio(p, va, n, offset, 1); + break; + + case Qctl: + procctlreq(p, va, n); + break; + + case Qnote: + if(p->kp) + error(Eperm); + if(n >= ERRMAX-1) + error(Etoobig); + memmove(buf, va, n); + buf[n] = 0; + if(!postnote(p, 0, buf, NUser)) + error("note not posted"); + break; + case Qnoteid: + id = atoi(va); + if(id == p->pid) { + p->noteid = id; + break; + } + for(i = 0; (t = psincref(i)) != nil; i++){ + if(t->state == Dead || t->noteid != id){ + psdecref(t); + continue; + } + if(strcmp(p->user, t->user) != 0){ + psdecref(t); + error(Eperm); + } + psdecref(t); + p->noteid = id; + break; + } + if(p->noteid != id) + error(Ebadarg); + break; + default: + poperror(); + qunlock(&p->debug); + psdecref(p); + pprint("unknown qid %#llux in procwrite\n", c->qid.path); + error(Egreg); + } + poperror(); + qunlock(&p->debug); + psdecref(p); + return n; +} + +Dev procdevtab = { + 'p', + "proc", + + devreset, + procinit, + devshutdown, + procattach, + procwalk, + procstat, + procopen, + devcreate, + procclose, + procread, + devbread, + procwrite, + devbwrite, + devremove, + procwstat, +}; + +static Chan* +proctext(Chan *c, Proc *p) +{ + Chan *tc; + Image *i; + Segment *s; + + s = p->seg[TSEG]; + if(s == 0) + error(Enonexist); + if(p->state==Dead) + error(Eprocdied); + + lock(s); + i = s->image; + if(i == 0) { + unlock(s); + error(Eprocdied); + } + unlock(s); + + lock(i); + if(waserror()) { + unlock(i); + nexterror(); + } + + tc = i->c; + if(tc == 0) + error(Eprocdied); + + if(incref(tc) == 1 || (tc->flag&COPEN) == 0 || tc->mode!=OREAD) { + cclose(tc); + error(Eprocdied); + } + + if(p->pid != PID(c->qid)) + error(Eprocdied); + + poperror(); + unlock(i); + + return tc; +} + +void 
+procstopwait(Proc *p, int ctl) +{ + int pid; + + if(p->pdbg) + error(Einuse); + if(procstopped(p) || p->state == Broken) + return; + + if(ctl != 0) + p->procctl = ctl; + p->pdbg = up; + pid = p->pid; + qunlock(&p->debug); + up->psstate = "Stopwait"; + if(waserror()) { + p->pdbg = 0; + qlock(&p->debug); + nexterror(); + } + sleep(&up->sleep, procstopped, p); + poperror(); + qlock(&p->debug); + if(p->pid != pid) + error(Eprocdied); +} + +static void +procctlcloseone(Proc *p, Fgrp *f, int fd) +{ + Chan *c; + + c = f->fd[fd]; + if(c == nil) + return; + f->fd[fd] = nil; + unlock(f); + qunlock(&p->debug); + cclose(c); + qlock(&p->debug); + lock(f); +} + +void +procctlclosefiles(Proc *p, int all, int fd) +{ + int i; + Fgrp *f; + + f = p->fgrp; + if(f == nil) + error(Eprocdied); + + lock(f); + incref(f); + if(all) + for(i = 0; i < f->maxfd; i++) + procctlcloseone(p, f, i); + else + procctlcloseone(p, f, fd); + unlock(f); + closefgrp(f); +} + +static char * +parsetime(vlong *rt, char *s) +{ + uvlong ticks; + ulong l; + char *e, *p; + static int p10[] = {100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; + + if (s == nil) + return("missing value"); + ticks=strtoul(s, &e, 10); + if (*e == '.'){ + p = e+1; + l = strtoul(p, &e, 10); + if(e-p > nelem(p10)) + return "too many digits after decimal point"; + if(e-p == 0) + return "ill-formed number"; + l *= p10[e-p-1]; + }else + l = 0; + if (*e == '\0' || strcmp(e, "s") == 0){ + ticks = 1000000000 * ticks + l; + }else if (strcmp(e, "ms") == 0){ + ticks = 1000000 * ticks + l/1000; + }else if (strcmp(e, "µs") == 0 || strcmp(e, "us") == 0){ + ticks = 1000 * ticks + l/1000000; + }else if (strcmp(e, "ns") != 0) + return "unrecognized unit"; + *rt = ticks; + return nil; +} + +static void +procctlreq(Proc *p, char *va, int n) +{ + Segment *s; + int npc, pri; + Cmdbuf *cb; + Cmdtab *ct; + vlong time; + char *e; + void (*pt)(Proc*, int, vlong, vlong); + + if(p->kp) /* no ctl requests to kprocs */ + error(Eperm); + + cb = 
parsecmd(va, n); + if(waserror()){ + free(cb); + nexterror(); + } + + ct = lookupcmd(cb, proccmd, nelem(proccmd)); + + switch(ct->index){ + case CMclose: + procctlclosefiles(p, 0, atoi(cb->f[1])); + break; + case CMclosefiles: + procctlclosefiles(p, 1, 0); + break; + case CMhang: + p->hang = 1; + break; + case CMkill: + prockill(p, Proc_exitme, "sys: killed"); + break; + case CMnohang: + p->hang = 0; + break; + case CMnoswap: + /* obsolete */ + break; + case CMpri: + pri = atoi(cb->f[1]); + if(pri > PriNormal && !iseve()) + error(Eperm); + procpriority(p, pri, 0); + break; + case CMfixedpri: + pri = atoi(cb->f[1]); + if(pri > PriNormal && !iseve()) + error(Eperm); + procpriority(p, pri, 1); + break; + case CMprivate: + p->privatemem = 1; + break; + case CMprofile: + s = p->seg[TSEG]; + if(s == 0 || (s->type&SG_TYPE) != SG_TEXT) + error(Ebadctl); + if(s->profile != 0) + free(s->profile); + npc = (s->top-s->base)>>LRESPROF; + s->profile = malloc(npc*sizeof(*s->profile)); + if(s->profile == 0) + error(Enomem); + break; + case CMstart: + if(p->state != Stopped) + error(Ebadctl); + ready(p); + break; + case CMstartstop: + if(p->state != Stopped) + error(Ebadctl); + p->procctl = Proc_traceme; + ready(p); + procstopwait(p, Proc_traceme); + break; + case CMstartsyscall: + if(p->state != Stopped) + error(Ebadctl); + p->procctl = Proc_tracesyscall; + ready(p); + procstopwait(p, Proc_tracesyscall); + break; + case CMstop: + procstopwait(p, Proc_stopme); + break; + case CMwaitstop: + procstopwait(p, 0); + break; + case CMwired: + procwired(p, atoi(cb->f[1])); + break; + case CMtrace: + switch(cb->nf){ + case 1: + p->trace ^= 1; + break; + case 2: + p->trace = (atoi(cb->f[1]) != 0); + break; + default: + error("args"); + } + break; + /* real time */ + case CMperiod: + if(p->edf == nil) + edfinit(p); + if(e=parsetime(&time, cb->f[1])) /* time in ns */ + error(e); + edfstop(p); + p->edf->T = time/1000; /* Edf times are µs */ + break; + case CMdeadline: + if(p->edf == nil) + 
edfinit(p); + if(e=parsetime(&time, cb->f[1])) + error(e); + edfstop(p); + p->edf->D = time/1000; + break; + case CMcost: + if(p->edf == nil) + edfinit(p); + if(e=parsetime(&time, cb->f[1])) + error(e); + edfstop(p); + p->edf->C = time/1000; + break; + case CMsporadic: + if(p->edf == nil) + edfinit(p); + p->edf->flags |= Sporadic; + break; + case CMdeadlinenotes: + if(p->edf == nil) + edfinit(p); + p->edf->flags |= Sendnotes; + break; + case CMadmit: + if(p->edf == 0) + error("edf params"); + if(e = edfadmit(p)) + error(e); + break; + case CMextra: + if(p->edf == nil) + edfinit(p); + p->edf->flags |= Extratime; + break; + case CMexpel: + if(p->edf) + edfstop(p); + break; + case CMevent: + pt = proctrace; + if(up->trace && pt) + pt(up, SUser, 0, 0); + break; + } + + poperror(); + free(cb); +} + +static int +procstopped(void *a) +{ + Proc *p = a; + return p->state == Stopped; +} + +static int +procctlmemio(Proc *p, uintptr offset, int n, void *va, int read) +{ + KMap *k; + Page *pg; + Segment *s; + uintptr l, pgsize; /* hmmmm */ + uchar *b; + + for(;;) { + s = seg(p, offset, rlock); + if(s == 0) + error(Ebadarg); + + if(offset+n >= s->top) + n = s->top-offset; + + if(!read && (s->type&SG_TYPE) == SG_TEXT) + s = txt2data(p, s); + + if(fixfault(s, offset, read, 0) == 0) + break; + } + pg = segva2page(s, offset); + if(pg == nil) + panic("procctlmemio"); + + pgsize = pagesize(pg); + l = pgsize - (offset&(pgsize-1)); + if(n > l) + n = l; + + k = kmap(pg); + if(waserror()) { + kunmap(k); + nexterror(); + } + b = VA(k); + b += offset&(pgsize-1); + if(read == 1) + memmove(va, b, n); /* This can fault */ + else + memmove(b, va, n); + poperror(); + kunmap(k); + + /* Ensure the process sees text page changes */ + if(s->flushme) + mmucachectl(pg, PG_TXTFLUSH); + + if(read == 0) + p->newtlb = 1; + + return n; +} diff -Nru /sys/src/9k/port/devptrace.c /sys/src/9k/port/devptrace.c --- /sys/src/9k/port/devptrace.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devptrace.c Wed Dec 9 
00:00:00 2015 @@ -0,0 +1,302 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "netif.h" + +#include + + +enum { + Qdir, + Qctl, + Qdata, +}; + +enum { + CMsize, + CMtrace, +}; + +static Dirtab tracedir[]= +{ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "ptracectl", {Qctl}, 0, 0664, + "ptrace", {Qdata}, 0, 0440, +}; + +static Lock tlk; +static int topens; +static int tproduced, tconsumed, ntevents, ntmask; +static uchar *tevents; + +static Chan* +traceattach(char *spec) +{ + return devattach(L'σ', spec); +} + +static Walkqid* +tracewalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, tracedir, nelem(tracedir), devgen); +} + +static long +tracestat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, tracedir, nelem(tracedir), devgen); +} + +static void +_ptrace(Proc *p, int etype, vlong ts, vlong arg) +{ + int i; + uchar *te; + + if (p->trace == 0 || topens == 0) + return; + + lock(&tlk); + if (p->trace == 0 || topens == 0 || tproduced - tconsumed >= ntevents){ + unlock(&tlk); + return; + } + if(ts == 0) + ts = todget(nil); + i = (tproduced&ntmask) * PTsize; + te = &tevents[i]; + PBIT32(te, (int)p->pid); + te += BIT32SZ; + PBIT32(te, etype); + te += BIT32SZ; + PBIT32(te, m->machno); + te += BIT32SZ; + PBIT64(te, ts); + te += BIT64SZ; + PBIT64(te, arg); + tproduced++; + if(etype == SDead) + p->trace = 0; + unlock(&tlk); +} + + +static Chan* +traceopen(Chan *c, int omode) +{ + int q; + + q = c->qid.path; + switch(q){ + case Qdata: + lock(&tlk); + if (waserror()){ + unlock(&tlk); + nexterror(); + } + if(topens > 0) + error(Einuse); + if(ntevents == 0) + error("must set trace size first"); + if(up->trace != 0) + error("a traced process can't open the trace device"); + tproduced = tconsumed = 0; + proctrace = _ptrace; + topens++; + poperror(); + unlock(&tlk); + break; + } + c = devopen(c, omode, tracedir, nelem(tracedir), devgen); + return c; +} + 
+static void +traceclose(Chan *c) +{ + int q; + + q = c->qid.path; + switch(q){ + case Qdata: + if(c->flag & COPEN){ + lock(&tlk); + topens--; + tproduced = tconsumed = 0; + proctrace = nil; + unlock(&tlk); + } + } +} + +static long +traceread(Chan *c, void *a, long n, vlong offset) +{ + int q, xopens, xevents, xprod, xcons, navail, ne, i; + char *s, *e; + + q = c->qid.path; + switch(q){ + case Qdir: + return devdirread(c, a, n, tracedir, nelem(tracedir), devgen); + break; + case Qctl: + e = up->genbuf + sizeof up->genbuf; + lock(&tlk); + xopens = topens; + xevents = ntevents; + xprod = tproduced; + xcons = tconsumed; + unlock(&tlk); + s = seprint(up->genbuf, e, "opens %d\n", xopens); + s = seprint(s, e, "size %d\n", xevents); + s = seprint(s, e, "produced %d\n", xprod); + seprint(s, e, "consumed %d\n", xcons); + return readstr(offset, a, n, up->genbuf); + break; + case Qdata: + /* no locks! */ + navail = tproduced - tconsumed; + if(navail <= 0) + return 0; + if(navail > n / PTsize) + navail = n / PTsize; + s = a; + e = a; + while(navail > 0) { + if((tconsumed & ntmask) + navail > ntevents) + ne = ntevents - (tconsumed & ntmask); + else + ne = navail; + i = ne * PTsize; + memmove(e, &tevents[(tconsumed & ntmask)*PTsize], i); + + tconsumed += ne; + e += i; + navail -= ne; + } + return e - s; + break; + default: + error(Egreg); + } + error("not yet implemented"); + return -1; +} + +static Cmdtab proccmd[] = +{ + CMsize, "size", 2, + CMtrace, "trace", 0, +}; + +static long +tracewrite(Chan *c, void *a, long n, vlong) +{ + int i, q, sz, msk, pid; + Cmdbuf *cb; + Cmdtab *ct; + uchar *nt; + Proc *p; + + q = c->qid.path; + switch(q){ + case Qctl: + break; + default: + error(Egreg); + } + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, proccmd, nelem(proccmd)); + + switch(ct->index){ + case CMsize: + sz = atoi(cb->f[1]); + if(sz == 0){ + lock(&tlk); + ntevents = 0; + ntmask = 0; + unlock(&tlk); + } + msk = sz-1; + if((sz&msk) != 
0) + error("wrong size. use a power of two."); + if(sz > 512*1024) + error("size is too large"); + lock(&tlk); + if(waserror()){ + unlock(&tlk); + nexterror(); + } + if(sz > ntevents){ + nt = realloc(tevents, PTsize * sz); + if(nt == nil) + error("not enough memory"); + tevents = nt; + ntevents = sz; + ntmask = msk; + } + tproduced = 0; + tconsumed = 0; + poperror(); + unlock(&tlk); + break; + case CMtrace: + if(cb->nf != 2 && cb->nf != 3) + error(Ebadctl); + pid = atoi(cb->f[1]); + i = psindex(pid); + if(i < 0) + error(Eprocdied); + p = psincref(i); + if(p == nil) + error(Eprocdied); + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + if(p->pid != pid) + error(Eprocdied); + if(cb->nf == 2) + p->trace ^= p->trace; + else + p->trace = (atoi(cb->f[2]) != 0); + poperror(); + qunlock(&p->debug); + psdecref(p); + break; + } + + poperror(); + free(cb); + return n; +} + +Dev ptracedevtab = { + L'σ', + "ptrace", + devreset, + devinit, + devshutdown, + traceattach, + tracewalk, + tracestat, + traceopen, + devcreate, + traceclose, + traceread, + devbread, + tracewrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devroot.c /sys/src/9k/port/devroot.c --- /sys/src/9k/port/devroot.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devroot.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,254 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + Qdir = 0, + Qboot = 0x1000, + + Nrootfiles = 32, + Nbootfiles = 32, +}; + +typedef struct Dirlist Dirlist; +struct Dirlist +{ + uint base; + Dirtab *dir; + uchar **data; + int ndir; + int mdir; +}; + +static Dirtab rootdir[Nrootfiles] = { + "#/", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "boot", {Qboot, 0, QTDIR}, 0, DMDIR|0555, +}; +static uchar *rootdata[Nrootfiles]; +static Dirlist rootlist = +{ + 0, + rootdir, + rootdata, + 2, + Nrootfiles +}; + +static Dirtab bootdir[Nbootfiles] = { + "boot", {Qboot, 0, 
QTDIR}, 0, DMDIR|0555, +}; +static uchar *bootdata[Nbootfiles]; +static Dirlist bootlist = +{ + Qboot, + bootdir, + bootdata, + 1, + Nbootfiles +}; + +/* + * add a file to the list + */ +static void +addlist(Dirlist *l, char *name, uchar *contents, ulong len, int perm) +{ + Dirtab *d; + + if(l->ndir >= l->mdir) + panic("too many root files"); + l->data[l->ndir] = contents; + d = &l->dir[l->ndir]; + strcpy(d->name, name); + d->length = len; + d->perm = perm; + d->qid.type = 0; + d->qid.vers = 0; + d->qid.path = ++l->ndir + l->base; + if(perm & DMDIR) + d->qid.type |= QTDIR; +} + +/* + * add a root file + */ +void +addbootfile(char *name, uchar *contents, ulong len) +{ + addlist(&bootlist, name, contents, len, 0555); +} + +/* + * add a root directory + */ +static void +addrootdir(char *name) +{ + addlist(&rootlist, name, nil, 0, DMDIR|0555); +} + +static void +rootreset(void) +{ + addrootdir("bin"); + addrootdir("dev"); + addrootdir("env"); + addrootdir("fd"); + addrootdir("mnt"); + addrootdir("net"); + addrootdir("net.alt"); + addrootdir("proc"); + addrootdir("root"); + addrootdir("srv"); +} + +static Chan* +rootattach(char *spec) +{ + return devattach('/', spec); +} + +static int +rootgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp) +{ + int t; + Dirtab *d; + Dirlist *l; + + switch((int)c->qid.path){ + case Qdir: + if(s == DEVDOTDOT){ + devdir(c, (Qid){Qdir, 0, QTDIR}, "#/", 0, eve, 0555, dp); + return 1; + } + return devgen(c, name, rootlist.dir, rootlist.ndir, s, dp); + case Qboot: + if(s == DEVDOTDOT){ + devdir(c, (Qid){Qdir, 0, QTDIR}, "#/", 0, eve, 0555, dp); + return 1; + } + return devgen(c, name, bootlist.dir, bootlist.ndir, s, dp); + default: + if(s == DEVDOTDOT){ + if((int)c->qid.path < Qboot) + devdir(c, (Qid){Qdir, 0, QTDIR}, "#/", 0, eve, 0555, dp); + else + devdir(c, (Qid){Qboot, 0, QTDIR}, "#/", 0, eve, 0555, dp); + return 1; + } + if(s != 0) + return -1; + if((int)c->qid.path < Qboot){ + t = c->qid.path-1; + l = &rootlist; + }else{ + t = 
c->qid.path - Qboot - 1; + l = &bootlist; + } + if(t >= l->ndir) + return -1; +if(t < 0){ +print("rootgen %#llux %d %d\n", c->qid.path, s, t); +panic("whoops"); +} + d = &l->dir[t]; + devdir(c, d->qid, d->name, d->length, eve, d->perm, dp); + return 1; + } +} + +static Walkqid* +rootwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, rootgen); +} + +static long +rootstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, nil, 0, rootgen); +} + +static Chan* +rootopen(Chan *c, int omode) +{ + return devopen(c, omode, nil, 0, devgen); +} + +/* + * sysremove() knows this is a nop + */ +static void +rootclose(Chan*) +{ +} + +static long +rootread(Chan *c, void *buf, long n, vlong off) +{ + ulong t; + Dirtab *d; + Dirlist *l; + uchar *data; + ulong offset = off; + + t = c->qid.path; + switch(t){ + case Qdir: + case Qboot: + return devdirread(c, buf, n, nil, 0, rootgen); + } + + if(t= l->ndir) + error(Egreg); + + d = &l->dir[t]; + data = l->data[t]; + if(offset >= d->length) + return 0; + if(offset+n > d->length) + n = d->length - offset; + memmove(buf, data+offset, n); + return n; +} + +static long +rootwrite(Chan*, void*, long, vlong) +{ + error(Egreg); + return 0; +} + +Dev rootdevtab = { + '/', + "root", + + rootreset, + devinit, + devshutdown, + rootattach, + rootwalk, + rootstat, + rootopen, + devcreate, + rootclose, + rootread, + devbread, + rootwrite, + devbwrite, + devremove, + devwstat, +}; + diff -Nru /sys/src/9k/port/devsd.c /sys/src/9k/port/devsd.c --- /sys/src/9k/port/devsd.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devsd.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1661 @@ +/* + * Storage Device. 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +extern Dev sddevtab; +extern SDifc* sdifc[]; + +static char Echange[] = "media or partition has changed"; + +static char devletters[] = "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +static SDev *devs[sizeof devletters-1]; +static QLock devslock; + +enum { + Rawcmd, + Rawdata, + Rawstatus, +}; + +enum { + Qtopdir = 1, /* top level directory */ + Qtopbase, + Qtopctl = Qtopbase, + + Qunitdir, /* directory per unit */ + Qunitbase, + Qctl = Qunitbase, + Qraw, + Qpart, + + TypeLOG = 4, + NType = (1<>TypeSHIFT) & TypeMASK) +#define PART(q) ((((ulong)(q).path)>>PartSHIFT) & PartMASK) +#define UNIT(q) ((((ulong)(q).path)>>UnitSHIFT) & UnitMASK) +#define DEV(q) ((((ulong)(q).path)>>DevSHIFT) & DevMASK) +#define QID(d,u, p, t) (((d)<part != nil){ + partno = -1; + for(i = 0; i < unit->npart; i++){ + pp = &unit->part[i]; + if(!pp->valid){ + if(partno == -1) + partno = i; + break; + } + if(strcmp(name, pp->name) == 0){ + if(pp->start == start && pp->end == end) + return; + error(Ebadctl); + } + } + } + else{ + if((unit->part = malloc(sizeof(SDpart)*SDnpart)) == nil) + error(Enomem); + unit->npart = SDnpart; + partno = 0; + } + + /* + * If no free slot found then increase the + * array size (can't get here with unit->part == nil). + */ + if(partno == -1){ + if(unit->npart >= NPart) + error(Enomem); + if((pp = malloc(sizeof(SDpart)*(unit->npart+SDnpart))) == nil) + error(Enomem); + memmove(pp, unit->part, sizeof(SDpart)*unit->npart); + free(unit->part); + unit->part = pp; + partno = unit->npart; + unit->npart += SDnpart; + } + + /* + * Check size and extent are valid. 
+ */ + if(start > end || end > unit->sectors) + error(Eio); + pp = &unit->part[partno]; + pp->start = start; + pp->end = end; + kstrdup(&pp->name, name); + kstrdup(&pp->user, eve); + pp->perm = 0640; + pp->valid = 1; +} + +static void +sddelpart(SDunit* unit, char* name) +{ + int i; + SDpart *pp; + + /* + * Look for the partition to delete. + * Can't delete if someone still has it open. + */ + pp = unit->part; + for(i = 0; i < unit->npart; i++){ + if(strcmp(name, pp->name) == 0) + break; + pp++; + } + if(i >= unit->npart) + error(Ebadctl); + if(strcmp(up->user, pp->user) && !iseve()) + error(Eperm); + pp->valid = 0; + pp->vers++; +} + +static void +sdincvers(SDunit *unit) +{ + int i; + + unit->vers++; + if(unit->part){ + for(i = 0; i < unit->npart; i++){ + unit->part[i].valid = 0; + unit->part[i].vers++; + } + } +} + +static int +sdinitpart(SDunit* unit) +{ + int nf; + uvlong start, end; + char *f[4], *p, *q, buf[10]; + + if(unit->sectors > 0){ + unit->sectors = unit->secsize = 0; + sdincvers(unit); + } + + /* device must be connected or not; other values are trouble */ + if(unit->inquiry[0] & 0xC0) /* see SDinq0periphqual */ + return 0; + switch(unit->inquiry[0] & SDinq0periphtype){ + case SDperdisk: + case SDperworm: + case SDpercd: + case SDpermo: + break; + default: + return 0; + } + + if(unit->dev->ifc->online) + unit->dev->ifc->online(unit); + if(unit->sectors){ + sdincvers(unit); + sdaddpart(unit, "data", 0, unit->sectors); + + /* + * Use partitions passed from boot program, + * e.g. + * sdC0part=dos 63 123123/plan9 123123 456456 + * This happens before /boot sets hostname so the + * partitions will have the null-string for user. + * The gen functions patch it up. 
+ */ + snprint(buf, sizeof buf, "%spart", unit->name); + for(p = getconf(buf); p != nil; p = q){ + if(q = strchr(p, '/')) + *q++ = '\0'; + nf = tokenize(p, f, nelem(f)); + if(nf < 3) + continue; + + start = strtoull(f[1], 0, 0); + end = strtoull(f[2], 0, 0); + if(!waserror()){ + sdaddpart(unit, f[0], start, end); + poperror(); + } + } + } + + return 1; +} + +static int +sdindex(int idno) +{ + char *p; + + p = strchr(devletters, idno); + if(p == nil) + return -1; + return p-devletters; +} + +static SDev* +sdgetdev(int idno) +{ + SDev *sdev; + int i; + + if((i = sdindex(idno)) < 0) + return nil; + + qlock(&devslock); + if(sdev = devs[i]) + incref(&sdev->r); + qunlock(&devslock); + return sdev; +} + +static SDunit* +sdgetunit(SDev* sdev, int subno) +{ + SDunit *unit; + char buf[32]; + + /* + * Associate a unit with a given device and sub-unit + * number on that device. + * The device will be probed if it has not already been + * successfully accessed. + */ + qlock(&sdev->unitlock); + if(subno > sdev->nunit){ + qunlock(&sdev->unitlock); + return nil; + } + + unit = sdev->unit[subno]; + if(unit == nil){ + /* + * Probe the unit only once. This decision + * may be a little severe and reviewed later. + */ + if(sdev->unitflg[subno]){ + qunlock(&sdev->unitlock); + return nil; + } + if((unit = malloc(sizeof(SDunit))) == nil){ + qunlock(&sdev->unitlock); + return nil; + } + sdev->unitflg[subno] = 1; + + snprint(buf, sizeof(buf), "%s%d", sdev->name, subno); + kstrdup(&unit->name, buf); + kstrdup(&unit->user, eve); + unit->perm = 0555; + unit->subno = subno; + unit->dev = sdev; + + if(sdev->enabled == 0 && sdev->ifc->enable) + sdev->ifc->enable(sdev); + sdev->enabled = 1; + + /* + * No need to lock anything here as this is only + * called before the unit is made available in the + * sdunit[] array. 
+ */ + if(unit->dev->ifc->verify(unit) == 0){ + qunlock(&sdev->unitlock); + free(unit); + return nil; + } + sdev->unit[subno] = unit; + } + qunlock(&sdev->unitlock); + return unit; +} + +static void +sdreset(void) +{ + int i; + SDev *sdev; + + /* + * Probe all known controller types and register any devices found. + */ + for(i = 0; sdifc[i] != nil; i++){ + if(sdifc[i]->pnp == nil || (sdev = sdifc[i]->pnp()) == nil) + continue; + sdadddevs(sdev); + } +} + +void +sdadddevs(SDev *sdev) +{ + int i, j, id; + SDev *next; + + for(; sdev; sdev=next){ + next = sdev->next; + + sdev->unit = (SDunit**)malloc(sdev->nunit * sizeof(SDunit*)); + sdev->unitflg = (int*)malloc(sdev->nunit * sizeof(int)); + if(sdev->unit == nil || sdev->unitflg == nil){ + print("sdadddevs: out of memory\n"); + giveup: + free(sdev->unit); + free(sdev->unitflg); + if(sdev->ifc->clear) + sdev->ifc->clear(sdev); + free(sdev); + continue; + } + id = sdindex(sdev->idno); + if(id == -1){ + print("sdadddevs: bad id number %d (%C)\n", id, id); + goto giveup; + } + qlock(&devslock); + for(i=0; iidno = devletters[j]; + devs[j] = sdev; + snprint(sdev->name, sizeof sdev->name, "sd%c", devletters[j]); + break; + } + } + qunlock(&devslock); + if(i == nelem(devs)){ + print("sdadddevs: out of device letters\n"); + goto giveup; + } + } +} + +// void +// sdrmdevs(SDev *sdev) +// { +// char buf[2]; +// +// snprint(buf, sizeof buf, "%c", sdev->idno); +// unconfigure(buf); +// } + +void +sdaddallconfs(void (*addconf)(SDunit *)) +{ + int i, u; + SDev *sdev; + + for(i = 0; i < nelem(devs); i++) /* each controller */ + for(sdev = devs[i]; sdev; sdev = sdev->next) + for(u = 0; u < sdev->nunit; u++) /* each drive */ + (*addconf)(sdev->unit[u]); +} + +static int +sd2gen(Chan* c, int i, Dir* dp) +{ + Qid q; + vlong l; + SDpart *pp; + SDperm *perm; + SDunit *unit; + SDev *sdev; + int rv; + + sdev = sdgetdev(DEV(c->qid)); + assert(sdev); + unit = sdev->unit[UNIT(c->qid)]; + + rv = -1; + switch(i){ + case Qctl: + mkqid(&q, 
QID(DEV(c->qid), UNIT(c->qid), PART(c->qid), Qctl), + unit->vers, QTFILE); + perm = &unit->ctlperm; + if(emptystr(perm->user)){ + kstrdup(&perm->user, eve); + perm->perm = 0644; /* nothing secret in ctl */ + } + devdir(c, q, "ctl", 0, perm->user, perm->perm, dp); + rv = 1; + break; + + case Qraw: + mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), PART(c->qid), Qraw), + unit->vers, QTFILE); + perm = &unit->rawperm; + if(emptystr(perm->user)){ + kstrdup(&perm->user, eve); + perm->perm = DMEXCL|0600; + } + devdir(c, q, "raw", 0, perm->user, perm->perm, dp); + rv = 1; + break; + + case Qpart: + pp = &unit->part[PART(c->qid)]; + l = (pp->end - pp->start) * (vlong)unit->secsize; + mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), PART(c->qid), Qpart), + unit->vers+pp->vers, QTFILE); + if(emptystr(pp->user)) + kstrdup(&pp->user, eve); + devdir(c, q, pp->name, l, pp->user, pp->perm, dp); + rv = 1; + break; + } + + decref(&sdev->r); + return rv; +} + +static int +sd1gen(Chan* c, int i, Dir* dp) +{ + Qid q; + + switch(i){ + case Qtopctl: + mkqid(&q, QID(0, 0, 0, Qtopctl), 0, QTFILE); + devdir(c, q, "sdctl", 0, eve, 0644, dp); /* no secrets */ + return 1; + } + return -1; +} + +static int +sdgen(Chan* c, char*, Dirtab*, int, int s, Dir* dp) +{ + Qid q; + vlong l; + int i, r; + SDpart *pp; + SDunit *unit; + SDev *sdev; + + switch(TYPE(c->qid)){ + case Qtopdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, 0, Qtopdir), 0, QTDIR); + sprint(up->genbuf, "#%C", sddevtab.dc); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + + if(s+Qtopbase < Qunitdir) + return sd1gen(c, s+Qtopbase, dp); + s -= (Qunitdir-Qtopbase); + + qlock(&devslock); + for(i=0; inunit) + break; + s -= devs[i]->nunit; + } + } + + if(i == nelem(devs)){ + /* Run off the end of the list */ + qunlock(&devslock); + return -1; + } + + if((sdev = devs[i]) == nil){ + qunlock(&devslock); + return 0; + } + + incref(&sdev->r); + qunlock(&devslock); + + if((unit = sdev->unit[s]) == nil) + if((unit = sdgetunit(sdev, s)) == nil){ + 
decref(&sdev->r); + return 0; + } + + mkqid(&q, QID(sdev->idno, s, 0, Qunitdir), 0, QTDIR); + if(emptystr(unit->user)) + kstrdup(&unit->user, eve); + devdir(c, q, unit->name, 0, unit->user, unit->perm, dp); + decref(&sdev->r); + return 1; + + case Qunitdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, 0, Qtopdir), 0, QTDIR); + sprint(up->genbuf, "#%C", sddevtab.dc); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + + if((sdev = sdgetdev(DEV(c->qid))) == nil){ + devdir(c, c->qid, "unavailable", 0, eve, 0, dp); + return 1; + } + + unit = sdev->unit[UNIT(c->qid)]; + qlock(&unit->ctl); + + /* + * Check for media change. + * If one has already been detected, sectors will be zero. + * If there is one waiting to be detected, online + * will return > 1. + * Online is a bit of a large hammer but does the job. + */ + if(unit->sectors == 0 + || (unit->dev->ifc->online && unit->dev->ifc->online(unit) > 1)) + sdinitpart(unit); + + i = s+Qunitbase; + if(i < Qpart){ + r = sd2gen(c, i, dp); + qunlock(&unit->ctl); + decref(&sdev->r); + return r; + } + i -= Qpart; + if(unit->part == nil || i >= unit->npart){ + qunlock(&unit->ctl); + decref(&sdev->r); + break; + } + pp = &unit->part[i]; + if(!pp->valid){ + qunlock(&unit->ctl); + decref(&sdev->r); + return 0; + } + l = (pp->end - pp->start) * (vlong)unit->secsize; + mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), i, Qpart), + unit->vers+pp->vers, QTFILE); + if(emptystr(pp->user)) + kstrdup(&pp->user, eve); + devdir(c, q, pp->name, l, pp->user, pp->perm, dp); + qunlock(&unit->ctl); + decref(&sdev->r); + return 1; + case Qraw: + case Qctl: + case Qpart: + if((sdev = sdgetdev(DEV(c->qid))) == nil){ + devdir(c, q, "unavailable", 0, eve, 0, dp); + return 1; + } + unit = sdev->unit[UNIT(c->qid)]; + qlock(&unit->ctl); + r = sd2gen(c, TYPE(c->qid), dp); + qunlock(&unit->ctl); + decref(&sdev->r); + return r; + case Qtopctl: + return sd1gen(c, TYPE(c->qid), dp); + default: + break; + } + + return -1; +} + +static Chan* +sdattach(char* 
spec) +{ + Chan *c; + char *p; + SDev *sdev; + int idno, subno; + + if(*spec == '\0'){ + c = devattach(sddevtab.dc, spec); + mkqid(&c->qid, QID(0, 0, 0, Qtopdir), 0, QTDIR); + return c; + } + + if(spec[0] != 's' || spec[1] != 'd') + error(Ebadspec); + idno = spec[2]; + subno = strtol(&spec[3], &p, 0); + if(p == &spec[3]) + error(Ebadspec); + + if((sdev=sdgetdev(idno)) == nil) + error(Enonexist); + if(sdgetunit(sdev, subno) == nil){ + decref(&sdev->r); + error(Enonexist); + } + + c = devattach(sddevtab.dc, spec); + mkqid(&c->qid, QID(sdev->idno, subno, 0, Qunitdir), 0, QTDIR); + c->devno = (sdev->idno << UnitLOG) + subno; + decref(&sdev->r); + return c; +} + +static Walkqid* +sdwalk(Chan* c, Chan* nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, sdgen); +} + +static long +sdstat(Chan* c, uchar* db, long n) +{ + return devstat(c, db, n, nil, 0, sdgen); +} + +static Chan* +sdopen(Chan* c, int omode) +{ + SDpart *pp; + SDunit *unit; + SDev *sdev; + uchar tp; + + c = devopen(c, omode, 0, 0, sdgen); + if((tp = TYPE(c->qid)) != Qctl && tp != Qraw && tp != Qpart) + return c; + + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + + unit = sdev->unit[UNIT(c->qid)]; + + switch(TYPE(c->qid)){ + case Qctl: + c->qid.vers = unit->vers; + break; + case Qraw: + c->qid.vers = unit->vers; + if(TAS(&unit->rawinuse) != 0){ + c->flag &= ~COPEN; + decref(&sdev->r); + error(Einuse); + } + unit->state = Rawcmd; + break; + case Qpart: + qlock(&unit->ctl); + if(waserror()){ + qunlock(&unit->ctl); + c->flag &= ~COPEN; + decref(&sdev->r); + nexterror(); + } + pp = &unit->part[PART(c->qid)]; + c->qid.vers = unit->vers+pp->vers; + qunlock(&unit->ctl); + poperror(); + break; + } + decref(&sdev->r); + return c; +} + +static void +sdclose(Chan* c) +{ + SDunit *unit; + SDev *sdev; + + if(c->qid.type & QTDIR) + return; + if(!(c->flag & COPEN)) + return; + + switch(TYPE(c->qid)){ + default: + break; + case Qraw: + sdev = sdgetdev(DEV(c->qid)); + if(sdev){ 
+ unit = sdev->unit[UNIT(c->qid)]; + unit->rawinuse = 0; + decref(&sdev->r); + } + break; + } +} + +static long +sdbio(Chan* c, int write, char* a, long len, vlong off) +{ + int nchange; + uchar *b; + SDpart *pp; + SDunit *unit; + SDev *sdev; + vlong bno; + long l, max, nb, offset; + + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil){ + decref(&sdev->r); + error(Enonexist); + } + unit = sdev->unit[UNIT(c->qid)]; + if(unit == nil) + error(Enonexist); + + nchange = 0; + qlock(&unit->ctl); + while(waserror()){ + /* notification of media change; go around again */ + if(strcmp(up->errstr, Eio) == 0 && unit->sectors == 0 && nchange++ == 0){ + sdinitpart(unit); + continue; + } + + /* other errors; give up */ + qunlock(&unit->ctl); + decref(&sdev->r); + nexterror(); + } + pp = &unit->part[PART(c->qid)]; + if(unit->vers+pp->vers != c->qid.vers) + error(Echange); + + /* + * Check the request is within bounds. + * Removeable drives are locked throughout the I/O + * in case the media changes unexpectedly. + * Non-removeable drives are not locked during the I/O + * to allow the hardware to optimise if it can; this is + * a little fast and loose. + * It's assumed that non-removeable media parameters + * (sectors, secsize) can't change once the drive has + * been brought online. + */ + bno = (off/unit->secsize) + pp->start; + nb = ((off+len+unit->secsize-1)/unit->secsize) + pp->start - bno; + max = SDmaxio/unit->secsize; + if(nb > max) + nb = max; + if(bno+nb > pp->end) + nb = pp->end - bno; + if(bno >= pp->end || nb == 0){ + if(write) + error(Eio); + qunlock(&unit->ctl); + decref(&sdev->r); + poperror(); + return 0; + } + if(!(unit->inquiry[1] & SDinq1removable)){ + qunlock(&unit->ctl); + poperror(); + } + + b = sdmalloc(nb*unit->secsize); + if(b == nil) + error(Enomem); + if(waserror()){ + sdfree(b); + if(!(unit->inquiry[1] & SDinq1removable)) + decref(&sdev->r); /* gadverdamme! 
*/ + nexterror(); + } + + offset = off%unit->secsize; + if(offset+len > nb*unit->secsize) + len = nb*unit->secsize - offset; + if(write){ + if(offset || (len%unit->secsize)){ + l = unit->dev->ifc->bio(unit, 0, 0, b, nb, bno); + if(l < 0) + error(Eio); + if(l < (nb*unit->secsize)){ + nb = l/unit->secsize; + l = nb*unit->secsize - offset; + if(len > l) + len = l; + } + } + memmove(b+offset, a, len); + l = unit->dev->ifc->bio(unit, 0, 1, b, nb, bno); + if(l < 0) + error(Eio); + if(l < offset) + len = 0; + else if(len > l - offset) + len = l - offset; + } + else{ + l = unit->dev->ifc->bio(unit, 0, 0, b, nb, bno); + if(l < 0) + error(Eio); + if(l < offset) + len = 0; + else if(len > l - offset) + len = l - offset; + memmove(a, b+offset, len); + } + sdfree(b); + poperror(); + + if(unit->inquiry[1] & SDinq1removable){ + qunlock(&unit->ctl); + poperror(); + } + + decref(&sdev->r); + return len; +} + +static long +sdrio(SDreq* r, void* a, long n) +{ + void *data; + + if(n >= SDmaxio || n < 0) + error(Etoobig); + + data = nil; + if(n){ + if((data = sdmalloc(n)) == nil) + error(Enomem); + if(r->write) + memmove(data, a, n); + } + r->data = data; + r->dlen = n; + + if(waserror()){ + sdfree(data); + r->data = nil; + nexterror(); + } + + if(r->unit->dev->ifc->rio(r) != SDok) + error(Eio); + + if(!r->write && r->rlen > 0) + memmove(a, data, r->rlen); + sdfree(data); + r->data = nil; + poperror(); + + return r->rlen; +} + +/* + * SCSI simulation for non-SCSI devices + */ +int +sdsetsense(SDreq *r, int status, int key, int asc, int ascq) +{ + int len; + SDunit *unit; + + unit = r->unit; + unit->sense[2] = key; + unit->sense[12] = asc; + unit->sense[13] = ascq; + + r->status = status; + if(status == SDcheck && !(r->flags & SDnosense)){ + /* request sense case from sdfakescsi */ + len = sizeof unit->sense; + if(len > sizeof r->sense-1) + len = sizeof r->sense-1; + memmove(r->sense, unit->sense, len); + unit->sense[2] = 0; + unit->sense[12] = 0; + unit->sense[13] = 0; + r->flags |= 
SDvalidsense; + return SDok; + } + return status; +} + +int +sdmodesense(SDreq *r, uchar *cmd, void *info, int ilen) +{ + int len; + uchar *data; + + /* + * Fake a vendor-specific request with page code 0, + * return the drive info. + */ + if((cmd[2] & 0x3F) != 0 && (cmd[2] & 0x3F) != 0x3F) + return sdsetsense(r, SDcheck, 0x05, 0x24, 0); + len = (cmd[7]<<8)|cmd[8]; + if(len == 0) + return SDok; + if(len < 8+ilen) + return sdsetsense(r, SDcheck, 0x05, 0x1A, 0); + if(r->data == nil || r->dlen < len) + return sdsetsense(r, SDcheck, 0x05, 0x20, 1); + data = r->data; + memset(data, 0, 8); + data[0] = ilen>>8; + data[1] = ilen; + if(ilen) + memmove(data+8, info, ilen); + r->rlen = 8+ilen; + return sdsetsense(r, SDok, 0, 0, 0); +} + +int +sdfakescsi(SDreq *r, void *info, int ilen) +{ + uchar *cmd, *p; + uvlong len; + SDunit *unit; + + cmd = r->cmd; + r->rlen = 0; + unit = r->unit; + + /* + * Rewrite read(6)/write(6) into read(10)/write(10). + */ + switch(cmd[0]){ + case 0x08: /* read */ + case 0x0A: /* write */ + cmd[9] = 0; + cmd[8] = cmd[4]; + cmd[7] = 0; + cmd[6] = 0; + cmd[5] = cmd[3]; + cmd[4] = cmd[2]; + cmd[3] = cmd[1] & 0x0F; + cmd[2] = 0; + cmd[1] &= 0xE0; + cmd[0] |= 0x20; + break; + } + + /* + * Map SCSI commands into ATA commands for discs. + * Fail any command with a LUN except INQUIRY which + * will return 'logical unit not supported'. 
+ */ + if((cmd[1]>>5) && cmd[0] != 0x12) + return sdsetsense(r, SDcheck, 0x05, 0x25, 0); + + switch(cmd[0]){ + default: + return sdsetsense(r, SDcheck, 0x05, 0x20, 0); + + case 0x00: /* test unit ready */ + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x03: /* request sense */ + if(cmd[4] < sizeof unit->sense) + len = cmd[4]; + else + len = sizeof unit->sense; + if(r->data && r->dlen >= len){ + memmove(r->data, unit->sense, len); + r->rlen = len; + } + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x12: /* inquiry */ + if(cmd[4] < sizeof unit->inquiry) + len = cmd[4]; + else + len = sizeof unit->inquiry; + if(r->data && r->dlen >= len){ + memmove(r->data, unit->inquiry, len); + r->rlen = len; + } + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x1B: /* start/stop unit */ + /* + * nop for now, can use power management later. + */ + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x25: /* read capacity */ + if((cmd[1] & 0x01) || cmd[2] || cmd[3]) + return sdsetsense(r, SDcheck, 0x05, 0x24, 0); + if(r->data == nil || r->dlen < 8) + return sdsetsense(r, SDcheck, 0x05, 0x20, 1); + + /* + * Read capacity returns the LBA of the last sector. + */ + len = unit->sectors - 1; + p = r->data; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p++ = len; + len = 512; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p++ = len; + r->rlen = p - (uchar*)r->data; + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x9E: /* long read capacity */ + if((cmd[1] & 0x01) || cmd[2] || cmd[3]) + return sdsetsense(r, SDcheck, 0x05, 0x24, 0); + if(r->data == nil || r->dlen < 8) + return sdsetsense(r, SDcheck, 0x05, 0x20, 1); + /* + * Read capcity returns the LBA of the last sector. 
+ */ + len = unit->sectors - 1; + p = r->data; + *p++ = len>>56; + *p++ = len>>48; + *p++ = len>>40; + *p++ = len>>32; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p++ = len; + len = 512; + *p++ = len>>24; + *p++ = len>>16; + *p++ = len>>8; + *p++ = len; + r->rlen = p - (uchar*)r->data; + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x5A: /* mode sense */ + return sdmodesense(r, cmd, info, ilen); + + case 0x28: /* read */ + case 0x2A: /* write */ + case 0x88: /* read16 */ + case 0x8a: /* write16 */ + return SDnostatus; + } +} + +static long +sdread(Chan *c, void *a, long n, vlong off) +{ + char *p, *e, *buf; + SDpart *pp; + SDunit *unit; + SDev *sdev; + long offset; + int i, l, m, status; + + offset = off; + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qtopctl: + m = 64*1024; /* room for register dumps */ + p = buf = malloc(m); + if(p == nil) + error(Enomem); + e = p + m; + qlock(&devslock); + for(i = 0; i < nelem(devs); i++){ + sdev = devs[i]; + if(sdev && sdev->ifc->rtopctl) + p = sdev->ifc->rtopctl(sdev, p, e); + } + qunlock(&devslock); + n = readstr(offset, a, n, buf); + free(buf); + return n; + + case Qtopdir: + case Qunitdir: + return devdirread(c, a, n, 0, 0, sdgen); + + case Qctl: + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + + unit = sdev->unit[UNIT(c->qid)]; + m = 16*1024; /* room for register dumps */ + p = malloc(m); + if(p == nil) + error(Enomem); + l = snprint(p, m, "inquiry %.48s\n", + (char*)unit->inquiry+8); + qlock(&unit->ctl); + /* + * If there's a device specific routine it must + * provide all information pertaining to night geometry + * and the garscadden trains. 
+ */ + if(unit->dev->ifc->rctl) + l += unit->dev->ifc->rctl(unit, p+l, m-l); + if(unit->sectors == 0) + sdinitpart(unit); + if(unit->sectors){ + if(unit->dev->ifc->rctl == nil) + l += snprint(p+l, m-l, + "geometry %llud %lud\n", + unit->sectors, unit->secsize); + pp = unit->part; + for(i = 0; i < unit->npart; i++){ + if(pp->valid) + l += snprint(p+l, m-l, + "part %s %llud %llud\n", + pp->name, pp->start, pp->end); + pp++; + } + } + qunlock(&unit->ctl); + decref(&sdev->r); + l = readstr(offset, a, n, p); + free(p); + return l; + + case Qraw: + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + + unit = sdev->unit[UNIT(c->qid)]; + qlock(&unit->raw); + if(waserror()){ + qunlock(&unit->raw); + decref(&sdev->r); + nexterror(); + } + if(unit->state == Rawdata){ + unit->state = Rawstatus; + i = sdrio(unit->req, a, n); + } + else if(unit->state == Rawstatus){ + status = unit->req->status; + unit->state = Rawcmd; + free(unit->req); + unit->req = nil; + i = readnum(0, a, n, status, NUMSIZE); + } else + i = 0; + qunlock(&unit->raw); + decref(&sdev->r); + poperror(); + return i; + + case Qpart: + return sdbio(c, 0, a, n, off); + } +} + +static void legacytopctl(Cmdbuf*); + +static long +sdwrite(Chan* c, void* a, long n, vlong off) +{ + char *f0; + int i; + Cmdbuf *cb; + SDifc *ifc; + SDreq *req; + SDunit *unit; + SDev *sdev; + uvlong end, start; + + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qtopctl: + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf == 0) + error("empty control message"); + f0 = cb->f[0]; + cb->f++; + cb->nf--; + if(strcmp(f0, "config") == 0){ + /* wormhole into ugly legacy interface */ + legacytopctl(cb); + poperror(); + free(cb); + break; + } + /* + * "ata arg..." invokes sdifc[i]->wtopctl(nil, cb), + * where sdifc[i]->name=="ata" and cb contains the args. 
+ */ + ifc = nil; + sdev = nil; + for(i=0; sdifc[i]; i++){ + if(strcmp(sdifc[i]->name, f0) == 0){ + ifc = sdifc[i]; + sdev = nil; + goto subtopctl; + } + } + /* + * "sd1 arg..." invokes sdifc[i]->wtopctl(sdev, cb), + * where sdifc[i] and sdev match controller letter "1", + * and cb contains the args. + */ + if(f0[0]=='s' && f0[1]=='d' && f0[2] && f0[3] == 0){ + if((sdev = sdgetdev(f0[2])) != nil){ + ifc = sdev->ifc; + goto subtopctl; + } + } + error("unknown interface"); + + subtopctl: + if(waserror()){ + if(sdev) + decref(&sdev->r); + nexterror(); + } + if(ifc->wtopctl) + ifc->wtopctl(sdev, cb); + else + error(Ebadctl); + poperror(); + poperror(); + if(sdev) + decref(&sdev->r); + free(cb); + break; + + case Qctl: + cb = parsecmd(a, n); + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + unit = sdev->unit[UNIT(c->qid)]; + + qlock(&unit->ctl); + if(waserror()){ + qunlock(&unit->ctl); + decref(&sdev->r); + free(cb); + nexterror(); + } + if(unit->vers != c->qid.vers) + error(Echange); + + if(cb->nf < 1) + error(Ebadctl); + if(strcmp(cb->f[0], "part") == 0){ + if(cb->nf != 4) + error(Ebadctl); + if(unit->sectors == 0 && !sdinitpart(unit)) + error(Eio); + start = strtoull(cb->f[2], 0, 0); + end = strtoull(cb->f[3], 0, 0); + sdaddpart(unit, cb->f[1], start, end); + } + else if(strcmp(cb->f[0], "delpart") == 0){ + if(cb->nf != 2 || unit->part == nil) + error(Ebadctl); + sddelpart(unit, cb->f[1]); + } + else if(unit->dev->ifc->wctl) + unit->dev->ifc->wctl(unit, cb); + else + error(Ebadctl); + qunlock(&unit->ctl); + decref(&sdev->r); + poperror(); + free(cb); + break; + + case Qraw: + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + unit = sdev->unit[UNIT(c->qid)]; + qlock(&unit->raw); + if(waserror()){ + qunlock(&unit->raw); + decref(&sdev->r); + nexterror(); + } + switch(unit->state){ + case Rawcmd: + if(n < 6 || n > sizeof(req->cmd)) + error(Ebadarg); + if((req = malloc(sizeof(SDreq))) == nil) + error(Enomem); + req->unit = unit; 
+ memmove(req->cmd, a, n); + req->clen = n; + req->flags = SDnosense; + req->status = ~0; + + unit->req = req; + unit->state = Rawdata; + break; + + case Rawstatus: + unit->state = Rawcmd; + free(unit->req); + unit->req = nil; + error(Ebadusefd); + + case Rawdata: + unit->state = Rawstatus; + unit->req->write = 1; + n = sdrio(unit->req, a, n); + } + qunlock(&unit->raw); + decref(&sdev->r); + poperror(); + break; + case Qpart: + return sdbio(c, 1, a, n, off); + } + + return n; +} + +static long +sdwstat(Chan* c, uchar* dp, long n) +{ + Dir *d; + SDpart *pp; + SDperm *perm; + SDunit *unit; + SDev *sdev; + + if(c->qid.type & QTDIR) + error(Eperm); + + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + unit = sdev->unit[UNIT(c->qid)]; + qlock(&unit->ctl); + d = nil; + if(waserror()){ + free(d); + qunlock(&unit->ctl); + decref(&sdev->r); + nexterror(); + } + + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qctl: + perm = &unit->ctlperm; + break; + case Qraw: + perm = &unit->rawperm; + break; + case Qpart: + pp = &unit->part[PART(c->qid)]; + if(unit->vers+pp->vers != c->qid.vers) + error(Enonexist); + perm = &pp->SDperm; + break; + } + + if(strcmp(up->user, perm->user) && !iseve()) + error(Eperm); + + d = smalloc(sizeof(Dir)+n); + n = convM2D(dp, n, &d[0], (char*)&d[1]); + if(n == 0) + error(Eshortstat); + if(!emptystr(d[0].uid)) + kstrdup(&perm->user, d[0].uid); + if(d[0].mode != ~0UL) + perm->perm = (perm->perm & ~0777) | (d[0].mode & 0777); + + free(d); + qunlock(&unit->ctl); + decref(&sdev->r); + poperror(); + return n; +} + +static int +configure(char* spec, DevConf* cf) +{ + SDev *s, *sdev; + char *p; + int i; + + if(sdindex(*spec) < 0) + error("bad sd spec"); + + if((p = strchr(cf->type, '/')) != nil) + *p++ = '\0'; + + for(i = 0; sdifc[i] != nil; i++) + if(strcmp(sdifc[i]->name, cf->type) == 0) + break; + if(sdifc[i] == nil) + error("sd type not found"); + if(p) + *(p-1) = '/'; + + if(sdifc[i]->probe == nil) + error("sd type cannot 
probe"); + + sdev = sdifc[i]->probe(cf); + for(s=sdev; s; s=s->next) + s->idno = *spec; + sdadddevs(sdev); + return 0; +} + +static int +unconfigure(char* spec) +{ + int i; + SDev *sdev; + SDunit *unit; + + if((i = sdindex(*spec)) < 0) + error(Enonexist); + + qlock(&devslock); + if((sdev = devs[i]) == nil){ + qunlock(&devslock); + error(Enonexist); + } + if(sdev->r.ref){ + qunlock(&devslock); + error(Einuse); + } + devs[i] = nil; + qunlock(&devslock); + + /* make sure no interrupts arrive anymore before removing resources */ + if(sdev->enabled && sdev->ifc->disable) + sdev->ifc->disable(sdev); + + for(i = 0; i != sdev->nunit; i++){ + if(unit = sdev->unit[i]){ + free(unit->name); + free(unit->user); + free(unit); + } + } + + if(sdev->ifc->clear) + sdev->ifc->clear(sdev); + free(sdev); + return 0; +} + +static int +sdconfig(int on, char* spec, DevConf* cf) +{ + if(on) + return configure(spec, cf); + return unconfigure(spec); +} + +Dev sddevtab = { + 'S', + "sd", + + sdreset, + devinit, + devshutdown, + sdattach, + sdwalk, + sdstat, + sdopen, + devcreate, + sdclose, + sdread, + devbread, + sdwrite, + devbwrite, + devremove, + sdwstat, + devpower, + sdconfig, /* probe; only called for pcmcia-like devices */ +}; + +/* + * This is wrong for so many reasons. This code must go. 
+ */ +typedef struct Confdata Confdata; +struct Confdata { + int on; + char* spec; + DevConf cf; +}; + +static void +parseswitch(Confdata* cd, char* option) +{ + if(!strcmp("on", option)) + cd->on = 1; + else if(!strcmp("off", option)) + cd->on = 0; + else + error(Ebadarg); +} + +static void +parsespec(Confdata* cd, char* option) +{ + if(strlen(option) > 1) + error(Ebadarg); + cd->spec = option; +} + +static Devport* +getnewport(DevConf* dc) +{ + Devport *p; + + p = (Devport *)malloc((dc->nports + 1) * sizeof(Devport)); + if(p == nil) + error(Enomem); + if(dc->nports > 0){ + memmove(p, dc->ports, dc->nports * sizeof(Devport)); + free(dc->ports); + } + dc->ports = p; + p = &dc->ports[dc->nports++]; + p->size = -1; + p->port = (ulong)-1; + return p; +} + +static void +parseport(Confdata* cd, char* option) +{ + char *e; + Devport *p; + + if(cd->cf.nports == 0 || cd->cf.ports[cd->cf.nports-1].port != (ulong)-1) + p = getnewport(&cd->cf); + else + p = &cd->cf.ports[cd->cf.nports-1]; + p->port = strtol(option, &e, 0); + if(e == nil || *e != '\0') + error(Ebadarg); +} + +static void +parsesize(Confdata* cd, char* option) +{ + char *e; + Devport *p; + + if(cd->cf.nports == 0 || cd->cf.ports[cd->cf.nports-1].size != -1) + p = getnewport(&cd->cf); + else + p = &cd->cf.ports[cd->cf.nports-1]; + p->size = (int)strtol(option, &e, 0); + if(e == nil || *e != '\0') + error(Ebadarg); +} + +static void +parseirq(Confdata* cd, char* option) +{ + char *e; + + cd->cf.intnum = strtoul(option, &e, 0); + if(e == nil || *e != '\0') + error(Ebadarg); +} + +static void +parsetype(Confdata* cd, char* option) +{ + cd->cf.type = option; +} + +static struct { + char *name; + void (*parse)(Confdata*, char*); +} options[] = { + "switch", parseswitch, + "spec", parsespec, + "port", parseport, + "size", parsesize, + "irq", parseirq, + "type", parsetype, +}; + +static void +legacytopctl(Cmdbuf *cb) +{ + char *opt; + int i, j; + Confdata cd; + + memset(&cd, 0, sizeof cd); + cd.on = -1; + for(i=0; inf; 
i+=2){ + if(i+2 > cb->nf) + error(Ebadarg); + opt = cb->f[i]; + for(j=0; jf[i+1]); + break; + } + if(j == nelem(options)) + error(Ebadarg); + } + /* this has been rewritten to accomodate sdaoe */ + if(cd.on < 0 || cd.spec == 0) + error(Ebadarg); + if(cd.on && cd.cf.type == nil) + error(Ebadarg); + sdconfig(cd.on, cd.spec, &cd.cf); +} diff -Nru /sys/src/9k/port/devsrv.c /sys/src/9k/port/devsrv.c --- /sys/src/9k/port/devsrv.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devsrv.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,360 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + + +typedef struct Srv Srv; +struct Srv +{ + char *name; + char *owner; + ulong perm; + Chan *chan; + Srv *link; + ulong path; +}; + +static QLock srvlk; +static Srv *srv; +static int qidpath; + +static int +srvgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp) +{ + Srv *sp; + Qid q; + + if(s == DEVDOTDOT){ + devdir(c, c->qid, "#s", 0, eve, 0555, dp); + return 1; + } + + qlock(&srvlk); + for(sp = srv; sp && s; sp = sp->link) + s--; + + if(sp == 0) { + qunlock(&srvlk); + return -1; + } + + mkqid(&q, sp->path, 0, QTFILE); + /* make sure name string continues to exist after we release lock */ + kstrcpy(up->genbuf, sp->name, sizeof up->genbuf); + devdir(c, q, up->genbuf, 0, sp->owner, sp->perm, dp); + qunlock(&srvlk); + return 1; +} + +static void +srvinit(void) +{ + qidpath = 1; +} + +static Chan* +srvattach(char *spec) +{ + return devattach('s', spec); +} + +static Walkqid* +srvwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, srvgen); +} + +static Srv* +srvlookup(char *name, ulong qidpath) +{ + Srv *sp; + for(sp = srv; sp; sp = sp->link) + if(sp->path == qidpath || (name && strcmp(sp->name, name) == 0)) + return sp; + return nil; +} + +static long +srvstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, srvgen); +} + +char* +srvname(Chan *c) +{ + Srv *sp; + char *s; + + 
for(sp = srv; sp; sp = sp->link) + if(sp->chan == c){ + s = smalloc(3+strlen(sp->name)+1); + sprint(s, "#s/%s", sp->name); + return s; + } + return nil; +} + +static Chan* +srvopen(Chan *c, int omode) +{ + Srv *sp; + + if(c->qid.type == QTDIR){ + if(omode & ORCLOSE) + error(Eperm); + if(omode != OREAD) + error(Eisdir); + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; + } + qlock(&srvlk); + if(waserror()){ + qunlock(&srvlk); + nexterror(); + } + + sp = srvlookup(nil, c->qid.path); + if(sp == 0 || sp->chan == 0) + error(Eshutdown); + + if(omode&OTRUNC) + error("srv file already exists"); + if(openmode(omode)!=sp->chan->mode && sp->chan->mode!=ORDWR) + error(Eperm); + devpermcheck(sp->owner, sp->perm, omode); + + cclose(c); + c = sp->chan; + incref(c); + c->offset = 0; + qunlock(&srvlk); + poperror(); + return c; +} + +static void +srvcreate(Chan *c, char *name, int omode, int perm) +{ + Srv *sp; + char *sname; + + if(openmode(omode) != OWRITE) + error(Eperm); + + if(omode & OCEXEC) /* can't happen */ + panic("someone broke namec"); + + sp = smalloc(sizeof *sp); + sname = smalloc(strlen(name)+1); + + qlock(&srvlk); + if(waserror()){ + free(sname); + free(sp); + qunlock(&srvlk); + nexterror(); + } + if(sp == nil || sname == nil) + error(Enomem); + if(srvlookup(name, -1)) + error(Eexist); + + sp->path = qidpath++; + sp->link = srv; + strcpy(sname, name); + sp->name = sname; + c->qid.type = QTFILE; + c->qid.path = sp->path; + srv = sp; + qunlock(&srvlk); + poperror(); + + kstrdup(&sp->owner, up->user); + sp->perm = perm&0777; + + c->flag |= COPEN; + c->mode = OWRITE; +} + +static void +srvremove(Chan *c) +{ + Srv *sp, **l; + + if(c->qid.type == QTDIR) + error(Eperm); + + qlock(&srvlk); + if(waserror()){ + qunlock(&srvlk); + nexterror(); + } + l = &srv; + for(sp = *l; sp; sp = sp->link) { + if(sp->path == c->qid.path) + break; + + l = &sp->link; + } + if(sp == 0) + error(Enonexist); + + /* + * Only eve can remove system services. 
+ * No one can remove #s/boot. + */ + if(strcmp(sp->owner, eve) == 0 && !iseve()) + error(Eperm); + if(strcmp(sp->name, "boot") == 0) + error(Eperm); + + /* + * No removing personal services. + */ + if((sp->perm&7) != 7 && strcmp(sp->owner, up->user) && !iseve()) + error(Eperm); + + *l = sp->link; + qunlock(&srvlk); + poperror(); + + if(sp->chan) + cclose(sp->chan); + free(sp->owner); + free(sp->name); + free(sp); +} + +static long +srvwstat(Chan *c, uchar *dp, long n) +{ + Dir d; + Srv *sp; + char *strs; + + if(c->qid.type & QTDIR) + error(Eperm); + + strs = nil; + qlock(&srvlk); + if(waserror()){ + qunlock(&srvlk); + free(strs); + nexterror(); + } + + sp = srvlookup(nil, c->qid.path); + if(sp == 0) + error(Enonexist); + + if(strcmp(sp->owner, up->user) != 0 && !iseve()) + error(Eperm); + + strs = smalloc(n); + n = convM2D(dp, n, &d, strs); + if(n == 0) + error(Eshortstat); + if(d.mode != ~0UL) + sp->perm = d.mode & 0777; + if(d.uid && *d.uid) + kstrdup(&sp->owner, d.uid); + if(d.name && *d.name && strcmp(sp->name, d.name) != 0) { + if(strchr(d.name, '/') != nil) + error(Ebadchar); + kstrdup(&sp->name, d.name); + } + + qunlock(&srvlk); + free(strs); + poperror(); + return n; +} + +static void +srvclose(Chan *c) +{ + /* + * in theory we need to override any changes in removability + * since open, but since all that's checked is the owner, + * which is immutable, all is well. 
+ */ + if(c->flag & CRCLOSE){ + if(waserror()) + return; + srvremove(c); + poperror(); + } +} + +static long +srvread(Chan *c, void *va, long n, vlong) +{ + isdir(c); + return devdirread(c, va, n, 0, 0, srvgen); +} + +static long +srvwrite(Chan *c, void *va, long n, vlong) +{ + Srv *sp; + Chan *c1; + int fd; + char buf[32]; + + if(n >= sizeof buf) + error(Egreg); + memmove(buf, va, n); /* so we can NUL-terminate */ + buf[n] = 0; + fd = strtoul(buf, 0, 0); + + c1 = fdtochan(fd, -1, 0, 1); /* error check and inc ref */ + + qlock(&srvlk); + if(waserror()) { + qunlock(&srvlk); + cclose(c1); + nexterror(); + } + if(c1->flag & (CCEXEC|CRCLOSE)) + error("posted fd has remove-on-close or close-on-exec"); + if(c1->qid.type & QTAUTH) + error("cannot post auth file in srv"); + sp = srvlookup(nil, c->qid.path); + if(sp == 0) + error(Enonexist); + + if(sp->chan) + error(Ebadusefd); + + sp->chan = c1; + qunlock(&srvlk); + poperror(); + return n; +} + +Dev srvdevtab = { + 's', + "srv", + + devreset, + srvinit, + devshutdown, + srvattach, + srvwalk, + srvstat, + srvopen, + srvcreate, + srvclose, + srvread, + devbread, + srvwrite, + devbwrite, + srvremove, + srvwstat, +}; diff -Nru /sys/src/9k/port/devssl.c /sys/src/9k/port/devssl.c --- /sys/src/9k/port/devssl.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devssl.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1502 @@ +/* + * devssl - secure sockets layer + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +#define NOSPOOKS 1 + +typedef struct OneWay OneWay; +struct OneWay +{ + QLock q; + QLock ctlq; + + void *state; /* encryption state */ + int slen; /* hash data length */ + uchar *secret; /* secret */ + ulong mid; /* message id */ +}; + +enum +{ + /* connection states */ + Sincomplete= 0, + Sclear= 1, + Sencrypting= 2, + Sdigesting= 4, + Sdigenc= Sencrypting|Sdigesting, + + /* encryption algorithms */ + Noencryption= 0, + DESCBC= 1, + DESECB= 2, + 
RC4= 3 +}; + +typedef struct Dstate Dstate; +struct Dstate +{ + Chan *c; /* io channel */ + uchar state; /* state of connection */ + int ref; /* serialized by dslock for atomic destroy */ + + uchar encryptalg; /* encryption algorithm */ + ushort blocklen; /* blocking length */ + + ushort diglen; /* length of digest */ + DigestState *(*hf)(uchar*, ulong, uchar*, DigestState*); /* hash func */ + + /* for SSL format */ + int max; /* maximum unpadded data per msg */ + int maxpad; /* maximum padded data per msg */ + + /* input side */ + OneWay in; + Block *processed; + Block *unprocessed; + + /* output side */ + OneWay out; + + /* protections */ + char *user; + int perm; +}; + +enum +{ + Maxdmsg= 1<<16, + Maxdstate= 256, /* must be a power of 2 */ +}; + +Lock dslock; +int dshiwat; +Dstate *dstate[Maxdstate]; +char *encalgs; +char *hashalgs; + +enum{ + Qtopdir = 1, /* top level directory */ + Qprotodir, + Qclonus, + Qconvdir, /* directory for a conversation */ + Qdata, + Qctl, + Qsecretin, + Qsecretout, + Qencalgs, + Qhashalgs, +}; + +#define TYPE(x) ((x).path & 0xf) +#define CONV(x) (((x).path >> 5)&(Maxdstate-1)) +#define QID(c, y) (((c)<<5) | (y)) + +static void ensure(Dstate*, Block**, int); +static void consume(Block**, uchar*, int); +static void setsecret(OneWay*, uchar*, int); +static Block* encryptb(Dstate*, Block*, int); +static Block* decryptb(Dstate*, Block*); +static Block* digestb(Dstate*, Block*, int); +static void checkdigestb(Dstate*, Block*); +static Chan* buftochan(char*); +static void sslhangup(Dstate*); +static Dstate* dsclone(Chan *c); +static void dsnew(Chan *c, Dstate **); +static long sslput(Dstate *s, Block * volatile b); + +char *sslnames[] = { +[Qclonus] "clone", +[Qdata] "data", +[Qctl] "ctl", +[Qsecretin] "secretin", +[Qsecretout] "secretout", +[Qencalgs] "encalgs", +[Qhashalgs] "hashalgs", +}; + +static int +sslgen(Chan *c, char*, Dirtab *d, int nd, int s, Dir *dp) +{ + Qid q; + Dstate *ds; + char *p, *nm; + int ft; + + USED(nd); + USED(d); 
+ + q.type = QTFILE; + q.vers = 0; + + ft = TYPE(c->qid); + switch(ft) { + case Qtopdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, "#D", 0, eve, 0555, dp); + return 1; + } + if(s > 0) + return -1; + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "ssl", 0, eve, 0555, dp); + return 1; + case Qprotodir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, ".", 0, eve, 0555, dp); + return 1; + } + if(s < dshiwat) { + q.path = QID(s, Qconvdir); + q.type = QTDIR; + ds = dstate[s]; + if(ds != 0) + nm = ds->user; + else + nm = eve; + snprint(up->genbuf, sizeof(up->genbuf), "%d", s); + devdir(c, q, up->genbuf, 0, nm, 0555, dp); + return 1; + } + if(s > dshiwat) + return -1; + q.path = QID(0, Qclonus); + devdir(c, q, "clone", 0, eve, 0555, dp); + return 1; + case Qconvdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "ssl", 0, eve, 0555, dp); + return 1; + } + ds = dstate[CONV(c->qid)]; + if(ds != 0) + nm = ds->user; + else + nm = eve; + switch(s) { + default: + return -1; + case 0: + q.path = QID(CONV(c->qid), Qctl); + p = "ctl"; + break; + case 1: + q.path = QID(CONV(c->qid), Qdata); + p = "data"; + break; + case 2: + q.path = QID(CONV(c->qid), Qsecretin); + p = "secretin"; + break; + case 3: + q.path = QID(CONV(c->qid), Qsecretout); + p = "secretout"; + break; + case 4: + q.path = QID(CONV(c->qid), Qencalgs); + p = "encalgs"; + break; + case 5: + q.path = QID(CONV(c->qid), Qhashalgs); + p = "hashalgs"; + break; + } + devdir(c, q, p, 0, nm, 0660, dp); + return 1; + case Qclonus: + devdir(c, c->qid, sslnames[TYPE(c->qid)], 0, eve, 0555, dp); + return 1; + default: + ds = dstate[CONV(c->qid)]; + if(ds != 0) + nm = ds->user; + else + nm = eve; + devdir(c, c->qid, sslnames[TYPE(c->qid)], 0, nm, 0660, dp); + return 1; + } +} + +static Chan* +sslattach(char *spec) +{ + Chan *c; + + c = devattach('D', spec); + c->qid.path = QID(0, Qtopdir); + c->qid.vers = 
0; + c->qid.type = QTDIR; + return c; +} + +static Walkqid* +sslwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, sslgen); +} + +static long +sslstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, sslgen); +} + +static Chan* +sslopen(Chan *c, int omode) +{ + Dstate *s, **pp; + int perm; + int ft; + + perm = 0; + omode &= 3; + switch(omode) { + case OREAD: + perm = 4; + break; + case OWRITE: + perm = 2; + break; + case ORDWR: + perm = 6; + break; + } + + ft = TYPE(c->qid); + switch(ft) { + default: + panic("sslopen"); + case Qtopdir: + case Qprotodir: + case Qconvdir: + if(omode != OREAD) + error(Eperm); + break; + case Qclonus: + s = dsclone(c); + if(s == 0) + error(Enodev); + break; + case Qctl: + case Qdata: + case Qsecretin: + case Qsecretout: + if(waserror()) { + unlock(&dslock); + nexterror(); + } + lock(&dslock); + pp = &dstate[CONV(c->qid)]; + s = *pp; + if(s == 0) + dsnew(c, pp); + else { + if((perm & (s->perm>>6)) != perm + && (strcmp(up->user, s->user) != 0 + || (perm & s->perm) != perm)) + error(Eperm); + + s->ref++; + } + unlock(&dslock); + poperror(); + break; + case Qencalgs: + case Qhashalgs: + if(omode != OREAD) + error(Eperm); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static long +sslwstat(Chan *c, uchar *db, long n) +{ + Dir *dir; + Dstate *s; + int l; + + s = dstate[CONV(c->qid)]; + if(s == 0) + error(Ebadusefd); + if(strcmp(s->user, up->user) != 0) + error(Eperm); + + dir = smalloc(sizeof(Dir)+n); + l = convM2D(db, n, &dir[0], (char*)&dir[1]); + if(l == 0){ + free(dir); + error(Eshortstat); + } + + if(!emptystr(dir->uid)) + kstrdup(&s->user, dir->uid); + if(dir->mode != ~0UL) + s->perm = dir->mode; + + free(dir); + return l; +} + +static void +sslclose(Chan *c) +{ + Dstate *s; + int ft; + + ft = TYPE(c->qid); + switch(ft) { + case Qctl: + case Qdata: + case Qsecretin: + case Qsecretout: + if((c->flag & COPEN) == 0) + break; + + 
s = dstate[CONV(c->qid)]; + if(s == 0) + break; + + lock(&dslock); + if(--s->ref > 0) { + unlock(&dslock); + break; + } + dstate[CONV(c->qid)] = 0; + unlock(&dslock); + + if(s->user != nil) + free(s->user); + sslhangup(s); + if(s->c) + cclose(s->c); + if(s->in.secret) + free(s->in.secret); + if(s->out.secret) + free(s->out.secret); + if(s->in.state) + free(s->in.state); + if(s->out.state) + free(s->out.state); + free(s); + + } +} + +/* + * make sure we have at least 'n' bytes in list 'l' + */ +static void +ensure(Dstate *s, Block **l, int n) +{ + int sofar, i; + Block *b, *bl; + + sofar = 0; + for(b = *l; b; b = b->next){ + sofar += BLEN(b); + if(sofar >= n) + return; + l = &b->next; + } + + while(sofar < n){ + bl = s->c->dev->bread(s->c, Maxdmsg, 0); + if(bl == 0) + nexterror(); + *l = bl; + i = 0; + for(b = bl; b; b = b->next){ + i += BLEN(b); + l = &b->next; + } + if(i == 0) + error(Ehungup); + sofar += i; + } +} + +/* + * copy 'n' bytes from 'l' into 'p' and free + * the bytes in 'l' + */ +static void +consume(Block **l, uchar *p, int n) +{ + Block *b; + int i; + + for(; *l && n > 0; n -= i){ + b = *l; + i = BLEN(b); + if(i > n) + i = n; + memmove(p, b->rp, i); + b->rp += i; + p += i; + if(BLEN(b) < 0) + panic("consume"); + if(BLEN(b)) + break; + *l = b->next; + freeb(b); + } +} + +/* + * give back n bytes +static void +regurgitate(Dstate *s, uchar *p, int n) +{ + Block *b; + + if(n <= 0) + return; + b = s->unprocessed; + if(s->unprocessed == nil || b->rp - b->base < n) { + b = allocb(n); + memmove(b->wp, p, n); + b->wp += n; + b->next = s->unprocessed; + s->unprocessed = b; + } else { + b->rp -= n; + memmove(b->rp, p, n); + } +} + */ + +/* + * remove at most n bytes from the queue, if discard is set + * dump the remainder + */ +static Block* +qtake(Block **l, int n, int discard) +{ + Block *nb, *b, *first; + int i; + + first = *l; + for(b = first; b; b = b->next){ + i = BLEN(b); + if(i == n){ + if(discard){ + freeblist(b->next); + *l = 0; + } else + *l = 
b->next; + b->next = 0; + return first; + } else if(i > n){ + i -= n; + if(discard){ + freeblist(b->next); + b->wp -= i; + *l = 0; + } else { + nb = allocb(i); + memmove(nb->wp, b->rp+n, i); + nb->wp += i; + b->wp -= i; + nb->next = b->next; + *l = nb; + } + b->next = 0; + if(BLEN(b) < 0) + panic("qtake"); + return first; + } else + n -= i; + if(BLEN(b) < 0) + panic("qtake"); + } + *l = 0; + return first; +} + +/* + * We can't let Eintr's lose data since the program + * doing the read may be able to handle it. The only + * places Eintr is possible is during the read's in consume. + * Therefore, we make sure we can always put back the bytes + * consumed before the last ensure. + */ +static Block* +sslbread(Chan *c, long n, vlong) +{ + Dstate * volatile s; + Block *b; + uchar consumed[3], *p; + int toconsume; + int len, pad; + + s = dstate[CONV(c->qid)]; + if(s == 0) + panic("sslbread"); + if(s->state == Sincomplete) + error(Ebadusefd); + + qlock(&s->in.q); + if(waserror()){ + qunlock(&s->in.q); + nexterror(); + } + + if(s->processed == 0){ + /* + * Read in the whole message. Until we've got it all, + * it stays on s->unprocessed, so that if we get Eintr, + * we'll pick up where we left off. 
+ */ + ensure(s, &s->unprocessed, 3); + s->unprocessed = pullupblock(s->unprocessed, 2); + p = s->unprocessed->rp; + if(p[0] & 0x80){ + len = ((p[0] & 0x7f)<<8) | p[1]; + ensure(s, &s->unprocessed, len); + pad = 0; + toconsume = 2; + } else { + s->unprocessed = pullupblock(s->unprocessed, 3); + len = ((p[0] & 0x3f)<<8) | p[1]; + pad = p[2]; + if(pad > len){ + print("pad %d buf len %d\n", pad, len); + error("bad pad in ssl message"); + } + toconsume = 3; + } + ensure(s, &s->unprocessed, toconsume+len); + + /* skip header */ + consume(&s->unprocessed, consumed, toconsume); + + /* grab the next message and decode/decrypt it */ + b = qtake(&s->unprocessed, len, 0); + + if(blocklen(b) != len) + print("devssl: sslbread got wrong count %d != %d", blocklen(b), len); + + if(waserror()){ + qunlock(&s->in.ctlq); + if(b != nil) + freeb(b); + nexterror(); + } + qlock(&s->in.ctlq); + switch(s->state){ + case Sencrypting: + if(b == nil) + error("ssl message too short (encrypting)"); + b = decryptb(s, b); + break; + case Sdigesting: + b = pullupblock(b, s->diglen); + if(b == nil) + error("ssl message too short (digesting)"); + checkdigestb(s, b); + pullblock(&b, s->diglen); + len -= s->diglen; + break; + case Sdigenc: + b = decryptb(s, b); + b = pullupblock(b, s->diglen); + if(b == nil) + error("ssl message too short (dig+enc)"); + checkdigestb(s, b); + pullblock(&b, s->diglen); + len -= s->diglen; + break; + } + + /* remove pad */ + if(pad) + s->processed = qtake(&b, len - pad, 1); + else + s->processed = b; + b = nil; + s->in.mid++; + qunlock(&s->in.ctlq); + poperror(); + } + + /* return at most what was asked for */ + b = qtake(&s->processed, n, 0); + + qunlock(&s->in.q); + poperror(); + + return b; +} + +static long +sslread(Chan *c, void *a, long n, vlong off) +{ + Block * volatile b; + Block *nb; + uchar *va; + int i; + char buf[128]; + long offset; + int ft; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, sslgen); + + ft = TYPE(c->qid); + offset = off; + 
switch(ft) { + default: + error(Ebadusefd); + case Qctl: + ft = CONV(c->qid); + sprint(buf, "%d", ft); + return readstr(offset, a, n, buf); + case Qdata: + b = sslbread(c, n, offset); + break; + case Qencalgs: + return readstr(offset, a, n, encalgs); + break; + case Qhashalgs: + return readstr(offset, a, n, hashalgs); + break; + } + + if(waserror()){ + freeblist(b); + nexterror(); + } + + n = 0; + va = a; + for(nb = b; nb; nb = nb->next){ + i = BLEN(nb); + memmove(va+n, nb->rp, i); + n += i; + } + + freeblist(b); + poperror(); + + return n; +} + +/* + * this algorithm doesn't have to be great since we're just + * trying to obscure the block fill + */ +static void +randfill(uchar *buf, int len) +{ + while(len-- > 0) + *buf++ = nrand(256); +} + +static long +sslbwrite(Chan *c, Block *b, vlong) +{ + Dstate * volatile s; + long rv; + + s = dstate[CONV(c->qid)]; + if(s == nil) + panic("sslbwrite"); + + if(s->state == Sincomplete){ + freeb(b); + error(Ebadusefd); + } + + /* lock so split writes won't interleave */ + if(waserror()){ + qunlock(&s->out.q); + nexterror(); + } + qlock(&s->out.q); + + rv = sslput(s, b); + + poperror(); + qunlock(&s->out.q); + + return rv; +} + +/* + * use SSL record format, add in count, digest and/or encrypt. + * the write is interruptable. if it is interrupted, we'll + * get out of sync with the far side. not much we can do about + * it since we don't know if any bytes have been written. 
+ */ +static long +sslput(Dstate *s, Block * volatile b) +{ + Block *nb; + int h, n, l, pad, rv; + uchar *p; + int offset; + + if(waserror()){ + if(b != nil) + free(b); + nexterror(); + } + + rv = 0; + while(b != nil){ + l = n = BLEN(b); + h = s->diglen + 2; + + /* trim to maximum block size */ + pad = 0; + if(l > s->max){ + l = s->max; + } else if(s->blocklen != 1){ + pad = (l + s->diglen)%s->blocklen; + if(pad){ + if(l > s->maxpad){ + pad = 0; + l = s->maxpad; + } else { + pad = s->blocklen - pad; + h++; + } + } + } + + rv += l; + if(l != n){ + nb = allocb(l + h + pad); + memmove(nb->wp + h, b->rp, l); + nb->wp += l + h; + b->rp += l; + } else { + /* add header space */ + nb = padblock(b, h); + b = 0; + } + l += s->diglen; + + /* SSL style count */ + if(pad){ + nb = padblock(nb, -pad); + randfill(nb->wp, pad); + nb->wp += pad; + l += pad; + + p = nb->rp; + p[0] = (l>>8); + p[1] = l; + p[2] = pad; + offset = 3; + } else { + p = nb->rp; + p[0] = (l>>8) | 0x80; + p[1] = l; + offset = 2; + } + + switch(s->state){ + case Sencrypting: + nb = encryptb(s, nb, offset); + break; + case Sdigesting: + nb = digestb(s, nb, offset); + break; + case Sdigenc: + nb = digestb(s, nb, offset); + nb = encryptb(s, nb, offset); + break; + } + + s->out.mid++; + + l = BLEN(nb); + s->c->dev->bwrite(s->c, nb, s->c->offset); + s->c->offset += l; + } + + poperror(); + return rv; +} + +static void +setsecret(OneWay *w, uchar *secret, int n) +{ + if(w->secret) + free(w->secret); + + w->secret = smalloc(n); + memmove(w->secret, secret, n); + w->slen = n; +} + +static void +initDESkey(OneWay *w) +{ + if(w->state){ + free(w->state); + w->state = 0; + } + + w->state = smalloc(sizeof(DESstate)); + if(w->slen >= 16) + setupDESstate(w->state, w->secret, w->secret+8); + else if(w->slen >= 8) + setupDESstate(w->state, w->secret, 0); + else + error("secret too short"); +} + +/* + * 40 bit DES is the same as 56 bit DES. However, + * 16 bits of the key are masked to zero. 
+ */
+/*
+ * initDESkey_40 discards any previous cipher state in w and builds
+ * a new DESstate from a masked copy of the first 8 secret bytes:
+ * the high 4 bits of bytes 0, 2, 4 and 6 are cleared.  With 16 or
+ * more bytes of secret, bytes 8 onward are passed as the third
+ * argument of setupDESstate, otherwise 0 is passed.  Raises
+ * "secret too short" when fewer than 8 bytes are available.
+ */
+static void
+initDESkey_40(OneWay *w)
+{
+	uchar key[8];
+
+	if(w->state){
+		free(w->state);
+		w->state = 0;
+	}
+
+	/* key is only filled in here; the short-secret case errors out before using it */
+	if(w->slen >= 8){
+		memmove(key, w->secret, 8);
+		key[0] &= 0x0f;
+		key[2] &= 0x0f;
+		key[4] &= 0x0f;
+		key[6] &= 0x0f;
+	}
+
+	/*
+	 * smalloc, as in initDESkey: the result of malloc was passed
+	 * to setupDESstate without a nil check.
+	 */
+	w->state = smalloc(sizeof(DESstate));
+	if(w->slen >= 16)
+		setupDESstate(w->state, key, w->secret+8);
+	else if(w->slen >= 8)
+		setupDESstate(w->state, key, 0);
+	else
+		error("secret too short");
+}
+
+/*
+ * initRC4key discards any previous cipher state in w and builds a
+ * new RC4state keyed with all w->slen bytes of w->secret.
+ */
+static void
+initRC4key(OneWay *w)
+{
+	if(w->state){
+		free(w->state);
+		w->state = 0;
+	}
+
+	w->state = smalloc(sizeof(RC4state));
+	setupRC4state(w->state, w->secret, w->slen);
+}
+
+/*
+ * 40 bit RC4 is the same as n-bit RC4. However,
+ * we ignore all but the first 40 bits of the key.
+ *
+ * Note that the truncation is done by lowering w->slen itself,
+ * so it persists after this call.
+ */
+static void
+initRC4key_40(OneWay *w)
+{
+	if(w->state){
+		free(w->state);
+		w->state = 0;
+	}
+
+	if(w->slen > 5)
+		w->slen = 5;
+
+	/*
+	 * smalloc, as in initRC4key: the result of malloc was passed
+	 * to setupRC4state without a nil check.
+	 */
+	w->state = smalloc(sizeof(RC4state));
+	setupRC4state(w->state, w->secret, w->slen);
+}
+
+/*
+ * 128 bit RC4 is the same as n-bit RC4. However,
+ * we ignore all but the first 128 bits of the key.
+ */ +static void +initRC4key_128(OneWay *w) +{ + if(w->state){ + free(w->state); + w->state = 0; + } + + if(w->slen > 16) + w->slen = 16; + + w->state = malloc(sizeof(RC4state)); + setupRC4state(w->state, w->secret, w->slen); +} + + +typedef struct Hashalg Hashalg; +struct Hashalg +{ + char *name; + int diglen; + DigestState *(*hf)(uchar*, ulong, uchar*, DigestState*); +}; + +Hashalg hashtab[] = +{ + { "md4", MD4dlen, md4, }, + { "md5", MD5dlen, md5, }, + { "sha1", SHA1dlen, sha1, }, + { "sha", SHA1dlen, sha1, }, + { 0 } +}; + +static int +parsehashalg(char *p, Dstate *s) +{ + Hashalg *ha; + + for(ha = hashtab; ha->name; ha++){ + if(strcmp(p, ha->name) == 0){ + s->hf = ha->hf; + s->diglen = ha->diglen; + s->state &= ~Sclear; + s->state |= Sdigesting; + return 0; + } + } + return -1; +} + +typedef struct Encalg Encalg; +struct Encalg +{ + char *name; + int blocklen; + int alg; + void (*keyinit)(OneWay*); +}; + +#ifdef NOSPOOKS +Encalg encrypttab[] = +{ + { "descbc", 8, DESCBC, initDESkey, }, /* DEPRECATED -- use des_56_cbc */ + { "desecb", 8, DESECB, initDESkey, }, /* DEPRECATED -- use des_56_ecb */ + { "des_56_cbc", 8, DESCBC, initDESkey, }, + { "des_56_ecb", 8, DESECB, initDESkey, }, + { "des_40_cbc", 8, DESCBC, initDESkey_40, }, + { "des_40_ecb", 8, DESECB, initDESkey_40, }, + { "rc4", 1, RC4, initRC4key_40, }, /* DEPRECATED -- use rc4_X */ + { "rc4_256", 1, RC4, initRC4key, }, + { "rc4_128", 1, RC4, initRC4key_128, }, + { "rc4_40", 1, RC4, initRC4key_40, }, + { 0 } +}; +#else +Encalg encrypttab[] = +{ + { "des_40_cbc", 8, DESCBC, initDESkey_40, }, + { "des_40_ecb", 8, DESECB, initDESkey_40, }, + { "rc4", 1, RC4, initRC4key_40, }, /* DEPRECATED -- use rc4_X */ + { "rc4_40", 1, RC4, initRC4key_40, }, + { 0 } +}; +#endif NOSPOOKS + +static int +parseencryptalg(char *p, Dstate *s) +{ + Encalg *ea; + + for(ea = encrypttab; ea->name; ea++){ + if(strcmp(p, ea->name) == 0){ + s->encryptalg = ea->alg; + s->blocklen = ea->blocklen; + (*ea->keyinit)(&s->in); + 
(*ea->keyinit)(&s->out); + s->state &= ~Sclear; + s->state |= Sencrypting; + return 0; + } + } + return -1; +} + +static long +sslwrite(Chan *c, void *a, long n, vlong) +{ + Dstate * volatile s; + Block * volatile b; + int l, t; + char *p, *np, *e, buf[128]; + uchar *x; + + s = dstate[CONV(c->qid)]; + if(s == 0) + panic("sslwrite"); + + t = TYPE(c->qid); + if(t == Qdata){ + if(s->state == Sincomplete) + error(Ebadusefd); + + /* lock should a write gets split over multiple records */ + if(waserror()){ + qunlock(&s->out.q); + nexterror(); + } + qlock(&s->out.q); + + p = a; + e = p + n; + do { + l = e - p; + if(l > s->max) + l = s->max; + + b = allocb(l); + if(waserror()){ + freeb(b); + nexterror(); + } + memmove(b->wp, p, l); + poperror(); + b->wp += l; + + sslput(s, b); + + p += l; + } while(p < e); + + poperror(); + qunlock(&s->out.q); + return n; + } + + /* mutex with operations using what we're about to change */ + if(waserror()){ + qunlock(&s->in.ctlq); + qunlock(&s->out.q); + nexterror(); + } + qlock(&s->in.ctlq); + qlock(&s->out.q); + + switch(t){ + default: + panic("sslwrite"); + case Qsecretin: + setsecret(&s->in, a, n); + goto out; + case Qsecretout: + setsecret(&s->out, a, n); + goto out; + case Qctl: + break; + } + + if(n >= sizeof(buf)) + error("arg too long"); + strncpy(buf, a, n); + buf[n] = 0; + p = strchr(buf, '\n'); + if(p) + *p = 0; + p = strchr(buf, ' '); + if(p) + *p++ = 0; + + if(strcmp(buf, "fd") == 0){ + s->c = buftochan(p); + + /* default is clear (msg delimiters only) */ + s->state = Sclear; + s->blocklen = 1; + s->diglen = 0; + s->maxpad = s->max = (1<<15) - s->diglen - 1; + s->in.mid = 0; + s->out.mid = 0; + } else if(strcmp(buf, "alg") == 0 && p != 0){ + s->blocklen = 1; + s->diglen = 0; + + if(s->c == 0) + error("must set fd before algorithm"); + + s->state = Sclear; + s->maxpad = s->max = (1<<15) - s->diglen - 1; + if(strcmp(p, "clear") == 0){ + goto out; + } + + if(s->in.secret && s->out.secret == 0) + setsecret(&s->out, s->in.secret, 
s->in.slen); + if(s->out.secret && s->in.secret == 0) + setsecret(&s->in, s->out.secret, s->out.slen); + if(s->in.secret == 0 || s->out.secret == 0) + error("algorithm but no secret"); + + s->hf = 0; + s->encryptalg = Noencryption; + s->blocklen = 1; + + for(;;){ + np = strchr(p, ' '); + if(np) + *np++ = 0; + + if(parsehashalg(p, s) < 0) + if(parseencryptalg(p, s) < 0) + error("bad algorithm"); + + if(np == 0) + break; + p = np; + } + + if(s->hf == 0 && s->encryptalg == Noencryption) + error("bad algorithm"); + + if(s->blocklen != 1){ + s->max = (1<<15) - s->diglen - 1; + s->max -= s->max % s->blocklen; + s->maxpad = (1<<14) - s->diglen - 1; + s->maxpad -= s->maxpad % s->blocklen; + } else + s->maxpad = s->max = (1<<15) - s->diglen - 1; + } else if(strcmp(buf, "secretin") == 0 && p != 0) { + l = (strlen(p)*3)/2; + x = smalloc(l); + t = dec64(x, l, p, strlen(p)); + setsecret(&s->in, x, t); + free(x); + } else if(strcmp(buf, "secretout") == 0 && p != 0) { + l = (strlen(p)*3)/2 + 1; + x = smalloc(l); + t = dec64(x, l, p, strlen(p)); + setsecret(&s->out, x, t); + free(x); + } else + error(Ebadarg); + +out: + qunlock(&s->in.ctlq); + qunlock(&s->out.q); + poperror(); + return n; +} + +static void +sslinit(void) +{ + struct Encalg *e; + struct Hashalg *h; + int n; + char *cp; + + n = 1; + for(e = encrypttab; e->name != nil; e++) + n += strlen(e->name) + 1; + cp = encalgs = smalloc(n); + for(e = encrypttab;;){ + strcpy(cp, e->name); + cp += strlen(e->name); + e++; + if(e->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; + + n = 1; + for(h = hashtab; h->name != nil; h++) + n += strlen(h->name) + 1; + cp = hashalgs = smalloc(n); + for(h = hashtab;;){ + strcpy(cp, h->name); + cp += strlen(h->name); + h++; + if(h->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; +} + +Dev ssldevtab = { + 'D', + "ssl", + + devreset, + sslinit, + devshutdown, + sslattach, + sslwalk, + sslstat, + sslopen, + devcreate, + sslclose, + sslread, + sslbread, + sslwrite, + sslbwrite, + devremove, 
+ sslwstat, +}; + +static Block* +encryptb(Dstate *s, Block *b, int offset) +{ + uchar *p, *ep, *p2, *ip, *eip; + DESstate *ds; + + switch(s->encryptalg){ + case DESECB: + ds = s->out.state; + ep = b->rp + BLEN(b); + for(p = b->rp + offset; p < ep; p += 8) + block_cipher(ds->expanded, p, 0); + break; + case DESCBC: + ds = s->out.state; + ep = b->rp + BLEN(b); + for(p = b->rp + offset; p < ep; p += 8){ + p2 = p; + ip = ds->ivec; + for(eip = ip+8; ip < eip; ) + *p2++ ^= *ip++; + block_cipher(ds->expanded, p, 0); + memmove(ds->ivec, p, 8); + } + break; + case RC4: + rc4(s->out.state, b->rp + offset, BLEN(b) - offset); + break; + } + return b; +} + +static Block* +decryptb(Dstate *s, Block *bin) +{ + Block *b, **l; + uchar *p, *ep, *tp, *ip, *eip; + DESstate *ds; + uchar tmp[8]; + int i; + + l = &bin; + for(b = bin; b; b = b->next){ + /* make sure we have a multiple of s->blocklen */ + if(s->blocklen > 1){ + i = BLEN(b); + if(i % s->blocklen){ + *l = b = pullupblock(b, i + s->blocklen - (i%s->blocklen)); + if(b == 0) + error("ssl encrypted message too short"); + } + } + l = &b->next; + + /* decrypt */ + switch(s->encryptalg){ + case DESECB: + ds = s->in.state; + ep = b->rp + BLEN(b); + for(p = b->rp; p < ep; p += 8) + block_cipher(ds->expanded, p, 1); + break; + case DESCBC: + ds = s->in.state; + ep = b->rp + BLEN(b); + for(p = b->rp; p < ep;){ + memmove(tmp, p, 8); + block_cipher(ds->expanded, p, 1); + tp = tmp; + ip = ds->ivec; + for(eip = ip+8; ip < eip; ){ + *p++ ^= *ip; + *ip++ = *tp++; + } + } + break; + case RC4: + rc4(s->in.state, b->rp, BLEN(b)); + break; + } + } + return bin; +} + +static Block* +digestb(Dstate *s, Block *b, int offset) +{ + uchar *p; + DigestState ss; + uchar msgid[4]; + ulong n, h; + OneWay *w; + + w = &s->out; + + memset(&ss, 0, sizeof(ss)); + h = s->diglen + offset; + n = BLEN(b) - h; + + /* hash secret + message */ + (*s->hf)(w->secret, w->slen, 0, &ss); + (*s->hf)(b->rp + h, n, 0, &ss); + + /* hash message id */ + p = msgid; + n = 
w->mid; + *p++ = n>>24; + *p++ = n>>16; + *p++ = n>>8; + *p = n; + (*s->hf)(msgid, 4, b->rp + offset, &ss); + + return b; +} + +static void +checkdigestb(Dstate *s, Block *bin) +{ + uchar *p; + DigestState ss; + uchar msgid[4]; + int n, h; + OneWay *w; + uchar digest[128]; + Block *b; + + w = &s->in; + + memset(&ss, 0, sizeof(ss)); + + /* hash secret */ + (*s->hf)(w->secret, w->slen, 0, &ss); + + /* hash message */ + h = s->diglen; + for(b = bin; b; b = b->next){ + n = BLEN(b) - h; + if(n < 0) + panic("checkdigestb"); + (*s->hf)(b->rp + h, n, 0, &ss); + h = 0; + } + + /* hash message id */ + p = msgid; + n = w->mid; + *p++ = n>>24; + *p++ = n>>16; + *p++ = n>>8; + *p = n; + (*s->hf)(msgid, 4, digest, &ss); + + if(memcmp(digest, bin->rp, s->diglen) != 0) + error("bad digest"); +} + +/* get channel associated with an fd */ +static Chan* +buftochan(char *p) +{ + Chan *c; + int fd; + + if(p == 0) + error(Ebadarg); + fd = strtoul(p, 0, 0); + if(fd < 0) + error(Ebadarg); + c = fdtochan(fd, -1, 0, 1); /* error check and inc ref */ + if(c->dev == &ssldevtab){ + cclose(c); + error("cannot ssl encrypt devssl files"); + } + return c; +} + +/* hand up a digest connection */ +static void +sslhangup(Dstate *s) +{ + Block *b; + + qlock(&s->in.q); + for(b = s->processed; b; b = s->processed){ + s->processed = b->next; + freeb(b); + } + if(s->unprocessed){ + freeb(s->unprocessed); + s->unprocessed = 0; + } + s->state = Sincomplete; + qunlock(&s->in.q); +} + +static Dstate* +dsclone(Chan *ch) +{ + int i; + Dstate *ret; + + if(waserror()) { + unlock(&dslock); + nexterror(); + } + lock(&dslock); + ret = nil; + for(i=0; i= dshiwat) + dshiwat++; + memset(s, 0, sizeof(*s)); + s->state = Sincomplete; + s->ref = 1; + kstrdup(&s->user, up->user); + s->perm = 0660; + t = TYPE(ch->qid); + if(t == Qclonus) + t = Qctl; + ch->qid.path = QID(pp - dstate, t); + ch->qid.vers = 0; +} diff -Nru /sys/src/9k/port/devtab.c /sys/src/9k/port/devtab.c --- /sys/src/9k/port/devtab.c Thu Jan 1 00:00:00 1970 
+++ /sys/src/9k/port/devtab.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,107 @@ +/* + * Stub. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +extern Dev* devtab[]; + +void +devtabreset(void) +{ + int i; + Dev *d; + + for(i = 0; devtab[i] != nil; i++){ + d = devtab[i]; + if(d->readv == nil) + d->readv = devreadv; + if(d->writev == nil) + d->writev = devwritev; + d->reset(); + } +} + +void +devtabinit(void) +{ + int i; + + for(i = 0; devtab[i] != nil; i++) + devtab[i]->init(); +} + +void +devtabshutdown(void) +{ + int i; + + /* + * Shutdown in reverse order. + */ + for(i = 0; devtab[i] != nil; i++) + ; + for(i--; i >= 0; i--) + devtab[i]->shutdown(); +} + + +Dev* +devtabget(int dc, int user) +{ + int i; + + for(i = 0; devtab[i] != nil; i++){ + if(devtab[i]->dc == dc) + return devtab[i]; + } + + if(user == 0) + panic("devtabget %C\n", dc); + + return nil; +} + +Dev* +devbyname(char *name) +{ + int i; + + for(i = 0; devtab[i] != nil; i++) + if(strcmp(devtab[i]->name, name) == 0) + return devtab[i]; + return nil; +} + +long +devtabread(Chan*, void* buf, long n, vlong off) +{ + int i; + Dev *dev; + char *alloc, *e, *p; + + alloc = malloc(READSTR); + if(alloc == nil) + error(Enomem); + + p = alloc; + e = p + READSTR; + for(i = 0; devtab[i] != nil; i++){ + dev = devtab[i]; + p = seprint(p, e, "#%C %s\n", dev->dc, dev->name); + } + + if(waserror()){ + free(alloc); + nexterror(); + } + n = readstr(off, buf, n, alloc); + free(alloc); + poperror(); + + return n; +} diff -Nru /sys/src/9k/port/devtls.c /sys/src/9k/port/devtls.c --- /sys/src/9k/port/devtls.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devtls.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2201 @@ +/* + * devtls - record layer for transport layer security 1.0 and secure sockets layer 3.0 + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +typedef struct 
OneWay OneWay; +typedef struct Secret Secret; +typedef struct TlsRec TlsRec; +typedef struct TlsErrs TlsErrs; + +enum { + Statlen= 1024, /* max. length of status or stats message */ + /* buffer limits */ + MaxRecLen = 1<<14, /* max payload length of a record layer message */ + MaxCipherRecLen = MaxRecLen + 2048, + RecHdrLen = 5, + MaxMacLen = SHA1dlen, + + /* protocol versions we can accept */ + TLSVersion = 0x0301, + SSL3Version = 0x0300, + ProtocolVersion = 0x0301, /* maximum version we speak */ + MinProtoVersion = 0x0300, /* limits on version we accept */ + MaxProtoVersion = 0x03ff, + + /* connection states */ + SHandshake = 1 << 0, /* doing handshake */ + SOpen = 1 << 1, /* application data can be sent */ + SRClose = 1 << 2, /* remote side has closed down */ + SLClose = 1 << 3, /* sent a close notify alert */ + SAlert = 1 << 5, /* sending or sent a fatal alert */ + SError = 1 << 6, /* some sort of error has occured */ + SClosed = 1 << 7, /* it is all over */ + + /* record types */ + RChangeCipherSpec = 20, + RAlert, + RHandshake, + RApplication, + + SSL2ClientHello = 1, + HSSL2ClientHello = 9, /* local convention; see tlshand.c */ + + /* alerts */ + ECloseNotify = 0, + EUnexpectedMessage = 10, + EBadRecordMac = 20, + EDecryptionFailed = 21, + ERecordOverflow = 22, + EDecompressionFailure = 30, + EHandshakeFailure = 40, + ENoCertificate = 41, + EBadCertificate = 42, + EUnsupportedCertificate = 43, + ECertificateRevoked = 44, + ECertificateExpired = 45, + ECertificateUnknown = 46, + EIllegalParameter = 47, + EUnknownCa = 48, + EAccessDenied = 49, + EDecodeError = 50, + EDecryptError = 51, + EExportRestriction = 60, + EProtocolVersion = 70, + EInsufficientSecurity = 71, + EInternalError = 80, + EUserCanceled = 90, + ENoRenegotiation = 100, + + EMAX = 256 +}; + +struct Secret +{ + char *encalg; /* name of encryption alg */ + char *hashalg; /* name of hash alg */ + int (*enc)(Secret*, uchar*, int); + int (*dec)(Secret*, uchar*, int); + int (*unpad)(uchar*, int, 
int); + DigestState *(*mac)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*); + int block; /* encryption block len, 0 if none */ + int maclen; + void *enckey; + uchar mackey[MaxMacLen]; +}; + +struct OneWay +{ + QLock io; /* locks io access */ + QLock seclock; /* locks secret paramaters */ + ulong seq; + Secret *sec; /* cipher in use */ + Secret *new; /* cipher waiting for enable */ +}; + +struct TlsRec +{ + Chan *c; /* io channel */ + int ref; /* serialized by tdlock for atomic destroy */ + int version; /* version of the protocol we are speaking */ + char verset; /* version has been set */ + char opened; /* opened command every issued? */ + char err[ERRMAX]; /* error message to return to handshake requests */ + vlong handin; /* bytes communicated by the record layer */ + vlong handout; + vlong datain; + vlong dataout; + + Lock statelk; + int state; + int debug; + + /* record layer mac functions for different protocol versions */ + void (*packMac)(Secret*, uchar*, uchar*, uchar*, uchar*, int, uchar*); + + /* input side -- protected by in.io */ + OneWay in; + Block *processed; /* next bunch of application data */ + Block *unprocessed; /* data read from c but not parsed into records */ + + /* handshake queue */ + Lock hqlock; /* protects hqref, alloc & free of handq, hprocessed */ + int hqref; + Queue *handq; /* queue of handshake messages */ + Block *hprocessed; /* remainder of last block read from handq */ + QLock hqread; /* protects reads for hprocessed, handq */ + + /* output side */ + OneWay out; + + /* protections */ + char *user; + int perm; +}; + +struct TlsErrs{ + int err; + int sslerr; + int tlserr; + int fatal; + char *msg; +}; + +static TlsErrs tlserrs[] = { + {ECloseNotify, ECloseNotify, ECloseNotify, 0, "close notify"}, + {EUnexpectedMessage, EUnexpectedMessage, EUnexpectedMessage, 1, "unexpected message"}, + {EBadRecordMac, EBadRecordMac, EBadRecordMac, 1, "bad record mac"}, + {EDecryptionFailed, EIllegalParameter, EDecryptionFailed, 1, "decryption 
failed"}, + {ERecordOverflow, EIllegalParameter, ERecordOverflow, 1, "record too long"}, + {EDecompressionFailure, EDecompressionFailure, EDecompressionFailure, 1, "decompression failed"}, + {EHandshakeFailure, EHandshakeFailure, EHandshakeFailure, 1, "could not negotiate acceptable security parameters"}, + {ENoCertificate, ENoCertificate, ECertificateUnknown, 1, "no appropriate certificate available"}, + {EBadCertificate, EBadCertificate, EBadCertificate, 1, "corrupted or invalid certificate"}, + {EUnsupportedCertificate, EUnsupportedCertificate, EUnsupportedCertificate, 1, "unsupported certificate type"}, + {ECertificateRevoked, ECertificateRevoked, ECertificateRevoked, 1, "revoked certificate"}, + {ECertificateExpired, ECertificateExpired, ECertificateExpired, 1, "expired certificate"}, + {ECertificateUnknown, ECertificateUnknown, ECertificateUnknown, 1, "unacceptable certificate"}, + {EIllegalParameter, EIllegalParameter, EIllegalParameter, 1, "illegal parameter"}, + {EUnknownCa, EHandshakeFailure, EUnknownCa, 1, "unknown certificate authority"}, + {EAccessDenied, EHandshakeFailure, EAccessDenied, 1, "access denied"}, + {EDecodeError, EIllegalParameter, EDecodeError, 1, "error decoding message"}, + {EDecryptError, EIllegalParameter, EDecryptError, 1, "error decrypting message"}, + {EExportRestriction, EHandshakeFailure, EExportRestriction, 1, "export restriction violated"}, + {EProtocolVersion, EIllegalParameter, EProtocolVersion, 1, "protocol version not supported"}, + {EInsufficientSecurity, EHandshakeFailure, EInsufficientSecurity, 1, "stronger security routines required"}, + {EInternalError, EHandshakeFailure, EInternalError, 1, "internal error"}, + {EUserCanceled, ECloseNotify, EUserCanceled, 0, "handshake canceled by user"}, + {ENoRenegotiation, EUnexpectedMessage, ENoRenegotiation, 0, "no renegotiation"}, +}; + +enum +{ + /* max. 
open tls connections */ + MaxTlsDevs = 1024 +}; + +static Lock tdlock; +static int tdhiwat; +static int maxtlsdevs = 128; +static TlsRec **tlsdevs; +static char *encalgs; +static char *hashalgs; + +enum{ + Qtopdir = 1, /* top level directory */ + Qprotodir, + Qclonus, + Qencalgs, + Qhashalgs, + Qconvdir, /* directory for a conversation */ + Qdata, + Qctl, + Qhand, + Qstatus, + Qstats, +}; + +#define TYPE(x) ((x).path & 0xf) +#define CONV(x) (((x).path >> 5)&(MaxTlsDevs-1)) +#define QID(c, y) (((c)<<5) | (y)) + +static void checkstate(TlsRec *, int, int); +static void ensure(TlsRec*, Block**, int); +static void consume(Block**, uchar*, int); +static Chan* buftochan(char*); +static void tlshangup(TlsRec*); +static void tlsError(TlsRec*, char *); +static void alertHand(TlsRec*, char *); +static TlsRec *newtls(Chan *c); +static TlsRec *mktlsrec(void); +static DigestState*sslmac_md5(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s); +static DigestState*sslmac_sha1(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s); +static DigestState*nomac(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s); +static void sslPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac); +static void tlsPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac); +static void put64(uchar *p, vlong x); +static void put32(uchar *p, u32int); +static void put24(uchar *p, int); +static void put16(uchar *p, int); +static u32int get32(uchar *p); +static int get16(uchar *p); +static void tlsSetState(TlsRec *tr, int new, int old); +static void rcvAlert(TlsRec *tr, int err); +static void sendAlert(TlsRec *tr, int err); +static void rcvError(TlsRec *tr, int err, char *msg, ...); +static int rc4enc(Secret *sec, uchar *buf, int n); +static int des3enc(Secret *sec, uchar *buf, int n); +static int des3dec(Secret *sec, uchar *buf, int n); +static int 
aesenc(Secret *sec, uchar *buf, int n); +static int aesdec(Secret *sec, uchar *buf, int n); +static int noenc(Secret *sec, uchar *buf, int n); +static int sslunpad(uchar *buf, int n, int block); +static int tlsunpad(uchar *buf, int n, int block); +static void freeSec(Secret *sec); +static char *tlsstate(int s); +static void pdump(int, void*, char*); + +#pragma varargck argpos rcvError 3 + +static char *tlsnames[] = { +[Qclonus] "clone", +[Qencalgs] "encalgs", +[Qhashalgs] "hashalgs", +[Qdata] "data", +[Qctl] "ctl", +[Qhand] "hand", +[Qstatus] "status", +[Qstats] "stats", +}; + +static int convdir[] = { Qctl, Qdata, Qhand, Qstatus, Qstats }; + +static int +tlsgen(Chan *c, char*, Dirtab *, int, int s, Dir *dp) +{ + Qid q; + TlsRec *tr; + char *nm; + int perm, t; + + q.vers = 0; + q.type = QTFILE; + + t = TYPE(c->qid); + switch(t) { + case Qtopdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, "#a", 0, eve, 0555, dp); + return 1; + } + if(s > 0) + return -1; + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "tls", 0, eve, 0555, dp); + return 1; + case Qprotodir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, ".", 0, eve, 0555, dp); + return 1; + } + if(s < 3){ + switch(s) { + default: + return -1; + case 0: + q.path = QID(0, Qclonus); + break; + case 1: + q.path = QID(0, Qencalgs); + break; + case 2: + q.path = QID(0, Qhashalgs); + break; + } + perm = 0444; + if(TYPE(q) == Qclonus) + perm = 0555; + devdir(c, q, tlsnames[TYPE(q)], 0, eve, perm, dp); + return 1; + } + s -= 3; + if(s >= tdhiwat) + return -1; + q.path = QID(s, Qconvdir); + q.type = QTDIR; + lock(&tdlock); + tr = tlsdevs[s]; + if(tr != nil) + nm = tr->user; + else + nm = eve; + snprint(up->genbuf, sizeof(up->genbuf), "%d", s); + devdir(c, q, up->genbuf, 0, nm, 0555, dp); + unlock(&tdlock); + return 1; + case Qconvdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "tls", 0, eve, 0555, 
dp); + return 1; + } + if(s < 0 || s >= nelem(convdir)) + return -1; + lock(&tdlock); + tr = tlsdevs[CONV(c->qid)]; + if(tr != nil){ + nm = tr->user; + perm = tr->perm; + }else{ + perm = 0; + nm = eve; + } + t = convdir[s]; + if(t == Qstatus || t == Qstats) + perm &= 0444; + q.path = QID(CONV(c->qid), t); + devdir(c, q, tlsnames[t], 0, nm, perm, dp); + unlock(&tdlock); + return 1; + case Qclonus: + case Qencalgs: + case Qhashalgs: + perm = 0444; + if(t == Qclonus) + perm = 0555; + devdir(c, c->qid, tlsnames[t], 0, eve, perm, dp); + return 1; + default: + lock(&tdlock); + tr = tlsdevs[CONV(c->qid)]; + if(tr != nil){ + nm = tr->user; + perm = tr->perm; + }else{ + perm = 0; + nm = eve; + } + if(t == Qstatus || t == Qstats) + perm &= 0444; + devdir(c, c->qid, tlsnames[t], 0, nm, perm, dp); + unlock(&tdlock); + return 1; + } +} + +static Chan* +tlsattach(char *spec) +{ + Chan *c; + + c = devattach('a', spec); + c->qid.path = QID(0, Qtopdir); + c->qid.type = QTDIR; + c->qid.vers = 0; + return c; +} + +static Walkqid* +tlswalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, tlsgen); +} + +static long +tlsstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, tlsgen); +} + +static Chan* +tlsopen(Chan *c, int omode) +{ + TlsRec *tr, **pp; + int t, perm; + + perm = 0; + omode &= 3; + switch(omode) { + case OREAD: + perm = 4; + break; + case OWRITE: + perm = 2; + break; + case ORDWR: + perm = 6; + break; + } + + t = TYPE(c->qid); + switch(t) { + default: + panic("tlsopen"); + case Qtopdir: + case Qprotodir: + case Qconvdir: + if(omode != OREAD) + error(Eperm); + break; + case Qclonus: + tr = newtls(c); + if(tr == nil) + error(Enodev); + break; + case Qctl: + case Qdata: + case Qhand: + case Qstatus: + case Qstats: + if((t == Qstatus || t == Qstats) && omode != OREAD) + error(Eperm); + if(waserror()) { + unlock(&tdlock); + nexterror(); + } + lock(&tdlock); + pp = &tlsdevs[CONV(c->qid)]; + tr = *pp; + if(tr == nil) + 
error("must open connection using clone"); + if((perm & (tr->perm>>6)) != perm + && (strcmp(up->user, tr->user) != 0 + || (perm & tr->perm) != perm)) + error(Eperm); + if(t == Qhand){ + if(waserror()){ + unlock(&tr->hqlock); + nexterror(); + } + lock(&tr->hqlock); + if(tr->handq != nil) + error(Einuse); + tr->handq = qopen(2 * MaxCipherRecLen, 0, nil, nil); + if(tr->handq == nil) + error("cannot allocate handshake queue"); + tr->hqref = 1; + unlock(&tr->hqlock); + poperror(); + } + tr->ref++; + unlock(&tdlock); + poperror(); + break; + case Qencalgs: + case Qhashalgs: + if(omode != OREAD) + error(Eperm); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->iounit = qiomaxatomic; + return c; +} + +static long +tlswstat(Chan *c, uchar *dp, long n) +{ + Dir *d; + TlsRec *tr; + int rv; + + d = nil; + if(waserror()){ + free(d); + unlock(&tdlock); + nexterror(); + } + + lock(&tdlock); + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + error(Ebadusefd); + if(strcmp(tr->user, up->user) != 0) + error(Eperm); + + d = smalloc(n + sizeof *d); + rv = convM2D(dp, n, &d[0], (char*) &d[1]); + if(rv == 0) + error(Eshortstat); + if(!emptystr(d->uid)) + kstrdup(&tr->user, d->uid); + if(d->mode != ~0UL) + tr->perm = d->mode; + + free(d); + poperror(); + unlock(&tdlock); + + return rv; +} + +static void +dechandq(TlsRec *tr) +{ + lock(&tr->hqlock); + if(--tr->hqref == 0){ + if(tr->handq != nil){ + qfree(tr->handq); + tr->handq = nil; + } + if(tr->hprocessed != nil){ + freeb(tr->hprocessed); + tr->hprocessed = nil; + } + } + unlock(&tr->hqlock); +} + +static void +tlsclose(Chan *c) +{ + TlsRec *tr; + int t; + + t = TYPE(c->qid); + switch(t) { + case Qctl: + case Qdata: + case Qhand: + case Qstatus: + case Qstats: + if((c->flag & COPEN) == 0) + break; + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + break; + + if(t == Qhand) + dechandq(tr); + + lock(&tdlock); + if(--tr->ref > 0) { + unlock(&tdlock); + return; + } + tlsdevs[CONV(c->qid)] = nil; + 
unlock(&tdlock); + + if(tr->c != nil && !waserror()){ + checkstate(tr, 0, SOpen|SHandshake|SRClose); + sendAlert(tr, ECloseNotify); + poperror(); + } + tlshangup(tr); + if(tr->c != nil) + cclose(tr->c); + freeSec(tr->in.sec); + freeSec(tr->in.new); + freeSec(tr->out.sec); + freeSec(tr->out.new); + free(tr->user); + free(tr); + break; + } +} + +/* + * make sure we have at least 'n' bytes in list 'l' + */ +static void +ensure(TlsRec *s, Block **l, int n) +{ + int sofar, i; + Block *b, *bl; + + sofar = 0; + for(b = *l; b; b = b->next){ + sofar += BLEN(b); + if(sofar >= n) + return; + l = &b->next; + } + + while(sofar < n){ + bl = s->c->dev->bread(s->c, MaxCipherRecLen + RecHdrLen, 0); + if(bl == 0) + error(Ehungup); + *l = bl; + i = 0; + for(b = bl; b; b = b->next){ + i += BLEN(b); + l = &b->next; + } + if(i == 0) + error(Ehungup); + sofar += i; + } +if(s->debug) pprint("ensure read %d\n", sofar); +} + +/* + * copy 'n' bytes from 'l' into 'p' and free + * the bytes in 'l' + */ +static void +consume(Block **l, uchar *p, int n) +{ + Block *b; + int i; + + for(; *l && n > 0; n -= i){ + b = *l; + i = BLEN(b); + if(i > n) + i = n; + memmove(p, b->rp, i); + b->rp += i; + p += i; + if(BLEN(b) < 0) + panic("consume"); + if(BLEN(b)) + break; + *l = b->next; + freeb(b); + } +} + +/* + * give back n bytes + */ +static void +regurgitate(TlsRec *s, uchar *p, int n) +{ + Block *b; + + if(n <= 0) + return; + b = s->unprocessed; + if(s->unprocessed == nil || b->rp - b->base < n) { + b = allocb(n); + memmove(b->wp, p, n); + b->wp += n; + b->next = s->unprocessed; + s->unprocessed = b; + } else { + b->rp -= n; + memmove(b->rp, p, n); + } +} + +/* + * remove at most n bytes from the queue + */ +static Block* +qgrab(Block **l, int n) +{ + Block *bb, *b; + int i; + + b = *l; + if(BLEN(b) == n){ + *l = b->next; + b->next = nil; + return b; + } + + i = 0; + for(bb = b; bb != nil && i < n; bb = bb->next) + i += BLEN(bb); + if(i > n) + i = n; + + bb = allocb(i); + consume(l, bb->wp, i); + 
bb->wp += i; + return bb; +} + +static void +tlsclosed(TlsRec *tr, int new) +{ + lock(&tr->statelk); + if(tr->state == SOpen || tr->state == SHandshake) + tr->state = new; + else if((new | tr->state) == (SRClose|SLClose)) + tr->state = SClosed; + unlock(&tr->statelk); + alertHand(tr, "close notify"); +} + +/* + * read and process one tls record layer message + * must be called with tr->in.io held + * We can't let Eintrs lose data, since doing so will get + * us out of sync with the sender and break the reliablity + * of the channel. Eintr only happens during the reads in + * consume. Therefore we put back any bytes consumed before + * the last call to ensure. + */ +static void +tlsrecread(TlsRec *tr) +{ + OneWay *volatile in; + Block *volatile b; + uchar *p, seq[8], header[RecHdrLen], hmac[MaxMacLen]; + int volatile nconsumed; + int len, type, ver, unpad_len; + + nconsumed = 0; + if(waserror()){ + if(strcmp(up->errstr, Eintr) == 0 && !waserror()){ + regurgitate(tr, header, nconsumed); + poperror(); + }else + tlsError(tr, "channel error"); + nexterror(); + } + ensure(tr, &tr->unprocessed, RecHdrLen); + consume(&tr->unprocessed, header, RecHdrLen); +if(tr->debug)pprint("consumed %d header\n", RecHdrLen); + nconsumed = RecHdrLen; + + if((tr->handin == 0) && (header[0] & 0x80)){ + /* Cope with an SSL3 ClientHello expressed in SSL2 record format. + This is sent by some clients that we must interoperate + with, such as Java's JSSE and Microsoft's Internet Explorer. 
*/ + len = (get16(header) & ~0x8000) - 3; + type = header[2]; + ver = get16(header + 3); + if(type != SSL2ClientHello || len < 22) + rcvError(tr, EProtocolVersion, "invalid initial SSL2-like message"); + }else{ /* normal SSL3 record format */ + type = header[0]; + ver = get16(header+1); + len = get16(header+3); + } + if(ver != tr->version && (tr->verset || ver < MinProtoVersion || ver > MaxProtoVersion)) + rcvError(tr, EProtocolVersion, "devtls expected ver=%x%s, saw (len=%d) type=%x ver=%x '%.12s'", + tr->version, tr->verset?"/set":"", len, type, ver, (char*)header); + if(len > MaxCipherRecLen || len < 0) + rcvError(tr, ERecordOverflow, "record message too long %d", len); + ensure(tr, &tr->unprocessed, len); + nconsumed = 0; + poperror(); + + /* + * If an Eintr happens after this, we'll get out of sync. + * Make sure nothing we call can sleep. + * Errors are ok, as they kill the connection. + * Luckily, allocb won't sleep, it'll just error out. + */ + b = nil; + if(waserror()){ + if(b != nil) + freeb(b); + tlsError(tr, "channel error"); + nexterror(); + } + b = qgrab(&tr->unprocessed, len); +if(tr->debug) pprint("consumed unprocessed %d\n", len); + + in = &tr->in; + if(waserror()){ + qunlock(&in->seclock); + nexterror(); + } + qlock(&in->seclock); + p = b->rp; + if(in->sec != nil) { + /* to avoid Canvel-Hiltgen-Vaudenay-Vuagnoux attack, all errors here + should look alike, including timing of the response. 
*/ + unpad_len = (*in->sec->dec)(in->sec, p, len); + if(unpad_len >= in->sec->maclen) + len = unpad_len - in->sec->maclen; +if(tr->debug) pprint("decrypted %d\n", unpad_len); +if(tr->debug) pdump(unpad_len, p, "decrypted:"); + + /* update length */ + put16(header+3, len); + put64(seq, in->seq); + in->seq++; + (*tr->packMac)(in->sec, in->sec->mackey, seq, header, p, len, hmac); + if(unpad_len < in->sec->maclen) + rcvError(tr, EBadRecordMac, "short record mac"); + if(memcmp(hmac, p+len, in->sec->maclen) != 0) + rcvError(tr, EBadRecordMac, "record mac mismatch"); + b->wp = b->rp + len; + } + qunlock(&in->seclock); + poperror(); + if(len < 0) + rcvError(tr, EDecodeError, "runt record message"); + + switch(type) { + default: + rcvError(tr, EIllegalParameter, "invalid record message %#x", type); + break; + case RChangeCipherSpec: + if(len != 1 || p[0] != 1) + rcvError(tr, EDecodeError, "invalid change cipher spec"); + qlock(&in->seclock); + if(in->new == nil){ + qunlock(&in->seclock); + rcvError(tr, EUnexpectedMessage, "unexpected change cipher spec"); + } + freeSec(in->sec); + in->sec = in->new; + in->new = nil; + in->seq = 0; + qunlock(&in->seclock); + break; + case RAlert: + if(len != 2) + rcvError(tr, EDecodeError, "invalid alert"); + if(p[0] == 2) + rcvAlert(tr, p[1]); + if(p[0] != 1) + rcvError(tr, EIllegalParameter, "invalid alert fatal code"); + + /* + * propate non-fatal alerts to handshaker + */ + if(p[1] == ECloseNotify) { + tlsclosed(tr, SRClose); + if(tr->opened) + error("tls hungup"); + error("close notify"); + } + if(p[1] == ENoRenegotiation) + alertHand(tr, "no renegotiation"); + else if(p[1] == EUserCanceled) + alertHand(tr, "handshake canceled by user"); + else + rcvError(tr, EIllegalParameter, "invalid alert code"); + break; + case RHandshake: + /* + * don't worry about dropping the block + * qbwrite always queues even if flow controlled and interrupted. 
+ * + * if there isn't any handshaker, ignore the request, + * but notify the other side we are doing so. + */ + lock(&tr->hqlock); + if(tr->handq != nil){ + tr->hqref++; + unlock(&tr->hqlock); + if(waserror()){ + dechandq(tr); + nexterror(); + } + b = padblock(b, 1); + *b->rp = RHandshake; + qbwrite(tr->handq, b); + b = nil; + poperror(); + dechandq(tr); + }else{ + unlock(&tr->hqlock); + if(tr->verset && tr->version != SSL3Version && !waserror()){ + sendAlert(tr, ENoRenegotiation); + poperror(); + } + } + break; + case SSL2ClientHello: + lock(&tr->hqlock); + if(tr->handq != nil){ + tr->hqref++; + unlock(&tr->hqlock); + if(waserror()){ + dechandq(tr); + nexterror(); + } + /* Pass the SSL2 format data, so that the handshake code can compute + the correct checksums. HSSL2ClientHello = HandshakeType 9 is + unused in RFC2246. */ + b = padblock(b, 8); + b->rp[0] = RHandshake; + b->rp[1] = HSSL2ClientHello; + put24(&b->rp[2], len+3); + b->rp[5] = SSL2ClientHello; + put16(&b->rp[6], ver); + qbwrite(tr->handq, b); + b = nil; + poperror(); + dechandq(tr); + }else{ + unlock(&tr->hqlock); + if(tr->verset && tr->version != SSL3Version && !waserror()){ + sendAlert(tr, ENoRenegotiation); + poperror(); + } + } + break; + case RApplication: + if(!tr->opened) + rcvError(tr, EUnexpectedMessage, "application message received before handshake completed"); + if(BLEN(b) > 0){ + tr->processed = b; + b = nil; + } + break; + } + if(b != nil) + freeb(b); + poperror(); +} + +/* + * got a fatal alert message + */ +static void +rcvAlert(TlsRec *tr, int err) +{ + char *s; + int i; + + s = "unknown error"; + for(i=0; i < nelem(tlserrs); i++){ + if(tlserrs[i].err == err){ + s = tlserrs[i].msg; + break; + } + } +if(tr->debug) pprint("rcvAlert: %s\n", s); + + tlsError(tr, s); + if(!tr->opened) + error(s); + error("tls error"); +} + +/* + * found an error while decoding the input stream + */ +static void +rcvError(TlsRec *tr, int err, char *fmt, ...) 
+{ + char msg[ERRMAX]; + va_list arg; + + va_start(arg, fmt); + vseprint(msg, msg+sizeof(msg), fmt, arg); + va_end(arg); +if(tr->debug) pprint("rcvError: %s\n", msg); + + sendAlert(tr, err); + + if(!tr->opened) + error(msg); + error("tls error"); +} + +/* + * make sure the next hand operation returns with a 'msg' error + */ +static void +alertHand(TlsRec *tr, char *msg) +{ + Block *b; + int n; + + lock(&tr->hqlock); + if(tr->handq == nil){ + unlock(&tr->hqlock); + return; + } + tr->hqref++; + unlock(&tr->hqlock); + + n = strlen(msg); + if(waserror()){ + dechandq(tr); + nexterror(); + } + b = allocb(n + 2); + *b->wp++ = RAlert; + memmove(b->wp, msg, n + 1); + b->wp += n + 1; + + qbwrite(tr->handq, b); + + poperror(); + dechandq(tr); +} + +static void +checkstate(TlsRec *tr, int ishand, int ok) +{ + int state; + + lock(&tr->statelk); + state = tr->state; + unlock(&tr->statelk); + if(state & ok) + return; + switch(state){ + case SHandshake: + case SOpen: + break; + case SError: + case SAlert: + if(ishand) + error(tr->err); + error("tls error"); + case SRClose: + case SLClose: + case SClosed: + error("tls hungup"); + } + error("tls improperly configured"); +} + +static Block* +tlsbread(Chan *c, long n, vlong offset) +{ + int ty; + Block *b; + TlsRec *volatile tr; + + ty = TYPE(c->qid); + switch(ty) { + default: + return devbread(c, n, offset); + case Qhand: + case Qdata: + break; + } + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + panic("tlsbread"); + + if(waserror()){ + qunlock(&tr->in.io); + nexterror(); + } + qlock(&tr->in.io); + if(ty == Qdata){ + checkstate(tr, 0, SOpen); + while(tr->processed == nil) + tlsrecread(tr); + + /* return at most what was asked for */ + b = qgrab(&tr->processed, n); +if(tr->debug) pprint("consumed processed %ld\n", BLEN(b)); +if(tr->debug) pdump(BLEN(b), b->rp, "consumed:"); + qunlock(&tr->in.io); + poperror(); + tr->datain += BLEN(b); + }else{ + checkstate(tr, 1, SOpen|SHandshake|SLClose); + + /* + * it's ok to look at state without 
the lock + * since it only protects reading records, + * and we have that tr->in.io held. + */ + while(!tr->opened && tr->hprocessed == nil && !qcanread(tr->handq)) + tlsrecread(tr); + + qunlock(&tr->in.io); + poperror(); + + if(waserror()){ + qunlock(&tr->hqread); + nexterror(); + } + qlock(&tr->hqread); + if(tr->hprocessed == nil){ + b = qbread(tr->handq, MaxRecLen + 1); + if(*b->rp++ == RAlert){ + kstrcpy(up->errstr, (char*)b->rp, ERRMAX); + freeb(b); + nexterror(); + } + tr->hprocessed = b; + } + b = qgrab(&tr->hprocessed, n); + poperror(); + qunlock(&tr->hqread); + tr->handin += BLEN(b); + } + + return b; +} + +static long +tlsread(Chan *c, void *a, long n, vlong off) +{ + Block *volatile b; + Block *nb; + uchar *va; + int i, ty; + char *buf, *s, *e; + long offset; + TlsRec * tr; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, tlsgen); + + offset = off; + tr = tlsdevs[CONV(c->qid)]; + ty = TYPE(c->qid); + switch(ty) { + default: + error(Ebadusefd); + case Qstatus: + buf = smalloc(Statlen); + qlock(&tr->in.seclock); + qlock(&tr->out.seclock); + s = buf; + e = buf + Statlen; + s = seprint(s, e, "State: %s\n", tlsstate(tr->state)); + s = seprint(s, e, "Version: %#x\n", tr->version); + if(tr->in.sec != nil) + s = seprint(s, e, "EncIn: %s\nHashIn: %s\n", tr->in.sec->encalg, tr->in.sec->hashalg); + if(tr->in.new != nil) + s = seprint(s, e, "NewEncIn: %s\nNewHashIn: %s\n", tr->in.new->encalg, tr->in.new->hashalg); + if(tr->out.sec != nil) + s = seprint(s, e, "EncOut: %s\nHashOut: %s\n", tr->out.sec->encalg, tr->out.sec->hashalg); + if(tr->out.new != nil) + seprint(s, e, "NewEncOut: %s\nNewHashOut: %s\n", tr->out.new->encalg, tr->out.new->hashalg); + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + n = readstr(offset, a, n, buf); + free(buf); + return n; + case Qstats: + buf = smalloc(Statlen); + s = buf; + e = buf + Statlen; + s = seprint(s, e, "DataIn: %lld\n", tr->datain); + s = seprint(s, e, "DataOut: %lld\n", tr->dataout); + s = seprint(s, 
e, "HandIn: %lld\n", tr->handin); + seprint(s, e, "HandOut: %lld\n", tr->handout); + n = readstr(offset, a, n, buf); + free(buf); + return n; + case Qctl: + buf = smalloc(Statlen); + snprint(buf, Statlen, "%llud", CONV(c->qid)); + n = readstr(offset, a, n, buf); + free(buf); + return n; + case Qdata: + case Qhand: + b = tlsbread(c, n, offset); + break; + case Qencalgs: + return readstr(offset, a, n, encalgs); + case Qhashalgs: + return readstr(offset, a, n, hashalgs); + } + + if(waserror()){ + freeblist(b); + nexterror(); + } + + n = 0; + va = a; + for(nb = b; nb; nb = nb->next){ + i = BLEN(nb); + memmove(va+n, nb->rp, i); + n += i; + } + + freeblist(b); + poperror(); + + return n; +} + +/* + * write a block in tls records + */ +static void +tlsrecwrite(TlsRec *tr, int type, Block *b) +{ + Block *volatile bb; + Block *nb; + uchar *p, seq[8]; + OneWay *volatile out; + int n, maclen, pad, ok; + + out = &tr->out; + bb = b; + if(waserror()){ + qunlock(&out->io); + if(bb != nil) + freeb(bb); + nexterror(); + } + qlock(&out->io); +if(tr->debug)pprint("send %ld\n", BLEN(b)); +if(tr->debug)pdump(BLEN(b), b->rp, "sent:"); + + + ok = SHandshake|SOpen|SRClose; + if(type == RAlert) + ok |= SAlert; + while(bb != nil){ + checkstate(tr, type != RApplication, ok); + + /* + * get at most one maximal record's input, + * with padding on the front for header and + * back for mac and maximal block padding. 
+ */ + if(waserror()){ + qunlock(&out->seclock); + nexterror(); + } + qlock(&out->seclock); + maclen = 0; + pad = 0; + if(out->sec != nil){ + maclen = out->sec->maclen; + pad = maclen + out->sec->block; + } + n = BLEN(bb); + if(n > MaxRecLen){ + n = MaxRecLen; + nb = allocb(n + pad + RecHdrLen); + memmove(nb->wp + RecHdrLen, bb->rp, n); + bb->rp += n; + }else{ + /* + * carefully reuse bb so it will get freed if we're out of memory + */ + bb = padblock(bb, RecHdrLen); + if(pad) + nb = padblock(bb, -pad); + else + nb = bb; + bb = nil; + } + + p = nb->rp; + p[0] = type; + put16(p+1, tr->version); + put16(p+3, n); + + if(out->sec != nil){ + put64(seq, out->seq); + out->seq++; + (*tr->packMac)(out->sec, out->sec->mackey, seq, p, p + RecHdrLen, n, p + RecHdrLen + n); + n += maclen; + + /* encrypt */ + n = (*out->sec->enc)(out->sec, p + RecHdrLen, n); + nb->wp = p + RecHdrLen + n; + + /* update length */ + put16(p+3, n); + } + if(type == RChangeCipherSpec){ + if(out->new == nil) + error("change cipher without a new cipher"); + freeSec(out->sec); + out->sec = out->new; + out->new = nil; + out->seq = 0; + } + qunlock(&out->seclock); + poperror(); + + /* + * if bwrite error's, we assume the block is queued. + * if not, we're out of sync with the receiver and will not recover. 
+ */ + if(waserror()){ + if(strcmp(up->errstr, "interrupted") != 0) + tlsError(tr, "channel error"); + nexterror(); + } + tr->c->dev->bwrite(tr->c, nb, 0); + poperror(); + } + qunlock(&out->io); + poperror(); +} + +static long +tlsbwrite(Chan *c, Block *b, vlong offset) +{ + int ty; + ulong n; + TlsRec *tr; + + n = BLEN(b); + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + panic("tlsbwrite"); + + ty = TYPE(c->qid); + switch(ty) { + default: + return devbwrite(c, b, offset); + case Qhand: + tlsrecwrite(tr, RHandshake, b); + tr->handout += n; + break; + case Qdata: + checkstate(tr, 0, SOpen); + tlsrecwrite(tr, RApplication, b); + tr->dataout += n; + break; + } + + return n; +} + +typedef struct Hashalg Hashalg; +struct Hashalg +{ + char *name; + int maclen; + void (*initkey)(Hashalg *, int, Secret *, uchar*); +}; + +static void +initmd5key(Hashalg *ha, int version, Secret *s, uchar *p) +{ + s->maclen = ha->maclen; + if(version == SSL3Version) + s->mac = sslmac_md5; + else + s->mac = hmac_md5; + memmove(s->mackey, p, ha->maclen); +} + +static void +initclearmac(Hashalg *, int, Secret *s, uchar *) +{ + s->maclen = 0; + s->mac = nomac; +} + +static void +initsha1key(Hashalg *ha, int version, Secret *s, uchar *p) +{ + s->maclen = ha->maclen; + if(version == SSL3Version) + s->mac = sslmac_sha1; + else + s->mac = hmac_sha1; + memmove(s->mackey, p, ha->maclen); +} + +static Hashalg hashtab[] = +{ + { "clear", 0, initclearmac, }, + { "md5", MD5dlen, initmd5key, }, + { "sha1", SHA1dlen, initsha1key, }, + { 0 } +}; + +static Hashalg* +parsehashalg(char *p) +{ + Hashalg *ha; + + for(ha = hashtab; ha->name; ha++) + if(strcmp(p, ha->name) == 0) + return ha; + error("unsupported hash algorithm"); + return nil; +} + +typedef struct Encalg Encalg; +struct Encalg +{ + char *name; + int keylen; + int ivlen; + void (*initkey)(Encalg *ea, Secret *, uchar*, uchar*); +}; + +static void +initRC4key(Encalg *ea, Secret *s, uchar *p, uchar *) +{ + s->enckey = smalloc(sizeof(RC4state)); + 
s->enc = rc4enc; + s->dec = rc4enc; + s->block = 0; + setupRC4state(s->enckey, p, ea->keylen); +} + +static void +initDES3key(Encalg *, Secret *s, uchar *p, uchar *iv) +{ + s->enckey = smalloc(sizeof(DES3state)); + s->enc = des3enc; + s->dec = des3dec; + s->block = 8; + setupDES3state(s->enckey, (uchar (*)[8])p, iv); +} + +static void +initAESkey(Encalg *ea, Secret *s, uchar *p, uchar *iv) +{ + s->enckey = smalloc(sizeof(AESstate)); + s->enc = aesenc; + s->dec = aesdec; + s->block = 16; + setupAESstate(s->enckey, p, ea->keylen, iv); +} + +static void +initclearenc(Encalg *, Secret *s, uchar *, uchar *) +{ + s->enc = noenc; + s->dec = noenc; + s->block = 0; +} + +static Encalg encrypttab[] = +{ + { "clear", 0, 0, initclearenc }, + { "rc4_128", 128/8, 0, initRC4key }, + { "3des_ede_cbc", 3 * 8, 8, initDES3key }, + { "aes_128_cbc", 128/8, 16, initAESkey }, + { "aes_256_cbc", 256/8, 16, initAESkey }, + { 0 } +}; + +static Encalg* +parseencalg(char *p) +{ + Encalg *ea; + + for(ea = encrypttab; ea->name; ea++) + if(strcmp(p, ea->name) == 0) + return ea; + error("unsupported encryption algorithm"); + return nil; +} + +static long +tlswrite(Chan *c, void *a, long n, vlong off) +{ + Encalg *ea; + Hashalg *ha; + TlsRec *volatile tr; + Secret *volatile tos, *volatile toc; + Block *volatile b; + Cmdbuf *volatile cb; + int i, ty; + char *p, *e; + uchar *volatile x; + ulong offset = off; + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + panic("tlswrite"); + + ty = TYPE(c->qid); + switch(ty){ + case Qdata: + case Qhand: + p = a; + e = p + n; + do{ + i = e - p; + if(i > MaxRecLen) + i = MaxRecLen; + + b = allocb(i); + if(waserror()){ + freeb(b); + nexterror(); + } + memmove(b->wp, p, i); + poperror(); + b->wp += i; + + tlsbwrite(c, b, offset); + + p += i; + }while(p < e); + return n; + case Qctl: + break; + default: + error(Ebadusefd); + return -1; + } + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf < 1) + error("short control request"); + + 
/* mutex with operations using what we're about to change */ + if(waserror()){ + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + nexterror(); + } + qlock(&tr->in.seclock); + qlock(&tr->out.seclock); + + if(strcmp(cb->f[0], "fd") == 0){ + if(cb->nf != 3) + error("usage: fd open-fd version"); + if(tr->c != nil) + error(Einuse); + i = strtol(cb->f[2], nil, 0); + if(i < MinProtoVersion || i > MaxProtoVersion) + error("unsupported version"); + tr->c = buftochan(cb->f[1]); + tr->version = i; + tlsSetState(tr, SHandshake, SClosed); + }else if(strcmp(cb->f[0], "version") == 0){ + if(cb->nf != 2) + error("usage: version vers"); + if(tr->c == nil) + error("must set fd before version"); + if(tr->verset) + error("version already set"); + i = strtol(cb->f[1], nil, 0); + if(i == SSL3Version) + tr->packMac = sslPackMac; + else if(i == TLSVersion) + tr->packMac = tlsPackMac; + else + error("unsupported version"); + tr->verset = 1; + tr->version = i; + }else if(strcmp(cb->f[0], "secret") == 0){ + if(cb->nf != 5) + error("usage: secret hashalg encalg isclient secretdata"); + if(tr->c == nil || !tr->verset) + error("must set fd and version before secrets"); + + if(tr->in.new != nil){ + freeSec(tr->in.new); + tr->in.new = nil; + } + if(tr->out.new != nil){ + freeSec(tr->out.new); + tr->out.new = nil; + } + + ha = parsehashalg(cb->f[1]); + ea = parseencalg(cb->f[2]); + + p = cb->f[4]; + i = (strlen(p)*3)/2; + x = smalloc(i); + tos = nil; + toc = nil; + if(waserror()){ + freeSec(tos); + freeSec(toc); + free(x); + nexterror(); + } + i = dec64(x, i, p, strlen(p)); + if(i < 2 * ha->maclen + 2 * ea->keylen + 2 * ea->ivlen) + error("not enough secret data provided"); + + tos = smalloc(sizeof(Secret)); + toc = smalloc(sizeof(Secret)); + if(!ha->initkey || !ea->initkey) + error("misimplemented secret algorithm"); + (*ha->initkey)(ha, tr->version, tos, &x[0]); + (*ha->initkey)(ha, tr->version, toc, &x[ha->maclen]); + (*ea->initkey)(ea, tos, &x[2 * ha->maclen], &x[2 * ha->maclen + 2 * 
ea->keylen]); + (*ea->initkey)(ea, toc, &x[2 * ha->maclen + ea->keylen], &x[2 * ha->maclen + 2 * ea->keylen + ea->ivlen]); + + if(!tos->mac || !tos->enc || !tos->dec + || !toc->mac || !toc->enc || !toc->dec) + error("missing algorithm implementations"); + if(strtol(cb->f[3], nil, 0) == 0){ + tr->in.new = tos; + tr->out.new = toc; + }else{ + tr->in.new = toc; + tr->out.new = tos; + } + if(tr->version == SSL3Version){ + toc->unpad = sslunpad; + tos->unpad = sslunpad; + }else{ + toc->unpad = tlsunpad; + tos->unpad = tlsunpad; + } + toc->encalg = ea->name; + toc->hashalg = ha->name; + tos->encalg = ea->name; + tos->hashalg = ha->name; + + free(x); + poperror(); + }else if(strcmp(cb->f[0], "changecipher") == 0){ + if(cb->nf != 1) + error("usage: changecipher"); + if(tr->out.new == nil) + error("cannot change cipher spec without setting secret"); + + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + poperror(); + free(cb); + poperror(); + + /* + * the real work is done as the message is written + * so the stream is encrypted in sync. 
+ */ + b = allocb(1); + *b->wp++ = 1; + tlsrecwrite(tr, RChangeCipherSpec, b); + return n; + }else if(strcmp(cb->f[0], "opened") == 0){ + if(cb->nf != 1) + error("usage: opened"); + if(tr->in.sec == nil || tr->out.sec == nil) + error("cipher must be configured before enabling data messages"); + lock(&tr->statelk); + if(tr->state != SHandshake && tr->state != SOpen){ + unlock(&tr->statelk); + error("cannot enable data messages"); + } + tr->state = SOpen; + unlock(&tr->statelk); + tr->opened = 1; + }else if(strcmp(cb->f[0], "alert") == 0){ + if(cb->nf != 2) + error("usage: alert n"); + if(tr->c == nil) + error("must set fd before sending alerts"); + i = strtol(cb->f[1], nil, 0); + + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + poperror(); + free(cb); + poperror(); + + sendAlert(tr, i); + + if(m == ECloseNotify) + tlsclosed(tr, SLClose); + + return n; + } else if(strcmp(cb->f[0], "debug") == 0){ + if(cb->nf == 2){ + if(strcmp(cb->f[1], "on") == 0) + tr->debug = 1; + else + tr->debug = 0; + } else + tr->debug = 1; + } else + error(Ebadarg); + + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + poperror(); + free(cb); + poperror(); + + return n; +} + +static void +tlsinit(void) +{ + struct Encalg *e; + struct Hashalg *h; + int n; + char *cp; + static int already; + + if(!already){ +// fmtinstall('H', encodefmt); + already = 1; + } + + tlsdevs = smalloc(sizeof(TlsRec*) * maxtlsdevs); + + n = 1; + for(e = encrypttab; e->name != nil; e++) + n += strlen(e->name) + 1; + cp = encalgs = smalloc(n); + for(e = encrypttab;;){ + strcpy(cp, e->name); + cp += strlen(e->name); + e++; + if(e->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; + + n = 1; + for(h = hashtab; h->name != nil; h++) + n += strlen(h->name) + 1; + cp = hashalgs = smalloc(n); + for(h = hashtab;;){ + strcpy(cp, h->name); + cp += strlen(h->name); + h++; + if(h->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; +} + +Dev tlsdevtab = { + 'a', + "tls", + + devreset, + tlsinit, + devshutdown, + 
tlsattach, + tlswalk, + tlsstat, + tlsopen, + devcreate, + tlsclose, + tlsread, + tlsbread, + tlswrite, + tlsbwrite, + devremove, + tlswstat, +}; + +/* get channel associated with an fd */ +static Chan* +buftochan(char *p) +{ + Chan *c; + int fd; + + if(p == 0) + error(Ebadarg); + fd = strtoul(p, 0, 0); + if(fd < 0) + error(Ebadarg); + c = fdtochan(fd, -1, 0, 1); /* error check and inc ref */ + return c; +} + +static void +sendAlert(TlsRec *tr, int err) +{ + Block *b; + int i, fatal; + char *msg; + +if(tr->debug)pprint("sendAlert %d\n", err); + fatal = 1; + msg = "tls unknown alert"; + for(i=0; i < nelem(tlserrs); i++) { + if(tlserrs[i].err == err) { + msg = tlserrs[i].msg; + if(tr->version == SSL3Version) + err = tlserrs[i].sslerr; + else + err = tlserrs[i].tlserr; + fatal = tlserrs[i].fatal; + break; + } + } + + if(!waserror()){ + b = allocb(2); + *b->wp++ = fatal + 1; + *b->wp++ = err; + if(fatal) + tlsSetState(tr, SAlert, SOpen|SHandshake|SRClose); + tlsrecwrite(tr, RAlert, b); + poperror(); + } + if(fatal) + tlsError(tr, msg); +} + +static void +tlsError(TlsRec *tr, char *msg) +{ + int s; + +if(tr->debug)pprint("tleError %s\n", msg); + lock(&tr->statelk); + s = tr->state; + tr->state = SError; + if(s != SError){ + strncpy(tr->err, msg, ERRMAX - 1); + tr->err[ERRMAX - 1] = '\0'; + } + unlock(&tr->statelk); + if(s != SError) + alertHand(tr, msg); +} + +static void +tlsSetState(TlsRec *tr, int new, int old) +{ + lock(&tr->statelk); + if(tr->state & old) + tr->state = new; + unlock(&tr->statelk); +} + +/* hand up a digest connection */ +static void +tlshangup(TlsRec *tr) +{ + Block *b; + + qlock(&tr->in.io); + for(b = tr->processed; b; b = tr->processed){ + tr->processed = b->next; + freeb(b); + } + if(tr->unprocessed != nil){ + freeb(tr->unprocessed); + tr->unprocessed = nil; + } + qunlock(&tr->in.io); + + tlsSetState(tr, SClosed, ~0); +} + +static TlsRec* +newtls(Chan *ch) +{ + TlsRec **pp, **ep, **np; + int t, newmax; + + if(waserror()) { + unlock(&tdlock); + 
nexterror(); + } + lock(&tdlock); + ep = &tlsdevs[maxtlsdevs]; + for(pp = tlsdevs; pp < ep; pp++) + if(*pp == nil) + break; + if(pp >= ep) { + if(maxtlsdevs >= MaxTlsDevs) { + unlock(&tdlock); + poperror(); + return nil; + } + newmax = 2 * maxtlsdevs; + if(newmax > MaxTlsDevs) + newmax = MaxTlsDevs; + np = smalloc(sizeof(TlsRec*) * newmax); + memmove(np, tlsdevs, sizeof(TlsRec*) * maxtlsdevs); + tlsdevs = np; + pp = &tlsdevs[maxtlsdevs]; + memset(pp, 0, sizeof(TlsRec*)*(newmax - maxtlsdevs)); + + maxtlsdevs = newmax; + } + *pp = mktlsrec(); + if(pp - tlsdevs >= tdhiwat) + tdhiwat++; + t = TYPE(ch->qid); + if(t == Qclonus) + t = Qctl; + ch->qid.path = QID(pp - tlsdevs, t); + ch->qid.vers = 0; + unlock(&tdlock); + poperror(); + return *pp; +} + +static TlsRec * +mktlsrec(void) +{ + TlsRec *tr; + + tr = mallocz(sizeof(*tr), 1); + if(tr == nil) + error(Enomem); + tr->state = SClosed; + tr->ref = 1; + kstrdup(&tr->user, up->user); + tr->perm = 0660; + return tr; +} + +static char* +tlsstate(int s) +{ + switch(s){ + case SHandshake: + return "Handshaking"; + case SOpen: + return "Established"; + case SRClose: + return "RemoteClosed"; + case SLClose: + return "LocalClosed"; + case SAlert: + return "Alerting"; + case SError: + return "Errored"; + case SClosed: + return "Closed"; + } + return "Unknown"; +} + +static void +freeSec(Secret *s) +{ + if(s != nil){ + free(s->enckey); + free(s); + } +} + +static int +noenc(Secret *, uchar *, int n) +{ + return n; +} + +static int +rc4enc(Secret *sec, uchar *buf, int n) +{ + rc4(sec->enckey, buf, n); + return n; +} + +static int +tlsunpad(uchar *buf, int n, int block) +{ + int pad, nn; + + pad = buf[n - 1]; + nn = n - 1 - pad; + if(nn <= 0 || n % block) + return -1; + while(--n > nn) + if(pad != buf[n - 1]) + return -1; + return nn; +} + +static int +sslunpad(uchar *buf, int n, int block) +{ + int pad, nn; + + pad = buf[n - 1]; + nn = n - 1 - pad; + if(nn <= 0 || n % block) + return -1; + return nn; +} + +static int +blockpad(uchar 
*buf, int n, int block) +{ + int pad, nn; + + nn = n + block; + nn -= nn % block; + pad = nn - (n + 1); + while(n < nn) + buf[n++] = pad; + return nn; +} + +static int +des3enc(Secret *sec, uchar *buf, int n) +{ + n = blockpad(buf, n, 8); + des3CBCencrypt(buf, n, sec->enckey); + return n; +} + +static int +des3dec(Secret *sec, uchar *buf, int n) +{ + des3CBCdecrypt(buf, n, sec->enckey); + return (*sec->unpad)(buf, n, 8); +} + +static int +aesenc(Secret *sec, uchar *buf, int n) +{ + n = blockpad(buf, n, 16); + aesCBCencrypt(buf, n, sec->enckey); + return n; +} + +static int +aesdec(Secret *sec, uchar *buf, int n) +{ + aesCBCdecrypt(buf, n, sec->enckey); + return (*sec->unpad)(buf, n, 16); +} + +static DigestState* +nomac(uchar *, ulong, uchar *, ulong, uchar *, DigestState *) +{ + return nil; +} + +/* + * sslmac: mac calculations for ssl 3.0 only; tls 1.0 uses the standard hmac. + */ +static DigestState* +sslmac_x(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s, + DigestState*(*x)(uchar*, ulong, uchar*, DigestState*), int xlen, int padlen) +{ + int i; + uchar pad[48], innerdigest[20]; + + if(xlen > sizeof(innerdigest) + || padlen > sizeof(pad)) + return nil; + + if(klen>64) + return nil; + + /* first time through */ + if(s == nil){ + for(i=0; imac)(buf, 11, mackey, sec->maclen, 0, 0); + (*sec->mac)(body, len, mackey, sec->maclen, mac, s); +} + +static void +tlsPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac) +{ + DigestState *s; + uchar buf[13]; + + memmove(buf, seq, 8); + memmove(&buf[8], header, 5); + + s = (*sec->mac)(buf, 13, mackey, sec->maclen, 0, 0); + (*sec->mac)(body, len, mackey, sec->maclen, mac, s); +} + +static void +put32(uchar *p, u32int x) +{ + p[0] = x>>24; + p[1] = x>>16; + p[2] = x>>8; + p[3] = x; +} + +static void +put64(uchar *p, vlong x) +{ + put32(p, (u32int)(x >> 32)); + put32(p+4, (u32int)x); +} + +static void +put24(uchar *p, int x) +{ + p[0] = x>>16; + p[1] = x>>8; 
+ p[2] = x; +} + +static void +put16(uchar *p, int x) +{ + p[0] = x>>8; + p[1] = x; +} + +static u32int +get32(uchar *p) +{ + return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3]; +} + +static int +get16(uchar *p) +{ + return (p[0]<<8)|p[1]; +} + +static char *charmap = "0123456789abcdef"; + +static void +pdump(int len, void *a, char *tag) +{ + uchar *p; + int i; + char buf[65+32]; + char *q; + + p = a; + strcpy(buf, tag); + while(len > 0){ + q = buf + strlen(tag); + for(i = 0; len > 0 && i < 32; i++){ + if(*p >= ' ' && *p < 0x7f){ + *q++ = ' '; + *q++ = *p; + } else { + *q++ = charmap[*p>>4]; + *q++ = charmap[*p & 0xf]; + } + len--; + p++; + } + *q = 0; + + if(len > 0) + pprint("%s...\n", buf); + else + pprint("%s\n", buf); + } +} diff -Nru /sys/src/9k/port/devtrace.c /sys/src/9k/port/devtrace.c --- /sys/src/9k/port/devtrace.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devtrace.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,894 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "netif.h" + +/* + * NB: To be used with 6l -e so tracein/out are called upon + * function entry and exit. + * There's no trace(3) man page, look at write source to see the + * commands. 
+ */ + +#pragma profile 0 + +typedef struct Trace Trace; +/* This is a trace--a segment of memory to watch for entries and exits */ +struct Trace { + struct Trace *next; + void *func; + void *start; + void *end; + int enabled; + char name[16]; +}; + +enum { + Qdir, + Qctl, + Qdata, +}; + +enum { + TraceEntry = 1, + TraceExit, +}; + +/* fix me make this programmable */ +enum { + defaultlogsize = 8192, +}; + +/* This represents a trace "hit" or event */ +typedef struct Tracelog Tracelog; +struct Tracelog { + uvlong ticks; + int info; + uintptr pc; + /* these are different depending on type */ + uintptr dat[5]; + int machno; +}; + + +static Rendez tracesleep; +static QLock traceslock; +/* this will contain as many entries as there are valid pc values */ +static Trace **tracemap; +static Trace *traces; /* This stores all the traces */ +static Lock loglk; +static Tracelog *tracelog = nil; +int traceactive = 0; +/* trace indices. These are just unsigned longs. You mask them + * to get an index. This makes fifo empty/full etc. trivial. + */ +static uint pw = 0, pr = 0; +static int tracesactive = 0; +static int all = 0; +static int watching = 0; +static int slothits = 0; +static unsigned int traceinhits = 0; +static unsigned int newplfail = 0; +static unsigned long logsize = defaultlogsize, logmask = defaultlogsize - 1; + +static int printsize = 0; //The length of a line being printed + +/* These are for observing a single process */ +static int *pidwatch = nil; +static int numpids = 0; +static const PIDWATCHSIZE = 32; /* The number of PIDS that can be watched. Pretty arbitrary. 
*/ + +int codesize = 0; + +static uvlong lastestamp; /* last entry timestamp */ +static uvlong lastxstamp; /* last exit timestamp */ + +/* Trace events can be either Entries or Exits */ +static char eventname[] = { + [TraceEntry] = 'E', + [TraceExit] = 'X', +}; + +static Dirtab tracedir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "tracectl", {Qctl}, 0, 0664, + "trace", {Qdata}, 0, 0440, +}; + +char hex[] = { + '0', + '1', + '2', + '3', + '4', + '5', + '6', + '7', + '8', + '9', + 'a', + 'b', + 'c', + 'd', + 'e', + 'f', +}; + +/* big-endian ... */ +void +hex8(u32int l, char *c) +{ + int i; + for(i = 2; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex16(u32int l, char *c) +{ + int i; + for(i = 4; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex32(u32int l, char *c) +{ + int i; + for(i = 8; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex64(u64int l, char *c) +{ + hex32(l>>32, c); + hex32(l, &c[8]); +} + +static int +lognonempty(void *) +{ + return pw - pr; +} + +static int +logfull(void) +{ + return (pw - pr) >= logsize; +} + +static u64int +idx(u64int f) +{ + return f & logmask; +} + +/* + * Check if the given trace overlaps any others + * Returns 1 if there is overlap, 0 if clear. 
+ */ +int +overlapping(Trace *p) { + Trace *curr; + + curr = traces; + + if (!curr) + return 0; + + do { + if ((curr->start < p->start && p->start < curr->end) || + (curr->start < p->end && p->end < curr->end)) + return 1; + curr = curr->next; + } while (curr != nil); + + return 0; +} + +/* Make sure a PC is valid and traced; if so, return its Trace */ +/* if dopanic == 1, the kernel will panic on an invalid PC */ +struct Trace ** +traceslot(void *pc, int dopanic) +{ + int index; + struct Trace **p; + + if (pc > etext) { + if (dopanic) + panic("Bad PC %p", pc); + + print("Invalid PC %p\n", pc); + return nil; + } + index = (int)((uintptr)pc - KTZERO); + if (index > codesize){ + if (dopanic) { + panic("Bad PC %p", pc); + while(1); + } + print("Invalid PC %p\n", pc); + return nil; + } + p = &tracemap[index]; + if (tracemap[index]) + ainc(&slothits); + return p; +} + +/* Check if the given PC is traced and return a Trace if so */ +struct Trace * +traced(void *pc, int dopanic) +{ + struct Trace **p; + + p = traceslot(pc, dopanic); + + if (p == nil) + return nil; + + return *p; +} + +/* + * Return 1 if pid is being watched or no pids are being watched. + * Return 0 if pids are being watched and the argument is not + * among them. + */ +int +watchingpid(int pid) { + int i; + + if (pidwatch[0] == 0) + return 1; + + for (i = 0; i < numpids; i++) { + if (pidwatch[i] == pid) + return 1; + } + return 0; +} + +/* + * Remove a trace. 
+ */ +void +removetrace(Trace *p) { + unsigned char *cp; + struct Trace *prev; + struct Trace *curr; + struct Trace **slot; + + slot = traceslot(p->start, 0); + for(cp = p->start; cp <= p->end; slot++, cp++) + *slot = nil; + + curr = traces; + + if (curr == p) { + if (curr->next) { + traces = curr->next; + } else { + traces = nil; //this seems to work fine + } + free(curr); + return; + } + + prev = curr; + curr = curr->next; + do { + if (curr == p) { + prev->next = curr->next; + return; + } + prev = curr; + curr = curr->next; + } while (curr != nil); + +} + +/* it is recommended that you call these with something sane. */ +/* these next two functions assume you locked tracelock */ + +/* Turn on a trace */ +void +traceon(struct Trace *p) +{ + unsigned char *cp; + struct Trace **slot; + slot = traceslot(p->start, 0); + for(cp = p->start; cp <= p->end; slot++, cp++) + *slot = p; + p->enabled = 1; + tracesactive++; +} + +/* Turn off a trace */ +void +traceoff(struct Trace *p) +{ + unsigned char *cp; + struct Trace **slot; + slot = traceslot(p->start, 0); + for(cp = p->start; cp <= p->end; slot++, cp++) + *slot = nil; + p->enabled = 0; + tracesactive--; +} + +/* Make a new tracelog (an event) */ +/* can return NULL, meaning, no record for you */ +static struct Tracelog * +newpl(void) +{ + uint index; + + index = ainc((int *)&pw); + + return &tracelog[idx(index)]; + +} + +/* Called every time a (traced) function starts */ +/* this is not really smp safe. FIX */ +void +tracein(void* pc, uintptr a1, uintptr a2, uintptr a3, uintptr a4) +{ + struct Tracelog *pl; + + /* if we are here, tracing is active. Turn it off. */ + traceactive = 0; + if (! traced(pc, 1)){ + traceactive = 1; + return; + } + + ainc((int *)&traceinhits); + /* Continue if we are watching this pid or we're not watching any */ + if (!all) + if (!up || !watchingpid(up->pid)){ + traceactive = 1; + return; + } + + pl = newpl(); + + if (! 
pl) { + ainc((int *)&newplfail); + traceactive = 1; + return; + } + + cycles(&pl->ticks); + + pl->pc = (uintptr)pc; + if (up) + pl->dat[0] = up->pid; + else + pl->dat[0] = (unsigned long)-1; + + pl->dat[1] = a1; + pl->dat[2] = a2; + pl->dat[3] = a3; + pl->dat[4] = a4; + + pl->info = TraceEntry; + pl->machno = m->machno; + traceactive = 1; +} + +/* Called every time a traced function exits */ +void +traceout(void* pc, uintptr retval) +{ + struct Tracelog *pl; + /* if we are here, tracing is active. Turn it off. */ + traceactive = 0; + if (! traced(pc, 1)){ + traceactive = 1; + return; + } + + if (!all) + if (!up || !watchingpid(up->pid)){ + traceactive = 1; + return; + } + + pl = newpl(); + if (! pl){ + traceactive = 1; + return; + } + + cycles(&pl->ticks); + + pl->pc = (uintptr)pc; + if (up) + pl->dat[0] = up->pid; + else + pl->dat[0] = (unsigned long)-1; + + pl->dat[1] = retval; + pl->dat[2] = 0; + pl->dat[3] = 0; + + pl->info = TraceExit; + pl->machno = m->machno; + traceactive = 1; +} + +/* Create a new trace with the given range */ +static Trace * +mktrace(void *func, void *start, void *end) +{ + Trace *p; + p = mallocz(sizeof p[0], 1); + p->func = func; + p->start = start; + p->end = end; + return p; +} + +/* Get rid of an old trace */ +static void +freetrace(Trace *p) +{ + free(p); +} + + +static Chan* +traceattach(char *spec) +{ + return devattach('T', spec); +} + +static Walkqid* +tracewalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, tracedir, nelem(tracedir), devgen); +} + +static long +tracestat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, tracedir, nelem(tracedir), devgen); +} + +static Chan* +traceopen(Chan *c, int omode) +{ + + /* if there is no tracelog, allocate one. Open always fails + * if the basic alloc fails. You can resize it later. + */ + + codesize = (uintptr)etext - (uintptr)KTZERO; + if (! 
tracemap) + //tracemap = mallocz(sizeof(struct tracemap *)*codesize, 1); + tracemap = mallocz(sizeof(struct Trace *)*codesize, 1); + if (! tracemap) + error("tracemap malloc failed"); + if (! tracelog) + tracelog = mallocz(sizeof(*tracelog)*logsize, 1); + /* I guess malloc doesn't toss an error */ + if (! tracelog) + error("tracelog malloc failed"); + if (! pidwatch) + pidwatch = mallocz(sizeof(int)*PIDWATCHSIZE, 1); + if (! pidwatch) + error("pidwatch malloc failed"); + c = devopen(c, omode, tracedir, nelem(tracedir), devgen); + return c; +} + +static void +traceclose(Chan *) +{ +} + +/* + * Reading from the device, either the data or control files. + * The data reading involves deep rminnich magic so we don't have + * to call print(), which is traced. + */ +static long +traceread(Chan *c, void *a, long n, vlong offset) +{ + char *buf; + char *cp = a; + struct Tracelog *pl; + Trace *p; + int i, j; + int saveactive = traceactive; + traceactive = 0; + static QLock gate; + + if (waserror()) { + traceactive = saveactive; + nexterror(); + } + + if(c->qid.type == QTDIR) { + long l = devdirread(c, a, n, tracedir, nelem(tracedir), devgen); + poperror(); + traceactive = saveactive; + return l; + } + + switch((int) c->qid.path){ + default: + error("traceread: bad qid"); + case Qctl: + i = 0; + qlock(&traceslock); + buf = malloc(READSTR); + i += snprint(buf + i, READSTR - i, "logsize %lud\n", logsize); + for(p = traces; p != nil; p = p->next) + i += snprint(buf + i, READSTR - i, "trace %p %p new %s\n", + p->start, p->end, p->name); + + for(p = traces; p != nil; p = p->next) + i += snprint(buf + i, READSTR - i, "#trace %p traced? 
%p\n", + p->func, traced(p->func, 0)); + + for(p = traces; p != nil; p = p->next) + if (p->enabled) + i += snprint(buf + i, READSTR - i, "trace %s on\n", + p->name); + i += snprint(buf + i, READSTR - i, "#tracehits %d, in queue %d\n", + pw, pw-pr); + i += snprint(buf + i, READSTR - i, "#tracelog %p\n", tracelog); + i += snprint(buf + i, READSTR - i, "#traceactive %d\n", saveactive); + i += snprint(buf + i, READSTR - i, "#slothits %d\n", slothits); + i += snprint(buf + i, READSTR - i, "#traceinhits %d\n", traceinhits); + for (j = 0; j < numpids - 1; j++) + i += snprint(buf + i, READSTR - i, "watch %d\n", pidwatch[j]); + snprint(buf + i, READSTR - i, "watch %d\n", pidwatch[numpids - 1]); + n = readstr(offset, a, n, buf); + free(buf); + qunlock(&traceslock); + break; + case Qdata: + + // Set the printsize + /* 32-bit E PCPCPCPC TIMETIMETIMETIME PID# CR XXARG1XX XXARG2XX XXARG3XX XXARG4XX\n */ + if (sizeof(uintptr) == 4) { + printsize = 73; // 32-bit format + } else { + printsize = 121; // must be 64-bit + } + + i = 0; + while(lognonempty((void *)0)){ + int j; + + if ((pw - pr) > logsize) + pr = pw - logsize; + + pl = tracelog + idx(pr); + + if ((i + printsize) > n) + break; + /* simple format */ + if (sizeof(uintptr) == 4) { + cp[0] = eventname[pl->info]; + cp ++; + *cp++ = ' '; + hex32((uint)pl->pc, cp); + cp[8] = ' '; + cp += 9; + hex64(pl->ticks, cp); + cp[16] = ' '; + cp += 17; + hex16(pl->dat[0], cp); + cp += 4; + cp[0] = ' '; + cp++; + hex8(pl->machno, cp); + cp += 2; + cp[0] = ' '; + cp++; + for(j = 1; j < 4; j++){ + hex32(pl->dat[j], cp); + cp[8] = ' '; + cp += 9; + } + /* adjust for extra skip above */ + cp--; + *cp++ = '\n'; + pr++; + i += printsize; + } else { + cp[0] = eventname[pl->info]; + cp ++; + *cp++ = ' '; + hex64((u64int)pl->pc, cp); + cp[16] = ' '; + cp += 17; + hex64(pl->ticks, cp); + cp[16] = ' '; + cp += 17; + hex32(pl->dat[0], cp); + cp += 8; + cp[0] = ' '; + cp++; + cp[0] = ' '; + cp++; + cp[0] = ' '; + cp++; + cp[0] = ' '; + cp++; + 
hex8(pl->machno, cp); + cp += 4; + for (j = 1; j < 5; j++) { + hex64(pl->dat[j], cp); + cp[16] = ' '; + cp += 17; + } + cp--; + *cp++ = '\n'; + pr++; + i += printsize; + } + } + n = i; + break; + } + poperror(); + traceactive = saveactive; + return n; +} + +/* + * Process commands sent to the ctl file. + */ +static long +tracewrite(Chan *c, void *a, long n, vlong) +{ + char *tok[6]; //changed this so "tracein" works with the new 4th arg + char *ep, *s = nil; + Trace *p, **pp, *foo; + int ntok; + int saveactive = traceactive; + traceactive = 0; + + qlock(&traceslock); + if(waserror()){ + qunlock(&traceslock); + if(s != nil) free(s); + traceactive = saveactive; + nexterror(); + } + switch((uintptr)c->qid.path){ + default: + error("tracewrite: bad qid"); + case Qctl: + s = malloc(n + 1); + memmove(s, a, n); + s[n] = 0; + ntok = tokenize(s, tok, nelem(tok)); + if(!strcmp(tok[0], "trace")){ /* 'trace' ktextaddr 'on'|'off'|'mk'|'del' [name] */ + if(ntok < 3) { + error("devtrace: usage: 'trace' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name]"); + } + for(pp = &traces; *pp != nil; pp = &(*pp)->next){ + if(!strcmp(tok[1], (*pp)->name)) + break; +} + p = *pp; + if((ntok > 3) && (!strcmp(tok[3], "new"))){ + uintptr addr; + void *start, *end, *func; + if (ntok != 5) { + error("devtrace: usage: trace new "); + } + addr = (uintptr)strtoul(tok[1], &ep, 16); + if (addr < KTZERO) + addr |= KTZERO; + func = start = (void *)addr; + if(*ep) { + error("devtrace: start address not in recognized format"); + } + addr = (uintptr)strtoul(tok[2], &ep, 16); + if (addr < KTZERO) + addr |= KTZERO; + end = (void *)addr; + if(*ep) { + error("devtrace: end address not in recognized format"); + } + + if (start > end || start > etext || end > etext) + error("devtrace: invalid address range"); + + /* What do we do here? 
start and end are weird * + if((addr < (uintptr)start) || (addr > (uintptr)end) + error("devtrace: address out of bounds"); + */ + if(p) { + error("devtrace: trace already exists"); + } + p = mktrace(func, start, end); + for (foo = traces; foo != nil; foo = foo->next) { + if (!strcmp(tok[4], foo->name)) + error("devtrace: trace with that name already exists"); + } + + if (!overlapping(p)) { + p->next = traces; + if(ntok < 5) + snprint(p->name, sizeof p->name, "%p", func); + else + strncpy(p->name, tok[4], sizeof p->name); + traces = p; + } else { + error("devtrace: given range overlaps with existing trace"); + } + } else if(!strcmp(tok[2], "remove")){ + if (ntok != 3) + error("devtrace: usage: trace remove"); + if (p == nil) { + error("devtrace: trace not found"); + } + removetrace(p); + } else if(!strcmp(tok[2], "on")){ + if (ntok != 3) + error("devtrace: usage: trace on"); + + if(p == nil) { + error("devtrace: trace not found"); + } + if (! traced(p->func, 0)){ + traceon(p); + } + } else if(!strcmp(tok[2], "off")){ + if (ntok != 3) + error("devtrace: usage: trace off"); + if(p == nil) { + error("devtrace: trace not found"); + } + if(traced(p->func, 0)){ + traceoff(p); + } + } + } else if(!strcmp(tok[0], "query")){ + /* See if addr is being traced */ + Trace* p; + uintptr addr; + if (ntok != 2) { + error("devtrace: usage: query "); + } + addr = (uintptr)strtoul(tok[1], &ep, 16); + if (addr < KTZERO) + addr |= KTZERO; + p = traced((void *)addr, 0); + if (p) { + print("Probing is enabled\n"); + } else { + print("Probing is disabled\n"); + } + } else if(!strcmp(tok[0], "size")){ + int l, size; + struct Tracelog *newtracelog; + + if (ntok != 2) + error("devtrace: usage: size "); + + l = strtoul(tok[1], &ep, 0); + if(*ep) { + error("devtrace: size not in recognized format"); + } + size = 1 << l; + /* sort of foolish. Alloc new trace first, then free old. 
*/ + /* and too bad if there are unread traces */ + newtracelog = mallocz(sizeof(*newtracelog)*size, 1); + /* does malloc throw waserror? I don't know */ + if (newtracelog){ + free(tracelog); + tracelog = newtracelog; + logsize = size; + logmask = size - 1; + pr = pw = 0; + } else { + error("devtrace: can't allocate that much"); + } + } else if (!strcmp(tok[0], "testtracein")) { + /* Manually jump to a certain bit of traced code */ + uintptr pc, a1, a2, a3, a4; + int x; + + if (ntok != 6) + error("devtrace: usage: testtracein "); + + pc = (uintptr)strtoul(tok[1], &ep, 16); + if (pc < KTZERO) + pc |= KTZERO; + a1 = (uintptr)strtoul(tok[2], &ep, 16); + a2 = (uintptr)strtoul(tok[3], &ep, 16); + a3 = (uintptr)strtoul(tok[4], &ep, 16); + a4 = (uintptr)strtoul(tok[5], &ep, 16); + + if (traced((void *)pc, 0)) { + x = splhi(); + watching = 1; + tracein((void *)pc, a1, a2, a3, a4); + watching = 0; + splx(x); + } + } else if (!strcmp(tok[0], "watch")) { + /* Watch a certain PID */ + int pid; + + if (ntok != 2) { + error("devtrace: usage: watch [0|]"); + } + + pid = atoi(tok[1]); + if (pid == 0) { + pidwatch = mallocz(sizeof(int)*PIDWATCHSIZE, 1); + numpids = 0; + } else if (pid < 0) { + error("PID must be greater than zero."); + } else if (numpids < PIDWATCHSIZE) { + pidwatch[numpids] = pid; + ainc(&numpids); + } else { + error("pidwatch array full!"); + } + } else if (!strcmp(tok[0], "start")) { + if (ntok != 1) + error("devtrace: usage: start"); + saveactive = 1; + } else if (!strcmp(tok[0], "stop")) { + if (ntok != 1) + error("devtrace: usage: stop"); + saveactive = 0; + all = 0; + } else if (!strcmp(tok[0], "all")) { + if (ntok != 1) + error("devtrace: usage: all"); + saveactive = 1; + all = 1; + } else { + error("devtrace: usage: 'trace' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name] or: 'size' buffersize (power of 2)"); + } + free(s); + break; + } + poperror(); + qunlock(&traceslock); + traceactive = saveactive; + return n; +} + +Dev tracedevtab = { + 'T', + "trace", + 
devreset, + devinit, + devshutdown, + traceattach, + tracewalk, + tracestat, + traceopen, + devcreate, + traceclose, + traceread, + devbread, + tracewrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devuart.c /sys/src/9k/port/devuart.c --- /sys/src/9k/port/devuart.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devuart.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,808 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +enum { + Qdir = 0, + Qdata, + Qctl, + Qstat, +}; + +#define UARTTYPE(x) (((unsigned)x)&0x1f) +#define UARTID(x) ((((unsigned)x))>>5) +#define UARTQID(i, t) ((((unsigned)i)<<5)|(t)) + +enum +{ + /* soft flow control chars */ + CTLS= 023, + CTLQ= 021, +}; + +extern Dev uartdevtab; +extern PhysUart* physuart[]; + +static Uart* uartlist; +static Uart** uart; +static int uartnuart; +static Dirtab *uartdir; +static int uartndir; +static Timer *uarttimer; + +struct Uartalloc { + Lock; + Uart *elist; /* list of enabled interfaces */ +} uartalloc; + +static void uartclock(void); +static void uartflow(void*); + +/* + * enable/disable uart and add/remove to list of enabled uarts + */ +static Uart* +uartenable(Uart *p) +{ + Uart **l; + + if(p->iq == nil){ + if((p->iq = qopen(8*1024, 0, uartflow, p)) == nil) + return nil; + } + else + qreopen(p->iq); + if(p->oq == nil){ + if((p->oq = qopen(8*1024, 0, uartkick, p)) == nil){ + qfree(p->iq); + p->iq = nil; + return nil; + } + } + else + qreopen(p->oq); + + p->ir = p->istage; + p->iw = p->istage; + p->ie = &p->istage[Stagesize]; + p->op = p->ostage; + p->oe = p->ostage; + + p->hup_dsr = p->hup_dcd = 0; + p->dsr = p->dcd = 0; + + /* assume we can send */ + p->cts = 1; + p->ctsbackoff = 0; + + if(p->bits == 0) + uartctl(p, "l8"); + if(p->stop == 0) + uartctl(p, "s1"); + if(p->parity == 0) + uartctl(p, "pn"); + if(p->baud == 0) + uartctl(p, "b9600"); + (*p->phys->enable)(p, 1); + + ilock(&uartalloc); + for(l = 
&uartalloc.elist; *l; l = &(*l)->elist){ + if(*l == p) + break; + } + if(*l == 0){ + p->elist = uartalloc.elist; + uartalloc.elist = p; + } + p->enabled = 1; + iunlock(&uartalloc); + + return p; +} + +static void +uartdisable(Uart *p) +{ + Uart **l; + + (*p->phys->disable)(p); + + ilock(&uartalloc); + for(l = &uartalloc.elist; *l; l = &(*l)->elist){ + if(*l == p){ + *l = p->elist; + break; + } + } + p->enabled = 0; + iunlock(&uartalloc); +} + +Uart* +uartconsole(int i, char *cmd) +{ + Uart *p; + + if(i >= uartnuart || (p = uart[i]) == nil) + return nil; + + qlock(p); + if(!p->console){ + if(p->opens == 0 && uartenable(p) == nil){ + qunlock(p); + return nil; + } + p->opens++; + + addkbdq(p->iq, -1); + addconsdev(p->oq, uartputs, 2, 0); + p->putc = kbdcr2nl; + if(cmd != nil && *cmd != '\0') + uartctl(p, cmd); + + p->console = 1; + } + qunlock(p); + + return p; +} + +static void +uartsetlength(int i) +{ + Uart *p; + + if(i > 0){ + p = uart[i]; + if(p && p->opens && p->iq) + uartdir[1+3*i].length = qlen(p->iq); + } else for(i = 0; i < uartnuart; i++){ + p = uart[i]; + if(p && p->opens && p->iq) + uartdir[1+3*i].length = qlen(p->iq); + } +} + +/* + * set up the '#t' directory + */ +static void +uartreset(void) +{ + int i; + Dirtab *dp; + Uart *p, *tail; + + tail = nil; + for(i = 0; physuart[i] != nil; i++){ + if(physuart[i]->pnp == nil) + continue; + if((p = physuart[i]->pnp()) == nil) + continue; + if(uartlist != nil) + tail->next = p; + else + uartlist = p; + for(tail = p; tail->next != nil; tail = tail->next) + uartnuart++; + uartnuart++; + } + +//fix the case of uartnuart == 0, will panic below + if(uartnuart) + uart = malloc(uartnuart*sizeof(Uart*)); + + uartndir = 1 + 3*uartnuart; + uartdir = malloc(uartndir * sizeof(Dirtab)); + if(uart == nil || uartdir == nil){ + panic("uartreset: no memory %#p (%ud) %#p (%ud)", + uart, uartnuart*sizeof(Uart*), + uartdir, uartndir * sizeof(Dirtab)); + } + dp = uartdir; + strcpy(dp->name, "."); + mkqid(&dp->qid, 0, 0, QTDIR); + 
dp->length = 0; + dp->perm = DMDIR|0555; + dp++; + p = uartlist; + for(i = 0; i < uartnuart; i++){ + /* 3 directory entries per port */ + sprint(dp->name, "eia%d", i); + dp->qid.path = UARTQID(i, Qdata); + dp->perm = 0660; + dp++; + sprint(dp->name, "eia%dctl", i); + dp->qid.path = UARTQID(i, Qctl); + dp->perm = 0660; + dp++; + sprint(dp->name, "eia%dstatus", i); + dp->qid.path = UARTQID(i, Qstat); + dp->perm = 0444; + dp++; + + uart[i] = p; + p->dev = i; + if(p->console || p->special){ + /* + * No qlock here, only called at boot time. + */ + if(uartenable(p) != nil){ + if(p->console){ + addkbdq(p->iq, -1); + addconsdev(p->oq, uartputs, 2, 0); + p->putc = kbdcr2nl; + } + p->opens++; + } + } + p = p->next; + } + + if(uartnuart){ + /* + * at 115200 baud, the 1024 char buffer takes 56 ms to process, + * processing it every 22 ms should be fine. + */ + uarttimer = addclock0link(uartclock, 22); + } +} + + +static Chan* +uartattach(char *spec) +{ + return devattach('t', spec); +} + +static Walkqid* +uartwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, uartdir, uartndir, devgen); +} + +static long +uartstat(Chan *c, uchar *dp, long n) +{ + if(UARTTYPE(c->qid.path) == Qdata) + uartsetlength(UARTID(c->qid.path)); + return devstat(c, dp, n, uartdir, uartndir, devgen); +} + +static Chan* +uartopen(Chan *c, int omode) +{ + Uart *p; + + c = devopen(c, omode, uartdir, uartndir, devgen); + + switch(UARTTYPE(c->qid.path)){ + case Qctl: + case Qdata: + p = uart[UARTID(c->qid.path)]; + qlock(p); + if(p->opens == 0 && uartenable(p) == nil){ + qunlock(p); + c->flag &= ~COPEN; + error(Enodev); + } + p->opens++; + qunlock(p); + break; + } + + c->iounit = qiomaxatomic; + return c; +} + +static int +uartdrained(void* arg) +{ + Uart *p; + + p = arg; + return qlen(p->oq) == 0 && p->op == p->oe; +} + +static void +uartdrainoutput(Uart *p) +{ + if(!p->enabled) + return; + + p->drain = 1; + if(waserror()){ + p->drain = 0; + nexterror(); + } + sleep(&p->r, 
uartdrained, p); + poperror(); +} + +static void +uartclose(Chan *c) +{ + Uart *p; + + if(c->qid.type & QTDIR) + return; + if((c->flag & COPEN) == 0) + return; + switch(UARTTYPE(c->qid.path)){ + case Qdata: + case Qctl: + p = uart[UARTID(c->qid.path)]; + qlock(p); + if(--(p->opens) == 0){ + qclose(p->iq); + ilock(&p->rlock); + p->ir = p->iw = p->istage; + iunlock(&p->rlock); + + /* + */ + qhangup(p->oq, nil); + if(!waserror()){ + uartdrainoutput(p); + poperror(); + } + qclose(p->oq); + uartdisable(p); + p->dcd = p->dsr = p->dohup = 0; + } + qunlock(p); + break; + } +} + +static long +uartread(Chan *c, void *buf, long n, vlong off) +{ + Uart *p; + ulong offset = off; + + if(c->qid.type & QTDIR){ + uartsetlength(-1); + return devdirread(c, buf, n, uartdir, uartndir, devgen); + } + + p = uart[UARTID(c->qid.path)]; + switch(UARTTYPE(c->qid.path)){ + case Qdata: + return qread(p->iq, buf, n); + case Qctl: + return readnum(offset, buf, n, UARTID(c->qid.path), NUMSIZE); + case Qstat: + return (*p->phys->status)(p, buf, n, offset); + } + + return 0; +} + +int +uartctl(Uart *p, char *cmd) +{ + char *f[16]; + int i, n, nf; + + nf = tokenize(cmd, f, nelem(f)); + for(i = 0; i < nf; i++){ + if(strncmp(f[i], "break", 5) == 0){ + (*p->phys->dobreak)(p, 0); + continue; + } + + n = atoi(f[i]+1); + switch(*f[i]){ + case 'B': + case 'b': + uartdrainoutput(p); + if((*p->phys->baud)(p, n) < 0) + return -1; + break; + case 'C': + case 'c': + p->hup_dcd = n; + break; + case 'D': + case 'd': + uartdrainoutput(p); + (*p->phys->dtr)(p, n); + break; + case 'E': + case 'e': + p->hup_dsr = n; + break; + case 'F': + case 'f': + if(p->oq != nil) + qflush(p->oq); + break; + case 'H': + case 'h': + if(p->iq != nil) + qhangup(p->iq, 0); + if(p->oq != nil) + qhangup(p->oq, 0); + break; + case 'I': + case 'i': + uartdrainoutput(p); + (*p->phys->fifo)(p, n); + break; + case 'K': + case 'k': + uartdrainoutput(p); + (*p->phys->dobreak)(p, n); + break; + case 'L': + case 'l': + uartdrainoutput(p); + 
if((*p->phys->bits)(p, n) < 0) + return -1; + break; + case 'M': + case 'm': + uartdrainoutput(p); + (*p->phys->modemctl)(p, n); + break; + case 'N': + case 'n': + if(p->oq != nil) + qnoblock(p->oq, n); + break; + case 'P': + case 'p': + uartdrainoutput(p); + if((*p->phys->parity)(p, *(f[i]+1)) < 0) + return -1; + break; + case 'Q': + case 'q': + if(p->iq != nil) + qsetlimit(p->iq, n); + if(p->oq != nil) + qsetlimit(p->oq, n); + break; + case 'R': + case 'r': + uartdrainoutput(p); + (*p->phys->rts)(p, n); + break; + case 'S': + case 's': + uartdrainoutput(p); + if((*p->phys->stop)(p, n) < 0) + return -1; + break; + case 'W': + case 'w': + if(uarttimer == nil || n < 1) + return -1; + uarttimer->tns = (vlong)n * 100000LL; + break; + case 'X': + case 'x': + if(p->enabled){ + ilock(&p->tlock); + p->xonoff = n; + iunlock(&p->tlock); + } + break; + } + } + return 0; +} + +static long +uartwrite(Chan *c, void *buf, long n, vlong) +{ + Uart *p; + char *cmd; + + if(c->qid.type & QTDIR) + error(Eperm); + + p = uart[UARTID(c->qid.path)]; + + switch(UARTTYPE(c->qid.path)){ + case Qdata: + qlock(p); + if(waserror()){ + qunlock(p); + nexterror(); + } + + n = qwrite(p->oq, buf, n); + + qunlock(p); + poperror(); + break; + case Qctl: + cmd = malloc(n+1); + memmove(cmd, buf, n); + cmd[n] = 0; + qlock(p); + if(waserror()){ + qunlock(p); + free(cmd); + nexterror(); + } + + /* let output drain */ + if(uartctl(p, cmd) < 0) + error(Ebadarg); + + qunlock(p); + poperror(); + free(cmd); + break; + } + + return n; +} + +static long +uartwstat(Chan *c, uchar *dp, long n) +{ + Dir d; + Dirtab *dt; + + if(!isevegroup()) + error(Eperm); + if(QTDIR & c->qid.type) + error(Eperm); + if(UARTTYPE(c->qid.path) == Qstat) + error(Eperm); + + dt = &uartdir[1 + 3 * UARTID(c->qid.path)]; + n = convM2D(dp, n, &d, nil); + if(n == 0) + error(Eshortstat); + if(d.mode != ~0UL) + dt[0].perm = dt[1].perm = d.mode; + return n; +} + +void +uartpower(int on) +{ + Uart *p; + + for(p = uartlist; p != nil; p = 
p->next) { + if(p->phys->power) + (*p->phys->power)(p, on); + } +} + +Dev uartdevtab = { + 't', + "uart", + + uartreset, + devinit, + devshutdown, + uartattach, + uartwalk, + uartstat, + uartopen, + devcreate, + uartclose, + uartread, + devbread, + uartwrite, + devbwrite, + devremove, + uartwstat, + uartpower, +}; + +/* + * restart input if it's off + */ +static void +uartflow(void *v) +{ + Uart *p; + + p = v; + if(p->modem) + (*p->phys->rts)(p, 1); +} + +/* + * put some bytes into the local queue to avoid calling + * qconsume for every character + */ +int +uartstageoutput(Uart *p) +{ + int n; + + n = qconsume(p->oq, p->ostage, Stagesize); + if(n <= 0) + return 0; + p->op = p->ostage; + p->oe = p->ostage + n; + return n; +} + +/* + * restart output + */ +void +uartkick(void *v) +{ + Uart *p = v; + + if(p->blocked) + return; + + ilock(&p->tlock); + (*p->phys->kick)(p); + iunlock(&p->tlock); + + if(p->drain && uartdrained(p)){ + p->drain = 0; + wakeup(&p->r); + } +} + +/* + * Move data from the interrupt staging area to + * the input Queue. 
+ */ +static void +uartstageinput(Uart *p) +{ + int n; + uchar *ir, *iw; + + while(p->ir != p->iw){ + ir = p->ir; + if(p->ir > p->iw){ + iw = p->ie; + p->ir = p->istage; + } + else{ + iw = p->iw; + p->ir = p->iw; + } + if((n = qproduce(p->iq, ir, iw - ir)) < 0){ + p->serr++; + (*p->phys->rts)(p, 0); + } + else if(n == 0) + p->berr++; + } +} + +/* + * receive a character at interrupt time + */ +void +uartrecv(Uart *p, char ch) +{ + uchar *next; + + /* software flow control */ + if(p->xonoff){ + if(ch == CTLS){ + p->blocked = 1; + }else if(ch == CTLQ){ + p->blocked = 0; + p->ctsbackoff = 2; /* clock gets output going again */ + } + } + + /* receive the character */ + if(p->putc) + p->putc(p->iq, ch); + else if(p->iw != nil){ /* maybe the line isn't enabled yet */ + ilock(&p->rlock); + next = p->iw + 1; + if(next == p->ie) + next = p->istage; + if(next == p->ir) + uartstageinput(p); + if(next != p->ir){ + *p->iw = ch; + p->iw = next; + } + iunlock(&p->rlock); + } +} + +/* + * we save up input characters till clock time to reduce + * per character interrupt overhead. 
+ */ +static void +uartclock(void) +{ + Uart *p; + + ilock(&uartalloc); + for(p = uartalloc.elist; p; p = p->elist){ + + if(p->phys->poll != nil) + (*p->phys->poll)(p); + + /* this hopefully amortizes cost of qproduce to many chars */ + if(p->iw != p->ir){ + ilock(&p->rlock); + uartstageinput(p); + iunlock(&p->rlock); + } + + /* hang up if requested */ + if(p->dohup){ + qhangup(p->iq, 0); + qhangup(p->oq, 0); + p->dohup = 0; + } + + /* this adds hysteresis to hardware/software flow control */ + if(p->ctsbackoff){ + ilock(&p->tlock); + if(p->ctsbackoff){ + if(--(p->ctsbackoff) == 0) + (*p->phys->kick)(p); + } + iunlock(&p->tlock); + } + } + iunlock(&uartalloc); +} + +/* + * polling console input, output + */ + +Uart* consuart; + +int +uartgetc(void) +{ + if(consuart == nil || consuart->phys->getc == nil) + return -1; + return consuart->phys->getc(consuart); +} + +void +uartputc(int c) +{ + if(consuart == nil || consuart->phys->putc == nil) + return; + consuart->phys->putc(consuart, c); +} + +void +uartputs(char *s, int n) +{ + char *e; + + if(consuart == nil || consuart->phys->putc == nil) + return; + + e = s+n; + for(; s<e; s++){ + if(*s == '\n') /* send a carriage return ahead of every newline */ + consuart->phys->putc(consuart, '\r'); + consuart->phys->putc(consuart, *s); + } +} + +void +uartpush(void) +{ + while(consuart != nil && !uartdrained(consuart)){ + consuart->xonoff = 0; + consuart->blocked = 0; + uartkick(consuart); + } +} diff -Nru /sys/src/9k/port/devusb.c /sys/src/9k/port/devusb.c --- /sys/src/9k/port/devusb.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devusb.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1474 @@ +/* + * USB device driver framework. + * + * This is in charge of providing access to actual HCIs + * and providing I/O to the various endpoints of devices. + * A separate user program (usbd) is in charge of + * enumerating the bus, setting up endpoints and + * starting devices (also user programs). + * + * The interface provided is a violation of the standard: + * you're welcome. 
+ * + * The interface consists of a root directory with several files + * plus a directory (epN.M) with two files per endpoint. + * A device is represented by its first endpoint, which + * is a control endpoint automatically allocated for each device. + * Device control endpoints may be used to create new endpoints. + * Devices corresponding to hubs may also allocate new devices, + * perhaps also hubs. Initially, a hub device is allocated for + * each controller present, to represent its root hub. Those can + * never be removed. + * + * All endpoints refer to the first endpoint (epN.0) of the device, + * which keeps per-device information, and also to the HCI used + * to reach them. Although all endpoints cache that information. + * + * epN.M/data files permit I/O and are considered DMEXCL. + * epN.M/ctl files provide status info and accept control requests. + * + * Endpoints may be given file names to be listed also at #u, + * for those drivers that have nothing to do after configuring the + * device and its endpoints. + * + * Drivers for different controllers are kept at usb[oue]hci.c + * It's likely we could factor out much from controllers into + * a generic controller driver, the problem is that details + * regarding how to handle toggles, tokens, Tds, etc. will + * get in the way. Thus, code is probably easier the way it is. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" + +typedef struct Hcitype Hcitype; + +enum +{ + /* Qid numbers */ + Qdir = 0, /* #u */ + Qusbdir, /* #u/usb */ + Qctl, /* #u/usb/ctl - control requests */ + + Qep0dir, /* #u/usb/ep0.0 - endpoint 0 dir */ + Qep0io, /* #u/usb/ep0.0/data - endpoint 0 I/O */ + Qep0ctl, /* #u/usb/ep0.0/ctl - endpoint 0 ctl. 
*/ + Qep0dummy, /* give 4 qids to each endpoint */ + + Qepdir = 0, /* (qid-qep0dir)&3 is one of these */ + Qepio, /* to identify which file for the endpoint */ + Qepctl, + + /* ... */ + + /* Usb ctls. */ + CMdebug = 0, /* debug on|off */ + CMdump, /* dump (data structures for debug) */ + + /* Ep. ctls */ + CMnew = 0, /* new nb ctl|bulk|intr|iso r|w|rw (endpoint) */ + CMnewdev, /* newdev full|low|high portnb (allocate new devices) */ + CMhub, /* hub (set the device as a hub) */ + CMspeed, /* speed full|low|high|no */ + CMmaxpkt, /* maxpkt size */ + CMntds, /* ntds nb (max nb. of tds per µframe) */ + CMclrhalt, /* clrhalt (halt was cleared on endpoint) */ + CMpollival, /* pollival interval (interrupt/iso) */ + CMhz, /* hz n (samples/sec; iso) */ + CMsamplesz, /* samplesz n (sample size; iso) */ + CMinfo, /* info infostr (ke.ep info for humans) */ + CMdetach, /* detach (abort I/O forever on this ep). */ + CMaddress, /* address (address is assigned) */ + CMdebugep, /* debug n (set/clear debug for this ep) */ + CMname, /* name str (show up as #u/name as well) */ + CMtmout, /* timeout n (activate timeouts for ep) */ + CMpreset, /* reset the port */ + + /* Hub feature selectors */ + Rportenable = 1, + Rportreset = 4, + +}; + +struct Hcitype +{ + char* type; + int (*reset)(Hci*); +}; + +#define QID(q) ((int)(q).path) + +static char Edetach[] = "device is detached"; +static char Enotconf[] = "endpoint not configured"; +char Estalled[] = "endpoint stalled"; + +static Cmdtab usbctls[] = +{ + {CMdebug, "debug", 2}, + {CMdump, "dump", 1}, +}; + +static Cmdtab epctls[] = +{ + {CMnew, "new", 4}, + {CMnewdev, "newdev", 3}, + {CMhub, "hub", 1}, + {CMspeed, "speed", 2}, + {CMmaxpkt, "maxpkt", 2}, + {CMntds, "ntds", 2}, + {CMpollival, "pollival", 2}, + {CMsamplesz, "samplesz", 2}, + {CMhz, "hz", 2}, + {CMinfo, "info", 0}, + {CMdetach, "detach", 1}, + {CMaddress, "address", 1}, + {CMdebugep, "debug", 2}, + {CMclrhalt, "clrhalt", 1}, + {CMname, "name", 2}, + {CMtmout, "timeout", 2}, + 
{CMpreset, "reset", 1}, +}; + +static Dirtab usbdir[] = +{ + "ctl", {Qctl}, 0, 0666, +}; + +char *usbmodename[] = +{ + [OREAD] "r", + [OWRITE] "w", + [ORDWR] "rw", +}; + +static char *ttname[] = +{ + [Tnone] "none", + [Tctl] "control", + [Tiso] "iso", + [Tintr] "interrupt", + [Tbulk] "bulk", +}; + +static char *spname[] = +{ + [Fullspeed] "full", + [Lowspeed] "low", + [Highspeed] "high", + [Nospeed] "no", +}; + +static int debug; +static Hcitype hcitypes[Nhcis]; +static Hci* hcis[Nhcis]; +static QLock epslck; /* add, del, lookup endpoints */ +static Ep* eps[Neps]; /* all endpoints known */ +static int epmax; /* 1 + last endpoint index used */ +static int usbidgen; /* device address generator */ + +/* + * Is there something like this in a library? should it be? + */ +char* +seprintdata(char *s, char *se, uchar *d, int n) +{ + if(n > 10) + return seprint(s, se, " %#p[%d]: %.10H...", d, n, d); + else + return seprint(s, se, " %#p[%d]: %.*H", d, n, n, d); +} + +static int +name2speed(char *name) +{ + int i; + + for(i = 0; i < nelem(spname); i++) + if(strcmp(name, spname[i]) == 0) + return i; + return Nospeed; +} + +static int +name2ttype(char *name) +{ + int i; + + for(i = 0; i < nelem(ttname); i++) + if(strcmp(name, ttname[i]) == 0) + return i; + /* may be a std. USB ep. 
type */ + i = strtol(name, nil, 0); + switch(i+1){ + case Tctl: + case Tiso: + case Tbulk: + case Tintr: + return i+1; + default: + return Tnone; + } +} + +static int +name2mode(char *mode) +{ + int i; + + for(i = 0; i < nelem(usbmodename); i++) + if(strcmp(mode, usbmodename[i]) == 0) + return i; + return -1; +} + +static int +qid2epidx(int q) +{ + q = (q-Qep0dir)/4; + if(q < 0 || q >= epmax || eps[q] == nil) + return -1; + return q; +} + +static int +isqtype(int q, int type) +{ + if(q < Qep0dir) + return 0; + q -= Qep0dir; + return (q & 3) == type; +} + +void +addhcitype(char* t, int (*r)(Hci*)) +{ + static int ntype; + + if(ntype == Nhcis) + panic("too many USB host interface types"); + hcitypes[ntype].type = t; + hcitypes[ntype].reset = r; + ntype++; +} + +static char* +seprintep(char *s, char *se, Ep *ep, int all) +{ + static char* dsnames[] = { "config", "enabled", "detached", "reset" }; + Udev *d; + int i; + int di; + + d = ep->dev; + + qlock(ep); + if(waserror()){ + qunlock(ep); + nexterror(); + } + di = ep->dev->nb; + if(all) + s = seprint(s, se, "dev %d ep %d ", di, ep->nb); + s = seprint(s, se, "%s", dsnames[ep->dev->state]); + s = seprint(s, se, " %s", ttname[ep->ttype]); + assert(ep->mode == OREAD || ep->mode == OWRITE || ep->mode == ORDWR); + s = seprint(s, se, " %s", usbmodename[ep->mode]); + s = seprint(s, se, " speed %s", spname[d->speed]); + s = seprint(s, se, " maxpkt %ld", ep->maxpkt); + s = seprint(s, se, " pollival %ld", ep->pollival); + s = seprint(s, se, " samplesz %ld", ep->samplesz); + s = seprint(s, se, " hz %ld", ep->hz); + s = seprint(s, se, " hub %d", ep->dev->hub); + s = seprint(s, se, " port %d", ep->dev->port); + if(ep->inuse) + s = seprint(s, se, " busy"); + else + s = seprint(s, se, " idle"); + if(all){ + s = seprint(s, se, " load %uld", ep->load); + s = seprint(s, se, " ref %d addr %#p", ep->ref, ep); + s = seprint(s, se, " idx %d", ep->idx); + if(ep->name != nil) + s = seprint(s, se, " name '%s'", ep->name); + if(ep->tmout != 0) 
+ s = seprint(s, se, " tmout"); + if(ep == ep->ep0){ + s = seprint(s, se, " ctlrno %#x", ep->hp->ctlrno); + s = seprint(s, se, " eps:"); + for(i = 0; i < nelem(d->eps); i++) + if(d->eps[i] != nil) + s = seprint(s, se, " ep%d.%d", di, i); + } + } + if(ep->info != nil) + s = seprint(s, se, "\n%s %s\n", ep->info, ep->hp->type); + else + s = seprint(s, se, "\n"); + qunlock(ep); + poperror(); + return s; +} + +static Ep* +epalloc(Hci *hp) +{ + Ep *ep; + int i; + + ep = smalloc(sizeof(Ep)); + ep->ref = 1; + qlock(&epslck); + for(i = 0; i < Neps; i++) + if(eps[i] == nil) + break; + if(i == Neps){ + qunlock(&epslck); + free(ep); + print("usb: bug: too few endpoints.\n"); + return nil; + } + ep->idx = i; + if(epmax <= i) + epmax = i+1; + eps[i] = ep; + ep->hp = hp; + ep->maxpkt = 8; + ep->ntds = 1; + ep->samplesz = ep->pollival = ep->hz = 0; /* make them void */ + qunlock(&epslck); + return ep; +} + +static Ep* +getep(int i) +{ + Ep *ep; + + if(i < 0 || i >= epmax || eps[i] == nil) + return nil; + qlock(&epslck); + ep = eps[i]; + if(ep != nil) + incref(ep); + qunlock(&epslck); + return ep; +} + +static void +putep(Ep *ep) +{ + Udev *d; + + if(ep != nil && decref(ep) == 0){ + d = ep->dev; + deprint("usb: ep%d.%d %#p released\n", d->nb, ep->nb, ep); + qlock(&epslck); + eps[ep->idx] = nil; + if(ep->idx == epmax-1) + epmax--; + if(ep == ep->ep0 && ep->dev != nil && ep->dev->nb == usbidgen) + usbidgen--; + qunlock(&epslck); + if(d != nil){ + qlock(ep->ep0); + d->eps[ep->nb] = nil; + qunlock(ep->ep0); + } + if(ep->ep0 != ep){ + putep(ep->ep0); + ep->ep0 = nil; + } + free(ep->info); + free(ep->name); + free(ep); + } +} + +static void +dumpeps(void) +{ + int i; + static char buf[512]; + char *s; + char *e; + Ep *ep; + + print("usb dump eps: epmax %d Neps %d (ref=1+ for dump):\n", epmax, Neps); + for(i = 0; i < epmax; i++){ + s = buf; + e = buf+sizeof(buf); + ep = getep(i); + if(ep != nil){ + if(waserror()){ + putep(ep); + nexterror(); + } + s = seprint(s, e, "ep%d.%d ", 
ep->dev->nb, ep->nb); + seprintep(s, e, ep, 1); + print("%s", buf); + ep->hp->seprintep(buf, e, ep); + print("%s", buf); + poperror(); + putep(ep); + } + } + print("usb dump hcis:\n"); + for(i = 0; i < Nhcis; i++) + if(hcis[i] != nil) + hcis[i]->dump(hcis[i]); +} + +static int +newusbid(Hci *) +{ + int id; + + qlock(&epslck); + id = ++usbidgen; + if(id >= 0x7F) + print("#u: too many device addresses; reuse them more\n"); + qunlock(&epslck); + return id; +} + +/* + * Create endpoint 0 for a new device + */ +static Ep* +newdev(Hci *hp, int ishub, int isroot) +{ + Ep *ep; + Udev *d; + + ep = epalloc(hp); + d = ep->dev = smalloc(sizeof(Udev)); + d->nb = newusbid(hp); + d->eps[0] = ep; + ep->nb = 0; + ep->toggle[0] = ep->toggle[1] = 0; + d->ishub = ishub; + d->isroot = isroot; + if(hp->highspeed != 0) + d->speed = Highspeed; + else + d->speed = Fullspeed; + d->state = Dconfig; /* address not yet set */ + ep->dev = d; + ep->ep0 = ep; /* no ref counted here */ + ep->ttype = Tctl; + ep->tmout = Xfertmout; + ep->mode = ORDWR; + dprint("newdev %#p ep%d.%d %#p\n", d, d->nb, ep->nb, ep); + return ep; +} + +/* + * Create a new endpoint for the device + * accessed via the given endpoint 0. 
+ */ +static Ep* +newdevep(Ep *ep, int i, int tt, int mode) +{ + Ep *nep; + Udev *d; + + d = ep->dev; + if(d->eps[i] != nil) + error("endpoint already in use"); + nep = epalloc(ep->hp); + incref(ep); + d->eps[i] = nep; + nep->nb = i; + nep->toggle[0] = nep->toggle[1] = 0; + nep->ep0 = ep; + nep->dev = ep->dev; + nep->mode = mode; + nep->ttype = tt; + nep->debug = ep->debug; + /* set defaults */ + switch(tt){ + case Tctl: + nep->tmout = Xfertmout; + break; + case Tintr: + nep->pollival = 10; + break; + case Tiso: + nep->tmout = Xfertmout; + nep->pollival = 10; + nep->samplesz = 4; + nep->hz = 44100; + break; + } + deprint("newdevep ep%d.%d %#p\n", d->nb, nep->nb, nep); + return ep; +} + +static int +epdataperm(int mode) +{ + + switch(mode){ + case OREAD: + return 0440|DMEXCL; + break; + case OWRITE: + return 0220|DMEXCL; + break; + default: + return 0660|DMEXCL; + } +} + +static int +usbgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) +{ + Qid q; + Dirtab *dir; + int perm; + char *se; + Ep *ep; + int nb; + int mode; + + if(0)ddprint("usbgen q %#x s %d...", QID(c->qid), s); + if(s == DEVDOTDOT){ + if(QID(c->qid) <= Qusbdir){ + mkqid(&q, Qdir, 0, QTDIR); + devdir(c, q, "#u", 0, eve, 0555, dp); + }else{ + mkqid(&q, Qusbdir, 0, QTDIR); + devdir(c, q, "usb", 0, eve, 0555, dp); + } + if(0)ddprint("ok\n"); + return 1; + } + + switch(QID(c->qid)){ + case Qdir: /* list #u */ + if(s == 0){ + mkqid(&q, Qusbdir, 0, QTDIR); + devdir(c, q, "usb", 0, eve, 0555, dp); + if(0)ddprint("ok\n"); + return 1; + } + s--; + if(s < 0 || s >= epmax) + goto Fail; + ep = getep(s); + if(ep == nil || ep->name == nil){ + if(ep != nil) + putep(ep); + if(0)ddprint("skip\n"); + return 0; + } + if(waserror()){ + putep(ep); + nexterror(); + } + mkqid(&q, Qep0io+s*4, 0, QTFILE); + devdir(c, q, ep->name, 0, eve, epdataperm(ep->mode), dp); + putep(ep); + poperror(); + if(0)ddprint("ok\n"); + return 1; + + case Qusbdir: /* list #u/usb */ + Usbdir: + if(s < nelem(usbdir)){ + dir = &usbdir[s]; + mkqid(&q, 
dir->qid.path, 0, QTFILE); + devdir(c, q, dir->name, dir->length, eve, dir->perm, dp); + if(0)ddprint("ok\n"); + return 1; + } + s -= nelem(usbdir); + if(s < 0 || s >= epmax) + goto Fail; + ep = getep(s); + if(ep == nil){ + if(0)ddprint("skip\n"); + return 0; + } + if(waserror()){ + putep(ep); + nexterror(); + } + se = up->genbuf+sizeof(up->genbuf); + seprint(up->genbuf, se, "ep%d.%d", ep->dev->nb, ep->nb); + mkqid(&q, Qep0dir+4*s, 0, QTDIR); + putep(ep); + poperror(); + devdir(c, q, up->genbuf, 0, eve, 0755, dp); + if(0)ddprint("ok\n"); + return 1; + + case Qctl: + s = 0; + goto Usbdir; + + default: /* list #u/usb/epN.M */ + nb = qid2epidx(QID(c->qid)); + ep = getep(nb); + if(ep == nil) + goto Fail; + mode = ep->mode; + putep(ep); + if(isqtype(QID(c->qid), Qepdir)){ + Epdir: + switch(s){ + case 0: + mkqid(&q, Qep0io+nb*4, 0, QTFILE); + perm = epdataperm(mode); + devdir(c, q, "data", 0, eve, perm, dp); + break; + case 1: + mkqid(&q, Qep0ctl+nb*4, 0, QTFILE); + devdir(c, q, "ctl", 0, eve, 0664, dp); + break; + default: + goto Fail; + } + }else if(isqtype(QID(c->qid), Qepctl)){ + s = 1; + goto Epdir; + }else{ + s = 0; + goto Epdir; + } + if(0)ddprint("ok\n"); + return 1; + } +Fail: + if(0)ddprint("fail\n"); + return -1; +} + +static Hci* +hciprobe(int cardno, int ctlrno) +{ + Hci *hp; + char *type; + char name[64]; + static int epnb = 1; /* guess the endpoint nb. 
for the controller */ + + ddprint("hciprobe %d %d\n", cardno, ctlrno); + hp = smalloc(sizeof(Hci)); + hp->ctlrno = ctlrno; + hp->tbdf = BUSUNKNOWN; + + if(cardno < 0){ +// if(pciconfig("usb", ctlrno, hp) == 0){ + if(1){ + free(hp); + return nil; + } + for(cardno = 0; cardno < Nhcis; cardno++){ + if(hcitypes[cardno].type == nil) + break; + type = hp->type; + if(type==nil || *type==0) + type = "uhci"; + if(cistrcmp(hcitypes[cardno].type, type) == 0) + break; + } + } + + if(cardno >= Nhcis || hcitypes[cardno].type == nil){ + free(hp); + return nil; + } + dprint("%s...", hcitypes[cardno].type); + if(hcitypes[cardno].reset(hp) < 0){ + free(hp); + return nil; + } + + /* + * IRQ2 doesn't really exist, it's used to gang the interrupt + * controllers together. A device set to IRQ2 will appear on + * the second interrupt controller as IRQ9. + */ +/*port*/ if(hp->irq == 2) +/*port*/ hp->irq = 9; + snprint(name, sizeof(name), "usb%s", hcitypes[cardno].type); + intrenable(hp->irq, hp->interrupt, hp, hp->tbdf, name); + + /* + * modern machines have too many usb controllers to list on + * the console. 
+ */ + dprint("#u/usb/ep%d.0: %s: port %#p irq %d\n", + epnb, hcitypes[cardno].type, hp->port, hp->irq); + epnb++; + return hp; +} + +static void +usbreset(void) +{ + int cardno, ctlrno; + Hci *hp; + + if(getconf("*nousbprobe")) + return; + dprint("usbreset\n"); + + for(ctlrno = 0; ctlrno < Nhcis; ctlrno++) + if((hp = hciprobe(-1, ctlrno)) != nil) + hcis[ctlrno] = hp; + cardno = ctlrno = 0; + while(cardno < Nhcis && ctlrno < Nhcis && hcitypes[cardno].type != nil) + if(hcis[ctlrno] != nil) + ctlrno++; + else{ + hp = hciprobe(cardno, ctlrno); + if(hp == nil) + cardno++; + hcis[ctlrno++] = hp; + } + if(hcis[Nhcis-1] != nil) + print("usbreset: bug: Nhcis (%d) too small\n", Nhcis); +} + +/* need to move this to arch directory */ +static void +usbinit(void) +{ + Hci *hp; + int ctlrno; + Ep *d; + char info[40]; + + dprint("usbinit\n"); + for(ctlrno = 0; ctlrno < Nhcis; ctlrno++){ + hp = hcis[ctlrno]; + if(hp != nil){ + if(hp->init != nil) + hp->init(hp); + d = newdev(hp, 1, 1); /* new root hub */ + d->dev->state = Denabled; /* although addr == 0 */ + d->maxpkt = 64; + snprint(info, sizeof(info), "ports %d", hp->nports); + kstrdup(&d->info, info); + } + } +} + +static Chan* +usbattach(char *spec) +{ + return devattach(L'u', spec); +} + +static Walkqid* +usbwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, usbgen); +} + +static long +usbstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, usbgen); +} + +/* + * µs for the given transfer, for bandwidth allocation. + * This is a very rough worst case for what 5.11.3 + * of the usb 2.0 spec says. + * Also, we are using maxpkt and not actual transfer sizes. + * Only when we are sure we + * are not exceeding b/w might we consider adjusting it. 
+ */ +static ulong +usbload(int speed, int maxpkt) +{ + enum{ Hostns = 1000, Hubns = 333 }; + ulong l; + ulong bs; + + l = 0; + bs = 10UL * maxpkt; + switch(speed){ + case Highspeed: + l = 55*8*2 + 2 * (3 + bs) + Hostns; + break; + case Fullspeed: + l = 9107 + 84 * (4 + bs) + Hostns; + break; + case Lowspeed: + l = 64107 + 2 * Hubns + 667 * (3 + bs) + Hostns; + break; + default: + print("usbload: bad speed %d\n", speed); + /* let it run */ + } + return l / 1000UL; /* in µs */ +} + +static Chan* +usbopen(Chan *c, int omode) +{ + int q; + Ep *ep; + int mode; + + mode = openmode(omode); + q = QID(c->qid); + + if(q >= Qep0dir && qid2epidx(q) < 0) + error(Eio); + if(q < Qep0dir || isqtype(q, Qepctl) || isqtype(q, Qepdir)) + return devopen(c, omode, nil, 0, usbgen); + + ep = getep(qid2epidx(q)); + if(ep == nil) + error(Eio); + deprint("usbopen q %#x fid %d omode %d\n", q, c->fid, mode); + if(waserror()){ + putep(ep); + nexterror(); + } + qlock(ep); + if(ep->inuse){ + qunlock(ep); + error(Einuse); + } + ep->inuse = 1; + qunlock(ep); + if(waserror()){ + ep->inuse = 0; + nexterror(); + } + if(mode != OREAD && ep->mode == OREAD) + error(Eperm); + if(mode != OWRITE && ep->mode == OWRITE) + error(Eperm); + if(ep->ttype == Tnone) + error(Enotconf); + ep->clrhalt = 0; + ep->rhrepl = -1; + if(ep->load == 0) + ep->load = usbload(ep->dev->speed, ep->maxpkt); + ep->hp->epopen(ep); + + poperror(); /* ep->inuse */ + poperror(); /* don't putep(): ref kept for fid using the ep. 
*/ + + c->mode = mode; + c->flag |= COPEN; + c->offset = 0; + c->aux = nil; /* paranoia */ + return c; +} + +static void +epclose(Ep *ep) +{ + qlock(ep); + if(waserror()){ + qunlock(ep); + nexterror(); + } + if(ep->inuse){ + ep->hp->epclose(ep); + ep->inuse = 0; + } + qunlock(ep); + poperror(); +} + +static void +usbclose(Chan *c) +{ + int q; + Ep *ep; + + q = QID(c->qid); + if(q < Qep0dir || isqtype(q, Qepctl) || isqtype(q, Qepdir)) + return; + + ep = getep(qid2epidx(q)); + if(ep == nil) + return; + deprint("usbclose q %#x fid %d ref %d\n", q, c->fid, ep->ref); + if(waserror()){ + putep(ep); + nexterror(); + } + if(c->flag & COPEN){ + free(c->aux); + c->aux = nil; + epclose(ep); + putep(ep); /* release ref kept since usbopen */ + c->flag &= ~COPEN; + } + poperror(); + putep(ep); +} + +static long +ctlread(Chan *c, void *a, long n, vlong offset) +{ + int q; + char *s; + char *us; + char *se; + Ep *ep; + int i; + + q = QID(c->qid); + us = s = smalloc(READSTR); + se = s + READSTR; + if(waserror()){ + free(us); + nexterror(); + } + if(q == Qctl) + for(i = 0; i < epmax; i++){ + ep = getep(i); + if(ep != nil){ + if(waserror()){ + putep(ep); + nexterror(); + } + s = seprint(s, se, "ep%d.%d ", ep->dev->nb, ep->nb); + s = seprintep(s, se, ep, 0); + poperror(); + } + putep(ep); + } + else{ + ep = getep(qid2epidx(q)); + if(ep == nil) + error(Eio); + if(waserror()){ + putep(ep); + nexterror(); + } + if(c->aux != nil){ + /* After a new endpoint request we read + * the new endpoint name back. + */ + strecpy(s, se, c->aux); + free(c->aux); + c->aux = nil; + }else + seprintep(s, se, ep, 0); + poperror(); + putep(ep); + } + n = readstr(offset, a, n, us); + poperror(); + free(us); + return n; +} + +/* + * Fake root hub emulation. 
+ */ +static long +rhubread(Ep *ep, void *a, long n) +{ + char *b; + + if(ep->dev->isroot == 0 || ep->nb != 0 || n < 2) + return -1; + if(ep->rhrepl < 0) + return -1; + + b = a; + memset(b, 0, n); + PUT2(b, ep->rhrepl); + ep->rhrepl = -1; + return n; +} + +static long +rhubwrite(Ep *ep, void *a, long n) +{ + uchar *s; + int cmd; + int feature; + int port; + Hci *hp; + + if(ep->dev == nil || ep->dev->isroot == 0 || ep->nb != 0) + return -1; + if(n != Rsetuplen) + error("root hub is a toy hub"); + ep->rhrepl = -1; + s = a; + if(s[Rtype] != (Rh2d|Rclass|Rother) && s[Rtype] != (Rd2h|Rclass|Rother)) + error("root hub is a toy hub"); + hp = ep->hp; + cmd = s[Rreq]; + feature = GET2(s+Rvalue); + port = GET2(s+Rindex); + if(port < 1 || port > hp->nports) + error("bad hub port number"); + switch(feature){ + case Rportenable: + ep->rhrepl = hp->portenable(hp, port, cmd == Rsetfeature); + break; + case Rportreset: + ep->rhrepl = hp->portreset(hp, port, cmd == Rsetfeature); + break; + case Rgetstatus: + ep->rhrepl = hp->portstatus(hp, port); + break; + default: + ep->rhrepl = 0; + } + return n; +} + +static long +usbread(Chan *c, void *a, long n, vlong offset) +{ + int q; + Ep *ep; + int nr; + + q = QID(c->qid); + + if(c->qid.type == QTDIR) + return devdirread(c, a, n, nil, 0, usbgen); + + if(q == Qctl || isqtype(q, Qepctl)) + return ctlread(c, a, n, offset); + + ep = getep(qid2epidx(q)); + if(ep == nil) + error(Eio); + if(waserror()){ + putep(ep); + nexterror(); + } + if(ep->dev->state == Ddetach) + error(Edetach); + if(ep->mode == OWRITE || ep->inuse == 0) + error(Ebadusefd); + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tctl: + nr = rhubread(ep, a, n); + if(nr >= 0){ + n = nr; + break; + } + /* else fall */ + default: + ddeprint("\nusbread q %#x fid %d cnt %ld off %lld\n",q,c->fid,n,offset); + n = ep->hp->epread(ep, a, n); + break; + } + poperror(); + putep(ep); + return n; +} + +static long +pow2(int n) +{ + return 1 << n; +} + +static void 
+setmaxpkt(Ep *ep, char* s) +{ + long spp; /* samples per packet */ + + if(ep->dev->speed == Highspeed) + spp = (ep->hz * ep->pollival * ep->ntds + 7999) / 8000; + else + spp = (ep->hz * ep->pollival + 999) / 1000; + ep->maxpkt = spp * ep->samplesz; + deprint("usb: %s: setmaxpkt: hz %ld poll %ld" + " ntds %d %s speed -> spp %ld maxpkt %ld\n", s, + ep->hz, ep->pollival, ep->ntds, spname[ep->dev->speed], + spp, ep->maxpkt); + if(ep->maxpkt > 1024){ + print("usb: %s: maxpkt %ld > 1024. truncating\n", s, ep->maxpkt); + ep->maxpkt = 1024; + } +} + +/* + * Many endpoint ctls. simply update the portable representation + * of the endpoint. The actual controller driver will look + * at them to setup the endpoints as dictated. + */ +static long +epctl(Ep *ep, Chan *c, void *a, long n) +{ + int i, l, mode, nb, tt; + char *b, *s; + Cmdbuf *cb; + Cmdtab *ct; + Ep *nep; + Udev *d; + static char *Info = "info "; + + d = ep->dev; + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, epctls, nelem(epctls)); + if(ct == nil) + error(Ebadctl); + i = ct->index; + if(i == CMnew || i == CMspeed || i == CMhub || i == CMpreset) + if(ep != ep->ep0) + error("allowed only on a setup endpoint"); + if(i != CMclrhalt && i != CMdetach && i != CMdebugep && i != CMname) + if(ep != ep->ep0 && ep->inuse != 0) + error("must configure before using"); + switch(i){ + case CMnew: + deprint("usb epctl %s\n", cb->f[0]); + nb = strtol(cb->f[1], nil, 0); + if(nb < 0 || nb >= Ndeveps) + error("bad endpoint number"); + tt = name2ttype(cb->f[2]); + if(tt == Tnone) + error("unknown endpoint type"); + mode = name2mode(cb->f[3]); + if(mode < 0) + error("unknown i/o mode"); + newdevep(ep, nb, tt, mode); + break; + case CMnewdev: + deprint("usb epctl %s\n", cb->f[0]); + if(ep != ep->ep0 || d->ishub == 0) + error("not a hub setup endpoint"); + l = name2speed(cb->f[1]); + if(l == Nospeed) + error("speed must be full|low|high"); + nep = newdev(ep->hp, 0, 0); + nep->dev->speed = 
l; + if(nep->dev->speed != Lowspeed) + nep->maxpkt = 64; /* assume full speed */ + nep->dev->hub = d->nb; + nep->dev->port = atoi(cb->f[2]); + /* next read request will read + * the name for the new endpoint + */ + l = sizeof(up->genbuf); + snprint(up->genbuf, l, "ep%d.%d", nep->dev->nb, nep->nb); + kstrdup(&c->aux, up->genbuf); + break; + case CMhub: + deprint("usb epctl %s\n", cb->f[0]); + d->ishub = 1; + break; + case CMspeed: + l = name2speed(cb->f[1]); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l == Nospeed) + error("speed must be full|low|high"); + qlock(ep->ep0); + d->speed = l; + qunlock(ep->ep0); + break; + case CMmaxpkt: + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l < 1 || l > 1024) + error("maxpkt not in [1:1024]"); + qlock(ep); + ep->maxpkt = l; + qunlock(ep); + break; + case CMntds: + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l < 1 || l > 3) + error("ntds not in [1:3]"); + qlock(ep); + ep->ntds = l; + qunlock(ep); + break; + case CMpollival: + if(ep->ttype != Tintr && ep->ttype != Tiso) + error("not an intr or iso endpoint"); + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(ep->ttype == Tiso || + (ep->ttype == Tintr && ep->dev->speed == Highspeed)){ + if(l < 1 || l > 16) + error("pollival power not in [1:16]"); + l = pow2(l-1); + }else + if(l < 1 || l > 255) + error("pollival not in [1:255]"); + qlock(ep); + ep->pollival = l; + if(ep->ttype == Tiso) + setmaxpkt(ep, "pollival"); + qunlock(ep); + break; + case CMsamplesz: + if(ep->ttype != Tiso) + error("not an iso endpoint"); + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l <= 0 || l > 8) + error("samplesz not in [1:8]"); + qlock(ep); + ep->samplesz = l; + setmaxpkt(ep, "samplesz"); + qunlock(ep); + break; + case CMhz: + if(ep->ttype != Tiso) + error("not an iso endpoint"); + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); 
+ if(l <= 0 || l > 100000) + error("hz not in [1:100000]"); + qlock(ep); + ep->hz = l; + setmaxpkt(ep, "hz"); + qunlock(ep); + break; + case CMclrhalt: + qlock(ep); + deprint("usb epctl %s\n", cb->f[0]); + ep->clrhalt = 1; + qunlock(ep); + break; + case CMinfo: + deprint("usb epctl %s\n", cb->f[0]); + l = strlen(Info); + s = a; + if(n < l+2 || strncmp(Info, s, l) != 0) + error(Ebadctl); + if(n > 1024) + n = 1024; + b = smalloc(n); + memmove(b, s+l, n-l); + b[n-l] = 0; + if(b[n-l-1] == '\n') + b[n-l-1] = 0; + qlock(ep); + free(ep->info); + ep->info = b; + qunlock(ep); + break; + case CMaddress: + deprint("usb epctl %s\n", cb->f[0]); + ep->dev->state = Denabled; + break; + case CMdetach: + if(ep->dev->isroot != 0) + error("can't detach a root hub"); + deprint("usb epctl %s ep%d.%d\n", + cb->f[0], ep->dev->nb, ep->nb); + ep->dev->state = Ddetach; + /* Release file system ref. for its endpoints */ + for(i = 0; i < nelem(ep->dev->eps); i++) + putep(ep->dev->eps[i]); + break; + case CMdebugep: + if(strcmp(cb->f[1], "on") == 0) + ep->debug = 1; + else if(strcmp(cb->f[1], "off") == 0) + ep->debug = 0; + else + ep->debug = strtoul(cb->f[1], nil, 0); + print("usb: ep%d.%d debug %d\n", + ep->dev->nb, ep->nb, ep->debug); + break; + case CMname: + deprint("usb epctl %s %s\n", cb->f[0], cb->f[1]); + validname(cb->f[1], 0); + kstrdup(&ep->name, cb->f[1]); + break; + case CMtmout: + deprint("usb epctl %s\n", cb->f[0]); + if(ep->ttype == Tiso || ep->ttype == Tctl) + error("ctl ignored for this endpoint type"); + ep->tmout = strtoul(cb->f[1], nil, 0); + if(ep->tmout != 0 && ep->tmout < Xfertmout) + ep->tmout = Xfertmout; + break; + case CMpreset: + deprint("usb epctl %s\n", cb->f[0]); + if(ep->ttype != Tctl) + error("not a control endpoint"); + if(ep->dev->state != Denabled) + error("forbidden on devices not enabled"); + ep->dev->state = Dreset; + break; + default: + panic("usb: unknown epctl %d", ct->index); + } + free(cb); + poperror(); + return n; +} + +static long +usbctl(void 
*a, long n) +{ + Cmdtab *ct; + Cmdbuf *cb; + Ep *ep; + int i; + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, usbctls, nelem(usbctls)); + dprint("usb ctl %s\n", cb->f[0]); + switch(ct->index){ + case CMdebug: + if(strcmp(cb->f[1], "on") == 0) + debug = 1; + else if(strcmp(cb->f[1], "off") == 0) + debug = 0; + else + debug = strtol(cb->f[1], nil, 0); + print("usb: debug %d\n", debug); + for(i = 0; i < epmax; i++) + if((ep = getep(i)) != nil){ + ep->hp->debug(ep->hp, debug); + putep(ep); + } + break; + case CMdump: + dumpeps(); + break; + } + free(cb); + poperror(); + return n; +} + +static long +ctlwrite(Chan *c, void *a, long n) +{ + int q; + Ep *ep; + + q = QID(c->qid); + if(q == Qctl) + return usbctl(a, n); + + ep = getep(qid2epidx(q)); + if(ep == nil) + error(Eio); + if(waserror()){ + putep(ep); + nexterror(); + } + if(ep->dev->state == Ddetach) + error(Edetach); + if(isqtype(q, Qepctl) && c->aux != nil){ + /* Be sure we don't keep a cloned ep name */ + free(c->aux); + c->aux = nil; + error("read, not write, expected"); + } + n = epctl(ep, c, a, n); + putep(ep); + poperror(); + return n; +} + +static long +usbwrite(Chan *c, void *a, long n, vlong off) +{ + int nr, q; + Ep *ep; + + if(c->qid.type == QTDIR) + error(Eisdir); + + q = QID(c->qid); + + if(q == Qctl || isqtype(q, Qepctl)) + return ctlwrite(c, a, n); + + ep = getep(qid2epidx(q)); + if(ep == nil) + error(Eio); + if(waserror()){ + putep(ep); + nexterror(); + } + if(ep->dev->state == Ddetach) + error(Edetach); + if(ep->mode == OREAD || ep->inuse == 0) + error(Ebadusefd); + + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tctl: + nr = rhubwrite(ep, a, n); + if(nr >= 0){ + n = nr; + break; + } + /* else fall */ + default: + ddeprint("\nusbwrite q %#x fid %d cnt %ld off %lld\n",q, c->fid, n, off); + ep->hp->epwrite(ep, a, n); + } + putep(ep); + poperror(); + return n; +} + +void +usbshutdown(void) +{ + Hci *hp; + int i; + + for(i = 0; 
i < Nhcis; i++){ + hp = hcis[i]; + if(hp == nil) + continue; + if(hp->shutdown == nil) + print("#u: no shutdown function for %s\n", hp->type); + else + hp->shutdown(hp); + } +} + +Dev usbdevtab = { + L'u', + "usb", + + usbreset, + usbinit, + usbshutdown, + usbattach, + usbwalk, + usbstat, + usbopen, + devcreate, + usbclose, + usbread, + devbread, + usbwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru /sys/src/9k/port/devwd.c /sys/src/9k/port/devwd.c --- /sys/src/9k/port/devwd.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/devwd.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,250 @@ +/* + * watchdog framework + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +enum { + Qdir, + Qwdctl, +}; + +/* + * these are exposed so that delay() and the like can disable the watchdog + * before busy looping for a long time. + */ +Watchdog*watchdog; +int watchdogon; + +static Watchdog *wd; +static int wdautopet; +static int wdclock0called; +static Ref refs; +static Dirtab wddir[] = { + ".", { Qdir, 0, QTDIR }, 0, 0555, + "wdctl", { Qwdctl, 0 }, 0, 0664, +}; + + +void +addwatchdog(Watchdog *wdog) +{ + if(wd){ + print("addwatchdog: watchdog already installed\n"); + return; + } + wd = watchdog = wdog; + if(wd) + wd->disable(); +} + +static int +wdallowed(void) +{ + return getconf("*nowatchdog") == nil; +} + +static void +wdshutdown(void) +{ + if (wd) { + wd->disable(); + watchdogon = 0; + } +} + +/* called from clock interrupt, so restart needs ilock internally */ +static void +wdpet(void) +{ + /* watchdog could be paused; if so, don't restart */ + if (wdautopet && watchdogon) + wd->restart(); +} + +/* + * reassure the watchdog from the clock interrupt + * until the user takes control of it. 
+ */ +static void +wdautostart(void) +{ + if (wdautopet || !wd || !wdallowed()) + return; + if (waserror()) { + print("watchdog: automatic enable failed\n"); + return; + } + wd->enable(); + poperror(); + + wdautopet = watchdogon = 1; + if (!wdclock0called) { + addclock0link(wdpet, 200); + wdclock0called = 1; + } +} + +/* + * disable strokes from the clock interrupt. + * have to disable the watchdog to mark it `not in use'. + */ +static void +wdautostop(void) +{ + if (!wdautopet) + return; + wdautopet = 0; + wdshutdown(); +} + +/* + * user processes exist and up is non-nil when the + * device init routines are called. + */ +static void +wdinit(void) +{ + wdautostart(); +} + +static Chan* +wdattach(char *spec) +{ + return devattach('w', spec); +} + +static Walkqid* +wdwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, wddir, nelem(wddir), devgen); +} + +static int +wdstat(Chan *c, uchar *dp, int n) +{ + return devstat(c, dp, n, wddir, nelem(wddir), devgen); +} + +static Chan* +wdopen(Chan* c, int omode) +{ + wdautostop(); + c = devopen(c, omode, wddir, nelem(wddir), devgen); + if (c->qid.path == Qwdctl) + incref(&refs); + return c; +} + +static void +wdclose(Chan *c) +{ + if(c->qid.path == Qwdctl && c->flag&COPEN && decref(&refs) <= 0) + wdshutdown(); +} + +static long +wdread(Chan* c, void* a, long n, vlong off) +{ + ulong offset = off; + char *p; + + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, a, n, wddir, nelem(wddir), devgen); + + case Qwdctl: + if(wd == nil || wd->stat == nil) + return 0; + + p = malloc(READSTR); + if(p == nil) + error(Enomem); + if(waserror()){ + free(p); + nexterror(); + } + + wd->stat(p, p + READSTR); + n = readstr(offset, a, n, p); + free(p); + poperror(); + return n; + + default: + error(Egreg); + break; + } + return 0; +} + +static long +wdwrite(Chan* c, void* a, long n, vlong off) +{ + ulong offset = off; + char *p; + + switch((ulong)c->qid.path){ + case Qdir: + error(Eperm); + + 
case Qwdctl: + if(wd == nil) + return n; + + if(offset || n >= READSTR) + error(Ebadarg); + + if((p = strchr(a, '\n')) != nil) + *p = 0; + + if(strncmp(a, "enable", n) == 0) { + if (waserror()) { + print("watchdog: enable failed\n"); + nexterror(); + } + wd->enable(); + poperror(); + watchdogon = 1; + } else if(strncmp(a, "disable", n) == 0) + wdshutdown(); + else if(strncmp(a, "restart", n) == 0) + wd->restart(); + else + error(Ebadarg); + return n; + + default: + error(Egreg); + break; + } + + return 0; +} + +Dev wddevtab = { + 'w', + "watchdog", + + devreset, + wdinit, + wdshutdown, + wdattach, + wdwalk, + wdstat, + wdopen, + devcreate, + wdclose, + wdread, + devbread, + wdwrite, + devbwrite, + devremove, + devwstat, + devpower, +}; diff -Nru /sys/src/9k/port/dirdep /sys/src/9k/port/dirdep --- /sys/src/9k/port/dirdep Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/dirdep Wed Dec 9 00:00:00 2015 @@ -0,0 +1,34 @@ +#!/bin/rc + +awk -v 'confdir='`{basename -d $1} ' +BEGIN{ + collect = isdir = 0 +} + +/^[ \t]*$/{ + next +} +/^#/{ + next +} +collect && /^[^ \t]/{ + collect = isdir = 0 +} +collect && $0 ~ /[^ \t]+/{ + tab[$0]++; +} +$0 ~ /^[^ \t]/{ + if($1 ~ "dirs"){ + dirs = 1; + collect = 1; + } + next; +} + +END{ + for(i in tab) { + gsub(/[ \t]/, "", i) + mkfragdir = confdir "/" i + print "<|sed ''s!MKFRAGDIR!" 
mkfragdir "!'' " mkfragdir "/mkfrag" + } +}' $* diff -Nru /sys/src/9k/port/edf.c /sys/src/9k/port/edf.c --- /sys/src/9k/port/edf.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/edf.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,684 @@ +/* EDF scheduling */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include + +/* debugging */ +enum { + Dontprint = 1, +}; + +#define DPRINT if(Dontprint){}else print + +static long now; /* Low order 32 bits of time in µs */ +extern ulong delayedscheds; +extern Schedq runq[Nrq]; +extern int nrdy; +extern ulong runvec; + +/* Statistics stuff */ +ulong nilcount; +ulong scheds; +ulong edfnrun; +int misseddeadlines; + +/* Edfschedlock protects modification of admission params */ +int edfinited; +QLock edfschedlock; +static Lock thelock; + +enum{ + Dl, /* Invariant for schedulability test: Dl < Rl */ + Rl, +}; + +static char *testschedulability(Proc*); +static Proc *qschedulability; + +enum { + Onemicrosecond = 1, + Onemillisecond = 1000, + Onesecond = 1000000, + OneRound = Onemillisecond/2, +}; + +static int +timeconv(Fmt *f) +{ + char buf[128], *sign; + vlong t; + + buf[0] = 0; + switch(f->r) { + case 'U': + t = va_arg(f->args, uvlong); + break; + case 't': /* vlong in nanoseconds */ + t = va_arg(f->args, long); + break; + default: + return fmtstrcpy(f, "(timeconv)"); + } + if (t < 0) { + sign = "-"; + t = -t; + } + else + sign = ""; + if (t > Onesecond){ + t += OneRound; + sprint(buf, "%s%d.%.3ds", sign, (int)(t / Onesecond), + (int)(t % Onesecond)/Onemillisecond); + }else if (t > Onemillisecond) + sprint(buf, "%s%d.%.3dms", sign, (int)(t / Onemillisecond), + (int)(t % Onemillisecond)); + else + sprint(buf, "%s%dµs", sign, (int)t); + return fmtstrcpy(f, buf); +} + +long edfcycles; + +Edf* +edflock(Proc *p) +{ + Edf *e; + + if (p->edf == nil) + return nil; + ilock(&thelock); + if((e = p->edf) && (e->flags & Admitted)){ + locksetpc(&thelock, 
getcallerpc(&p)); +#ifdef EDFCYCLES + edfcycles -= lcycles(); +#endif + now = µs(); + return e; + } + iunlock(&thelock); + return nil; +} + +void +edfunlock(void) +{ + +#ifdef EDFCYCLES + edfcycles += lcycles(); +#endif + edfnrun++; + iunlock(&thelock); +} + +void +edfinit(Proc*p) +{ + if(!edfinited){ + fmtinstall('t', timeconv); + edfinited++; + } + now = µs(); + DPRINT("%lud edfinit %d[%s]\n", now, p->pid, statename[p->state]); + p->edf = malloc(sizeof(Edf)); + if(p->edf == nil) + error(Enomem); + return; +} + +static void +deadlineintr(Ureg*, Timer *t) +{ + /* Proc reached deadline */ + extern int panicking; + Proc *p; + void (*pt)(Proc*, int, vlong, vlong); + + if(panicking || active.exiting) + return; + + p = t->ta; + now = µs(); + DPRINT("%lud deadlineintr %d[%s]\n", now, p->pid, statename[p->state]); + /* If we're interrupting something other than the proc pointed to by t->a, + * we've already achieved recheduling, so we need not do anything + * Otherwise, we must cause a reschedule, but if we call sched() + * here directly, the timer interrupt routine will not finish its business + * Instead, we cause the resched to happen when the interrupted proc + * returns to user space + */ + if(p == up){ + if(up->trace && (pt = proctrace)) + pt(up, SInts, 0, 0); + up->delaysched++; + delayedscheds++; + } +} + +static void +release(Proc *p) +{ + /* Called with edflock held */ + Edf *e; + void (*pt)(Proc*, int, vlong, vlong); + long n; + vlong nowns; + + e = p->edf; + e->flags &= ~Yield; + if(e->d - now < 0){ + e->periods++; + e->r = now; + if((e->flags & Sporadic) == 0){ + /* + * Non sporadic processes stay true to their period; + * calculate next release time. + * Second test limits duration of while loop. 
/*
 * Release-time handler for the proc stored in t->ta.
 * Installed as a timer function (tf) by edfadmit, edfyield and
 * edfready, and also called directly by edfadmit.
 * Does nothing while the system is panicking or exiting, or when
 * edflock fails (proc has no Edf or is not Admitted).
 * Otherwise the proc is released and, depending on its state,
 * made ready, re-armed with edfrun, or woken up.
 */
static void
releaseintr(Ureg*, Timer *t)
{
	Proc *p;
	extern int panicking;
	Schedq *rq;

	if(panicking || active.exiting)
		return;

	p = t->ta;
	/* edflock returns with thelock held only on success */
	if((edflock(p)) == nil)
		return;
	DPRINT("%lud releaseintr %d[%s]\n", now, p->pid, statename[p->state]);
	switch(p->state){
	default:
		edfunlock();
		return;
	case Ready:
		/* remove proc from current runq */
		rq = &runq[p->priority];
		if(dequeueproc(rq, p) != p){
			DPRINT("releaseintr: can't find proc or lock race\n");
			release(p);	/* It'll start best effort */
			edfunlock();
			return;
		}
		p->state = Waitrelease;
		/* fall through */
	case Waitrelease:
		release(p);
		/* drop the edf lock before ready(), which is called unlocked here */
		edfunlock();
		if(p->state == Wakeme){
			iprint("releaseintr: wakeme\n");
		}
		ready(p);
		/* ask the current proc, if any, to reschedule */
		if(up){
			up->delaysched++;
			delayedscheds++;
		}
		return;
	case Running:
		release(p);
		edfrun(p, 1);
		/* edfrun is called with the lock held; the unlock is below */
		break;
	case Wakeme:
		release(p);
		edfunlock();
		/* wake the sleeper recorded in trend (set by edfyield), once */
		if(p->trend)
			wakeup(p->trend);
		p->trend = nil;
		if(up){
			up->delaysched++;
			delayedscheds++;
		}
		return;
	}
	edfunlock();
}
pt(p, SSlice, 0, 0); + DPRINT("%lud edfrecord slice used up\n", now); + e->d = now; + e->S = 0; + }else + e->S -= used; + } + e->s = now; + edfunlock(); +} + +void +edfrun(Proc *p, int edfpri) +{ + Edf *e; + void (*pt)(Proc*, int, vlong, vlong); + long tns; + + e = p->edf; + /* Called with edflock held */ + if(edfpri){ + tns = e->d - now; + if(tns <= 0 || e->S == 0){ + /* Deadline reached or resources exhausted, + * deschedule forthwith + */ + p->delaysched++; + delayedscheds++; + e->s = now; + return; + } + if(e->S < tns) + tns = e->S; + if(tns < 20) + tns = 20; + e->tns = 1000LL * tns; /* µs to ns */ + if(e->tt == nil || e->tf != deadlineintr){ + DPRINT("%lud edfrun, deadline=%lud\n", now, tns); + }else{ + DPRINT("v"); + } + if(p->trace && (pt = proctrace)) + pt(p, SInte, todget(nil) + e->tns, 0); + e->tmode = Trelative; + e->tf = deadlineintr; + e->ta = p; + timeradd(e); + }else{ + DPRINT("<"); + } + e->s = now; +} + +char * +edfadmit(Proc *p) +{ + char *err; + Edf *e; + int i; + Proc *r; + void (*pt)(Proc*, int, vlong, vlong); + long tns; + + e = p->edf; + if (e->flags & Admitted) + return "task state"; /* should never happen */ + + /* simple sanity checks */ + if (e->T == 0) + return "T not set"; + if (e->C == 0) + return "C not set"; + if (e->D > e->T) + return "D > T"; + if (e->D == 0) /* if D is not set, set it to T */ + e->D = e->T; + if (e->C > e->D) + return "C > D"; + + qlock(&edfschedlock); + if (err = testschedulability(p)){ + qunlock(&edfschedlock); + return err; + } + e->flags |= Admitted; + + edflock(p); + + if(p->trace && (pt = proctrace)) + pt(p, SAdmit, 0, 0); + + /* Look for another proc with the same period to synchronize to */ + for(i=0; (r = psincref(i)) != nil; i++) { + if(r->state == Dead || r == p){ + psdecref(r); + continue; + } + if (r->edf == nil || (r->edf->flags & Admitted) == 0){ + psdecref(r); + continue; + } + if (r->edf->T == e->T) + break; + } + if (r == nil){ + /* Can't synchronize to another proc, release now */ + e->t = now; 
+ e->d = 0; + release(p); + if (p == up){ + DPRINT("%lud edfadmit self %d[%s], release now: r=%lud d=%lud t=%lud\n", + now, p->pid, statename[p->state], e->r, e->d, e->t); + /* We're already running */ + edfrun(p, 1); + }else{ + /* We're releasing another proc */ + DPRINT("%lud edfadmit other %d[%s], release now: r=%lud d=%lud t=%lud\n", + now, p->pid, statename[p->state], e->r, e->d, e->t); + p->ta = p; + edfunlock(); + qunlock(&edfschedlock); + releaseintr(nil, p); + return nil; + } + }else{ + /* Release in synch to something else */ + e->t = r->edf->t; + psdecref(r); + if (p == up){ + DPRINT("%lud edfadmit self %d[%s], release at %lud\n", + now, p->pid, statename[p->state], e->t); + }else{ + DPRINT("%lud edfadmit other %d[%s], release at %lud\n", + now, p->pid, statename[p->state], e->t); + if(e->tt == nil){ + e->tf = releaseintr; + e->ta = p; + tns = e->t - now; + if(tns < 20) + tns = 20; + e->tns = 1000LL * tns; + e->tmode = Trelative; + timeradd(e); + } + } + } + edfunlock(); + qunlock(&edfschedlock); + return nil; +} + +void +edfstop(Proc *p) +{ + Edf *e; + void (*pt)(Proc*, int, vlong, vlong); + + if(e = edflock(p)){ + DPRINT("%lud edfstop %d[%s]\n", now, p->pid, statename[p->state]); + if(p->trace && (pt = proctrace)) + pt(p, SExpel, 0, 0); + e->flags &= ~Admitted; + if(e->tt) + timerdel(e); + edfunlock(); + } +} + +static int +yfn(void *) +{ + now = µs(); + return up->trend == nil || now - up->edf->r >= 0; +} + +void +edfyield(void) +{ + /* sleep until next release */ + Edf *e; + void (*pt)(Proc*, int, vlong, vlong); + long n; + + if((e = edflock(up)) == nil) + return; + if(up->trace && (pt = proctrace)) + pt(up, SYield, 0, 0); + if((n = now - e->t) > 0){ + if(n < e->T) + e->t += e->T; + else + e->t = now + e->T - (n % e->T); + } + e->r = e->t; + e->flags |= Yield; + e->d = now; + if (up->tt == nil){ + n = e->t - now; + if(n < 20) + n = 20; + up->tns = 1000LL * n; + up->tf = releaseintr; + up->tmode = Trelative; + up->ta = up; + up->trend = &up->sleep; + 
timeradd(up); + }else if(up->tf != releaseintr) + print("edfyield: surprise! %#p\n", up->tf); + edfunlock(); + sleep(&up->sleep, yfn, nil); +} + +int +edfready(Proc *p) +{ + Edf *e; + Schedq *rq; + Proc *l, *pp; + void (*pt)(Proc*, int, vlong, vlong); + long n; + + if((e = edflock(p)) == nil) + return 0; + + if(p->state == Wakeme && p->r){ + iprint("edfready: wakeme\n"); + } + if(e->d - now <= 0){ + /* past deadline, arrange for next release */ + if((e->flags & Sporadic) == 0){ + /* + * Non sporadic processes stay true to their period; + * calculate next release time. + */ + if((n = now - e->t) > 0){ + if(n < e->T) + e->t += e->T; + else + e->t = now + e->T - (n % e->T); + } + } + if(now - e->t < 0){ + /* Next release is in the future, schedule it */ + if(e->tt == nil || e->tf != releaseintr){ + n = e->t - now; + if(n < 20) + n = 20; + e->tns = 1000LL * n; + e->tmode = Trelative; + e->tf = releaseintr; + e->ta = p; + timeradd(e); + DPRINT("%lud edfready %d[%s], release=%lud\n", + now, p->pid, statename[p->state], e->t); + } + if(p->state == Running && (e->flags & (Yield|Yieldonblock)) == 0 && (e->flags & Extratime)){ + /* If we were running, we've overrun our CPU allocation + * or missed the deadline, continue running best-effort at low priority + * Otherwise we were blocked. 
If we don't yield on block, we continue + * best effort + */ + DPRINT(">"); + p->basepri = PriExtra; + p->fixedpri = 1; + edfunlock(); + return 0; /* Stick on runq[PriExtra] */ + } + DPRINT("%lud edfready %d[%s] wait release at %lud\n", + now, p->pid, statename[p->state], e->t); + p->state = Waitrelease; + edfunlock(); + return 1; /* Make runnable later */ + } + DPRINT("%lud edfready %d %s release now\n", now, p->pid, statename[p->state]); + /* release now */ + release(p); + } + edfunlock(); + DPRINT("^"); + rq = &runq[PriEdf]; + /* insert in queue in earliest deadline order */ + lock(runq); + l = nil; + for(pp = rq->head; pp; pp = pp->rnext){ + if(pp->edf->d > e->d) + break; + l = pp; + } + p->rnext = pp; + if (l == nil) + rq->head = p; + else + l->rnext = p; + if(pp == nil) + rq->tail = p; + rq->n++; + nrdy++; + runvec |= 1 << PriEdf; + p->priority = PriEdf; + p->readytime = m->ticks; + p->state = Ready; + unlock(runq); + if(p->trace && (pt = proctrace)) + pt(p, SReady, 0, 0); + return 1; +} + + +static void +testenq(Proc *p) +{ + Proc *xp, **xpp; + Edf *e; + + e = p->edf; + e->testnext = nil; + if (qschedulability == nil) { + qschedulability = p; + return; + } + SET(xp); + for (xpp = &qschedulability; *xpp; xpp = &xp->edf->testnext) { + xp = *xpp; + if (e->testtime - xp->edf->testtime < 0 + || (e->testtime == xp->edf->testtime && e->testtype < xp->edf->testtype)){ + e->testnext = xp; + *xpp = p; + return; + } + } + assert(xp->edf->testnext == nil); + xp->edf->testnext = p; +} + +static char * +testschedulability(Proc *theproc) +{ + Proc *p; + long H, G, Cb, ticks; + int steps, i; + + /* initialize */ + DPRINT("schedulability test %d\n", theproc->pid); + qschedulability = nil; + for(i=0; (p = psincref(i)) != nil; i++) { + if(p->state == Dead){ + psdecref(p); + continue; + } + if ((p->edf == nil || (p->edf->flags & Admitted) == 0) && p != theproc){ + psdecref(p); + continue; + } + p->edf->testtype = Rl; + p->edf->testtime = 0; + DPRINT("\tInit: edfenqueue %d\n", 
p->pid); + testenq(p); + psdecref(p); + } + H=0; + G=0; + for(steps = 0; steps < Maxsteps; steps++){ + p = qschedulability; + qschedulability = p->edf->testnext; + ticks = p->edf->testtime; + switch (p->edf->testtype){ + case Dl: + H += p->edf->C; + Cb = 0; + DPRINT("\tStep %3d, Ticks %lud, pid %d, deadline, H += %lud → %lud, Cb = %lud\n", + steps, ticks, p->pid, p->edf->C, H, Cb); + if (H+Cb>ticks){ + DPRINT("not schedulable\n"); + return "not schedulable"; + } + p->edf->testtime += p->edf->T - p->edf->D; + p->edf->testtype = Rl; + testenq(p); + break; + case Rl: + DPRINT("\tStep %3d, Ticks %lud, pid %d, release, G %lud, C%lud\n", + steps, ticks, p->pid, p->edf->C, G); + if(ticks && G <= ticks){ + DPRINT("schedulable\n"); + return nil; + } + G += p->edf->C; + p->edf->testtime += p->edf->D; + p->edf->testtype = Dl; + testenq(p); + break; + default: + assert(0); + } + } + DPRINT("probably not schedulable\n"); + return "probably not schedulable"; +} diff -Nru /sys/src/9k/port/edf.h /sys/src/9k/port/edf.h --- /sys/src/9k/port/edf.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/edf.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,54 @@ +enum { + Maxsteps = 200 * 100 * 2, /* 100 periods of 200 procs */ + + /* Edf.flags field */ + Admitted = 0x01, + Sporadic = 0x02, + Yieldonblock = 0x04, + Sendnotes = 0x08, + Deadline = 0x10, + Yield = 0x20, + Extratime = 0x40, + + Infinity = ~0ULL, +}; + +typedef struct Edf Edf; + +struct Edf { + /* All times in µs */ + /* time intervals */ + long D; /* Deadline */ + long Delta; /* Inherited deadline */ + long T; /* period */ + long C; /* Cost */ + long S; /* Slice: time remaining in this period */ + /* times (only low-order bits of absolute time) */ + long r; /* (this) release time */ + long d; /* (this) deadline */ + long t; /* Start of next period, t += T at release */ + long s; /* Time at which this proc was last scheduled */ + /* for schedulability testing */ + long testDelta; + int testtype; /* Release or Deadline */ + long testtime; + Proc 
/*
 * NOTE(review): edf.c as shown here defines edfschedlock and a static
 * thelock but no edftestlock; confirm this declaration still has a
 * definition somewhere.
 */
extern Lock	edftestlock;	/* for atomic admitting/expelling */

/* argument types for the format verbs handled by timeconv in edf.c */
#pragma	varargck	type	"t"		long
#pragma	varargck	type	"U"		uvlong

/* Interface: */
/*
 * edflock returns the proc's Edf with the edf lock held when the proc
 * has one and it is Admitted; otherwise it returns nil with the lock
 * not held.  edfunlock releases that lock.
 */
Edf*		edflock(Proc*);
void		edfunlock(void);
demand load */ +extern char Enovmem[]; /* virtual memory allocation failed */ +extern char Ebadfd[]; /* fd out of range or not open */ +extern char Enofd[]; /* no free file descriptors */ +extern char Eisstream[]; /* seek on a stream */ +extern char Ebadexec[]; /* exec header invalid */ +extern char Etimedout[]; /* connection timed out */ +extern char Econrefused[]; /* connection refused */ +extern char Econinuse[]; /* connection in use */ +extern char Eintr[]; /* interrupted */ +extern char Enomem[]; /* kernel allocate failed */ +extern char Esoverlap[]; /* segments overlap */ +extern char Eshort[]; /* i/o count too small */ +extern char Egreg[]; /* ken has left the building */ +extern char Ebadspec[]; /* bad attach specifier */ +extern char Enoreg[]; /* process has no saved registers */ +extern char Enoattach[]; /* mount/attach disallowed */ +extern char Eshortstat[]; /* stat buffer too small */ +extern char Ebadstat[]; /* malformed stat buffer */ +extern char Ecmdargs[]; /* wrong #args in control message */ +extern char Ebadip[]; /* bad ip address syntax */ diff -Nru /sys/src/9k/port/ethermii.c /sys/src/9k/port/ethermii.c --- /sys/src/9k/port/ethermii.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/ethermii.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,290 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/netif.h" + +#include "ethermii.h" + +static int +miiprobe(Mii* mii, int mask) +{ + MiiPhy *miiphy; + int bit, oui, phyno, r, rmask; + + /* + * Probe through mii for PHYs in mask; + * return the mask of those found in the current probe. + * If the PHY has not already been probed, update + * the Mii information. 
+ */ + rmask = 0; + for(phyno = 0; phyno < NMiiPhy; phyno++){ + bit = 1<mask & bit){ + rmask |= bit; + continue; + } + if(mii->rw(mii, 0, phyno, Bmsr, 0) == -1) + continue; + r = mii->rw(mii, 0, phyno, Phyidr1, 0)<<16; + r |= mii->rw(mii, 0, phyno, Phyidr2, 0); + oui = (r>>10) & 0xffff; + if(oui == 0xffff || oui == 0) + continue; + + if((miiphy = malloc(sizeof(MiiPhy))) == nil) + continue; + + miiphy->mii = mii; + miiphy->phyno = phyno; + miiphy->phyid = r; + miiphy->oui = oui; + + miiphy->anar = ~0; + miiphy->fc = ~0; + miiphy->mscr = ~0; + + mii->phy[phyno] = miiphy; + if(mii->curphy == nil) + mii->curphy = miiphy; + mii->mask |= bit; + mii->nphy++; + + rmask |= bit; + } + return rmask; +} + +int +miimir(Mii* mii, int r) +{ + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + return mii->rw(mii, 0, mii->curphy->phyno, r, 0); +} + +int +miimiw(Mii* mii, int r, int data) +{ + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + return mii->rw(mii, 1, mii->curphy->phyno, r, data); +} + +int +miireset(Mii* mii) +{ + int bmcr, timeo; + + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + bmcr = mii->rw(mii, 0, mii->curphy->phyno, Bmcr, 0); + mii->rw(mii, 1, mii->curphy->phyno, Bmcr, BmcrR|bmcr); + for(timeo = 0; timeo < 1000; timeo++){ + bmcr = mii->rw(mii, 0, mii->curphy->phyno, Bmcr, 0); + if(!(bmcr & BmcrR)) + break; + microdelay(1); + } + if(bmcr & BmcrR) + return -1; + if(bmcr & BmcrI) + mii->rw(mii, 1, mii->curphy->phyno, Bmcr, bmcr & ~BmcrI); + return 0; +} + +int +miiane(Mii* mii, int a, int p, int e) +{ + int anar, bmsr, mscr, r, phyno; + + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + phyno = mii->curphy->phyno; + + mii->rw(mii, 1, phyno, Bmsr, 0); + bmsr = mii->rw(mii, 0, phyno, Bmsr, 0); + if(!(bmsr & BmsrAna)) + return -1; + + if(a != ~0) + anar = (AnaTXFD|AnaTXHD|Ana10FD|Ana10HD) & a; + else if(mii->curphy->anar != ~0) + anar = mii->curphy->anar; + else{ + anar = 
mii->rw(mii, 0, phyno, Anar, 0); + anar &= ~(AnaAP|AnaP|AnaT4|AnaTXFD|AnaTXHD|Ana10FD|Ana10HD); + if(bmsr & Bmsr10THD) + anar |= Ana10HD; + if(bmsr & Bmsr10TFD) + anar |= Ana10FD; + if(bmsr & Bmsr100TXHD) + anar |= AnaTXHD; + if(bmsr & Bmsr100TXFD) + anar |= AnaTXFD; + } + mii->curphy->anar = anar; + + if(p != ~0) + anar |= (AnaAP|AnaP) & p; + else if(mii->curphy->fc != ~0) + anar |= mii->curphy->fc; + mii->curphy->fc = (AnaAP|AnaP) & anar; + + if(bmsr & BmsrEs){ + mscr = mii->rw(mii, 0, phyno, Mscr, 0); + mscr &= ~(Mscr1000TFD|Mscr1000THD); + if(e != ~0) + mscr |= (Mscr1000TFD|Mscr1000THD) & e; + else if(mii->curphy->mscr != ~0) + mscr = mii->curphy->mscr; + else{ + r = mii->rw(mii, 0, phyno, Esr, 0); + if(r & Esr1000THD) + mscr |= Mscr1000THD; + if(r & Esr1000TFD) + mscr |= Mscr1000TFD; + } + mii->curphy->mscr = mscr; + mii->rw(mii, 1, phyno, Mscr, mscr); + } + else + mii->curphy->mscr = 0; + mii->rw(mii, 1, phyno, Anar, anar); + + r = mii->rw(mii, 0, phyno, Bmcr, 0); + if(!(r & BmcrR)){ + r |= BmcrAne|BmcrRan; + mii->rw(mii, 1, phyno, Bmcr, r); + } + + return 0; +} + +int +miistatus(Mii* mii) +{ + MiiPhy *phy; + int anlpar, bmsr, p, r, phyno; + + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + phy = mii->curphy; + phyno = phy->phyno; + + /* + * Check Auto-Negotiation is complete and link is up. + * (Read status twice as the Ls bit is sticky). 
+ */ + bmsr = mii->rw(mii, 0, phyno, Bmsr, 0); + if(!(bmsr & (BmsrAnc|BmsrAna))) + return -1; + + bmsr = mii->rw(mii, 0, phyno, Bmsr, 0); + if(!(bmsr & BmsrLs)){ + phy->link = 0; + return -1; + } + + phy->speed = phy->fd = phy->rfc = phy->tfc = 0; + if(phy->mscr){ + r = mii->rw(mii, 0, phyno, Mssr, 0); + if((phy->mscr & Mscr1000TFD) && (r & Mssr1000TFD)){ + phy->speed = 1000; + phy->fd = 1; + } + else if((phy->mscr & Mscr1000THD) && (r & Mssr1000THD)) + phy->speed = 1000; + } + + anlpar = mii->rw(mii, 0, phyno, Anlpar, 0); + if(phy->speed == 0){ + r = phy->anar & anlpar; + if(r & AnaTXFD){ + phy->speed = 100; + phy->fd = 1; + } + else if(r & AnaTXHD) + phy->speed = 100; + else if(r & Ana10FD){ + phy->speed = 10; + phy->fd = 1; + } + else if(r & Ana10HD) + phy->speed = 10; + } + if(phy->speed == 0) + return -1; + + if(phy->fd){ + p = phy->fc; + r = anlpar & (AnaAP|AnaP); + if(p == AnaAP && r == (AnaAP|AnaP)) + phy->tfc = 1; + else if(p == (AnaAP|AnaP) && r == AnaAP) + phy->rfc = 1; + else if((p & AnaP) && (r & AnaP)) + phy->rfc = phy->tfc = 1; + } + + phy->link = 1; + + return 0; +} + +char* +miidumpphy(Mii* mii, char* p, char* e) +{ + int i, r; + + if(mii == nil || mii->curphy == nil) + return p; + + p = seprint(p, e, "phy: "); + for(i = 0; i < NMiiPhyr; i++){ + if(i && ((i & 0x07) == 0)) + p = seprint(p, e, "\n "); + r = mii->rw(mii, 0, mii->curphy->phyno, i, 0); + p = seprint(p, e, " %4.4ux", r); + } + p = seprint(p, e, "\n"); + + return p; +} + +void +miidetach(Mii* mii) +{ + int i; + + for(i = 0; i < NMiiPhy; i++){ + if(mii->phy[i] == nil) + continue; + free(mii); + mii->phy[i] = nil; + } + free(mii); +} + +Mii* +miiattach(void* ctlr, int mask, int (*rw)(Mii*, int, int, int, int)) +{ + Mii* mii; + + if((mii = malloc(sizeof(Mii))) == nil) + return nil; + mii->ctlr = ctlr; + mii->rw = rw; + + if(miiprobe(mii, mask) == 0){ + free(mii); + mii = nil; + } + + return mii; +} diff -Nru /sys/src/9k/port/ethermii.h /sys/src/9k/port/ethermii.h --- 
/sys/src/9k/port/ethermii.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/ethermii.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,120 @@ +typedef struct Mii Mii; +typedef struct MiiPhy MiiPhy; + +enum { /* registers */ + Bmcr = 0x00, /* Basic Mode Control */ + Bmsr = 0x01, /* Basic Mode Status */ + Phyidr1 = 0x02, /* PHY Identifier #1 */ + Phyidr2 = 0x03, /* PHY Identifier #2 */ + Anar = 0x04, /* Auto-Negotiation Advertisement */ + Anlpar = 0x05, /* AN Link Partner Ability */ + Aner = 0x06, /* AN Expansion */ + Annptr = 0x07, /* AN Next Page TX */ + Annprr = 0x08, /* AN Next Page RX */ + Mscr = 0x09, /* MASTER-SLAVE Control */ + Mssr = 0x0a, /* MASTER-SLAVE Status */ + Esr = 0x0f, /* Extended Status */ + + NMiiPhyr = 32, + NMiiPhy = 32, +}; + +enum { /* Bmcr */ + BmcrSs1 = 0x0040, /* Speed Select[1] */ + BmcrCte = 0x0080, /* Collision Test Enable */ + BmcrDm = 0x0100, /* Duplex Mode */ + BmcrRan = 0x0200, /* Restart Auto-Negotiation */ + BmcrI = 0x0400, /* Isolate */ + BmcrPd = 0x0800, /* Power Down */ + BmcrAne = 0x1000, /* Auto-Negotiation Enable */ + BmcrSs0 = 0x2000, /* Speed Select[0] */ + BmcrLe = 0x4000, /* Loopback Enable */ + BmcrR = 0x8000, /* Reset */ +}; + +enum { /* Bmsr */ + BmsrEc = 0x0001, /* Extended Capability */ + BmsrJd = 0x0002, /* Jabber Detect */ + BmsrLs = 0x0004, /* Link Status */ + BmsrAna = 0x0008, /* Auto-Negotiation Ability */ + BmsrRf = 0x0010, /* Remote Fault */ + BmsrAnc = 0x0020, /* Auto-Negotiation Complete */ + BmsrPs = 0x0040, /* Preamble Suppression Capable */ + BmsrEs = 0x0100, /* Extended Status */ + Bmsr100T2HD = 0x0200, /* 100BASE-T2 HD Capable */ + Bmsr100T2FD = 0x0400, /* 100BASE-T2 FD Capable */ + Bmsr10THD = 0x0800, /* 10BASE-T HD Capable */ + Bmsr10TFD = 0x1000, /* 10BASE-T FD Capable */ + Bmsr100TXHD = 0x2000, /* 100BASE-TX HD Capable */ + Bmsr100TXFD = 0x4000, /* 100BASE-TX FD Capable */ + Bmsr100T4 = 0x8000, /* 100BASE-T4 Capable */ +}; + +enum { /* Anar/Anlpar */ + Ana10G = 0x0001, + + Ana10HD = 0x0020, /* Advertise 10BASE-T 
*/ + Ana10FD = 0x0040, /* Advertise 10BASE-T FD */ + AnaTXHD = 0x0080, /* Advertise 100BASE-TX */ + AnaTXFD = 0x0100, /* Advertise 100BASE-TX FD */ + AnaT4 = 0x0200, /* Advertise 100BASE-T4 */ + AnaP = 0x0400, /* Pause */ + AnaAP = 0x0800, /* Asymmetrical Pause */ + AnaRf = 0x2000, /* Remote Fault */ + AnaAck = 0x4000, /* Acknowledge */ + AnaNp = 0x8000, /* Next Page Indication */ +}; + +enum { /* Mscr */ + Mscr1000THD = 0x0100, /* Advertise 1000BASE-T HD */ + Mscr1000TFD = 0x0200, /* Advertise 1000BASE-T FD */ +}; + +enum { /* Mssr */ + Mssr1000THD = 0x0400, /* Link Partner 1000BASE-T HD able */ + Mssr1000TFD = 0x0800, /* Link Partner 1000BASE-T FD able */ +}; + +enum { /* Esr */ + Esr1000THD = 0x1000, /* 1000BASE-T HD Capable */ + Esr1000TFD = 0x2000, /* 1000BASE-T FD Capable */ + Esr1000XHD = 0x4000, /* 1000BASE-X HD Capable */ + Esr1000XFD = 0x8000, /* 1000BASE-X FD Capable */ +}; + +typedef struct Mii { + Lock; + int nphy; + int mask; + MiiPhy* phy[NMiiPhy]; + MiiPhy* curphy; + + void* ctlr; + int (*rw)(Mii*, int, int, int, int); +} Mii; + +typedef struct MiiPhy { + Mii* mii; + int phyno; + int phyid; + int oui; + + int anar; + int fc; + int mscr; + + int link; + int speed; + int fd; + int rfc; + int tfc; +}; + +extern int miiane(Mii*, int, int, int); +extern Mii* miiattach(void*, int, int (*)(Mii*, int, int, int, int)); +extern void miidetach(Mii* mii); +extern char* miidumpphy(Mii*, char*, char*); +extern int miimir(Mii*, int); +extern int miimiw(Mii*, int, int); +extern int miireset(Mii*); +extern int miistatus(Mii*); diff -Nru /sys/src/9k/port/fault.c /sys/src/9k/port/fault.c --- /sys/src/9k/port/fault.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/fault.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,288 @@ +#define _DBGC_ 'F' +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +int +fault(uintptr addr, int read) +{ + Segment *s; + char *sps; + + if(up == nil) + panic("fault: nil up"); + 
if(up->nlocks){ + panic("fault: %#p %s: %s: nlocks %d %#p\n", addr, up->text, up->user, up->nlocks, up->lastlock? lockgetpc(up->lastlock): 0); + //dumpstack(); + } + + m->pfault++; + spllo(); + + for(;;){ + s = seg(up, addr, rlock); /* leaves s->lk rlocked */ + if(s == nil) + return -1; + if(!read && (s->type&SG_RONLY)){ + runlock(&s->lk); + return -1; + } + + sps = up->psstate; + up->psstate = "Fault"; + if(fixfault(s, addr, read, 1) == 0){ /* runlocks s->lk */ + if(DBGFLG) + checkpages(); + up->psstate = sps; + return 0; + } + up->psstate = sps; + + if(up->procctl == Proc_exitbig) + pexit("out of memory", 1); + + /* + * See the comment in newpage that describes + * how to get here. + */ + } +} + +int +fixfault(Segment *s, uintptr addr, int read, int dommuput) +{ + int type; + Pte **p, *etp; + uintptr soff; + uintmem mmuphys; + Page **pg, *old, *new; + Page *(*fn)(Segment*, uintptr); + uintptr pgsize; + Pages *pages; + + pages = s->pages; /* TO DO: segwalk */ + pgsize = 1<lg2pgsize; + addr &= ~(pgsize-1); + soff = addr-s->base; + + p = &pages->map[soff/pages->ptemapmem]; + if(*p == nil) + *p = ptealloc(); + + etp = *p; + pg = &etp->pages[(soff&(pages->ptemapmem-1))>>pages->lg2pgsize]; + + if(pg < etp->first) + etp->first = pg; + if(pg > etp->last) + etp->last = pg; + + type = s->type&SG_TYPE; + if(*pg == nil){ + switch(type){ + case SG_BSS: /* Zero fill on demand */ + case SG_SHARED: + case SG_STACK: + new = newpage(1, s->pages->lg2pgsize, &s->lk); + if(new == nil) + return -1; + *pg = new; + break; + + case SG_TEXT: /* demand load */ + case SG_DATA: + runlock(&s->lk); + new = imagepage(s->image, s->isec, addr, soff); + rlock(&s->lk); + if(*pg == nil){ + *pg = new; + if(s->flushme) + mmucachectl(new, PG_TXTFLUSH); + }else + putpage(new); + break; + + case SG_PHYSICAL: + fn = s->pseg->pgalloc; + if(fn != nil) + *pg = (*fn)(s, addr); + else { + new = smalloc(sizeof(Page)); + new->pa = s->pseg->pa+(addr-s->base); + new->ref = 1; + new->lg2size = s->pseg->lg2pgsize; + 
if(new->lg2size == 0) + new->lg2size = PGSHFT; /* TO DO */ + *pg = new; + } + break; + default: + panic("fault on demand"); + break; + } + } + mmuphys = 0; + switch(type) { + default: + panic("fault"); + break; + + case SG_TEXT: + DBG("text pg %#p: %#p -> %#P %d\n", pg, addr, (*pg)->pa, (*pg)->ref); + mmuphys = PPN((*pg)->pa) | PTERONLY|PTEVALID; + break; + + case SG_BSS: + case SG_SHARED: + case SG_STACK: + case SG_DATA: /* copy on write */ + DBG("data pg %#p: %#p -> %#P %d\n", pg, addr, (*pg)->pa, (*pg)->ref); + /* + * It's only possible to copy on write if + * we're the only user of the segment. + */ + if(read && sys->copymode == 0 && s->ref == 1) { + mmuphys = PPN((*pg)->pa)|PTERONLY|PTEVALID; + break; + } + + old = *pg; + if(old->ref > 1){ + /* shared (including image pages): make private writable copy */ + new = newpage(0, s->pages->lg2pgsize, &s->lk); + if(new == nil) + return -1; + copypage(old, new); + *pg = new; + putpage(old); + DBG("data' pg %#p: %#p -> %#P %d\n", *pg, addr, old->pa, old->ref); + }else if(old->ref <= 0) + panic("fault: page %#p %#P ref %d <= 0", old, old->pa, old->ref); + mmuphys = PPN((*pg)->pa) | PTEWRITE | PTEVALID; + break; + + case SG_PHYSICAL: + mmuphys = PPN((*pg)->pa) | PTEVALID; + if((s->pseg->attr & SG_RONLY) == 0) + mmuphys |= PTEWRITE; + if((s->pseg->attr & SG_CACHED) == 0) + mmuphys |= PTEUNCACHED; + break; + } + runlock(&s->lk); + + if(dommuput) + mmuput(addr, mmuphys, *pg); + + return 0; +} + +/* + * Called only in a system call + */ +int +okaddr(uintptr addr, long len, int write) +{ + Segment *s; + + if(len >= 0) { + for(;;) { + s = seg(up, addr, nil); + if(s == 0 || (write && (s->type&SG_RONLY))) + break; + + if(addr+len > s->top) { + len -= s->top - addr; + addr = s->top; + continue; + } + return 1; + } + } + return 0; +} + +void* +validaddr(void* addr, long len, int write) +{ + if(!okaddr(PTR2UINT(addr), len, write)){ + pprint("trap: invalid address %#p/%lud in sys call pc=%#P\n", addr, len, userpc(nil)); + 
postnote(up, 1, "sys: bad address in syscall", NDebug); + error(Ebadarg); + } + + return UINT2PTR(addr); +} + +/* + * &s[0] is known to be a valid address. + */ +void* +vmemchr(void *s, int c, int n) +{ + int np; + uintptr a; + void *t; + + a = PTR2UINT(s); + while(ROUNDUP(a, PGSZ) != ROUNDUP(a+n-1, PGSZ)){ + /* spans pages; handle this page */ + np = PGSZ - (a & (PGSZ-1)); + t = memchr(UINT2PTR(a), c, np); + if(t) + return t; + a += np; + n -= np; + if(!iskaddr(a)) + validaddr(UINT2PTR(a), 1, 0); + } + + /* fits in one page */ + return memchr(UINT2PTR(a), c, n); +} + +void +checkpages(void) +{ + uintptr addr, off; + Pte *p; + Page *pg; + Segment **sp, **ep, *s; + Pages *ps; + uint pgsize; + + if(up == nil || up->newtlb) + return; + + for(sp=up->seg, ep=&up->seg[NSEG]; splk); + ps = s->pages; + pgsize = 1<lg2pgsize; + for(addr=s->base; addrtop; addr+=pgsize){ + off = addr - s->base; + if(off >= ps->xsize){ + print("%d %s: seg %ld off %#p outside %#p\n", up->pid, up->text, sp-up->seg, off, ps->xsize); + continue; + } + p = ps->map[off/ps->ptemapmem]; + if(p == nil) + continue; + pg = p->pages[(off&(ps->ptemapmem-1))/pgsize]; + if(pg == 0) + continue; + if(!iskaddr(pg)){ + print("%d %s: invalid page off %#p pg %#p\n", up->pid, up->text, off, pg); + printpages(ps); + continue; + } + checkmmu(addr, pg->pa); + } + runlock(&s->lk); + } +} diff -Nru /sys/src/9k/port/image.c /sys/src/9k/port/image.c --- /sys/src/9k/port/image.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/image.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,324 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#define IHASHSIZE 64 +#define ihash(s) imagealloc.hash[s%IHASHSIZE] + +static void putsection(Section*); + +enum +{ + NIMAGE = 200, +}; + +static struct Imagealloc +{ + Lock; + Image *free; + Image *hash[IHASHSIZE]; + Image lru; +} imagealloc; + +static void freeimage(Image*); +static void cleanimage(Image*); + +void 
+initimage(void) +{ + Image *i, *ie; + + imagealloc.free = malloc(NIMAGE*sizeof(Image)); + if(imagealloc.free == nil) + panic("imagealloc: no memory"); + ie = &imagealloc.free[NIMAGE-1]; + for(i = imagealloc.free; i < ie; i++) + i->next = i+1; + i->next = 0; + imagealloc.lru.next = imagealloc.lru.prev = &imagealloc.lru; + imagealloc.lru.ref = 1; +} + +static Image* +imagereclaim(void) +{ + Image *i; + + i = imagealloc.lru.prev; + if(i->next == i) + return nil; + lock(i); + i->prev->next = i->next; + i->next->prev = i->prev; + unlock(&imagealloc); + cleanimage(i); + return i; +} + +Image* +attachimage(Chan *c) +{ + Image *i, **l; + + lock(&imagealloc); + + /* + * Search the image cache for remains of the text from a previous + * or currently running incarnation + */ + for(i = ihash(c->qid.path); i; i = i->hash) { + if(c->qid.path == i->qid.path) { + lock(i); + if(eqqid(c->qid, i->qid) && + eqqid(c->mqid, i->mqid) && + c->mchan == i->mchan && + c->dev->dc == i->dc) { +//subtype + incref(i); + if(0 && i->ref == 1){ /* remove from LRU list */ + DBG("image %#p was LRU %s\n", i, c->path? c->path->s: "??"); + i->prev->next = i->next; + i->next->prev = i->prev; + } + unlock(&imagealloc); + return i; + } + unlock(i); + } + } + + /* + * imagereclaim frees the least-recently-used cached image + */ + if((i = imagealloc.free) == nil){ + i = imagereclaim(); + if(i == nil){ + i = mallocz(sizeof(*i), 1); + if(i == nil) + error(Enomem); + } + }else + imagealloc.free = i->next; + + lock(i); + i->ref = 1; + incref(c); + i->c = c; + i->dc = c->dev->dc; +//subtype + i->qid = c->qid; + i->mqid = c->mqid; + i->mchan = c->mchan; + l = &ihash(c->qid.path); + i->hash = *l; + *l = i; + unlock(&imagealloc); + + return i; +} + +/* + * i is locked + */ +static void +cleanimage(Image *i) +{ + Chan *c; + Image *f, **l; + int s; + + DBG("freeimage: %p %s\n", i, i->c && i->c->path? 
i->c->path->s: "?"); + + l = &ihash(i->qid.path); + mkqid(&i->qid, ~0, ~0, QTDIR); /* now impossible to find by hash */ + unlock(i); + + c = i->c; + i->c = nil; + if(c == nil || c->ref == 0) + panic("putimage: %#p %#p", c, getcallerpc(&i)); + + lock(&imagealloc); + for(f = *l; f; f = f->hash) { + if(f == i) { + *l = i->hash; + break; + } + l = &f->hash; + } + unlock(&imagealloc); + + for(s = 0; s < nelem(i->section); s++){ + if(i->section[s] != nil){ + putsection(i->section[s]); + i->section[s] = nil; + } + } + + /* let the daemon deal with it */ + ccloseq(c); +} + +static void +freeimage(Image *i) +{ + cleanimage(i); + + lock(&imagealloc); + i->next = imagealloc.free; + imagealloc.free = i; + unlock(&imagealloc); +} + +void +putimage(Image *i) +{ + DBG("putimage: %p ref=%d\n", i, i->ref); + lock(i); + if(decref(i) != 0){ + unlock(i); + return; + } + + /* TO DO: LRU recycling, with a quick cull if memory runs low */ + + freeimage(i); + +} + +Section* +newsection(uintptr size, ulong fstart, ulong flen) +{ + Section *s; + int npages, lg2pgsize; + + lg2pgsize = PGSHFT; /* TO DO: pick a page size */ + + if(size & ((1<>lg2pgsize; + if(npages > (SEGMAPSIZE*PTEPERTAB)) + error(Enovmem); + + s = smalloc(sizeof(*s) + npages*sizeof(Page*)); + s->fstart = fstart; + s->flen = flen; + s->xsize = size; + s->npages = npages; + s->lg2pgsize = lg2pgsize; + return s; +} + +static void +putsection(Section *s) +{ + int i; + Page *p; + + for(i = 0; i < s->npages; i++){ + p = s->pages[i]; + if(p != nil){ + putpage(p); + s->pages[i] = nil; + } + } +} + +static void +faulterror(char *s, Chan *c, int freemem) +{ + char buf[ERRMAX]; + + if(c && c->path){ + snprint(buf, sizeof buf, "%s accessing %s: %s", s, c->path->s, up->errstr); + s = buf; + } + if(up->nerrlab) { + postnote(up, 1, s, NDebug); + error(s); + } + pexit(s, freemem); +} + +/* + * return a page from a text/data image, allocating and loading on demand if needed. 
+ */ +Page* +imagepage(Image *image, int isec, uintptr addr, uintptr soff) +{ + Page *new, *ep, **pg; + KMap *k; + Chan *c; + int n, ask; + char *kaddr; + ulong daddr; + uintptr pgsize; + Section *s; + + s = image->section[isec]; + pgsize = 1<lg2pgsize; + daddr = s->fstart+soff; + + DBG("read section %#p addr %#p o %#p da %lud sz %#p xsize %#p\n", s, addr, soff, daddr, pgsize, s->xsize); + if(soff >= s->xsize) + panic("imageread"); + pg = &s->pages[soff >> s->lg2pgsize]; + new = *pg; + if(new != nil){ + incref(new); + return new; + } + + c = image->c; + ask = s->flen-soff; + if(ask > pgsize) + ask = pgsize; + + new = newpage(0, s->lg2pgsize, nil); + if(new == nil) + panic("pio"); /* can't happen, ps wasn't locked */ + + qlock(&s->lk); + + /* re-check under lock before starting IO */ + ep = *pg; + if(ep != nil){ + qunlock(&s->lk); + putpage(new); + DBG("race %#p %#p -> %#p\n", s, soff, ep); + incref(ep); + return ep; + } + + k = kmap(new); + kaddr = VA(k); + + while(waserror()){ + if(strcmp(up->errstr, Eintr) == 0) + continue; + qunlock(&s->lk); + kunmap(k); + putpage(new); + faulterror(Eioload, c, 0); + } + + n = c->dev->read(c, kaddr, ask, daddr); + if(n != ask) + faulterror(Eioload, c, 0); + if(ask < pgsize) + memset(kaddr+ask, 0, pgsize-ask); + + poperror(); + kunmap(k); + + *pg = new; /* update the page map */ + + qunlock(&s->lk); + + incref(new); + return new; +} diff -Nru /sys/src/9k/port/initcode.c /sys/src/9k/port/initcode.c --- /sys/src/9k/port/initcode.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/initcode.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,42 @@ +/* + * IMPORTANT! DO NOT ADD LIBRARY CALLS TO THIS FILE. + * The entire text image must fit on one page + * (and there's no data segment, so any read/write data must be on the stack). 
struct Iofilter {
	Lock;
	ulong	nsamples;		/* total samples taken */
	struct {
		ulong	b;
		ulong	lat[Lsz];	/* latency for bytes in b, indexed by Lsum, Lmax, Lavg */
	} samples[Niosamples];

	ulong	bytes;
	ulong	lmin;
	ulong	lmax;
	vlong	lsum;
	ulong	nlat;
};
1) + return -2; + if(l->ld[1] == 0) + c = k[1]; + else if(l->ld[1] != k[1]) + continue; + else if(n == 2) + return -3; + else + c = k[2]; + for(p=l->si; *p!=0; p++) + if(*p == c) + return l->so[p - l->si]; + return -1; + } + return -1; +} diff -Nru /sys/src/9k/port/latin1.h /sys/src/9k/port/latin1.h --- /sys/src/9k/port/latin1.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/latin1.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,100 @@ + " ", " i", L"␣ı", + "!~", "-=~", L"≄≇≉", + "!", "!<=>?bmp", L"¡≮≠≯‽⊄∉⊅", + "\"*", "IUiu", L"ΪΫϊϋ", + "\"", "\"AEIOUYaeiouy", L"¨ÄËÏÖÜŸäëïöüÿ", + "$*", "fhk", L"ϕϑϰ", + "$", "BEFHILMRVaefglopv", L"ℬℰℱℋℐℒℳℛƲɑℯƒℊℓℴ℘ʋ", + "\'\"", "Uu", L"Ǘǘ", + "\'", "\'ACEILNORSUYZacegilnorsuyz", L"´ÁĆÉÍĹŃÓŔŚÚÝŹáćéģíĺńóŕśúýź", + "*", "*ABCDEFGHIKLMNOPQRSTUWXYZabcdefghiklmnopqrstuwxyz", L"∗ΑΒΞΔΕΦΓΘΙΚΛΜΝΟΠΨΡΣΤΥΩΧΗΖαβξδεφγθικλμνοπψρστυωχηζ", + "+", "-O", L"±⊕", + ",", ",ACEGIKLNORSTUacegiklnorstu", L"¸ĄÇĘĢĮĶĻŅǪŖŞŢŲąçęģįķļņǫŗşţų", + "-*", "l", L"ƛ", + "-", "+-2:>DGHILOTZbdghiltuz~", L"∓­ƻ÷→ÐǤĦƗŁ⊖ŦƵƀðǥℏɨłŧʉƶ≂", + ".", ".CEGILOZceglz", L"·ĊĖĠİĿ⊙Żċėġŀż", + "/", "Oo", L"Øø", + "1", ".234568", L"․½⅓¼⅕⅙⅛", + "2", "-.35", L"ƻ‥⅔⅖", + "3", ".458", L"…¾⅗⅜", + "4", "5", L"⅘", + "5", "68", L"⅚⅝", + "7", "8", L"⅞", + ":", "()-=", L"☹☺÷≔", + "~", L"←«≤≶≲", + "=", ":<=>OV", L"≕⋜≡⋝⊜⇒", + ">!", "=~", L"≩⋧", + ">", "<=>~", L"≷≥»≳", + "?", "!?", L"‽¿", + "@\'", "\'", L"ъ", + "@@", "\'EKSTYZekstyz", L"ьЕКСТЫЗекстыз", + "@C", "Hh", L"ЧЧ", + "@E", "Hh", L"ЭЭ", + "@K", "Hh", L"ХХ", + "@S", "CHch", L"ЩШЩШ", + "@T", "Ss", L"ЦЦ", + "@Y", "AEOUaeou", L"ЯЕЁЮЯЕЁЮ", + "@Z", "Hh", L"ЖЖ", + "@c", "h", L"ч", + "@e", "h", L"э", + "@k", "h", L"х", + "@s", "ch", L"щш", + "@t", "s", L"ц", + "@y", "aeou", L"яеёю", + "@z", "h", L"ж", + "@", "ABDFGIJLMNOPRUVXabdfgijlmnopruvx", L"АБДФГИЙЛМНОПРУВХабдфгийлмнопрувх", + "A", "E", L"Æ", + "C", "ACU", L"⋂ℂ⋃", + "Dv", "Zz", L"DŽDž", + "D", "-e", L"Ð∆", + "G", "-", L"Ǥ", + "H", "-H", L"Ħℍ", + "I", "-J", L"ƗIJ", + "L", "&-Jj|", L"⋀ŁLJLj⋁", + "M", "#48bs", L"♮♩♪♭♯", + 
"N", "JNj", L"NJℕNj", + "O", "*+-./=EIcoprx", L"⊛⊕⊖⊙⊘⊜ŒƢ©⊚℗®⊗", + "P", "OP", L"💩ℙ", + "Q", "Q", L"ℚ", + "R", "R", L"ℝ", + "S", "S", L"§", + "T", "-u", L"Ŧ⊨", + "V", "=", L"⇐", + "Y", "R", L"Ʀ", + "Z", "-ACSZ", L"Ƶℤ", + "^", "ACEGHIJOSUWYaceghijosuwy", L"ÂĈÊĜĤÎĴÔŜÛŴŶâĉêĝĥîĵôŝûŵŷ", + "_\"", "AUau", L"ǞǕǟǖ", + "_,", "Oo", L"Ǭǭ", + "_.", "Aa", L"Ǡǡ", + "_", "AEIOU_aeiou", L"ĀĒĪŌŪ¯āēīōū", + "`\"", "Uu", L"Ǜǜ", + "`", "AEIOUaeiou", L"ÀÈÌÒÙàèìòù", + "a", "ben", L"↔æ∠", + "b", "()+-0123456789=bknpqru", L"₍₎₊₋₀₁₂₃₄₅₆₇₈₉₌♝♚♞♟♛♜•", + "c", "$Oagu", L"¢©∩≅∪", + "dv", "z", L"dž", + "d", "-adegz", L"ð↓‡°†ʣ", + "e", "$lmns", L"€⋯—–∅", + "f", "a", L"∀", + "g", "$-r", L"¤ǥ∇", + "h", "-v", L"ℏƕ", + "i", "-bfjps", L"ɨ⊆∞ij⊇∫", + "l", "\"$&\'-jz|", L"“£∧‘łlj⋄∨", + "m", "iou", L"µ∈×", + "n", "jo", L"nj¬", + "o", "AOUaeiu", L"Å⊚Ůåœƣů", + "p", "Odgrt", L"℗∂¶∏∝", + "r", "\"\'O", L"”’®", + "s", "()+-0123456789=abnoprstu", L"⁽⁾⁺⁻⁰¹²³⁴⁵⁶⁷⁸⁹⁼ª⊂ⁿº⊃√ß∍∑", + "t", "-efmsu", L"ŧ∃∴™ς⊢", + "u", "-AEGIOUaegiou", L"ʉĂĔĞĬŎŬ↑ĕğĭŏŭ", + "v\"", "Uu", L"Ǚǚ", + "v", "ACDEGIKLNORSTUZacdegijklnorstuz", L"ǍČĎĚǦǏǨĽŇǑŘŠŤǓŽǎčďěǧǐǰǩľňǒřšťǔž", + "w", "bknpqr", L"♗♔♘♙♕♖", + "x", "O", L"⊗", + "y", "$", L"¥", + "z", "-", L"ƶ", + "|", "Pp|", L"Þþ¦", + "~!", "=", L"≆", + "~", "-=AINOUainou~", L"≃≅ÃĨÑÕŨãĩñõũ≈", diff -Nru /sys/src/9k/port/lib.h /sys/src/9k/port/lib.h --- /sys/src/9k/port/lib.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/lib.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,282 @@ +/* + * functions (possibly) linked in, complete, from libc. 
+ */ +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define offsetof(s, m) (ulong)(&(((s*)0)->m)) +#define assert(x) if(x){}else _assert("x") + +/* + * mem routines + */ +extern void* memccpy(void*, void*, int, ulong); +extern void* memset(void*, int, ulong); +extern int memcmp(void*, void*, ulong); +extern void* memmove(void*, void*, ulong); +extern void* memchr(void*, int, ulong); + +/* + * string routines + */ +extern char* strcat(char*, char*); +extern char* strchr(char*, int); +extern int strcmp(char*, char*); +extern char* strcpy(char*, char*); +extern char* strecpy(char*, char*, char*); +extern char* strncat(char*, char*, long); +extern char* strncpy(char*, char*, long); +extern int strncmp(char*, char*, long); +extern char* strrchr(char*, int); +extern long strlen(char*); +extern char* strstr(char*, char*); +extern int cistrncmp(char*, char*, int); +extern int cistrcmp(char*, char*); +extern int tokenize(char*, char**, int); + +enum +{ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF, /* 21-bit rune */ + Runemask = 0x1FFFFF, /* bits used by runes (see grep) */ +}; + +/* + * rune routines + */ +extern int runetochar(char*, Rune*); +extern int chartorune(Rune*, char*); +extern int runelen(long); +extern int fullrune(char*, int); +extern int utflen(char*); +extern int utfnlen(char*, long); +extern char* utfrune(char*, long); + +/* + * malloc + */ +extern void* malloc(usize); +extern void* mallocz(usize, int); +extern void free(void*); +extern ulong msize(void*); +extern void* mallocalign(usize, ulong, long, ulong); +extern void* realloc(void*, usize); +extern void setmalloctag(void*, ulong); +extern void setrealloctag(void*, ulong); +extern ulong getmalloctag(void*); +extern ulong getrealloctag(void*); + +/* + * print routines + */ +typedef struct Fmt Fmt; +struct 
Fmt{ + uchar runes; /* output buffer is runes or chars? */ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt *); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + int r; /* % format Rune */ + int width; + int prec; + ulong flags; +}; + +enum { + FmtWidth = 1, + FmtLeft = FmtWidth<<1, + FmtPrec = FmtLeft<<1, + FmtSharp = FmtPrec<<1, + FmtSpace = FmtSharp<<1, + FmtSign = FmtSpace<<1, + FmtZero = FmtSign<<1, + FmtUnsigned = FmtZero<<1, + FmtShort = FmtUnsigned<<1, + FmtLong = FmtShort<<1, + FmtVLong = FmtLong<<1, + FmtComma = FmtVLong<<1, + FmtByte = FmtComma<<1, + + FmtFlag = FmtByte<<1 +}; + +extern int print(char*, ...); +extern char* seprint(char*, char*, char*, ...); +extern char* vseprint(char*, char*, char*, va_list); +extern int snprint(char*, int, char*, ...); +extern int vsnprint(char*, int, char*, va_list); +extern int sprint(char*, char*, ...); + +#pragma varargck argpos fmtprint 2 +#pragma varargck argpos print 1 +#pragma varargck argpos seprint 3 +#pragma varargck argpos snprint 3 +#pragma varargck argpos sprint 2 + +#pragma varargck type "lld" vlong +#pragma varargck type "llx" vlong +#pragma varargck type "lld" uvlong +#pragma varargck type "llx" uvlong +#pragma varargck type "ld" long +#pragma varargck type "lx" long +#pragma varargck type "ld" ulong +#pragma varargck type "lx" ulong +#pragma varargck type "d" int +#pragma varargck type "x" int +#pragma varargck type "c" int +#pragma varargck type "C" int +#pragma varargck type "d" uint +#pragma varargck type "x" uint +#pragma varargck type "c" uint +#pragma varargck type "C" uint +#pragma varargck type "s" char* +#pragma varargck type "q" char* +#pragma varargck type "S" Rune* +#pragma varargck type "%" void +#pragma varargck type "p" uintptr +#pragma varargck type "p" void* 
+#pragma varargck flag ',' +#pragma varargck type "<" void* +#pragma varargck type "[" void* +#pragma varargck type "H" void* +#pragma varargck type "lH" void* + +extern int fmtinstall(int, int (*)(Fmt*)); +extern int fmtprint(Fmt*, char*, ...); +extern int fmtstrcpy(Fmt*, char*); +extern char* fmtstrflush(Fmt*); +extern int fmtstrinit(Fmt*); + +/* + * quoted strings + */ +extern void quotefmtinstall(void); + +/* + * Time-of-day + */ +extern void cycles(uvlong*); /* 64-bit value of the cycle counter if there is one, 0 if there isn't */ + +/* + * one-of-a-kind + */ +extern int abs(int); +extern int atoi(char*); +extern char* cleanname(char*); +extern int dec16(uchar*, int, char*, int); +extern int enc16(char*, int, uchar*, int); +extern int encodefmt(Fmt*); +extern int dec64(uchar*, int, char*, int); +extern uintptr getcallerpc(void*); +extern int getfields(char*, char**, int, int, char*); +extern int gettokens(char *, char **, int, char *); +extern void qsort(void*, long, long, int (*)(void*, void*)); +extern long strtol(char*, char**, int); +extern ulong strtoul(char*, char**, int); +extern vlong strtoll(char*, char**, int); +extern uvlong strtoull(char*, char**, int); + +/* + * Syscall data structures + */ +#define MORDER 0x0003 /* mask for bits defining order of mounting */ +#define MREPL 0x0000 /* mount replaces object */ +#define MBEFORE 0x0001 /* mount goes before others in union directory */ +#define MAFTER 0x0002 /* mount goes after others in union directory */ +#define MCREATE 0x0004 /* permit creation in mounted directory */ +#define MCACHE 0x0010 /* cache some data */ +#define MMASK 0x0017 /* all bits on */ + +#define OREAD 0 /* open for read */ +#define OWRITE 1 /* write */ +#define ORDWR 2 /* read and write */ +#define OEXEC 3 /* execute, == read but check execute permission */ +#define OTRUNC 16 /* or'ed in (except for exec), truncate file first */ +#define OCEXEC 32 /* or'ed in, close on exec */ +#define ORCLOSE 64 /* or'ed in, remove on close */ 
+#define OEXCL 0x1000 /* or'ed in, exclusive create */ + +#define NCONT 0 /* continue after note */ +#define NDFLT 1 /* terminate after note */ +#define NSAVE 2 /* clear note but hold state */ +#define NRSTR 3 /* restore saved state */ + +typedef struct Qid Qid; +typedef struct Dir Dir; +typedef struct OWaitmsg OWaitmsg; +typedef struct Waitmsg Waitmsg; + +#define ERRMAX 128 /* max length of error string */ +#define KNAMELEN 28 /* max length of name held in kernel */ + +/* bits in Qid.type */ +#define QTDIR 0x80 /* type bit for directories */ +#define QTAPPEND 0x40 /* type bit for append only files */ +#define QTEXCL 0x20 /* type bit for exclusive use files */ +#define QTMOUNT 0x10 /* type bit for mounted channel */ +#define QTAUTH 0x08 /* type bit for authentication file */ +#define QTFILE 0x00 /* plain file */ + +/* bits in Dir.mode */ +#define DMDIR 0x80000000 /* mode bit for directories */ +#define DMAPPEND 0x40000000 /* mode bit for append only files */ +#define DMEXCL 0x20000000 /* mode bit for exclusive use files */ +#define DMMOUNT 0x10000000 /* mode bit for mounted channel */ +#define DMREAD 0x4 /* mode bit for read permission */ +#define DMWRITE 0x2 /* mode bit for write permission */ +#define DMEXEC 0x1 /* mode bit for execute permission */ + +struct Qid +{ + uvlong path; + ulong vers; + uchar type; +}; + +struct Dir { + /* system-modified data */ + ushort type; /* server type */ + uint dev; /* server subtype */ + /* file data */ + Qid qid; /* unique id from server */ + ulong mode; /* permissions */ + ulong atime; /* last read time */ + ulong mtime; /* last write time */ + vlong length; /* file length: see */ + char *name; /* last element of path */ + char *uid; /* owner name */ + char *gid; /* group name */ + char *muid; /* last modifier name */ +}; + +struct OWaitmsg +{ + char pid[12]; /* of loved one */ + char time[3*12]; /* of loved one and descendants */ + char msg[64]; /* compatibility BUG */ +}; + +struct Waitmsg +{ + int pid; /* of loved one */ + 
ulong time[3]; /* of loved one and descendants */ + char msg[ERRMAX]; /* actually variable-size in user mode */ +}; + +typedef +struct IOchunk +{ + void *addr; + ulong len; +} IOchunk; + +extern char etext[]; +extern char edata[]; +extern char end[]; + +extern char* smprint(char*, ...); +extern char* strdup(char*); diff -Nru /sys/src/9k/port/mcslock.c /sys/src/9k/port/mcslock.c --- /sys/src/9k/port/mcslock.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/mcslock.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,225 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/edf.h" + +#define D(c) if(0)cgapost((c)) + +static void +mcslock(Lock *lk, LockEntry *ql) +{ + LockEntry *pred; + + D('!'); + ql->next = nil; + ql->locked = 0; + pred = xchgm(&lk->head, ql); + if(pred != nil){ + ql->locked = 1; + sfence(); /* ensure reader sees updated value */ + pred->next = ql; + if(1){ /* qemu made us do it */ + while(ql->locked) + pause(); /* spin, could monwait */ + }else{ + while(mwait32(&ql->locked, 1) == 1) + {} + } + } +} + +static int +mcscanlock(Lock *lk, LockEntry *ql) +{ + D('?'); + ql->next = nil; + ql->locked = 0; + return CASV(&lk->head, nil, ql); +} + +static LockEntry* +mcsunlock(Lock *lk, LockEntry *ql) +{ + D('#'); + if(ql->next != nil || !CASV(&lk->head, ql, nil)){ + /* successor, wait for list to catch up */ + while(ql->next == nil) + {} + ql->next->locked = 0; + sfence(); + } + return ql; +} + +static LockEntry* +allocle(Lock *l, uintptr pc) +{ + LockEntry *a; + int i; + + a = &m->locks[0]; + if(a->used != nil){ + i = nelem(m->locks)-1; + while(--i >= 0){ + a++; + if(a->used == nil) + break; + } + if(i < 0) + panic("allocle: need more m->locks"); + } + a->used = l; /* must be first, to claim against interrupts */ + a->pc = pc; + a->p = up; + a->m = m; + a->isilock = 0; + return a; +} + +static LockEntry* +findle(Lock *l) +{ + LockEntry *a; + + a = l->e; + if(a->used != l) + panic("findle"); + return a; +} + +int 
+lock(Lock *l) +{ + LockEntry *ql; + + if(up != nil) + up->nlocks++; + ql = allocle(l, getcallerpc(&l)); + mcslock(l, ql); + l->e = ql; + return 0; +} + +int +canlock(Lock *l) +{ + LockEntry *ql; + + if(up != nil) + up->nlocks++; + ql = allocle(l, getcallerpc(&l)); + if(mcscanlock(l, ql)){ + l->e = ql; + return 1; + } + ql->used = nil; + if(up != nil) + up->nlocks--; + return 0; +} + +void +unlock(Lock *l) +{ + LockEntry *ql; + + if(l->head == nil){ + print("unlock: not locked: pc %#p\n", getcallerpc(&l)); + return; + } + ql = findle(l); + if(ql->isilock) + panic("unlock of ilock: pc %#p", getcallerpc(&l)); + if(ql->p != up) + panic("unlock: up changed: pc %#p, acquired at pc %#p, lock p %#p, unlock up %#p", + getcallerpc(&l), ql->pc, ql->p, up); + mcsunlock(l, ql); + ql->used = nil; + if(up != nil && --up->nlocks == 0 && up->delaysched && islo()){ + /* + * Call sched if the need arose while locks were held + * But, don't do it from interrupt routines, hence the islo() test + */ + sched(); + } +} + +void +ilock(Lock *l) +{ + uintptr pc; + Mreg s; + LockEntry *ql; + + pc = getcallerpc(&l); + s = splhi(); + ql = allocle(l, pc); + ql->isilock = 1; + ql->sr = s; + /* the old taslock code would splx(s) to allow interrupts while waiting (if not nested) */ + mcslock(l, ql); + l->e = ql; + m->ilockdepth++; + m->ilockpc = pc; + if(up != nil) + up->lastilock = l; +} + +void +iunlock(Lock *l) +{ + Mreg s; + LockEntry *ql; + + if(islo()) + panic("iunlock while lo: pc %#p\n", getcallerpc(&l)); + ql = findle(l); + if(!ql->isilock) + panic("iunlock of lock: pc %#p\n", getcallerpc(&l)); + if(ql->m != m){ + panic("iunlock by cpu%d, locked by cpu%d: pc %#p\n", + m->machno, ql->m->machno, getcallerpc(&l)); + } + mcsunlock(l, ql); + s = ql->sr; + ql->used = nil; + m->ilockdepth--; + if(up != nil) + up->lastilock = nil; + splx(s); +} + +int +ownlock(Lock *l) +{ + int i; + + for(i = 0; i < nelem(m->locks); i++) + if(m->locks[i].used == l) + return 1; + return 0; +} + +uintptr 
+lockgetpc(Lock *l) +{ + LockEntry *ql; + + ql = l->e; + if(ql != nil && ql->used == l) + return ql->pc; + return 0; +} + +void +locksetpc(Lock *l, uintptr pc) +{ + LockEntry *ql; + + ql = l->e; + if(ql != nil && ql->used == l && ql->m == m) + ql->pc = pc; +} diff -Nru /sys/src/9k/port/mul64fract.c /sys/src/9k/port/mul64fract.c --- /sys/src/9k/port/mul64fract.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/mul64fract.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,39 @@ +#include "u.h" + +/* mul64fract(uvlong*r, uvlong a, uvlong b) + * + * Multiply two 64 numbers and return the middle 64 bits of the 128 bit result. + * + * The assumption is that one of the numbers is a + * fixed point number with the integer portion in the + * high word and the fraction in the low word. + * + * There should be an assembler version of this routine + * for each architecture. This one is intended to + * make ports easier. + * + * ignored r0 = lo(a0*b0) + * lsw of result r1 = hi(a0*b0) +lo(a0*b1) +lo(a1*b0) + * msw of result r2 = hi(a0*b1) +hi(a1*b0) +lo(a1*b1) + * ignored r3 = hi(a1*b1) + */ + +void +mul64fract(uvlong *r, uvlong a, uvlong b) +{ + uvlong bh, bl; + uvlong ah, al; + uvlong res; + + bl = b & 0xffffffffULL; + bh = b >> 32; + al = a & 0xffffffffULL; + ah = a >> 32; + + res = (al*bl)>>32; + res += (al*bh); + res += (ah*bl); + res += (ah*bh)<<32; + + *r = res; +} diff -Nru /sys/src/9k/port/net.c /sys/src/9k/port/net.c --- /sys/src/9k/port/net.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/net.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,65 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +void +hnputv(void *p, uvlong v) +{ + uchar *a; + + a = p; + hnputl(a, v>>32); + hnputl(a+4, v); +} + +void +hnputl(void *p, uint v) +{ + uchar *a; + + a = p; + a[0] = v>>24; + a[1] = v>>16; + a[2] = v>>8; + a[3] = v; +} + +void +hnputs(void *p, ushort v) +{ + uchar *a; + + a = p; + a[0] = v>>8; + a[1] = v; +} + +uvlong +nhgetv(void 
*p) +{ + uchar *a; + + a = p; + return ((vlong)nhgetl(a) << 32) | nhgetl(a+4); +} + +uint +nhgetl(void *p) +{ + uchar *a; + + a = p; + return (a[0]<<24)|(a[1]<<16)|(a[2]<<8)|(a[3]<<0); +} + +ushort +nhgets(void *p) +{ + uchar *a; + + a = p; + return (a[0]<<8)|(a[1]<<0); +} diff -Nru /sys/src/9k/port/netif.c /sys/src/9k/port/netif.c --- /sys/src/9k/port/netif.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/netif.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,760 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" + +static int netown(Netfile*, char*, int); +static int openfile(Netif*, int); +static char* matchtoken(char*, char*); +static char* netmulti(Netif*, Netfile*, uchar*, int); +static int parseaddr(uchar*, char*, int); + +/* + * set up a new network interface + */ +void +netifinit(Netif *nif, char *name, int nfile, ulong limit) +{ + if(nif->inited) + return; + strncpy(nif->name, name, KNAMELEN-1); + nif->name[KNAMELEN-1] = 0; + nif->nfile = nfile; + nif->f = malloc(nfile*sizeof(Netfile*)); + if(nif->f == nil) + panic("netifinit: %s: out of memory", name); + memset(nif->f, 0, nfile*sizeof(Netfile*)); + nif->limit = limit; + nif->inited = 1; +} + +/* + * generate a 3 level directory + */ +static int +netifgen(Chan *c, char*, Dirtab *vp, int, int i, Dir *dp) +{ + Qid q; + Netif *nif = (Netif*)vp; + Netfile *f; + int t; + int perm; + char *o; + + q.type = QTFILE; + q.vers = 0; + + /* top level directory contains the name of the network */ + if(c->qid.path == 0){ + switch(i){ + case DEVDOTDOT: + q.path = 0; + q.type = QTDIR; + devdir(c, q, ".", 0, eve, 0555, dp); + break; + case 0: + q.path = N2ndqid; + q.type = QTDIR; + strcpy(up->genbuf, nif->name); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + break; + default: + return -1; + } + return 1; + } + + /* second level contains clone plus all the conversations */ + t = NETTYPE(c->qid.path); + if(t == N2ndqid || t == 
Ncloneqid || t == Naddrqid || + t == Nstatqid || t == Nifstatqid || t == Nmtuqid || t == Nmaxmtuqid){ + switch(i) { + case DEVDOTDOT: + q.type = QTDIR; + q.path = 0; + devdir(c, q, ".", 0, eve, DMDIR|0555, dp); + break; + case 0: + q.path = Ncloneqid; + devdir(c, q, "clone", 0, eve, 0666, dp); + break; + case 1: + q.path = Naddrqid; + devdir(c, q, "addr", 0, eve, 0666, dp); + break; + case 2: + q.path = Nstatqid; + devdir(c, q, "stats", 0, eve, 0444, dp); + break; + case 3: + q.path = Nifstatqid; + devdir(c, q, "ifstats", 0, eve, 0444, dp); + break; + case 4: + q.path = Nmtuqid; + devdir(c, q, "mtu", 0, eve, 0444, dp); + break; + case 5: + q.path = Nmaxmtuqid; + devdir(c, q, "maxmtu", 0, eve, 0444, dp); + break; + default: + i -= 6; + if(i >= nif->nfile) + return -1; + if(nif->f[i] == 0) + return 0; + q.type = QTDIR; + q.path = NETQID(i, N3rdqid); + sprint(up->genbuf, "%d", i); + devdir(c, q, up->genbuf, 0, eve, DMDIR|0555, dp); + break; + } + return 1; + } + + /* third level */ + f = nif->f[NETID(c->qid.path)]; + if(f == 0) + return 0; + if(*f->owner){ + o = f->owner; + perm = f->mode; + } else { + o = eve; + perm = 0666; + } + switch(i){ + case DEVDOTDOT: + q.type = QTDIR; + q.path = N2ndqid; + strcpy(up->genbuf, nif->name); + devdir(c, q, up->genbuf, 0, eve, DMDIR|0555, dp); + break; + case 0: + q.path = NETQID(NETID(c->qid.path), Ndataqid); + devdir(c, q, "data", 0, o, perm, dp); + break; + case 1: + q.path = NETQID(NETID(c->qid.path), Nctlqid); + devdir(c, q, "ctl", 0, o, perm, dp); + break; + case 2: + q.path = NETQID(NETID(c->qid.path), Nstatqid); + devdir(c, q, "stats", 0, eve, 0444, dp); + break; + case 3: + q.path = NETQID(NETID(c->qid.path), Ntypeqid); + devdir(c, q, "type", 0, eve, 0444, dp); + break; + case 4: + q.path = NETQID(NETID(c->qid.path), Nifstatqid); + devdir(c, q, "ifstats", 0, eve, 0444, dp); + break; + default: + return -1; + } + return 1; +} + +Walkqid* +netifwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname) +{ + return 
devwalk(c, nc, name, nname, (Dirtab *)nif, 0, netifgen); +} + +Chan* +netifopen(Netif *nif, Chan *c, int omode) +{ + int id; + Netfile *f; + + id = 0; + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Eperm); + } else { + switch(NETTYPE(c->qid.path)){ + case Ndataqid: + case Nctlqid: + id = NETID(c->qid.path); + openfile(nif, id); + break; + case Ncloneqid: + id = openfile(nif, -1); + c->qid.path = NETQID(id, Nctlqid); + break; + default: + if(omode != OREAD) + error(Ebadarg); + } + switch(NETTYPE(c->qid.path)){ + case Ndataqid: + case Nctlqid: + f = nif->f[id]; + if(netown(f, up->user, omode&7) < 0){ + netifclose(nif, c); + error(Eperm); + } + break; + } + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->iounit = qiomaxatomic; + return c; +} + +long +netifread(Netif *nif, Chan *c, void *a, long n, vlong off) +{ + int i; + Netfile *f; + char *p, *op, *e; + long offset; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, (Dirtab*)nif, 0, netifgen); + + offset = off; + switch(NETTYPE(c->qid.path)){ + case Ndataqid: + f = nif->f[NETID(c->qid.path)]; + return qread(f->iq, a, n); + case Nctlqid: + return readnum(offset, a, n, NETID(c->qid.path), NUMSIZE); + case Nstatqid: + p = op = malloc(READSTR); + if(p == nil) + return 0; + e = p + READSTR; + p = seprint(p, e, "in: %llud\n", nif->inpackets); + p = seprint(p, e, "link: %d\n", nif->link); + p = seprint(p, e, "out: %llud\n", nif->outpackets); + p = seprint(p, e, "crc errs: %d\n", nif->crcs); + p = seprint(p, e, "overflows: %d\n", nif->overflows); + p = seprint(p, e, "input overflows: %d\n", nif->inoverflows); + p = seprint(p, e, "output overflows: %d\n", nif->outoverflows); + p = seprint(p, e, "loopback frames: %d\n", nif->loopbacks); + p = seprint(p, e, "framing errs: %d\n", nif->frames); + p = seprint(p, e, "buffer errs: %d\n", nif->buffs); + p = seprint(p, e, "output errs: %d\n", nif->oerrs); + p = seprint(p, e, "prom: %d\n", nif->prom); + p = seprint(p, e, "mbps: %d\n", 
nif->mbps); + p = seprint(p, e, "limit: %d\n", nif->limit); + p = seprint(p, e, "addr: "); + for(i = 0; i < nif->alen; i++) + p = seprint(p, e, "%2.2ux", nif->addr[i]); + p = seprint(p, e, "\n"); + seprint(p, e, "oq len: %d\n", qblen(nif->oq)); + n = readstr(offset, a, n, op); + free(op); + return n; + case N3statqid: + f = nif->f[NETID(c->qid.path)]; + p = op = malloc(READSTR); + if(p == nil) + return 0; + e = p + READSTR; + p = seprint(p, e, "in qlen: %ud\n", qblen(f->iq)); + seprint(p, e, "input overflows: %ud\n", f->inoverflows); + n = readstr(offset, a, n, op); + free(op); + return n; + case Naddrqid: + p = op = malloc(READSTR); + if(p == nil) + return 0; + e = p + READSTR; + for(i = 0; i < nif->alen; i++) + p = seprint(p, e, "%2.2ux", nif->addr[i]); + n = readstr(offset, a, n, op); + free(op); + return n; + case Ntypeqid: + f = nif->f[NETID(c->qid.path)]; + return readnum(offset, a, n, f->type, NUMSIZE); + case Nifstatqid: + return 0; + case Nmtuqid: + snprint(up->genbuf, sizeof(up->genbuf), "%11.ud %11.ud %11.ud\n", nif->minmtu, nif->mtu, nif->maxmtu); + return readstr(offset, a, n, up->genbuf); + case Nmaxmtuqid: + snprint(up->genbuf, sizeof(up->genbuf), "%d", nif->maxmtu); + return readstr(offset, a, n, up->genbuf); + } + error(Ebadarg); + return -1; /* not reached */ +} + +Block* +netifbread(Netif *nif, Chan *c, long n, vlong offset) +{ + Netfile *f; + Block *bp; + + if((c->qid.type & QTDIR) || NETTYPE(c->qid.path) != Ndataqid) + return devbread(c, n, offset); + + f = nif->f[NETID(c->qid.path)]; + if(f->fat){ + /* + * Frame at a time (fat) allows us to provide + * non-blocking performance with blocking semantics + * for consumers that know ahead of time data is + * contained within a single frame. Once empty, we + * get in line with other blocking reads and wait our + * turn. 
+ */ + for(;;){ + if(bp = qget(f->iq)) + return bp; + if(waserror()) + return nil; + qsleep(f->iq); + poperror(); + } + } + return qbread(f->iq, n); +} + +/* + * make sure this type isn't already in use on this device + */ +static int +typeinuse(Netif *nif, int type) +{ + Netfile *f, **fp, **efp; + + if(type <= 0) + return 0; + + efp = &nif->f[nif->nfile]; + for(fp = nif->f; fp < efp; fp++){ + f = *fp; + if(f == 0) + continue; + if(f->type == type) + return 1; + } + return 0; +} + +/* + * the devxxx.c that calls us handles writing data, it knows best + */ +long +netifwrite(Netif *nif, Chan *c, void *a, long n) +{ + Netfile *f; + int type, onoff, mtu; + char *p, buf[64]; + uchar binaddr[Nmaxaddr]; + + if(NETTYPE(c->qid.path) != Nctlqid) + error(Eperm); + + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + memmove(buf, a, n); + buf[n] = 0; + + if(waserror()){ + qunlock(nif); + nexterror(); + } + + qlock(nif); + f = nif->f[NETID(c->qid.path)]; + if((p = matchtoken(buf, "connect")) != 0){ + qclose(f->iq); + type = atoi(p); + if(typeinuse(nif, type)) + error(Einuse); + f->type = type; + if(f->type < 0) + nif->all++; + qreopen(f->iq); + } else if(matchtoken(buf, "promiscuous")){ + if(f->prom == 0){ + if(nif->prom == 0 && nif->promiscuous != nil) + nif->promiscuous(nif->arg, 1); + f->prom = 1; + nif->prom++; + } + } else if((p = matchtoken(buf, "scanbs")) != 0){ + /* scan for base stations */ + if(f->scan == 0){ + type = atoi(p); + if(type < 5) + type = 5; + if(nif->scanbs != nil) + nif->scanbs(nif->arg, type); + f->scan = type; + nif->scan++; + } + } else if((p = matchtoken(buf, "mtu")) != 0){ + mtu = atoi(p); + /* zero resets default. 
*/
+		if(mtu != 0)
+			if(mtu < nif->minmtu || mtu > nif->maxmtu)
+				error(Ebadarg);
+		if(nif->hwmtu)
+			nif->mtu = nif->hwmtu(nif->arg, mtu);
+		else
+			nif->mtu = mtu;
+	} else if(matchtoken(buf, "bridge")){
+		f->bridge = 1;
+	} else if (matchtoken(buf, "vlan")){
+		f->vlan = 1;
+	} else if(matchtoken(buf, "headersonly")){
+		f->headersonly = 1;
+	} else if((p = matchtoken(buf, "addmulti")) != 0){
+		if(parseaddr(binaddr, p, nif->alen) < 0)
+			error("bad address");
+		p = netmulti(nif, f, binaddr, 1);
+		if(p)
+			error(p);
+	} else if((p = matchtoken(buf, "remmulti")) != 0){
+		if(parseaddr(binaddr, p, nif->alen) < 0)
+			error("bad address");
+		p = netmulti(nif, f, binaddr, 0);
+		if(p)
+			error(p);
+	} else if((p = matchtoken(buf, "fat")) != 0){
+		if(*p == 0)
+			onoff = 1;
+		else
+			onoff = atoi(p);
+		f->fat = onoff;
+	} else
+		n = -1;	/* unrecognised message: reported by return value, not error() */
+	qunlock(nif);
+	poperror();
+	return n;
+}
+
+/*
+ * change the owner and/or mode of the conversation addressed by c.
+ * the caller must pass netown's write check for the conversation.
+ * db[0..n) is decoded with convM2D; an empty uid or a mode of ~0
+ * leaves the corresponding attribute unchanged.
+ * returns the value convM2D returned (presumably the number of
+ * bytes of db consumed -- confirm against convM2D).
+ */
+long
+netifwstat(Netif *nif, Chan *c, uchar *db, long n)
+{
+	Dir *dir;
+	Netfile *f;
+	int l;
+
+	f = nif->f[NETID(c->qid.path)];
+	if(f == 0)
+		error(Enonexist);
+
+	if(netown(f, up->user, OWRITE) < 0)
+		error(Eperm);
+
+	/* the strings convM2D unpacks are stored after the Dir itself */
+	dir = smalloc(sizeof(Dir)+n);
+	l = convM2D(db, n, &dir[0], (char*)&dir[1]);
+	if(l == 0){
+		free(dir);
+		error(Eshortstat);
+	}
+	if(!emptystr(dir[0].uid)){
+		/*
+		 * uid comes from the stat message and may be KNAMELEN
+		 * bytes or longer; owner is later handed to devdir as
+		 * a string, so it must always be NUL-terminated.
+		 */
+		strncpy(f->owner, dir[0].uid, KNAMELEN-1);
+		f->owner[KNAMELEN-1] = 0;
+	}
+	if(dir[0].mode != ~0UL)
+		f->mode = dir[0].mode;
+	free(dir);
+	return l;
+}
+
+/*
+ * stat is answered by the generic devstat, walking the
+ * directory tree that netifgen generates for this interface.
+ */
+long
+netifstat(Netif *nif, Chan *c, uchar *db, long n)
+{
+	return devstat(c, db, n, (Dirtab *)nif, 0, netifgen);
+}
+
+/*
+ * drop one reference to the conversation behind c.
+ * only open data and ctl files hold a reference; anything else
+ * returns at once.  when the last reference goes, the state
+ * attached to the conversation is torn down and its input
+ * queue is closed.
+ */
+void
+netifclose(Netif *nif, Chan *c)
+{
+	Netfile *f;
+	int t;
+	Netaddr *ap;
+
+	if((c->flag & COPEN) == 0)
+		return;
+
+	t = NETTYPE(c->qid.path);
+	if(t != Ndataqid && t != Nctlqid)
+		return;
+
+	f = nif->f[NETID(c->qid.path)];
+	qlock(f);
+	if(--(f->inuse) == 0){
+		if(f->prom){
+			/* tell the hardware only when the last promiscuous user leaves */
+			qlock(nif);
+			if(--(nif->prom) == 0 && nif->promiscuous != nil)
+				nif->promiscuous(nif->arg, 0);
+			qunlock(nif);
+			f->prom = 0;
+		}
+		if(f->scan){
+			qlock(nif);
+			if(--(nif->scan) == 0 && nif->scanbs != nil)
+				nif->scanbs(nif->arg, 0);
+			qunlock(nif);
+			f->prom = 0;
+			f->scan = 0;
+		}
+		if(f->nmaddr){
+			/*
+			 * bit t of f->maddr stands for the t'th entry on the
+			 * nif->maddr list (netmulti numbers them that way),
+			 * so t has to advance together with ap; otherwise
+			 * only bit 0 is ever tested and the references held
+			 * on every other address are never dropped.
+			 * entries past the end of the bitmap have no bit.
+			 */
+			qlock(nif);
+			t = 0;
+			for(ap = nif->maddr; ap != nil && t < 8*sizeof(f->maddr); ap = ap->next, t++){
+				if(f->maddr[t/8] & (1<<(t%8)))
+					netmulti(nif, f, ap->addr, 0);
+			}
+			qunlock(nif);
+			f->nmaddr = 0;
+		}
+		if(f->type < 0){
+			qlock(nif);
+			--(nif->all);
+			qunlock(nif);
+		}
+		/*
+		 * the Netfile stays in nif->f and is handed out again by
+		 * openfile, so every flag set through ctl must be cleared
+		 * here or it leaks into the next user of the slot.
+		 */
+		f->owner[0] = 0;
+		f->type = 0;
+		f->bridge = 0;
+		f->vlan = 0;
+		f->headersonly = 0;
+		f->fat = 0;
+		qclose(f->iq);
+	}
+	qunlock(f);
+}
+
+Lock netlock;
+
+/*
+ * check whether user o may open conversation p with mode omode.
+ * an unowned conversation is claimed for o with mode 0660.
+ * returns 0 if access is granted, -1 if not.
+ * ownership is examined and changed under netlock.
+ */
+static int
+netown(Netfile *p, char *o, int omode)
+{
+	/* owner-position permission bits needed, indexed by omode&3 */
+	static int access[] = { 0400, 0200, 0600, 0100 };
+	int mode;
+	int t;
+
+	lock(&netlock);
+	if(*p->owner){
+		/* shift the applicable permission triple into the owner position */
+		if(strncmp(o, p->owner, KNAMELEN) == 0)	/* User */
+			mode = p->mode;
+		else if(strncmp(o, eve, KNAMELEN) == 0)	/* Bootes is group */
+			mode = p->mode<<3;
+		else
+			mode = p->mode<<6;		/* Other */
+
+		t = access[omode&3];
+		if((t & mode) == t){
+			unlock(&netlock);
+			return 0;
+		} else {
+			unlock(&netlock);
+			return -1;
+		}
+	}
+	strncpy(p->owner, o, KNAMELEN);
+	p->mode = 0660;
+	unlock(&netlock);
+	return 0;
+}
+
+/*
+ * Increment the reference count of a network device.
+ * If id < 0, return an unused ether device.
+ */
+static int
+openfile(Netif *nif, int id)
+{
+	Netfile *f, **fp, **efp;
+
+	if(id >= 0){
+		/* existing conversation: just take another reference */
+		f = nif->f[id];
+		if(f == 0)
+			error(Enodev);
+		qlock(f);
+		qreopen(f->iq);
+		f->inuse++;
+		qunlock(f);
+		return id;
+	}
+
+	/* clone: take the first slot that is empty or no longer in use */
+	qlock(nif);
+	if(waserror()){
+		qunlock(nif);
+		nexterror();
+	}
+	efp = &nif->f[nif->nfile];
+	for(fp = nif->f; fp < efp; fp++){
+		f = *fp;
+		if(f == 0){
+			f = malloc(sizeof(Netfile));
+			if(f == 0)
+				exhausted("memory");
+			f->iq = qopen(nif->limit, Qmsg, 0, 0);
+			if(f->iq == nil){
+				free(f);
+				exhausted("memory");
+			}
+			qlock(f);
+			*fp = f;
+		} else {
+			qlock(f);
+			if(f->inuse){
+				qunlock(f);
+				continue;
+			}
+		}
+		f->inuse = 1;
+		qreopen(f->iq);
+		netown(f, up->user, 0);
+		qunlock(f);
+		qunlock(nif);
+		poperror();
+		return fp - nif->f;
+	}
+	error(Enodev);
+	return -1;	/* not reached */
+}
+
+/*
+ * look for a token starting a string,
+ * return a pointer to first non-space char after it
+ * (or to the terminating NUL if nothing follows the token);
+ * return 0 if the string does not start with the token
+ * followed by white space or the end of the string.
+ */
+static char*
+matchtoken(char *p, char *token)
+{
+	int n;
+
+	n = strlen(token);
+	if(strncmp(p, token, n))
+		return 0;
+	p += n;
+	if(*p == 0)
+		return p;
+	if(*p != ' ' && *p != '\t' && *p != '\n')
+		return 0;
+	while(*p == ' ' || *p == '\t' || *p == '\n')
+		p++;
+	return p;
+}
+
+/*
+ * hash an address of len bytes to an index into a Netif's mhash table
+ */
+static ulong
+hash(uchar *a, int len)
+{
+	ulong sum = 0;
+
+	while(len-- > 0)
+		sum = (sum << 1) + *a++;
+	return sum%Nmhash;
+}
+
+/*
+ * return 1 if addr is a multicast address that is known to the
+ * interface and still has a reference, 0 otherwise.
+ */
+int
+activemulti(Netif *nif, uchar *addr, int alen)
+{
+	Netaddr *hp;
+
+	for(hp = nif->mhash[hash(addr, alen)]; hp; hp = hp->hnext)
+		if(memcmp(addr, hp->addr, alen) == 0){
+			if(hp->ref)
+				return 1;
+			else
+				break;
+		}
+	return 0;
+}
+
+/*
+ * value of the hex digit c, or -1 if c is not a hex digit
+ * (which includes the terminating NUL of a string)
+ */
+static int
+addrhex(int c)
+{
+	if(c >= '0' && c <= '9')
+		return c - '0';
+	if(c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	if(c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+	return -1;
+}
+
+/*
+ * convert the text at from into alen binary address bytes at to.
+ * each byte is written as exactly two hex digits and may be
+ * followed by a ':'.  returns 0 on success, or -1 if the text
+ * ends early or contains something that is not a hex digit
+ * (previously such characters were quietly converted to 0).
+ * text after the last byte is ignored.
+ */
+static int
+parseaddr(uchar *to, char *from, int alen)
+{
+	char *p;
+	int i, hi, lo;
+
+	p = from;
+	for(i = 0; i < alen; i++){
+		hi = addrhex(p[0]);
+		if(hi < 0)
+			return -1;
+		/* p[1] is only read once p[0] is known not to be the NUL */
+		lo = addrhex(p[1]);
+		if(lo < 0)
+			return -1;
+		to[i] = hi<<4 | lo;
+		p += 2;
+		if(*p == ':')
+			p++;
+	}
+	return 0;
+}
+
+/*
+ * keep track of multicast addresses
+ */
+static char*
+netmulti(Netif *nif, Netfile *f, uchar
*addr, int add) +{ + Netaddr **l, *ap; + int i; + ulong h; + + if(nif->multicast == nil) + return "interface does not support multicast"; + + l = &nif->maddr; + i = 0; + for(ap = *l; ap; ap = *l){ + if(memcmp(addr, ap->addr, nif->alen) == 0) + break; + i++; + l = &ap->next; + } + + if(add){ + if(ap == 0){ + *l = ap = smalloc(sizeof(*ap)); + memmove(ap->addr, addr, nif->alen); + ap->next = 0; + ap->ref = 1; + h = hash(addr, nif->alen); + ap->hnext = nif->mhash[h]; + nif->mhash[h] = ap; + } else { + ap->ref++; + } + if(ap->ref == 1){ + nif->nmaddr++; + nif->multicast(nif->arg, addr, 1); + } + if(i < 8*sizeof(f->maddr)){ + if((f->maddr[i/8] & (1<<(i%8))) == 0) + f->nmaddr++; + f->maddr[i/8] |= 1<<(i%8); + } + } else { + if(ap == 0 || ap->ref == 0) + return 0; + ap->ref--; + if(ap->ref == 0){ + nif->nmaddr--; + nif->multicast(nif->arg, addr, 0); + } + if(i < 8*sizeof(f->maddr)){ + if((f->maddr[i/8] & (1<<(i%8))) != 0) + f->nmaddr--; + f->maddr[i/8] &= ~(1<<(i%8)); + } + } + return 0; +} + +void +netifbypass(Netif *nif, Chan *c, void (*bypass)(void*, Block*), void *arg) +{ + Netfile *f; + + f = nif->f[NETID(c->qid.path)]; + qsetbypass(f->iq, bypass, arg); +} diff -Nru /sys/src/9k/port/netif.h /sys/src/9k/port/netif.h --- /sys/src/9k/port/netif.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/netif.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,134 @@ +typedef struct Netaddr Netaddr; +typedef struct Netfile Netfile; +typedef struct Netif Netif; + +enum +{ + Nmaxaddr= 64, + Nmhash= 31, + Niqlim= 128, + Noqlim= 4096, + + Ncloneqid= 1, + Naddrqid, + N2ndqid, + N3rdqid, + Ndataqid, + Nctlqid, + Nstatqid, + N3statqid, + Ntypeqid, + Nifstatqid, + Nmtuqid, + Nmaxmtuqid, +}; + +/* + * Macros to manage Qid's used for multiplexed devices + */ +#define NETTYPE(x) (((ulong)x)&0x1f) +#define NETID(x) ((((ulong)x))>>5) +#define NETQID(i,t) ((((ulong)i)<<5)|(t)) + +/* + * one per multiplexed connection + */ +struct Netfile +{ + QLock; + + int inuse; + ulong mode; + char owner[KNAMELEN]; + + int 
type; /* multiplexor type */ + int prom; /* promiscuous mode */ + int scan; /* base station scanning interval */ + int bridge; /* bridge mode */ + int vlan; /* treat 802.1Q frames as inner frame type*/ + int headersonly; /* headers only - no data */ + uchar maddr[8]; /* bitmask of multicast addresses requested */ + int nmaddr; /* number of multicast addresses */ + int fat; /* frame at a time */ + + uint inoverflows; /* software input overflows */ + + Queue* iq; /* input */ +}; + +/* + * a network address + */ +struct Netaddr +{ + Netaddr *next; /* allocation chain */ + Netaddr *hnext; + uchar addr[Nmaxaddr]; + int ref; +}; + +/* + * a network interface + */ +struct Netif +{ + QLock; + + int inited; + + /* multiplexing */ + char name[KNAMELEN]; /* for top level directory */ + int nfile; /* max number of Netfiles */ + Netfile **f; + + /* about net */ + int limit; /* flow control */ + int alen; /* address length */ + int mbps; /* megabits per sec */ + int link; /* link status */ + int minmtu; + int maxmtu; + int mtu; + uchar addr[Nmaxaddr]; + uchar bcast[Nmaxaddr]; + Netaddr *maddr; /* known multicast addresses */ + int nmaddr; /* number of known multicast addresses */ + Netaddr *mhash[Nmhash]; /* hash table of multicast addresses */ + int prom; /* number of promiscuous opens */ + int scan; /* number of base station scanners */ + int all; /* number of -1 multiplexors */ + + Queue* oq; /* output */ + + /* statistics */ + uint misses; + uvlong inpackets; + uvlong outpackets; + uint crcs; /* input crc errors */ + uint oerrs; /* output errors */ + uint frames; /* framing errors */ + uint overflows; /* packet overflows */ + uint buffs; /* buffering errors */ + uint inoverflows; /* software overflow on input */ + uint outoverflows; /* software overflow on output */ + uint loopbacks; /* loopback packets processed */ + + /* routines for touching the hardware */ + void *arg; + void (*promiscuous)(void*, int); + void (*multicast)(void*, uchar*, int); + int (*hwmtu)(void*, int); 
/* get/set mtu */ + void (*scanbs)(void*, uint); /* scan for base stations */ +}; + +void netifinit(Netif*, char*, int, ulong); +Walkqid* netifwalk(Netif*, Chan*, Chan*, char **, int); +Chan* netifopen(Netif*, Chan*, int); +void netifclose(Netif*, Chan*); +long netifread(Netif*, Chan*, void*, long, vlong); +Block* netifbread(Netif*, Chan*, long, vlong); +long netifwrite(Netif*, Chan*, void*, long); +long netifwstat(Netif*, Chan*, uchar*, long); +long netifstat(Netif*, Chan*, uchar*, long); +int activemulti(Netif*, uchar*, int); +void netifbypass(Netif*, Chan*, void (*)(void*, Block*), void*); diff -Nru /sys/src/9k/port/nocache.c /sys/src/9k/port/nocache.c --- /sys/src/9k/port/nocache.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/nocache.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,38 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +// No file caching + +void +cinit(void) +{ +} + +void +copen(Chan *c) +{ + USED(c); +} + +int +cread(Chan *c, uchar *buf, int len, vlong off) +{ + USED(c, buf, len, off); + return 0; +} + +void +cupdate(Chan *c, uchar *buf, int len, vlong off) +{ + USED(c, buf, len, off); +} + +void +cwrite(Chan* c, uchar *buf, int len, vlong off) +{ + USED(c, buf, len, off); +} diff -Nru /sys/src/9k/port/noedf.c /sys/src/9k/port/noedf.c --- /sys/src/9k/port/noedf.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/noedf.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,43 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +void +edfinit(Proc*) +{ +} + +char* +edfadmit(Proc*) +{ + return "edf scheduling not implemented"; +} + +int +edfready(Proc*) +{ + return 0; +} + +void +edfrecord(Proc*) +{ +} + +void +edfrun(Proc*, int) +{ +} + +void +edfstop(Proc*) +{ +} + +void +edfyield(void) +{ + yield(); +} diff -Nru /sys/src/9k/port/page.c /sys/src/9k/port/page.c --- /sys/src/9k/port/page.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/page.c Wed Dec 9 
00:00:00 2015 @@ -0,0 +1,558 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/error.h" + +Palloc palloc; + +static uint highwater; /* TO DO */ + +static void pageblanks(usize, int); +static Page* blankpage(uint); + +void +pageinit(void) +{ + uintmem avail; + uvlong pkb, kkb, kmkb, mkb; + + avail = sys->pmpaged; /* could include a portion of unassigned memory */ + palloc.user = avail/PGSZ; /* fairly arbitrary: mainly sets highwater */ +print("pmoccupied: %lld unassigned: %lld\n", sys->pmoccupied, sys->pmunassigned); + +print("user=%#lud\n", palloc.user); + /* keep 4% reserve for copy-on-write, but cap it */ + highwater = (palloc.user*4)/100; + if(highwater > 16*MB/PGSZ) + highwater = 16*MB/PGSZ; + + /* user, kernel, kernel malloc area, memory */ + pkb = palloc.user*PGSZ/KiB; + kkb = ROUNDUP((uintptr)end - KTZERO, PGSZ)/KiB; + kmkb = ROUNDDN(sys->vmunmapped - (uintptr)end, PGSZ)/KiB; + mkb = sys->pmoccupied/KiB; + + print("%lldM memory: %lldK+%lldM kernel," + " %lldM user, %lldM uncommitted\n", + mkb/KiB, kkb, kmkb/KiB, pkb/KiB, (mkb-kkb-kmkb-pkb)/KiB + ); +} + +/* + * allocate and return a new page for page set s; + * return nil iff s was locked on entry and had to be unlocked to wait for memory. 
+ */ +Page* +newpage(int clear, uint lg2pgsize, RWlock *locked) +{ + Page *p; + KMap *k; + Pallocpg *pg; + int hw, dontalloc; + uintmem pa; + + pg = &palloc.avail[lg2pgsize]; + lock(pg); + hw = highwater >> (lg2pgsize-PGSHFT); + if(up == nil || up->kp || locked != nil && !clear) + hw = 0; + for(;;){ + if(pg->freecount > hw) + break; + DBG("freec %lud hw %ud\n", pg->freecount, hw); + + /* try allocating a suitable page */ + pa = physalloc(1<blank; + if(p != nil){ + pg->blank = p->next; + p->next = nil; + }else + p = blankpage(lg2pgsize); + p->pa = pa; + p->ref = 1; + p->mdom = 0; /* TO DO */ + pg->count++; + unlock(pg); + goto Clear; + } + + unlock(pg); + dontalloc = 0; + if(locked != nil) { + runlock(locked); + locked = nil; + dontalloc = 1; + } + qlock(&pg->pwait); /* Hold memory requesters here */ + + if(!waserror()){ + tsleep(&pg->r, ispages, pg, 300); + poperror(); + } + + if(!ispages(pg) && up->procctl != Proc_exitbig){ + print("out of physical memory %dK\n", 1<pwait); + + /* + * If called from fault and we lost the segment from + * underneath don't waste time allocating and freeing + * a page. 
Fault will call newpage again when it has + * reacquired the segment locks + */ + if(dontalloc) + return nil; + + lock(pg); + } + + p = pg->head; + pg->head = p->next; + pg->freecount--; + + if(p->ref != 0) + panic("newpage: %#p: p->ref %d != 0", p, p->ref); + + p->ref = 1; + mmucachectl(p, PG_NOFLUSH); + unlock(pg); + +Clear: + if(clear) { + k = kmap(p); + memset(VA(k), 0, pagesize(p)); + kunmap(k); + } + + return p; +} + +int +physmemavail(uintptr) +{ + return 1; /* palloc.freecount > highwater+need/BY2PG+1 */ +} + +int +ispages(void *a) +{ + return ((Pallocpg*)a)->freecount > highwater; +} + +void +putpage(Page *p) +{ + Pallocpg *pg; + + if(decref(p) != 0) + return; + + pg = &palloc.avail[p->lg2size]; + lock(pg); + + p->next = pg->head; + pg->head = p; + pg->freecount++; + + if(pg->r.p != 0) + wakeup(&pg->r); + + unlock(pg); +} + +void +copypage(Page *f, Page *t) +{ + KMap *ks, *kd; + + if(f->lg2size != t->lg2size) + panic("copypage"); + ks = kmap(f); + kd = kmap(t); + memmove(VA(kd), VA(ks), pagesize(t)); + kunmap(ks); + kunmap(kd); +} + +Pte* +ptecpy(Pte *old) +{ + Pte *new; + Page **src, **dst, *pg; + + new = ptealloc(); + dst = &new->pages[old->first-old->pages]; + new->first = dst; + for(src = old->first; src <= old->last; src++, dst++){ + if((pg = *src) != nil){ + incref(pg); + new->last = dst; + *dst = pg; + } + } + return new; +} + +Pte* +ptealloc(void) +{ + Pte *new; + + new = smalloc(sizeof(Pte)); + new->first = &new->pages[PTEPERTAB]; + new->last = new->pages; + return new; +} + +void +freepte(void (*fn)(Page*), Pte *p) +{ + Page **pg, **ptop; + + if(fn != nil){ + ptop = &p->pages[PTEPERTAB]; + for(pg = p->pages; pg < ptop; pg++) { + if(*pg == 0) + continue; + (*fn)(*pg); + *pg = 0; + } + }else{ + for(pg = p->first; pg <= p->last; pg++) + if(*pg) { + putpage(*pg); + *pg = 0; + } + } + free(p); +} + +void +pteflush(Pte *pte, int s, int e) +{ + int i; + Page *p; + + for(i = s; i < e; i++) { + p = pte->pages[i]; + if(p != nil) + mmucachectl(p, 
PG_TXTFLUSH); + } +} + +static void +pageblanks(usize n, int lg2size) +{ + Pallocpg *pg; + Page *p, *pages; + int j; + + pages = malloc(n*sizeof(Page)); + if(pages == 0) + panic("pageblanks"); + + pg = &palloc.avail[lg2size]; + p = pages; + for(j=0; jnext = nil; + p->pa = 0; + p->lg2size = lg2size; + p->mdom = 0; /* TO DO */ + p->next = pg->blank; + pg->blank = p; + p++; + } +} + +static Page* +blankpage(uint lg) +{ + Pallocpg *pg; + Page *p; + + pg = &palloc.avail[lg]; + while((p = pg->blank) == nil) + pageblanks(256, lg); + pg->blank = p->next; + p->next = nil; + p->pa = 0; + p->mdom = 0; /* TO DO */ + p->lg2size = lg; + return p; +} + +char* +seprintpagestats(char *s, char *e) +{ + Pallocpg *pg; + int i; + + for(i = 0; i < nelem(palloc.avail); i++){ + pg = &palloc.avail[i]; + lock(pg); + if(pg->freecount != 0) + s = seprint(s, e, "%lud/%lud %dK user pages avail\n", + pg->freecount, + pg->count, (1<r.p != nil) + wakeup(&pg->r); + } +} + +/* + * return the Page containing the given offset in Page set s, + * which must be locked or unchanging; + * returns nil if page is not allocated. 
+ */
+Page*
+segoff2page(Pages *s, uintptr soff)
+{
+	Pte *pte;
+
+	if(soff >= s->xsize)
+		return nil;
+	/* soff is a byte offset: pick the Pte, then the page within it */
+	pte = s->map[soff/s->ptemapmem];
+	if(pte == nil)
+		return nil;
+	return pte->pages[(soff&(s->ptemapmem-1))>>s->lg2pgsize];
+}
+
+/*
+ * return the Page that maps virtual address va in segment s,
+ * or nil if va lies outside [s->base, s->top) or no page is allocated there.
+ */
+Page*
+segva2page(Segment *s, uintptr va)
+{
+	if(!(va >= s->base && va <= s->top-1))
+		return nil;
+	return segoff2page(s->pages, va - s->base);
+}
+
+/*
+ * allocate a new set of Pages
+ * covering size bytes in pages of 1<<lg2pgsize bytes;
+ * freepage is remembered and passed to freepte by freepages.
+ * returns nil if the set would need more than SEGMAPSIZE*PTEPERTAB pages.
+ */
+Pages*
+newpages(int lg2pgsize, uintptr size, void (*freepage)(Page*))
+{
+	Pages *ps;
+	int mapsize, npages;
+
+	/*
+	 * NOTE(review): the next three lines were damaged in this copy
+	 * of the patch (they read "if(size & ((1<>lg2pgsize;") and are
+	 * reconstructed.  the mask and shift follow from the rest of
+	 * the function; panicking on a size that is not a multiple of
+	 * the page size is an assumption -- confirm against the
+	 * original source.
+	 */
+	if(size & ((1<<lg2pgsize)-1))
+		panic("newpages");
+	npages = size>>lg2pgsize;
+	if(npages > (SEGMAPSIZE*PTEPERTAB))
+		return nil;
+
+	mapsize = HOWMANY(npages, PTEPERTAB);
+	ps = smalloc(sizeof(*ps) + mapsize*sizeof(Pte*));
+//	if(waserror()){
+//		free(ps);
+//		nexterror();
+//	}
+
+	ps->mapsize = mapsize;
+	ps->xsize = size;
+	ps->npages = npages;
+	ps->lg2pgsize = lg2pgsize;
+	/* bytes mapped by one Pte (also damaged in this copy: read "PTEPERTAB<lg2pgsize") */
+	ps->ptemapmem = PTEPERTAB<<ps->lg2pgsize;
+	ps->freepage = freepage;
+
+//	poperror();
+	return ps;
+}
+
+/*
+ * caller must hold lock on structure that owns s
+ *
+ * give n a copy (made by ptecpy) of every Pte present in s.
+ * n's map must have room for at least s->mapsize entries.
+ */
+void
+duppages(Pages *n, Pages *s)
+{
+	uint size;
+	Pte *pte;
+	int i;
+
+	size = s->mapsize;
+	for(i = 0; i < size; i++)
+		if((pte = s->map[i]) != nil)
+			n->map[i] = ptecpy(pte);
+}
+
+/*
+ * free every Pte in the set (via freepte, with the set's freepage
+ * function) and then the set itself.
+ */
+void
+freepages(Pages *ps)
+{
+	Pte **pp, **emap;
+
+	emap = &ps->map[ps->mapsize];
+	for(pp = ps->map; pp < emap; pp++)
+		if(*pp)
+			freepte(ps->freepage, *pp);
+	free(ps);
+}
+
+/*
+ * install page p at byte offset soff of the page set,
+ * allocating the Pte that covers soff if there is none yet,
+ * and widen the Pte's first..last range to include the new entry.
+ */
+void
+addpage(Pages *ps, uintptr soff, Page *p)
+{
+	Pte **pte;
+	Page **pg;
+
+	/* no lock, since this is called only during initialisation */
+
+	/*
+	 * soff is in bytes (it is divided by ptemapmem below), so it
+	 * must be checked against the set's size in bytes, as
+	 * segoff2page does, not against the number of pages.
+	 */
+	if(soff >= ps->xsize)
+		panic("addpage");
+	pte = &ps->map[soff/ps->ptemapmem];
+	if(*pte == 0)
+		*pte = ptealloc();
+
+	pg = &(*pte)->pages[(soff&(ps->ptemapmem-1))>>ps->lg2pgsize];
+	*pg = p;
+	if(pg < (*pte)->first)
+		(*pte)->first = pg;
+	if(pg > (*pte)->last)
+		(*pte)->last = pg;
+}
+
+/*
+ * free a range of pages in a page set, and return the list,
+ * for re-use, or to be freed after synchronisation.
+ * any locks needed are in the structure that refers to the page set, + * and that structure is also responsible for synchronising MMUs. + */ +Page* +mfreepages(Pages *ps, uintptr soff, usize pages) +{ + int i, j, size; + Page *pg; + Page *list; + + j = (soff&(ps->ptemapmem-1))>>ps->lg2pgsize; + + size = ps->mapsize; + list = nil; + for(i = soff/ps->ptemapmem; i < size; i++) { + if(pages <= 0) + break; + if(ps->map[i] == 0) { + pages -= PTEPERTAB-j; + j = 0; + continue; + } + while(j < PTEPERTAB) { + pg = ps->map[i]->pages[j]; + if(pg != nil){ + pg->next = list; + list = pg; + ps->map[i]->pages[j] = nil; + } + if(--pages == 0) + return list; + j++; + } + j = 0; + } + return list; +} + +void +freepagelist(Page *list) +{ + Page *pg; + + for(pg = list; pg != nil; pg = list){ + list = list->next; + putpage(pg); + } +} + +/* + * caller must mmuflush + */ +void +pagesflush(Pages *ps, uintptr soff, uintptr len) +{ + Pte *pte; + usize chunk, l, sp, ep; + uintptr pgsize; + + pgsize = 1<lg2pgsize; + l = len >> ps->lg2pgsize; + while(l != 0){ + pte = ps->map[soff/ps->ptemapmem]; + sp = soff & (ps->ptemapmem-1); + ep = ps->ptemapmem; + if(sp-ep > l){ + ep = sp + l; + ep = (ep+pgsize-1)&~(pgsize-1); + } + if(sp == ep) + error(Ebadarg); + + if(pte) + pteflush(pte, sp/pgsize, ep/pgsize); + + chunk = ep-sp; + len -= chunk; + soff += chunk; + } +} + +void +relocatepages(Pages *s, uintptr offset) +{ + /* TO DO: remove */ + USED(s); + USED(offset); +} + +Pages* +growpages(Pages *ps, uintptr newsize) +{ + Pages *ns; + + DBG("growpages %#p -> %#p\n", ps->xsize, newsize); + ns = newpages(ps->lg2pgsize, newsize, ps->freepage); + if(ns == nil) + return nil; + memmove(ns->map, ps->map, ps->mapsize*sizeof(Pte*)); + free(ps); + return ns; +} + +void +printpages(Pages *ps) +{ + int i; + Pte *pte; + Page **pg; + + print("pid %d pages %#p xsize %#p npages %ld mapsize %d\n", + up->pid, ps, ps->xsize, ps->npages, ps->mapsize); + for(i = 0; i < ps->mapsize; i++){ + pte = ps->map[i]; + if(pte != 
nil){
+			print("%d: %#p\n", i, pte);
+			for(pg = pte->first; pg <= pte->last; pg++)
+				if(*pg)
+					print("%#p %#p [%d] %#P\n", (uintptr)(pg - pte->pages), *pg, (*pg)->ref, (*pg)->pa);
+		}
+	}
+}
diff -Nru /sys/src/9k/port/parse.c /sys/src/9k/port/parse.c
--- /sys/src/9k/port/parse.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/parse.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,114 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+/*
+ * Generous estimate of number of fields, including terminal nil pointer
+ *
+ * counts the transitions from white space to text in the n bytes at p;
+ * a nil p yields 1, i.e. room for the terminator only.
+ */
+static int
+ncmdfield(char *p, int n)
+{
+	int white, nwhite;
+	char *ep;
+	int nf;
+
+	if(p == nil)
+		return 1;
+
+	nf = 0;
+	ep = p+n;
+	white = 1;	/* first text will start field */
+	while(p < ep){
+		nwhite = (strchr(" \t\r\n", *p++ & 0xFF) != 0);	/* UTF is irrelevant */
+		if(white && !nwhite)	/* beginning of field */
+			nf++;
+		white = nwhite;
+	}
+	return nf+1;	/* +1 for nil */
+}
+
+/*
+ * parse a command written to a device
+ *
+ * copies the n bytes at p, drops one trailing newline and tokenizes
+ * the copy.  the Cmdbuf, its field pointers and the text live in a
+ * single allocation, so the caller releases everything with one free.
+ */
+Cmdbuf*
+parsecmd(char *p, int n)
+{
+	Cmdbuf *volatile cb;
+	int nf;
+	char *sp;
+
+	nf = ncmdfield(p, n);
+
+	/* allocate Cmdbuf plus string pointers plus copy of string including \0 */
+	sp = smalloc(sizeof(*cb) + nf * sizeof(char*) + n + 1);
+	cb = (Cmdbuf*)sp;
+	cb->f = (char**)(&cb[1]);
+	cb->buf = (char*)(&cb->f[nf]);
+
+	/*
+	 * free cb if the copy raises an error; the guard is only set up
+	 * when there is a current process (up).
+	 */
+	if(up!=nil && waserror()){
+		free(cb);
+		nexterror();
+	}
+	memmove(cb->buf, p, n);
+	if(up != nil)
+		poperror();
+
+	/* dump new line and null terminate */
+	if(n > 0 && cb->buf[n-1] == '\n')
+		n--;
+	cb->buf[n] = '\0';
+
+	/* nf counted the terminator, so at most nf-1 fields are filled */
+	cb->nf = tokenize(cb->buf, cb->f, nf-1);
+	cb->f[cb->nf] = nil;
+
+	return cb;
+}
+
+/*
+ * Reconstruct original message, for error diagnostic
+ *
+ * formats s followed by the quoted fields of cb into up->genbuf
+ * and raises that text with error(); it does not return.
+ */
+void
+cmderror(Cmdbuf *cb, char *s)
+{
+	int i;
+	char *p, *e;
+
+	p = up->genbuf;
+	/* stop short of ERRMAX so the closing quote below still fits */
+	e = p+ERRMAX-10;
+	p = seprint(p, e, "%s \"", s);
+	/* loop bound restored: the "<cb->" of i<cb->nf was stripped from the patch text */
+	for(i=0; i<cb->nf; i++){
+		if(i > 0)
+			p = seprint(p, e, " ");
+		p = seprint(p, e, "%q", cb->f[i]);
+	}
+	strcpy(p, "\"");
+	
error(up->genbuf);
+}
+
+/*
+ * Look up entry in table
+ *
+ * returns the first of the nctab entries of ctab whose cmd is "*" or
+ * equals the first field of cb.  raises an error (via error or
+ * cmderror) if cb has no fields, if the entry's non-zero narg differs
+ * from the field count, or if nothing matches.
+ */
+Cmdtab*
+lookupcmd(Cmdbuf *cb, Cmdtab *ctab, int nctab)
+{
+	int i;
+	Cmdtab *ct;
+
+	if(cb->nf == 0)
+		error("empty control message");
+
+	/* loop header and first strcmp restored: they were stripped from the patch text */
+	for(ct = ctab, i=0; i<nctab; i++, ct++){
+		if(strcmp(ct->cmd, "*") !=0)	/* wildcard always matches */
+		if(strcmp(ct->cmd, cb->f[0]) != 0)
+			continue;
+		/* narg == 0 accepts any number of fields */
+		if(ct->narg != 0 && ct->narg != cb->nf)
+			cmderror(cb, Ecmdargs);
+		return ct;
+	}
+
+	cmderror(cb, "unknown control message");
+	return nil;
+}
diff -Nru /sys/src/9k/port/pgrp.c /sys/src/9k/port/pgrp.c
--- /sys/src/9k/port/pgrp.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/pgrp.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,332 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+enum {
+	Whinesecs = 10,		/* frequency of out-of-resources printing */
+};
+
+static Ref pgrpid;
+/* source of mount ids, handed out under its own lock */
+static struct{
+	Lock;
+	uint ref;
+} mountid;
+
+/*
+ * post the n-byte note at a to every process whose noteid matches,
+ * skipping the caller, Dead processes and those with kp set.
+ * a failure posting to one process does not stop the scan.
+ */
+void
+pgrpnote(ulong noteid, char *a, long n, int flag)
+{
+	int i;
+	Proc *p;
+	char buf[ERRMAX];
+
+	/* n is signed: a negative length must not reach memmove or buf[n] */
+	if(n < 0)
+		error(Ebadarg);
+	if(n >= ERRMAX-1)
+		error(Etoobig);
+
+	memmove(buf, a, n);
+	buf[n] = 0;
+	for(i = 0; (p = psincref(i)) != nil; i++){
+		if(p == up || p->state == Dead || p->noteid != noteid || p->kp){
+			psdecref(p);
+			continue;
+		}
+		qlock(&p->debug);
+		/* check again now that p->debug is held */
+		if(p->pid == 0 || p->noteid != noteid){
+			qunlock(&p->debug);
+			psdecref(p);
+			continue;
+		}
+		/* an error raised by postnote lands here and is dropped */
+		if(!waserror()) {
+			postnote(p, 0, buf, flag);
+			poperror();
+		}
+		qunlock(&p->debug);
+		psdecref(p);
+	}
+}
+
+/*
+ * allocate a process group holding one reference and a fresh pgrpid
+ */
+Pgrp*
+newpgrp(void)
+{
+	Pgrp *p;
+
+	p = smalloc(sizeof(Pgrp));
+	p->ref = 1;
+	p->pgrpid = incref(&pgrpid);
+	return p;
+}
+
+/*
+ * allocate a rendezvous group holding one reference
+ */
+Rgrp*
+newrgrp(void)
+{
+	Rgrp *r;
+
+	r = smalloc(sizeof(Rgrp));
+	r->ref = 1;
+	return r;
+}
+
+/*
+ * drop a reference to r, freeing it when the last one goes
+ */
+void
+closergrp(Rgrp *r)
+{
+	if(decref(r) == 0)
+		free(r);
+}
+
+/*
+ * drop a reference to p; on the last one, tear down every mount
+ * head in its hash table and free the group
+ */
+void
+closepgrp(Pgrp *p)
+{
+	Mhead **h, **e, *f, *next;
+
+	if(decref(p) != 0)
+		return;
+
+	qlock(&p->debug);
+	wlock(&p->ns);
+	p->pgrpid = -1;
+
+	e = &p->mnthash[MNTHASH];
+	for(h = p->mnthash; h < e; h++) {
+		for(f = *h; f; f = next) 
{
+			wlock(&f->lock);
+			cclose(f->from);
+			mountfree(f->mount);
+			f->mount = nil;
+			/* fetch the successor before putmhead may release f */
+			next = f->hash;
+			wunlock(&f->lock);
+			putmhead(f);
+		}
+	}
+	wunlock(&p->ns);
+	qunlock(&p->debug);
+	free(p);
+}
+
+/*
+ * insert mount into the list headed by *order, which is linked
+ * through Mount.order and kept in ascending mountid order
+ */
+void
+pgrpinsert(Mount **order, Mount *mount)
+{
+	Mount *f;
+
+	mount->order = 0;
+	if(*order == 0) {
+		*order = mount;
+		return;
+	}
+	for(f = *order; f; f = f->order) {
+		if(mount->mountid < f->mountid) {
+			/* splice in ahead of the first larger id */
+			mount->order = f;
+			*order = mount;
+			return;
+		}
+		order = &f->order;
+	}
+	/* largest id so far: append */
+	*order = mount;
+}
+
+/*
+ * pgrpcpy MUST preserve the mountid allocation order of the parent group
+ *
+ * copies every mount head and mount of from into to.  from->ns is
+ * write-locked for the whole copy and each source head is read-locked
+ * while its mounts are duplicated.
+ */
+void
+pgrpcpy(Pgrp *to, Pgrp *from)
+{
+	int i;
+	Mount *n, *mount, **link, *order;
+	Mhead *f, **tom, **l, *mh;
+
+	wlock(&from->ns);
+	order = 0;
+	tom = to->mnthash;
+	for(i = 0; i < MNTHASH; i++) {
+		/* l is where the next copied head is linked in to's bucket */
+		l = tom++;
+		for(f = from->mnthash[i]; f; f = f->hash) {
+			rlock(&f->lock);
+			mh = newmhead(f->from);
+			*l = mh;
+			l = &mh->hash;
+			link = &mh->mount;
+			for(mount = f->mount; mount != nil; mount = mount->next) {
+				n = newmount(mh, mount->to, mount->mflag, mount->spec);
+				/* remember the copy and queue the original by mountid */
+				mount->copy = n;
+				pgrpinsert(&order, mount);
+				*link = n;
+				link = &n->next;
+			}
+			runlock(&f->lock);
+		}
+	}
+	/*
+	 * Allocate mount ids in the same sequence as the parent group
+	 */
+	lock(&mountid);
+	for(mount = order; mount != nil; mount = mount->order)
+		mount->copy->mountid = mountid.ref++;
+	unlock(&mountid);
+	wunlock(&from->ns);
+}
+
+/*
+ * duplicate file descriptor group f, taking a new reference to each
+ * open channel.  with f == nil, return a new group with DELTAFD
+ * slots.  raises an error if the fd array cannot be allocated.
+ */
+Fgrp*
+dupfgrp(Fgrp *f)
+{
+	Fgrp *new;
+	Chan *c;
+	int i;
+
+	new = smalloc(sizeof(Fgrp));
+	if(f == nil){
+		new->fd = smalloc(DELTAFD*sizeof(Chan*));
+		new->nfd = DELTAFD;
+		new->ref = 1;
+		return new;
+	}
+
+	lock(f);
+	/* Make new fd list shorter if possible, preserving quantization */
+	new->nfd = f->maxfd+1;
+	i = new->nfd%DELTAFD;
+	if(i != 0)
+		new->nfd += DELTAFD - i;
+	new->fd = malloc(new->nfd*sizeof(Chan*));
+	if(new->fd == nil){
+		/* release the lock and the half-built group before raising */
+		unlock(f);
+		free(new);
+		error("no memory for fgrp");
+	}
+	new->ref = 1;
+
+	new->maxfd = f->maxfd;
+	for(i = 0; i <= f->maxfd; 
i++) {
+		if(c = f->fd[i]){
+			incref(c);
+			new->fd[i] = c;
+		}
+	}
+	unlock(f);
+
+	return new;
+}
+
+/*
+ * drop a reference to f (nil is ignored); on the last one, close
+ * every open channel and free the group
+ */
+void
+closefgrp(Fgrp *f)
+{
+	int i;
+	Chan *c;
+
+	if(f == 0)
+		return;
+
+	if(decref(f) != 0)
+		return;
+
+	/*
+	 * If we get into trouble, forceclosefgrp
+	 * will bail us out.
+	 */
+	up->closingfgrp = f;
+	for(i = 0; i <= f->maxfd; i++){
+		if(c = f->fd[i]){
+			/* clear the slot first so forceclosefgrp cannot see c again */
+			f->fd[i] = nil;
+			cclose(c);
+		}
+	}
+	up->closingfgrp = nil;
+
+	free(f->fd);
+	free(f);
+}
+
+/*
+ * Called from sleep because up is in the middle
+ * of closefgrp and just got a kill ctl message.
+ * This usually means that up has wedged because
+ * of some kind of deadly embrace with mntclose
+ * trying to talk to itself. To break free, hand the
+ * unclosed channels to the close queue. Once they
+ * are finished, the blocked cclose that we've
+ * interrupted will finish by itself.
+ */
+void
+forceclosefgrp(void)
+{
+	int i;
+	Chan *c;
+	Fgrp *f;
+
+	/* only valid while up is exiting with a closefgrp in progress */
+	if(up->procctl != Proc_exitme || up->closingfgrp == nil){
+		print("bad forceclosefgrp call");
+		return;
+	}
+
+	f = up->closingfgrp;
+	for(i = 0; i <= f->maxfd; i++){
+		if(c = f->fd[i]){
+			f->fd[i] = nil;
+			ccloseq(c);
+		}
+	}
+}
+
+/*
+ * allocate a Mount of channel to under head mh, taking a reference
+ * to the channel and the next mount id; spec, if non-nil, is copied
+ */
+Mount*
+newmount(Mhead *mh, Chan *to, int flag, char *spec)
+{
+	Mount *mount;
+
+	mount = smalloc(sizeof(Mount));
+	mount->to = to;
+	mount->head = mh;
+	incref(to);
+	lock(&mountid);
+	mount->mountid = mountid.ref++;
+	unlock(&mountid);
+	mount->mflag = flag;
+	if(spec != 0)
+		kstrdup(&mount->spec, spec);
+
+	return mount;
+}
+
+/*
+ * free a list of Mounts linked through next, closing each one's channel
+ */
+void
+mountfree(Mount *mount)
+{
+	Mount *f;
+
+	while(mount != nil) {
+		f = mount->next;
+		cclose(mount->to);
+		mount->mountid = 0;
+		free(mount->spec);
+		free(mount);
+		mount = f;
+	}
+}
+
+/*
+ * wait for a resource to become available.  with a non-nil reason
+ * the process state string is set to pstag and the reason printed,
+ * rate-limited by Whinesecs.  the previous state string is restored
+ * on both the normal and the error path.
+ */
+void
+resrcwait(char *reason, char *pstag)
+{
+	ulong now;
+	char *p;
+	static ulong lastwhine;
+
+	if(up == 0)
+		panic("resrcwait");
+
+	p = up->psstate;
+	if(waserror()){
+		up->psstate = p;
+		nexterror();
+	}
+	if(reason) {
+		up->psstate = pstag;
+		now = seconds();
+		/* don't tie up the console with complaints */
+ if(now - lastwhine > Whinesecs) { + lastwhine = now; + print("%s\n", reason); + } + } + + tsleep(&up->sleep, return0, 0, 300); + poperror(); + up->psstate = p; +} diff -Nru /sys/src/9k/port/physalloc.c /sys/src/9k/port/physalloc.c --- /sys/src/9k/port/physalloc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/physalloc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,145 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * TO DO: + * big pool -> small pool + */ + +enum{ + MinK= PGSHFT, + MidK= 21, + MaxK= 30, /* last usable k (largest block is 2^k) */ +}; + +static Bpool* physgig; +static Bpool* phys; + +static void* +alloc0(usize nb, int clr) +{ + void *p; + + p = basealloc(nb, 0, nil); + if(clr && p != nil) + memset(p, 0, nb); + return p; +} + +uintmem +physalloc(usize size) +{ + uintmem a; + + if(physgig != nil && size >= 1ull<= 4ull*GiB) + bpoolfree(physgig, a, size); + else + bpoolfree(phys, a, size); +} + +void +physallocrange(usize *low, usize *high) +{ + bpoolallocrange(phys, low, high); +} + +void +physinitfree(uintmem base, uintmem lim) +{ + uintmem a, lo, hi; + + base = ROUNDUP(base, (1<= 4ull*GiB || lim > 4ull*GiB)){ + a = base; + if(a < 4ull*GiB) + a = 4ull*GiB; + lo = ROUNDUP(a, (1<next) + top = e->addr + e->size; + if(top > 4ull*GiB){ + physgig = bpoolcreate(MidK, MaxK, 4ull*GiB, top, alloc0); + phys = bpoolcreate(MinK, MaxK, 0, 4ull*GiB, alloc0); + }else + phys = bpoolcreate(MinK, MaxK, 0, top, alloc0); + pa = mmuphysaddr(sys->vmstart) + sys->pmunassigned; + if(DBGFLG) + rmapprint(&rmapram); + DBG("pa lim: %#llux top %#llux\n", pa, top); + while(rmapfirst(&rmapram, pa, &base, &size)){ + if(base >= 4ull*GiB) + break; + lim = base+size; + if(lim > 4ull*GiB) + lim = 4ull*GiB; + lo = ROUNDUP(base, (1<pmpaged += hi - lo; + } + } + if(DBGFLG) + physdump(); +} + +void +physdump(void) +{ + print("bpooldump phys: "); + bpooldump(phys); + if(physgig != nil){ + print("bpooldump physgig: "); + bpooldump(physgig); + } 
+}
diff -Nru /sys/src/9k/port/portclock.c /sys/src/9k/port/portclock.c
--- /sys/src/9k/port/portclock.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/portclock.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,284 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "ureg.h"
+
+/*
+ * one list of pending timers; tadd keeps it sorted by ascending twhen
+ */
+struct Timers
+{
+	Lock;
+	Timer *head;
+};
+
+/* timer lists, indexed by m->machno in the routines of this file */
+static Timers timers[MACHMAX];
+
+
+/*
+ * compute nt's next expiry from its mode and link it into tt in
+ * expiry order.  returns the new expiry if nt became the head of
+ * the list and 0 otherwise.
+ */
+static vlong
+tadd(Timers *tt, Timer *nt)
+{
+	vlong when;
+	Timer *t, **last;
+
+	/* Called with tt locked */
+	assert(nt->tt == nil);
+	switch(nt->tmode){
+	default:
+		panic("timer");
+		break;
+	case Trelative:
+		/* expire tns nanoseconds from now, and at least 1ns ahead */
+		if(nt->tns <= 0)
+			nt->tns = 1;
+		nt->twhen = fastticks(nil) + ns2fastticks(nt->tns);
+		break;
+	case Tperiodic:
+		/*
+		 * Periodic timers must have a period of at least 100µs.
+		 */
+		assert(nt->tns >= 100000);
+		if(nt->twhen == 0){
+			/*
+			 * Look for another timer at the
+			 * same frequency for combining.
+			 */
+			for(t = tt->head; t; t = t->tnext){
+				if(t->tmode == Tperiodic && t->tns == nt->tns)
+					break;
+			}
+			if(t)
+				nt->twhen = t->twhen;
+			else
+				nt->twhen = fastticks(nil);
+		}
+
+		/*
+		 * The new time must be in the future.
+		 * ns2fastticks() can return 0 if the tod clock
+		 * has been adjusted by, e.g. timesync.
+		 */
+		when = ns2fastticks(nt->tns);
+		if(when == 0)
+			when = 1;
+		nt->twhen += when;
+		break;
+	}
+
+	/* find the first timer due later than nt and link nt ahead of it */
+	for(last = &tt->head; t = *last; last = &t->tnext){
+		if(t->twhen > nt->twhen)
+			break;
+	}
+	nt->tnext = *last;
+	*last = nt;
+	nt->tt = tt;
+	if(last == &tt->head)
+		return nt->twhen;
+	return 0;
+}
+
+/*
+ * unlink dt from the list it is on, if any.  returns the expiry of
+ * the list's new head when the search stopped at the head link and
+ * the list is not empty, and 0 otherwise.  dt->tt is read and the
+ * list modified without taking locks here; the callers in this file
+ * hold both dt and its list.
+ */
+static vlong
+tdel(Timer *dt)
+{
+	Timer *t, **last;
+	Timers *tt;
+
+	tt = dt->tt;
+	if(tt == nil)
+		return 0;
+	for(last = &tt->head; t = *last; last = &t->tnext){
+		if(t == dt){
+			assert(dt->tt);
+			dt->tt = nil;
+			*last = t->tnext;
+			break;
+		}
+	}
+	if(last == &tt->head && tt->head)
+		return tt->head->twhen;
+	return 0;
+}
+
+/* add or modify a timer */
+void
+timeradd(Timer *nt)
+{
+	Timers *tt;
+	vlong when;
+
+	/* Must lock Timer struct before Timers struct */
+	ilock(nt);
+	if(tt = nt->tt){
+		/* already queued somewhere: take it off that list first */
+		ilock(tt);
+		tdel(nt);
+		iunlock(tt);
+	}
+	/* queue on the list of the processor running this code */
+	tt = &timers[m->machno];
+	ilock(tt);
+	when = tadd(tt, nt);
+	/* a non-zero result means nt is now the earliest timer */
+	if(when)
+		timerset(when);
+	iunlock(tt);
+	iunlock(nt);
+}
+
+
+/*
+ * remove dt from whichever list holds it
+ */
+void
+timerdel(Timer *dt)
+{
+	Timers *tt;
+	vlong when;
+
+	ilock(dt);
+	if(tt = dt->tt){
+		ilock(tt);
+		when = tdel(dt);
+		/* timerset is only called when the list is this processor's own */
+		if(when && tt == &timers[m->machno])
+			timerset(tt->head->twhen);
+		iunlock(tt);
+	}
+	iunlock(dt);
+}
+
+/*
+ * per-tick work for the HZ clock; timerintr calls it after a timer
+ * whose tf is nil has expired
+ */
+void
+hzclock(Ureg *ur)
+{
+	uintptr pc;
+
+	m->ticks++;
+	if(m->machno == 0)
+		sys->ticks = m->ticks;
+
+	pc = userpc(ur);
+	if(m->proc)
+		m->proc->pc = pc;
+
+	accounttime();
+	kmapinval();
+
+	if(kproftimer != nil)
+		kproftimer(pc);
+
+	if(!m->online)
+		return;
+
+	if(active.exiting) {
+		iprint("someone's exiting\n");
+		exit(0);
+	}
+
+	/* alarms are checked on processor 0 only */
+	if(m->machno == 0)
+		checkalarms();
+
+	if(up && up->state == Running)
+		hzsched();	/* in proc.c */
+}
+
+/*
+ * timer interrupt: run every timer on this processor's list that
+ * is due, re-queue the periodic ones, and program the next expiry
+ */
+void
+timerintr(Ureg *u, void*)
+{
+	Timer *t;
+	Timers *tt;
+	vlong when, now;
+	int callhzclock;
+
+	callhzclock = 0;
+	tt = &timers[m->machno];
+	now = fastticks(nil);
+	ilock(tt);
+	while(t = tt->head){
+		/*
+		 * No need to ilock t here: any manipulation of t
+		 * requires tdel(t) and this must be done with a
+		 * lock to tt held. 
We have tt, so the tdel will
+		 * wait until we're done
+		 */
+		when = t->twhen;
+		if(when > now){
+			/* head is not due yet: program it and finish */
+			timerset(when);
+			iunlock(tt);
+			if(callhzclock)
+				hzclock(u);
+			return;
+		}
+		tt->head = t->tnext;
+if(t->tt != tt)print("t=%#p t->tt=%#p tt=%#p\n", t, t->tt, tt);
+		assert(t->tt == tt);
+		t->tt = nil;
+		/* run the handler without holding the list lock */
+		iunlock(tt);
+		if(t->tf)
+			(*t->tf)(u, t);
+		else
+			callhzclock++;
+		ilock(tt);
+		if(t->tmode == Tperiodic)
+			tadd(tt, t);
+	}
+	iunlock(tt);
+}
+
+/*
+ * start the HZ clock on the processor running this code by queueing
+ * a periodic timer with no handler
+ */
+void
+timersinit(void)
+{
+	Timer *t;
+
+	/*
+	 * T->tf == nil means the HZ clock for this processor.
+	 */
+	todinit();
+	t = malloc(sizeof(*t));
+	/* without this timer there is no clock: do not dereference nil */
+	if(t == nil)
+		panic("timersinit: no memory");
+	t->tmode = Tperiodic;
+	t->tt = nil;
+	t->twhen = 0;	/* tadd chooses the first expiry when twhen is 0 */
+	t->tns = 1000000000/HZ;
+	t->tf = nil;
+	timeradd(t);
+}
+
+/*
+ * queue a periodic timer on processor 0's list that calls f every
+ * ms milliseconds (every clock tick if ms is 0); returns the timer
+ */
+Timer*
+addclock0link(void (*f)(void), int ms)
+{
+	Timer *nt;
+	vlong when;
+
+	/* Synchronize to hztimer if ms is 0 */
+	nt = malloc(sizeof(Timer));
+	/* the fields below are written at once: fail loudly rather than through nil */
+	if(nt == nil)
+		panic("addclock0link: no memory");
+	if(ms == 0)
+		ms = 1000/HZ;
+	nt->tns = (vlong)ms*1000000LL;
+	nt->tmode = Tperiodic;
+	nt->tt = nil;
+	nt->twhen = 0;	/* tadd chooses the first expiry when twhen is 0 */
+	/* f takes no arguments; timerintr will call it as (*tf)(u, t) */
+	nt->tf = (void (*)(Ureg*, Timer*))f;
+
+	ilock(&timers[0]);
+	when = tadd(&timers[0], nt);
+	if(when)
+		timerset(when);
+	iunlock(&timers[0]);
+	return nt;
+}
+
+/*
+ * This tk2ms avoids overflows that the macro version is prone to.
+ * It is a LOT slower so shouldn't be used if you're just converting
+ * a delta. 
+ */
+ulong
+tk2ms(ulong ticks)
+{
+	uvlong ms;
+
+	/* scale in uvlong; the result is cut back to ulong on return */
+	ms = (uvlong)ticks * 1000L;
+	return ms / (uvlong)HZ;
+}
+
+/*
+ * convert milliseconds to clock ticks
+ */
+ulong
+ms2tk(ulong ms)
+{
+	/* small values: round to the nearest tick */
+	if(ms < 1000000000/HZ)
+		return (ms*HZ+500)/1000;
+	/* avoid overflows at the cost of precision */
+	return (ms/1000)*HZ;
+}
diff -Nru /sys/src/9k/port/portdat.h /sys/src/9k/port/portdat.h
--- /sys/src/9k/port/portdat.h Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/portdat.h Wed Dec 9 00:00:00 2015
@@ -0,0 +1,1027 @@
+typedef struct Alarms Alarms;
+typedef struct Block Block;
+typedef struct Bpool Bpool;
+typedef struct Chan Chan;
+typedef struct Cmdbuf Cmdbuf;
+typedef struct Cmdtab Cmdtab;
+typedef struct Dev Dev;
+typedef struct DevConf DevConf;
+typedef struct Dirtab Dirtab;
+typedef struct Edf Edf;
+typedef struct Egrp Egrp;
+typedef struct Evalue Evalue;
+typedef struct Fgrp Fgrp;
+typedef struct Image Image;
+typedef struct LockEntry LockEntry;
+typedef struct Log Log;
+typedef struct Logflag Logflag;
+typedef struct Mntcache Mntcache;
+typedef struct Mount Mount;
+typedef struct Mntrpc Mntrpc;
+typedef struct Mntwalk Mntwalk;
+typedef struct Mnt Mnt;
+typedef struct Mhead Mhead;
+typedef struct Next Next;
+typedef struct Note Note;
+typedef struct Page Page;
+typedef struct Pages Pages;
+typedef struct Path Path;
+typedef struct Palloc Palloc;
+typedef struct Pallocmem Pallocmem;
+typedef struct Pallocpg Pallocpg;
+typedef struct Perf Perf;
+typedef struct PhysUart PhysUart;
+typedef struct Pgrp Pgrp;
+typedef struct Physseg Physseg;
+typedef struct Proc Proc;
+typedef struct Procalloc Procalloc;
+typedef struct Profbuf Profbuf;
+typedef struct Pte Pte;
+typedef struct QLock QLock;
+typedef struct Queue Queue;
+typedef struct Ref Ref;
+typedef struct Rendez Rendez;
+typedef struct Rgrp Rgrp;
+typedef struct RMap RMap;
+typedef struct RMapel RMapel;
+typedef struct RWlock RWlock;
+typedef struct Schedq Schedq;
+typedef struct Section Section;
+typedef struct Segment Segment;
+typedef struct Sema 
Sema; +typedef struct Timer Timer; +typedef struct Timers Timers; +typedef struct Uart Uart; +typedef struct Waitq Waitq; +typedef struct Walkqid Walkqid; +typedef struct Watchdog Watchdog; +typedef int Devgen(Chan*, char*, Dirtab*, int, int, Dir*); + +#pragma incomplete Bpool +#pragma incomplete DevConf +#pragma incomplete Edf +#pragma incomplete Mntcache +#pragma incomplete Mntrpc +#pragma incomplete Queue +#pragma incomplete Timers + +#include + +struct Ref +{ + int ref; +}; + +struct LockEntry +{ + LockEntry* next; + int locked; + Lock* used; + int isilock; + Mpl sr; + + /* for debugging */ + uintptr pc; + Proc* p; + Mach* m; +}; + +struct Rendez +{ + Lock; + Proc *p; +}; + +struct QLock +{ + Lock use; /* to access Qlock structure */ + Proc *head; /* next process waiting for object */ + Proc *tail; /* last process waiting for object */ + int locked; /* flag */ + uintptr qpc; /* pc of the holder */ +}; + +struct RWlock +{ + Lock use; + Proc *head; /* list of waiting processes */ + Proc *tail; + uintptr wpc; /* pc of writer */ + Proc *wproc; /* writing proc */ + int readers; /* number of readers */ + int writer; /* number of writers */ +}; + +struct RMapel +{ + uintmem size; + uintmem addr; + RMapel* next; +}; + +struct RMap +{ + char* name; + + RMapel* (*alloc)(void); + RMapel* map; + RMapel* free; + Lock; +}; + +struct Alarms +{ + QLock; + Proc *head; +}; + +/* + * Access types in namec & channel flags + */ +enum +{ + Aaccess, /* as in stat, wstat */ + Abind, /* for left-hand-side of bind */ + Atodir, /* as in chdir */ + Aopen, /* for i/o */ + Amount, /* to be mounted or mounted upon */ + Acreate, /* is to be created */ + Aremove, /* will be removed by caller */ + + COPEN = 0x0001, /* for i/o */ + CMSG = 0x0002, /* the message channel for a mount */ +/*rsc CCREATE = 0x0004, /* permits creation if c->mnt */ + CCEXEC = 0x0008, /* close on exec */ + CFREE = 0x0010, /* not in use */ + CRCLOSE = 0x0020, /* remove on close */ + CCACHE = 0x0080, /* client cache */ +}; 
+ +/* flag values */ +enum +{ + BINTR = (1<<0), + + Bipck = (1<<2), /* ip checksum */ + Budpck = (1<<3), /* udp checksum */ + Btcpck = (1<<4), /* tcp checksum */ + Bpktck = (1<<5), /* packet checksum */ +}; + +struct Block +{ + Block* next; + Block* list; + uchar* rp; /* first unconsumed byte */ + uchar* wp; /* first empty byte */ + uchar* lim; /* 1 past the end of the buffer */ + uchar* base; /* start of the buffer */ + void (*free)(Block*); + uchar auxspc[64]; + ushort flag; + ushort checksum; /* IP checksum of complete packet (minus media header) */ + ushort vlan; +}; +#define BLEN(s) ((s)->wp - (s)->rp) +#define BALLOC(s) ((s)->lim - (s)->base) + +struct Chan +{ + Lock; + Ref; + Chan* next; /* allocation */ + Chan* link; + vlong offset; /* in fd */ + vlong devoffset; /* in underlying device; see read */ + Dev* dev; + uint devno; + ushort mode; /* read/write */ + ushort flag; + Qid qid; + int fid; /* for devmnt */ + ulong iounit; /* chunk size for i/o; 0==default */ + Mhead* umh; /* mount point that derived Chan; used in unionread */ + Chan* umc; /* channel in union; held for union read */ + QLock umqlock; /* serialize unionreads */ + int uri; /* union read index */ + int dri; /* devdirread index */ + uchar* dirrock; /* directory entry rock for translations */ + int nrock; + int mrock; + QLock rockqlock; + int ismtpt; + Mntcache*mc; /* Mount cache pointer */ + Mnt* mux; /* Mnt for clients using me for messages */ + union { + void* aux; + Qid pgrpid; /* for #p/notepg */ + ulong mid; /* for ns in devproc */ + }; + Chan* mchan; /* channel to mounted server */ + Qid mqid; /* qid of root of mount point */ + Path* path; +}; + +struct Path +{ + Ref; + char* s; + Chan** mtpt; /* mtpt history */ + int len; /* strlen(s) */ + int alen; /* allocated length of s */ + int mlen; /* number of path elements */ + int malen; /* allocated length of mtpt */ +}; + +struct Dev +{ + int dc; + char* name; + + void (*reset)(void); + void (*init)(void); + void (*shutdown)(void); + Chan* 
(*attach)(char*); + Walkqid*(*walk)(Chan*, Chan*, char**, int); + long (*stat)(Chan*, uchar*, long); + Chan* (*open)(Chan*, int); + void (*create)(Chan*, char*, int, int); + void (*close)(Chan*); + long (*read)(Chan*, void*, long, vlong); + Block* (*bread)(Chan*, long, vlong); + long (*write)(Chan*, void*, long, vlong); + long (*bwrite)(Chan*, Block*, vlong); + void (*remove)(Chan*); + long (*wstat)(Chan*, uchar*, long); + void (*power)(int); /* power mgt: power(1) => on, power (0) => off */ + int (*config)(int, char*, DevConf*); /* returns 0 on error */ + long (*readv)(Chan*, IOchunk*, long, long, vlong); + long (*writev)(Chan*, IOchunk*, long, long, vlong); +}; + +struct Dirtab +{ + char name[KNAMELEN]; + Qid qid; + vlong length; + long perm; +}; + +struct Walkqid +{ + Chan *clone; + int nqid; + Qid qid[1]; +}; + +enum +{ + NSMAX = 1000, + NSLOG = 7, + NSCACHE = (1<ref; channels on this mount point incref(c->mchan) == Mnt.c */ + Chan *c; /* Channel to file service */ + Proc *rip; /* Reader in progress */ + Mntrpc *queue; /* Queue of pending requests on this channel */ + uint id; /* Multiplexer id for channel check */ + Mnt *list; /* Free list */ + int flags; /* cache */ + int msize; /* data + IOHDRSZ */ + char *version; /* 9P version */ + Queue *q; /* input queue */ +}; + +enum +{ + NUser, /* note provided externally */ + NExit, /* deliver note quietly */ + NDebug, /* print debug message */ +}; + +struct Note +{ + char msg[ERRMAX]; + int flag; /* whether system posted it */ +}; + +enum +{ + PG_NOFLUSH = 0, + PG_TXTFLUSH = 1, /* flush dcache and invalidate icache */ + PG_DATFLUSH = 2, /* flush both i & d caches (UNUSED) */ +}; + +struct Page +{ + Ref; /* Reference count */ + uintmem pa; /* Physical address in memory */ + uchar lg2size; /* log2(pagesize) */ + uchar mdom; /* Memory domain (could probably replace by a pa2dom map) */ + Page *next; /* Free list */ + PPAGE; +}; + +#define pagesize(p) (1<<(p)->lg2size) + +struct Image +{ + Lock; + Ref; + Chan *c; /* 
channel to text file */ + Qid qid; /* Qid for page cache coherence */ + Qid mqid; + Chan *mchan; + int dc; /* Device type of owning channel */ +//subtype + Section *section[2]; /* TEXT and DATA prototypes of image if running or cached */ + Image *hash; /* Qid hash chains */ + Image *next; /* LRU free list */ + Image *prev; +}; + +struct Section +{ + QLock lk; /* lock for update */ + uintptr xsize; /* size in bytes */ + usize npages; /* size in pages */ + ulong fstart; /* start address in file for demand load */ + ulong flen; /* length of segment in file */ + uchar lg2pgsize; /* log2(size of pages in set) */ + Page* pages[]; /* Pages assigned, once loaded; entry is nil if still on file */ +}; + +struct Pte +{ + Page *pages[PTEPERTAB]; /* Page map for this chunk of pte */ + Page **first; /* First used entry */ + Page **last; /* Last used entry */ +}; + +struct Pages +{ + uintptr xsize; /* size in bytes */ + usize npages; /* size in pages */ + uchar lg2pgsize; /* log2(size of pages in set) */ + uintptr ptemapmem; /* space mapped by one Pte in this set */ + void (*freepage)(Page*); + int mapsize; + Pte* map[]; +}; + +/* Segment types */ +enum +{ + SG_TYPE = 07, /* Mask type of segment */ + SG_TEXT = 00, + SG_DATA = 01, + SG_BSS = 02, + SG_STACK = 03, + SG_SHARED = 04, + SG_PHYSICAL = 05, + + SG_CACHED = 0020, /* Physseg can be cached */ + SG_RONLY = 0040, /* Segment is read only */ + SG_CEXEC = 0100, /* Detach at exec */ +}; + +#define SEGMAXSIZE (SEGMAPSIZE*PTEMAPMEM) + +struct Physseg +{ + ulong attr; /* Segment attributes */ + char *name; /* Attach name */ + uintptr pa; /* Physical address */ + usize size; /* Maximum segment size in pages */ + Page *(*pgalloc)(Segment*, uintptr); /* Allocation if we need it */ + void (*pgfree)(Page*); + uchar lg2pgsize; /* log2(size of pages in segment) */ +}; + +struct Profbuf +{ + Ref; + ulong* ticks; /* Tick profile area */ +}; + +struct Sema +{ + Rendez; + int* addr; + int waiting; + Sema* next; + Sema* prev; +}; + +struct 
Segment +{ + Lock; + Ref; + RWlock lk; /* r: access and paging; w: grow or shrink */ + ushort type; /* segment type */ + uintptr base; /* virtual base */ + uintptr top; /* virtual top */ + int flushme; /* maintain icache for this segment */ + Pages* pages; /* physical pages mapped to virtual base */ + Image* image; /* prototype in file attached to this segment */ + int isec; /* Section index in Image */ + Physseg *pseg; + Lock semalock; + Sema sema; + ulong* profile; +}; + +#define segpgsize(s) (1<<(s)->pages->lg2pgsize) + +enum +{ + RENDHASH = 31, /* Hash to lookup rendezvous tags */ + MNTLOG = 5, + MNTHASH = 1<rendhash[(uintptr)(s)%RENDHASH]) +#define MOUNTH(p,qid) ((p)->mnthash[(qid).path&((1< variadic */ +}; + +/* + * routines to access UART hardware + */ +struct PhysUart +{ + char* name; + Uart* (*pnp)(void); + void (*enable)(Uart*, int); + void (*disable)(Uart*); + void (*kick)(Uart*); + void (*dobreak)(Uart*, int); + int (*baud)(Uart*, int); + int (*bits)(Uart*, int); + int (*stop)(Uart*, int); + int (*parity)(Uart*, int); + void (*modemctl)(Uart*, int); + void (*rts)(Uart*, int); + void (*dtr)(Uart*, int); + long (*status)(Uart*, void*, long, long); + void (*fifo)(Uart*, int); + void (*power)(Uart*, int); + int (*getc)(Uart*); /* polling version for rdb */ + void (*putc)(Uart*, int); /* polling version for iprint */ + void (*poll)(Uart*); /* polled interrupt routine */ +}; + +enum { + Stagesize= 2048 +}; + +/* + * software UART + */ +struct Uart +{ + void* regs; /* hardware stuff */ + void* saveregs; /* place to put registers on power down */ + char* name; /* internal name */ + ulong freq; /* clock frequency */ + int bits; /* bits per character */ + int stop; /* stop bits */ + int parity; /* even, odd or no parity */ + int baud; /* baud rate */ + PhysUart*phys; + int console; /* used as a serial console */ + int special; /* internal kernel device */ + Uart* next; /* list of allocated uarts */ + + QLock; + int type; /* ?? 
*/ + int dev; + int opens; + + int enabled; + Uart *elist; /* next enabled interface */ + + int perr; /* parity errors */ + int ferr; /* framing errors */ + int oerr; /* rcvr overruns */ + int berr; /* no input buffers */ + int serr; /* input queue overflow */ + + /* buffers */ + int (*putc)(Queue*, int); + Queue *iq; + Queue *oq; + + Lock rlock; + uchar istage[Stagesize]; + uchar *iw; + uchar *ir; + uchar *ie; + + Lock tlock; /* transmit */ + uchar ostage[Stagesize]; + uchar *op; + uchar *oe; + int drain; + + int modem; /* hardware flow control on */ + int xonoff; /* software flow control on */ + int blocked; + int cts, dsr, dcd; /* keep track of modem status */ + int ctsbackoff; + int hup_dsr, hup_dcd; /* send hangup upstream? */ + int dohup; + + Rendez r; +}; + +extern Uart* consuart; + +/* + * performance timers, all units in perfticks + */ +struct Perf +{ + ulong intrts; /* time of last interrupt */ + ulong inintr; /* time since last clock tick in interrupt handlers */ + ulong avg_inintr; /* avg time per clock tick in interrupt handlers */ + ulong inidle; /* time since last clock tick in idle loop */ + ulong avg_inidle; /* avg time per clock tick in idle loop */ + ulong last; /* value of perfticks() at last clock tick */ + ulong period; /* perfticks() per clock tick */ +}; + +struct Watchdog +{ + void (*enable)(void); /* watchdog enable */ + void (*disable)(void); /* watchdog disable */ + void (*restart)(void); /* watchdog restart */ + void (*stat)(char*, char*); /* watchdog statistics */ +}; + +/* queue state bits, Qmsg, Qcoalesce, and Qkick can be set in qopen */ +enum +{ + /* Queue.state */ + Qstarve = (1<<0), /* consumer starved */ + Qmsg = (1<<1), /* message stream */ + Qclosed = (1<<2), /* queue has been closed/hungup */ + Qflow = (1<<3), /* producer flow controlled */ + Qcoalesce = (1<<4), /* coalesce packets on read */ + Qkick = (1<<5), /* always call the kick routine after qwrite */ +}; + +#define DEVDOTDOT -1 + +#pragma varargck type "I" uchar* 
+#pragma varargck type "V" uchar* +#pragma varargck type "E" uchar* +#pragma varargck type "M" uchar* + +#pragma varargck type "m" Mreg +#pragma varargck type "P" uintmem diff -Nru /sys/src/9k/port/portfns.h /sys/src/9k/port/portfns.h --- /sys/src/9k/port/portfns.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/portfns.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,432 @@ +void _assert(char*); +void accounttime(void); +void addbootfile(char*, uchar*, ulong); +Timer* addclock0link(void (*)(void), int); +int addconsdev(Queue*, void (*fn)(char*,int), int, int); +int addkbdq(Queue*, int); +int addphysseg(Physseg*); +void adduser(char*, char*, int, char**); +void addwatchdog(Watchdog*); +int adec(int*); +Block* adjustblock(Block*, int); +int ainc(int*); +void alarmkproc(void*); +Block* allocb(int); +Block* allocbalign(int, int); +int anyhigher(void); +int anyready(void); +Image* attachimage(Chan*); +Block* bl2mem(uchar*, Block*, int); +int blocklen(Block*); +void bootlinks(void); +Bpool* bpoolcreate(uint, uint, uintmem, uintmem, void* (*)(usize, int)); +uintmem bpoolalloc(Bpool*, usize); +void bpoolfree(Bpool*, uintmem, usize); +void bpoolallocrange(Bpool*, usize*, usize*); +void bpoolinitfree(Bpool*, uintmem, uintmem); +void bpooldump(Bpool*); +void callwithureg(void (*)(Ureg*)); +char* chanpath(Chan*); +int canlock(Lock*); +int canqlock(QLock*); +int canrlock(RWlock*); +Chan* cclone(Chan*); +void cclose(Chan*); +void ccloseq(Chan*); +void (*consdebug)(void); +void (*consputs)(char*, int); +void chanfree(Chan*); +void checkalarms(void); +void checkb(Block*, char*); +void checkflushmmu(void); +void checkmmu(uintptr, uintmem); +void checkpages(void); +void cinit(void); +void closeegrp(Egrp*); +void closefgrp(Fgrp*); +void closepgrp(Pgrp*); +void closergrp(Rgrp*); +void cmderror(Cmdbuf*, char*); +int cmount(Chan**, Chan*, int, char*); +#define commonerror() (up->errstr) +#define commonuser() (up->user) +Block* concatblock(Block*); +int consactive(void); +void copen(Chan*); +Block* 
copyblock(Block*, int); +void copypage(Page*, Page*); +int cread(Chan*, uchar*, int, vlong); +void cunmount(Chan*, Chan*); +void cupdate(Chan*, uchar*, int, vlong); +void cwrite(Chan*, uchar*, int, vlong); +uintptr dbgpc(Proc*); +int decref(Ref*); +int decrypt(void*, void*, int); +void delay(int); +void delconsdevs(void); +int deluser(char*); +Proc* dequeueproc(Schedq*, Proc*); +Chan* devattach(int, char*); +Block* devbread(Chan*, long, vlong); +long devbwrite(Chan*, Block*, vlong); +Dev* devbyname(char*); +Chan* devclone(Chan*); +int devconfig(int, char *, DevConf *); +void devcreate(Chan*, char*, int, int); +void devdir(Chan*, Qid, char*, vlong, char*, long, Dir*); +long devdirread(Chan*, char*, long, Dirtab*, int, Devgen*); +Devgen devgen; +void devinit(void); +Chan* devopen(Chan*, int, Dirtab*, int, Devgen*); +void devpermcheck(char*, int, int); +void devpower(int); +long devreadv(Chan*, IOchunk*, long, long, vlong); +void devremove(Chan*); +void devreset(void); +void devshutdown(void); +long devstat(Chan*, uchar*, long, Dirtab*, int, Devgen*); +Dev* devtabget(int, int); +void devtabinit(void); +long devtabread(Chan*, void*, long, vlong); +void devtabreset(void); +void devtabshutdown(void); +long devwritev(Chan*, IOchunk*, long, long, vlong); +long devwstat(Chan*, uchar*, long); +Walkqid* devwalk(Chan*, Chan*, char**, int, Dirtab*, int, Devgen*); +void drawactive(int); +void drawcmap(void); +void dumpaproc(Proc*); +void dumpregs(Ureg*); +void dumpstack(void); +Fgrp* dupfgrp(Fgrp*); +void duppages(Pages*, Pages*); +void edfinit(Proc*); +char* edfadmit(Proc*); +int edfready(Proc*); +void edfrecord(Proc*); +void edfrun(Proc*, int); +void edfstop(Proc*); +void edfyield(void); +int emptystr(char*); +int encrypt(void*, void*, int); +void envcpy(Egrp*, Egrp*); +int eqchanddq(Chan*, int, uint, Qid, int); +int eqqid(Qid, Qid); +void error(char*); +void errorf(char*, ...); +void exhausted(char*); +void exit(int); +uvlong fastticks(uvlong*); +uvlong fastticks2us(uvlong); 
+uvlong fastticks2ns(uvlong); +int fault(uintptr, int); +void fdclose(int, int); +Chan* fdtochan(int, int, int, int); +int findmount(Chan**, Mhead**, int, uint, Qid); +int fixfault(Segment*, uintptr, int, int); +void fmtinit(void); +void forceclosefgrp(void); +void free(void*); +void freeb(Block*); +void freeblist(Block*); +void freepagelist(Page*); +void freepages(Pages*); +int freebroken(void); +Pages* growpages(Pages*, uintptr); +void freepte(void (*)(Page*), Pte*); +void getcolor(ulong, ulong*, ulong*, ulong*); +void gotolabel(Label*); +char* getconfenv(void); +int haswaitq(void*); +long hostdomainwrite(char*, long); +long hostownerwrite(char*, long); +void hzsched(void); +Block* iallocb(int); +void ialloclimit(ulong); +void iallocsummary(void); +void ilock(Lock*); +void iunlock(Lock*); +Page* imagepage(Image*, int, uintptr, uintptr); +int incref(Ref*); +int ingroup(char*, char*); +void initimage(void); +int iprint(char*, ...); +void isdir(Chan*); +int iseve(void); +int isevegroup(void); +int islo(void); +#define islocked(l) ((l)->key != 0) +Segment* isoverlap(Proc*, uintptr, usize); +int ispages(void*); +int isphysseg(char*); +int isrmapped(RMap*, uintmem, uintmem*); +void ixsummary(void); +int kbdcr2nl(Queue*, int); +int kbdgetmap(int, int*, int*, Rune*); +int kbdputc(Queue*, int); +void kbdputmap(ushort, ushort, Rune); +void kickpager(void); +void killbig(char*); +void kproc(char*, void(*)(void*), void*); +void kprocchild(Proc*, void (*)(void*), void*); +void (*kproftimer)(uintptr); +void ksetenv(char*, char*, int); +void kstrcpy(char*, char*, int); +void kstrdup(char**, char*); +int labtrap(char*); +long latin1(Rune*, int); +int leadsgroup(char*, char*); +int lock(Lock*); +uintptr lockgetpc(Lock*); +void locksetpc(Lock*, uintptr); +void logopen(Log*); +void logclose(Log*); +char* logctl(Log*, int, char**, Logflag*); +void logn(Log*, int, void*, int); +long logread(Log*, void*, ulong, long); +void log(Log*, int, char*, ...); +Cmdtab* lookupcmd(Cmdbuf*, 
Cmdtab*, int); +Page* lookpage(Image*, ulong); +#define MS2NS(n) (((vlong)(n))*1000000LL) +void mallocinit(void); +int malloclocked(void); +long mallocreadsummary(Chan*, void*, long, long); +void mallocsummary(void); +Block* mem2bl(uchar*, int); +Page* mfreepages(Pages*, uintptr, usize); +void mfreeseg(Segment*, uintptr, uintptr); +void microdelay(int); +uvlong mk64fract(uvlong, uvlong); +void mkqid(Qid*, vlong, ulong, int); +void mmuflush(void); +void mmuput(uintptr, uintmem, Page*); +void mmurelease(Proc*); +void mmuswitch(Proc*); +Chan* mntauth(Chan*, char*); +usize mntversion(Chan*, u32int, char*, usize); +void mountfree(Mount*); +int mregfmt(Fmt*); +ulong ms2tk(ulong); +uvlong ms2fastticks(ulong); +void mul64fract(uvlong*, uvlong, uvlong); +void muxclose(Mnt*); +Chan* namec(char*, int, int, int); +void nameerror(char*, char*); +Chan* newchan(void); +int newfd(Chan*); +Mhead* newmhead(Chan*); +Mount* newmount(Mhead*, Chan*, int, char*); +Page* newpage(int, uint, RWlock*); +Pages* newpages(int, uintptr, void (*f)(Page*)); +Path* newpath(char*); +Pgrp* newpgrp(void); +Rgrp* newrgrp(void); +Proc* newproc(void); +Section* newsection(uintptr, ulong, ulong); +void nexterror(void); +void noteallow(void); +void notedefer(void); +int nrand(int); +uvlong ns2fastticks(uvlong); +int okaddr(uintptr, long, int); +int openmode(int); +int ownlock(Lock*); +Block* packblock(Block*); +Block* padblock(Block*, int); +void pageinit(void); +void pageflush(Page*, uint); +void pagesflush(Pages*, uintptr, uintptr); +ulong pagenumber(Page*); +void pagereclaim(int, int); +void pagewake(void); +void panic(char*, ...); +Cmdbuf* parsecmd(char *a, int n); +void pathclose(Path*); +ulong perfticks(void); +void pexit(char*, int); +void pgrpcpy(Pgrp*, Pgrp*); +void pgrpnote(ulong, char*, long, int); +void physdump(void); +void physinit(uintmem); +void physinitfree(uintmem, uintmem); +uintmem physalloc(usize); +void physdump(void); +int physmemavail(uintptr); +int psindex(int); +#define poperror() 
up->nerrlab-- +int postnote(Proc*, int, char*, int); +void priqlock(QLock*); +void prockill(Proc*, int, char*); +int pprint(char*, ...); +int preempted(void); +void prflush(void); +void printinit(void); +void printpages(Pages*); +void psinit(int); +ulong procalarm(ulong); +void procctl(Proc*); +ulong procdatasize(Proc*, int); +void procdump(void); +int procfdprint(Chan*, int, int, char*, int); +void procflushseg(Segment*); +void procpriority(Proc*, int, int); +void procrestore(Proc*); +void procsave(Proc*); +Proc* psincref(int); +void psdecref(Proc*); +void (*proctrace)(Proc*, int, vlong, vlong); +void procwired(Proc*, int); +Pte* ptealloc(void); +Pte* ptecpy(Pte*); +void pteflush(Pte*, int, int); +int pullblock(Block**, int); +Block* pullupblock(Block*, int); +Block* pullupqueue(Queue*, int); +void putimage(Image*); +void putmhead(Mhead*); +void putpage(Page*); +void putseg(Segment*); +void putstrn(char*, int); +int pwait(Waitmsg*); +void qaddlist(Queue*, Block*); +int qblen(Queue*); +Block* qbread(Queue*, int); +long qbwrite(Queue*, Block*); +Queue* qbypass(void (*)(void*, Block*), void*); +int qcanread(Queue*); +void qclose(Queue*); +int qconsume(Queue*, void*, int); +Block* qcopy(Queue*, int, ulong); +int qdiscard(Queue*, int); +void qflush(Queue*); +void qfree(Queue*); +int qfull(Queue*); +Block* qget(Queue*); +void qhangup(Queue*, char*); +int qisclosed(Queue*); +int qiwrite(Queue*, void*, int); +int qlen(Queue*); +void qlock(QLock*); +Queue* qopen(int, int, void (*)(void*), void*); +int qpass(Queue*, Block*); +int qpassnolim(Queue*, Block*); +int qproduce(Queue*, void*, int); +void qputback(Queue*, Block*); +long qread(Queue*, void*, int); +Block* qremove(Queue*); +void qreopen(Queue*); +void qsetbypass(Queue*, void (*)(void*, Block*), void*); +void qsetlimit(Queue*, int); +void qsleep(Queue*); +void qunlock(QLock*); +int qwindow(Queue*); +int qwrite(Queue*, void*, int); +void qnoblock(Queue*, int); +int rand(void); +void randominit(void); +ulong 
randomread(void*, ulong); +void rdb(void); +int readnum(ulong, char*, ulong, ulong, int); +long readstr(long, char*, long, char*); +void ready(Proc*); +void rebootcmd(int, char**); +void reboot(void*, void*, long); +void relocatepages(Pages*, uintptr); +void relocateseg(Segment*, uintptr); +void renameuser(char*, char*); +void resched(char*); +void resrcwait(char*, char*); +int return0(void*); +void rlock(RWlock*); +void rmapinit(RMap*, char*, RMapel* (*)(void)); +uintmem rmapalloc(RMap*, uintmem, uintmem, uint); +int rmapfirst(RMap*, uintmem, uintmem*, uintmem*); +void rmapfree(RMap*, uintmem, uintmem); +void rmapgaps(RMap*, RMap*); +void rmapprint(RMap*); +uintmem rmapsize(RMap*); +long rtctime(void); +void runlock(RWlock*); +Proc* runproc(void); +void sched(void); +void scheddump(void); +void schedinit(void); +long seconds(void); +void segclock(uintptr); +void addpage(Pages*, uintptr, Page*); +Page* va2page(Pages*, uintptr); +Page* segva2page(Segment*, uintptr); +char* seprintbpoolstats(Bpool*, char*, char*); +char* seprintphysstats(char*, char*); +char* seprintpagestats(char*, char*); +int setcolor(ulong, ulong, ulong, ulong); +void setkernur(Ureg*, Proc*); +int setlabel(Label*); +void setregisters(Ureg*, char*, char*, int); +char* skipslash(char*); +void sleep(Rendez*, int (*)(void*), void*); +void* smalloc(ulong); +char* srvname(Chan*); +char* syscallfmt(int, uintptr, va_list list); +char* sysretfmt(int, va_list, Ar0*, uvlong, uvlong); +void sysrforkchild(Proc*, Proc*); +#define tickscmp(a, b) ((long)((a)-(b))) +void timeradd(Timer*); +void timerdel(Timer*); +void timersinit(void); +void timerintr(Ureg*, void*); +void timerset(uvlong); +ulong tk2ms(ulong); +#define TK2MS(x) ((x)*(1000/HZ)) +uvlong tod2fastticks(vlong); +vlong todget(vlong*); +void todsetfreq(vlong); +void todinit(void); +void todset(vlong, vlong, int); +void tsleep(Rendez*, int (*)(void*), void*, long); +Block* trimblock(Block*, int, int); +Segment* txt2data(Proc*, Segment*); +Uart* 
uartconsole(int, char*); +int uartctl(Uart*, char*); +int uartgetc(void); +void uartkick(void*); +void uartputc(int); +void uartputs(char*, int); +void uartrecv(Uart*, char); +int uartstageoutput(Uart*); +void unbreak(Proc*); +void unlock(Lock*); +void userinit(void); +uintptr userpc(Ureg*); +char* usersread(void); +long userswrite(void*, long); +long userwrite(char*, long); +void* validaddr(void*, long, int); +void validname(char*, int); +char* validnamedup(char*, int); +void validstat(uchar*, usize); +void* vmemchr(void*, int, int); +Proc* wakeup(Rendez*); +int walk(Chan**, char**, int, int, int*); +void wlock(RWlock*); +void wunlock(RWlock*); +void yield(void); +Segment* data2txt(Segment*); +Segment* dupseg(Segment**, int, int); +Segment* newseg(int, uintptr, uintptr, Image*, int); +Segment* seg(Proc*, uintptr, void(*)(RWlock*)); +void hnputv(void*, uvlong); +void hnputl(void*, uint); +void hnputs(void*, ushort); +uvlong nhgetv(void*); +uint nhgetl(void*); +ushort nhgets(void*); +ulong µs(void); + +#pragma varargck argpos errorf 1 +#pragma varargck argpos iprint 1 +#pragma varargck argpos panic 1 +#pragma varargck argpos pprint 1 + +void* xchgm(void*, void*); +#define ALIGNED(h, a) ((((uintptr)(h)) & ((a)-1)) == 0) /* nonzero when h is a multiple of a; a must be a power of two */ diff -Nru /sys/src/9k/port/portusbehci.h /sys/src/9k/port/portusbehci.h --- /sys/src/9k/port/portusbehci.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/portusbehci.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,144 @@ +/* + * EHCI portable hardware definitions + */ + +typedef struct Ecapio Ecapio; +typedef struct Edbgio Edbgio; + +#pragma incomplete Ecapio; +#pragma incomplete Edbgio; + +/* + * EHCI interface registers and bits + */ +enum +{ + /* Ecapio->parms reg. */ + Cnports = 0xF, /* nport bits */ + Cdbgportshift = 20, /* debug port */ + Cdbgportmask = 0xF, + + /* Ecapio->capparms bits */ + C64 = 1<<0, /* 64-bits */ + Cpfl = 1<<1, /* program'ble frame list: can be <1024 */ + Casp = 1<<2, /* asynch. sched. 
park */ + Ceecpshift = 8, /* extended capabilities ptr. */ + Ceecpmask = (1<<8) - 1, + + Clegacy = 1, /* legacy support cap. id */ + CLbiossem = 2, /* legacy cap. bios sem. */ + CLossem = 3, /* legacy cap. os sem */ + CLcontrol = 4, /* legacy support control & status */ + + /* typed links */ + Lterm = 1, + Litd = 0<<1, + Lqh = 1<<1, + Lsitd = 2<<1, + Lfstn = 3<<1, /* we don't use these */ + + /* Cmd reg. */ + Cstop = 0x00000, /* stop running */ + Crun = 0x00001, /* start operation */ + Chcreset = 0x00002, /* host controller reset */ + Cflsmask = 0x0000C, /* frame list size bits */ + Cfls1024 = 0x00000, /* frame list size 1024 */ + Cfls512 = 0x00004, /* frame list size 512 frames */ + Cfls256 = 0x00008, /* frame list size 256 frames */ + Cpse = 0x00010, /* periodic sched. enable */ + Case = 0x00020, /* async sched. enable */ + Ciasync = 0x00040, /* interrupt on async advance doorbell */ + /* interrupt threshold ctl. in µframes (1-32 in powers of 2) */ + Citcshift = 16, + Citcmask = 0xff << Citcshift, + + /* Sts reg. */ + Sasyncss = 0x08000, /* async schedule status */ + Speriodss = 0x04000, /* periodic schedule status */ + Srecl = 0x02000, /* reclamation (empty async sched.) */ + Shalted = 0x01000, /* h.c. is halted */ + Sasync = 0x00020, /* interrupt on async advance */ + Sherr = 0x00010, /* host system error */ + Sfrroll = 0x00008, /* frame list roll over */ + Sportchg = 0x00004, /* port change detect */ + Serrintr = 0x00002, /* error interrupt */ + Sintr = 0x00001, /* interrupt */ + Sintrs = 0x0003F, /* interrupts status */ + + /* Intr reg. */ + Iusb = 0x01, /* intr. on usb */ + Ierr = 0x02, /* intr. on usb error */ + Iportchg = 0x04, /* intr. on port change */ + Ifrroll = 0x08, /* intr. on frlist roll over */ + Ihcerr = 0x10, /* intr. on host error */ + Iasync = 0x20, /* intr. on async advance enable */ + Iall = 0x3F, /* all interrupts */ + + /* Config reg. */ + Callmine = 1, /* route all ports to us */ + + /* Portsc reg. 
*/ + Pspresent = 0x00000001, /* device present */ + Psstatuschg = 0x00000002, /* Pspresent changed */ + Psenable = 0x00000004, /* device enabled */ + Pschange = 0x00000008, /* Psenable changed */ + Psresume = 0x00000040, /* resume detected */ + Pssuspend = 0x00000080, /* port suspended */ + Psreset = 0x00000100, /* port reset */ + Pspower = 0x00001000, /* port power on */ + Psowner = 0x00002000, /* port owned by companion */ + Pslinemask = 0x00000C00, /* line status bits */ + Pslow = 0x00000400, /* low speed device */ + + /* Debug port csw reg. */ + Cowner = 0x40000000, /* port owned by ehci */ + Cenable = 0x10000000, /* debug port enabled */ + Cdone = 0x00010000, /* request is done */ + Cbusy = 0x00000400, /* port in use by a driver */ + Cerrmask= 0x00000380, /* error code bits */ + Chwerr = 0x00000100, /* hardware error */ + Cterr = 0x00000080, /* transaction error */ + Cfailed = 0x00000040, /* transaction did fail */ + Cgo = 0x00000020, /* execute the transaction */ + Cwrite = 0x00000010, /* request is a write */ + Clen = 0x0000000F, /* data len */ + + /* Debug port pid reg. */ + Prpidshift = 16, /* received pid */ + Prpidmask = 0xFF, + Pspidshift = 8, /* sent pid */ + Pspidmask = 0xFF, + Ptokshift = 0, /* token pid */ + Ptokmask = 0xFF, + + Ptoggle = 0x00008800, /* to update toggles */ + Ptogglemask = 0x0000FF00, + + /* Debug port addr reg. 
*/ + Adevshift = 8, /* device address */ + Adevmask = 0x7F, + Aepshift = 0, /* endpoint number */ + Aepmask = 0xF, +}; + +/* + * Capability registers (hw) + */ +struct Ecapio +{ + u32int cap; /* 00 controller capability register */ + u32int parms; /* 04 structural parameters register */ + u32int capparms; /* 08 capability parameters */ + u32int portroute; /* 0c not on the CS5536 */ +}; + +/* + * Debug port registers (hw) + */ +struct Edbgio +{ + u32int csw; /* control and status */ + u32int pid; /* USB pid */ + uchar data[8]; /* data buffer */ + u32int addr; /* device and endpoint addresses */ +}; diff -Nru /sys/src/9k/port/print.c /sys/src/9k/port/print.c --- /sys/src/9k/port/print.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/print.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,43 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static Lock fmtl; + +void +_fmtlock(void) +{ + lock(&fmtl); +} + +void +_fmtunlock(void) +{ + unlock(&fmtl); +} + +int +_efgfmt(Fmt*) +{ + return -1; +} + +int +mregfmt(Fmt* f) +{ + Mreg mreg; + + mreg = va_arg(f->args, Mreg); + if(sizeof(Mreg) == sizeof(uvlong)) + return fmtprint(f, "%#16.16llux", (uvlong)mreg); + return fmtprint(f, "%#8.8ux", (uint)mreg); +} + +void +fmtinit(void) +{ + quotefmtinstall(); + archfmtinstall(); +} diff -Nru /sys/src/9k/port/proc.c /sys/src/9k/port/proc.c --- /sys/src/9k/port/proc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/proc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1705 @@ +#include +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include "errstr.h" +#include + +int nrdy; +Ref noteidalloc; + +ulong delayedscheds; /* statistics */ +long skipscheds; +long preempts; + +static Ref pidalloc; +static int machnoalloc; + +struct Procalloc procalloc; + +extern Proc* psalloc(void); +extern void pshash(Proc*); +extern void psrelease(Proc*); +extern void psunhash(Proc*); + +enum +{ + 
Scaling=2, +}; + +static int reprioritize(Proc*); +static void updatecpu(Proc*); +static int schedgain = 30; /* units in seconds */ + +static void rebalance(void); +static ulong balancetime; + +Schedq runq[Nrq]; +ulong runvec; + +char *statename[] = +{ /* BUG: generate automatically */ + "Dead", + "Moribund", + "Ready", + "Scheding", + "Running", + "Queueing", + "QueueingR", + "QueueingW", + "Wakeme", + "Broken", + "Stopped", + "Rendez", + "Waitrelease", +}; + +/* + * Always splhi()'ed. + */ +void +schedinit(void) /* never returns */ +{ + Edf *e; + + setlabel(&m->sched); + if(up) { + if((e = up->edf) && (e->flags & Admitted)) + edfrecord(up); + updatecpu(up); + m->proc = 0; + switch(up->state) { + case Running: + ready(up); + break; + case Moribund: + up->state = Dead; + + /* + * Holding lock from pexit: + * procalloc + */ + mmurelease(up); + + psrelease(up); + unlock(&procalloc); + break; + } + up->mach = nil; + coherence(); + up = nil; + } + sched(); +} + +/* + * If changing this routine, look also at sleep(). It + * contains a copy of the guts of sched(). + */ +void +sched(void) +{ + Proc *p; + + if(m->ilockdepth) + panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p", + m->machno, + m->ilockdepth, + up? up->lastilock: nil, + (up && up->lastilock)? lockgetpc(up->lastilock): m->ilockpc, + getcallerpc(&p+2)); + + if(up){ + /* + * Delay the sched until the process gives up the locks + * it is holding. This avoids dumb lock loops. + * Don't delay if the process is Moribund: it called sched to die. 
+ */ + if(up->nlocks && up->state != Moribund){ + up->delaysched++; + delayedscheds++; + return; + } + up->delaysched = 0; + + splhi(); + + /* statistics */ + m->cs++; + + procsave(up); + if(setlabel(&up->sched)){ + procrestore(up); + spllo(); + return; + } + gotolabel(&m->sched); + } + p = runproc(); + if(!p->edf){ + updatecpu(p); + p->priority = reprioritize(p); + } + if(p != m->readied) + m->schedticks = m->ticks + HZ/10; + m->readied = 0; + up = p; + up->state = Running; + up->mach = m; + m->proc = up; + mmuswitch(up); + gotolabel(&up->sched); +} + +int +anyready(void) +{ + return runvec; +} + +int +anyhigher(void) +{ + return runvec & ~((1<<(up->priority+1))-1); +} + +/* + * here once per clock tick to see if we should resched + */ +void +hzsched(void) +{ + /* once a second, rebalance will reprioritize ready procs */ + if(m->machno == 0) + rebalance(); + + /* unless preempted, get to run for at least 100ms */ + if(anyhigher() + || (!up->fixedpri && m->ticks > m->schedticks && anyready())){ + m->readied = nil; /* avoid cooperative scheduling */ + up->delaysched++; + } +} + +/* + * here at the end of non-clock interrupts to see if we should preempt the + * current process. Returns 1 if preempted, 0 otherwise. + */ +int +preempted(void) +{ + if(up && up->state == Running && + up->nlocks == 0 && + !up->preempted && + anyhigher() && + !active.exiting){ + m->readied = nil; /* avoid cooperative scheduling */ + up->preempted = 1; + sched(); + splhi(); + up->preempted = 0; + return 1; + } + return 0; +} + +/* + * Update the cpu time average for this particular process, + * which is about to change from up -> not up or vice versa. + * p->lastupdate is the last time an updatecpu happened. + * + * The cpu time average is a decaying average that lasts + * about D clock ticks. D is chosen to be approximately + * the cpu time of a cpu-intensive "quick job". A job has to run + * for approximately D clock ticks before we home in on its + * actual cpu usage. 
Thus if you manage to get in and get out + * quickly, you won't be penalized during your burst. Once you + * start using your share of the cpu for more than about D + * clock ticks though, your p->cpu hits 1000 (1.0) and you end up + * below all the other quick jobs. Interactive tasks, because + * they basically always use less than their fair share of cpu, + * will be rewarded. + * + * If the process has not been running, then we want to + * apply the filter + * + * cpu = cpu * (D-1)/D + * + * n times, yielding + * + * cpu = cpu * ((D-1)/D)^n + * + * but D is big enough that this is approximately + * + * cpu = cpu * (D-n)/D + * + * so we use that instead. + * + * If the process has been running, we apply the filter to + * 1 - cpu, yielding a similar equation. Note that cpu is + * stored in fixed point (* 1000). + * + * Updatecpu must be called before changing up, in order + * to maintain accurate cpu usage statistics. It can be called + * at any time to bring the stats for a given proc up-to-date. + */ +static void +updatecpu(Proc *p) +{ + int D, n, t, ocpu; + + if(p->edf) + return; + + t = sys->ticks*Scaling + Scaling/2; + n = t - p->lastupdate; + p->lastupdate = t; + + if(n == 0) + return; + D = schedgain*HZ*Scaling; + if(n > D) + n = D; + + ocpu = p->cpu; + if(p != up) + p->cpu = (ocpu*(D-n))/D; + else{ + t = 1000 - ocpu; + t = (t*(D-n))/D; + p->cpu = 1000 - t; + } + +//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu); +} + +/* + * On average, p has used p->cpu of a cpu recently. + * Its fair share is sys.nonline/m->load of a cpu. If it has been getting + * too much, penalize it. If it has been getting not enough, reward it. + * I don't think you can get much more than your fair share that + * often, so most of the queues are for using less. Having a priority + * of 3 means you're just right. Having a higher priority (up to p->basepri) + * means you're not using as much as you could. 
+ */ +static int +reprioritize(Proc *p) +{ + int fairshare, n, load, ratio; + + load = sys->machptr[0]->load; + if(load == 0) + return p->basepri; + + /* + * fairshare = 1.000 * ncpu * 1.000/load, + * except the decimal point is moved three places + * on both load and fairshare. + */ + fairshare = (sys->nonline*1000*1000)/load; + n = p->cpu; + if(n == 0) + n = 1; + ratio = (fairshare+n/2) / n; + if(ratio > p->basepri) + ratio = p->basepri; + if(ratio < 0) + panic("reprioritize"); +//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio); + return ratio; +} + +/* + * add a process to a scheduling queue + */ +void +queueproc(Schedq *rq, Proc *p) +{ + int pri; + + pri = rq - runq; + lock(runq); + p->priority = pri; + p->rnext = 0; + if(rq->tail) + rq->tail->rnext = p; + else + rq->head = p; + rq->tail = p; + rq->n++; + nrdy++; + runvec |= 1<head; p; p = p->rnext){ + if(p == tp) + break; + l = p; + } + + /* + * p->mach==0 only when process state is saved + */ + if(p == 0 || p->mach){ + unlock(runq); + return nil; + } + if(p->rnext == 0) + rq->tail = l; + if(l) + l->rnext = p->rnext; + else + rq->head = p->rnext; + if(rq->head == nil) + runvec &= ~(1<<(rq-runq)); + rq->n--; + nrdy--; + if(p->state != Ready) + print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]); + + unlock(runq); + return p; +} + +/* + * ready(p) picks a new priority for a process and sticks it in the + * runq for that priority. 
+ */ +void +ready(Proc *p) +{ + Mreg s; + int pri; + Schedq *rq; + void (*pt)(Proc*, int, vlong, vlong); + + s = splhi(); + if(edfready(p)){ + splx(s); + return; + } + + if(up != p && (p->wired == nil || p->wired == m)) + m->readied = p; /* group scheduling */ + + updatecpu(p); + pri = reprioritize(p); + p->priority = pri; + rq = &runq[pri]; + p->state = Ready; + queueproc(rq, p); + pt = proctrace; + if(pt) + pt(p, SReady, 0, 0); + splx(s); +} + +/* + * yield the processor and drop our priority + */ +void +yield(void) +{ + if(anyready()){ + /* pretend we just used 1/2 tick */ + up->lastupdate -= Scaling/2; + sched(); + } +} + +/* + * recalculate priorities once a second. We need to do this + * since priorities will otherwise only be recalculated when + * the running process blocks. + */ +static void +rebalance(void) +{ + Mreg s; + int pri, npri, t; + Schedq *rq; + Proc *p; + + t = m->ticks; + if(t - balancetime < HZ) + return; + balancetime = t; + + for(pri=0, rq=runq; prihead; + if(p == nil) + continue; + if(p->mp != m) + continue; + if(pri == p->basepri) + continue; + updatecpu(p); + npri = reprioritize(p); + if(npri != pri){ + s = splhi(); + p = dequeueproc(rq, p); + if(p) + queueproc(&runq[npri], p); + splx(s); + goto another; + } + } +} + +/* + * pick a process to run + */ +Proc* +runproc(void) +{ + Schedq *rq; + Proc *p; + ulong start, now; + int i; + void (*pt)(Proc*, int, vlong, vlong); + + start = perfticks(); + + /* cooperative scheduling until the clock ticks */ + if((p=m->readied) && p->mach==0 && p->state==Ready + && (p->wired == nil || p->wired == m) + && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){ + skipscheds++; + rq = &runq[p->priority]; + goto found; + } + + preempts++; + +loop: + /* + * find a process that last ran on this processor (affinity), + * or one that hasn't moved in a while (load balancing). Every + * time around the loop affinity goes down. 
+ */ + spllo(); + for(i = 0;; i++){ + /* + * find the highest priority target process that this + * processor can run given affinity constraints. + * + */ + for(rq = &runq[Nrq-1]; rq >= runq; rq--){ + for(p = rq->head; p; p = p->rnext){ + if(p->mp == nil || p->mp == m + || (!p->wired && i > 0)) + goto found; + } + } + + /* waste time or halt the CPU */ + idlehands(); + + /* remember how much time we're here */ + now = perfticks(); + m->perf.inidle += now-start; + start = now; + } + +found: + splhi(); + p = dequeueproc(rq, p); + if(p == nil) + goto loop; + + p->state = Scheding; + p->mp = m; + + if(edflock(p)){ + edfrun(p, rq == &runq[PriEdf]); /* start deadline timer and do admin */ + edfunlock(); + } + pt = proctrace; + if(pt) + pt(p, SRun, 0, 0); + return p; +} + +void +pickmach(Proc *p) +{ + Mach *mp; + int i; + + if(sys->nmach < 2) + return; + for(;;){ + i = (uint)ainc(&machnoalloc)%sys->nmach; + mp = sys->machptr[i]; + if(mp->online){ + p->mp = mp; + return; + } + } +} + +Proc* +newproc(void) +{ + Proc *p; + + p = psalloc(); + + p->state = Scheding; + p->psstate = "New"; + p->mach = 0; + p->qnext = 0; + p->nchild = 0; + p->nwait = 0; + p->waitq = 0; + p->parent = 0; + p->pgrp = 0; + p->egrp = 0; + p->fgrp = 0; + p->rgrp = 0; + p->pdbg = 0; + p->kp = 0; + if(up != nil && up->procctl == Proc_tracesyscall) + p->procctl = Proc_tracesyscall; + else + p->procctl = 0; + p->syscallq = nil; + p->notepending = 0; + p->notedeferred = 0; + p->ureg = 0; + p->privatemem = 0; + p->errstr = p->errbuf0; + p->syserrstr = p->errbuf1; + p->errbuf0[0] = '\0'; + p->errbuf1[0] = '\0'; + p->nlocks = 0; + p->delaysched = 0; + p->trace = 0; + kstrdup(&p->user, "*nouser"); + kstrdup(&p->text, "*notext"); + kstrdup(&p->args, ""); + p->nargs = 0; + p->setargs = 0; + memset(p->seg, 0, sizeof p->seg); + p->pid = incref(&pidalloc); + pshash(p); + p->noteid = incref(¬eidalloc); + if(p->pid <= 0 || p->noteid <= 0) + panic("pidalloc"); + if(p->kstack == 0) + p->kstack = smalloc(KSTACK); + + /* 
sched params */ + p->mp = 0; + p->wired = 0; + procpriority(p, PriNormal, 0); + p->cpu = 0; + p->lastupdate = sys->ticks*Scaling; + p->edf = nil; + + return p; +} + +/* + * wire this proc to a machine + */ +void +procwired(Proc *p, int bm) +{ + Proc *pp; + int i; + char nwired[MACHMAX]; + Mach *wm, *mp; + + if(bm < 0){ + /* pick a machine to wire to */ + memset(nwired, 0, sizeof(nwired)); + p->wired = 0; + for(i=0; (pp = psincref(i)) != nil; i++){ + wm = pp->wired; + if(wm && pp->pid) + nwired[wm->machno]++; + psdecref(pp); + } + bm = 0; + for(i=0; imachptr[i]) == nil || !mp->online) + continue; + if(nwired[i] < nwired[bm]) + bm = i; + } + } else { + /* use the virtual machine requested */ + bm = bm % MACHMAX; + } + + p->wired = sys->machptr[bm]; + p->mp = p->wired; +} + +void +procpriority(Proc *p, int pri, int fixed) +{ + if(pri >= Npriq) + pri = Npriq - 1; + else if(pri < 0) + pri = 0; + p->basepri = pri; + p->priority = pri; + if(fixed){ + p->fixedpri = 1; + } else { + p->fixedpri = 0; + } +} + +/* + * sleep if a condition is not true. Another process will + * awaken us after it sets the condition. When we awaken + * the condition may no longer be true. + * + * we lock both the process and the rendezvous to keep r->p + * and p->r synchronized. + */ +void +sleep(Rendez *r, int (*f)(void*), void *arg) +{ + Mreg s; + void (*pt)(Proc*, int, vlong, vlong); + + s = splhi(); + + if(up->nlocks) + print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n", + up->pid, up->nlocks, up->lastlock, lockgetpc(up->lastlock), getcallerpc(&r)); + lock(r); + lock(&up->rlock); + if(r->p){ + print("double sleep called from %#p, %d %d\n", + getcallerpc(&r), r->p->pid, up->pid); + dumpstack(); + } + + /* + * Wakeup only knows there may be something to do by testing + * r->p in order to get something to lock on. + * Flush that information out to memory in case the sleep is + * committed. 
+ */ + r->p = up; + + if((*f)(arg) || up->notepending && !up->notedeferred){ + /* + * if condition happened or a note is pending + * never mind + */ + r->p = nil; + unlock(&up->rlock); + unlock(r); + } else { + /* + * now we are committed to + * change state and call scheduler + */ + pt = proctrace; + if(pt) + pt(up, SSleep, 0, 0); + up->state = Wakeme; + up->r = r; + + /* statistics */ + m->cs++; + + procsave(up); + if(setlabel(&up->sched)) { + /* + * here when the process is awakened + */ + procrestore(up); + spllo(); + } else { + /* + * here to go to sleep (i.e. stop Running) + */ + unlock(&up->rlock); + unlock(r); + gotolabel(&m->sched); + } + } + + if(up->notepending && !up->notedeferred) { + up->notepending = 0; + splx(s); + if(up->procctl == Proc_exitme && up->closingfgrp) + forceclosefgrp(); + error(Eintr); + } + + splx(s); +} + +static int +tfn(void *arg) +{ + return up->trend == nil || up->tfn(arg); +} + +void +twakeup(Ureg*, Timer *t) +{ + Proc *p; + Rendez *trend; + + p = t->ta; + trend = p->trend; + p->trend = 0; + if(trend) + wakeup(trend); +} + +void +tsleep(Rendez *r, int (*fn)(void*), void *arg, long ms) +{ + if (up->tt){ + print("tsleep: timer active: mode %d, tf %#p\n", + up->tmode, up->tf); + timerdel(up); + } + up->tns = MS2NS(ms); + up->tf = twakeup; + up->tmode = Trelative; + up->ta = up; + up->trend = r; + up->tfn = fn; + timeradd(up); + + if(waserror()){ + timerdel(up); + nexterror(); + } + sleep(r, tfn, arg); + if (up->tt) + timerdel(up); + up->twhen = 0; + poperror(); +} + +/* + * Expects that only one process can call wakeup for any given Rendez. + * We hold both locks to ensure that r->p and p->r remain consistent. + * Richard Miller has a better solution that doesn't require both to + * be held simultaneously, but I'm a paranoid - presotto. 
+ */ +Proc* +wakeup(Rendez *r) +{ + Mreg s; + Proc *p; + + s = splhi(); + + lock(r); + p = r->p; + + if(p != nil){ + lock(&p->rlock); + if(p->state != Wakeme || p->r != r) + panic("wakeup: state"); + r->p = nil; + p->r = nil; + ready(p); + unlock(&p->rlock); + } + unlock(r); + + splx(s); + + return p; +} + +/* + * if waking a sleeping process, this routine must hold both + * p->rlock and r->lock. However, it can't know them in + * the same order as wakeup causing a possible lock ordering + * deadlock. We break the deadlock by giving up the p->rlock + * lock if we can't get the r->lock and retrying. + */ +int +postnote(Proc *p, int dolock, char *n, int flag) +{ + Mreg s; + int ret; + Rendez *r; + Proc *d, **l; + + if(dolock) + qlock(&p->debug); + + if(flag != NUser && (p->notify == 0 || p->notified)) + p->nnote = 0; + + ret = 0; + if(p->nnote < NNOTE) { + strcpy(p->note[p->nnote].msg, n); + p->note[p->nnote++].flag = flag; + ret = 1; + } + p->notepending = 1; + if(dolock) + qunlock(&p->debug); + + if(p->notedeferred){ + if(flag == NUser) + return ret; + /* do not defer fatal errors and kill through ctl */ + p->notedeferred = 0; + } + + /* this loop is to avoid lock ordering problems. */ + for(;;){ + s = splhi(); + lock(&p->rlock); + r = p->r; + + /* waiting for a wakeup? 
*/ + if(r == nil) + break; /* no */ + + /* try for the second lock */ + if(canlock(r)){ + if(p->state != Wakeme || r->p != p) + panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state); + p->r = nil; + r->p = nil; + ready(p); + unlock(r); + break; + } + + /* give other process time to get out of critical section and try again */ + unlock(&p->rlock); + splx(s); + sched(); + } + unlock(&p->rlock); + splx(s); + + if(p->state != Rendezvous) + return ret; + + /* Try and pull out of a rendezvous */ + lock(p->rgrp); + if(p->state == Rendezvous) { + p->rendval = ~0; + l = &REND(p->rgrp, p->rendtag); + for(d = *l; d; d = d->rendhash) { + if(d == p) { + *l = p->rendhash; + break; + } + l = &d->rendhash; + } + ready(p); + } + unlock(p->rgrp); + return ret; +} + +/* + * prevent application notes within a control request that must complete + */ +void +notedefer(void) +{ + int s; + + s = splhi(); + if(up->notedeferred) + panic("notedefer"); + if(up->notepending){ + up->notepending = 0; + splx(s); + error(Eintr); + } + up->notedeferred = 1; + splx(s); +} + +void +noteallow(void) +{ + up->notedeferred = 0; + /* trap or a later sleep will process the note */ +} + +/* + * weird thing: keep at most NBROKEN around + */ +#define NBROKEN 4 +struct +{ + QLock; + int n; + Proc *p[NBROKEN]; +}broken; + +void +addbroken(Proc *p) +{ + qlock(&broken); + if(broken.n == NBROKEN) { + ready(broken.p[0]); + memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1)); + --broken.n; + } + broken.p[broken.n++] = p; + qunlock(&broken); + + edfstop(up); + p->state = Broken; + p->psstate = 0; + sched(); +} + +void +unbreak(Proc *p) +{ + int b; + + qlock(&broken); + for(b=0; b < broken.n; b++) + if(broken.p[b] == p) { + broken.n--; + memmove(&broken.p[b], &broken.p[b+1], + sizeof(Proc*)*(NBROKEN-(b+1))); + ready(p); + break; + } + qunlock(&broken); +} + +int +freebroken(void) +{ + int i, n; + + qlock(&broken); + n = broken.n; + for(i=0; ialarm = 0; + if (up->tt) + timerdel(up); + pt = 
proctrace; + if(pt) + pt(up, SDead, 0, 0); + + /* nil out all the resources under lock (free later) */ + qlock(&up->debug); + fgrp = up->fgrp; + up->fgrp = nil; + egrp = up->egrp; + up->egrp = nil; + rgrp = up->rgrp; + up->rgrp = nil; + pgrp = up->pgrp; + up->pgrp = nil; + dot = up->dot; + up->dot = nil; + qunlock(&up->debug); + + if(fgrp) + closefgrp(fgrp); + if(egrp) + closeegrp(egrp); + if(rgrp) + closergrp(rgrp); + if(dot) + cclose(dot); + if(pgrp) + closepgrp(pgrp); + + /* + * if not a kernel process and have a parent, + * do some housekeeping. + */ + if(up->kp == 0) { + p = up->parent; + if(p == 0) { + if(exitstr == 0) + exitstr = "unknown"; + panic("boot process died: %s", exitstr); + } + + while(waserror()) + ; + + wq = smalloc(sizeof(Waitq)); + poperror(); + + wq->w.pid = up->pid; + utime = up->time[TUser] + up->time[TCUser]; + stime = up->time[TSys] + up->time[TCSys]; + wq->w.time[TUser] = tk2ms(utime); + wq->w.time[TSys] = tk2ms(stime); + wq->w.time[TReal] = tk2ms(sys->ticks - up->time[TReal]); + if(exitstr && exitstr[0]) + snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s", + up->text, up->pid, exitstr); + else + wq->w.msg[0] = '\0'; + + lock(&p->exl); + /* + * Check that parent is still alive. + */ + if(p->pid == up->parentpid && p->state != Broken) { + p->nchild--; + p->time[TCUser] += utime; + p->time[TCSys] += stime; + /* + * If there would be more than 2000 wait records + * processes for my parent, then don't leave a wait + * record behind. This helps prevent badly written + * daemon processes from accumulating lots of wait + * records. 
+ */ + if(p->nwait < 2000) { + wq->next = p->waitq; + p->waitq = wq; + p->nwait++; + wq = nil; + wakeup(&p->waitr); + } + } + unlock(&p->exl); + if(wq) + free(wq); + } + + if(!freemem) + addbroken(up); + + qlock(&up->seglock); + es = &up->seg[NSEG]; + for(s = up->seg; s < es; s++) { + if(*s) { + putseg(*s); + *s = 0; + } + } + qunlock(&up->seglock); + + lock(&up->exl); /* Prevent my children from leaving waits */ + psunhash(up); + up->pid = 0; + wakeup(&up->waitr); + unlock(&up->exl); + + for(f = up->waitq; f; f = next) { + next = f->next; + free(f); + } + + /* release debuggers */ + qlock(&up->debug); + if(up->syscallq != nil){ + qhangup(up->syscallq, nil); + up->syscallq = nil; + } + if(up->pdbg) { + wakeup(&up->pdbg->sleep); + up->pdbg = 0; + } + qunlock(&up->debug); + + edfstop(up); + if(up->edf) { + free(up->edf); + up->edf = nil; + } + + /* Sched must not loop for these locks */ + lock(&procalloc); + + up->state = Moribund; + sched(); + panic("pexit"); +} + +int +haswaitq(void *x) +{ + Proc *p; + + p = (Proc *)x; + return p->waitq != 0; +} + +int +pwait(Waitmsg *w) +{ + int cpid; + Waitq *wq; + + if(!canqlock(&up->qwaitr)) + error(Einuse); + + if(waserror()) { + qunlock(&up->qwaitr); + nexterror(); + } + + lock(&up->exl); + if(up->nchild == 0 && up->waitq == 0) { + unlock(&up->exl); + error(Enochild); + } + unlock(&up->exl); + + sleep(&up->waitr, haswaitq, up); + + lock(&up->exl); + wq = up->waitq; + up->waitq = wq->next; + up->nwait--; + unlock(&up->exl); + + qunlock(&up->qwaitr); + poperror(); + + if(w) + memmove(w, &wq->w, sizeof(Waitmsg)); + cpid = wq->w.pid; + free(wq); + + return cpid; +} + +void +dumpaproc(Proc *p) +{ + uintptr bss; + char *s; + + if(p == 0) + return; + + bss = 0; + if(p->seg[BSEG]) + bss = p->seg[BSEG]->top; + + s = p->psstate; + if(s == 0) + s = statename[p->state]; + print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %lud lpc %#p pri %lud\n", + p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state], + 
p->time[0], p->time[1], bss, p->qpc, p->nlocks, + p->delaysched, p->lastlock ? lockgetpc(p->lastlock) : 0, p->priority); +} + +void +procdump(void) +{ + int i; + Proc *p; + + if(up) + print("up %d\n", up->pid); + else + print("no current process\n"); + for(i=0; (p = psincref(i)) != nil; i++) { + if(p->state != Dead) + dumpaproc(p); + psdecref(p); + } +} + +/* + * must be called in trap, at least on clock interrupt, + * to check and clear the flushmmu flag set above + */ +void +checkflushmmu(void) +{ + if(m->mmuflush){ + if(up) + mmuflush(); + m->mmuflush = 0; + } +} + +/* + * wait till all processes have flushed their mmu + * state about the given segment + */ +void +procflushseg(Segment *s) +{ + int i, ns, nm, nwait; + Proc *p; + Mach *mp; + + /* + * tell all processes with the same set + * of pages to flush their mmus + */ + nwait = 0; + for(i=0; (p = psincref(i)) != nil; i++) { + if(p->state == Dead){ + psdecref(p); + continue; + } + for(ns = 0; ns < NSEG; ns++){ + if(p->seg[ns] == s){ + p->newtlb = 1; + for(nm = 0; nm < MACHMAX; nm++){ + if((mp = sys->machptr[nm]) == nil || !mp->online) + continue; + if(mp->proc == p){ + mp->mmuflush = 1; + nwait++; + } + } + break; + } + } + psdecref(p); + } + + if(nwait == 0) + return; + + /* + * wait for all processors to take a clock interrupt + * and flush their mmu's + */ + for(i = 0; i < MACHMAX; i++){ + if((mp = sys->machptr[i]) == nil || !mp->online || mp == m) + continue; + while(mp->mmuflush) + sched(); + } +} + +void +scheddump(void) +{ + Proc *p; + Schedq *rq; + + for(rq = &runq[Nrq-1]; rq >= runq; rq--){ + if(rq->head == 0) + continue; + print("rq%ld:", rq-runq); + for(p = rq->head; p; p = p->rnext) + print(" %d(%lud)", p->pid, m->ticks - p->readytime); + print("\n"); + delay(150); + } + print("nrdy %d\n", nrdy); +} + +void +kproc(char *name, void (*func)(void *), void *arg) +{ + Proc *p; + static Pgrp *kpgrp; + + while(waserror()) + {} + p = newproc(); + poperror(); + + p->psstate = 0; + p->procmode = 0640; + 
p->kp = 1; + + p->scallnr = up->scallnr; + memmove(p->arg, up->arg, sizeof(up->arg)); + p->nerrlab = 0; + p->slash = up->slash; + p->dot = up->dot; + if(p->dot) + incref(p->dot); + + memmove(p->note, up->note, sizeof(p->note)); + p->nnote = up->nnote; + p->notified = 0; + p->lastnote = up->lastnote; + p->notify = up->notify; + p->ureg = 0; + p->dbgreg = 0; + + procpriority(p, PriKproc, 0); + + kprocchild(p, func, arg); + + kstrdup(&p->user, eve); + kstrdup(&p->text, name); + if(kpgrp == 0) + kpgrp = newpgrp(); + p->pgrp = kpgrp; + incref(kpgrp); + + memset(p->time, 0, sizeof(p->time)); + p->time[TReal] = sys->ticks; +// pickmach(p); + ready(p); +} + +/* + * called splhi() by notify(). See comment in notify for the + * reasoning. + */ +void +procctl(Proc *p) +{ + Mreg s; + char *state; + + switch(p->procctl) { + case Proc_exitbig: + spllo(); + pexit("Killed: Insufficient physical memory", 1); + + case Proc_exitme: + spllo(); /* pexit has locks in it */ + pexit("Killed", 1); + + case Proc_traceme: + if(p->nnote == 0) + return; + /* No break */ + + case Proc_stopme: + p->procctl = 0; + state = p->psstate; + p->psstate = "Stopped"; + /* free a waiting debugger */ + s = spllo(); + qlock(&p->debug); + if(p->pdbg) { + wakeup(&p->pdbg->sleep); + p->pdbg = 0; + } + qunlock(&p->debug); + splhi(); + p->state = Stopped; + sched(); + p->psstate = state; + splx(s); + return; + } +} + +void +errorf(char *fmt, ...) 
+{
+	va_list arg;
+	char buf[PRINTSIZE];
+
+	va_start(arg, fmt);
+	vseprint(buf, buf+sizeof(buf), fmt, arg);
+	va_end(arg);
+	error(buf);
+}
+
+/*
+ * raise the error err in the current process: panics if there is
+ * no current process or the error label stack is already at NERR,
+ * copies err into up->errstr (unless it already is that buffer)
+ * and then unwinds through nexterror().
+ */
+void
+error(char *err)
+{
+	if(up == nil)
+		panic("error(%s) not in a process", err);
+	spllo();
+
+	if(up->nerrlab >= NERR)
+		panic("error stack too deep");
+	if(err != up->errstr)
+		kstrcpy(up->errstr, err, ERRMAX);
+	setlabel(&up->errlab[NERR-1]);
+	nexterror();
+}
+
+/*
+ * jump to the most recently pushed error label;
+ * an empty label stack is reported through labtrap.
+ */
+void
+nexterror(void)
+{
+	if(up->nerrlab <= 0)
+		labtrap("nexterror");
+	gotolabel(&up->errlab[--up->nerrlab]);
+}
+
+/*
+ * print every error label of up that has a non-zero pc, wait for
+ * the console to drain, then panic with source as the message.
+ */
+int
+labtrap(char *source)
+{
+	static Lock l;
+	int i;
+
+	ilock(&l);
+	print("labtrap (%s):\n", source);
+	for (i=NERR-1; i>=0; i--)
+		if (up->errlab[i].pc)
+			print("%d: sp=%#p pc=%#p\n", i, up->errlab[i].sp, up->errlab[i].pc);
+	iunlock(&l);
+	delay(3*1000);	/* delay to let cons queue drain */
+	panic("%s", source);	/* source is data, never a format string */
+	return 1;
+}
+
+/*
+ * check that the error label stack depth is the one the caller expects
+ */
+void
+labassert(int nerrlab)
+{
+	if (up->nerrlab != nerrlab)
+		labtrap("labassert");
+}
+
+/*
+ * report "no free <resource>" with iprint and raise it as an error.
+ * the message is built with a bounded snprint so that a long
+ * resource name is truncated rather than written past buf.
+ */
+void
+exhausted(char *resource)
+{
+	char buf[ERRMAX];
+
+	snprint(buf, sizeof buf, "no free %s", resource);
+	iprint("%s\n", buf);
+	error(buf);
+}
+
+/*
+ * neither p nor its segments are necessarily locked
+ */
+ulong
+procdatasize(Proc *p, int addstack)
+{
+	Segment *s;
+	ulong tot, l;
+	int i, n;
+
+	tot = 0;
+	for(i = 1; i < NSEG; i++){
+		s = p->seg[i];
+		if(s != nil && (i != SSEG || addstack)){
+			l = s->top - s->base;
+			n = s->ref;
+			if(n > 1)
+				l /= n;
+			if(p->seg[i] == s)	/* ie, hasn't changed meanwhile */
+				tot += l;
+		}
+	}
+	return tot;
+}
+
+int
+cankillproc(Proc *p)
+{
+	if(p->kp)
+		return 0;
+	if(p->state == Dead || p->state == Moribund)
+		return 0;
+	if(p->procctl == Proc_exitbig || p->procctl == Proc_exitme)
+		return 0;
+	return strcmp(p->user, eve) != 0 || (p->procmode&0222) != 0;
+}
+
+enum{
+	Nbig=	8
+};
+
+static ulong
+findbignoteid(void)
+{
+	int i, nbig;
+	ulong l, noteid, noteids[Nbig], sizes[Nbig], t;
+	Proc *p;
+	int j;
+
+	memset(sizes, 0, sizeof(sizes));
+	memset(noteids, 0, sizeof(noteids));
+	nbig = 0;
+	
for(j = 0; (p = psincref(j)) != nil; j++){
+		if(cankillproc(p)){
+			noteid = p->noteid;
+			l = procdatasize(p, 1);
+			if(l < 4*MB){
+				/* continue bypasses the psdecref at the loop tail */
+				psdecref(p);
+				continue;
+			}
+			/*
+			 * noteids/sizes are kept ordered by size: either merge
+			 * into an existing entry for this noteid or insert here
+			 * and carry the displaced entry further down.
+			 */
+			for(i = 0; i < nbig; i++){
+				if(noteid == noteids[i]){
+					if(l > sizes[i])
+						sizes[i] = l;
+					break;
+				}
+				if(l > sizes[i]){
+					t = noteids[i]; noteids[i] = noteid; noteid = t;
+					t = sizes[i]; sizes[i] = l; l = t;
+				}
+			}
+			if(i == nbig && nbig < nelem(noteids)){
+				noteids[nbig] = noteid;
+				sizes[nbig] = l;
+				nbig++;
+			}
+		}
+		psdecref(p);
+	}
+	noteid = 0;
+	l = 0;
+	for(i = 0; i < nbig; i++){
+		if(noteids[i] > noteid && sizes[i] > l/2){
+			noteid = noteids[i];
+			l = sizes[i];
+		}
+	}
+	return noteid;
+}
+
+/*
+ * kill every killable process in the note group chosen by
+ * findbignoteid; processes whose debug qlock cannot be taken
+ * immediately are skipped.
+ */
+void
+killbig(char *why)
+{
+	ulong noteid;
+	Proc *p;
+	int i;
+
+	noteid = findbignoteid();
+	if(noteid == 0)
+		return;
+	for(i = 0; (p = psincref(i)) != nil; i++){
+		if(p->noteid == noteid && canqlock(&p->debug)){
+			/* recheck now that p->debug is held */
+			if(p->noteid == noteid && cankillproc(p)){
+				print("%ud: %s killed: %s\n", p->pid, p->text, why);
+				prockill(p, Proc_exitbig, why);
+			}
+			qunlock(&p->debug);
+		}
+		psdecref(p);
+	}
+}
+
+/*
+ * must hold the lock p->debug
+ */
+void
+prockill(Proc *p, int ctl, char *why)
+{
+	switch(p->state) {
+	case Broken:
+		unbreak(p);
+		break;
+	case Stopped:
+		p->procctl = ctl;
+		postnote(p, 0, why, NExit);
+		ready(p);
+		break;
+	default:
+		p->procctl = ctl;
+		postnote(p, 0, why, NExit);
+		break;
+	}
+}
+
+/*
+ * change ownership to 'new' of all processes owned by 'old'.  Used when
+ * eve changes.
+ */ +void +renameuser(char *old, char *new) +{ + int i; + Proc *p; + + for(i = 0; (p = psincref(i)) != nil; i++){ + if(p->user!=nil && strcmp(old, p->user)==0) + kstrdup(&p->user, new); + psdecref(p); + } +} + +/* + * time accounting called by clock() splhi'd + */ +void +accounttime(void) +{ + Proc *p; + ulong n, per; + static ulong nrun; + + p = m->proc; + if(p) { + nrun++; + p->time[p->insyscall]++; + } + + /* calculate decaying duty cycles */ + n = perfticks(); + per = n - m->perf.last; + m->perf.last = n; + per = (m->perf.period*(HZ-1) + per)/HZ; + if(per != 0) + m->perf.period = per; + + m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ; + m->perf.inidle = 0; + + m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ; + m->perf.inintr = 0; + + /* only one processor gets to compute system load averages */ + if(m->machno != 0) + return; + + /* + * calculate decaying load average. + * if we decay by (n-1)/n then it takes + * n clock ticks to go from load L to .36 L once + * things quiet down. it takes about 5 n clock + * ticks to go to zero. so using HZ means this is + * approximately the load over the last second, + * with a tail lasting about 5 seconds. 
+ */ + n = nrun; + nrun = 0; + n = (nrdy+n)*1000; + m->load = (m->load*(HZ-1)+n)/HZ; +} + diff -Nru /sys/src/9k/port/ps.c /sys/src/9k/port/ps.c --- /sys/src/9k/port/ps.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/ps.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,126 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +void +pshash(Proc *p) +{ + int h; + + h = p->pid % nelem(procalloc.ht); + lock(&procalloc); + p->pidhash = procalloc.ht[h]; + procalloc.ht[h] = p; + unlock(&procalloc); +} + +void +psunhash(Proc *p) +{ + int h; + Proc **l; + + h = p->pid % nelem(procalloc.ht); + lock(&procalloc); + for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash) + if(*l == p){ + *l = p->pidhash; + break; + } + unlock(&procalloc); +} + +int +psindex(int pid) +{ + Proc *p; + int h; + int s; + + s = -1; + h = pid % nelem(procalloc.ht); + lock(&procalloc); + for(p = procalloc.ht[h]; p != nil; p = p->pidhash) + if(p->pid == pid){ + s = p->index; + break; + } + unlock(&procalloc); + return s; +} + +Proc* +psincref(int i) +{ + /* + * Placeholder. + */ + if(i >= procalloc.nproc) + return nil; + return &procalloc.arena[i]; +} + +void +psdecref(Proc *p) +{ + /* + * Placeholder. + */ + USED(p); +} + +void +psrelease(Proc* p) +{ + p->qnext = procalloc.free; + procalloc.free = p; +} + +Proc* +psalloc(void) +{ + Proc *p; + char msg[64]; + + lock(&procalloc); + for(;;) { + if(p = procalloc.free) + break; + + unlock(&procalloc); + snprint(msg, sizeof msg, "no procs; %s forking", + up? 
up->text: "kernel"); + resrcwait(msg, "Noprocs"); + lock(&procalloc); + } + procalloc.free = p->qnext; + unlock(&procalloc); + + while(p->mach != nil || p->nlocks != 0) + {} + + return p; +} + +void +psinit(int nproc) +{ + Proc *p; + int i; + + procalloc.nproc = nproc; + procalloc.free = malloc(nproc*sizeof(Proc)); + if(procalloc.free == nil) + panic("cannot allocate %ud procs (%udMB)\n", nproc, nproc*sizeof(Proc)/(1024*1024)); + procalloc.arena = procalloc.free; + + p = procalloc.free; + for(i=0; iqnext = p+1; + p->index = i; + } + p->qnext = 0; + p->index = i; +} diff -Nru /sys/src/9k/port/qio.c /sys/src/9k/port/qio.c --- /sys/src/9k/port/qio.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/qio.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1600 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +static ulong padblockcnt; +static ulong concatblockcnt; +static ulong pullupblockcnt; +static ulong copyblockcnt; +static ulong consumecnt; +static ulong producecnt; +static ulong qcopycnt; + +static int debugging; + +#define QDEBUG if(0) + +/* + * IO queues + */ +typedef struct Queue Queue; + +struct Queue +{ + Lock; + + Block* bfirst; /* buffer */ + Block* blast; + + int len; /* bytes allocated to queue */ + int dlen; /* data bytes in queue */ + int limit; /* max bytes in queue */ + int inilim; /* initial limit */ + int state; + int noblock; /* true if writes return immediately when q full */ + int eof; /* number of eofs read by user */ + + void (*kick)(void*); /* restart output */ + void (*bypass)(void*, Block*); /* bypass queue altogether */ + void* arg; /* argument to kick */ + + QLock rlock; /* mutex for reading processes */ + Rendez rr; /* process waiting to read */ + QLock wlock; /* mutex for writing processes */ + Rendez wr; /* process waiting to write */ + + char err[ERRMAX]; +}; + +enum +{ + Maxatomic = 64*1024, +}; + +uint qiomaxatomic = Maxatomic; + +void +ixsummary(void) +{ + debugging ^= 1; 
+ iallocsummary(); + print("pad %lud, concat %lud, pullup %lud, copy %lud\n", + padblockcnt, concatblockcnt, pullupblockcnt, copyblockcnt); + print("consume %lud, produce %lud, qcopy %lud\n", + consumecnt, producecnt, qcopycnt); +} + +/* + * free a list of blocks + */ +void +freeblist(Block *b) +{ + Block *next; + + for(; b != 0; b = next){ + next = b->next; + b->next = 0; + freeb(b); + } +} + +/* + * pad a block to the front (or the back if size is negative) + */ +Block* +padblock(Block *bp, int size) +{ + int n; + Block *nbp; + + QDEBUG checkb(bp, "padblock 1"); + if(size >= 0){ + if(bp->rp - bp->base >= size){ + bp->rp -= size; + return bp; + } + + if(bp->next) + panic("padblock %#p", getcallerpc(&bp)); + n = BLEN(bp); + padblockcnt++; + nbp = allocb(size+n); + nbp->rp += size; + nbp->wp = nbp->rp; + memmove(nbp->wp, bp->rp, n); + nbp->wp += n; + freeb(bp); + nbp->rp -= size; + } else { + size = -size; + + if(bp->next) + panic("padblock %#p", getcallerpc(&bp)); + + if(bp->lim - bp->wp >= size) + return bp; + + n = BLEN(bp); + padblockcnt++; + nbp = allocb(size+n); + memmove(nbp->wp, bp->rp, n); + nbp->wp += n; + freeb(bp); + } + QDEBUG checkb(nbp, "padblock 1"); + return nbp; +} + +/* + * return count of bytes in a string of blocks + */ +int +blocklen(Block *bp) +{ + int len; + + len = 0; + while(bp) { + len += BLEN(bp); + bp = bp->next; + } + return len; +} + +/* + * return count of space in blocks + */ +int +blockalloclen(Block *bp) +{ + int len; + + len = 0; + while(bp) { + len += BALLOC(bp); + bp = bp->next; + } + return len; +} + +/* + * copy the string of blocks into + * a single block and free the string + */ +Block* +concatblock(Block *bp) +{ + int len; + Block *nb, *f; + + if(bp->next == 0) + return bp; + + nb = allocb(blocklen(bp)); + for(f = bp; f; f = f->next) { + len = BLEN(f); + memmove(nb->wp, f->rp, len); + nb->wp += len; + } + concatblockcnt += BLEN(nb); + freeblist(bp); + QDEBUG checkb(nb, "concatblock 1"); + return nb; +} + +/* + * make sure 
the first block has at least n bytes + */ +Block* +pullupblock(Block *bp, int n) +{ + int i; + Block *nbp; + + /* + * this should almost always be true, it's + * just to avoid every caller checking. + */ + if(BLEN(bp) >= n) + return bp; + + /* + * if not enough room in the first block, + * add another to the front of the list. + */ + if(bp->lim - bp->rp < n){ + nbp = allocb(n); + nbp->next = bp; + bp = nbp; + } + + /* + * copy bytes from the trailing blocks into the first + */ + n -= BLEN(bp); + while(nbp = bp->next){ + i = BLEN(nbp); + if(i > n) { + memmove(bp->wp, nbp->rp, n); + pullupblockcnt++; + bp->wp += n; + nbp->rp += n; + QDEBUG checkb(bp, "pullupblock 1"); + return bp; + } else { + /* shouldn't happen but why crash if it does */ + if(i < 0){ + print("pullupblock -ve length, from %#p\n", + getcallerpc(&bp)); + i = 0; + } + memmove(bp->wp, nbp->rp, i); + pullupblockcnt++; + bp->wp += i; + bp->next = nbp->next; + nbp->next = 0; + freeb(nbp); + n -= i; + if(n == 0){ + QDEBUG checkb(bp, "pullupblock 2"); + return bp; + } + } + } + freeb(bp); + return 0; +} + +/* + * make sure the first block has at least n bytes + */ +Block* +pullupqueue(Queue *q, int n) +{ + Block *b; + + if(BLEN(q->bfirst) >= n) + return q->bfirst; + q->bfirst = pullupblock(q->bfirst, n); + for(b = q->bfirst; b != nil && b->next != nil; b = b->next) + ; + q->blast = b; + return q->bfirst; +} + +/* + * trim to len bytes starting at offset + */ +Block * +trimblock(Block *bp, int offset, int len) +{ + long l; + Block *nb, *startb; + + QDEBUG checkb(bp, "trimblock 1"); + if(blocklen(bp) < offset+len) { + freeblist(bp); + return nil; + } + + while((l = BLEN(bp)) < offset) { + offset -= l; + nb = bp->next; + bp->next = nil; + freeb(bp); + bp = nb; + } + + startb = bp; + bp->rp += offset; + + while((l = BLEN(bp)) < len) { + len -= l; + bp = bp->next; + } + + bp->wp -= (BLEN(bp) - len); + + if(bp->next) { + freeblist(bp->next); + bp->next = nil; + } + + return startb; +} + +/* + * copy 'count' bytes 
into a new block + */ +Block* +copyblock(Block *bp, int count) +{ + int l; + Block *nbp; + + QDEBUG checkb(bp, "copyblock 0"); + if(bp->flag & BINTR){ + nbp = iallocb(count); + if(nbp == nil) + return nil; + }else + nbp = allocb(count); + for(; count > 0 && bp != 0; bp = bp->next){ + l = BLEN(bp); + if(l > count) + l = count; + memmove(nbp->wp, bp->rp, l); + nbp->wp += l; + count -= l; + } + if(count > 0){ + memset(nbp->wp, 0, count); + nbp->wp += count; + } + copyblockcnt++; + QDEBUG checkb(nbp, "copyblock 1"); + + return nbp; +} + +Block* +adjustblock(Block* bp, int len) +{ + int n; + Block *nbp; + + if(len < 0){ + freeb(bp); + return nil; + } + + if(bp->rp+len > bp->lim){ + nbp = copyblock(bp, len); + freeblist(bp); + QDEBUG checkb(nbp, "adjustblock 1"); + + return nbp; + } + + n = BLEN(bp); + if(len > n) + memset(bp->wp, 0, len-n); + bp->wp = bp->rp+len; + QDEBUG checkb(bp, "adjustblock 2"); + + return bp; +} + + +/* + * throw away up to count bytes from a + * list of blocks. Return count of bytes + * thrown away. 
+ */ +int +pullblock(Block **bph, int count) +{ + Block *bp; + int n, bytes; + + bytes = 0; + if(bph == nil) + return 0; + + while(*bph != nil && count != 0) { + bp = *bph; + n = BLEN(bp); + if(count < n) + n = count; + bytes += n; + count -= n; + bp->rp += n; + QDEBUG checkb(bp, "pullblock "); + if(BLEN(bp) == 0) { + *bph = bp->next; + bp->next = nil; + freeb(bp); + } + } + return bytes; +} + +/* + * get next block from a queue, return null if nothing there + */ +Block* +qget(Queue *q) +{ + int dowakeup; + Block *b; + + /* sync with qwrite */ + ilock(q); + + b = q->bfirst; + if(b == nil){ + q->state |= Qstarve; + iunlock(q); + return nil; + } + q->bfirst = b->next; + b->next = 0; + q->len -= BALLOC(b); + q->dlen -= BLEN(b); + QDEBUG checkb(b, "qget"); + + /* if writer flow controlled, restart */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } else + dowakeup = 0; + + iunlock(q); + + if(dowakeup) + wakeup(&q->wr); + + return b; +} + +/* + * throw away the next 'len' bytes in the queue + */ +int +qdiscard(Queue *q, int len) +{ + Block *b; + int dowakeup, n, sofar; + + ilock(q); + for(sofar = 0; sofar < len; sofar += n){ + b = q->bfirst; + if(b == nil) + break; + QDEBUG checkb(b, "qdiscard"); + n = BLEN(b); + if(n <= len - sofar){ + q->bfirst = b->next; + b->next = 0; + q->len -= BALLOC(b); + q->dlen -= BLEN(b); + freeb(b); + } else { + n = len - sofar; + b->rp += n; + q->dlen -= n; + } + } + + /* + * if writer flow controlled, restart + * + * This used to be + * q->len < q->limit/2 + * but it slows down tcp too much for certain write sizes. + * I really don't understand it completely. It may be + * due to the queue draining so fast that the transmission + * stalls waiting for the app to produce more data. - presotto + * + * changed back from q->len < q->limit for reno tcp. 
- jmk + */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } else + dowakeup = 0; + + iunlock(q); + + if(dowakeup) + wakeup(&q->wr); + + return sofar; +} + +/* + * Interrupt level copy out of a queue, return # bytes copied. + */ +int +qconsume(Queue *q, void *vp, int len) +{ + Block *b; + int n, dowakeup; + uchar *p = vp; + Block *tofree = nil; + + /* sync with qwrite */ + ilock(q); + + for(;;) { + b = q->bfirst; + if(b == 0){ + q->state |= Qstarve; + iunlock(q); + return -1; + } + QDEBUG checkb(b, "qconsume 1"); + + n = BLEN(b); + if(n > 0) + break; + q->bfirst = b->next; + q->len -= BALLOC(b); + + /* remember to free this */ + b->next = tofree; + tofree = b; + }; + + if(n < len) + len = n; + memmove(p, b->rp, len); + consumecnt += n; + b->rp += len; + q->dlen -= len; + + /* discard the block if we're done with it */ + if((q->state & Qmsg) || len == n){ + q->bfirst = b->next; + b->next = 0; + q->len -= BALLOC(b); + q->dlen -= BLEN(b); + + /* remember to free this */ + b->next = tofree; + tofree = b; + } + + /* if writer flow controlled, restart */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } else + dowakeup = 0; + + iunlock(q); + + if(dowakeup) + wakeup(&q->wr); + + if(tofree != nil) + freeblist(tofree); + + return len; +} + +int +qpass(Queue *q, Block *b) +{ + int dlen, len, dowakeup; + + /* sync with qread */ + dowakeup = 0; + ilock(q); + if(q->len >= q->limit){ + iunlock(q); + freeblist(b); + return -1; + } + if(q->state & Qclosed){ + len = BALLOC(b); + iunlock(q); + freeblist(b); + return len; + } + + /* add buffer to queue */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + len = BALLOC(b); + dlen = BLEN(b); + QDEBUG checkb(b, "qpass"); + while(b->next){ + b = b->next; + QDEBUG checkb(b, "qpass"); + len += BALLOC(b); + dlen += BLEN(b); + } + q->blast = b; + q->len += len; + q->dlen += dlen; + + if(q->len >= q->limit/2) + q->state |= Qflow; + + if(q->state & 
Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + iunlock(q); + + if(dowakeup) + wakeup(&q->rr); + + return len; +} + +int +qpassnolim(Queue *q, Block *b) +{ + int dlen, len, dowakeup; + + /* sync with qread */ + dowakeup = 0; + ilock(q); + + if(q->state & Qclosed){ + len = BALLOC(b); + iunlock(q); + freeblist(b); + return len; + } + + /* add buffer to queue */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + len = BALLOC(b); + dlen = BLEN(b); + QDEBUG checkb(b, "qpass"); + while(b->next){ + b = b->next; + QDEBUG checkb(b, "qpass"); + len += BALLOC(b); + dlen += BLEN(b); + } + q->blast = b; + q->len += len; + q->dlen += dlen; + + if(q->len >= q->limit/2) + q->state |= Qflow; + + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + iunlock(q); + + if(dowakeup) + wakeup(&q->rr); + + return len; +} + +/* + * if the allocated space is way out of line with the used + * space, reallocate to a smaller block + */ +Block* +packblock(Block *bp) +{ + Block **l, *nbp; + int n; + + for(l = &bp; *l; l = &(*l)->next){ + nbp = *l; + n = BLEN(nbp); + if((n<<2) < BALLOC(nbp)){ + *l = allocb(n); + memmove((*l)->wp, nbp->rp, n); + (*l)->wp += n; + (*l)->next = nbp->next; + freeb(nbp); + } + } + + return bp; +} + +int +qproduce(Queue *q, void *vp, int len) +{ + Block *b; + int dowakeup; + uchar *p = vp; + + /* sync with qread */ + dowakeup = 0; + ilock(q); + + /* no waiting receivers, room in buffer? 
*/ + if(q->len >= q->limit){ + q->state |= Qflow; + iunlock(q); + return -1; + } + + /* save in buffer */ + b = iallocb(len); + if(b == 0){ + iunlock(q); + return 0; + } + memmove(b->wp, p, len); + producecnt += len; + b->wp += len; + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->blast = b; + /* b->next = 0; done by iallocb() */ + q->len += BALLOC(b); + q->dlen += BLEN(b); + QDEBUG checkb(b, "qproduce"); + + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + + if(q->len >= q->limit) + q->state |= Qflow; + iunlock(q); + + if(dowakeup) + wakeup(&q->rr); + + return len; +} + +/* + * copy from offset in the queue + */ +Block* +qcopy(Queue *q, int len, ulong offset) +{ + int sofar; + int n; + Block *b, *nb; + uchar *p; + + nb = allocb(len); + + ilock(q); + + /* go to offset */ + b = q->bfirst; + for(sofar = 0; ; sofar += n){ + if(b == nil){ + iunlock(q); + return nb; + } + n = BLEN(b); + if(sofar + n > offset){ + p = b->rp + offset - sofar; + n -= offset - sofar; + break; + } + QDEBUG checkb(b, "qcopy"); + b = b->next; + } + + /* copy bytes from there */ + for(sofar = 0; sofar < len;){ + if(n > len - sofar) + n = len - sofar; + memmove(nb->wp, p, n); + qcopycnt += n; + sofar += n; + nb->wp += n; + b = b->next; + if(b == nil) + break; + n = BLEN(b); + p = b->rp; + } + iunlock(q); + + return nb; +} + +/* + * called by non-interrupt code + */ +Queue* +qopen(int limit, int msg, void (*kick)(void*), void *arg) +{ + Queue *q; + + q = malloc(sizeof(Queue)); + if(q == 0) + return 0; + + q->limit = q->inilim = limit; + q->kick = kick; + q->arg = arg; + q->state = msg; + + q->state |= Qstarve; + q->eof = 0; + q->noblock = 0; + + return q; +} + +/* open a queue to be bypassed */ +Queue* +qbypass(void (*bypass)(void*, Block*), void *arg) +{ + Queue *q; + + q = malloc(sizeof(Queue)); + if(q == 0) + return 0; + + q->limit = 0; + q->arg = arg; + q->bypass = bypass; + q->state = 0; + + return q; +} + +static int +notempty(void *a) +{ + Queue *q = 
a; + + return (q->state & Qclosed) || q->bfirst != 0; +} + +/* + * wait for the queue to be non-empty or closed. + * called with q ilocked. + */ +static int +qwait(Queue *q) +{ + /* wait for data */ + for(;;){ + if(q->bfirst != nil) + break; + + if(q->state & Qclosed){ + if(++q->eof > 3) + return -1; + if(*q->err && strcmp(q->err, Ehungup) != 0) + return -1; + return 0; + } + + q->state |= Qstarve; /* flag requesting producer to wake me */ + iunlock(q); + sleep(&q->rr, notempty, q); + ilock(q); + } + return 1; +} + +/* + * wait for the queue to be non-empty or closed + */ +void +qsleep(Queue *q) +{ + qlock(&q->rlock); + if(waserror()){ + qunlock(&q->rlock); + nexterror(); + } + ilock(q); + if(q->state & Qclosed){ + iunlock(q); + error(q->err); + } + if(q->bfirst){ + iunlock(q); + goto done; + } + q->state |= Qstarve; + iunlock(q); + sleep(&q->rr, notempty, q); +done: + poperror(); + qunlock(&q->rlock); +} + +/* + * add a block list to a queue + */ +void +qaddlist(Queue *q, Block *b) +{ + /* queue the block */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->len += blockalloclen(b); + q->dlen += blocklen(b); + while(b->next) + b = b->next; + q->blast = b; +} + +/* + * called with q ilocked + */ +Block* +qremove(Queue *q) +{ + Block *b; + + b = q->bfirst; + if(b == nil) + return nil; + q->bfirst = b->next; + b->next = nil; + q->dlen -= BLEN(b); + q->len -= BALLOC(b); + QDEBUG checkb(b, "qremove"); + return b; +} + +/* + * copy the contents of a string of blocks into + * memory. emptied blocks are freed. return + * pointer to first unconsumed block. + */ +Block* +bl2mem(uchar *p, Block *b, int n) +{ + int i; + Block *next; + + for(; b != nil; b = next){ + i = BLEN(b); + if(i > n){ + memmove(p, b->rp, n); + b->rp += n; + return b; + } + memmove(p, b->rp, i); + n -= i; + p += i; + b->rp += i; + next = b->next; + freeb(b); + } + return nil; +} + +/* + * copy the contents of memory into a string of blocks. + * return nil on error. 
+ */ +Block* +mem2bl(uchar *p, int len) +{ + int n; + Block *b, *first, **l; + + first = nil; + l = &first; + if(waserror()){ + freeblist(first); + nexterror(); + } + do { + n = len; + if(n > Maxatomic) + n = Maxatomic; + + *l = b = allocb(n); + setmalloctag(b->base, getcallerpc(&p)); + memmove(b->wp, p, n); + b->wp += n; + p += n; + len -= n; + l = &b->next; + } while(len > 0); + poperror(); + + return first; +} + +/* + * put a block back to the front of the queue + * called with q ilocked + */ +void +qputback(Queue *q, Block *b) +{ + b->next = q->bfirst; + if(q->bfirst == nil) + q->blast = b; + q->bfirst = b; + q->len += BALLOC(b); + q->dlen += BLEN(b); +} + +/* + * flow control, get producer going again + * called with q ilocked + */ +static void +qwakeup_iunlock(Queue *q) +{ + int dowakeup; + + /* if writer flow controlled, restart */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } + else + dowakeup = 0; + + iunlock(q); + + /* wakeup flow controlled writers */ + if(dowakeup){ + if(q->kick) + q->kick(q->arg); + wakeup(&q->wr); + } +} + +/* + * get next block from a queue (up to a limit) + */ +Block* +qbread(Queue *q, int len) +{ + Block *b, *nb; + int n; + + qlock(&q->rlock); + if(waserror()){ + qunlock(&q->rlock); + nexterror(); + } + + ilock(q); + switch(qwait(q)){ + case 0: + /* queue closed */ + iunlock(q); + qunlock(&q->rlock); + poperror(); + return nil; + case -1: + /* multiple reads on a closed queue */ + iunlock(q); + error(q->err); + } + + /* if we get here, there's at least one block in the queue */ + b = qremove(q); + n = BLEN(b); + + /* split block if it's too big and this is not a message queue */ + nb = b; + if(n > len){ + if((q->state&Qmsg) == 0){ + n -= len; + b = allocb(n); + memmove(b->wp, nb->rp+len, n); + b->wp += n; + qputback(q, b); + } + nb->wp = nb->rp + len; + } + + /* restart producer */ + qwakeup_iunlock(q); + + poperror(); + qunlock(&q->rlock); + return nb; +} + +/* + * read a queue. 
if no data is queued, post a Block + * and wait on its Rendez. + */ +long +qread(Queue *q, void *vp, int len) +{ + Block *b, *first, **l; + int blen, n; + + qlock(&q->rlock); + if(waserror()){ + qunlock(&q->rlock); + nexterror(); + } + + ilock(q); +again: + switch(qwait(q)){ + case 0: + /* queue closed */ + iunlock(q); + qunlock(&q->rlock); + poperror(); + return 0; + case -1: + /* multiple reads on a closed queue */ + iunlock(q); + error(q->err); + } + + /* if we get here, there's at least one block in the queue */ + if(q->state & Qcoalesce){ + /* when coalescing, 0 length blocks just go away */ + b = q->bfirst; + if(BLEN(b) <= 0){ + freeb(qremove(q)); + goto again; + } + + /* grab the first block plus as many + * following blocks as will completely + * fit in the read. + */ + n = 0; + l = &first; + blen = BLEN(b); + for(;;) { + *l = qremove(q); + l = &b->next; + n += blen; + + b = q->bfirst; + if(b == nil) + break; + blen = BLEN(b); + if(n+blen > len) + break; + } + } else { + first = qremove(q); + n = BLEN(first); + } + + /* copy to user space outside of the ilock */ + iunlock(q); + b = bl2mem(vp, first, len); + ilock(q); + + /* take care of any left over partial block */ + if(b != nil){ + n -= BLEN(b); + if(q->state & Qmsg) + freeb(b); + else + qputback(q, b); + } + + /* restart producer */ + qwakeup_iunlock(q); + + poperror(); + qunlock(&q->rlock); + return n; +} + +static int +qnotfull(void *a) +{ + Queue *q = a; + + return q->len < q->limit || (q->state & Qclosed); +} + +ulong noblockcnt; + +/* + * add a block to a queue obeying flow control + */ +long +qbwrite(Queue *q, Block *b) +{ + int n, dowakeup; + + n = BLEN(b); + + if(q->bypass){ + (*q->bypass)(q->arg, b); + return n; + } + + dowakeup = 0; + qlock(&q->wlock); + if(waserror()){ + if(b != nil) + freeb(b); + qunlock(&q->wlock); + nexterror(); + } + + ilock(q); + + /* give up if the queue is closed */ + if(q->state & Qclosed){ + iunlock(q); + error(q->err); + } + + /* if nonblocking, don't queue over the 
limit */ + if(q->len >= q->limit){ + if(q->noblock){ + iunlock(q); + freeb(b); + noblockcnt += n; + qunlock(&q->wlock); + poperror(); + return n; + } + } + + /* queue the block */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->blast = b; + b->next = 0; + q->len += BALLOC(b); + q->dlen += n; + QDEBUG checkb(b, "qbwrite"); + b = nil; + + /* make sure other end gets awakened */ + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + iunlock(q); + + /* get output going again */ + if(q->kick && (dowakeup || (q->state&Qkick))) + q->kick(q->arg); + + /* wakeup anyone consuming at the other end */ + if(dowakeup) + wakeup(&q->rr); + + /* + * flow control, wait for queue to get below the limit + * before allowing the process to continue and queue + * more. We do this here so that postnote can only + * interrupt us after the data has been queued. This + * means that things like 9p flushes and ssl messages + * will not be disrupted by software interrupts. + * + * Note - this is moderately dangerous since a process + * that keeps getting interrupted and rewriting will + * queue infinite crud. + */ + for(;;){ + if(q->noblock || qnotfull(q)) + break; + + ilock(q); + q->state |= Qflow; + iunlock(q); + sleep(&q->wr, qnotfull, q); + } + USED(b); + + qunlock(&q->wlock); + poperror(); + return n; +} + +/* + * write to a queue. only Maxatomic bytes at a time is atomic. + */ +int +qwrite(Queue *q, void *vp, int len) +{ + int n, sofar; + Block *b; + uchar *p = vp; + + QDEBUG if(!islo()) + print("qwrite hi %#p\n", getcallerpc(&q)); + + sofar = 0; + do { + n = len-sofar; + if(n > Maxatomic) + n = Maxatomic; + + b = allocb(n); + setmalloctag(b->base, getcallerpc(&q)); + if(waserror()){ + freeb(b); + nexterror(); + } + memmove(b->wp, p+sofar, n); + poperror(); + b->wp += n; + + qbwrite(q, b); + + sofar += n; + } while(sofar < len && (q->state & Qmsg) == 0); + + return len; +} + +/* + * used by print() to write to a queue. 
Since we may be splhi or not in + * a process, don't qlock. + * + * this routine merges adjacent blocks if block n+1 will fit into + * the free space of block n. + */ +int +qiwrite(Queue *q, void *vp, int len) +{ + int n, sofar, dowakeup; + Block *b; + uchar *p = vp; + + dowakeup = 0; + + sofar = 0; + do { + n = len-sofar; + if(n > Maxatomic) + n = Maxatomic; + + b = iallocb(n); + if(b == nil) + break; + memmove(b->wp, p+sofar, n); + b->wp += n; + + ilock(q); + + /* we use an artificially high limit for kernel prints since anything + * over the limit gets dropped + */ + if(q->dlen >= 16*1024){ + iunlock(q); + freeb(b); + break; + } + + QDEBUG checkb(b, "qiwrite"); + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->blast = b; + q->len += BALLOC(b); + q->dlen += n; + + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + + iunlock(q); + + if(dowakeup){ + if(q->kick) + q->kick(q->arg); + wakeup(&q->rr); + } + + sofar += n; + } while(sofar < len && (q->state & Qmsg) == 0); + + return sofar; +} + +/* + * be extremely careful when calling this, + * as there is no reference accounting + */ +void +qfree(Queue *q) +{ + qclose(q); + free(q); +} + +/* + * Mark a queue as closed. No further IO is permitted. + * All blocks are released. + */ +void +qclose(Queue *q) +{ + Block *bfirst; + + if(q == nil) + return; + + /* mark it */ + ilock(q); + q->state |= Qclosed; + q->state &= ~(Qflow|Qstarve); + strcpy(q->err, Ehungup); + bfirst = q->bfirst; + q->bfirst = 0; + q->len = 0; + q->dlen = 0; + q->noblock = 0; + iunlock(q); + + /* free queued blocks */ + freeblist(bfirst); + + /* wake up readers/writers */ + wakeup(&q->rr); + wakeup(&q->wr); +} + +/* + * Mark a queue as closed. Wakeup any readers. Don't remove queued + * blocks. 
+ */ +void +qhangup(Queue *q, char *msg) +{ + /* mark it */ + ilock(q); + q->state |= Qclosed; + if(msg == 0 || *msg == 0) + strcpy(q->err, Ehungup); + else + strncpy(q->err, msg, ERRMAX-1); + iunlock(q); + + /* wake up readers/writers */ + wakeup(&q->rr); + wakeup(&q->wr); +} + +/* + * return non-zero if the q is hungup + */ +int +qisclosed(Queue *q) +{ + return q->state & Qclosed; +} + +/* + * mark a queue as no longer hung up + */ +void +qreopen(Queue *q) +{ + ilock(q); + q->state &= ~Qclosed; + q->state |= Qstarve; + q->eof = 0; + q->limit = q->inilim; + iunlock(q); +} + +/* + * return bytes queued + */ +int +qlen(Queue *q) +{ + return q->dlen; +} + +/* + * return space remaining before flow control + */ +int +qwindow(Queue *q) +{ + int l; + + l = q->limit - q->len; + if(l < 0) + l = 0; + return l; +} + +/* + * return true if we can read without blocking + */ +int +qcanread(Queue *q) +{ + return q->bfirst!=0; +} + +/* + * change queue limit + */ +void +qsetlimit(Queue *q, int limit) +{ + q->limit = limit; +} + +/* + * set blocking/nonblocking + */ +void +qnoblock(Queue *q, int onoff) +{ + q->noblock = onoff; +} + +/* + * flush the output queue + */ +void +qflush(Queue *q) +{ + Block *bfirst; + + /* mark it */ + ilock(q); + bfirst = q->bfirst; + q->bfirst = 0; + q->len = 0; + q->dlen = 0; + iunlock(q); + + /* free queued blocks */ + freeblist(bfirst); + + /* wake up readers/writers */ + wakeup(&q->wr); +} + +int +qfull(Queue *q) +{ + return q->state & Qflow; +} + +int +qstate(Queue *q) +{ + return q->state; +} + +/* + * set a bypass function after a queue has been created + */ +void +qsetbypass(Queue *q, void (*bypass)(void*, Block*), void *arg) +{ + ilock(q); + q->bypass = bypass; + q->arg = arg; + q->limit = 0; + iunlock(q); + + qflush(q); +} + +int +qblen(Queue *q) +{ + int n; + Block *b; + + n = 0; + ilock(q); + for(b = q->bfirst; b != nil; b = b->next) + n++; + iunlock(q); + return n; +} diff -Nru /sys/src/9k/port/qlock.c /sys/src/9k/port/qlock.c --- 
/sys/src/9k/port/qlock.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/qlock.c	Wed Dec 9 00:00:00 2015
@@ -0,0 +1,279 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include
+
+struct {
+	ulong rlock;
+	ulong rlockq;
+	ulong wlock;
+	ulong wlockq;
+	ulong qlock;
+	ulong qlockq;
+} rwstats;
+
+void
+qlock(QLock *q)
+{
+	Proc *p;
+	void (*pt)(Proc*, int, vlong, vlong);
+
+	if(m->ilockdepth != 0)	/* diagnostics end in \n, as the same messages in priqlock do */
+		print("qlock: %#p: ilockdepth %d\n", getcallerpc(&q), m->ilockdepth);
+	if(up != nil && up->nlocks)
+		print("qlock: %#p: nlocks %d\n", getcallerpc(&q), up->nlocks);
+
+	lock(&q->use);
+	rwstats.qlock++;
+	if(!q->locked) {	/* uncontended: take it and record the caller */
+		q->locked = 1;
+		q->qpc = getcallerpc(&q);
+		unlock(&q->use);
+		return;
+	}
+	if(up == nil)
+		panic("qlock");
+	rwstats.qlockq++;
+	p = q->tail;	/* contended: append up to the tail of the wait list */
+	if(p == 0)
+		q->head = up;
+	else
+		p->qnext = up;
+	q->tail = up;
+	up->qnext = 0;
+	up->state = Queueing;
+	up->qpc = getcallerpc(&q);
+	if(up->trace && (pt = proctrace) != nil)
+		pt(up, SSleep, 0, Queueing | (up->qpc<<8));
+	unlock(&q->use);
+	sched();
+}
+
+int
+canqlock(QLock *q)
+{
+	if(!canlock(&q->use))
+		return 0;
+	if(q->locked){
+		unlock(&q->use);
+		return 0;
+	}
+	q->locked = 1;
+	q->qpc = getcallerpc(&q);
+	unlock(&q->use);
+
+	return 1;
+}
+
+void
+qunlock(QLock *q)
+{
+	Proc *p;
+
+	lock(&q->use);
+	if (q->locked == 0)
+		print("qunlock called with qlock not held, from %#p\n",
+			getcallerpc(&q));
+	p = q->head;
+	if(p){	/* hand the lock straight to the first waiter; q->locked stays set */
+		q->head = p->qnext;
+		if(q->head == 0)
+			q->tail = 0;
+		unlock(&q->use);
+		ready(p);
+		return;
+	}
+	q->locked = 0;
+	q->qpc = 0;
+	unlock(&q->use);
+}
+
+void
+priqlock(QLock *q)
+{
+	Proc *p;
+	void (*pt)(Proc*, int, vlong, vlong);
+
+	if(m->ilockdepth != 0)
+		print("qlock: %#p: ilockdepth %d\n", getcallerpc(&q), m->ilockdepth);
+	if(up != nil && up->nlocks)
+		print("qlock: %#p: nlocks %d\n", getcallerpc(&q), up->nlocks);
+
+	lock(&q->use);
+	if(!q->locked) {
+		//q->p = up;
+		q->locked = 1;
+		q->qpc = getcallerpc(&q);
+
unlock(&q->use); + return; + } + if(up == nil) + panic("priqlock"); +// if(q->p == up) +// panic("qlock deadlock. pid=%ld cpc=%lux qpc=%lux\n", up->pid, getcallerpc(&q), up->qpc); + p = up->qnext = q->head; + if(p == nil) + q->tail = up; + q->head = up; + up->state = Queueing; + up->qpc = getcallerpc(&q); + if(up->trace && (pt = proctrace) != nil) + pt(up, SSleep, 0, Queueing | (up->qpc<<8)); +// if(kproflock) +// kproflock(up->qpc); + unlock(&q->use); + sched(); +} + +void +rlock(RWlock *q) +{ + Proc *p; + void (*pt)(Proc*, int, vlong, vlong); + uintptr pc; + + lock(&q->use); + rwstats.rlock++; + if(q->writer == 0 && q->head == nil){ + /* no writer, go for it */ + q->readers++; + unlock(&q->use); + return; + } + + rwstats.rlockq++; + p = q->tail; + if(up == nil) + panic("rlock"); + if(p == 0) + q->head = up; + else + p->qnext = up; + q->tail = up; + up->qnext = 0; + up->state = QueueingR; + if(up->trace && (pt = proctrace) != nil){ + pc = getcallerpc(&q); + pt(up, SSleep, 0, QueueingR | (pc<<8)); + } + unlock(&q->use); + sched(); +} + +void +runlock(RWlock *q) +{ + Proc *p; + + lock(&q->use); + p = q->head; + if(--(q->readers) > 0 || p == nil){ + unlock(&q->use); + return; + } + + /* start waiting writer */ + if(p->state != QueueingW) + panic("runlock"); + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + q->writer = 1; + unlock(&q->use); + ready(p); +} + +void +wlock(RWlock *q) +{ + Proc *p; + uintptr pc; + void (*pt)(Proc*, int, vlong, vlong); + + lock(&q->use); + rwstats.wlock++; + if(q->readers == 0 && q->writer == 0){ + /* noone waiting, go for it */ + q->wpc = getcallerpc(&q); + q->wproc = up; + q->writer = 1; + unlock(&q->use); + return; + } + + /* wait */ + rwstats.wlockq++; + p = q->tail; + if(up == nil) + panic("wlock"); + if(p == nil) + q->head = up; + else + p->qnext = up; + q->tail = up; + up->qnext = 0; + up->state = QueueingW; + if(up->trace && (pt = proctrace) != nil){ + pc = getcallerpc(&q); + pt(up, SSleep, 0, QueueingW|(pc<<8)); + } + 
unlock(&q->use); + sched(); +} + +void +wunlock(RWlock *q) +{ + Proc *p; + + lock(&q->use); + p = q->head; + if(p == nil){ + q->writer = 0; + unlock(&q->use); + return; + } + if(p->state == QueueingW){ + /* start waiting writer */ + q->head = p->qnext; + if(q->head == nil) + q->tail = nil; + unlock(&q->use); + ready(p); + return; + } + + if(p->state != QueueingR) + panic("wunlock"); + + /* waken waiting readers */ + while(q->head != nil && q->head->state == QueueingR){ + p = q->head; + q->head = p->qnext; + q->readers++; + ready(p); + } + if(q->head == nil) + q->tail = nil; + q->writer = 0; + unlock(&q->use); +} + +/* same as rlock but punts if there are any writers waiting */ +int +canrlock(RWlock *q) +{ + lock(&q->use); + rwstats.rlock++; + if(q->writer == 0 && q->head == nil){ + /* no writer, go for it */ + q->readers++; + unlock(&q->use); + return 1; + } + unlock(&q->use); + return 0; +} diff -Nru /sys/src/9k/port/qmalloc.c /sys/src/9k/port/qmalloc.c --- /sys/src/9k/port/qmalloc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/qmalloc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1117 @@ +/* + * malloc + * + * C B Weinstock and W A Wulf, "Quick Fit: An Efficient Storage Algorithm for Heap Storage Allocation", + * SIGPLAN Notices, 23(10), 144-148 (1988). + * A Iyengar, "Scalability of Dynamic Storage Allocation Algorithms", IEEE Proceedings Frontiers of + * Massively Parallel Computing, 223-232 (1996). + * Yi Feng and Emery D Berger, "A Locality-Improving Dynamic Memory Allocator", + * ACM Proceedings of the 2005 workshop on Memory system performance, 68-77 (2005). 
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +typedef union Header Header; +typedef struct Qlist Qlist; +typedef struct Region Region; +typedef struct BigAlloc BigAlloc; + +enum{ + Bpw= 8*sizeof(uint), + Busy= (uint)1<<(Bpw-1), /* set in Header.s.size of allocated blocks */ + Poisoning= 1, /* or DBGFLG */ /* DEBUG: poison block */ +}; + +/* + * Memory is allocated in units of Header, + * which also provides the basic alignment. + * Every usable block is at least 2 units: the header + * and at least one unit of data. The block heading + * the misc list has size 0 and will never be allocated. + * + * Yi Feng and Berger eliminate the object headers for + * quick fit items, since they are never coalesced, + * but we retain headers to track ownership. + */ +union Header +{ + struct { + uint size; /* size in units including Header, and Busy flag */ + uint tag; /* only for debugging: set malloctag */ + Header* next; /* free list */ + } s; + uchar _align[8]; +}; + +enum{ + Unitsz= sizeof(Header), /* must be sizeof(Header) */ + MinUnits= 2, + Align= Unitsz, /* minimum alignment */ +}; + +/* application pointers to and from headers */ +#define D2H(p) ((Header*)(p)-1) +#define H2D(h) ((void*)((h)+1)) + +#define XX(P,N) iprint("[%d]: %#p -> %#p\n", (N), (P), (P)->s.next) +#undef XX +#define XX(P,N) + +struct Qlist +{ + Lock lk; + Header* first; + + uint nalloc; +}; + +struct Region +{ + Lock lk; + Region* down; /* run as a stack */ + Header* start; + Header* tail; /* next available, if any */ + Header* end; + uint size; /* remaining space, in Unitsz */ + char* base; + char* limit; +}; + +/* + * big mallocs out of basealloc + */ +enum +{ + BHashSize= 67, /* prime */ + NBig= 512, + BigThreshold= 8*1024*1024, +}; + +struct BigAlloc +{ + uintptr va; + uintptr nbytes; + uintptr tag; + BigAlloc* next; +}; + +static struct { + Lock; + BigAlloc* free; + BigAlloc pool[NBig]; /* could qalloc them */ + + BigAlloc* set[BHashSize]; 
+} bighash; + +#define BIGHASH(p) ((uintptr)p%BHashSize) +#define isbigalloc(p) ((uintptr)(p)>=KSEG2 && (uintptr)(p)tail; + if(n == 0) + panic("tailalloc size"); + if(r->size < n) + panic("tailalloc"); + r->size -= n; + r->tail += n; + p->s.size = Busy | n; + return p; +} + +static int +freetail(Header *h) +{ + if(h+h->s.size == tail->tail) { /* worthwhile locking? */ + MLOCK; + if(h+h->s.size == tail->tail){ + /* block before tail */ + tail->tail = h; + tail->size += h->s.size; + qstats[QSfreetail]++; + MUNLOCK; + return 1; + } + MUNLOCK; + } + return 0; +} + +static void +freequick(Header *t) +{ + Qlist *ql; + + ql = &quicklist[t->s.size]; + ilock(&ql->lk); + t->s.next = ql->first; + ql->first = t; + iunlock(&ql->lk); + qstats[QSfreequick]++; +} + +int +malloclocked(void) +{ + if(canlock(&mainlock)){ + unlock(&mainlock); + return 0; + } + return 1; +} + +static void +showchain(Header *p) +{ + Header *q; + + iprint("chain %#p:\n", p); + q = p; + do{ + iprint("%#p [%#ux %#ux] -> %#p\n", q, q->s.size, q->s.tag, q->s.next); + }while((q = q->s.next) != nil && q->s.next != p); +} + +static void +badchain(char *tag, char *why, Header *p, Header *q, Header *r, void *h, Header *split) +{ + showchain(&misclist); + if(split != nil) + panic("%s: %s: %#p %#ux %#ux -> %#p %#ux %#ux [%#p -| %#p]%s %#p %#ux %#ux\n", tag, why, + q, q->s.size, q->s.tag, + r, r->s.size, r->s.tag, + p, h, " split", split, split->s.size, split->s.tag); + else + panic("%s: %s: %#p %#ux %#ux -> %#p %#ux %#ux [%#p -| %#p]\n", tag, why, + q, q->s.size, q->s.tag, + r, r->s.size, r->s.tag, + p, h); +} + +static Header* +contained(Header *b, Header *chain) +{ + Header *q; + int i; + + i = 0; + q = chain; + do{ + if(q->s.next == nil) + panic("nil contained"); + if(q->s.next == b) + return q; + }while((q = q->s.next) != chain && ++i < 100000); + if(i >= 100000) + print("long check chain\n"); + return 0; +} + +static void +checkchain(Header *p, char *tag, void *h, Header *split) +{ + Header *q, *r; + int i; + 
+ q = p; + i = 0; + do{ + if(q->s.next == nil) + badchain(tag, "nil next", p, q, q, h, split); + if((r = q->s.next)->s.size & Busy) + badchain(tag, "busy", p, q, r, h, split); + if(split && r == h) + badchain(tag, "mentioned", p, q, r, h, split); + }while((q = q->s.next) != p && ++i < 100000); + if(i >= 100000) + print("long chain\n"); +} + +static void* +qallocalign(usize nbytes, uintptr align, long offset, usize span, uintptr pc) +{ + Qlist *qlist; + uintptr aligned; + Header **pp, *p, *q, *r; + uint n, nunits, alunits, maxunits, gap; + + if(nbytes == 0 || offset != 0 || span != 0) + return nil; + + if(!ISPOWEROF2(align)) + panic("qallocalign"); + + if(align <= Align) + return qalloc(nbytes, pc); + + qstats[QSmalign]++; + nunits = NUNITS(nbytes); + if(nunits <= NQUICK){ + /* + * Look for a conveniently aligned block + * on one of the quicklists. + */ + qlist = &quicklist[nunits]; + ilock(&qlist->lk); + for(pp = &qlist->first; (p = *pp) != nil; pp = &p->s.next){ + if(ALIGNED(p+1, align)){ + *pp = p->s.next; + p->s.size |= Busy; + qstats[QSmalignquick]++; + iunlock(&qlist->lk); + p->s.tag = pc; + return H2D(p); + } + } + iunlock(&qlist->lk); + } + + alunits = HOWMANY(align, Unitsz); + maxunits = nunits + alunits + MinUnits; + MLOCK; + if(maxunits > tail->size) { + /* hard way */ + q = rover; + do { + p = q->s.next; + aligned = ALIGNED(p+1, align); + if(aligned && p->s.size >= nunits || p->s.size >= maxunits){ + + /* + * This block is big enough + */ + qstats[QSmalignrover]++; + + /* + * Leave behind any runt in front of the alignment; + * it will be coalesced if the aligned memory is released. 
+				 */
+				if(!aligned){
+					r = p;	/* r: the free block, which becomes the leading runt */
+					p = ALIGNHDR(p+1, align) - 1;	/* p: header placed so that p+1 is aligned */
+					if(!ALIGNED(p+1, align))
+						panic("qmallocalign");
+					n = p - r;	/* units left in front of the aligned header */
+					p->s.size = r->s.size - n;
+					if(r->s.size <= 1 || n <= 1)
+						panic("qallocalign size");
+					r->s.size = n;	/* was p->s.size = n: shrink the runt, not the aligned block */
+					p->s.next = r->s.next;
+					q = r;
+					qstats[QSmalignfront]++;
+				}
+
+				q->s.next = p->s.next;
+				XX(q,1);
+				rover = q;
+
+				/*
+				 * Leave behind any residue after the aligned block.
+				 */
+				if(p->s.size > nunits+MinUnits){
+					r = p + nunits;
+					r->s.size = p->s.size - nunits;
+					r->s.next = q->s.next;
+					q->s.next = r;
+					XX(q,2);
+					qstats[QSmalignback]++;
+					p->s.size = nunits;
+				}
+
+				p->s.size |= Busy;
+				MUNLOCK;
+
+				p->s.next = nil;
+				p->s.tag = pc;
+				return H2D(p);
+			}
+		} while((q = p) != rover);
+
+		/* grow tail */
+		if(!morecore(maxunits+MinUnits)){
+			MUNLOCK;
+			return nil;
+		}
+	}
+
+	q = tail->tail + 1;
+	p = ALIGNHDR(q, align);
+	gap = p - q;
+	if(gap == 0){
+		p = tailalloc(tail, nunits);
+		if(!ALIGNED(p+1, align))
+			panic("qmallocalign 2");
+		qstats[QSmaligntail]++;
+	}else{
+		if(gap < MinUnits)
+			gap += alunits;
+		if(tail->size < nunits+gap)
+			panic("qallocalign: miscalculation tail=%ud maxunits=%ud nunits=%ud gap=%ud", tail->size, maxunits, nunits, gap);
+		/*
+		 * Save the residue before the aligned allocation
+		 * and free it after the tail pointer has been bumped
+		 * for the main allocation.
+ */ + r = tailalloc(tail, gap); + p = tailalloc(tail, nunits); + if(!ALIGNED(p+1, align)) + panic("qmallocalign 3"); + qstats[QSmalignnottail]++; + freemisc(r); /* put on misc list to allow combining if this block is freed */ + } + MUNLOCK; + + p->s.next = nil; + p->s.tag = pc; + return H2D(p); +} + +static void* +qalloc(usize nbytes, uintptr pc) +{ + Qlist *qlist; + Header *p, *q, *t; + uint nunits, u; + int split; + + if(nbytes == 0) + return nil; + + nunits = NUNITS(nbytes); + for(u = nunits; u <= NQUICK; u++){ + qlist = &quicklist[u]; + ilock(&qlist->lk); + if((p = qlist->first) != nil){ + qlist->first = p->s.next; + qlist->nalloc++; + iunlock(&qlist->lk); + p->s.next = nil; + if(p->s.size >= nunits+2*MinUnits){ /* don't make blocks pointlessly small */ + t = p; + t->s.size -= nunits; + p += t->s.size; + p->s.size = nunits; + freequick(t); + qstats[QSsplitquick]++; + } + p->s.size |= Busy; + p->s.tag = pc; + return H2D(p); + } + iunlock(&qlist->lk); + } + + MLOCK; + if(nunits > tail->size) { + /* hard way */ + q = rover; + split = 0; + checkchain(q, "qalloc-1", rover, 0); + do { + p = q->s.next; + if(p->s.size & Busy) + panic("qalloc: busy: %#p %#ux ~ %#ux\n", p, p->s.size, p->s.tag); + if(p->s.next->s.size & Busy) + panic("qalloc+: busy"); + if(p->s.next->s.next->s.size & Busy) + panic("qalloc++: busy"); + if(p->s.size >= nunits) { + if(p->s.size > nunits+MinUnits) { /* split block; return tail */ + p->s.size -= nunits; + p += p->s.size; + p->s.size = nunits; + split = 1; + if(contained(p, rover)) + badchain("qalloc-3", "contained", rover, q, q->s.next, p, q->s.next); + qstats[QSsplitmisc]++; + }else{ + q->s.next = p->s.next; + XX(q,3); + } + rover = q; + qstats[QSmallocrover]++; + if(contained(p, rover)) + badchain("qalloc-4", "contained", rover, q, p, p, nil); + checkchain(q, "qalloc-2", p, split? 
q->s.next: nil); + p->s.size |= Busy; + MUNLOCK; + p->s.tag = pc; + return H2D(p); + } + } while((q = p) != rover); + + /* grow tail */ + if(!morecore(nunits)){ + MUNLOCK; + return nil; + } + } + p = tailalloc(tail, nunits); + MUNLOCK; + p->s.tag = pc; + return H2D(p); +} + +static void +freemisc(Header *p) +{ + Header *q, *x; + + p->s.size &= ~Busy; + q = rover; + checkchain(q, "qfree-1", p, 0); + for(; !(p > q && p < q->s.next); q = q->s.next) + if(q >= q->s.next && (p > q || p < q->s.next)){ /* put freed block at start or end of arena */ + iprint("q==%#p %#ux %#p p==%#p %#ux\n", q, q->s.size, q->s.next, p, p->s.size); + break; + } + if((x = contained(p, &misclist)) != nil) + badchain("qfree-1a", "contained", x, q, p, p, nil); + if(p+p->s.size == q->s.next) { + p->s.size += q->s.next->s.size; + p->s.next = q->s.next->s.next; + XX(p,4); + qstats[QSfreenext]++; + }else{ + p->s.next = q->s.next; + XX(p,5); + } + if(q+q->s.size == p) { + q->s.size += p->s.size; + q->s.next = p->s.next; + XX(q,6); + qstats[QSfreeprev]++; + if(contained(p, &misclist)) + badchain("qfree-1b", "contained", rover, q, p, p, nil); + }else{ + q->s.next = p; + XX(q,7); + } + checkchain(q, "qfree-2", p, 0); + rover = q; +} + +ulong +msize(void* ap) +{ + BigAlloc *b; + Header *h; + uint nunits; + + if(ap == nil) + return 0; + + if(isbigalloc(ap)){ + b = bigfind(ap); + if(b != nil) + return b->nbytes; + } + h = D2H(ap); + nunits = h->s.size & ~Busy; + if((h->s.size & Busy) == 0 || nunits == 0) + panic("msize: corrupt allocation arena"); + + return (nunits - 1) * Unitsz; +} + +static void +mallocreadfmt(char* s, char* e) +{ + char *p; + Header *q; + int i, n; + Qlist *qlist; + Region *r; + uintmem t, u; + + p = s; + MLOCK; + t = 0; + n = 0; + u = 0; + for(r = tail; r != nil; r = r->down){ + p = seprint(p, e, "reg%d: %#p %#p %#p : %#p %#p\n", n, r, r->base, r->limit, r->start, r->tail); + t += r->limit - r->base; + u += (r->tail - r->start)*Unitsz; + n++; + } + MUNLOCK; + p = seprint(p, e, "%P 
kernel malloc %P used %d regions\n", t, u, n); + p = seprint(p, e, "0/0 kernel draw\n"); // keep scripts happy + + t = 0; + for(i = 0; i <= NQUICK; i++) { + n = 0; + qlist = &quicklist[i]; + ilock(&qlist->lk); + for(q = qlist->first; q != nil; q = q->s.next) + n++; + iunlock(&qlist->lk); + + if(n != 0) + p = seprint(p, e, "q%d %ud %ud %ud\n", i, n, n*i*Unitsz, qlist->nalloc); + t += n * i*Unitsz; + } + p = seprint(p, e, "quick: %P bytes total\n", t); + + MLOCK; + if((q = rover) != nil){ + i = t = 0; + do { + t += q->s.size; + i++; +// p = seprint(p, e, "m%d\t%#p\n", q->s.size, q); + } while((q = q->s.next) != rover); + + p = seprint(p, e, "rover: %d blocks %P bytes total\n", + i, t*Unitsz); + } + MUNLOCK; + + for(i = 0; i < nelem(qstats); i++) + if(qstats[i] != 0) + p = seprint(p, e, "%s: %ud\n", qstatname[i], qstats[i]); + USED(p); +} + +long +mallocreadsummary(Chan*, void *a, long n, long offset) +{ + char *alloc; + + alloc = malloc(READSTR); + if(waserror()){ + free(alloc); + nexterror(); + } + mallocreadfmt(alloc, alloc+READSTR); + n = readstr(offset, a, n, alloc); + poperror(); + free(alloc); + + return n; +} + +static void +coalesce(Region *r) +{ + Header *p, *q; + + ilock(&r->lk); + /* TO DO: need to re-establish the local free list */ + for(p = r->start; p != r->tail;){ + if((p->s.size & Busy) == 0){ + if((q = p->s.next) != nil && (q->s.size & Busy) == 0) + p->s.size += q->s.size; + }else + p += p->s.size & ~Busy; + } + iunlock(&r->lk); +} + +/* + * big allocations use basealloc + */ +static BigAlloc* +bigfind(void *p) +{ + BigAlloc *b, **l; + + /* quick check without lock is fine: p can't be added meanwhile */ + l = &bighash.set[BIGHASH(p)]; + if(*l == nil) + return nil; + ilock(&bighash); + for(; (b = *l) != nil; l = &b->next) + if(b->va == (uintptr)p) + break; + iunlock(&bighash); + return b; +} + +static void +bignote(void *p, usize nbytes, uintptr pc) +{ + BigAlloc *b, **l; + + ilock(&bighash); + b = bighash.free; + if(b == nil) + panic("bigstore: no 
free structures");
+	bighash.free = b->next;
+	b->va = (uintptr)p;
+	b->nbytes = nbytes;
+	b->tag = pc;
+	l = &bighash.set[BIGHASH(p)];
+	b->next = *l;
+	*l = b;
+	iunlock(&bighash);
+}
+
+static void*
+bigalloc(usize nbytes, usize align, uintptr pc)
+{
+	void *p;
+
+	p = basealloc(nbytes, align, &nbytes);
+	if(p == nil)
+		return p;
+	bignote(p, nbytes, pc);
+	return p;
+}
+
+static void
+bigfree(void *p)
+{
+	BigAlloc **l, *b;
+	uintmem used;
+
+	if(p == nil)
+		return;
+	ilock(&bighash);
+	l = &bighash.set[BIGHASH(p)];
+	for(; (b = *l) != nil; l = &b->next)
+		if(b->va == (uintptr)p){
+			used = b->nbytes;
+			*l = b->next;
+			b->next = bighash.free;
+			bighash.free = b;
+			iunlock(&bighash);
+			basefree(p, used);
+			return;
+		}
+	iunlock(&bighash);
+	panic("bigfree");
+}
+
+typedef struct Rov Rov;
+struct Rov{
+	uint tag;
+	uint size;
+};
+static Rov rovers[2048];
+
+void
+mallocsummary(void)
+{
+	Header *q;
+	int i, n, t;
+	Qlist *qlist;
+
+	t = 0;
+	for(i = 0; i <= NQUICK; i++) {
+		n = 0;
+		qlist = &quicklist[i];
+		ilock(&qlist->lk);
+		for(q = qlist->first; q != nil; q = q->s.next){
+			if(q->s.size != i)
+				DBG("q%d\t%#p\t%ud\n", i, q, q->s.size);
+			n++;
+		}
+		iunlock(&qlist->lk);
+
+		t += n * i*Unitsz;
+	}
+	print("quick: %ud bytes total\n", t);
+
+	MLOCK;
+	i = t = 0;	/* reset first: with a nil rover the i != 0 test below must see 0, not NQUICK+1 */
+	if((q = rover) != nil){
+		do {
+			t += q->s.size;
+			if(i < nelem(rovers)){
+				rovers[i].tag = q->s.tag;
+				rovers[i].size = q->s.size;
+			}
+			i++;
+		} while((q = q->s.next) != rover);
+	}
+	MUNLOCK;
+
+	if(i != 0){
+		print("rover: %d blocks %ud bytes total\n",
+			i, t*Unitsz);
+		while(--i >= 0)
+			if(i < nelem(rovers) && rovers[i].size != 0)
+				print("R%d: %#8.8ux %ud\n", i, rovers[i].tag, rovers[i].size);
+	}
+
+	for(i = 0; i < nelem(qstats); i++){
+		if(qstats[i] == 0)
+			continue;
+		print("%s: %ud\n", qstatname[i], qstats[i]);
+	}
+}
+
+void
+free(void* ap)
+{
+	Header *h;
+	BigAlloc *b;
+	usize nunits;
+
+	if(ap == nil)
+		return;
+	qstats[QSfree]++;
+	if(isbigalloc(ap)){
+		b = bigfind(ap);
+		if(b != nil){
+
bigfree(ap); + return; + } + } + h = D2H(ap); + nunits = h->s.size; + if((nunits & Busy) == 0) + panic("free: already free %#p: freed %#p tag %#ux", ap, getcallerpc(&ap), h->s.tag); + nunits &= ~Busy; + h->s.size = nunits; + if(nunits < MinUnits) + panic("free: empty block: corrupt allocation arena"); + if(memprof != nil) + memprof(ap, h->s.tag, (nunits-1)*Unitsz, -1); + if(Poisoning) + memset(h+1, 0xAA, (nunits-1)*Unitsz); + if(!freetail(h)){ + if(nunits > NQUICK){ + MLOCK; + freemisc(h); + MUNLOCK; + }else + freequick(h); + } +} + +void* +malloc(ulong size) +{ + void* v; + + qstats[QSmalloc]++; +if(size > 1536*1024)print("malloc %lud %#p\n", size, getcallerpc(&size)); + if(size >= BigThreshold) + v = bigalloc(size, 0, getcallerpc(&size)); + else + v = qalloc(size, getcallerpc(&size)); + if(v != nil) + memset(v, 0, size); + return v; +} + +void* +mallocz(ulong size, int clr) +{ + void *v; + + qstats[QSmalloc]++; +if(size > 1900*1024)print("mallocz %lud %#p\n", size, getcallerpc(&size)); + if(size >= BigThreshold) + v = bigalloc(size, 0, getcallerpc(&size)); + else + v = qalloc(size, getcallerpc(&size)); + if(v == nil) + return nil; + if(clr) + memset(v, 0, size); + return v; +} + +void* +mallocalign(ulong nbytes, ulong align, long offset, ulong span) +{ + void *v; + + qstats[QSmalloc]++; + if(span != 0 && align <= span){ + if(nbytes > span) + return nil; + align = span; + span = 0; + } + if(align <= Align) + return mallocz(nbytes, 1); + +if(nbytes > 1900*1024)print("mallocalign %lud %lud %#p\n", nbytes, align, getcallerpc(&nbytes)); + + if(nbytes >= BigThreshold) + v = bigalloc(nbytes, align, getcallerpc(&nbytes)); + else + v = qallocalign(nbytes, align,offset, span, getcallerpc(&nbytes)); + if(v != nil){ + if(align && (uintptr)v & (align-1)) + panic("mallocalign %#p %#lux", v, align); + memset(v, 0, nbytes); /* leave it to caller? 
*/ + } + return v; +} + +void* +smalloc(ulong size) +{ + void *v; + + while((v = malloc(size)) == nil) + tsleep(&up->sleep, return0, 0, 100); + setmalloctag(v, getcallerpc(&size)); + memset(v, 0, size); + + return v; +} + +void* +realloc(void* ap, ulong size) +{ + void *v; + Header *h; + BigAlloc *b; + ulong osize; + uint nunits, ounits; + int delta; + Region *t; + + /* + * Easy stuff: + * free and return nil if size is 0 + * (implementation-defined behaviour); + * behave like malloc if ap is nil; + * check for arena corruption; + * do nothing if units are the same. + */ + if(size == 0){ + free(ap); + return nil; + } + if(ap == nil){ + v = malloc(size); + if(v != nil) + setmalloctag(v, getcallerpc(&ap)); + return v; + } + + if(!isbigalloc(ap) || (b = bigfind(ap)) == nil){ + h = D2H(ap); + ounits = h->s.size & ~Busy; + if((h->s.size & Busy) == 0 || ounits == 0) + panic("realloc: corrupt allocation arena"); + + if((nunits = NUNITS(size)) == ounits) + return ap; + + /* + * Slightly harder: + * if this allocation abuts the tail of a region, try to adjust the tail + */ + MLOCK; + for(t = tail; t != nil; t = t->down){ + if(t->tail != nil && h+ounits == t->tail){ + delta = nunits-ounits; + if(delta < 0 || t->size >= delta){ + h->s.size = nunits | Busy; + t->size -= delta; + t->tail += delta; + MUNLOCK; + return ap; + } + } + } + MUNLOCK; + osize = (ounits-1)*Unitsz; + }else + osize = b->nbytes; + + /* + * Too hard (or can't be bothered): + * allocate, copy and free. + * The original block must be unchanged on failure. 
+	 */
+	if((v = malloc(size)) != nil){
+		setmalloctag(v, getcallerpc(&ap));	/* getcallerpc wants the first argument; was &size */
+		if(size < osize)
+			osize = size;
+		memmove(v, ap, osize);
+		free(ap);
+	}
+
+	return v;
+}
+
+void
+setmalloctag(void *a, ulong tag)
+{
+	Header *h;
+	BigAlloc *b;
+
+	if(isbigalloc(a) && (b = bigfind(a)) != nil){
+		b->tag = tag;
+		if(memprof != nil)
+			memprof(a, tag, b->nbytes, 2);
+		return;
+	}
+	h = D2H(a);
+	if((h->s.size & Busy) == 0)
+		panic("setmalloctag free %#p %#lux [%#ux %#ux] %#p", a, tag, h->s.size, h->s.tag, getcallerpc(&a));
+	h->s.tag = tag;
+	if(memprof != nil)	/* strip Busy before turning units into bytes, as free does */
+		memprof(a, tag, ((h->s.size & ~Busy)-1)*Unitsz, 2);
+}
+
+ulong
+getmalloctag(void *a)
+{
+	BigAlloc *b;
+
+	if(a == nil)
+		return 0;
+	if(isbigalloc(a) && (b = bigfind(a)) != nil)
+		return b->tag;
+	return D2H(a)->s.tag;
+}
+
+void
+mallocinit(void)
+{
+	BigAlloc *p, *pe;
+
+	if(tail != nil)
+		return;
+
+	rover = &misclist;
+	rover->s.next = rover;
+	pe = &bighash.pool[nelem(bighash.pool)-1];
+	bighash.free = p = bighash.pool;
+	for(; p < pe; p++)
+		p->next = p+1;
+	p->next = nil;
+	if(!morecore(BigThreshold/Unitsz))
+		panic("mallocinit");
+	print("base %#p bound %#p nunits %lud\n", tail->start, tail->end, tail->end - tail->start);
+}
+
+/*
+ * get some space from basealloc; returns 1 on success, 0 if basealloc fails
+ */
+static int
+morecore(uint nunits)
+{
+	usize nbytes;
+	char *p;
+	Region *r;
+
+	if(nunits < NUNITS(256*KiB))
+		nunits = NUNITS(256*KiB);
+	nbytes = nunits*Unitsz + sizeof(Region) + Unitsz;
+	p = basealloc(nbytes, Align, &nbytes);
+	if(p == nil)
+		return 0;
+	/* build a new region if current one can't be extended */
+	if((r = tail) == nil || p != r->limit){
+		r = (Region*)p;
+		r->base = p;
+		r->start= ALIGNHDR(r+1, Align);
+		r->tail = r->start;
+		r->down = tail;
+		tail = r;
+	}
+	r->limit = p+nbytes;
+	r->end = r->start + (r->limit - (char*)r->start)/Unitsz;
+	r->size = r->end - r->tail;
+	return 1;
+}
+
+void
+setmemprof(void (*f)(void*, ulong, usize, int))
+{
+	memprof = f;
+}
+
+/*
+ * Mstate :: base (Used|Free Size)*
+ */
+void
+snapmemarena(void) +{ + /* just the available parts */ +} diff -Nru /sys/src/9k/port/random.c /sys/src/9k/port/random.c --- /sys/src/9k/port/random.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/random.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,158 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static struct +{ + QLock; + Rendez producer; + Rendez consumer; + ulong randomcount; + uchar buf[1024]; + uchar *ep; + uchar *rp; + uchar *wp; + uchar next; + uchar wakeme; + uchar filled; + ushort bits; + ulong randn; + int target; +} rb; + +static int +rbnotfull(void*) +{ + int i; + + i = rb.wp - rb.rp; + if(i < 0) + i += sizeof(rb.buf); + return i < rb.target; +} + +static int +rbnotempty(void*) +{ + return rb.wp != rb.rp; +} + +static void +genrandom(void*) +{ + up->basepri = PriNormal; + up->priority = up->basepri; + + for(;;){ + for(;;) + if(++rb.randomcount > 100000) + break; + if(anyhigher()) + sched(); + if(rb.filled || !rbnotfull(0)) + sleep(&rb.producer, rbnotfull, 0); + } +} + +/* + * produce random bits in a circular buffer + */ +static void +randomclock(void) +{ + uchar *p; + + if(rb.randomcount == 0) + return; + + if(!rbnotfull(0)) { + rb.filled = 1; + return; + } + + rb.bits = (rb.bits<<2) ^ rb.randomcount; + rb.randomcount = 0; + + rb.next++; + if(rb.next != 8/2) + return; + rb.next = 0; + + *rb.wp ^= rb.bits; + p = rb.wp+1; + if(p == rb.ep) + p = rb.buf; + rb.wp = p; + + if(rb.wakeme){ + rb.wakeme = 0; + wakeup(&rb.consumer); + } +} + +void +randominit(void) +{ + /* Frequency close but not equal to HZ */ + addclock0link(randomclock, 13); + rb.target = 16; + rb.ep = rb.buf + sizeof(rb.buf); + rb.rp = rb.wp = rb.buf; + kproc("genrandom", genrandom, 0); +} + +/* + * consume random bytes from a circular buffer + */ +ulong +randomread(void *xp, ulong n) +{ + uchar *e, *p, *r; + ulong x; + int i; + + p = xp; + + qlock(&rb); + if(waserror()){ + qunlock(&rb); + nexterror(); + } + + for(e = p + n; p < e; ){ + 
if(rb.wp == rb.rp){ /* buffer empty: ask randomclock to wake us, kick the producer, wait */ + rb.wakeme = 1; + wakeup(&rb.producer); + sleep(&rb.consumer, rbnotempty, 0); + continue; + } + + /* + * beating clocks will be predictable if + * they are synchronized. Use a cheap pseudo- + * random number generator to obscure any cycles. + */ + x = rb.randn*1103515245 ^ *rb.rp; + *p++ = rb.randn = x; + + r = rb.rp + 1; + if(r == rb.ep) + r = rb.buf; + rb.rp = r; + } + if(rb.filled && rb.wp == rb.rp){ /* buffer had filled and is now drained: double target, capped at sizeof buf - 1 */ + i = 2*rb.target; + if(i > sizeof(rb.buf) - 1) + i = sizeof(rb.buf) - 1; + rb.target = i; + rb.filled = 0; + } + poperror(); + qunlock(&rb); + + wakeup(&rb.producer); + + return n; +} diff -Nru /sys/src/9k/port/rdb.c /sys/src/9k/port/rdb.c --- /sys/src/9k/port/rdb.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/rdb.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,117 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "ureg.h" + +#undef DBG +#define DBG if(0)scrprint +#pragma varargck argpos scrprint 1 +static Ureg ureg; + +static void +scrprint(char *fmt, ...) 
+{ + char buf[128]; + va_list va; + int n; + + va_start(va, fmt); + n = vseprint(buf, buf+sizeof buf, fmt, va)-buf; + va_end(va); + putstrn(buf, n); /* formatted text is truncated to sizeof buf */ +} + +static char* +getline(void) +{ + static char buf[128]; + int i, c; + + for(;;){ + for(i=0; idbgreg = ureg; /* NOTE(review): source text between "i" and "dbgreg" is missing here (a stripped '<'...'>' span); restore the rest of getline, addr and the head of talkrdb from upstream rdb.c */ + m->dbgsp = ureg->sp; + delconsdevs(); /* turn off serial console and kprint */ +// scrprint("Plan 9 debugger\n"); + iprint("Edebugger reset\n"); + for(;;){ + req = getline(); + if(req == nil){ + iprint("Edebugger break\n"); + return; + } + switch(*req){ + case 'r': + a = addr(req+1, ureg, nil); + DBG("read %#p\n", a); + iprint("R%.8lux %.2ux %.2ux %.2ux %.2ux\n", + strtoul(req+1, 0, 16), a[0], a[1], a[2], a[3]); + break; + + case 'w': + a = addr(req+1, ureg, &p); + *(ulong*)a = strtoul(p, nil, 16); + iprint("W\n"); + break; +/* + * case Tmput: + n = min[4]; + if(n > 4){ + mesg(Rerr, Ecount); + break; + } + a = addr(min+0); + scrprint("mput %.8lux\n", a); + memmove(a, min+5, n); + mesg(Rmput, mout); + break; + * + */ + default: + DBG("unknown %c\n", *req); + iprint("Eunknown message\n"); + break; + } + } +} + +void +rdb(void) +{ + splhi(); + iprint("rdb..."); + callwithureg(talkrdb); +} diff -Nru /sys/src/9k/port/reap.c /sys/src/9k/port/reap.c --- /sys/src/9k/port/reap.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/reap.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,91 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * Bibop table + */ +typedef struct Bibop Bibop; +struct Bibop{ + uintmem base; + uintmem limit; + uint shift; + uint mlen; /* map length */ + uchar map[]; +}; + +enum{ + MaxGiB= 64, + BibUnit= 2*MiB, + BibSize= (MaxGiB*GiB/BibUnit), + + IsReap= 0x40, + IsBusy= 0x80, + SizeMask= 0xF, /* up to 16 size codes */ +}; + +static uint +bibopsize(uintmem base, uintmem limit, uint lg2size) /* header plus one map byte per 2^lg2size bytes of [base, limit) */ +{ + return sizeof(Bibop)+((limit-base)>>lg2size); +} + +Bibop* +bibopalloc(uintmem base, uintmem limit, uint lg2size, void* (*alloc)(uint)) +{ + uint need; + Bibop *b; + 
+ need = bibopsize(base, limit, lg2size); + b = alloc(need); + if(b == nil) + panic("needed %ud bytes for bibop table", need); + b->base = base; + b->limit = limit; + b->shift = lg2size; + b->mlen = need-sizeof(Bibop); + memset(b->map, 0, b->mlen); + return b; +} + +int +bibopstate(Bibop *b, uintmem pa) +{ + if(pa == 0 || !(pa >= b->base && pa < b->limit)) + return -1; + return b->map[(pa - b->base)>>b->shift]; +} + +/* + * set the state for memory from pa0 to pa1 (half-open) to s + */ +void +bibopsetstate(Bibop *b, uintmem pa0, uintmem pa1, int s) +{ + int i, j; + + s |= IsBusy; + if(pa == 0 || !(pa0 >= b->base && pa0 < b->limit)) + return; + if(pa1 > b->limit) + pa1 = b->limit; + i = (pa0 - b->base) >> b->shift; + j = (pa1 - b->base) >> b->shift; + for(i = 0; i < j; i++) + b->map[i] = s; +} + +static Reap reaps[~IsReap & 0xFF]; + +Reap* +findreap(uintmem pa) +{ + int s; + + s = memstatus(pa); + if(s < 0 || (s&IsReap) == 0) + return nil; + return &reap[s & ~IsReap]; +} diff -Nru /sys/src/9k/port/rebootcmd.c /sys/src/9k/port/rebootcmd.c --- /sys/src/9k/port/rebootcmd.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/rebootcmd.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,102 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +static ulong +l2be(long l) +{ + uchar *cp; + + cp = (uchar*)&l; + return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; +} + + +static void +readn(Chan *c, void *vp, long n) +{ + char *p; + long nn; + + p = vp; + while(n > 0) { + nn = c->dev->read(c, p, n, c->offset); + if(nn == 0) + error(Eshort); + c->offset += nn; + p += nn; + n -= nn; + } +} + +static void +setbootcmd(int argc, char *argv[]) +{ + char *buf, *p, *ep; + int i; + + buf = malloc(1024); + if(buf == nil) + error(Enomem); + p = buf; + ep = buf + 1024; + for(i=0; iref); + if(x <= 0) + panic("incref pc=%#p", getcallerpc(&r)); + return x; +} + +int +decref(Ref *r) +{ + int x; + + x = adec(&r->ref); + if(x 
< 0) + panic("decref pc=%#p", getcallerpc(&r)); + return x; +} diff -Nru /sys/src/9k/port/rmap.c /sys/src/9k/port/rmap.c --- /sys/src/9k/port/rmap.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/rmap.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,213 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static struct{ + Lock; + RMapel els[128]; + int next; +} elpool; + +static RMapel* +rmapnew(RMap *r) /* take an element from r's free list, else from the shared static pool; nil when both are empty */ +{ + RMapel *e; + + if((e = r->free) != nil){ + r->free = e->next; + return e; + } + ilock(&elpool); + if(elpool.next >= nelem(elpool.els)){ + iunlock(&elpool); + return nil; + } + e = &elpool.els[elpool.next++]; + iunlock(&elpool); + return e; +} + +void +rmapfree(RMap* rmap, uintmem addr, uintmem size) /* add [addr, addr+size) to rmap, merging with adjacent entries; overlaps are reported and ignored */ +{ + RMapel *p, *n, **lp; + + if(size == 0) + return; + + ilock(rmap); + p = nil; + for(lp = &rmap->map; (n = *lp) != nil && n->addr <= addr; lp = &n->next) + p = n; /* remember the predecessor; with the former empty body p stayed nil, so the overlap test and the merge below never saw it */ + + if(p != nil && p->addr+p->size > addr || + n != nil && addr+size > n->addr){ + iunlock(rmap); + print("rmapfree: overlap: %#P %#P (%lld)\n", addr, size, (u64int)size); + return; + } + + if(p != nil && p->addr+p->size == addr){ /* extends the predecessor, and may also bridge to n */ + p->size += size; + if(n != nil && addr+size == n->addr){ + p->size += n->size; + p->next = n->next; + + n->next = rmap->free; + rmap->free = n; + } + }else{ + if(n != nil && addr+size == n->addr){ /* grows the successor downwards */ + n->addr -= size; + n->size += size; + }else{ + p = rmapnew(rmap); + if(p != nil){ + p->addr = addr; + p->size = size; + p->next = n; + *lp = p; + }else + print("rmapfree: %s: losing 0x%llux, %llud\n", rmap->name, (u64int)addr, (u64int)size); + } + } + iunlock(rmap); +} + +uintmem +rmapalloc(RMap* rmap, uintmem addr, uintmem size, uint align) +{ + RMapel *e, **lp; + uintmem maddr, oaddr; + + ilock(rmap); + for(lp = &rmap->map; (e = *lp) != nil; lp = &e->next){ + maddr = e->addr; + + if(addr != 0){ + /* + * A specific address range has been given: + * if the current map entry is greater then + * the address is not in the map; + * if the current map 
entry does not overlap + * the beginning of the requested range then + * continue on to the next map entry; + * if the current map entry does not entirely + * contain the requested range then the range + * is not in the map. + */ + if(maddr > addr) + break; + if(e->size < addr - maddr) /* maddr+e->size < addr, but no overflow */ + continue; + if(addr - maddr > e->size - size) /* addr+size > maddr+e->size, but no overflow */ + break; + maddr = addr; + } + + if(align > 0) + maddr = ((maddr+align-1)/align)*align; + if(maddr-e->addr > e->size || e->size-(maddr-e->addr) < size) /* rounding up may move maddr past the entry; test without unsigned wrap */ + continue; + + oaddr = e->addr; + e->addr = maddr+size; + e->size -= maddr-oaddr+size; + if(e->size == 0){ + *lp = e->next; + e->next = rmap->free; + rmap->free = e; + } + + iunlock(rmap); + if(oaddr != maddr) + rmapfree(rmap, oaddr, maddr-oaddr); /* give back the alignment slack in front of the allocation */ + + return maddr; + } + iunlock(rmap); + + return 0; +} + +int +isrmapped(RMap *r, uintmem addr, uintmem *limit) /* 1 if addr lies inside an entry of r; *limit (if non-nil) gets that entry's end */ +{ + RMapel *e; + + ilock(r); + for(e = r->map; e != nil; e = e->next){ + if(e->addr <= addr && addr <= e->addr+e->size-1){ /* was e->addr >= addr, which matched any entry starting at or above addr */ + if(limit != nil) + *limit = e->addr + e->size; + iunlock(r); + return 1; + } + } + iunlock(r); + return 0; +} + +int +rmapfirst(RMap *r, uintmem start, uintmem *addr, uintmem *size) /* if start lies in an entry, report the part of it from start onwards and return 1 */ +{ + RMapel *e; + uintmem lim; + + ilock(r); + for(e = r->map; e != nil; e = e->next){ + lim = e->addr + e->size; + if(e->addr <= start && start <= lim-1){ + iunlock(r); + *addr = start; + *size = lim-start; + return 1; + } + } + iunlock(r); + return 0; +} + +uintmem +rmapsize(RMap *r) /* sum of the sizes of all entries */ +{ + uintmem t; + RMapel *e; + + ilock(r); + t = 0; + for(e = r->map; e != nil; e = e->next) + t += e->size; + iunlock(r); + return t; +} + +void +rmapgaps(RMap *r2, RMap *r1) /* rmapfree into r2 every range not covered by an entry of r1 */ +{ + RMapel *e1; + uintmem prev; + + ilock(r1); + prev = 0; + for(e1 = r1->map; e1 != nil; e1 = e1->next){ + if(prev < e1->addr) + rmapfree(r2, prev, e1->addr - prev); + prev = e1->addr + e1->size; + } + iunlock(r1); + if(prev-1 != ~(uintmem)0) + rmapfree(r2, prev, -prev); +} + +void +rmapprint(RMap *r) +{ + 
RMapel *e; + + print("%s:\n", r->name); + for(e = r->map; e != nil; e = e->next) + print("\t%#P %#P (%llud)\n", e->addr, e->addr+e->size, (u64int)e->size); /* start, end (exclusive), size */ +} diff -Nru /sys/src/9k/port/sd.h /sys/src/9k/port/sd.h --- /sys/src/9k/port/sd.h Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sd.h Wed Dec 9 00:00:00 2015 @@ -0,0 +1,194 @@ +/* + * Storage Device. + */ +typedef struct SDev SDev; +typedef struct SDifc SDifc; +typedef struct SDio SDio; +typedef struct SDpart SDpart; +typedef struct SDperm SDperm; +typedef struct SDreq SDreq; +typedef struct SDunit SDunit; + +struct SDperm { /* embedded anonymously in SDpart and SDunit below */ + char* name; + char* user; + ulong perm; +}; + +struct SDpart { + uvlong start; /* presumably in sectors -- TODO confirm against devsd.c */ + uvlong end; + SDperm; + int valid; + ulong vers; +}; + +struct SDunit { + SDev* dev; /* controller this unit belongs to */ + int subno; + uchar inquiry[255]; /* format follows SCSI spec */ + uchar sense[18]; /* format follows SCSI spec */ + SDperm; + + QLock ctl; + uvlong sectors; + ulong secsize; + SDpart* part; /* nil or array of size npart */ + int npart; + ulong vers; + SDperm ctlperm; + + QLock raw; /* raw read or write in progress */ + ulong rawinuse; /* really just a test-and-set */ + int state; + SDreq* req; + SDperm rawperm; +}; + +/* + * Each controller is represented by a SDev. 
+ */ +struct SDev { + Ref r; /* Number of callers using device */ + SDifc* ifc; /* pnp/legacy */ + void* ctlr; /* driver-private controller state (sdaoe keeps its Ctlr here) */ + int idno; + char name[8]; + SDev* next; + + QLock; /* enable/disable */ + int enabled; + int nunit; /* Number of units */ + QLock unitlock; /* `Loading' of units */ + int* unitflg; /* Unit flags */ + SDunit**unit; +}; + +struct SDifc { /* per-driver operations table; see sdaoeifc for an initialiser in the same field order */ + char* name; + + SDev* (*pnp)(void); + SDev* (*legacy)(int, int); + int (*enable)(SDev*); + int (*disable)(SDev*); + + int (*verify)(SDunit*); + int (*online)(SDunit*); + int (*rio)(SDreq*); + int (*rctl)(SDunit*, char*, int); + int (*wctl)(SDunit*, Cmdbuf*); + + long (*bio)(SDunit*, int, int, void*, long, uvlong); + SDev* (*probe)(DevConf*); + void (*clear)(SDev*); + char* (*rtopctl)(SDev*, char*, char*); + int (*wtopctl)(SDev*, Cmdbuf*); +}; + +struct SDreq { + SDunit* unit; + int lun; + int write; + uchar cmd[16]; /* command bytes; clen is the length in use */ + int clen; + void* data; /* transfer buffer of dlen bytes */ + int dlen; + + int flags; + + int status; + long rlen; /* set by the driver to the amount transferred */ + uchar sense[256]; +}; + +enum { + SDnosense = 0x00000001, + SDvalidsense = 0x00010000, + + SDinq0periphqual= 0xe0, + SDinq0periphtype= 0x1f, + SDinq1removable = 0x80, + + /* periphtype values */ + SDperdisk = 0, /* Direct access (disk) */ + SDpertape = 1, /* Sequential eg, tape */ + SDperpr = 2, /* Printer */ + SDperworm = 4, /* Worm */ + SDpercd = 5, /* CD-ROM */ + SDpermo = 7, /* rewriteable MO */ + SDperjuke = 8, /* medium-changer */ +}; + +enum { + SDretry = -5, /* internal to controllers */ + SDmalloc = -4, + SDeio = -3, + SDtimeout = -2, + SDnostatus = -1, + + SDok = 0, + + SDcheck = 0x02, /* check condition */ + SDbusy = 0x08, /* busy */ + + SDmaxio = 2048*1024, + SDnpart = 16, +}; + +/* + * Allow the default #defines for sdmalloc & sdfree to be overridden by + * system-specific versions. This can be used to avoid extra copying + * by making sure sd buffers are cache-aligned (some ARM systems) or + * page-aligned (xen) for DMA. 
+ */ +#ifndef sdmalloc +#define sdmalloc(n) malloc(n) +#define sdfree(p) free(p) +#endif + +/* + * mmc/sd/sdio host controller interface + */ + +struct SDio { + char *name; + int (*init)(void); + void (*enable)(void); + int (*inquiry)(char*, int); + int (*cmd)(u32int, u32int, u32int*); + void (*iosetup)(int, void*, int, int); + void (*io)(int, uchar*, int); +}; + +extern SDio sdio; + +/* devsd.c */ +extern void sdadddevs(SDev*); +extern void sdaddconf(SDunit*); +extern void sdaddallconfs(void (*f)(SDunit*)); +extern void sdaddpart(SDunit*, char*, uvlong, uvlong); +extern int sdsetsense(SDreq*, int, int, int, int); +extern int sdmodesense(SDreq*, uchar*, void*, int); +extern int sdfakescsi(SDreq*, void*, int); /* NOTE(review): sdatafis.c in this patch calls sdfakescsi with one argument and uses SDreq/SDifc members not declared here -- confirm which sd.h it is built against */ + +/* sdscsi.c */ +extern int scsiverify(SDunit*); +extern int scsionline(SDunit*); +extern long scsibio(SDunit*, int, int, void*, long, uvlong); +extern SDev* scsiid(SDev*, SDifc*); + +/* + * hardware info about a device + */ +typedef struct { + ulong port; + int size; +} Devport; + +struct DevConf +{ + ulong intnum; /* interrupt number */ + char *type; /* card type, malloced */ + int nports; /* Number of ports */ + Devport *ports; /* The ports themselves */ +}; diff -Nru /sys/src/9k/port/sdaoe.c /sys/src/9k/port/sdaoe.c --- /sys/src/9k/port/sdaoe.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sdaoe.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,627 @@ +/* + * aoe sd driver, copyright © 2007 coraid + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" +#include "../port/sd.h" + +#include "etherif.h" +#include "../port/aoe.h" + +extern char Echange[]; +extern char Enotup[]; + +#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__); + +enum { + Nctlr = 32, + Maxpath = 128, + + Probeintvl = 100, /* ms. 
between probes */ + Probemax = 20, /* max probes */ +}; + +enum { + /* sync with ahci.h */ + Dllba = 1<<0, + Dsmart = 1<<1, + Dpower = 1<<2, + Dnop = 1<<3, + Datapi = 1<<4, + Datapi16= 1<<5, +}; + +static char *flagname[] = { /* indexed by bit number of the D* flags above */ + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", +}; + +typedef struct Ctlr Ctlr; +struct Ctlr{ + QLock; + + Ctlr *next; /* link in the head/tail list guarded by ctlrlock */ + SDunit *unit; + + char path[Maxpath]; + Chan *c; /* open <path>/data channel, set by aoeconnect */ + + ulong vers; + uchar mediachange; + uchar flag; + uchar smart; + uchar smartrs; + uchar feat; /* D* feature bits */ + + uvlong sectors; + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + char ident[0x100]; /* raw bytes read from <path>/ident */ +}; + +void aoeidmove(char *p, ushort *a, unsigned n); + +static Lock ctlrlock; +static Ctlr *head; +static Ctlr *tail; + +SDifc sdaoeifc; + +static ushort +gbit16(void *a) /* little-endian 16-bit load */ +{ + uchar *i; + + i = a; + return i[1] << 8 | i[0]; +} + +static ulong +gbit32(void *a) /* little-endian 32-bit load */ +{ + ulong j; + uchar *i; + + i = a; + j = i[3] << 24; + j |= i[2] << 16; + j |= i[1] << 8; + j |= i[0]; + return j; +} + +static uvlong +gbit64(void *a) /* little-endian 64-bit load */ +{ + uchar *i; + + i = a; + return (uvlong)gbit32(i+4)<<32 | gbit32(i); +} + +static int +identify(Ctlr *c, ushort *id) /* decode identify words into c: feature bits, sector count, serial/firmware/model */ +{ + int i; + uchar oserial[21]; + uvlong osectors, s; + + osectors = c->sectors; + memmove(oserial, c->serial, sizeof c->serial); + + c->feat &= ~(Dllba|Dpower|Dsmart|Dnop); + i = gbit16(id+83) | gbit16(id+86); + if(i & (1<<10)){ + c->feat |= Dllba; + s = gbit64(id+100); + }else + s = gbit32(id+60); + + i = gbit16(id+83); + if((i>>14) == 1) { + if(i & (1<<3)) + c->feat |= Dpower; + i = gbit16(id+82); + if(i & 1) + c->feat |= Dsmart; + if(i & (1<<14)) + c->feat |= Dnop; + } + + aoeidmove(c->serial, id+10, 20); + aoeidmove(c->firmware, id+23, 8); + aoeidmove(c->model, id+27, 40); + + /* NOTE(review): needs BOTH a size change and a serial change; a resized device with the same serial keeps the old c->sectors -- confirm intent */ + if((osectors == 0 || osectors != s) && + memcmp(oserial, c->serial, sizeof oserial) != 0){ + c->sectors = s; + c->mediachange = 1; + c->vers++; + } + return 0; +} + +/* must call with d qlocked */ +static int +aoeidentify(Ctlr *d, SDunit *u) +{ + Chan *c; + 
+ c = nil; + if(waserror()){ + if(c) + cclose(c); + iprint("aoeidentify: %s\n", up->errstr); + nexterror(); + } + + uprint("%s/ident", d->path); + c = namec(up->genbuf, Aopen, OREAD, 0); + c->dev->read(c, d->ident, sizeof d->ident, 0); + + poperror(); + cclose(c); + + d->feat = 0; + d->smart = 0; + identify(d, (ushort*)d->ident); + + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, d->model, 40); + + return 0; +} + +static Ctlr* +ctlrlookup(char *path) +{ + Ctlr *c; + + lock(&ctlrlock); + for(c = head; c; c = c->next) + if(strcmp(c->path, path) == 0) + break; + unlock(&ctlrlock); + return c; +} + +static Ctlr* +newctlr(char *path) +{ + Ctlr *c; + + /* race? */ + if(ctlrlookup(path)) + error(Eexist); + + if((c = malloc(sizeof *c)) == nil) + return 0; + kstrcpy(c->path, path, sizeof c->path); + lock(&ctlrlock); + if(head != nil) + tail->next = c; + else + head = c; + tail = c; + unlock(&ctlrlock); + return c; +} + +static void +delctlr(Ctlr *c) +{ + Ctlr *x, *prev; + + lock(&ctlrlock); + + for(prev = 0, x = head; x; prev = x, x = c->next) + if(strcmp(c->path, x->path) == 0) + break; + if(x == 0){ + unlock(&ctlrlock); + error(Enonexist); + } + + if(prev) + prev->next = x->next; + else + head = x->next; + if(x->next == nil) + tail = prev; + unlock(&ctlrlock); + + if(x->c) + cclose(x->c); + free(x); +} + +/* don't call aoeprobe from within a loop; it loops internally retrying open. 
*/ +static SDev* +aoeprobe(char *path, SDev *s) +{ + int n, i; + char *p; + Chan *c; + Ctlr *ctlr; + + if((p = strrchr(path, '/')) == 0) + error(Ebadarg); + *p = 0; + uprint("%s/ctl", path); + *p = '/'; + + c = namec(up->genbuf, Aopen, OWRITE, 0); + if(waserror()) { + cclose(c); + nexterror(); + } + n = uprint("discover %s", p+1); + c->dev->write(c, up->genbuf, n, 0); + poperror(); + cclose(c); + + for(i = 0; i < Probemax; i++){ + tsleep(&up->sleep, return0, 0, Probeintvl); + uprint("%s/ident", path); + if(!waserror()) { + c = namec(up->genbuf, Aopen, OREAD, 0); + poperror(); + cclose(c); + break; + } + } + if(i >= Probemax) + error(Etimedout); + uprint("%s/ident", path); + ctlr = newctlr(path); + if(ctlr == nil || s == nil && (s = malloc(sizeof *s)) == nil) + return nil; + s->ctlr = ctlr; + s->ifc = &sdaoeifc; + s->nunit = 1; + return s; +} + +static char *probef[32]; +static int nprobe; + +static int +pnpprobeid(char *s) +{ + if(strlen(s) < 2) + return 0; + return s[1] == '!'? s[0]: 'e'; +} + +static SDev* +aoepnp(void) +{ + int i, id; + char *p; + SDev *h, *t, *s; + + if((p = getconf("aoedev")) == 0) + return 0; + nprobe = tokenize(p, probef, nelem(probef)); + h = t = 0; + for(i = 0; i < nprobe; i++){ + id = pnpprobeid(probef[i]); + if(id == 0) + continue; + s = malloc(sizeof *s); + if(s == nil) + break; + s->ctlr = 0; + s->idno = id; + s->ifc = &sdaoeifc; + s->nunit = 1; + + if(h) + t->next = s; + else + h = s; + t = s; + } + return h; +} + +static Ctlr* +pnpprobe(SDev *sd) +{ + ulong start; + char *p; + static int i; + + if(i > nprobe) + return 0; + p = probef[i++]; + if(strlen(p) < 2) + return 0; + if(p[1] == '!') + p += 2; + + start = TK2MS(sys->ticks); + if(waserror()){ + print("#æ: pnpprobe failed in %lud ms: %s: %s\n", + TK2MS(sys->ticks) - start, probef[i-1], + up->errstr); + return nil; + } + sd = aoeprobe(p, sd); /* does a round of probing */ + poperror(); + print("#æ: pnpprobe established %s in %lud ms\n", + probef[i-1], TK2MS(sys->ticks) - start); + 
return sd->ctlr; +} + + +static int +aoeverify(SDunit *u) +{ + SDev *s; + Ctlr *c; + + s = u->dev; + c = s->ctlr; + if(c == nil && (s->ctlr = c = pnpprobe(s)) == nil) + return 0; + c->mediachange = 1; + return 1; +} + +static int +aoeconnect(SDunit *u, Ctlr *c) +{ + qlock(c); + if(waserror()){ + qunlock(c); + return -1; + } + + aoeidentify(u->dev->ctlr, u); + if(c->c) + cclose(c->c); + c->c = 0; + uprint("%s/data", c->path); + c->c = namec(up->genbuf, Aopen, ORDWR, 0); + qunlock(c); + poperror(); + + return 0; +} + +static int +aoeonline(SDunit *u) +{ + Ctlr *c; + int r; + + c = u->dev->ctlr; + r = 0; + + if((c->feat&Datapi) && c->mediachange){ + if(aoeconnect(u, c) == 0 && (r = scsionline(u)) > 0) + c->mediachange = 0; + return r; + } + + if(c->mediachange){ + if(aoeconnect(u, c) == -1) + return 0; + r = 2; + c->mediachange = 0; + u->sectors = c->sectors; + u->secsize = Aoesectsz; + } else + r = 1; + + return r; +} + +static int +aoerio(SDreq *r) +{ + int i, count; + uvlong lba; + char *name; + uchar *cmd; + long (*rio)(Chan*, void*, long, vlong); + Ctlr *c; + SDunit *unit; + + unit = r->unit; + c = unit->dev->ctlr; +// if(c->feat & Datapi) +// return aoeriopkt(r, d); + + cmd = r->cmd; + name = unit->name; + + if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ +// qlock(c); +// i = flushcache(); +// qunlock(c); +// if(i == 0) +// return sdsetsense(r, SDok, 0, 0, 0); + return sdsetsense(r, SDcheck, 3, 0xc, 2); + } + + if((i = sdfakescsi(r, c->ident, sizeof c->ident)) != SDnostatus){ + r->status = i; + return i; + } + + switch(*cmd){ + case 0x88: + case 0x28: + rio = c->c->dev->read; + break; + case 0x8a: + case 0x2a: + rio = c->c->dev->write; + break; + default: + print("%s: bad cmd %#.2ux\n", name, cmd[0]); + r->status = SDcheck; + return SDcheck; + } + + if(r->data == nil) + return SDok; + + if(r->clen == 16){ + if(cmd[2] || cmd[3]) + return sdsetsense(r, SDcheck, 3, 0xc, 2); + lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32; + lba |= cmd[6]<<24 | cmd[7]<<16 | cmd[8]<<8 | 
cmd[9]; + count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13]; + }else{ + lba = cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5]; + count = cmd[7]<<8 | cmd[8]; + } + + count *= Aoesectsz; + + if(r->dlen < count) + count = r->dlen & ~0x1ff; + + if(waserror()){ + if(strcmp(up->errstr, Echange) == 0 || + strcmp(up->errstr, Enotup) == 0) + unit->sectors = 0; + nexterror(); + } + r->rlen = rio(c->c, r->data, count, Aoesectsz * lba); + poperror(); + r->status = SDok; + return SDok; +} + +static char *smarttab[] = { + "unset", + "error", + "threshold exceeded", + "normal" +}; + +static char * +pflag(char *s, char *e, uchar f) +{ + uchar i; + + for(i = 0; i < 8; i++) + if(f & (1 << i)) + s = seprint(s, e, "%s ", flagname[i]); + return seprint(s, e, "\n"); +} + +static int +aoerctl(SDunit *u, char *p, int l) +{ + Ctlr *c; + char *e, *op; + + if((c = u->dev->ctlr) == nil) + return 0; + e = p+l; + op = p; + + p = seprint(p, e, "model\t%s\n", c->model); + p = seprint(p, e, "serial\t%s\n", c->serial); + p = seprint(p, e, "firm %s\n", c->firmware); + if(c->smartrs == 0xff) + p = seprint(p, e, "smart\tenable error\n"); + else if(c->smartrs == 0) + p = seprint(p, e, "smart\tdisabled\n"); + else + p = seprint(p, e, "smart\t%s\n", smarttab[c->smart]); + p = seprint(p, e, "flag "); + p = pflag(p, e, c->feat); + p = seprint(p, e, "geometry %llud %d\n", c->sectors, Aoesectsz); + return p-op; +} + +static int +aoewctl(SDunit *, Cmdbuf *cmd) +{ + cmderror(cmd, Ebadarg); + return 0; +} + +static SDev* +aoeprobew(DevConf *c) +{ + char *p; + + p = strchr(c->type, '/'); + if(p == nil || strlen(p) > Maxpath - 11) + error(Ebadarg); + if(p[1] == '#') + p++; /* hack */ + if(ctlrlookup(p)) + error(Einuse); + return aoeprobe(p, 0); +} + +static void +aoeclear(SDev *s) +{ + delctlr((Ctlr *)s->ctlr); +} + +static char* +aoertopctl(SDev *s, char *p, char *e) +{ + Ctlr *c; + + if(s == nil || (c = s->ctlr) == nil) + return p; + + return seprint(p, e, "%s aoe %s\n", s->name, c->path); +} + +static int 
+aoewtopctl(SDev *, Cmdbuf *cmd) /* every top-level ctl command is rejected with Ebadarg */ +{ + switch(cmd->nf){ + default: + cmderror(cmd, Ebadarg); + } + return 0; +} + +SDifc sdaoeifc = { + "aoe", + + aoepnp, + nil, /* legacy */ + nil, /* enable */ + nil, /* disable */ + + aoeverify, + aoeonline, + aoerio, + aoerctl, + aoewctl, + + scsibio, + aoeprobew, /* probe */ + aoeclear, /* clear */ + aoertopctl, + aoewtopctl, +}; diff -Nru /sys/src/9k/port/sdatafis.c /sys/src/9k/port/sdatafis.c --- /sys/src/9k/port/sdatafis.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sdatafis.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,292 @@ +/* + * ata analog to sdscsi + * copyright © 2010 erik quanstrom + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/sd.h" +#include /* NOTE(review): header name missing (a stripped '<'...'>' span); restore from upstream */ +#include "sdfis.h" + +#define reqio(r) (r)->unit->dev->ifc->ataio(r) +#define dprint(...) print(__VA_ARGS__) + +static char* +dnam(SDunit *u) +{ + return u->name; +} + +static int +settxmode(SDunit *u, Sfis *f, uchar x) /* issue the transfer-mode command built by txmodefis; returns 0 without issuing anything when txmodefis yields -1 */ +{ + int t; + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + if((t = txmodefis(f, r.cmd, x)) == -1) + return 0; + r.clen = 16; + r.ataproto = t; + r.timeout = totk(Ms2tk(1*1000)); + return reqio(&r); +} + +static int +flushcache(SDunit *u, Sfis *f) /* issue the command built by flushcachefis with a 60 s timeout */ +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.clen = 16; + r.ataproto = flushcachefis(f, r.cmd); + r.timeout = totk(Ms2tk(60*1000)); + return reqio(&r); +} + +static int +setfeatures(SDunit *u, Sfis *f, uchar x, uint w) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.clen = 16; + r.ataproto = featfis(f, r.cmd, x); + r.timeout = totk(w); /* NOTE(review): callers pass milliseconds (20*1000, 3*1000) but the siblings wrap theirs in Ms2tk() -- confirm the unit totk expects */ + return reqio(&r); +} + +static int +identify1(SDunit *u, Sfis *f, void *id) /* one identify request into the 0x200-byte buffer id */ +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.clen = 16; + r.ataproto = identifyfis(f, r.cmd); + r.data = id; + r.dlen = 0x200; + r.timeout = totk(Ms2tk(3*1000)); + return reqio(&r); +} + +static int +identify0(SDunit *u, Sfisx *f, ushort *id) +{ + 
int i, n; + vlong osectors, s; + uchar oserial[21]; + + for(i = 0;; i++){ /* repeat identify (at most 6 attempts) until idpuis reports Pidready */ + if(i > 5 || identify1(u, f, id) != 0) + return -1; + n = idpuis(id); + if(n & Pspinup && setfeatures(u, f, 7, 20*1000) == -1) + dprint("%s: puis spinup fail\n", dnam(u)); + if(n & Pidready) + break; + } + + s = idfeat(f, id); + if(s == -1) + return -1; + if((f->feat&Dlba) == 0){ + dprint("%s: no lba support\n", dnam(u)); + return -1; + } + osectors = u->sectors; + memmove(oserial, f->serial, sizeof f->serial); + + f->sectors = s; + f->secsize = idss(f, id); + + idmove(f->serial, id+10, 20); + idmove(f->firmware, id+23, 8); + idmove(f->model, id+27, 40); + f->wwn = idwwn(f, id); + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, f->model, 40); + + if(osectors != s || memcmp(oserial, f->serial, sizeof oserial)){ /* size or serial differs from before: flag a new drive */ + f->drivechange = 1; + u->sectors = 0; + } + return 0; +} + +static int +identify(SDunit *u, Sfisx *f) /* identify0 with a temporary 0x200-byte buffer */ +{ + int r; + ushort *id; + + id = malloc(0x200); + if(id == nil) + error(Enomem); + r = identify0(u, f, id); /* NOTE(review): if anything below raises error(), id is not freed -- confirm reqio cannot raise here */ + free(id); + return r; +} + +void +pronline(SDunit *u, Sfisx *f) /* print a two-line summary of the drive */ +{ + char *s, *t; + + if(f->type == Sas) + s = "sas"; + else{ + s = "lba"; + if(f->feat & Dllba) + s = "llba"; + if(f->feat & Datapi) + s = "atapi"; + } + t = ""; + if(f->drivechange) + t = "[newdrive]"; + print("%s: %s %,lld sectors\n", dnam(u), s, f->sectors); + print(" %s %s %s %s\n", f->model, f->firmware, f->serial, t); +} + +int +ataonline0(SDunit *u, Sfisx *f) /* identify, apply setfeatures 0x85 when Dpower is set, set the udma transfer mode; SDok or SDeio */ +{ + if(identify(u, f) != 0){ + dprint("%s: identify failure\n", dnam(u)); + return SDeio; + } + if(f->feat & Dpower && setfeatures(u, f, 0x85, 3*1000) != 0) + f->feat &= ~Dpower; + if(settxmode(u, f, f->udma) != 0){ + dprint("%s: can't set tx mode udma %d\n", dnam(u), f->udma); + return SDeio; + } + return SDok; +} + + +int +ataonline(SDunit *u, Sfisx *f) /* ataonline0 under f's write lock; a raised error becomes SDeio */ +{ + int r; + + wlock(f); + if(waserror()) + r = SDeio; + else{ + r = ataonline0(u, f); + 
poperror(); + } + wunlock(f); + return r; +} + +static int +ereqio(Sfisx *f, SDreq *r) +{ + int rv; + + rv = -1; + rlock(f); + if(!waserror()){ + rv = reqio(r); + poperror(); + } + runlock(f); + return rv; +} + +static int +ataexec(Sfisx *f, SDreq *r) +{ + ulong s, t; + + for(t = r->timeout; setreqto(r, t) != -1; edelay(250, t)){ + if((s = ereqio(f, r)) != SDok) + return s; + switch(r->status){ + default: + return r->status; + case SDtimeout: + case SDretry: + continue; + } + } + return -1; +} + +long +atabio(SDunit* u, Sfisx *f, int lun, int write, void *d0, long count0, uvlong lba) +{ + char *data; + uint llba, n, count; + SDreq r; +// Sfisx *f; + +// f = u->f; + memset(&r, 0, sizeof r); + r.unit = u; + r.lun = lun; + llba = (f->feat & Dlba) != 0; + r.clen = 16; + data = d0; + r.timeout = gettotk(f); + for(count = count0; count > 0; count -= n){ + n = count; + if(llba && n > 65536) + n = 65536; + else if(!llba && n > 256) + n = 256; + if(n > f->atamaxxfr) + n = f->atamaxxfr; + r.data = data; + r.dlen = n*f->secsize; + r.ataproto = rwfis(f, r.cmd, write, n, lba); + r.write = (r.ataproto & Pout) != 0; + if(ataexec(f, &r) != SDok) + return -1; + data += r.dlen; + lba += n; + } + return count0 * f->secsize; +} + +int +atariosata(SDunit *u, Sfisx *f, SDreq *r) +{ + uchar *cmd; + int i, n, count, rw; + uvlong lba; + + cmd = r->cmd; + if(cmd[0] == 0x35 || cmd[0] == 0x91){ + if(flushcache(u, f) == 0) + return sdsetsense(r, SDok, 0, 0, 0); /* stupid scuzz */ + return sdsetsense(r, SDcheck, 3, 0xc, 2); + } + if((i = sdfakescsi(r)) != SDnostatus){ + r->status = i; + return i; + } + if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus) + return i; + n = atabio(u, f, r->lun, r->write, r->data, count, lba); + if(n == -1) + return SDeio; + r->rlen = n; + return sdsetsense(r, SDok, 0, 0, 0); /* stupid scuzz */ +} diff -Nru /sys/src/9k/port/segment.c /sys/src/9k/port/segment.c --- /sys/src/9k/port/segment.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/segment.c Wed Dec 9 
00:00:00 2015 @@ -0,0 +1,232 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +Segment * +newseg(int type, uintptr base, uintptr top, Image *i, int isec) /* new segment [base, top) with ref 1; takes a reference on i when non-nil; raises Enovmem if newpages fails */ +{ + Segment *s; + + s = smalloc(sizeof(Segment)); + s->ref = 1; + s->type = type; + s->base = base; + s->top = top; + s->pages = newpages(PGSHFT, top-base, nil); + if(s->pages == nil){ + free(s); + error(Enovmem); + } + s->sema.prev = &s->sema; + s->sema.next = &s->sema; + if(i != nil){ + incref(i); + s->image = i; + s->isec = isec; + } + return s; +} + +void +putseg(Segment *s) /* drop one reference; on the last one release the image, pages, profile and the segment itself */ +{ + Image *i; + + if(s == 0) + return; + if(decref(s) != 0) + return; + i = s->image; + if(i != nil) + putimage(i); + freepages(s->pages); + if(s->profile != nil) + free(s->profile); + free(s); +} + +void +relocateseg(Segment *s, uintptr offset) +{ + relocatepages(s->pages, offset); +} + +Segment* +dupseg(Segment **seg, int segno, int share) /* segment to use in place of seg[segno]: the same one with an extra reference, or a new copy, depending on type and share */ +{ + Segment *n, *s; + + SET(n); + s = seg[segno]; + + rlock(&s->lk); + if(waserror()){ + runlock(&s->lk); + nexterror(); + } + switch(s->type&SG_TYPE) { + case SG_TEXT: /* New segment shares pte set */ + case SG_SHARED: + case SG_PHYSICAL: + goto sameseg; + + case SG_STACK: + n = newseg(s->type, s->base, s->top, nil, 0); + break; + + case SG_BSS: /* Just copy on write */ + if(share) + goto sameseg; + n = newseg(s->type, s->base, s->top, nil, 0); + break; + + case SG_DATA: /* Copy on write plus demand load info */ + if(segno == TSEG){ + poperror(); + runlock(&s->lk); + return data2txt(s); + } + + if(share) + goto sameseg; + n = newseg(s->type, s->base, s->top, s->image, s->isec); + break; + } + duppages(n->pages, s->pages); + + n->flushme = s->flushme; + if(s->ref > 1) + procflushseg(s); /* to force copy-on-write/copy-on-reference */ + poperror(); + runlock(&s->lk); + return n; + +sameseg: + incref(s); + poperror(); + runlock(&s->lk); + return s; +} + +/* + * called with s->lk wlocked + */ +void +mfreeseg(Segment *s, 
uintptr start, uintptr top) +{ + usize pages; + uintptr soff; + Pages *ps; + Page *freed; + + ps = s->pages; + pages = (top-start)>>ps->lg2pgsize; + soff = start-s->base; + freed = mfreepages(ps, soff, pages); + if(s->ref > 1) + procflushseg(s); + freepagelist(freed); /* pages are released only after the flush above */ +} + +Segment* +isoverlap(Proc* p, uintptr va, usize len) /* first segment of p that [va, va+len) touches, or nil */ +{ + int i; + Segment *ns; + uintptr newtop; + + newtop = va+len; + for(i = 0; i < NSEG; i++) { + ns = p->seg[i]; + if(ns == 0) + continue; + if((newtop > ns->base && newtop <= ns->top) || + (va >= ns->base && va < ns->top) || (va < ns->base && newtop > ns->top)) /* last clause: the new range encloses the whole segment, which neither endpoint test catches */ + return ns; + } + return nil; +} + +Segment* +seg(Proc *p, uintptr addr, void (*dolock)(RWlock*)) /* segment of p containing addr, locked with dolock unless that is nil; 0 if none */ +{ + Segment **s, **et, *n; + void (*dounlock)(RWlock*); + + if(dolock == wlock) + dounlock = wunlock; + else + dounlock = runlock; + + et = &p->seg[NSEG]; + for(s = p->seg; s < et; s++) { + n = *s; + if(n == 0) + continue; + if(addr >= n->base && addr < n->top) { + if(dolock == nil) + return n; + dolock(&n->lk); + if(addr >= n->base && addr < n->top) /* re-check the bounds now that the lock is held */ + return n; + dounlock(&n->lk); + } + } + + return 0; +} + +void +segclock(uintptr pc) /* profiling tick: charge TK2MS(1) to slot 0 and, when pc is inside the text segment, to pc's slot */ +{ + Segment *s; + + s = up->seg[TSEG]; + if(s == 0 || s->profile == 0) + return; + + s->profile[0] += TK2MS(1); + if(pc >= s->base && pc < s->top) { + pc -= s->base; + s->profile[pc>>LRESPROF] += TK2MS(1); + } +} + +Segment* +txt2data(Proc *p, Segment *s) /* swap s in p->seg for a new SG_DATA segment over the same range and image; s is read-unlocked and released, the new segment is returned read-locked */ +{ + int i; + Segment *ps; + + ps = newseg(SG_DATA, s->base, s->top, s->image, s->isec); + ps->flushme = 1; + + qlock(&p->seglock); + for(i = 0; i < NSEG; i++) + if(p->seg[i] == s) + break; + if(i == NSEG) + panic("segment gone"); + + runlock(&s->lk); + putseg(s); + rlock(&ps->lk); + p->seg[i] = ps; + qunlock(&p->seglock); + + return ps; +} + +Segment* +data2txt(Segment *s) /* new SG_TEXT segment over s's range and image; s itself is left untouched */ +{ + Segment *ps; + + ps = newseg(SG_TEXT, s->base, s->top, s->image, s->isec); + ps->flushme = 1; + + return ps; +} diff -Nru /sys/src/9k/port/sysauth.c /sys/src/9k/port/sysauth.c --- /sys/src/9k/port/sysauth.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sysauth.c Wed 
Dec 9 00:00:00 2015 @@ -0,0 +1,168 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +char *eve; +char hostdomain[DOMLEN]; + +/* + * return true if current user is eve + */ +int +iseve(void) +{ + return strcmp(eve, up->user) == 0; +} + +int +isevegroup(void) +{ + return ingroup(up->user, eve); +} + +void +sysfversion(Ar0* ar0, va_list list) +{ + Chan *c; + char *version; + int fd; + u32int msize; + usize nversion; + + /* + * int fversion(int fd, int bufsize, char *version, int nversion); + * should be + * usize fversion(int fd, u32int msize, char *version, usize nversion); + */ + fd = va_arg(list, int); + msize = va_arg(list, u32int); + version = va_arg(list, char*); + nversion = va_arg(list, usize); + version = validaddr(version, nversion, 1); + /* check there's a NUL in the version string */ + if(nversion == 0 || memchr(version, 0, nversion) == nil) + error(Ebadarg); + + c = fdtochan(fd, ORDWR, 0, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + + ar0->u = mntversion(c, msize, version, nversion); + + cclose(c); + poperror(); +} + +void +sysfauth(Ar0* ar0, va_list list) +{ + Chan *c, *ac; + char *aname; + int fd; + + /* + * int fauth(int fd, char *aname); + */ + fd = va_arg(list, int); + aname = va_arg(list, char*); + + aname = validaddr(aname, 1, 0); + aname = validnamedup(aname, 1); + if(waserror()){ + free(aname); + nexterror(); + } + c = fdtochan(fd, ORDWR, 0, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + + ac = mntauth(c, aname); + /* at this point ac is responsible for keeping c alive */ + cclose(c); + poperror(); /* c */ + free(aname); + poperror(); /* aname */ + + if(waserror()){ + cclose(ac); + nexterror(); + } + + fd = newfd(ac); + if(fd < 0) + error(Enofd); + poperror(); /* ac */ + + /* always mark it close on exec */ + ac->flag |= CCEXEC; + + ar0->i = fd; +} + +/* + * called by devcons() for user device + * + * anyone can become none + */ +long 
+userwrite(char* a, long n) +{ + if(n != 4 || strncmp(a, "none", 4) != 0) + error(Eperm); + kstrdup(&up->user, "none"); + up->basepri = PriNormal; + + return n; +} + +/* + * called by devcons() for host owner/domain + * + * writing hostowner also sets user + */ +long +hostownerwrite(char* a, long n) +{ + char buf[128]; + + if(!iseve()) + error(Eperm); + if(n <= 0 || n >= sizeof buf) + error(Ebadarg); + memmove(buf, a, n); + buf[n] = 0; + + renameuser(eve, buf); + kstrdup(&eve, buf); + kstrdup(&up->user, buf); + up->basepri = PriNormal; + + return n; +} + +long +hostdomainwrite(char* a, long n) +{ + char buf[DOMLEN]; + + if(!iseve()) + error(Eperm); + if(n >= DOMLEN) + error(Ebadarg); + memset(buf, 0, DOMLEN); + strncpy(buf, a, n); + if(buf[0] == 0) + error(Ebadarg); + memmove(hostdomain, buf, DOMLEN); + + return n; +} diff -Nru /sys/src/9k/port/syscallfmt.c /sys/src/9k/port/syscallfmt.c --- /sys/src/9k/port/syscallfmt.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/syscallfmt.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,376 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "/sys/src/libc/9syscall/sys.h" + +/* + * Print functions for system call tracing. 
+ */ +static void +fmtrwdata(Fmt* f, char* a, int n, char* suffix) +{ + int i; + char *t; + + if(a == nil){ + fmtprint(f, "0x0%s", suffix); + return; + } + a = validaddr(a, n, 0); + t = smalloc(n+1); + for(i = 0; i < n; i++){ + if(a[i] > 0x20 && a[i] < 0x7f) + t[i] = a[i]; + else + t[i] = '.'; + } + t[n] = 0; + + fmtprint(f, " %#p/\"%s\"%s", a, t, suffix); + free(t); +} + +static void +fmtuserstring(Fmt* f, char* a, char* suffix) +{ + int n; + char *t; + + if(a == nil){ + fmtprint(f, "0/\"\"%s", suffix); + return; + } + a = validaddr(a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + t = smalloc(n+1); + memmove(t, a, n); + t[n] = 0; + fmtprint(f, "%#p/\"%s\"%s", a, t, suffix); + free(t); +} + +/* + */ +char* +syscallfmt(int syscallno, uintptr pc, va_list list) +{ + long l; + Fmt fmt; + void *v; + vlong vl; + uintptr p; + int i[2], len; + char *a, **argv; + + fmtstrinit(&fmt); + fmtprint(&fmt, "%d %s ", up->pid, up->text); + + if(syscallno >= nsyscall) + fmtprint(&fmt, " %d ", syscallno); + else + fmtprint(&fmt, "%s ", systab[syscallno].n); + + fmtprint(&fmt, sizeof(uintptr)==sizeof(uvlong)? 
"%llux":"%lux ", pc); + switch(syscallno){ + case SYSR1: + p = va_arg(list, uintptr); + fmtprint(&fmt, "%#p", p); + break; + case CHDIR: + case EXITS: + case REMOVE: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case BIND: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case CLOSE: + case NOTED: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d", i[0]); + break; + case DUP: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d", i[0], i[1]); + break; + case ALARM: + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#lud ", l); + break; + case EXEC: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + for(;;){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + fmtprint(&fmt, " "); + fmtuserstring(&fmt, a, ""); + argv++; + } + break; + case FAUTH: + i[0] = va_arg(list, int); + a = va_arg(list, char*); + fmtprint(&fmt, "%d", i[0]); + fmtuserstring(&fmt, a, ""); + break; + case SEGBRK: + case RENDEZVOUS: + v = va_arg(list, void*); + fmtprint(&fmt, "%#p ", v); + v = va_arg(list, void*); + fmtprint(&fmt, "%#p", v); + break; + case OPEN: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case OSEEK: /* deprecated */ + i[0] = va_arg(list, int); + l = va_arg(list, long); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %ld %d", i[0], l, i[1]); + break; + case SLEEP: + l = va_arg(list, long); + fmtprint(&fmt, "%ld", l); + break; + case RFORK: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case PIPE: + case BRK_: + v = va_arg(list, int*); + fmtprint(&fmt, "%#p", v); + break; + case CREATE: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); 
+ fmtprint(&fmt, "%#ux %#ux", i[0], i[1]); + break; + case FD2PATH: + case FSTAT: + case FWSTAT: + i[0] = va_arg(list, int); + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%d %#p %lud", i[0], a, l); + break; + case NOTIFY: + case SEGDETACH: + v = va_arg(list, void*); + fmtprint(&fmt, "%#p", v); + break; + case SEGATTACH: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + /*FALLTHROUGH*/ + case SEGFREE: + case SEGFLUSH: + v = va_arg(list, void*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#p %lud", v, l); + break; + case UNMOUNT: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case SEMACQUIRE: + case SEMRELEASE: + v = va_arg(list, int*); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#p %d", v, i[0]); + break; + case TSEMACQUIRE: + v = va_arg(list, int*); + l = va_arg(list, ulong); + fmtprint(&fmt, "%#p %ld", v, l); + break; + case SEEK: + v = va_arg(list, vlong*); + i[0] = va_arg(list, int); + vl = va_arg(list, vlong); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%#p %d %#llux %d", v, i[0], vl, i[1]); + break; + case FVERSION: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d ", i[0], i[1]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%lud", l); + break; + case WSTAT: + case STAT: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + /*FALLTHROUGH*/ + case ERRSTR: + case AWAIT: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#p %lud", a, l); + break; + case MOUNT: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d ", i[0], i[1]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + 
case PREAD: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + l = va_arg(list, long); + fmtprint(&fmt, "%d %#p %ld", i[0], v, l); + if(syscallno == PREAD){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + break; + case PWRITE: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + l = va_arg(list, long); + fmtprint(&fmt, "%d ", i[0]); + len = MIN(l, 64); + fmtrwdata(&fmt, v, len, " "); + fmtprint(&fmt, "%ld", l); + if(syscallno == PWRITE){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + break; + case NSEC: + break; + } + return fmtstrflush(&fmt); +} + +char* +sysretfmt(int syscallno, va_list list, Ar0* ar0, uvlong start, uvlong stop) +{ + long l; + void* v; + Fmt fmt; + vlong vl; + int i, len; + char *a, *errstr; + + fmtstrinit(&fmt); + + errstr = "\"\""; + switch(syscallno){ + default: + if(ar0->i == -1) + errstr = up->syserrstr; + fmtprint(&fmt, " = %d", ar0->i); + break; + case ALARM: + case PWRITE: + if(ar0->l == -1) + errstr = up->syserrstr; + fmtprint(&fmt, " = %ld", ar0->l); + break; + case EXEC: + case SEGBRK: + case SEGATTACH: + case RENDEZVOUS: + if(ar0->v == (void*)-1) + errstr = up->syserrstr; + fmtprint(&fmt, " = %#p", ar0->v); + break; + case AWAIT: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ar0->i > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %d", l, ar0->i); + } + else{ + fmtprint(&fmt, "%#p/\"\" %lud = %d", a, l, ar0->i); + errstr = up->syserrstr; + } + break; + case ERRSTR: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ar0->i > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %d", l, ar0->i); + } + else{ + fmtprint(&fmt, "\"\" %lud = %d", l, ar0->i); + errstr = up->syserrstr; + } + break; + case FD2PATH: + i = va_arg(list, int); + USED(i); + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ar0->i > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %d", l, ar0->i); + } + else{ + fmtprint(&fmt, "\"\" %lud = %d", 
l, ar0->i); + errstr = up->syserrstr; + } + break; + case PREAD: + i = va_arg(list, int); + USED(i); + v = va_arg(list, void*); + l = va_arg(list, long); + if(ar0->l > 0){ + len = MIN(ar0->l, 64); + fmtrwdata(&fmt, v, len, ""); + } + else{ + fmtprint(&fmt, "/\"\""); + errstr = up->syserrstr; + } + fmtprint(&fmt, " %ld", l); + if(syscallno == PREAD){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + fmtprint(&fmt, " = %d", ar0->i); + break; + case NSEC: + fmtprint(&fmt, " = %lld", ar0->vl); + break; + } + fmtprint(&fmt, " %s %#llud %#llud\n", errstr, start, stop-start); + + return fmtstrflush(&fmt); +} diff -Nru /sys/src/9k/port/sysfile.c /sys/src/9k/port/sysfile.c --- /sys/src/9k/port/sysfile.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sysfile.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1384 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * The sys*() routines needn't poperror() as they return directly to syscall(). 
+ */ + +static void +unlockfgrp(Fgrp *f) +{ + int ex; + + ex = f->exceed; + f->exceed = 0; + unlock(f); + if(ex) + pprint("warning: process exceeds %d file descriptors\n", ex); +} + +static int +growfd(Fgrp *f, int fd) /* fd is always >= 0 */ +{ + Chan **newfd, **oldfd; + + if(fd < f->nfd) + return 0; + if(fd >= f->nfd+DELTAFD) + return -1; /* out of range */ + /* + * Unbounded allocation is unwise + */ + if(f->nfd >= 5000){ + Exhausted: + print("no free file descriptors\n"); + return -1; + } + newfd = malloc((f->nfd+DELTAFD)*sizeof(Chan*)); + if(newfd == 0) + goto Exhausted; + oldfd = f->fd; + memmove(newfd, oldfd, f->nfd*sizeof(Chan*)); + f->fd = newfd; + free(oldfd); + f->nfd += DELTAFD; + if(fd > f->maxfd){ + if(fd/100 > f->maxfd/100) + f->exceed = (fd/100)*100; + f->maxfd = fd; + } + return 1; +} + +/* + * this assumes that the fgrp is locked + */ +static int +findfreefd(Fgrp *f, int start) +{ + int fd; + + for(fd=start; fdnfd; fd++) + if(f->fd[fd] == 0) + break; + if(fd >= f->nfd && growfd(f, fd) < 0) + return -1; + return fd; +} + +int +newfd(Chan *c) +{ + int fd; + Fgrp *f; + + f = up->fgrp; + lock(f); + fd = findfreefd(f, 0); + if(fd < 0){ + unlockfgrp(f); + return -1; + } + if(fd > f->maxfd) + f->maxfd = fd; + f->fd[fd] = c; + unlockfgrp(f); + return fd; +} + +static int +newfd2(int fd[2], Chan *c[2]) +{ + Fgrp *f; + + f = up->fgrp; + lock(f); + fd[0] = findfreefd(f, 0); + if(fd[0] < 0){ + unlockfgrp(f); + return -1; + } + fd[1] = findfreefd(f, fd[0]+1); + if(fd[1] < 0){ + unlockfgrp(f); + return -1; + } + if(fd[1] > f->maxfd) + f->maxfd = fd[1]; + f->fd[fd[0]] = c[0]; + f->fd[fd[1]] = c[1]; + unlockfgrp(f); + + return 0; +} + +Chan* +fdtochan(int fd, int mode, int chkmnt, int iref) +{ + Chan *c; + Fgrp *f; + + c = nil; + f = up->fgrp; + + lock(f); + if(fd<0 || f->nfd<=fd || (c = f->fd[fd])==0) { + unlock(f); + error(Ebadfd); + } + if(iref) + incref(c); + unlock(f); + + if(chkmnt && (c->flag&CMSG)) { + if(iref) + cclose(c); + error(Ebadusefd); + } + + 
if(mode<0 || c->mode==ORDWR) + return c; + + if((mode&OTRUNC) && c->mode==OREAD) { + if(iref) + cclose(c); + error(Ebadusefd); + } + + if((mode&~OTRUNC) != c->mode) { + if(iref) + cclose(c); + error(Ebadusefd); + } + + return c; +} + +int +openmode(int omode) +{ + omode &= ~(OTRUNC|OCEXEC|ORCLOSE); + if(omode > OEXEC) + error(Ebadarg); + if(omode == OEXEC) + return OREAD; + return omode; +} + +void +sysfd2path(Ar0* ar0, va_list list) +{ + Chan *c; + char *buf; + int fd; + usize nbuf; + + /* + * int fd2path(int fd, char* buf, int nbuf); + * should be + * int fd2path(int fd, char* buf, usize nbuf); + */ + fd = va_arg(list, int); + buf = va_arg(list, char*); + nbuf = va_arg(list, usize); + buf = validaddr(buf, nbuf, 1); + + c = fdtochan(fd, -1, 0, 1); + snprint(buf, nbuf, "%s", chanpath(c)); + cclose(c); + + ar0->i = 0; +} + +void +syspipe(Ar0* ar0, va_list list) +{ + int *a, fd[2]; + Chan *c[2]; + static char *datastr[] = {"data", "data1"}; + + /* + * int pipe(int fd[2]); + */ + a = va_arg(list, int*); + a = validaddr(a, sizeof(fd), 1); + evenaddr(PTR2UINT(a)); + + c[0] = namec("#|", Atodir, 0, 0); + c[1] = nil; + fd[0] = -1; + fd[1] = -1; + + if(waserror()){ + cclose(c[0]); + if(c[1]) + cclose(c[1]); + nexterror(); + } + c[1] = cclone(c[0]); + if(walk(&c[0], datastr+0, 1, 1, nil) < 0) + error(Egreg); + if(walk(&c[1], datastr+1, 1, 1, nil) < 0) + error(Egreg); + c[0] = c[0]->dev->open(c[0], ORDWR); + c[1] = c[1]->dev->open(c[1], ORDWR); + if(newfd2(fd, c) < 0) + error(Enofd); + poperror(); + + a[0] = fd[0]; + a[1] = fd[1]; + + ar0->i = 0; +} + +void +sysdup(Ar0* ar0, va_list list) +{ + int nfd, ofd; + Chan *nc, *oc; + Fgrp *f; + + /* + * int dup(int oldfd, int newfd); + * + * Close after dup'ing, so date > #d/1 works + */ + ofd = va_arg(list, int); + oc = fdtochan(ofd, -1, 0, 1); + nfd = va_arg(list, int); + + if(nfd != -1){ + f = up->fgrp; + lock(f); + if(nfd < 0 || growfd(f, nfd) < 0) { + unlockfgrp(f); + cclose(oc); + error(Ebadfd); + } + if(nfd > f->maxfd) + 
f->maxfd = nfd; + + nc = f->fd[nfd]; + f->fd[nfd] = oc; + unlockfgrp(f); + if(nc != nil) + cclose(nc); + }else{ + if(waserror()) { + cclose(oc); + nexterror(); + } + nfd = newfd(oc); + if(nfd < 0) + error(Enofd); + poperror(); + } + + ar0->i = nfd; +} + +void +sysopen(Ar0* ar0, va_list list) +{ + char *aname; + int fd, omode; + Chan *c; + + /* + * int open(char* file, int omode); + */ + aname = va_arg(list, char*); + omode = va_arg(list, int); + openmode(omode); /* error check only */ + + c = nil; + if(waserror()){ + if(c != nil) + cclose(c); + nexterror(); + } + aname = validaddr(aname, 1, 0); + c = namec(aname, Aopen, omode, 0); + fd = newfd(c); + if(fd < 0) + error(Enofd); + poperror(); + + ar0->i = fd; +} + +void +fdclose(int fd, int flag) +{ + int i; + Chan *c; + Fgrp *f; + + f = up->fgrp; + lock(f); + c = f->fd[fd]; + if(c == nil){ + /* can happen for users with shared fd tables */ + unlock(f); + return; + } + if(flag){ + if(c == nil || !(c->flag&flag)){ + unlock(f); + return; + } + } + f->fd[fd] = nil; + if(fd == f->maxfd) + for(i = fd; --i >= 0 && f->fd[i] == 0; ) + f->maxfd = i; + + unlock(f); + cclose(c); +} + +void +sysclose(Ar0* ar0, va_list list) +{ + int fd; + + /* + * int close(int fd); + */ + fd = va_arg(list, int); + + fdtochan(fd, -1, 0, 0); + fdclose(fd, 0); + + ar0->i = 0; +} + +static long +unionread(Chan *c, void *va, long n) +{ + int i; + long nr; + Mhead *mh; + Mount *mount; + + qlock(&c->umqlock); + mh = c->umh; + rlock(&mh->lock); + mount = mh->mount; + /* bring mount in sync with c->uri and c->umc */ + for(i = 0; mount != nil && i < c->uri; i++) + mount = mount->next; + + nr = 0; + while(mount != nil){ + /* Error causes component of union to be skipped */ + if(mount->to && !waserror()){ + if(c->umc == nil){ + c->umc = cclone(mount->to); + c->umc = c->umc->dev->open(c->umc, OREAD); + } + + nr = c->umc->dev->read(c->umc, va, n, c->umc->offset); + c->umc->offset += nr; + poperror(); + } + if(nr > 0) + break; + + /* Advance to next element */ 
+ c->uri++; + if(c->umc){ + cclose(c->umc); + c->umc = nil; + } + mount = mount->next; + } + runlock(&mh->lock); + qunlock(&c->umqlock); + return nr; +} + +static void +unionrewind(Chan *c) +{ + qlock(&c->umqlock); + c->uri = 0; + if(c->umc){ + cclose(c->umc); + c->umc = nil; + } + qunlock(&c->umqlock); +} + +static usize +dirfixed(uchar *p, uchar *e, Dir *d) +{ + int len; + Dev *dev; + + len = GBIT16(p)+BIT16SZ; + if(p + len > e) + return 0; + + p += BIT16SZ; /* ignore size */ + dev = devtabget(GBIT16(p), 1); //XDYNX + if(dev != nil){ + d->type = dev->dc; + //devtabdecr(dev); + } + else + d->type = -1; + p += BIT16SZ; + d->dev = GBIT32(p); + p += BIT32SZ; + d->qid.type = GBIT8(p); + p += BIT8SZ; + d->qid.vers = GBIT32(p); + p += BIT32SZ; + d->qid.path = GBIT64(p); + p += BIT64SZ; + d->mode = GBIT32(p); + p += BIT32SZ; + d->atime = GBIT32(p); + p += BIT32SZ; + d->mtime = GBIT32(p); + p += BIT32SZ; + d->length = GBIT64(p); + + return len; +} + +static char* +dirname(uchar *p, usize *n) +{ + p += BIT16SZ+BIT16SZ+BIT32SZ+BIT8SZ+BIT32SZ+BIT64SZ + + BIT32SZ+BIT32SZ+BIT32SZ+BIT64SZ; + *n = GBIT16(p); + + return (char*)p+BIT16SZ; +} + +static usize +dirsetname(char *name, usize len, uchar *p, usize n, usize maxn) +{ + char *oname; + usize nn, olen; + + if(n == BIT16SZ) + return BIT16SZ; + + oname = dirname(p, &olen); + + nn = n+len-olen; + PBIT16(p, nn-BIT16SZ); + if(nn > maxn) + return BIT16SZ; + + if(len != olen) + memmove(oname+len, oname+olen, p+n-(uchar*)(oname+olen)); + PBIT16((uchar*)(oname-2), len); + memmove(oname, name, len); + + return nn; +} + +/* + * Mountfix might have caused the fixed results of the directory read + * to overflow the buffer. Catch the overflow in c->dirrock. 
+ */ +static void +mountrock(Chan *c, uchar *p, uchar **pe) +{ + uchar *e, *r; + int len, n; + + e = *pe; + + /* find last directory entry: step p over length-prefixed entries until the next one would reach e */ + for(;;){ + len = BIT16SZ+GBIT16(p); + if(p+len >= e) + break; + p += len; + } + + /* save it away: append the entry to c->dirrock under rockqlock, growing the buffer to a multiple of 1024 bytes when it is too small */ + qlock(&c->rockqlock); + if(c->nrock+len > c->mrock){ + n = ROUNDUP(c->nrock+len, 1024); + r = smalloc(n); + memmove(r, c->dirrock, c->nrock); + free(c->dirrock); + c->dirrock = r; + c->mrock = n; + } + memmove(c->dirrock+c->nrock, p, len); + c->nrock += len; + qunlock(&c->rockqlock); + + /* drop it: the caller's end pointer now stops where the saved entry began */ + *pe = p; +} + +/* + * Satisfy a directory read with the results saved in c->dirrock. Returns 1 with the byte count in *nn when at least one whole entry fit in the n bytes at op, otherwise 0 with *nn untouched. + */ +static int +mountrockread(Chan *c, uchar *op, long n, long *nn) +{ + long dirlen; + uchar *rp, *erp, *ep, *p; + + /* common case: nothing saved (checked before taking the lock) */ + if(c->nrock == 0) + return 0; + + /* copy out what we can: whole entries only, stopping at the first that does not fit */ + qlock(&c->rockqlock); + rp = c->dirrock; + erp = rp+c->nrock; + p = op; + ep = p+n; + while(rp+BIT16SZ <= erp){ + dirlen = BIT16SZ+GBIT16(rp); + if(p+dirlen > ep) + break; + memmove(p, rp, dirlen); + p += dirlen; + rp += dirlen; + } + + if(p == op){ + qunlock(&c->rockqlock); + return 0; + } + + /* shift the rest: move the entries not yet copied to the front of the rock */ + if(rp != erp) + memmove(c->dirrock, rp, erp-rp); + c->nrock = erp - rp; + + *nn = p - op; + qunlock(&c->rockqlock); + return 1; +} + +static void +mountrewind(Chan *c) +{ + c->nrock = 0; /* forget any saved entries; the buffer itself stays allocated */ +} + +/* + * Rewrite the results of a directory read to reflect current + * name space bindings and mounts. Specifically, replace + * directory entries for bind and mount points with the results + * of statting what is mounted there. Except leave the old names. 
+ */ +static long +mountfix(Chan *c, uchar *op, long n, long maxn) +{ + char *name; + int nbuf; + Chan *nc; + Mhead *mh; + Mount *mount; + usize dirlen, nname, r, rest; + long l; + uchar *buf, *e, *p; + Dir d; + + p = op; + buf = nil; + nbuf = 0; + for(e=&p[n]; p+BIT16SZmount; mount; mount=mount->next) + if(eqchanddq(mount->to, d.type, d.dev, d.qid, 1)) + goto Norewrite; + + name = dirname(p, &nname); + /* + * Do the stat but fix the name. If it fails, + * leave old entry. + * BUG: If it fails because there isn't room for + * the entry, what can we do? Nothing, really. + * Might as well skip it. + */ + if(buf == nil){ + buf = smalloc(4096); + nbuf = 4096; + } + if(waserror()) + goto Norewrite; + l = nc->dev->stat(nc, buf, nbuf); + r = dirsetname(name, nname, buf, l, nbuf); + if(r == BIT16SZ) + error("dirsetname"); + poperror(); + + /* + * Shift data in buffer to accomodate new entry, + * possibly overflowing into rock. + */ + rest = e - (p+dirlen); + if(r > dirlen){ + while(p+r+rest > op+maxn){ + mountrock(c, p, &e); + if(e == p){ + dirlen = 0; + goto Norewrite; + } + rest = e - (p+dirlen); + } + } + if(r != dirlen){ + memmove(p+r, p+dirlen, rest); + dirlen = r; + e = p+dirlen+rest; + } + + /* + * Rewrite directory entry. + */ + memmove(p, buf, r); + + Norewrite: + cclose(nc); + putmhead(mh); + } + } + if(buf) + free(buf); + + if(p != e) + error("oops in mountfix"); + + return e-op; +} + +static long +read(va_list list, int ispread) +{ + int fd; + long n, nn, nnn; + void *p; + Chan *c; + vlong off; + + fd = va_arg(list, int); + p = va_arg(list, void*); + n = va_arg(list, long); + p = validaddr(p, n, 1); + + c = fdtochan(fd, OREAD, 1, 1); + + if(waserror()){ + cclose(c); + nexterror(); + } + + /* + * The offset is passed through on directories, normally. + * Sysseek complains, but pread is used by servers like exportfs, + * that shouldn't need to worry about this issue. + * + * Notice that c->devoffset is the offset that c's dev is seeing. 
+ * The number of bytes read on this fd (c->offset) may be different + * due to rewritings in mountfix. + */ + if(ispread){ + off = va_arg(list, vlong); + if(off == ~0LL){ /* use and maintain channel's offset */ + off = c->offset; + ispread = 0; + } + } + else + off = c->offset; + + if(c->qid.type & QTDIR){ + /* + * Directory read: + * rewind to the beginning of the file if necessary; + * try to fill the buffer via mountrockread; + * clear ispread to always maintain the Chan offset. + */ + if(off == 0LL){ + if(!ispread){ + c->offset = 0; + c->devoffset = 0; + } + mountrewind(c); + unionrewind(c); + } + + if(!mountrockread(c, p, n, &nn)){ + if(c->umh) + nn = unionread(c, p, n); + else{ + if(off != c->offset) + error(Eisdir); + nn = c->dev->read(c, p, n, c->devoffset); + } + } + nnn = mountfix(c, p, nn, n); + + ispread = 0; + } + else + nnn = nn = c->dev->read(c, p, n, off); + + if(!ispread){ + lock(c); + c->devoffset += nn; + c->offset += nnn; + unlock(c); + } + + poperror(); + cclose(c); + + return nnn; +} + +void +syspread(Ar0* ar0, va_list list) +{ + /* + * long pread(int fd, void* buf, long nbytes, vlong offset); + */ + ar0->l = read(list, 1); +} + +static long +write(va_list list, int ispwrite) +{ + int fd; + long n, r; + void *p; + Chan *c; + vlong off; + + fd = va_arg(list, int); + p = va_arg(list, void*); + r = n = va_arg(list, long); + + p = validaddr(p, n, 0); + n = 0; + c = fdtochan(fd, OWRITE, 1, 1); + if(waserror()) { + if(!ispwrite){ + lock(c); + c->offset -= n; + unlock(c); + } + cclose(c); + nexterror(); + } + + if(c->qid.type & QTDIR) + error(Eisdir); + + n = r; + + off = ~0LL; + if(ispwrite) + off = va_arg(list, vlong); + if(off == ~0LL){ /* use and maintain channel's offset */ + lock(c); + off = c->offset; + c->offset += n; + unlock(c); + } + + r = c->dev->write(c, p, n, off); + + if(!ispwrite && r < n){ + lock(c); + c->offset -= n - r; + unlock(c); + } + + poperror(); + cclose(c); + + return r; +} + +void +syspwrite(Ar0* ar0, va_list list) +{ + 
/* + * long pwrite(int fd, void *buf, long nbytes, vlong offset); + */ + ar0->l = write(list, 1); +} + +static vlong +sseek(int fd, vlong offset, int whence) +{ + Chan *c; + uchar buf[sizeof(Dir)+100]; + Dir dir; + int n; + + c = fdtochan(fd, -1, 1, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + if(c->dev->dc == '|') + error(Eisstream); + + switch(whence){ + case 0: + if((c->qid.type & QTDIR) && offset != 0LL) + error(Eisdir); + c->offset = offset; + break; + + case 1: + if(c->qid.type & QTDIR) + error(Eisdir); + lock(c); /* lock for read/write update */ + offset += c->offset; + c->offset = offset; + unlock(c); + break; + + case 2: + if(c->qid.type & QTDIR) + error(Eisdir); + n = c->dev->stat(c, buf, sizeof buf); + if(convM2D(buf, n, &dir, nil) == 0) + error("internal error: stat error in seek"); + offset += dir.length; + c->offset = offset; + break; + + default: + error(Ebadarg); + } + c->uri = 0; + c->dri = 0; + cclose(c); + poperror(); + + return offset; +} + +void +sysseek(Ar0* ar0, va_list list) +{ + int fd, whence; + vlong offset, *rv; + + /* + * vlong seek(int fd, vlong n, int type); + * + * The system call actually has 4 arguments, + * int _seek(vlong*, int, vlong, int); + * and the first argument is where the offset + * is returned. The C library arranges the + * argument/return munging if necessary. + */ + rv = va_arg(list, vlong*); + rv = validaddr(rv, sizeof(vlong), 1); + + fd = va_arg(list, int); + offset = va_arg(list, vlong); + whence = va_arg(list, int); + *rv = sseek(fd, offset, whence); + + ar0->i = 0; +} + +void +sysoseek(Ar0* ar0, va_list list) +{ + long offset; + int fd, whence; + + /* + * long oseek(int fd, long n, int type); + * + * Deprecated; backwards compatibility only. 
+ */ + fd = va_arg(list, int); + offset = va_arg(list, long); + whence = va_arg(list, int); + + ar0->l = sseek(fd, offset, whence); +} + +void +validstat(uchar *s, usize n) +{ + usize m; + char buf[64]; + + if(statcheck(s, n) < 0) + error(Ebadstat); + /* verify that name entry is acceptable */ + s += STATFIXLEN - 4*BIT16SZ; /* location of first string */ + /* + * s now points at count for first string. + * if it's too long, let the server decide; this is + * only for his protection anyway. otherwise + * we'd have to allocate and waserror. + */ + m = GBIT16(s); + s += BIT16SZ; + if(m+1 > sizeof buf) + return; + memmove(buf, s, m); + buf[m] = '\0'; + /* name could be '/' */ + if(strcmp(buf, "/") != 0) + validname(buf, 0); +} + +static char* +pathlast(Path *p) +{ + char *s; + + if(p == nil) + return nil; + if(p->len == 0) + return nil; + s = strrchr(p->s, '/'); + if(s) + return s+1; + return p->s; +} + +void +sysfstat(Ar0* ar0, va_list list) +{ + int fd; + Chan *c; + usize n; + int r; + uchar *p; + + /* + * int fstat(int fd, uchar* edir, int nedir); + * should really be + * usize fstat(int fd, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. + */ + fd = va_arg(list, int); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 1); + c = fdtochan(fd, -1, 0, 1); + if(waserror()) { + cclose(c); + nexterror(); + } + r = c->dev->stat(c, p, n); + poperror(); + cclose(c); + + ar0->i = r; +} + +void +sysstat(Ar0* ar0, va_list list) +{ + char *aname; + Chan *c; + usize n; + int r; + uchar *p; + + /* + * int stat(char* name, uchar* edir, int nedir); + * should really be + * usize stat(char* name, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. 
+ */ + aname = va_arg(list, char*); + aname = validaddr(aname, 1, 0); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 1); + c = namec(aname, Aaccess, 0, 0); + if(waserror()){ + cclose(c); + nexterror(); + } + r = c->dev->stat(c, p, n); + aname = pathlast(c->path); + if(aname) + r = dirsetname(aname, strlen(aname), p, r, n); + + poperror(); + cclose(c); + + ar0->i = r; +} + +void +syschdir(Ar0* ar0, va_list list) +{ + Chan *c; + char *aname; + + /* + * int chdir(char* dirname); + */ + aname = va_arg(list, char*); + aname = validaddr(aname, 1, 0); + + c = namec(aname, Atodir, 0, 0); + cclose(up->dot); + up->dot = c; + + ar0->i = 0; +} + +static int +bindmount(int ismount, int fd, int afd, char* arg0, char* arg1, int flag, char* spec) +{ + int i; + Dev *dev; + Chan *c0, *c1, *ac, *bc; + struct{ + Chan *chan; + Chan *authchan; + char *spec; + int flags; + }bogus; + + if((flag&~MMASK) || (flag&MORDER)==(MBEFORE|MAFTER)) + error(Ebadarg); + + bogus.flags = flag & MCACHE; + + if(ismount){ + if(up->pgrp->noattach) + error(Enoattach); + + ac = nil; + bc = fdtochan(fd, ORDWR, 0, 1); + if(waserror()) { + if(ac) + cclose(ac); + cclose(bc); + nexterror(); + } + + if(afd >= 0) + ac = fdtochan(afd, ORDWR, 0, 1); + + bogus.chan = bc; + bogus.authchan = ac; + + bogus.spec = validaddr(spec, 1, 0); + if(waserror()) + error(Ebadspec); + spec = validnamedup(spec, 1); + poperror(); + + if(waserror()){ + free(spec); + nexterror(); + } + + dev = devtabget('M', 0); //XDYNX + if(waserror()){ + //devtabdecr(dev); + nexterror(); + } + c0 = dev->attach((char*)&bogus); + poperror(); + //devtabdecr(dev); + + poperror(); /* spec */ + free(spec); + poperror(); /* ac bc */ + if(ac) + cclose(ac); + cclose(bc); + }else{ + bogus.spec = nil; + c0 = namec(validaddr(arg0, 1, 0), Abind, 0, 0); + } + + if(waserror()){ + cclose(c0); + nexterror(); + } + + c1 = namec(validaddr(arg1, 1, 0), Amount, 0, 0); + if(waserror()){ + cclose(c1); + nexterror(); + } + + i = cmount(&c0, c1, 
flag, bogus.spec); + + poperror(); + cclose(c1); + poperror(); + cclose(c0); + if(ismount) + fdclose(fd, 0); + + return i; +} + +void +sysbind(Ar0* ar0, va_list list) +{ + int flag; + char *name, *old; + + /* + * int bind(char* name, char* old, int flag); + * should be + * long bind(char* name, char* old, int flag); + */ + name = va_arg(list, char*); + old = va_arg(list, char*); + flag = va_arg(list, int); + + ar0->i = bindmount(0, -1, -1, name, old, flag, nil); +} + +void +sysmount(Ar0* ar0, va_list list) +{ + int afd, fd, flag; + char *aname, *old; + + /* + * int mount(int fd, int afd, char* old, int flag, char* aname); + * should be + * long mount(int fd, int afd, char* old, int flag, char* aname); + */ + fd = va_arg(list, int); + afd = va_arg(list, int); + old = va_arg(list, char*); + flag = va_arg(list, int); + aname = va_arg(list, char*); + + ar0->i = bindmount(1, fd, afd, nil, old, flag, aname); +} + +void +sysunmount(Ar0* ar0, va_list list) +{ + char *name, *old; + Chan *cmount, *cmounted; + + /* + * int unmount(char* name, char* old); + */ + name = va_arg(list, char*); + old = va_arg(list, char*); + cmount = namec(validaddr(old, 1, 0), Amount, 0, 0); + + cmounted = nil; + if(name != nil) { + if(waserror()) { + cclose(cmount); + nexterror(); + } + + /* + * This has to be namec(..., Aopen, ...) because + * if arg[0] is something like /srv/cs or /fd/0, + * opening it is the only way to get at the real + * Chan underneath. 
+ */ + cmounted = namec(validaddr(name, 1, 0), Aopen, OREAD, 0); + poperror(); + } + + if(waserror()) { + cclose(cmount); + if(cmounted != nil) + cclose(cmounted); + nexterror(); + } + + cunmount(cmount, cmounted); + cclose(cmount); + if(cmounted != nil) + cclose(cmounted); + poperror(); + + ar0->i = 0; +} + +void +syscreate(Ar0* ar0, va_list list) +{ + char *aname; + int fd, omode, perm; + Chan *c; + + /* + * int create(char* file, int omode, ulong perm); + * should be + * int create(char* file, int omode, int perm); + */ + aname = va_arg(list, char*); + omode = va_arg(list, int); + perm = va_arg(list, int); + + openmode(omode & ~OEXCL); /* error check only; OEXCL okay here */ + c = nil; + if(waserror()) { + if(c != nil) + cclose(c); + nexterror(); + } + c = namec(validaddr(aname, 1, 0), Acreate, omode, perm); + fd = newfd(c); + if(fd < 0) + error(Enofd); + poperror(); + + ar0->i = fd; +} + +void +sysremove(Ar0* ar0, va_list list) +{ + Chan *c; + char *aname; + + /* + * int remove(char* file); + */ + aname = va_arg(list, char*); + c = namec(validaddr(aname, 1, 0), Aremove, 0, 0); + + /* + * Removing mount points is disallowed to avoid surprises + * (which should be removed: the mount point or the mounted Chan?). + */ + if(c->ismtpt){ + cclose(c); + error(Eismtpt); + } + if(waserror()){ + c->dev = nil; /* see below */ + cclose(c); + nexterror(); + } + c->dev->remove(c); + + /* + * Remove clunks the fid, but we need to recover the Chan + * so fake it up. rootclose() is known to be a nop. +Not sure this dicking around is right for Dev ref counts. + */ + c->dev = nil; + poperror(); + cclose(c); + + ar0->i = 0; +} + +static long +wstat(Chan* c, uchar* p, usize n) +{ + long l; + usize namelen; + + if(waserror()){ + cclose(c); + nexterror(); + } + + /* + * Renaming mount points is disallowed to avoid surprises + * (which should be renamed? the mount point or the mounted Chan?). 
+ */ + if(c->ismtpt){ + dirname(p, &namelen); + if(namelen) + nameerror(chanpath(c), Eismtpt); + } + l = c->dev->wstat(c, p, n); + poperror(); + cclose(c); + + return l; +} + +void +syswstat(Ar0* ar0, va_list list) +{ + Chan *c; + char *aname; + uchar *p; + usize n; + + /* + * int wstat(char* name, uchar* edir, int nedir); + * should really be + * usize wstat(char* name, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. + */ + aname = va_arg(list, char*); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 0); + validstat(p, n); + c = namec(validaddr(aname, 1, 0), Aaccess, 0, 0); + + ar0->l = wstat(c, p, n); +} + +void +sysfwstat(Ar0* ar0, va_list list) +{ + Chan *c; + int fd; + uchar *p; + usize n; + + /* + * int fwstat(int fd, uchar* edir, int nedir); + * should really be + * usize wstat(int fd, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. + */ + fd = va_arg(list, int); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 0); + validstat(p, n); + c = fdtochan(fd, -1, 1, 1); + + ar0->l = wstat(c, p, n); +} diff -Nru /sys/src/9k/port/sysproc.c /sys/src/9k/port/sysproc.c --- /sys/src/9k/port/sysproc.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sysproc.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1184 @@ +#include "u.h" +#include "tos.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include +#include + +void +sysr1(Ar0* ar0, va_list list) +{ + USED(list); + + ar0->i = 0; +} + +void +sysrfork(Ar0* ar0, va_list list) +{ + Proc *p; + int flag, i, n, pid; + Fgrp *ofg; + Pgrp *opg; + Rgrp *org; + Egrp *oeg; + Mach *wm; + uintptr ds; + void (*pt)(Proc*, int, vlong, vlong); + u64int ptarg; + + /* + * int rfork(int); + */ + flag = va_arg(list, int); + + /* Check flags before we commit */ + if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) + error(Ebadarg); + if((flag 
& (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG)) + error(Ebadarg); + if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG)) + error(Ebadarg); + + if((flag&RFPROC) == 0) { + if(flag & (RFMEM|RFNOWAIT)) + error(Ebadarg); + if(flag & (RFFDG|RFCFDG)) { + ofg = up->fgrp; + if(flag & RFFDG) + up->fgrp = dupfgrp(ofg); + else + up->fgrp = dupfgrp(nil); + closefgrp(ofg); + } + if(flag & (RFNAMEG|RFCNAMEG)) { + opg = up->pgrp; + up->pgrp = newpgrp(); + if(flag & RFNAMEG) + pgrpcpy(up->pgrp, opg); + /* inherit noattach */ + up->pgrp->noattach = opg->noattach; + closepgrp(opg); + } + if(flag & RFNOMNT) + up->pgrp->noattach = 1; + if(flag & RFREND) { + org = up->rgrp; + up->rgrp = newrgrp(); + closergrp(org); + } + if(flag & (RFENVG|RFCENVG)) { + oeg = up->egrp; + up->egrp = smalloc(sizeof(Egrp)); + up->egrp->ref = 1; + if(flag & RFENVG) + envcpy(up->egrp, oeg); + closeegrp(oeg); + } + if(flag & RFNOTEG) + up->noteid = incref(¬eidalloc); + + ar0->i = 0; + return; + } + + if((flag & RFMEM) == 0){ + /* assume half might change copy-on-write, but cap it */ + ds = procdatasize(up, 1)/2; + if(ds > 64*MB) + ds = 64*MB; + if(!physmemavail(ds)) + error(Enovmem); + } + + p = newproc(); + + p->trace = up->trace; + p->scallnr = up->scallnr; + memmove(p->arg, up->arg, sizeof(up->arg)); + p->nerrlab = 0; + p->slash = up->slash; + p->dot = up->dot; + incref(p->dot); + + memmove(p->note, up->note, sizeof(p->note)); + p->privatemem = up->privatemem; + p->nnote = up->nnote; + p->notified = 0; + p->lastnote = up->lastnote; + p->notify = up->notify; + p->ureg = up->ureg; + p->dbgreg = 0; + + /* Make a new set of memory segments */ + n = flag & RFMEM; + qlock(&p->seglock); + if(waserror()){ + for(i = 0; i < NSEG; i++) + if(p->seg[i]){ + putseg(p->seg[i]); + p->seg[i] = 0; + } + qunlock(&p->seglock); + nexterror(); + } + for(i = 0; i < NSEG; i++) + if(up->seg[i]) + p->seg[i] = dupseg(up->seg, i, n); + poperror(); + qunlock(&p->seglock); + + /* File descriptors */ + if(flag & (RFFDG|RFCFDG)) { + if(flag & 
RFFDG) + p->fgrp = dupfgrp(up->fgrp); + else + p->fgrp = dupfgrp(nil); + } + else { + p->fgrp = up->fgrp; + incref(p->fgrp); + } + + /* Process groups */ + if(flag & (RFNAMEG|RFCNAMEG)) { + p->pgrp = newpgrp(); + if(flag & RFNAMEG) + pgrpcpy(p->pgrp, up->pgrp); + /* inherit noattach */ + p->pgrp->noattach = up->pgrp->noattach; + } + else { + p->pgrp = up->pgrp; + incref(p->pgrp); + } + if(flag & RFNOMNT) + p->pgrp->noattach = 1; + + if(flag & RFREND) + p->rgrp = newrgrp(); + else { + incref(up->rgrp); + p->rgrp = up->rgrp; + } + + /* Environment group */ + if(flag & (RFENVG|RFCENVG)) { + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + if(flag & RFENVG) + envcpy(p->egrp, up->egrp); + } + else { + p->egrp = up->egrp; + incref(p->egrp); + } + p->hang = up->hang; + p->procmode = up->procmode; + + /* Craft a return frame which will cause the child to pop out of + * the scheduler in user mode with the return register zero + */ + sysrforkchild(p, up); + + p->parent = up; + p->parentpid = up->pid; + if(flag&RFNOWAIT) + p->parentpid = 0; + else { + lock(&up->exl); + up->nchild++; + unlock(&up->exl); + } + if((flag&RFNOTEG) == 0) + p->noteid = up->noteid; + + pid = p->pid; + memset(p->time, 0, sizeof(p->time)); + p->time[TReal] = sys->ticks; + + kstrdup(&p->text, up->text); + kstrdup(&p->user, up->user); + /* + * since the bss/data segments are now shareable, + * any mmu info about this process is now stale + * (i.e. has bad properties) and has to be discarded. 
+ */ + mmuflush(); + p->basepri = up->basepri; + p->priority = up->basepri; + p->fixedpri = up->fixedpri; + wm = up->wired; +// if((flag & RFMEM) == 0 && wm == nil) +// pickmach(p); +// else + p->mp = up->mp; + if(wm) + procwired(p, wm->machno); + if(p->trace && (pt = proctrace) != nil){ + strncpy((char*)&ptarg, p->text, sizeof ptarg); + pt(p, SName, 0, ptarg); + } + ready(p); + sched(); + + ar0->i = pid; +} + +static uvlong +vl2be(uvlong v) +{ + uchar *p; + + p = (uchar*)&v; + return ((uvlong)((p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3])<<32) + |((uvlong)(p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7]); +} + +static ulong +l2be(long l) +{ + uchar *cp; + + cp = (uchar*)&l; + return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; +} + +typedef struct { + Exec; + uvlong hdr[1]; +} Hdr; + +void +sysexec(Ar0* ar0, va_list list) +{ + Hdr hdr; + Fgrp *f; + Tos *tos; + Chan *chan; + Image *img; + Segment *s, *ts, *ds, *bs; + int argc, i, n; + char *a, **argv, *elem, *file, *p; + char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1]; + long hdrsz, magic, textsz, datasz, bsssz; + uintptr textlim, textmin, datalim, bsslim, entry, stack; + void (*pt)(Proc*, int, vlong, vlong); + u64int ptarg; + + /* + * void* exec(char* name, char* argv[]); + */ + + /* + * Open the file, remembering the final element and the full name. + */ + + elem = nil; + p = va_arg(list, char*); + p = validaddr(p, 1, 0); + file = validnamedup(p, 1); + if(waserror()){ + free(file); + nexterror(); + } + chan = namec(file, Aopen, OEXEC, 0); + if(waserror()){ + cclose(chan); + if(elem != nil) + free(elem); + nexterror(); + } + kstrdup(&elem, up->genbuf); + + /* + * Read the header. + * If it's a #!, fill in progarg[] with info then read a new header + * from the file indicated by the #!. + * The #! line must be less than sizeof(Exec) in size, + * including the terminating \n. 
+ */ + hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0); + if(hdrsz < 2) + error(Ebadexec); + p = (char*)&hdr; + argc = 0; + if(p[0] == '#' && p[1] == '!'){ + p = memccpy(line, (char*)&hdr, '\n', MIN(sizeof(Exec), hdrsz)); + if(p == nil) + error(Ebadexec); + *(p-1) = '\0'; + argc = tokenize(line+2, progarg, nelem(progarg)); + if(argc == 0) + error(Ebadexec); + + /* The original file becomes an extra arg after #! line */ + progarg[argc++] = file; + + /* + * Take the #! $0 as a file to open, and replace + * $0 with the original path's name. + */ + p = progarg[0]; + progarg[0] = elem; + poperror(); /* chan */ + cclose(chan); + chan = namec(p, Aopen, OEXEC, 0); + if(waserror()){ + cclose(chan); + free(elem); + nexterror(); + } + hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0); + if(hdrsz < 2) + error(Ebadexec); + } + + /* + * #! has had its chance, now we need a real binary. + */ + magic = l2be(hdr.magic); + if(hdrsz != sizeof(Hdr) || magic != AOUT_MAGIC) + error(Ebadexec); + if(magic & HDR_MAGIC){ + entry = vl2be(hdr.hdr[0]); + hdrsz = sizeof(Hdr); + } + else{ + entry = l2be(hdr.entry); + hdrsz = sizeof(Exec); + } + + textsz = l2be(hdr.text); + datasz = l2be(hdr.data); + bsssz = l2be(hdr.bss); + + textmin = ROUNDUP(UTZERO+hdrsz+textsz, PGSZ); + textlim = UTROUND(textmin); + datalim = ROUNDUP(textlim+datasz, PGSZ); + bsslim = ROUNDUP(textlim+datasz+bsssz, PGSZ); + + /* + * Check the binary header for consistency, + * e.g. the entry point is within the text segment and + * the segments don't overlap each other. + */ + if(entry < UTZERO+hdrsz || entry >= UTZERO+hdrsz+textsz) + error(Ebadexec); + + if(textsz >= textlim || datasz > datalim || bsssz > bsslim + || textlim >= USTKTOP || datalim >= USTKTOP || bsslim >= USTKTOP + || datalim < textlim || bsslim < datalim) + error(Ebadexec); + + /* + * The new stack is created in ESEG, temporarily mapped elsewhere. 
+ * The stack contains, in descending address order: + * a structure containing housekeeping and profiling data (Tos); + * argument strings; + * array of vectors to the argument strings with a terminating + * nil (argv). + * When the exec is committed, this temporary stack in ESEG will + * become SSEG. + * The architecture-dependent code which jumps to the new image + * will also push a count of the argument array onto the stack (argc). + */ + qlock(&up->seglock); + if(waserror()){ + if(up->seg[ESEG] != nil){ + putseg(up->seg[ESEG]); + up->seg[ESEG] = nil; + } + qunlock(&up->seglock); + nexterror(); + } + up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, TSTKTOP, nil, 0); + + /* + * Stack is a pointer into the temporary stack + * segment, and will move as items are pushed. + */ + stack = TSTKTOP-sizeof(Tos); + + /* + * First, the top-of-stack structure. + */ + tos = (Tos*)stack; + tos->cyclefreq = m->cyclefreq; + cycles((uvlong*)&tos->pcycles); + tos->pcycles = -tos->pcycles; + tos->kcycles = tos->pcycles; + tos->clock = 0; + + /* + * Next push any arguments found from a #! header. + */ + for(i = 0; i < argc; i++){ + n = strlen(progarg[i])+1; + stack -= n; + memmove(UINT2PTR(stack), progarg[i], n); + } + + /* + * Copy the strings pointed to by the syscall argument argv into + * the temporary stack segment, being careful to check both argv and + * the strings it points to are valid. + */ + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + for(i = 0;; i++, argv++){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + a = validaddr(a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + + /* + * This futzing is so argv[0] gets validated even + * though it will be thrown away if this is a shell + * script. + */ + if(argc > 0 && i == 0) + continue; + + /* + * Before copying the string into the temporary stack, + * which might involve a demand-page, check the string + * will not overflow the bottom of the stack. 
+ */ + stack -= n; + if(stack < TSTKTOP-USTKSIZE) + error(Enovmem); + p = UINT2PTR(stack); + memmove(p, a, n); + p[n-1] = 0; + argc++; + } + if(argc < 1) + error(Ebadexec); + + /* + * Before pushing the argument pointers onto the temporary stack, + * which might involve a demand-page, check there is room for the + * terminating nil pointer, plus pointers, plus some slop for however + * argc might be passed on the stack by sysexecregs (give a page + * of slop, it is an overestimate, but why not). + * Sysexecstack does any architecture-dependent stack alignment. + * Keep a copy of the start of the argument strings before alignment + * so up->args can be created later. + * Although the argument vectors are being pushed onto the stack in + * the temporary segment, the values must be adjusted to reflect + * the segment address after it replaces the current SSEG. + */ + a = p = UINT2PTR(stack); + stack = sysexecstack(stack, argc); + if(stack-(argc+1)*sizeof(char**)-PGSZ < TSTKTOP-USTKSIZE) + error(Ebadexec); + + argv = (char**)stack; + *--argv = nil; + for(i = 0; i < argc; i++){ + *--argv = p + (USTKTOP-TSTKTOP); + p += strlen(p) + 1; + } + + /* + * Make a good faith copy of the args in up->args using the strings + * in the temporary stack segment. The length must be > 0 as it + * includes the \0 on the last argument and argc was checked earlier + * to be > 0. After the memmove, compensate for any UTF character + * boundary before placing the terminating \0. + */ + n = p - a; + if(n <= 0) + error(Egreg); + if(n > 128) + n = 128; + + p = smalloc(n); + if(waserror()){ + free(p); + nexterror(); + } + + memmove(p, a, n); + while(n > 0 && (p[n-1] & 0xc0) == 0x80) + n--; + p[n-1] = '\0'; + + /* + * All the argument processing is now done, ready for the image. 
+ */ + + /* build image for file */ + img = attachimage(chan); + if(waserror()){ + putimage(img); + nexterror(); + } + if(img->section[0] == nil) + img->section[0] = newsection(textmin-UTZERO, 0, hdrsz+textsz); + if(img->section[1] == nil) + img->section[1] = newsection(datalim-textlim, hdrsz+textsz, datasz); + unlock(img); + + ts = newseg(SG_TEXT|SG_RONLY, UTZERO, textmin, img, 0); + if(waserror()){ + putseg(ts); + nexterror(); + } + ts->flushme = 1; + ds = newseg(SG_DATA, textlim, datalim, img, 1); + if(waserror()){ + putseg(ds); + nexterror(); + } + bs = newseg(SG_BSS, datalim, bsslim, nil, 0); + + /* + * Close on exec + */ + f = up->fgrp; + for(i=0; i<=f->maxfd; i++) + fdclose(i, CCEXEC); + + /* + * Free old memory. + * Special segments maintained across exec. + */ + for(i = SSEG; i <= BSEG; i++) { + putseg(up->seg[i]); + up->seg[i] = nil; /* in case of error */ + } + for(i = BSEG+1; i< NSEG; i++) { + s = up->seg[i]; + if(s && (s->type&SG_CEXEC)) { + putseg(s); + up->seg[i] = nil; + } + } + + if(up->trace && (pt = proctrace) != nil){ + strncpy((char*)&ptarg, elem, sizeof ptarg); + pt(up, SName, 0, ptarg); + } + + /* + * At this point, the mmu contains info about the old address + * space and needs to be flushed + */ + mmuflush(); + + up->seg[TSEG] = ts; + up->seg[DSEG] = ds; + up->seg[BSEG] = bs; + poperror(); /* ds */ + poperror(); /* ts */ + poperror(); /* img */ + putimage(img); + + free(up->text); + up->text = elem; + elem = nil; + free(up->args); + up->args = p; + up->nargs = n; + poperror(); /* p (up->args) */ + + /* + * Move the stack + */ + s = up->seg[ESEG]; + up->seg[ESEG] = nil; + up->seg[SSEG] = s; + s->base = USTKTOP-USTKSIZE; + s->top = USTKTOP; + relocateseg(s, USTKTOP-TSTKTOP); + + poperror(); /* seglock */ + qunlock(&up->seglock); + + /* + * '/' processes are higher priority. (TO DO: really?) 
+ */ + if(chan->dev->dc == L'/') + up->basepri = PriRoot; + up->priority = up->basepri; + poperror(); /* chan, elem */ + cclose(chan); + poperror(); /* file */ + free(file); + + qlock(&up->debug); + up->nnote = 0; + up->notify = 0; + up->notified = 0; + up->privatemem = 0; + sysprocsetup(up); + qunlock(&up->debug); + if(up->hang) + up->procctl = Proc_stopme; + + ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), argc); +} + +int +return0(void*) +{ + return 0; +} + +void +syssleep(Ar0* ar0, va_list list) +{ + long ms; + + /* + * int sleep(long millisecs); + */ + ms = va_arg(list, long); + + ar0->i = 0; + if(ms <= 0) { + if (up->edf && (up->edf->flags & Admitted)) + edfyield(); + else + yield(); + return; + } + if(ms < TK2MS(1)) + ms = TK2MS(1); + tsleep(&up->sleep, return0, 0, ms); +} + +void +sysalarm(Ar0* ar0, va_list list) +{ + unsigned long ms; + + /* + * long alarm(unsigned long millisecs); + * Odd argument type... + */ + ms = va_arg(list, unsigned long); + + ar0->l = procalarm(ms); +} + +void +sysexits(Ar0*, va_list list) +{ + char *status; + char *inval = "invalid exit string"; + char buf[ERRMAX]; + + /* + * void exits(char *msg); + */ + status = va_arg(list, char*); + + if(status){ + if(waserror()) + status = inval; + else{ + status = validaddr(status, 1, 0); + if(vmemchr(status, 0, ERRMAX) == 0){ + memmove(buf, status, ERRMAX); + buf[ERRMAX-1] = 0; + status = buf; + } + poperror(); + } + + } + pexit(status, 1); +} + +void +sysawait(Ar0* ar0, va_list list) +{ + int i; + int pid; + Waitmsg w; + usize n; + char *p; + + /* + * int await(char* s, int n); + * should really be + * usize await(char* s, usize n); + */ + p = va_arg(list, char*); + n = va_arg(list, long); + p = validaddr(p, n, 1); + + pid = pwait(&w); + if(pid < 0){ + ar0->i = -1; + return; + } + i = snprint(p, n, "%d %lud %lud %lud %q", + w.pid, + w.time[TUser], w.time[TSys], w.time[TReal], + w.msg); + + ar0->i = i; +} + +void +werrstr(char *fmt, ...) 
+{ + va_list va; + + if(up == nil) + return; + + va_start(va, fmt); + vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va); + va_end(va); +} + +static void +generrstr(char *buf, long n) +{ + char *p, tmp[ERRMAX]; + + if(n <= 0) + error(Ebadarg); + p = validaddr(buf, n, 1); + if(n > sizeof tmp) + n = sizeof tmp; + memmove(tmp, p, n); + + /* make sure it's NUL-terminated */ + tmp[n-1] = '\0'; + memmove(p, up->syserrstr, n); + p[n-1] = '\0'; + memmove(up->syserrstr, tmp, n); +} + +void +syserrstr(Ar0* ar0, va_list list) +{ + char *err; + usize nerr; + + /* + * int errstr(char* err, uint nerr); + * should really be + * usize errstr(char* err, usize nerr); + * but errstr always returns 0. + */ + err = va_arg(list, char*); + nerr = va_arg(list, usize); + generrstr(err, nerr); + + ar0->i = 0; +} + +void +sysnotify(Ar0* ar0, va_list list) +{ + void (*f)(void*, char*); + + /* + * int notify(void (*f)(void*, char*)); + */ + f = (void (*)(void*, char*))va_arg(list, void*); + + if(f != nil) + validaddr(f, sizeof(void (*)(void*, char*)), 0); + up->notify = f; + + ar0->i = 0; +} + +void +sysnoted(Ar0* ar0, va_list list) +{ + int v; + + /* + * int noted(int v); + */ + v = va_arg(list, int); + + if(v != NRSTR && !up->notified) + error(Egreg); + + ar0->i = 0; +} + +void +sysrendezvous(Ar0* ar0, va_list list) +{ + Proc *p, **l; + uintptr tag, val, pc; + void (*pt)(Proc*, int, vlong, vlong); + + /* + * void* rendezvous(void*, void*); + */ + tag = PTR2UINT(va_arg(list, void*)); + + l = &REND(up->rgrp, tag); + up->rendval = ~0; + + lock(up->rgrp); + for(p = *l; p; p = p->rendhash) { + if(p->rendtag == tag) { + *l = p->rendhash; + val = p->rendval; + p->rendval = PTR2UINT(va_arg(list, void*)); + unlock(up->rgrp); + + while(p->mach != 0) + ; + ready(p); + + ar0->v = UINT2PTR(val); + return; + } + l = &p->rendhash; + } + + /* Going to sleep here */ + up->rendtag = tag; + up->rendval = PTR2UINT(va_arg(list, void*)); + up->rendhash = *l; + *l = up; + up->state = Rendezvous; + if(up->trace 
&& (pt = proctrace) != nil){ + pc = (uintptr)sysrendezvous; + pt(up, SSleep, 0, Rendezvous|(pc<<8)); + } + unlock(up->rgrp); + + sched(); + + ar0->v = UINT2PTR(up->rendval); +} + +/* + * The implementation of semaphores is complicated by needing + * to avoid rescheduling in syssemrelease, so that it is safe + * to call from real-time processes. This means syssemrelease + * cannot acquire any qlocks, only spin locks. + * + * Semacquire and semrelease must both manipulate the semaphore + * wait list. Lock-free linked lists only exist in theory, not + * in practice, so the wait list is protected by a spin lock. + * + * The semaphore value *addr is stored in user memory, so it + * cannot be read or written while holding spin locks. + * + * Thus, we can access the list only when holding the lock, and + * we can access the semaphore only when not holding the lock. + * This makes things interesting. Note that sleep's condition function + * is called while holding two locks - r and up->rlock - so it cannot + * access the semaphore value either. + * + * An acquirer announces its intention to try for the semaphore + * by putting a Sema structure onto the wait list and then + * setting Sema.waiting. After one last check of semaphore, + * the acquirer sleeps until Sema.waiting==0. A releaser of n + * must wake up n acquirers who have Sema.waiting set. It does + * this by clearing Sema.waiting and then calling wakeup. + * + * There are three interesting races here. + + * The first is that in this particular sleep/wakeup usage, a single + * wakeup can rouse a process from two consecutive sleeps! + * The ordering is: + * + * (a) set Sema.waiting = 1 + * (a) call sleep + * (b) set Sema.waiting = 0 + * (a) check Sema.waiting inside sleep, return w/o sleeping + * (a) try for semaphore, fail + * (a) set Sema.waiting = 1 + * (a) call sleep + * (b) call wakeup(a) + * (a) wake up again + * + * This is okay - semacquire will just go around the loop + * again. 
It does mean that at the top of the for(;;) loop in + * semacquire, phore.waiting might already be set to 1. + * + * The second is that a releaser might wake an acquirer who is + * interrupted before he can acquire the lock. Since + * release(n) issues only n wakeup calls -- only n can be used + * anyway -- if the interrupted process is not going to use his + * wakeup call he must pass it on to another acquirer. + * + * The third race is similar to the second but more subtle. An + * acquirer sets waiting=1 and then does a final canacquire() + * before going to sleep. The opposite order would result in + * missing wakeups that happen between canacquire and + * waiting=1. (In fact, the whole point of Sema.waiting is to + * avoid missing wakeups between canacquire() and sleep().) But + * there can be spurious wakeups between a successful + * canacquire() and the following semdequeue(). This wakeup is + * not useful to the acquirer, since he has already acquired + * the semaphore. Like in the previous case, though, the + * acquirer must pass the wakeup call along. + * + * This is all rather subtle. The code below has been verified + * with the spin model /sys/src/9/port/semaphore.p. The + * original code anticipated the second race but not the first + * or third, which were caught only with spin. The first race + * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it. + * It was lucky that my abstract model of sleep/wakeup still managed + * to preserve that behavior. + * + * I remain slightly concerned about memory coherence + * outside of locks. The spin model does not take + * queued processor writes into account so we have to + * think hard. The only variables accessed outside locks + * are the semaphore value itself and the boolean flag + * Sema.waiting. The value is only accessed with CAS, + * whose job description includes doing the right thing as + * far as memory coherence across processors. That leaves + * Sema.waiting. 
To handle it, we call coherence() before each
+ * read and after each write. - rsc
+ */
+
+/*
+ * Add semaphore p with addr a to list in seg.
+ * p is zeroed first (so p->waiting starts at 0) and is linked in
+ * just before the list head s->sema, i.e. at the tail of the
+ * circular doubly-linked list.
+ */
+static void
+semqueue(Segment* s, int* addr, Sema* p)
+{
+	memset(p, 0, sizeof *p);
+	p->addr = addr;
+
+	lock(&s->sema);	/* uses s->sema.Rendez.Lock, but no one else is */
+	p->next = &s->sema;
+	p->prev = s->sema.prev;
+	p->next->prev = p;
+	p->prev->next = p;
+	unlock(&s->sema);
+}
+
+/*
+ * Remove semaphore p from list in seg.
+ * p must currently be on the list; its own links are left as they were.
+ */
+static void
+semdequeue(Segment* s, Sema* p)
+{
+	lock(&s->sema);
+	p->next->prev = p->prev;
+	p->prev->next = p->next;
+	unlock(&s->sema);
+}
+
+/*
+ * Wake up n waiters with addr on list in seg.
+ * Only entries with waiting set count towards n; the flag is
+ * cleared (and made visible with coherence()) before wakeup so
+ * that semawoke() sees it.
+ */
+static void
+semwakeup(Segment* s, int* addr, int n)
+{
+	Sema *p;
+
+	lock(&s->sema);
+	for(p = s->sema.next; p != &s->sema && n > 0; p = p->next){
+		if(p->addr == addr && p->waiting){
+			p->waiting = 0;
+			coherence();
+			wakeup(p);
+			n--;
+		}
+	}
+	unlock(&s->sema);
+}
+
+/*
+ * Add delta to semaphore and wake up waiters as appropriate.
+ * The addition retries with CASW until it succeeds; at most delta
+ * waiters are woken.  Returns the new semaphore value.
+ */
+static int
+semrelease(Segment* s, int* addr, int delta)
+{
+	int value;
+
+	do
+		value = *addr;
+	while(!CASW(addr, value, value+delta));
+	semwakeup(s, addr, delta);
+
+	return value+delta;
+}
+
+/*
+ * Try to acquire semaphore using compare-and-swap.
+ * Returns 1 after decrementing a positive value, 0 as soon as
+ * the value is seen to be <= 0.  Never blocks.
+ */
+static int
+canacquire(int* addr)
+{
+	int value;
+
+	while((value = *addr) > 0){
+		if(CASW(addr, value, value-1))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Should we wake up?
+ * Condition function passed to sleep/tsleep: true once a
+ * releaser has cleared Sema.waiting.
+ */
+static int
+semawoke(void* p)
+{
+	coherence();
+	return !((Sema*)p)->waiting;
+}
+
+/* Acquire semaphore (subtract 1).
*/ +static int +semacquire(Segment* s, int* addr, int block) +{ + int acquired; + Sema phore; + + if(canacquire(addr)) + return 1; + if(!block) + return 0; + + acquired = 0; + semqueue(s, addr, &phore); + for(;;){ + phore.waiting = 1; + coherence(); + if(canacquire(addr)){ + acquired = 1; + break; + } + if(waserror()) + break; + sleep(&phore, semawoke, &phore); + poperror(); + } + semdequeue(s, &phore); + coherence(); /* not strictly necessary due to lock in semdequeue */ + if(!phore.waiting) + semwakeup(s, addr, 1); + if(!acquired) + nexterror(); + + return 1; +} + +/* Acquire semaphore or time-out */ +static int +tsemacquire(Segment* s, int* addr, long ms) +{ + int acquired; + ulong t; + Sema phore; + + if(canacquire(addr)) + return 1; + if(ms == 0) + return 0; + + acquired = 0; + semqueue(s, addr, &phore); + for(;;){ + phore.waiting = 1; + coherence(); + if(canacquire(addr)){ + acquired = 1; + break; + } + if(waserror()) + break; + t = m->ticks; + tsleep(&phore, semawoke, &phore, ms); + ms -= TK2MS(m->ticks-t); + poperror(); + if(ms <= 0) + break; + } + semdequeue(s, &phore); + coherence(); /* not strictly necessary due to lock in semdequeue */ + if(!phore.waiting) + semwakeup(s, addr, 1); + if(ms <= 0) + return 0; + if(!acquired) + nexterror(); + return 1; +} + +void +syssemacquire(Ar0* ar0, va_list list) +{ + Segment *s; + int *addr, block; + + /* + * int semacquire(long* addr, int block); + * should be (and will be implemented below as) perhaps + * int semacquire(int* addr, int block); + */ + addr = va_arg(list, int*); + addr = validaddr(addr, sizeof(int), 1); + evenaddr(PTR2UINT(addr)); + block = va_arg(list, int); + + if((s = seg(up, PTR2UINT(addr), nil)) == nil) + error(Ebadarg); + if(*addr < 0) + error(Ebadarg); + + ar0->i = semacquire(s, addr, block); +} + +void +systsemacquire(Ar0* ar0, va_list list) +{ + Segment *s; + int *addr, ms; + + /* + * int tsemacquire(long* addr, ulong ms); + * should be (and will be implemented below as) perhaps + * int 
tsemacquire(int* addr, ulong ms); + */ + addr = va_arg(list, int*); + addr = validaddr(addr, sizeof(int), 1); + evenaddr(PTR2UINT(addr)); + ms = va_arg(list, ulong); + + if((s = seg(up, PTR2UINT(addr), nil)) == nil) + error(Ebadarg); + if(*addr < 0) + error(Ebadarg); + + ar0->i = tsemacquire(s, addr, ms); +} + +void +syssemrelease(Ar0* ar0, va_list list) +{ + Segment *s; + int *addr, delta; + + /* + * long semrelease(long* addr, long count); + * should be (and will be implemented below as) perhaps + * int semrelease(int* addr, int count); + */ + addr = va_arg(list, int*); + addr = validaddr(addr, sizeof(int), 1); + evenaddr(PTR2UINT(addr)); + delta = va_arg(list, int); + + if((s = seg(up, PTR2UINT(addr), nil)) == nil) + error(Ebadarg); + if(delta < 0 || *addr < 0) + error(Ebadarg); + + ar0->i = semrelease(s, addr, delta); +} + +void +sysnsec(Ar0* ar0, va_list) +{ + ar0->vl = todget(nil); +} diff -Nru /sys/src/9k/port/sysseg.c /sys/src/9k/port/sysseg.c --- /sys/src/9k/port/sysseg.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/sysseg.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,402 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +Segment* (*_globalsegattach)(Proc*, char*); + +static Lock physseglock; + +int +addphysseg(Physseg* new) +{ + Physseg *ps; + + /* + * Check not already entered and there is room + * for a new entry and the terminating null entry. 
+ */ + lock(&physseglock); + for(ps = physseg; ps->name; ps++){ + if(strcmp(ps->name, new->name) == 0){ + unlock(&physseglock); + return -1; + } + } + if(ps-physseg >= nphysseg-2){ + unlock(&physseglock); + return -1; + } + + *ps = *new; + unlock(&physseglock); + + return 0; +} + +int +isphysseg(char *name) +{ + int rv; + Physseg *ps; + + lock(&physseglock); + rv = 0; + for(ps = physseg; ps->name; ps++){ + if(strcmp(ps->name, name) == 0){ + rv = 1; + break; + } + } + unlock(&physseglock); + return rv; +} + +/* Needs to be non-static for BGP support */ +uintptr +ibrk(uintptr addr, int seg) +{ + Segment *s, *ns; + Pages *nps; + uintptr newtop; + long newsize; + uintptr pgsize; + int i; + + s = up->seg[seg]; + if(s == 0) + error(Ebadarg); + + if(addr == 0) + return s->top; + + wlock(&s->lk); + if(waserror()){ + wunlock(&s->lk); + nexterror(); + } + + DBG("ibrk addr %#p seg %d base %#p top %#p\n", + addr, seg, s->base, s->top); + /* We may start with the bss overlapping the data */ + if(addr < s->base) { + if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) + error(Enovmem); + addr = s->base; + } + + pgsize = segpgsize(s); + newtop = ROUNDUP(addr, pgsize); + newsize = (newtop-s->base)/pgsize; + + + DBG("ibrk addr %#p newtop %#p newsize %ld\n", addr, newtop, newsize); + + if(newtop < s->top) { + /* + * do not shrink a segment shared with other procs, as the + * to-be-freed address space may have been passed to the kernel + * already by another proc and is past the validaddr stage. 
+ */ + if(s->ref > 1) + error(Einuse); + mfreeseg(s, newtop, s->top); + s->top = newtop; + poperror(); + wunlock(&s->lk); + mmuflush(); + return newtop; + } + + for(i = 0; i < NSEG; i++) { + ns = up->seg[i]; + if(ns == 0 || ns == s) + continue; + if(newtop >= ns->base && newtop < ns->top) + error(Esoverlap); + } + + if(!physmemavail(newtop - s->top)) + error(Enovmem); + + nps = growpages(s->pages, newtop - s->base); + if(nps == nil) + error(Enovmem); + s->pages = nps; + s->top = newtop; + + poperror(); + wunlock(&s->lk); + + return newtop; +} + +void +syssegbrk(Ar0* ar0, va_list list) +{ + int i; + uintptr addr; + Segment *s; + + /* + * int segbrk(void*, void*); + * should be + * void* segbrk(void* saddr, void* addr); + */ + addr = PTR2UINT(va_arg(list, void*)); + for(i = 0; i < NSEG; i++) { + s = up->seg[i]; + if(s == nil || addr < s->base || addr >= s->top) + continue; + switch(s->type&SG_TYPE) { + case SG_TEXT: + case SG_DATA: + case SG_STACK: + error(Ebadarg); + default: + addr = PTR2UINT(va_arg(list, void*)); + ar0->v = UINT2PTR(ibrk(addr, i)); + return; + } + } + error(Ebadarg); +} + +void +sysbrk_(Ar0* ar0, va_list list) +{ + uintptr addr; + + /* + * int brk(void*); + * + * Deprecated; should be for backwards compatibility only. 
+ */ + addr = PTR2UINT(va_arg(list, void*)); + + ibrk(addr, BSEG); + + ar0->i = 0; +} + +static uintptr +segattach(Proc* p, int attr, char* name, uintptr va, usize len) +{ + int sno; + Segment *s, *os; + Physseg *ps; + + if((va != 0 && va < UTZERO) || iskaddr(va)) + error("virtual address below text or in kernel"); + + vmemchr(name, 0, ~0); + + for(sno = 0; sno < NSEG; sno++) + if(p->seg[sno] == nil && sno != ESEG) + break; + + if(sno == NSEG) + error("too many segments in process"); + + /* + * first look for a global segment with the + * same name + */ + if(_globalsegattach != nil){ + s = (*_globalsegattach)(p, name); + if(s != nil){ + p->seg[sno] = s; + return s->base; + } + } + + len = ROUNDUP(len, PGSZ); + if(len == 0) + error("length overflow"); + + /* + * Find a hole in the address space. + * Starting at the lowest possible stack address - len, + * check for an overlapping segment, and repeat at the + * base of that segment - len until either a hole is found + * or the address space is exhausted. + */ +//need check here to prevent mapping page 0? 
+ if(va == 0) { + va = p->seg[SSEG]->base - len; + for(;;) { + os = isoverlap(p, va, len); + if(os == nil) + break; + va = os->base; + if(len > va) + error("cannot fit segment at virtual address"); + va -= len; + } + } + + va = va&~(PGSZ-1); + if(isoverlap(p, va, len) != nil) + error(Esoverlap); + + for(ps = physseg; ps->name; ps++) + if(strcmp(name, ps->name) == 0) + goto found; + + error("segment not found"); +found: + if((len/PGSZ) > ps->size) + error("len > segment size"); + + attr &= ~SG_TYPE; /* Turn off what is not allowed */ + attr |= ps->attr; /* Copy in defaults */ + + s = newseg(attr, va, va+len, nil, 0); + s->pseg = ps; + p->seg[sno] = s; + + return va; +} + +void +syssegattach(Ar0* ar0, va_list list) +{ + int attr; + char *name; + uintptr va; + usize len; + + /* + * long segattach(int, char*, void*, ulong); + * should be + * void* segattach(int, char*, void*, usize); + */ + attr = va_arg(list, int); + name = va_arg(list, char*); + va = PTR2UINT(va_arg(list, void*)); + len = va_arg(list, usize); + + ar0->v = UINT2PTR(segattach(up, attr, validaddr(name, 1, 0), va, len)); +} + +void +syssegdetach(Ar0* ar0, va_list list) +{ + int i; + uintptr addr; + Segment *s; + + /* + * int segdetach(void*); + */ + addr = PTR2UINT(va_arg(list, void*)); + + qlock(&up->seglock); + if(waserror()){ + qunlock(&up->seglock); + nexterror(); + } + + s = 0; + for(i = 0; i < NSEG; i++) + if(s = up->seg[i]) { + rlock(&s->lk); + if((addr >= s->base && addr < s->top) || + (s->top == s->base && addr == s->base)) + goto found; + runlock(&s->lk); + } + + error(Ebadarg); + +found: + /* + * Can't detach the initial stack segment + * because the clock writes profiling info + * there. 
+	 */
+	if(s == up->seg[SSEG]){
+		runlock(&s->lk);
+		error(Ebadarg);
+	}
+	up->seg[i] = 0;
+	runlock(&s->lk);
+	putseg(s);
+	qunlock(&up->seglock);
+	poperror();
+
+	/* Ensure we flush any entries from the lost segment */
+	mmuflush();
+
+	ar0->i = 0;
+}
+
+/*
+ * segfree system call: free the whole pages lying inside
+ * [from, from+len) of the segment containing from.  The end is
+ * rounded down and the start rounded up to a page boundary; a range
+ * that wraps or extends past the top of the segment is Ebadarg.
+ */
+void
+syssegfree(Ar0* ar0, va_list list)
+{
+	Segment *s;
+	uintptr from, to;
+	usize len;
+
+	/*
+	 * int segfree(void*, ulong);
+	 * should be
+	 * int segfree(void*, usize);
+	 */
+	from = PTR2UINT(va_arg(list, void*));
+	s = seg(up, from, wlock);
+	if(s == nil)
+		error(Ebadarg);
+	len = va_arg(list, usize);
+	to = (from + len) & ~(PGSZ-1);
+	if(to < from || to > s->top){
+		wunlock(&s->lk);
+		error(Ebadarg);
+	}
+	from = ROUNDUP(from, PGSZ);
+
+	mfreeseg(s, from, to);
+	wunlock(&s->lk);
+	mmuflush();
+
+	ar0->i = 0;
+}
+
+/*
+ * segflush system call: walk [addr, addr+len) one segment at a
+ * time, marking each segment flushme and calling pagesflush on the
+ * part of the range that falls inside it.  An address that is in no
+ * segment, or a piece that is empty or wraps, is Ebadarg.
+ */
+void
+syssegflush(Ar0* ar0, va_list list)
+{
+	Segment *s;
+	uintptr addr;
+	usize l, len;
+
+	/*
+	 * int segflush(void*, ulong);
+	 * should be
+	 * int segflush(void*, usize);
+	 */
+	addr = PTR2UINT(va_arg(list, void*));
+	len = va_arg(list, usize);
+
+	while(len > 0) {
+		s = seg(up, addr, rlock);
+		if(s == nil)
+			error(Ebadarg);
+
+		s->flushme = 1;
+		l = len;
+		if(addr+l > s->top)
+			l = s->top - addr;	/* clip this piece to the segment */
+		if(l == 0 || addr+l < s->base){
+			runlock(&s->lk);
+			error(Ebadarg);
+		}
+		pagesflush(s->pages, addr - s->base, l);	/* TO DO: check rounding-up */
+		runlock(&s->lk);
+		addr += l;
+		len -= l;
+	}
+	mmuflush();
+	ar0->i = 0;
+}
diff -Nru /sys/src/9k/port/taslock.c /sys/src/9k/port/taslock.c
--- /sys/src/9k/port/taslock.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/taslock.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,354 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "../port/edf.h"
+
+typedef struct Glares Glares;
+typedef struct Glaring Glaring;
+
+/* one record of a slow spin-lock acquisition; filled in by lockloop */
+struct Glaring
+{
+	Lock*	l;
+	ulong	ticks;
+	ulong	lastlock;
+	ulong	key;
+	uintptr	lpc;
+	uintptr	pc;
+	ulong	n;
+	int	ok;
+	int	shown;
+};
+
+/* fixed-size ring of Glaring records; next is the slot reused next */
+struct Glares
+{
+	Glaring	instance[30];
+	uint	next;
+	uint	total;
+};
+
+static Glares	glares[MACHMAX];	/* per core, to avoid lock */
+int	lockdebug = 1;
+
+uvlong	maxlockcycles;
+uvlong	maxilockcycles;
+uintptr	maxlockpc;
+ulong	maxilockpc;
+
+void	showlockloops(void);
+
+struct
+{
+	ulong	locks;
+	ulong	glare;
+	ulong	inglare;
+} lockstats;
+
+/*
+ * Debugging aid: print tag followed by the first 64 bytes at l in hex.
+ */
+static void
+dumplockmem(char *tag, Lock *l)
+{
+	uchar *cp;
+	int i;
+
+	iprint("%s: ", tag);
+	cp = (uchar*)l;
+	for(i = 0; i < 64; i++)
+		iprint("%2.2ux ", cp[i]);
+	iprint("\n");
+}
+
+/*
+ * Report a lock that cannot be obtained and panic.
+ * l is the lock, pc the caller that wanted it, why a short reason.
+ * With lockdebug > 1 the current process and the recorded holder
+ * are dumped first.  lock() and lockloop() both allow for up being
+ * nil, so up is tested here before it is used.
+ */
+static void
+lockcrash(Lock *l, uintptr pc, char *why)
+{
+	Proc *p;
+
+	p = l->p;
+	if(lockdebug > 1){
+		if(up != nil)
+			dumpaproc(up);
+		if(p != nil)
+			dumpaproc(p);
+	}
+	showlockloops();
+	panic("cpu%d: %s lock %#p key %#ux pc %#p proc %ud held by pc %#p proc %ud\n",
+		m->machno, why, l, l->key, pc, up? up->pid: 0, l->pc, p? p->pid: 0);
+}
+
+/*
+ * A "lock loop" is excessive delay in obtaining a spin lock:
+ * it could be long delay through contention (ie, inefficient but harmless),
+ * or a real deadlock (a programming error);
+ * record them for later analysis to discover which.
+ * Don't print them at the time, or the harmless cases become deadly.
+ */
+static Glaring*
+lockloop(Glaring *og, Lock *l, uintptr pc)
+{
+	Glares *mg;
+	Glaring *g;
+	int i, s;
+
+
+	s = splhi();
+	if(l->m == sys->machptr[m->machno])
+		lockcrash(l, pc, "deadlock/abandoned");	/* recovery is impossible */
+	mg = &glares[m->machno];
+	g = mg->instance;
+	/* look for an existing record of this lock, holder pc and caller pc */
+	for(i = 0; i < nelem(mg->instance) && g->l != nil; i++){
+		if(g->l == l && g->lpc == l->pc && g->pc == pc){
+			g->ok = 0;
+			if(og == g){
+				if(tickscmp(sys->ticks, g->lastlock) >= 60*HZ)
+					lockcrash(l, pc, "stuck");	/* delay is hopelessly long: we're doomed, i tell ye */
+			}else{
+				g->lastlock = sys->ticks;
+				g->n++;
+				g->shown = 0;
+			}
+			splx(s);
+			return g;
+		}
+		g++;
+	}
+	/* not seen before: take the next slot of this cpu's ring */
+	i = mg->next;
+	g = &mg->instance[i];
+	g->ticks = sys->ticks;
+	g->lastlock = g->ticks;
+	g->l = l;
+	g->pc = pc;
+	g->lpc = l->pc;
+	g->n = 1;
+	g->ok = 0;
+	g->shown = 0;
+	if(++i >= nelem(mg->instance))
+		i = 0;
+	mg->next = i;
+	mg->total++;
+	splx(s);
+	if(islo() && up != nil)
+		iprint("cpu%d: slow locks: %d\n", m->machno, glares[m->machno].total);
+	if(lockdebug)
+		lockcrash(l, pc, "stuck");
+	return g;
+}
+
+/*
+ * Print every recorded lock loop that has not been printed yet.
+ * A record is marked shown once printed; lockloop clears the mark
+ * when the same loop recurs, so it is then reported again.
+ * "no loops" is printed only when no cpu has any record at all.
+ */
+void
+showlockloops(void)
+{
+	Glares *mg;
+	Glaring *g;
+	int mno, i, p;
+
+	p = 0;
+	for(mno = 0; mno < nelem(glares); mno++){
+		mg = &glares[mno];
+		g = mg->instance;
+		for(i = 0; i < nelem(mg->instance) && g->l != nil; i++){
+			if(!g->shown){
+				g->shown = 1;	/* was 0, which left every record permanently unshown */
+				iprint("cpu%d: %d: l=%#p lpc=%#p pc=%#p n=%lud ok=%d\n",
+					mno, i, g->l, g->lpc, g->pc, g->n, g->ok);
+			}
+			g++;
+			p++;
+		}
+	}
+	if(p == 0)
+		print("no loops\n");
+}
+
+/*
+ * Acquire spin lock l, spinning until it is free.
+ * Returns 0 if the lock was obtained at the first attempt and 1 if
+ * it had to wait.  After each run of more than 100*1000*1000 spins
+ * the delay is recorded by lockloop.
+ */
+int
+lock(Lock *l)
+{
+	int i;
+	uintptr pc;
+	Glaring *g;
+
+	pc = getcallerpc(&l);
+
+	lockstats.locks++;
+	if(up)
+		up->nlocks++;	/* prevent being scheded */
+	if(TAS(&l->key) == 0){
+		if(up)
+			up->lastlock = l;
+		l->pc = pc;
+		l->p = up;
+		l->isilock = 0;
+		return 0;
+	}
+	if(up)
+		up->nlocks--;
+
+	g = nil;
+	lockstats.glare++;
+	for(;;){
+		lockstats.inglare++;
+		i = 0;
+		while(l->key){
+			if(sys->nonline < 2 && up && up->edf && (up->edf->flags & Admitted)){
+				/*
+				 * Priority inversion, yield on a uniprocessor; on a
+				 * multiprocessor, the other processor will unlock
+				 */
+				print("inversion %#p pc %#p proc %d held by pc %#p proc %d\n",
+					l, pc, up ? up->pid : 0, l->pc, l->p ? l->p->pid : 0);
+				up->edf->d = todget(nil);	/* yield to process with lock */
+			}
+			if(i++ > 100*1000*1000){
+				g = lockloop(g, l, pc);
+				i = 0;
+			}
+		}
+		if(up)
+			up->nlocks++;
+		if(TAS(&l->key) == 0){
+			if(up)
+				up->lastlock = l;
+			l->pc = pc;
+			l->p = up;
+			l->isilock = 0;
+			return 1;
+		}
+		if(up)
+			up->nlocks--;
+	}
+}
+
+/*
+ * Acquire l at splhi.  While the lock is busy the saved priority
+ * level is restored for the duration of the spin.  The saved level
+ * is kept in l->sr for iunlock.
+ */
+void
+ilock(Lock *l)
+{
+	Mreg s;
+	uintptr pc;
+
+	pc = getcallerpc(&l);
+	lockstats.locks++;
+
+	s = splhi();
+	if(TAS(&l->key) != 0){
+		lockstats.glare++;
+		/*
+		 * Cannot also check l->pc, l->m, or l->isilock here
+		 * because they might just not be set yet, or
+		 * (for pc and m) the lock might have just been unlocked.
+		 */
+		for(;;){
+			lockstats.inglare++;
+			splx(s);
+			while(l->key)
+				;
+			s = splhi();
+			if(TAS(&l->key) == 0)
+				goto acquire;
+		}
+	}
+acquire:
+	m->ilockdepth++;
+	m->ilockpc = pc;
+	if(up)
+		up->lastilock = l;
+	l->sr = s;
+	l->pc = pc;
+	l->p = up;
+	l->isilock = 1;
+	l->m = m;
+}
+
+/*
+ * Try once to acquire l; returns 1 on success, 0 if it is held.
+ */
+int
+canlock(Lock *l)
+{
+	if(up)
+		up->nlocks++;
+	if(TAS(&l->key)){
+		if(up)
+			up->nlocks--;
+		return 0;
+	}
+
+	if(up)
+		up->lastlock = l;
+	l->pc = getcallerpc(&l);
+	l->p = up;
+	l->m = m;
+	l->isilock = 0;
+	return 1;
+}
+
+/*
+ * Release a lock taken with lock or canlock.  Misuse is reported
+ * with print but the lock is released regardless.
+ */
+void
+unlock(Lock *l)
+{
+	if(l->key == 0)
+		print("unlock: not locked: pc %#p\n", getcallerpc(&l));
+	if(l->isilock)
+		print("unlock of ilock: pc %#p, held by %#p\n", getcallerpc(&l), l->pc);
+	if(l->p != up)
+		print("unlock: up changed: pc %#p, acquired at pc %#p, lock p %#p, unlock up %#p\n", getcallerpc(&l), l->pc, l->p, up);
+	l->m = nil;
+	l->key = 0;
+	coherence();
+
+	if(up && --up->nlocks == 0 && up->delaysched && islo()){
+		/*
+		 * Call sched if the need arose while locks were held
+		 * But, don't do it from interrupt routines, hence the islo() test
+		 */
+		sched();
+	}
+}
+
+/*
+ * Release a lock taken with ilock and restore the priority level
+ * saved in l->sr.  Misuse is reported with print but the lock is
+ * released regardless.
+ */
+void
+iunlock(Lock *l)
+{
+	Mreg s;
+
+	if(l->key == 0)
+		print("iunlock: not locked: pc %#p\n", getcallerpc(&l));
+	if(!l->isilock)
+		print("iunlock of lock: pc %#p, held by %#p\n", getcallerpc(&l), l->pc);
+	if(islo())
+		print("iunlock while lo: pc %#p, held by %#p\n", getcallerpc(&l), l->pc);
+	if(l->m != m){
+		/*
+		 * l->m is cleared by unlock and iunlock and is not set
+		 * by lock, so it can be nil in exactly the misuse cases
+		 * reported above: don't fault while complaining.
+		 */
+		print("iunlock by cpu%d, locked by cpu%d: pc %#p, held by %#p\n",
+			m->machno, l->m != nil? l->m->machno: -1, getcallerpc(&l), l->pc);
+	}
+
+	s = l->sr;
+	l->m = nil;
+	l->key = 0;
+	coherence();
+	m->ilockdepth--;
+	if(up)
+		up->lastilock = nil;
+	splx(s);
+}
+
+/* same as lock; the pc argument is ignored */
+int
+lockpc(Lock *l, uintptr)
+{
+	return lock(l);
+}
+
+/* same as ilock; the pc argument is ignored */
+void
+ilockpc(Lock *l, uintptr)
+{
+	ilock(l);
+}
+
+/* true if l->m records this cpu (set by ilock and canlock only) */
+int
+ownlock(Lock *l)
+{
+	return l->m == m;
+}
+
+/* pc recorded when l was last acquired */
+uintptr
+lockgetpc(Lock *l)
+{
+	return l->pc;
+}
+
+/* overwrite the pc recorded in l */
+void
+locksetpc(Lock *l, uintptr pc)
+{
+	l->pc = pc;
+}
diff -Nru /sys/src/9k/port/tod.c /sys/src/9k/port/tod.c
--- /sys/src/9k/port/tod.c Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/tod.c Wed Dec 9 00:00:00 2015
@@ -0,0 +1,308 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Compute nanosecond epoch time from the fastest ticking clock
+ * on the system. Converting the time to nanoseconds requires
+ * the following formula
+ *
+ *	t = (((1000000000<<31)/f)*ticks)>>31
+ *
+ * where
+ *
+ *	'f'		is the clock frequency
+ *	'ticks'		are clock ticks
+ *
+ * to avoid too much calculation in todget(), we calculate
+ *
+ *	mult = (1000000000<<32)/f
+ *
+ * each time f is set. f is normally set by a user level
+ * program writing to /dev/fastclock. mul64fract will then
+ * take that fractional multiplier and a 64 bit integer and
+ * return the resulting integer product.
+ *
+ * We assume that the cpu's of a multiprocessor are synchronized.
+ * This assumption needs to be questioned with each new architecture.
+ */
+
+/* frequency of the tod clock */
+#define TODFREQ		1000000000ULL
+#define MicroFREQ	1000000ULL
+
+struct {
+	int	init;		/* true if initialized */
+	ulong	cnt;
+	Lock;
+	uvlong	multiplier;	/* ns = off + (multiplier*ticks)>>31 */
+	uvlong	divider;	/* ticks = (divider*(ns-off))>>31 */
+	uvlong	umultiplier;	/* µs = (µmultiplier*ticks)>>31 */
+	uvlong	udivider;	/* ticks = (µdivider*µs)>>31 */
+	vlong	hz;		/* frequency of fast clock */
+	vlong	last;		/* last reading of fast clock */
+	vlong	off;		/* offset from epoch to last */
+	vlong	lasttime;	/* last return value from todget */
+	vlong	delta;		/* add 'delta' each slow clock tick from sstart to send */
+	ulong	sstart;		/* ... */
+	ulong	send;		/* ... */
+} tod;
+
+static void todfix(void);
+
+/*
+ * One-time setup: read the fast clock and its frequency, compute
+ * the conversion multipliers and hook todfix onto the clock.
+ */
+void
+todinit(void)
+{
+	if(tod.init)
+		return;
+	ilock(&tod);
+	tod.init = 1;			/* prevent reentry via fastticks */
+	tod.last = fastticks((uvlong *)&tod.hz);
+	iunlock(&tod);
+	todsetfreq(tod.hz);
+	addclock0link(todfix, 100);
+}
+
+/*
+ *  calculate multiplier
+ */
+void
+todsetfreq(vlong f)
+{
+	ilock(&tod);
+	tod.hz = f;
+
+	/* calculate multiplier for time conversion */
+	tod.multiplier = mk64fract(TODFREQ, f);
+	tod.divider = mk64fract(f, TODFREQ) + 1;
+	tod.umultiplier = mk64fract(MicroFREQ, f);
+	tod.udivider = mk64fract(f, MicroFREQ) + 1;
+	iunlock(&tod);
+}
+
+/*
+ *  Set the time of day struct
+ *
+ *  t >= 0 sets the time outright.  Otherwise delta is spread over
+ *  at most n seconds' worth of slow clock ticks and applied
+ *  gradually by todget.
+ */
+void
+todset(vlong t, vlong delta, int n)
+{
+	if(!tod.init)
+		todinit();
+
+	ilock(&tod);
+	if(t >= 0){
+		tod.off = t;
+		tod.last = fastticks(nil);
+		tod.lasttime = 0;
+		tod.delta = 0;
+		tod.sstart = tod.send;
+	} else {
+		if(n <= 0)
+			n = 1;
+		n *= HZ;
+		if(delta < 0 && n > -delta)
+			n = -delta;
+		if(delta > 0 && n > delta)
+			n = delta;
+		delta = delta/n;
+		tod.sstart = sys->ticks;
+		tod.send = tod.sstart + n;
+		tod.delta = delta;
+	}
+	iunlock(&tod);
+}
+
+/*
+ *  get time of day
+ *
+ *  Returns nanoseconds since the epoch; if ticksp is not nil the
+ *  fast clock reading used is stored through it.
+ */
+vlong
+todget(vlong *ticksp)
+{
+	uvlong x;
+	vlong ticks, diff;
+	ulong t;
+
+	if(!tod.init)
+		todinit();
+
+	/*
+	 * we don't want time to pass twixt the measuring of fastticks
+	 * and grabbing tod.last.  Also none of the vlongs are atomic so
+	 * we have to look at them inside the lock.
+	 */
+	ilock(&tod);
+	tod.cnt++;
+	ticks = fastticks(nil);
+
+	/* add in correction */
+	if(tod.sstart != tod.send){
+		t = sys->ticks;
+		if(t >= tod.send)
+			t = tod.send;
+		tod.off = tod.off + tod.delta*(t - tod.sstart);
+		tod.sstart = t;
+	}
+
+	/* convert to epoch */
+	diff = ticks - tod.last;
+	if(diff < 0)
+		diff = 0;
+	mul64fract(&x, diff, tod.multiplier);
+	x += tod.off;
+
+	/* time can't go backwards */
+	if(x < tod.lasttime)
+		x = tod.lasttime;
+	else
+		tod.lasttime = x;
+
+	iunlock(&tod);
+
+	if(ticksp != nil)
+		*ticksp = ticks;
+
+	return x;
+}
+
+/*
+ *  convert time of day to ticks
+ */
+uvlong
+tod2fastticks(vlong ns)
+{
+	uvlong x;
+
+	ilock(&tod);
+	mul64fract(&x, ns-tod.off, tod.divider);
+	x += tod.last;
+	iunlock(&tod);
+	return x;
+}
+
+/*
+ *  called regularly to avoid calculation overflows
+ *
+ *  Once more than tod.hz fast ticks have passed since tod.last,
+ *  the elapsed time is folded into tod.off and tod.last is advanced.
+ */
+static void
+todfix(void)
+{
+	vlong ticks, diff;
+	uvlong x;
+
+	ticks = fastticks(nil);
+
+	/* unlocked peek, only to skip the lock on most calls */
+	diff = ticks - tod.last;
+	if(diff <= tod.hz)
+		return;
+
+	ilock(&tod);
+
+	/*
+	 * todset may have replaced tod.last and tod.off since the
+	 * peek; work out the interval again now that they cannot
+	 * change, so stale elapsed time is never added to a new offset.
+	 */
+	diff = ticks - tod.last;
+	if(diff > tod.hz){
+		/* convert to epoch */
+		mul64fract(&x, diff, tod.multiplier);
+if(x > 30000000000ULL) print("todfix %llud\n", x);
+		x += tod.off;
+
+		/* protect against overflows */
+		tod.last = ticks;
+		tod.off = x;
+	}
+
+	iunlock(&tod);
+}
+
+/* time of day in whole seconds */
+long
+seconds(void)
+{
+	return (vlong)todget(nil) / TODFREQ;
+}
+
+/* convert fast ticks to microseconds */
+uvlong
+fastticks2us(uvlong ticks)
+{
+	uvlong res;
+
+	if(!tod.init)
+		todinit();
+	mul64fract(&res, ticks, tod.umultiplier);
+	return res;
+}
+
+/* convert microseconds to fast ticks */
+uvlong
+us2fastticks(uvlong us)
+{
+	uvlong res;
+
+	if(!tod.init)
+		todinit();
+	mul64fract(&res, us, tod.udivider);
+	return res;
+}
+
+/*
+ *  convert milliseconds to fast ticks
+ */
+uvlong
+ms2fastticks(ulong ms)
+{
+	if(!tod.init)
+		todinit();
+	return (tod.hz*ms)/1000ULL;
+}
+
+/*
+ *  convert nanoseconds to fast ticks
+ */
+uvlong
+ns2fastticks(uvlong ns)
+{
+	uvlong res;
+
+	if(!tod.init)
+		todinit();
+	
mul64fract(&res, ns, tod.divider); + return res; +} + +/* + * convert fast ticks to ns + */ +uvlong +fastticks2ns(uvlong ticks) +{ + uvlong res; + + if(!tod.init) + todinit(); + mul64fract(&res, ticks, tod.multiplier); + return res; +} + +/* + * Make a 64 bit fixed point number that has a decimal point + * to the left of the low order 32 bits. This is used with + * mul64fract for converting twixt nanoseconds and fastticks. + * + * multiplier = (to<<32)/from + */ +uvlong +mk64fract(uvlong to, uvlong from) +{ +/* + int shift; + + if(to == 0ULL) + return 0ULL; + + shift = 0; + while(shift < 32 && to < (1ULL<<(32+24))){ + to <<= 8; + shift += 8; + } + while(shift < 32 && to < (1ULL<<(32+31))){ + to <<= 1; + shift += 1; + } + + return (to/from)<<(32-shift); + */ + return (to<<32) / from; +} diff -Nru /sys/src/9k/port/uidgid.c /sys/src/9k/port/uidgid.c --- /sys/src/9k/port/uidgid.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/uidgid.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,264 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +typedef struct User User; +struct User +{ + char* name; + char* leader; + User* next; + int n; + char* mem[]; +}; + +enum{ + Ulog= 6, + Usize= 1<= 0x7F && r < 0xA0 || strchr(invalid, r) != nil) + r = Runeerror; + }else + s += chartorune(&r, s); + if(r == Runeerror) + error("invalid character in name"); + } +} + +static uint +hashpjw(char *s) +{ + uint h, g; + + h = 0; + for(; *s != 0; s++){ + h = (h << 4) + (*s&0xFF); + g = h & 0xf0000000; + if(g != 0) + h ^= ((g >> 24) & 0xff) | g; + } + return h & 0x7FFFFFFF; +} + +static User** +lookuser(char *name) +{ + uint h; + User **l, *u; + + h = hashpjw(name) & Umask; + for(l = &users.hash[h]; (u = *l) != nil; l = &u->next) + if(strcmp(u->name, name) == 0) + break; + return l; +} + +static char* +tack(char **p, char *s) +{ + char *o; + + o = *p; + strcpy(o, s); + *p += strlen(o)+1; + return o; +} + +void +adduser(char *uid, char 
*leader, int nm, char **mem)
+{
+	User **l, *u, *v;
+	char *o;
+	int i, nc;
+
+	/* an empty leader means none; a leader equal to uid shares uid's storage */
+	if(leader != nil){
+		if(*leader == '\0')
+			leader = nil;
+		else if(strcmp(leader, uid) == 0)
+			leader = uid;
+	}
+	checkname(uid);
+	nc = strlen(uid)+1;
+	if(leader != nil && leader != uid){
+		checkname(leader);
+		nc += strlen(leader)+1;
+	}
+	for(i = 0; i < nm; i++){
+		checkname(mem[i]);
+		nc += strlen(mem[i])+1;
+	}
+	/* one allocation: the User, its nm member pointers, then all the strings */
+	v = mallocz(sizeof(User)+nm*sizeof(v->mem[0])+nc, 1);
+	if(v == nil)
+		error(Enomem);
+	o = (char*)(v+1)+nm*sizeof(v->mem[0]);
+	v->name = tack(&o, uid);
+	if(leader == nil)
+		v->leader = nil;
+	else if(strcmp(v->name, leader) != 0)
+		v->leader = tack(&o, leader);
+	else
+		v->leader = v->name;
+	v->n = nm;
+	for(i = 0; i < nm; i++)
+		v->mem[i] = tack(&o, mem[i]);
+	lock(&users);
+	l = lookuser(uid);
+	u = *l;
+	if(u != nil){
+		/* replace */
+		v->next = u->next;
+		free(u);
+	}
+	*l = v;
+	unlock(&users);
+}
+
+/*
+ * Remove the entry called name; returns 1 if there was one, 0 if not.
+ */
+int
+deluser(char *name)
+{
+	User **l, *u;
+
+	lock(&users);
+	l = lookuser(name);
+	u = *l;
+	if(u == nil){
+		unlock(&users);
+		return 0;
+	}
+	*l = u->next;
+	unlock(&users);
+	free(u);
+	return 1;
+}
+
+/* true if s is one of the n strings in mem */
+static int
+ismember(char *s, int n, char **mem)
+{
+	int i;
+
+	for(i = 0; i < n; i++)
+		if(strcmp(s, mem[i]) == 0)
+			return 1;
+	return 0;
+}
+
+/*
+ * True if uid and gid are the same name, or gid has an entry
+ * that lists uid as a member.
+ */
+int
+ingroup(char *uid, char *gid)
+{
+	User *g;
+
+	if(strcmp(uid, gid) == 0)
+		return 1;
+	lock(&users);
+	g = *lookuser(gid);
+	if(g != nil && ismember(uid, g->n, g->mem)){
+		unlock(&users);
+		return 1;
+	}
+	unlock(&users);
+	return 0;
+}
+
+/*
+ * True if uid leads gid: uid is the recorded leader, or the group
+ * has no leader and uid is a member, or gid has no entry at all
+ * and uid is the same name as gid.
+ */
+int
+leadsgroup(char *uid, char *gid)
+{
+	User *g;
+
+	lock(&users);
+	g = *lookuser(gid);
+	if(g != nil){
+		if(g->leader != nil && strcmp(uid, g->leader) == 0 ||
+		   g->leader == nil && ismember(uid, g->n, g->mem)){
+			unlock(&users);
+			return 1;
+		}
+	}
+	unlock(&users);
+	return g == nil && strcmp(uid, gid) == 0;
+}
+
+/*
+ * Format the whole table as text, one entry per line: the quoted
+ * name, then (if there is a leader or any member) the leader, or an
+ * empty string, followed by the members.  Returns the string
+ * produced by fmtstrflush.
+ */
+char*
+usersread(void)
+{
+	int i, m;
+	User *u;
+	Fmt fmt;
+
+	fmtstrinit(&fmt);
+	for(i = 0; i < nelem(users.hash); i++){
+		lock(&users);
+		for(u = users.hash[i]; u != nil; u = u->next){
+			fmtprint(&fmt, "%q", u->name);
+			if(u->leader != nil || u->n != 0){
+				fmtprint(&fmt, " %q", u->leader != nil? u->leader: "");
+				for(m = 0; m < u->n; m++)
+					fmtprint(&fmt, " %q", u->mem[m]);
+			}
+			fmtprint(&fmt, "\n");
+		}
+		unlock(&users);
+	}
+	return fmtstrflush(&fmt);
+}
+
+/*
+ * Apply the n bytes of text at buf to the table and return n.
+ * The text must be at most 16*1024 bytes and end in a newline.
+ * Each line is split into fields with tokenize:
+ *	a line starting with # is ignored;
+ *	a blank line is ignored;
+ *	"- name ..." deletes each name given;
+ *	"name" adds or replaces an entry with no leader and no members;
+ *	"name leader member ..." adds or replaces a full entry.
+ * Errors are raised with error(); the private copy of the text is
+ * freed on every path.
+ */
+long
+userswrite(void *buf, long n)
+{
+	int i, nf;
+	char *p, *s, *e, *flds[100];
+
+	if(n <= 0)
+		return n;
+	if(n > 16*1024)
+		error(Etoobig);
+	p = malloc(n+1);
+	if(p == nil)
+		error(Enomem);
+	if(waserror()){
+		free(p);
+		nexterror();
+	}
+	memmove(p, buf, n);
+	p[n] = '\0';
+	if(p[n-1] != '\n')
+		error("incomplete line");
+	for(s = p; (e = strchr(s, '\n')) != nil; s = e){
+		*e++ = '\0';
+		if(*s == '#')
+			continue;
+		nf = tokenize(s, flds, nelem(flds));
+		if(nf == nelem(flds))
+			error("too many group members");
+		if(nf == 0)
+			continue;	/* blank line: flds[0] is unset or left over from an earlier line */
+		if(strcmp(flds[0], "-") == 0){
+			for(i = 1; i < nf; i++)
+				deluser(flds[i]);
+		}else if(nf > 1)
+			adduser(flds[0], flds[1], nf-2, flds+2);
+		else
+			adduser(flds[0], nil, 0, nil);
+	}
+	poperror();
+	free(p);
+	return n;
+}
diff -Nru /sys/src/9k/port/usb.h /sys/src/9k/port/usb.h
--- /sys/src/9k/port/usb.h Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/usb.h Wed Dec 9 00:00:00 2015
@@ -0,0 +1,196 @@
+/*
+ * common USB definitions.
+ */
+#define dprint(...)	do if(debug)print(__VA_ARGS__); while(0)
+#define ddprint(...)	do if(debug>1)print(__VA_ARGS__); while(0)
+#define deprint(...)	do if(debug || ep->debug)print(__VA_ARGS__); while(0)
+#define ddeprint(...)	do if(debug>1 || ep->debug>1)print(__VA_ARGS__); while(0)
+
+#define	GET2(p)		((((p)[1]&0xFF)<<8)|((p)[0]&0xFF))
+#define	PUT2(p,v)	{((p)[0] = (v)); ((p)[1] = (v)>>8);}
+
+typedef struct Udev Udev;	/* USB device */
+typedef struct Ep Ep;		/* Endpoint */
+typedef struct Hci Hci;		/* Host Controller Interface */
+typedef struct Hciimpl Hciimpl;	/* Link to the controller impl. */
+
+enum
+{
+	/* fundamental constants */
+	Ndeveps	= 16,		/* max nb.
of endpoints per device */ + + /* tunable parameters */ + Nhcis = 16, /* max nb. of HCIs */ + Neps = 64, /* max nb. of endpoints */ + Maxctllen = 32*1024, /* max allowed sized for ctl. xfers; see Maxdevconf */ + Xfertmout = 2000, /* default request time out (ms) */ + + /* transfer types. keep this order */ + Tnone = 0, /* no tranfer type configured */ + Tctl, /* wr req + rd/wr data + wr/rd sts */ + Tiso, /* stream rd or wr (real time) */ + Tbulk, /* stream rd or wr */ + Tintr, /* msg rd or wr */ + Nttypes, /* number of transfer types */ + + Epmax = 0xF, /* max ep. addr */ + Devmax = 0x7F, /* max dev. addr */ + + /* Speeds */ + Fullspeed = 0, + Lowspeed, + Highspeed, + Nospeed, + + /* request type */ + Rh2d = 0<<7, + Rd2h = 1<<7, + Rstd = 0<<5, + Rclass = 1<<5, + Rdev = 0, + Rep = 2, + Rother = 3, + + /* req offsets */ + Rtype = 0, + Rreq = 1, + Rvalue = 2, + Rindex = 4, + Rcount = 6, + Rsetuplen = 8, + + /* standard requests */ + Rgetstatus = 0, + Rclearfeature = 1, + Rsetfeature = 3, + Rsetaddr = 5, + Rgetdesc = 6, + + /* device states */ + Dconfig = 0, /* configuration in progress */ + Denabled, /* address assigned */ + Ddetach, /* device is detached */ + Dreset, /* its port is being reset */ + + /* (root) Hub reply to port status (reported to usbd) */ + HPpresent = 0x1, + HPenable = 0x2, + HPsuspend = 0x4, + HPovercurrent = 0x8, + HPreset = 0x10, + HPpower = 0x100, + HPslow = 0x200, + HPhigh = 0x400, + HPstatuschg = 0x10000, + HPchange = 0x20000, +}; + +/* + * Services provided by the driver. + * epopen allocates hardware structures to prepare the endpoint + * for I/O. This happens when the user opens the data file. + * epclose releases them. This happens when the data file is closed. + * epwrite tries to write the given bytes, waiting until all of them + * have been written (or failed) before returning; but not for Iso. + * epread does the same for reading. 
+ * It can be assumed that endpoints are DMEXCL but concurrent + * read/writes may be issued and the controller must take care. + * For control endpoints, device-to-host requests must be followed by + * a read of the expected length if needed. + * The port requests are called when usbd issues commands for root + * hubs. Port status must return bits as a hub request would do. + * Toggle handling and other details are left for the controller driver + * to avoid mixing too much the controller and the comon device. + * While an endpoint is closed, its toggles are saved in the Ep struct. + */ +struct Hciimpl +{ + void *aux; /* for controller info */ + void (*init)(Hci*); /* init. controller */ + void (*dump)(Hci*); /* debug */ + void (*interrupt)(Ureg*, void*); /* service interrupt */ + void (*epopen)(Ep*); /* prepare ep. for I/O */ + void (*epclose)(Ep*); /* terminate I/O on ep. */ + long (*epread)(Ep*,void*,long); /* transmit data for ep */ + long (*epwrite)(Ep*,void*,long); /* receive data for ep */ + char* (*seprintep)(char*,char*,Ep*); /* debug */ + int (*portenable)(Hci*, int, int); /* enable/disable port */ + int (*portreset)(Hci*, int, int); /* set/clear port reset */ + int (*portstatus)(Hci*, int); /* get port status */ + void (*shutdown)(Hci*); /* shutdown for reboot */ + void (*debug)(Hci*, int); /* set/clear debug flag */ +}; + +struct Hci +{ + ISAConf; /* hardware info */ + int ctlrno; /* controller number */ + int nports; /* number of ports in hub */ + int highspeed; + Hciimpl; /* HCI driver */ +}; + +/* + * USB endpoint. + * All endpoints are kept in a global array. The first + * block of fields is constant after endpoint creation. + * The rest is configuration information given to all controllers. + * The first endpoint for a device (known as ep0) represents the + * device and is used to configure it and create other endpoints. + * Its QLock also protects per-device data in dev. + * See Hciimpl for clues regarding how this is used by controllers. 
+ */
+struct Ep
+{
+	Ref;			/* one per fid (and per dev ep for ep0s) */
+
+	/* const once inited. */
+	int	idx;		/* index in global eps array */
+	int	nb;		/* endpoint number in device */
+	Hci*	hp;		/* HCI it belongs to */
+	Udev*	dev;		/* device for the endpoint */
+	Ep*	ep0;		/* control endpoint for its device */
+
+	QLock;			/* protect fields below */
+	char*	name;		/* for ep file names at #u/ */
+	int	inuse;		/* endpoint is open */
+	int	mode;		/* OREAD, OWRITE, or ORDWR */
+	int	clrhalt;	/* true if halt was cleared on ep. */
+	int	debug;		/* per endpoint debug flag */
+	char*	info;		/* for humans to read */
+	long	maxpkt;		/* maximum packet size */
+	int	ttype;		/* transfer type */
+	ulong	load;		/* in µs, for a transfer of maxpkt bytes */
+	void*	aux;		/* for controller specific info */
+	int	rhrepl;		/* fake root hub replies */
+	int	toggle[2];	/* saved toggles (while ep is not in use) */
+	long	pollival;	/* poll interval ([µ]frames; intr/iso) */
+	long	hz;		/* poll frequency (iso) */
+	long	samplesz;	/* sample size (iso) */
+	int	ntds;		/* nb. of Tds per µframe */
+	int	tmout;		/* 0 or timeout for transfers (ms) */
+	int	sampledelay;	/* maximum delay introduced by buffering (iso) */
+};
+
+/*
+ * Per-device configuration and cached list of endpoints.
+ * eps[0]->QLock protects it.
+ */ +struct Udev +{ + int nb; /* USB device number */ + int state; /* state for the device */ + int ishub; /* hubs can allocate devices */ + int isroot; /* is a root hub */ + int speed; /* Full/Low/High/No -speed */ + int hub; /* dev number for the parent hub */ + int port; /* port number in the parent hub */ + Ep* eps[Ndeveps]; /* end points for this device (cached) */ +}; + +void addhcitype(char *type, int (*reset)(Hci*)); + +extern char *usbmodename[]; +extern char Estalled[]; + +extern char *seprintdata(char*,char*,uchar*,int); diff -Nru /sys/src/9k/port/usbehci.c /sys/src/9k/port/usbehci.c --- /sys/src/9k/port/usbehci.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/port/usbehci.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,3284 @@ +/* + * USB Enhanced Host Controller Interface (EHCI) driver + * High speed USB 2.0. + * + * Note that all of our unlock routines call coherence. + * + * BUGS: + * - Too many delays and ilocks. + * - bandwidth admission control must be done per-frame. + * - requires polling (some controllers miss interrupts). + * - must warn of power overruns. 
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" +#include "../port/portusbehci.h" +#include "usbehci.h" +#include "uncached.h" + +#define diprint if(ehcidebug || iso->debug)print +#define ddiprint if(ehcidebug>1 || iso->debug>1)print +#define dqprint if(ehcidebug || (qh->io && qh->io->debug))print +#define ddqprint if(ehcidebug>1 || (qh->io && qh->io->debug>1))print + +#define TRUNC(x, sz) ((x) & ((sz)-1)) +#define LPTR(q) ((ulong*)KADDR((q) & ~0x1F)) + +typedef struct Ctlio Ctlio; +typedef union Ed Ed; +typedef struct Edpool Edpool; +typedef struct Itd Itd; +typedef struct Qio Qio; +typedef struct Qtd Qtd; +typedef struct Sitd Sitd; +typedef struct Td Td; + +/* + * EHCI interface registers and bits + */ +enum +{ + /* Queue states (software) */ + Qidle = 0, + Qinstall, + Qrun, + Qdone, + Qclose, + Qfree, + + Enabledelay = 100, /* waiting for a port to enable */ + Abortdelay = 5, /* delay after cancelling Tds (ms) */ + + Incr = 64, /* for pools of Tds, Qhs, etc. */ + Align = 128, /* in bytes for all those descriptors */ + + /* Keep them as a power of 2, lower than ctlr->nframes */ + /* Also, keep Nisoframes >= Nintrleafs */ + Nintrleafs = 32, /* nb. of leaf frames in intr. tree */ + Nisoframes = 64, /* nb. 
of iso frames (in window) */ + + /* + * HW constants + */ + + /* Itd bits (csw[]) */ + Itdactive = 0x80000000, /* execution enabled */ + Itddberr = 0x40000000, /* data buffer error */ + Itdbabble = 0x20000000, /* babble error */ + Itdtrerr = 0x10000000, /* transaction error */ + Itdlenshift = 16, /* transaction length */ + Itdlenmask = 0xFFF, + Itdioc = 0x00008000, /* interrupt on complete */ + Itdpgshift = 12, /* page select field */ + Itdoffshift = 0, /* transaction offset */ + /* Itd bits, buffer[] */ + Itdepshift = 8, /* endpoint address (buffer[0]) */ + Itddevshift = 0, /* device address (buffer[0]) */ + Itdin = 0x800, /* is input (buffer[1]) */ + Itdout = 0, + Itdmaxpktshift = 0, /* max packet (buffer[1]) */ + Itdntdsshift = 0, /* nb. of tds per µframe (buffer[2]) */ + + Itderrors = Itddberr|Itdbabble|Itdtrerr, + + /* Sitd bits (epc) */ + Stdin = 0x80000000, /* input direction */ + Stdportshift = 24, /* hub port number */ + Stdhubshift = 16, /* hub address */ + Stdepshift = 8, /* endpoint address */ + Stddevshift = 0, /* device address */ + /* Sitd bits (mfs) */ + Stdssmshift = 0, /* split start mask */ + Stdscmshift = 8, /* split complete mask */ + /* Sitd bits (csw) */ + Stdioc = 0x80000000, /* interrupt on complete */ + Stdpg = 0x40000000, /* page select */ + Stdlenshift = 16, /* total bytes to transfer */ + Stdlenmask = 0x3FF, + Stdactive = 0x00000080, /* active */ + Stderr = 0x00000040, /* tr. translator error */ + Stddberr = 0x00000020, /* data buffer error */ + Stdbabble = 0x00000010, /* babble error */ + Stdtrerr = 0x00000008, /* transaction error */ + Stdmmf = 0x00000004, /* missed µframe */ + Stddcs = 0x00000002, /* do complete split */ + + Stderrors = Stderr|Stddberr|Stdbabble|Stdtrerr|Stdmmf, + + /* Sitd bits buffer[1] */ + Stdtpall = 0x00000000, /* all payload here (188 bytes) */ + Stdtpbegin = 0x00000008, /* first payload for fs trans. 
*/ + Stdtcntmask = 0x00000007, /* T-count */ + + /* Td bits (csw) */ + Tddata1 = 0x80000000, /* data toggle 1 */ + Tddata0 = 0x00000000, /* data toggle 0 */ + Tdlenshift = 16, /* total bytes to transfer */ + Tdlenmask = 0x7FFF, + Tdmaxpkt = 0x5000, /* max buffer for a Td */ + Tdioc = 0x00008000, /* interrupt on complete */ + Tdpgshift = 12, /* current page */ + Tdpgmask = 7, + Tderr1 = 0x00000400, /* bit 0 of error counter */ + Tderr2 = 0x00000800, /* bit 1 of error counter */ + Tdtokout = 0x00000000, /* direction out */ + Tdtokin = 0x00000100, /* direction in */ + Tdtoksetup = 0x00000200, /* setup packet */ + Tdtok = 0x00000300, /* token bits */ + Tdactive = 0x00000080, /* active */ + Tdhalt = 0x00000040, /* halted */ + Tddberr = 0x00000020, /* data buffer error */ + Tdbabble = 0x00000010, /* babble error */ + Tdtrerr = 0x00000008, /* transaction error */ + Tdmmf = 0x00000004, /* missed µframe */ + Tddcs = 0x00000002, /* do complete split */ + Tdping = 0x00000001, /* do ping */ + + Tderrors = Tdhalt|Tddberr|Tdbabble|Tdtrerr|Tdmmf, + + /* Qh bits (eps0) */ + Qhrlcmask = 0xF, /* nak reload count */ + Qhrlcshift = 28, /* nak reload count */ + Qhnhctl = 0x08000000, /* not-high speed ctl */ + Qhmplmask = 0x7FF, /* max packet */ + Qhmplshift = 16, + Qhhrl = 0x00008000, /* head of reclamation list */ + Qhdtc = 0x00004000, /* data toggle ctl. */ + Qhint = 0x00000080, /* inactivate on next transition */ + Qhspeedmask = 0x00003000, /* speed bits */ + Qhfull = 0x00000000, /* full speed */ + Qhlow = 0x00001000, /* low speed */ + Qhhigh = 0x00002000, /* high speed */ + + /* Qh bits (eps1) */ + Qhmultshift = 30, /* multiple tds per µframe */ + Qhmultmask = 3, + Qhportshift = 23, /* hub port number */ + Qhhubshift = 16, /* hub address */ + Qhscmshift = 8, /* split completion mask bits */ + Qhismshift = 0, /* interrupt sched. 
mask bits */ +}; + +/* + * Endpoint tree (software) + */ +struct Qtree +{ + int nel; + int depth; + ulong* bw; + Qh** root; +}; + +/* + * One per endpoint per direction, to control I/O. + */ +struct Qio +{ + QLock; /* for the entire I/O process */ + Rendez; /* wait for completion */ + Qh* qh; /* Td list (field const after init) */ + int usbid; /* usb address for endpoint/device */ + int toggle; /* Tddata0/Tddata1 */ + int tok; /* Tdtoksetup, Tdtokin, Tdtokout */ + ulong iotime; /* last I/O time; to hold interrupt polls */ + int debug; /* debug flag from the endpoint */ + char* err; /* error string */ + char* tag; /* debug (no room in Qh for this) */ + ulong bw; +}; + +struct Ctlio +{ + Qio; /* a single Qio for each RPC */ + uchar* data; /* read from last ctl req. */ + int ndata; /* number of bytes read */ +}; + +struct Isoio +{ + QLock; + Rendez; /* wait for space/completion/errors */ + int usbid; /* address used for device/endpoint */ + int tok; /* Tdtokin or Tdtokout */ + int state; /* Qrun -> Qdone -> Qrun... -> Qclose */ + int nframes; /* number of frames ([S]Itds) used */ + uchar* data; /* iso data buffers if not embedded */ + char* err; /* error string */ + int nerrs; /* nb of consecutive I/O errors */ + ulong maxsize; /* ntds * ep->maxpkt */ + long nleft; /* number of bytes left from last write */ + int debug; /* debug flag from the endpoint */ + int delay; /* max number of bytes to buffer */ + int hs; /* is high speed? 
*/ + Isoio* next; /* in list of active Isoios */ + ulong td0frno; /* first frame used in ctlr */ + union{ + Itd* tdi; /* next td processed by interrupt */ + Sitd* stdi; + }; + union{ + Itd* tdu; /* next td for user I/O in tdps */ + Sitd* stdu; + }; + union{ + Itd** itdps; /* itdps[i]: ptr to Itd for i-th frame or nil */ + Sitd** sitdps; /* sitdps[i]: ptr to Sitd for i-th frame or nil */ + ulong** tdps; /* same thing, as seen by hw */ + }; +}; + +struct Edpool +{ + Lock; + Ed* free; + int nalloc; + int ninuse; + int nfree; +}; + +/* + * We use the 64-bit version for Itd, Sitd, Td, and Qh. + * If the ehci is 64-bit capable it assumes we are using those + * structures even when the system is 32 bits. + */ + +/* + * Iso transfer descriptor. hw: 92 bytes, 108 bytes total + * aligned to 32. + */ +struct Itd +{ + u32int link; /* to next hw struct */ + u32int csw[8]; /* sts/length/pg/off. updated by hw */ + u32int buffer[7]; /* buffer pointers, addrs, maxsz */ + u32int xbuffer[7]; /* high 32 bits of buffer for 64-bits */ + + u32int _pad0; /* pad to next cache line */ + /* cache-line boundary here */ + + /* software */ + Itd* next; + uint ndata; /* number of bytes in data */ + uint mdata; /* max number of bytes in data */ + uchar* data; +}; + +/* + * Split transaction iso transfer descriptor. + * hw: 36 bytes, 52 bytes total. aligned to 32. + */ +struct Sitd +{ + u32int link; /* to next hw struct */ + u32int epc; /* static endpoint state. addrs */ + u32int mfs; /* static endpoint state. µ-frame sched. */ + u32int csw; /* transfer state. updated by hw */ + u32int buffer[2]; /* buf. ptr/offset. offset updated by hw */ + /* buf ptr/TP/Tcnt. 
TP/Tcnt updated by hw */ + u32int blink; /* back pointer */ + /* cache-line boundary after xbuffer[0] */ + u32int xbuffer[2]; /* high 32 bits of buffer for 64-bits */ + + /* software */ + Sitd* next; + uint ndata; /* number of bytes in data */ + uint mdata; /* max number of bytes in data */ + uchar* data; +}; + +/* + * Queue element transfer descriptor. + * hw: first 52 bytes, total 68+sbuff bytes. aligned to 32 bytes. + */ +struct Td +{ + u32int nlink; /* to next Td */ + u32int alink; /* alternate link to next Td */ + u32int csw; /* cmd/sts. updated by hw */ + u32int buffer[5]; /* buf ptrs. offset updated by hw */ + /* cache-line boundary here */ + u32int xbuffer[5]; /* high 32 bits of buffer for 64-bits */ + + /* software */ + Td* next; /* in qh or Isoio or free list */ + int ndata; /* bytes available/used at data */ + uchar* data; /* pointer to actual data */ + uchar* buff; /* allocated data buffer or nil */ + uchar sbuff[1]; /* first byte of embedded buffer */ +}; + +/* + * Queue head. Aligned to 32 bytes. + * hw: first 68 bytes, 92 total. + */ +struct Qh +{ + u32int link; /* to next Qh in round robin */ + u32int eps0; /* static endpoint state. addrs */ + u32int eps1; /* static endpoint state. µ-frame sched. */ + + /* updated by hw */ + u32int tclink; /* current Td (No Term bit here!) */ + u32int nlink; /* to next Td */ + u32int alink; /* alternate link to next Td */ + u32int csw; /* cmd/sts. updated by hw */ + /* cache-line boundary after buffer[0] */ + u32int buffer[5]; /* buf ptrs. offset updated by hw */ + u32int xbuffer[5]; /* high 32 bits of buffer for 64-bits */ + + /* software */ + Qh* next; /* in controller list/tree of Qhs */ + int state; /* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */ + Qio* io; /* for this queue */ + Td* tds; /* for this queue */ + int sched; /* slot for for intr. Qhs */ + Qh* inext; /* next in list of intr. qhs */ +}; + +/* + * We can avoid frame span traversal nodes if we don't span frames. 
+ * Just schedule transfers that can fit on the current frame and + * wait a little bit otherwise. + */ + +/* + * Software. Ehci descriptors provided by pool. + * There are soo few because we avoid using Fstn. + */ +union Ed +{ + Ed* next; /* in free list */ + Qh qh; + Td td; + Itd itd; + Sitd sitd; + uchar align[Align]; +}; + +int ehcidebug = 0; + +static Edpool edpool; +static char Ebug[] = "not yet implemented"; +static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" }; + +Ecapio* ehcidebugcapio; +int ehcidebugport; + +void +ehcirun(Ctlr *ctlr, int on) +{ + int i; + Eopio *opio; + + ddprint("ehci %#p %s\n", ctlr->capio, on ? "starting" : "halting"); + opio = ctlr->opio; + if(on) + opio->cmd |= Crun; + else + opio->cmd = Cstop; + coherence(); + for(i = 0; i < 100; i++) + if(on == 0 && (opio->sts & Shalted) != 0) + break; + else if(on != 0 && (opio->sts & Shalted) == 0) + break; + else + delay(1); + if(i == 100) + print("ehci %#p %s cmd timed out\n", + ctlr->capio, on ? "run" : "halt"); + ddprint("ehci %#p cmd %#ux sts %#ux\n", + ctlr->capio, opio->cmd, opio->sts); +} + +static void* +edalloc(void) +{ + Ed *ed, *pool; + int i; + + lock(&edpool); + if(edpool.free == nil){ + pool = mallocalign(Incr*sizeof(Ed), Align, 0, 0); + if(pool == nil) + panic("edalloc"); + for(i=Incr; --i>=0;){ + pool[i].next = edpool.free; + edpool.free = &pool[i]; + } + edpool.nalloc += Incr; + edpool.nfree += Incr; + dprint("ehci: edalloc: %d eds\n", edpool.nalloc); + } + ed = edpool.free; + edpool.free = ed->next; + edpool.ninuse++; + edpool.nfree--; + unlock(&edpool); + + memset(ed, 0, sizeof(Ed)); /* safety */ + assert(((uintptr)ed & 0xF) == 0); + return ed; +} + +static void +edfree(void *a) +{ + Ed *ed; + + ed = a; + lock(&edpool); + ed->next = edpool.free; + edpool.free = ed; + edpool.ninuse--; + edpool.nfree++; + unlock(&edpool); +} + +/* + * Allocate and do some initialization. + * Free after releasing buffers used. 
+ */ + +static Itd* +itdalloc(void) +{ + Itd *td; + + td = edalloc(); + td->link = Lterm; + return td; +} + +static void +itdfree(Itd *td) +{ + edfree(td); +} + +static Sitd* +sitdalloc(void) +{ + Sitd *td; + + td = edalloc(); + td->link = td->blink = Lterm; + return td; +} + +static void +sitdfree(Sitd *td) +{ + edfree(td); +} + +static Td* +tdalloc(void) +{ + Td *td; + + td = edalloc(); + td->nlink = td->alink = Lterm; + return td; +} + +static void +tdfree(Td *td) +{ + if(td == nil) + return; + free(td->buff); + edfree(td); +} + +static void +tdlinktd(Td *td, Td *next) +{ + td->next = next; + td->alink = Lterm; + if(next == nil) + td->nlink = Lterm; + else + td->nlink = PADDR(next); + coherence(); +} + +static Qh* +qhlinkqh(Qh *qh, Qh *next) +{ + qh->next = next; + if(next == nil) + qh->link = Lterm; + else + qh->link = PADDR(next)|Lqh; + coherence(); + return qh; +} + +static void +qhsetaddr(Qh *qh, u32int addr) +{ + u32int eps0; + + eps0 = qh->eps0 & ~((Epmax<<8)|Devmax); + qh->eps0 = eps0 | addr & Devmax | ((addr >> 7) & Epmax) << 8; + coherence(); +} + +/* + * return largest power of 2 <= n + */ +static int +flog2lower(int n) +{ + int i; + + for(i = 0; (1 << (i + 1)) <= n; i++) + ; + return i; +} + +static int +pickschedq(Qtree *qt, int pollival, ulong bw, ulong limit) +{ + int i, j, d, upperb, q; + ulong best, worst, total; + + d = flog2lower(pollival); + if(d > qt->depth) + d = qt->depth; + q = -1; + worst = 0; + best = ~0; + upperb = (1 << (d+1)) - 1; + for(i = (1 << d) - 1; i < upperb; i++){ + total = qt->bw[0]; + for(j = i; j > 0; j = (j - 1) / 2) + total += qt->bw[j]; + if(total < best){ + best = total; + q = i; + } + if(total > worst) + worst = total; + } + if(worst + bw >= limit) + return -1; + return q; +} + +static int +schedq(Ctlr *ctlr, Qh *qh, int pollival) +{ + int q; + Qh *tqh; + ulong bw; + + bw = qh->io->bw; + q = pickschedq(ctlr->tree, pollival, 0, ~0); + ddqprint("ehci: sched %#p q %d, ival %d, bw %uld\n", + qh->io, q, pollival, bw); + 
if(q < 0){ + print("ehci: no room for ed\n"); + return -1; + } + ctlr->tree->bw[q] += bw; + tqh = ctlr->tree->root[q]; + qh->sched = q; + qhlinkqh(qh, tqh->next); + qhlinkqh(tqh, qh); + coherence(); + qh->inext = ctlr->intrqhs; + ctlr->intrqhs = qh; + coherence(); + return 0; +} + +static void +unschedq(Ctlr *ctlr, Qh *qh) +{ + int q; + Qh *prev, *this, *next; + Qh **l; + ulong bw; + + bw = qh->io->bw; + q = qh->sched; + if(q < 0) + return; + ctlr->tree->bw[q] -= bw; + + prev = ctlr->tree->root[q]; + this = prev->next; + while(this != nil && this != qh){ + prev = this; + this = this->next; + } + if(this == nil) + print("ehci: unschedq %d: not found\n", q); + else{ + next = this->next; + qhlinkqh(prev, next); + } + for(l = &ctlr->intrqhs; *l != nil; l = &(*l)->inext) + if(*l == qh){ + *l = (*l)->inext; + return; + } + print("ehci: unschedq: qh %#p not found\n", qh); +} + +static u32int +qhmaxpkt(Qh *qh) +{ + return (qh->eps0 >> Qhmplshift) & Qhmplmask; +} + +static void +qhsetmaxpkt(Qh *qh, int maxpkt) +{ + u32int eps0; + + eps0 = qh->eps0 & ~(Qhmplmask << Qhmplshift); + qh->eps0 = eps0 | (maxpkt & Qhmplmask) << Qhmplshift; + coherence(); +} + +/* + * Initialize the round-robin circular list of ctl/bulk Qhs + * if ep is nil. Otherwise, allocate and link a new Qh in the ctlr. 
+ */ +static Qh* +qhalloc(Ctlr *ctlr, Ep *ep, Qio *io, char* tag) +{ + Qh *qh; + int ttype; + + qh = edalloc(); + qh->nlink = Lterm; + qh->alink = Lterm; + qh->csw = Tdhalt; + qh->state = Qidle; + qh->sched = -1; + qh->io = io; + if(ep != nil){ + qh->eps0 = 0; + qhsetmaxpkt(qh, ep->maxpkt); + if(ep->dev->speed == Lowspeed) + qh->eps0 |= Qhlow; + if(ep->dev->speed == Highspeed) + qh->eps0 |= Qhhigh; + else if(ep->ttype == Tctl) + qh->eps0 |= Qhnhctl; + qh->eps0 |= Qhdtc | 8 << Qhrlcshift; /* 8 naks max */ + coherence(); + qhsetaddr(qh, io->usbid); + qh->eps1 = (ep->ntds & Qhmultmask) << Qhmultshift; + qh->eps1 |= ep->dev->port << Qhportshift; + qh->eps1 |= ep->dev->hub << Qhhubshift; + qh->eps1 |= 034 << Qhscmshift; + if(ep->ttype == Tintr) + qh->eps1 |= 1 << Qhismshift; /* intr. start µf. */ + coherence(); + if(io != nil) + io->tag = tag; + } + ilock(ctlr); + ttype = Tctl; + if(ep != nil) + ttype = ep->ttype; + switch(ttype){ + case Tctl: + case Tbulk: + if(ctlr->qhs == nil){ + ctlr->qhs = qhlinkqh(qh, qh); + qh->eps0 |= Qhhigh | Qhhrl; + coherence(); + ctlr->opio->link = PADDR(qh)|Lqh; + coherence(); + }else{ + qhlinkqh(qh, ctlr->qhs->next); + qhlinkqh(ctlr->qhs, qh); + } + break; + case Tintr: + schedq(ctlr, qh, ep->pollival); + break; + default: + print("ehci: qhalloc called for ttype != ctl/bulk\n"); + } + iunlock(ctlr); + return qh; +} + +static int +qhadvanced(void *a) +{ + Ctlr *ctlr; + + ctlr = a; + return (ctlr->opio->cmd & Ciasync) == 0; +} + +/* + * called when a qh is removed, to be sure the hw is not + * keeping pointers into it. + */ +static void +qhcoherency(Ctlr *ctlr) +{ + int i; + + qlock(&ctlr->portlck); + ctlr->opio->cmd |= Ciasync; /* ask for intr. 
on async advance */ + coherence(); + for(i = 0; i < 3 && qhadvanced(ctlr) == 0; i++){ + while(waserror()) + ; + tsleep(ctlr, qhadvanced, ctlr, Abortdelay); + poperror(); + } + dprint("ehci: qhcoherency: doorbell %d\n", qhadvanced(ctlr)); + if(i == 3) + print("ehci: async advance doorbell did not ring\n"); + ctlr->opio->cmd &= ~Ciasync; /* try to clean */ + qunlock(&ctlr->portlck); +} + +static void +qhfree(Ctlr *ctlr, Qh *qh) +{ + Td *td, *ltd; + Qh *q; + + if(qh == nil) + return; + ilock(ctlr); + if(qh->sched < 0){ + for(q = ctlr->qhs; q != nil; q = q->next) + if(q->next == qh) + break; + if(q == nil) + panic("qhfree: nil q"); + q->next = qh->next; + q->link = qh->link; + coherence(); + }else + unschedq(ctlr, qh); + qh->state = Qfree; /* paranoia */ + iunlock(ctlr); + + qhcoherency(ctlr); + + for(td = qh->tds; td != nil; td = ltd){ + ltd = td->next; + tdfree(td); + } + + edfree(qh); +} + +static void +qhlinktd(Qh *qh, Td *td) +{ + u32int csw; + int i; + + csw = qh->csw; + qh->tds = td; + if(td == nil) + qh->csw = (csw & ~Tdactive) | Tdhalt; + else{ + csw &= Tddata1 | Tdping; /* save */ + qh->csw = Tdhalt; + coherence(); + qh->tclink = 0; + qh->alink = Lterm; + qh->nlink = PADDR(td); + for(i = 0; i < nelem(qh->buffer); i++) + qh->buffer[i] = 0; + coherence(); + qh->csw = csw & ~(Tdhalt|Tdactive); /* activate next */ + } + coherence(); +} + +static char* +seprintlink(char *s, char *se, char *name, u32int l, int typed) +{ + s = seprint(s, se, "%s %ux", name, l); + if((l & Lterm) != 0) + return seprint(s, se, "T"); + if(typed == 0) + return s; + switch(l & (3<<1)){ + case Litd: + return seprint(s, se, "I"); + case Lqh: + return seprint(s, se, "Q"); + case Lsitd: + return seprint(s, se, "S"); + default: + return seprint(s, se, "F"); + } +} + +static char* +seprintitd(char *s, char *se, Itd *td) +{ + int i; + u32int b0, b1; + char flags[6]; + char *rw; + + if(td == nil) + return seprint(s, se, "\n"); + b0 = td->buffer[0]; + b1 = td->buffer[1]; + + s = seprint(s, se, 
"itd %#p", td); + rw = (b1 & Itdin) ? "in" : "out"; + s = seprint(s, se, " %s ep %ud dev %ud max %ud mult %ud", + rw, (b0>>8)&Epmax, (b0&Devmax), + td->buffer[1] & 0x7ff, b1 & 3); + s = seprintlink(s, se, " link", td->link, 1); + s = seprint(s, se, "\n"); + for(i = 0; i < nelem(td->csw); i++){ + memset(flags, '-', 5); + if((td->csw[i] & Itdactive) != 0) + flags[0] = 'a'; + if((td->csw[i] & Itdioc) != 0) + flags[1] = 'i'; + if((td->csw[i] & Itddberr) != 0) + flags[2] = 'd'; + if((td->csw[i] & Itdbabble) != 0) + flags[3] = 'b'; + if((td->csw[i] & Itdtrerr) != 0) + flags[4] = 't'; + flags[5] = 0; + s = seprint(s, se, "\ttd%d %s", i, flags); + s = seprint(s, se, " len %ud", (td->csw[i] >> 16) & 0x7ff); + s = seprint(s, se, " pg %ud", (td->csw[i] >> 12) & 0x7); + s = seprint(s, se, " off %ud\n", td->csw[i] & 0xfff); + } + s = seprint(s, se, "\tbuffs:"); + for(i = 0; i < nelem(td->buffer); i++) + s = seprint(s, se, " %#ux", td->buffer[i] >> 12); + return seprint(s, se, "\n"); +} + +static char* +seprintsitd(char *s, char *se, Sitd *td) +{ + char rw, pg, ss; + char flags[8]; + static char pc[4] = { 'a', 'b', 'm', 'e' }; + + if(td == nil) + return seprint(s, se, "\n"); + s = seprint(s, se, "sitd %#p", td); + rw = (td->epc & Stdin) ? 'r' : 'w'; + s = seprint(s, se, " %c ep %ud dev %ud", + rw, (td->epc>>8)&0xf, td->epc&0x7f); + s = seprint(s, se, " max %ud", (td->csw >> 16) & 0x3ff); + s = seprint(s, se, " hub %ud", (td->epc >> 16) & 0x7f); + s = seprint(s, se, " port %ud\n", (td->epc >> 24) & 0x7f); + memset(flags, '-', 7); + if((td->csw & Stdactive) != 0) + flags[0] = 'a'; + if((td->csw & Stdioc) != 0) + flags[1] = 'i'; + if((td->csw & Stderr) != 0) + flags[2] = 'e'; + if((td->csw & Stddberr) != 0) + flags[3] = 'd'; + if((td->csw & Stdbabble) != 0) + flags[4] = 'b'; + if((td->csw & Stdtrerr) != 0) + flags[5] = 't'; + if((td->csw & Stdmmf) != 0) + flags[6] = 'n'; + flags[7] = 0; + ss = (td->csw & Stddcs) ? 'c' : 's'; + pg = (td->csw & Stdpg) ? 
'1' : '0'; + s = seprint(s, se, "\t%s %cs pg%c", flags, ss, pg); + s = seprint(s, se, " b0 %#ux b1 %#ux off %ud\n", + td->buffer[0] >> 12, td->buffer[1] >> 12, td->buffer[0] & 0xfff); + s = seprint(s, se, "\ttpos %c tcnt %ud", + pc[(td->buffer[0]>>3)&3], td->buffer[1] & 7); + s = seprint(s, se, " ssm %#ux csm %#ux cspm %#ux", + td->mfs & 0xff, (td->mfs>>8) & 0xff, (td->csw>>8) & 0xff); + s = seprintlink(s, se, " link", td->link, 1); + s = seprintlink(s, se, " blink", td->blink, 0); + return seprint(s, se, "\n"); +} + +static long +maxtdlen(Td *td) +{ + return (td->csw >> Tdlenshift) & Tdlenmask; +} + +static long +tdlen(Td *td) +{ + if(td->data == nil) + return 0; + return td->ndata - maxtdlen(td); +} + +static char* +seprinttd(char *s, char *se, Td *td, char *tag) +{ + int i; + char t, ss; + char flags[9]; + static char *tok[4] = { "out", "in", "setup", "BUG" }; + + if(td == nil) + return seprint(s, se, "%s \n", tag); + s = seprint(s, se, "%s %#p", tag, td); + s = seprintlink(s, se, " nlink", td->nlink, 0); + s = seprintlink(s, se, " alink", td->alink, 0); + s = seprint(s, se, " %s", tok[(td->csw & Tdtok) >> 8]); + if((td->csw & Tdping) != 0) + s = seprint(s, se, " png"); + memset(flags, '-', 8); + if((td->csw & Tdactive) != 0) + flags[0] = 'a'; + if((td->csw & Tdioc) != 0) + flags[1] = 'i'; + if((td->csw & Tdhalt) != 0) + flags[2] = 'h'; + if((td->csw & Tddberr) != 0) + flags[3] = 'd'; + if((td->csw & Tdbabble) != 0) + flags[4] = 'b'; + if((td->csw & Tdtrerr) != 0) + flags[5] = 't'; + if((td->csw & Tdmmf) != 0) + flags[6] = 'n'; + if((td->csw & (Tderr2|Tderr1)) == 0) + flags[7] = 'z'; + flags[8] = 0; + t = (td->csw & Tddata1) ? '1' : '0'; + ss = (td->csw & Tddcs) ? 
'c' : 's'; + s = seprint(s, se, "\n\td%c %s %cs", t, flags, ss); + s = seprint(s, se, " max %uld", maxtdlen(td)); + s = seprint(s, se, " pg %ud off %#ux\n", + (td->csw >> Tdpgshift) & Tdpgmask, td->buffer[0] & 0xFFF); + s = seprint(s, se, "\tbuffs:"); + for(i = 0; i < nelem(td->buffer); i++) + s = seprint(s, se, " %#ux", td->buffer[i]>>12); + if(td->data != nil) + s = seprintdata(s, se, td->data, td->ndata); + return seprint(s, se, "\n"); +} + +static void +dumptd(Td *td, char *pref) +{ + char buf[256]; + char *se; + int i; + + i = 0; + se = buf+sizeof(buf); + for(; td != nil; td = td->next){ + seprinttd(buf, se, td, pref); + print("%s", buf); + if(i++ > 20){ + print("...more tds...\n"); + break; + } + } +} + +static void +qhdump(Qh *qh) +{ + char buf[256]; + char *s, *se, *tag; + Td td; + static char *speed[] = {"full", "low", "high", "BUG"}; + + if(qh == nil){ + print("\n"); + return; + } + if(qh->io == nil) + tag = "qh"; + else + tag = qh->io->tag; + se = buf+sizeof(buf); + s = seprint(buf, se, "%s %#p", tag, qh); + s = seprint(s, se, " ep %ud dev %ud", + (qh->eps0>>8)&0xf, qh->eps0&0x7f); + s = seprint(s, se, " hub %ud", (qh->eps1 >> 16) & 0x7f); + s = seprint(s, se, " port %ud", (qh->eps1 >> 23) & 0x7f); + s = seprintlink(s, se, " link", qh->link, 1); + seprint(s, se, " clink %#ux", qh->tclink); + print("%s\n", buf); + s = seprint(buf, se, "\tnrld %ud", (qh->eps0 >> Qhrlcshift) & Qhrlcmask); + s = seprint(s, se, " nak %ud", (qh->alink >> 1) & 0xf); + s = seprint(s, se, " max %ud ", qhmaxpkt(qh)); + if((qh->eps0 & Qhnhctl) != 0) + s = seprint(s, se, "c"); + if((qh->eps0 & Qhhrl) != 0) + s = seprint(s, se, "h"); + if((qh->eps0 & Qhdtc) != 0) + s = seprint(s, se, "d"); + if((qh->eps0 & Qhint) != 0) + s = seprint(s, se, "i"); + s = seprint(s, se, " %s", speed[(qh->eps0 >> 12) & 3]); + s = seprint(s, se, " mult %ud", (qh->eps1 >> Qhmultshift) & Qhmultmask); + seprint(s, se, " scm %#ux ism %#ux\n", + (qh->eps1 >> 8 & 0xff), qh->eps1 & 0xff); + print("%s\n", buf); + 
memset(&td, 0, sizeof(td)); + memmove(&td, &qh->nlink, 32); /* overlay area */ + seprinttd(buf, se, &td, "\tovl"); + print("%s", buf); +} + +static void +isodump(Isoio* iso, int all) +{ + Itd *td, *tdi, *tdu; + Sitd *std, *stdi, *stdu; + char buf[256]; + int i; + + if(iso == nil){ + print("\n"); + return; + } + print("iso %#p %s %s speed state %d nframes %d maxsz %uld", + iso, iso->tok == Tdtokin ? "in" : "out", + iso->hs ? "high" : "full", + iso->state, iso->nframes, iso->maxsize); + print(" td0 %uld tdi %#p tdu %#p data %#p\n", + iso->td0frno, iso->tdi, iso->tdu, iso->data); + if(iso->err != nil) + print("\terr %s\n", iso->err); + if(iso->err != nil) + print("\terr='%s'\n", iso->err); + if(all == 0) + if(iso->hs != 0){ + tdi = iso->tdi; + seprintitd(buf, buf+sizeof(buf), tdi); + print("\ttdi %s\n", buf); + tdu = iso->tdu; + seprintitd(buf, buf+sizeof(buf), tdu); + print("\ttdu %s\n", buf); + }else{ + stdi = iso->stdi; + seprintsitd(buf, buf+sizeof(buf), stdi); + print("\tstdi %s\n", buf); + stdu = iso->stdu; + seprintsitd(buf, buf+sizeof(buf), stdu); + print("\tstdu %s\n", buf); + } + else + for(i = 0; i < Nisoframes; i++) + if(iso->tdps[i] != nil) + if(iso->hs != 0){ + td = iso->itdps[i]; + seprintitd(buf, buf+sizeof(buf), td); + if(td == iso->tdi) + print("i->"); + if(td == iso->tdu) + print("i->"); + print("[%d]\t%s", i, buf); + }else{ + std = iso->sitdps[i]; + seprintsitd(buf, buf+sizeof(buf), std); + if(std == iso->stdi) + print("i->"); + if(std == iso->stdu) + print("u->"); + print("[%d]\t%s", i, buf); + } +} + +static void +dump(Hci *hp) +{ + int i; + char *s, *se; + char buf[128]; + Ctlr *ctlr; + Eopio *opio; + Isoio *iso; + Qh *qh; + + ctlr = hp->aux; + opio = ctlr->opio; + ilock(ctlr); + print("ehci port %#p frames %#p (%d fr.) 
nintr %d ntdintr %d", + ctlr->capio, ctlr->frames, ctlr->nframes, + ctlr->nintr, ctlr->ntdintr); + print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr); + print("\tcmd %#ux sts %#ux intr %#ux frno %ud", + opio->cmd, opio->sts, opio->intr, opio->frno); + print(" base %#ux link %#ux fr0 %#lux\n", + opio->frbase, opio->link, ctlr->frames[0]); + se = buf+sizeof(buf); + s = seprint(buf, se, "\t"); + for(i = 0; i < hp->nports; i++){ + s = seprint(s, se, "p%d %#ux ", i, opio->portsc[i]); + if(hp->nports > 4 && i == hp->nports/2 - 1) + s = seprint(s, se, "\n\t"); + } + print("%s\n", buf); + qh = ctlr->qhs; + i = 0; + do{ + qhdump(qh); + qh = qh->next; + }while(qh != ctlr->qhs && i++ < 100); + if(i > 100) + print("...too many Qhs...\n"); + if(ctlr->intrqhs != nil) + print("intr qhs:\n"); + for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext) + qhdump(qh); + if(ctlr->iso != nil) + print("iso:\n"); + for(iso = ctlr->iso; iso != nil; iso = iso->next) + isodump(ctlr->iso, 0); + print("%d eds in tree\n", ctlr->ntree); + iunlock(ctlr); + lock(&edpool); + print("%d eds allocated = %d in use + %d free\n", + edpool.nalloc, edpool.ninuse, edpool.nfree); + unlock(&edpool); +} + +static char* +errmsg(int err) +{ + if(err == 0) + return "ok"; + if(err & Tddberr) + return "data buffer error"; + if(err & Tdbabble) + return "babble detected"; + if(err & Tdtrerr) + return "transaction error"; + if(err & Tdmmf) + return "missed µframe"; + if(err & Tdhalt) + return Estalled; /* [uo]hci report this error */ + return Eio; +} + +static char* +ierrmsg(int err) +{ + if(err == 0) + return "ok"; + if(err & Itddberr) + return "data buffer error"; + if(err & Itdbabble) + return "babble detected"; + if(err & Itdtrerr) + return "transaction error"; + return Eio; +} + +static char* +serrmsg(int err) +{ + if(err & Stderr) + return "translation translator error"; + /* other errors have same numbers than Td errors */ + return errmsg(err); +} + +static int +isocanread(void *a) +{ + Isoio *iso; + + 
iso = a; + if(iso->state == Qclose) + return 1; + if(iso->state == Qrun && iso->tok == Tdtokin){ + if(iso->hs != 0 && iso->tdi != iso->tdu) + return 1; + if(iso->hs == 0 && iso->stdi != iso->stdu) + return 1; + } + return 0; +} + +static int +isocanwrite(void *a) +{ + Isoio *iso; + + iso = a; + if(iso->state == Qclose) + return 1; + if(iso->state == Qrun && iso->tok == Tdtokout){ + if(iso->hs != 0 && iso->tdu->next != iso->tdi) + return 1; + if(iso->hs == 0 && iso->stdu->next != iso->stdi) + return 1; + } + return 0; +} + +static void +itdinit(Isoio *iso, Itd *td) +{ + int p, t; + ulong pa, tsize, size; + + /* + * BUG: This does not put an integral number of samples + * on each µframe unless samples per packet % 8 == 0 + * Also, all samples are packed early on each frame. + */ + p = 0; + size = td->ndata = td->mdata; + pa = PADDR(td->data); + for(t = 0; size > 0 && t < 8; t++){ + tsize = size; + if(tsize > iso->maxsize) + tsize = iso->maxsize; + size -= tsize; + assert(p < nelem(td->buffer)); + td->csw[t] = tsize << Itdlenshift | p << Itdpgshift | + (pa & 0xFFF) << Itdoffshift | Itdactive | Itdioc; + coherence(); + if(((pa+tsize) & ~0xFFF) != (pa & ~0xFFF)) + p++; + pa += tsize; + } +} + +static void +sitdinit(Isoio *iso, Sitd *td) +{ + td->ndata = td->mdata & Stdlenmask; + td->buffer[0] = PADDR(td->data); + td->buffer[1] = (td->buffer[0] & ~0xFFF) + 0x1000; + if(iso->tok == Tdtokin || td->ndata <= 188) + td->buffer[1] |= Stdtpall; + else + td->buffer[1] |= Stdtpbegin; + if(iso->tok == Tdtokin) + td->buffer[1] |= 1; + else + td->buffer[1] |= ((td->ndata + 187) / 188) & Stdtcntmask; + coherence(); + td->csw = td->ndata << Stdlenshift | Stdactive | Stdioc; + coherence(); +} + +static int +itdactive(Itd *td) +{ + int i; + + for(i = 0; i < nelem(td->csw); i++) + if((td->csw[i] & Itdactive) != 0) + return 1; + return 0; +} + +static int +isodelay(void *a) +{ + Isoio *iso; + int delay; + + iso = a; + if(iso->state == Qclose || iso->err || iso->delay == 0) + return 1; + + 
delay = 0; + if(iso->hs){ + Itd *i; + + for(i = iso->tdi; i->next != iso->tdu; i = i->next){ + if(!itdactive(i)) + continue; + delay += i->mdata; + if(delay > iso->delay) + break; + } + } else { + Sitd *i; + + for(i = iso->stdi; i->next != iso->stdu; i = i->next){ + if((i->csw & Stdactive) == 0) + continue; + delay += i->mdata; + if(delay > iso->delay) + break; + } + } + + return delay <= iso->delay; +} + +static int +isohsinterrupt(Ctlr *ctlr, Isoio *iso) +{ + int err, i, nframes, t; + Itd *tdi; + + tdi = iso->tdi; + if(tdi == nil || itdactive(tdi)) /* not all tds are done */ + return 0; + ctlr->nisointr++; + ddiprint("isohsintr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu); + if(iso->state != Qrun && iso->state != Qdone) + panic("isofsintr: iso state"); + if(ehcidebug > 1 || iso->debug > 1) + isodump(iso, 0); + + nframes = iso->nframes / 2; /* limit how many we look */ + if(nframes > Nisoframes) + nframes = Nisoframes; + + if(iso->tok == Tdtokin) + tdi->ndata = 0; + /* else, it has the number of bytes transferred */ + + for(i = 0; i < nframes && itdactive(tdi) == 0; i++){ + if(iso->tok == Tdtokin) + tdi->ndata += (tdi->csw[i] >> Itdlenshift) & Itdlenmask; + err = 0; + coherence(); + for(t = 0; t < nelem(tdi->csw); t++){ + tdi->csw[t] &= ~Itdioc; + coherence(); + err |= tdi->csw[t] & Itderrors; + } + if(err == 0) + iso->nerrs = 0; + else if(iso->nerrs++ > iso->nframes/2){ + if(iso->err == nil){ + iso->err = ierrmsg(err); + diprint("isohsintr: tdi %#p error %#ux %s\n", + tdi, err, iso->err); + diprint("ctlr load %uld\n", ctlr->load); + } + tdi->ndata = 0; + }else + tdi->ndata = 0; + if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){ + memset(iso->tdu->data, 0, iso->tdu->mdata); + itdinit(iso, iso->tdu); + iso->tdu = iso->tdu->next; + iso->nleft = 0; + } + tdi = tdi->next; + coherence(); + } + ddiprint("isohsintr: %d frames processed\n", nframes); + if(i == nframes){ + tdi->csw[0] |= Itdioc; + coherence(); + } + iso->tdi = tdi; + coherence(); + 
if(isocanwrite(iso) || isocanread(iso)){ + diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso, + iso->tdi, iso->tdu); + wakeup(iso); + } + return 1; +} + +static int +isofsinterrupt(Ctlr *ctlr, Isoio *iso) +{ + int err, i, nframes; + Sitd *stdi; + + stdi = iso->stdi; + if(stdi == nil || (stdi->csw & Stdactive) != 0) /* nothing new done */ + return 0; + ctlr->nisointr++; + ddiprint("isofsintr: iso %#p: tdi %#p tdu %#p\n", iso, stdi, iso->stdu); + if(iso->state != Qrun && iso->state != Qdone) + panic("isofsintr: iso state"); + if(ehcidebug > 1 || iso->debug > 1) + isodump(iso, 0); + + nframes = iso->nframes / 2; /* limit how many we look */ + if(nframes > Nisoframes) + nframes = Nisoframes; + + for(i = 0; i < nframes && (stdi->csw & Stdactive) == 0; i++){ + stdi->csw &= ~Stdioc; + /* write back csw and see if it produces errors */ + coherence(); + err = stdi->csw & Stderrors; + if(err == 0){ + iso->nerrs = 0; + if(iso->tok == Tdtokin) + stdi->ndata = (stdi->csw>>Stdlenshift)&Stdlenmask; + /* else len is assumed correct */ + }else if(iso->nerrs++ > iso->nframes/2){ + if(iso->err == nil){ + iso->err = serrmsg(err); + diprint("isofsintr: tdi %#p error %#ux %s\n", + stdi, err, iso->err); + diprint("ctlr load %uld\n", ctlr->load); + } + stdi->ndata = 0; + }else + stdi->ndata = 0; + + if(stdi->next == iso->stdu || stdi->next->next == iso->stdu){ + memset(iso->stdu->data, 0, iso->stdu->mdata); + coherence(); + sitdinit(iso, iso->stdu); + iso->stdu = iso->stdu->next; + iso->nleft = 0; + } + coherence(); + stdi = stdi->next; + } + ddiprint("isofsintr: %d frames processed\n", nframes); + if(i == nframes){ + stdi->csw |= Stdioc; + coherence(); + } + iso->stdi = stdi; + coherence(); + if(isocanwrite(iso) || isocanread(iso)){ + diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso, + iso->stdi, iso->stdu); + wakeup(iso); + } + return 1; +} + +static int +qhinterrupt(Ctlr *ctlr, Qh *qh) +{ + Td *td; + int err; + + if(qh->state != Qrun) + panic("qhinterrupt: qh state"); + td = qh->tds; + 
if(td == nil) + return 0; + if((td->csw & Tdactive) == 0) + ddqprint("qhinterrupt port %#p qh %#p\n", ctlr->capio, qh); + for(; td != nil; td = td->next){ + if(td->csw & Tdactive) + return 0; + err = td->csw & Tderrors; + if(err != 0){ + if(qh->io->err == nil){ + qh->io->err = errmsg(err); + dqprint("qhintr: td %#p csw %#ux error %#ux %s\n", + td, td->csw, err, qh->io->err); + } + break; + } + td->ndata = tdlen(td); + coherence(); + if(td->ndata < maxtdlen(td)){ /* EOT */ + td = td->next; + break; + } + } + /* + * Done. Make void the Tds not used (errors or EOT) and wakeup epio. + */ + for(; td != nil; td = td->next) + td->ndata = 0; + coherence(); + qh->state = Qdone; + coherence(); + wakeup(qh->io); + return 1; +} + +static int +ehciintr(Hci *hp) +{ + Ctlr *ctlr; + Eopio *opio; + Isoio *iso; + u32int sts; + Qh *qh; + int i, some; + + ctlr = hp->aux; + opio = ctlr->opio; + + /* + * Will we know in USB 3.0 who the interrupt was for?. + * Do they still teach indexing in CS? + * This is Intel's doing. + */ + ilock(ctlr); + sts = opio->sts & Sintrs; + if(sts == 0){ /* not ours; shared intr. */ + iunlock(ctlr); + return 0; + } + opio->sts = sts; + coherence(); + ctlr->nintr++; + if((sts & Sherr) != 0) + iprint("ehci: port %#p fatal host system error\n", ctlr->capio); + if((sts & Shalted) != 0) + iprint("ehci: port %#p: halted\n", ctlr->capio); + if((sts & Sasync) != 0){ + dprint("ehci: doorbell\n"); + wakeup(ctlr); + } + /* + * We enter always this if, even if it seems the + * interrupt does not report anything done/failed. + * Some controllers don't post interrupts right. 
+ */ + some = 0; + if((sts & (Serrintr|Sintr)) != 0){ + ctlr->ntdintr++; + if(ehcidebug > 1){ + iprint("ehci port %#p frames %#p nintr %d ntdintr %d", + ctlr->capio, ctlr->frames, + ctlr->nintr, ctlr->ntdintr); + iprint(" nqhintr %d nisointr %d\n", + ctlr->nqhintr, ctlr->nisointr); + iprint("\tcmd %#ux sts %#ux intr %#ux frno %ud", + opio->cmd, opio->sts, opio->intr, opio->frno); + } + + /* process the Iso transfers */ + for(iso = ctlr->iso; iso != nil; iso = iso->next) + if(iso->state == Qrun || iso->state == Qdone) + if(iso->hs != 0) + some += isohsinterrupt(ctlr, iso); + else + some += isofsinterrupt(ctlr, iso); + + /* process the qhs in the periodic tree */ + for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext) + if(qh->state == Qrun) + some += qhinterrupt(ctlr, qh); + + /* process the async Qh circular list */ + qh = ctlr->qhs; + i = 0; + do{ + if(qh == nil) + break; + if(qh->state == Qrun) + some += qhinterrupt(ctlr, qh); + qh = qh->next; + }while(qh != ctlr->qhs && i++ < 100); + if(i > 100) + iprint("echi: interrupt: qh loop?\n"); + } +// if (some == 0) +// panic("ehciintr: no work"); + iunlock(ctlr); + return some; +} + +static void +interrupt(Ureg*, void* a) +{ + ehciintr(a); +} + +static int +portenable(Hci *hp, int port, int on) +{ + Ctlr *ctlr; + Eopio *opio; + int s; + + ctlr = hp->aux; + opio = ctlr->opio; + s = opio->portsc[port-1]; + qlock(&ctlr->portlck); + if(waserror()){ + qunlock(&ctlr->portlck); + nexterror(); + } + dprint("ehci %#p port %d enable=%d; sts %#x\n", + ctlr->capio, port, on, s); + ilock(ctlr); + if(s & (Psstatuschg | Pschange)) + opio->portsc[port-1] = s; + if(on) + opio->portsc[port-1] |= Psenable; + else + opio->portsc[port-1] &= ~Psenable; + coherence(); + microdelay(64); + iunlock(ctlr); + tsleep(&up->sleep, return0, 0, Enabledelay); + dprint("ehci %#p port %d enable=%d: sts %#ux\n", + ctlr->capio, port, on, opio->portsc[port-1]); + qunlock(&ctlr->portlck); + poperror(); + return 0; +} + +/* + * If we detect during status that 
the port is low-speed or + * during reset that it's full-speed, the device is not for + * ourselves. The companion controller will take care. + * Low-speed devices will not be seen by usbd. Full-speed + * ones are seen because it's only after reset that we know what + * they are (usbd may notice a device not enabled in this case). + */ +static void +portlend(Ctlr *ctlr, int port, char *ss) +{ + Eopio *opio; + u32int s; + + opio = ctlr->opio; + + dprint("ehci %#p port %d: %s speed device: no longer owned\n", + ctlr->capio, port, ss); + s = opio->portsc[port-1] & ~(Pschange|Psstatuschg); + opio->portsc[port-1] = s | Psowner; + coherence(); +} + +static int +portreset(Hci *hp, int port, int on) +{ + u32int *portscp; + Eopio *opio; + Ctlr *ctlr; + int i; + + if(on == 0) + return 0; + + ctlr = hp->aux; + opio = ctlr->opio; + qlock(&ctlr->portlck); + if(waserror()){ + iunlock(ctlr); + qunlock(&ctlr->portlck); + nexterror(); + } + portscp = &opio->portsc[port-1]; + dprint("ehci %#p port %d reset; sts %#ux\n", ctlr->capio, port, *portscp); + ilock(ctlr); + /* Shalted must be zero, else Psreset will stay set */ + if (opio->sts & Shalted) + iprint("ehci %#p: halted yet trying to reset port\n", + ctlr->capio); + + *portscp = (*portscp & ~Psenable) | Psreset; /* initiate reset */ + /* + * usb 2 spec: reset must finish within 20 ms. + * linux says spec says it can take 50 ms. for hubs. + */ + delay(50); + *portscp &= ~Psreset; /* terminate reset */ + + delay(10); + for(i = 0; *portscp & Psreset && i < 10; i++) + delay(10); + + if (*portscp & Psreset) + iprint("ehci %#p: port %d didn't reset; sts %#ux\n", + ctlr->capio, port, *portscp); + + delay(10); /* ehci spec: enable within 2 ms. 
*/ + if((*portscp & Psenable) == 0) + portlend(ctlr, port, "full"); + + iunlock(ctlr); + dprint("ehci %#p after port %d reset; sts %#ux\n", + ctlr->capio, port, *portscp); + qunlock(&ctlr->portlck); + poperror(); + return 0; +} + +static int +portstatus(Hci *hp, int port) +{ + int s, r; + Eopio *opio; + Ctlr *ctlr; + + ctlr = hp->aux; + opio = ctlr->opio; + qlock(&ctlr->portlck); + if(waserror()){ + iunlock(ctlr); + qunlock(&ctlr->portlck); + nexterror(); + } + ilock(ctlr); + s = opio->portsc[port-1]; + if(s & (Psstatuschg | Pschange)){ + opio->portsc[port-1] = s; + coherence(); + ddprint("ehci %#p port %d status %#x\n", ctlr->capio, port, s); + } + /* + * If the port is a low speed port we yield ownership now + * to the [uo]hci companion controller and pretend it's not here. + */ + if((s & Pspresent) != 0 && (s & Pslinemask) == Pslow){ + portlend(ctlr, port, "low"); + s &= ~Pspresent; /* not for us this time */ + } + iunlock(ctlr); + qunlock(&ctlr->portlck); + poperror(); + + /* + * We must return status bits as a + * get port status hub request would do. 
+ */ + r = 0; + if(s & Pspresent) + r |= HPpresent|HPhigh; + if(s & Psenable) + r |= HPenable; + if(s & Pssuspend) + r |= HPsuspend; + if(s & Psreset) + r |= HPreset; + if(s & Psstatuschg) + r |= HPstatuschg; + if(s & Pschange) + r |= HPchange; + return r; +} + +static char* +seprintio(char *s, char *e, Qio *io, char *pref) +{ + s = seprint(s,e,"%s io %#p qh %#p id %#x", pref, io, io->qh, io->usbid); + s = seprint(s,e," iot %ld", io->iotime); + s = seprint(s,e," tog %#x tok %#x err %s", io->toggle, io->tok, io->err); + return s; +} + +static char* +seprintep(char *s, char *e, Ep *ep) +{ + Qio *io; + Ctlio *cio; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + ilock(ctlr); + if(ep->aux == nil){ + *s = 0; + iunlock(ctlr); + return s; + } + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + s = seprintio(s, e, cio, "c"); + s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata); + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE) + s = seprintio(s, e, &io[OREAD], "r"); + if(ep->mode != OREAD) + s = seprintio(s, e, &io[OWRITE], "w"); + break; + case Tiso: + *s = 0; + break; + } + iunlock(ctlr); + return s; +} + +/* + * halt condition was cleared on the endpoint. update our toggles. 
+ */ +static void +clrhalt(Ep *ep) +{ + Qio *io; + + ep->clrhalt = 0; + coherence(); + switch(ep->ttype){ + case Tintr: + case Tbulk: + io = ep->aux; + if(ep->mode != OREAD){ + qlock(&io[OWRITE]); + io[OWRITE].toggle = Tddata0; + deprint("ep clrhalt for io %#p\n", io+OWRITE); + qunlock(&io[OWRITE]); + } + if(ep->mode != OWRITE){ + qlock(&io[OREAD]); + io[OREAD].toggle = Tddata0; + deprint("ep clrhalt for io %#p\n", io+OREAD); + qunlock(&io[OREAD]); + } + break; + } +} + +static void +xdump(char* pref, void *qh) +{ + int i; + u32int *u; + + u = qh; + print("%s %#p:", pref, u); + for(i = 0; i < 16; i++) + if((i%4) == 0) + print("\n %#.8ux", u[i]); + else + print(" %#.8ux", u[i]); + print("\n"); +} + +static long +episohscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, uchar *b, long count) +{ + int nr; + long tot; + Itd *tdu; + + for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){ + tdu = iso->tdu; + if(itdactive(tdu)) + break; + nr = tdu->ndata; + if(tot + nr > count) + nr = count - tot; + if(nr == 0) + print("ehci: ep%d.%d: too many polls\n", + ep->dev->nb, ep->nb); + else{ + iunlock(ctlr); /* We could page fault here */ + memmove(b+tot, tdu->data, nr); + ilock(ctlr); + if(iso->tdu != tdu) + continue; + if(nr < tdu->ndata) + memmove(tdu->data, tdu->data+nr, tdu->ndata - nr); + tdu->ndata -= nr; + coherence(); + } + if(tdu->ndata == 0){ + itdinit(iso, tdu); + iso->tdu = tdu->next; + } + } + return tot; +} + +static long +episofscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, uchar *b, long count) +{ + int nr; + long tot; + Sitd *stdu; + + for(tot = 0; iso->stdi != iso->stdu && tot < count; tot += nr){ + stdu = iso->stdu; + if(stdu->csw & Stdactive){ + diprint("ehci: episoread: %#p tdu active\n", iso); + break; + } + nr = stdu->ndata; + if(tot + nr > count) + nr = count - tot; + if(nr == 0) + print("ehci: ep%d.%d: too many polls\n", + ep->dev->nb, ep->nb); + else{ + iunlock(ctlr); /* We could page fault here */ + memmove(b+tot, stdu->data, nr); + ilock(ctlr); + if(iso->stdu != 
stdu) + continue; + if(nr < stdu->ndata) + memmove(stdu->data, stdu->data+nr, + stdu->ndata - nr); + stdu->ndata -= nr; + coherence(); + } + if(stdu->ndata == 0){ + sitdinit(iso, stdu); + iso->stdu = stdu->next; + } + } + return tot; +} + +static long +episoread(Ep *ep, Isoio *iso, void *a, long count) +{ + Ctlr *ctlr; + uchar *b; + long tot; + + iso->debug = ep->debug; + diprint("ehci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + + b = a; + ctlr = ep->hp->aux; + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + iso->err = nil; + iso->nerrs = 0; + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + coherence(); + while(isocanread(iso) == 0){ + iunlock(ctlr); + diprint("ehci: episoread: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanread, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qdone; + coherence(); + assert(iso->tdu != iso->tdi); + + if(iso->hs != 0) + tot = episohscpy(ctlr, ep, iso, b, count); + else + tot = episofscpy(ctlr, ep, iso, b, count); + iunlock(ctlr); + qunlock(iso); + poperror(); + diprint("uhci: episoread: %#p %uld bytes err '%s'\n", iso, tot, iso->err); + if(iso->err != nil) + error(iso->err); + return tot; +} + +/* + * iso->tdu is the next place to put data. When it gets full + * it is activated and tdu advanced. 
+ */ +static long +putsamples(Ctlr *ctlr, Isoio *iso, uchar *b, long count) +{ + long left, tot, n; + Sitd *stdu; + Itd *tdu; + + for(tot = 0; isocanwrite(iso) && tot < count; tot += n){ + n = count-tot; + left = iso->nleft; + if(iso->hs != 0){ + tdu = iso->tdu; + if(n > tdu->mdata - left) + n = tdu->mdata - left; + iunlock(ctlr); /* We could page fault here */ + memmove(tdu->data + left, b + tot, n); + ilock(ctlr); + if(iso->tdu != tdu) + continue; + iso->nleft += n; + if(iso->nleft == tdu->mdata){ + itdinit(iso, tdu); + iso->tdu = tdu->next; + iso->nleft = 0; + } + }else{ + stdu = iso->stdu; + if(n > stdu->mdata - left) + n = stdu->mdata - left; + iunlock(ctlr); /* We could page fault here */ + memmove(stdu->data + left, b + tot, n); + ilock(ctlr); + if(iso->stdu != stdu) + continue; + iso->nleft += n; + if(iso->nleft == stdu->mdata){ + sitdinit(iso, stdu); + iso->stdu = stdu->next; + iso->nleft = 0; + } + } + } + return tot; +} + +/* + * Queue data for writing and return error status from + * last writes done, to maintain buffered data. + */ +static long +episowrite(Ep *ep, Isoio *iso, void *a, long count) +{ + Ctlr *ctlr; + uchar *b; + int tot, nw; + char *err; + + iso->delay = ep->sampledelay * ep->samplesz; + iso->debug = ep->debug; + diprint("ehci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + + ctlr = ep->hp->aux; + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + coherence(); + b = a; + for(tot = 0; tot < count; tot += nw){ + while(isocanwrite(iso) == 0){ + iunlock(ctlr); + diprint("ehci: episowrite: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanwrite, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + err = iso->err; + iso->err = nil; + if(iso->state == Qclose || err != nil){ + iunlock(ctlr); + error(err ? 
err : Eio); + } + if(iso->state != Qrun) + panic("episowrite: iso not running"); + nw = putsamples(ctlr, iso, b+tot, count-tot); + } + while(isodelay(iso) == 0){ + iunlock(ctlr); + sleep(iso, isodelay, iso); + ilock(ctlr); + } + if(iso->state != Qclose) + iso->state = Qdone; + iunlock(ctlr); + err = iso->err; /* in case it failed early */ + iso->err = nil; + qunlock(iso); + poperror(); + if(err != nil) + error(err); + diprint("ehci: episowrite: %#p %d bytes\n", iso, tot); + return tot; +} + +static int +nexttoggle(int toggle, int count, int maxpkt) +{ + int np; + + np = count / maxpkt; + if(np == 0) + np = 1; + if((np % 2) == 0) + return toggle; + if(toggle == Tddata1) + return Tddata0; + else + return Tddata1; +} + +static Td* +epgettd(Qio *io, int flags, void *a, int count, int maxpkt) +{ + Td *td; + ulong pa; + int i; + + if(count > Tdmaxpkt) + panic("ehci: epgettd: too many bytes"); + td = tdalloc(); + td->csw = flags | io->toggle | io->tok | count << Tdlenshift | + Tderr2 | Tderr1; + + /* + * use the space wasted by alignment as an + * embedded buffer if count bytes fit in there. 
+ */ + assert(Align > sizeof(Td)); + if(count <= Align - sizeof(Td)){ + td->data = td->sbuff; + td->buff = nil; + }else + td->data = td->buff = smalloc(Tdmaxpkt); + + pa = PADDR(td->data); + for(i = 0; i < nelem(td->buffer); i++){ + td->buffer[i] = pa; + if(i > 0) + td->buffer[i] &= ~0xFFF; + pa += 0x1000; + } + td->ndata = count; + if(a != nil && count > 0) + memmove(td->data, a, count); + coherence(); + io->toggle = nexttoggle(io->toggle, count, maxpkt); + coherence(); + return td; +} + +/* + * Try to get them idle + */ +static void +aborttds(Qh *qh) +{ + Td *td; + + if(qh->sched >= 0 && (qh->eps0 & Qhspeedmask) != Qhhigh) + qh->eps0 |= Qhint; /* inactivate on next pass */ + qh->csw = (qh->csw & ~Tdactive) | Tdhalt; + coherence(); + for(td = qh->tds; td != nil; td = td->next){ + if(td->csw & Tdactive){ + td->ndata = 0; + td->csw |= Tdhalt; + } + } + coherence(); +} + +/* + * Some controllers do not post the usb/error interrupt after + * the work has been done. It seems that we must poll for them. 
+ */ +static int +workpending(void *a) +{ + Ctlr *ctlr; + + ctlr = a; + return ctlr->nreqs > 0; +} + +static void +ehcipoll(void* a) +{ + Hci *hp; + Ctlr *ctlr; + Poll *poll; + int i; + + hp = a; + ctlr = hp->aux; + poll = &ctlr->poll; + for(;;){ + if(ctlr->nreqs == 0){ + if(0)ddprint("ehcipoll %#p sleep\n", ctlr->capio); + sleep(poll, workpending, ctlr); + if(0)ddprint("ehcipoll %#p awaken\n", ctlr->capio); + } + for(i = 0; i < 16 && ctlr->nreqs > 0; i++) + if(ehciintr(hp) == 0) + break; + do{ + tsleep(&up->sleep, return0, 0, 1); + ehciintr(hp); + }while(ctlr->nreqs > 0); + } +} + +static void +pollcheck(Hci *hp) +{ + Ctlr *ctlr; + Poll *poll; + + ctlr = hp->aux; + poll = &ctlr->poll; + + if(poll->must != 0 && poll->does == 0){ + lock(poll); + if(poll->must != 0 && poll->does == 0){ + poll->does++; + print("ehci %#p: polling\n", ctlr->capio); + kproc("ehcipoll", ehcipoll, hp); + } + unlock(poll); + } +} + +static int +epiodone(void *a) +{ + Qh *qh; + + qh = a; + return qh->state != Qrun; +} + +static void +epiowait(Hci *hp, Qio *io, int tmout, ulong load) +{ + Qh *qh; + int timedout; + Ctlr *ctlr; + + ctlr = hp->aux; + qh = io->qh; + ddqprint("ehci %#p: io %#p sleep on qh %#p state %s\n", + ctlr->capio, io, qh, qhsname[qh->state]); + timedout = 0; + if(waserror()){ + dqprint("ehci %#p: io %#p qh %#p timed out\n", + ctlr->capio, io, qh); + timedout++; + }else{ + if(tmout == 0) + sleep(io, epiodone, qh); + else + tsleep(io, epiodone, qh, tmout); + poperror(); + } + + ilock(ctlr); + /* Are we missing interrupts? 
*/ + if(qh->state == Qrun){ + iunlock(ctlr); + ehciintr(hp); + ilock(ctlr); + if(qh->state == Qdone){ + dqprint("ehci %#p: polling required\n", ctlr->capio); + ctlr->poll.must = 1; + pollcheck(hp); + } + } + + if(qh->state == Qrun){ +// dqprint("ehci %#p: io %#p qh %#p timed out (no intr?)\n", + iprint("ehci %#p: io %#p qh %#p timed out (no intr?)\n", + ctlr->capio, io, qh); + timedout = 1; + }else if(qh->state != Qdone && qh->state != Qclose) + panic("ehci: epio: queue state %d", qh->state); + if(timedout){ + aborttds(qh); + qh->state = Qdone; + if(io->err == nil) + io->err = "request timed out"; + iunlock(ctlr); + while(waserror()) + ; + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + ilock(ctlr); + } + if(qh->state != Qclose) + qh->state = Qidle; + coherence(); + qhlinktd(qh, nil); + ctlr->load -= load; + ctlr->nreqs--; + iunlock(ctlr); +} + +/* + * Non iso I/O. + * To make it work for control transfers, the caller may + * lock the Qio for the entire control transfer. + */ +static long +epio(Ep *ep, Qio *io, void *a, long count, int mustlock) +{ + int saved, ntds, tmout; + long n, tot; + ulong load; + char *err; + char buf[128]; + uchar *c; + Ctlr *ctlr; + Qh* qh; + Td *td, *ltd, *td0, *ntd; + + ctlr = ep->hp->aux; + io->debug = ep->debug; + tmout = ep->tmout; + ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n", + io->tok == Tdtokin ? "in" : "out", + ep->dev->nb, ep->nb, io, count, ctlr->load); + if((ehcidebug > 1 || ep->debug > 1) && io->tok != Tdtokin){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("echi epio: user data: %s\n", buf); + } + if(mustlock){ + qlock(io); + if(waserror()){ + qunlock(io); + nexterror(); + } + } + io->err = nil; + ilock(ctlr); + qh = io->qh; + if(qh == nil || qh->state == Qclose){ /* Tds released by cancelio */ + iunlock(ctlr); + error(io->err ? 
io->err : Eio); + } + if(qh->state != Qidle) + panic("epio: qh not idle"); + qh->state = Qinstall; + iunlock(ctlr); + + c = a; + td0 = ltd = nil; + load = tot = 0; + do{ + n = (Tdmaxpkt / ep->maxpkt) * ep->maxpkt; + if(count-tot < n) + n = count-tot; + if(c != nil && io->tok != Tdtokin) + td = epgettd(io, Tdactive, c+tot, n, ep->maxpkt); + else + td = epgettd(io, Tdactive, nil, n, ep->maxpkt); + if(td0 == nil) + td0 = td; + else + tdlinktd(ltd, td); + ltd = td; + tot += n; + load += ep->load; + }while(tot < count); + if(td0 == nil || ltd == nil) + panic("epio: no td"); + + ltd->csw |= Tdioc; /* the last one interrupts */ + coherence(); + + ddeprint("ehci: load %uld ctlr load %uld\n", load, ctlr->load); + if(ehcidebug > 1 || ep->debug > 1) + dumptd(td0, "epio: put: "); + + ilock(ctlr); + if(qh->state != Qclose){ + io->iotime = TK2MS(sys->ticks); + qh->state = Qrun; + coherence(); + qhlinktd(qh, td0); + ctlr->nreqs++; + ctlr->load += load; + } + iunlock(ctlr); + + if(ctlr->poll.does) + wakeup(&ctlr->poll); + + epiowait(ep->hp, io, tmout, load); + if(ehcidebug > 1 || ep->debug > 1){ + dumptd(td0, "epio: got: "); + qhdump(qh); + } + err = io->err; + + tot = 0; + c = a; + saved = 0; + ntds = 0; + for(td = td0; td != nil; td = ntd){ + ntds++; + /* + * Use td tok, not io tok, because of setup packets. + * Also, we must save the next toggle value from the + * last completed Td (in case of a short packet, or + * fewer than the requested number of packets in the + * Td being transferred). 
+ */ + if(td->csw & (Tdhalt|Tdactive)) + saved++; + else{ + if(!saved){ + io->toggle = td->csw & Tddata1; + coherence(); + } + if(err == nil && (n = td->ndata) > 0 && tot < count){ + if((tot + n) > count) + n = count - tot; + if(c != nil && (td->csw & Tdtok) == Tdtokin){ + memmove(c, td->data, n); + c += n; + } + tot += n; + } + } + ntd = td->next; + tdfree(td); + } + if(mustlock){ + qunlock(io); + poperror(); + } + ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n", + io, ntds, tot, err); + if(err == Estalled) + return 0; /* that's our convention */ + if(err != nil) + error(err); + return tot; +} + +static long +epread(Ep *ep, void *a, long count) +{ + Ctlio *cio; + Qio *io; + Isoio *iso; + char buf[160]; + ulong delta; + + ddeprint("ehci: epread\n"); + if(ep->aux == nil) + panic("epread: not open"); + + pollcheck(ep->hp); + + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + qlock(cio); + if(waserror()){ + qunlock(cio); + nexterror(); + } + ddeprint("epread ctl ndata %d\n", cio->ndata); + if(cio->ndata < 0) + error("request expected"); + else if(cio->ndata == 0){ + cio->ndata = -1; + count = 0; + }else{ + if(count > cio->ndata) + count = cio->ndata; + if(count > 0) + memmove(a, cio->data, count); + /* BUG for big transfers */ + free(cio->data); + cio->data = nil; + cio->ndata = 0; /* signal EOF next time */ + } + qunlock(cio); + poperror(); + if(ehcidebug>1 || ep->debug){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("epread: %s\n", buf); + } + return count; + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OREAD], a, count, 1); + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OREAD].iotime + 1; + if(delta < ep->pollival / 2) + tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OREAD], a, count, 1); + case Tiso: + iso = ep->aux; + return episoread(ep, iso, a, count); + } + return -1; +} + +/* + * Control transfers are one setup write (data0) 
+ * plus zero or more reads/writes (data1, data0, ...) + * plus a final write/read with data1 to ack. + * For both host to device and device to host we perform + * the entire transfer when the user writes the request, + * and keep any data read from the device for a later read. + * We call epio three times instead of placing all Tds at + * the same time because doing so leads to crc/tmout errors + * for some devices. + * Upon errors on the data phase we must still run the status + * phase or the device may cease responding in the future. + */ +static long +epctlio(Ep *ep, Ctlio *cio, void *a, long count) +{ + uchar *c; + long len; + + ddeprint("epctlio: cio %#p ep%d.%d count %ld\n", + cio, ep->dev->nb, ep->nb, count); + if(count < Rsetuplen) + error("short usb comand"); + qlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + if(waserror()){ + free(cio->data); + cio->data = nil; + cio->ndata = 0; + qunlock(cio); + nexterror(); + } + + /* set the address if unset and out of configuration state */ + if(ep->dev->state != Dconfig && ep->dev->state != Dreset) + if(cio->usbid == 0){ + cio->usbid = (ep->nb&Epmax) << 7 | ep->dev->nb&Devmax; + coherence(); + qhsetaddr(cio->qh, cio->usbid); + } + /* adjust maxpkt if the user has learned a different one */ + if(qhmaxpkt(cio->qh) != ep->maxpkt) + qhsetmaxpkt(cio->qh, ep->maxpkt); + c = a; + cio->tok = Tdtoksetup; + cio->toggle = Tddata0; + coherence(); + if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen) + error(Eio); + a = c + Rsetuplen; + count -= Rsetuplen; + + cio->toggle = Tddata1; + if(c[Rtype] & Rd2h){ + cio->tok = Tdtokin; + len = GET2(c+Rcount); + if(len <= 0) + error("bad length in d2h request"); + if(len > Maxctllen) + error("d2h data too large to fit in ehci"); + a = cio->data = smalloc(len+1); + }else{ + cio->tok = Tdtokout; + len = count; + } + coherence(); + if(len > 0) + if(waserror()) + len = -1; + else{ + len = epio(ep, cio, a, len, 0); + poperror(); + } + if(c[Rtype] & Rd2h){ + count = Rsetuplen; 
+ cio->ndata = len; + cio->tok = Tdtokout; + }else{ + if(len < 0) + count = -1; + else + count = Rsetuplen + len; + cio->tok = Tdtokin; + } + cio->toggle = Tddata1; + coherence(); + epio(ep, cio, nil, 0, 0); + qunlock(cio); + poperror(); + ddeprint("epctlio cio %#p return %ld\n", cio, count); + return count; +} + +static long +epwrite(Ep *ep, void *a, long count) +{ + Qio *io; + Ctlio *cio; + Isoio *iso; + ulong delta; + + pollcheck(ep->hp); + + ddeprint("ehci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux == nil) + panic("ehci: epwrite: not open"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + return epctlio(ep, cio, a, count); + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OWRITE].iotime + 1; + if(delta < ep->pollival) + tsleep(&up->sleep, return0, 0, ep->pollival - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tiso: + iso = ep->aux; + return episowrite(ep, iso, a, count); + } + return -1; +} + +static void +isofsinit(Ep *ep, Isoio *iso) +{ + long left; + Sitd *td, *ltd; + int i; + ulong frno; + + left = 0; + ltd = nil; + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + td = sitdalloc(); + td->data = iso->data + i * ep->maxpkt; + td->epc = ep->dev->port << Stdportshift; + td->epc |= ep->dev->hub << Stdhubshift; + td->epc |= ep->nb << Stdepshift; + td->epc |= ep->dev->nb << Stddevshift; + td->mfs = 034 << Stdscmshift | 1 << Stdssmshift; + if(ep->mode == OREAD){ + td->epc |= Stdin; + td->mdata = ep->maxpkt; + }else{ + td->mdata = (ep->hz+left) * ep->pollival / 1000; + td->mdata *= ep->samplesz; + left = (ep->hz+left) * ep->pollival % 1000; + if(td->mdata > ep->maxpkt){ + print("ehci: ep%d.%d: size > maxpkt\n", + ep->dev->nb, ep->nb); + print("size = %d max = %ld\n", + td->mdata, ep->maxpkt); + td->mdata = ep->maxpkt; + } + } + coherence(); + + iso->sitdps[frno] = td; + 
coherence(); + sitdinit(iso, td); + if(ltd != nil) + ltd->next = td; + ltd = td; + frno = TRUNC(frno+ep->pollival, Nisoframes); + } + ltd->next = iso->sitdps[iso->td0frno]; + coherence(); +} + +static void +isohsinit(Ep *ep, Isoio *iso) +{ + int ival, p; + long left; + ulong frno, i, pa; + Itd *ltd, *td; + + iso->hs = 1; + ival = 1; + if(ep->pollival > 8) + ival = ep->pollival/8; + left = 0; + ltd = nil; + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + td = itdalloc(); + td->data = iso->data + i * 8 * iso->maxsize; + pa = PADDR(td->data) & ~0xFFF; + for(p = 0; p < 8; p++) + td->buffer[i] = pa + p * 0x1000; + td->buffer[0] = PADDR(iso->data) & ~0xFFF | + ep->nb << Itdepshift | ep->dev->nb << Itddevshift; + if(ep->mode == OREAD) + td->buffer[1] |= Itdin; + else + td->buffer[1] |= Itdout; + td->buffer[1] |= ep->maxpkt << Itdmaxpktshift; + td->buffer[2] |= ep->ntds << Itdntdsshift; + + if(ep->mode == OREAD) + td->mdata = 8 * iso->maxsize; + else{ + td->mdata = (ep->hz + left) * ep->pollival / 1000; + td->mdata *= ep->samplesz; + left = (ep->hz + left) * ep->pollival % 1000; + } + coherence(); + iso->itdps[frno] = td; + coherence(); + itdinit(iso, td); + if(ltd != nil) + ltd->next = td; + ltd = td; + frno = TRUNC(frno + ival, Nisoframes); + } +} + +static void +isoopen(Ctlr *ctlr, Ep *ep) +{ + int ival; /* pollival in ms */ + int tpf; /* tds per frame */ + int i, n, w, woff; + ulong frno; + Isoio *iso; + + iso = ep->aux; + switch(ep->mode){ + case OREAD: + iso->tok = Tdtokin; + break; + case OWRITE: + iso->tok = Tdtokout; + break; + default: + error("iso i/o is half-duplex"); + } + iso->usbid = ep->nb << 7 | ep->dev->nb & Devmax; + iso->state = Qidle; + coherence(); + iso->debug = ep->debug; + ival = ep->pollival; + tpf = 1; + if(ep->dev->speed == Highspeed){ + tpf = 8; + if(ival <= 8) + ival = 1; + else + ival /= 8; + } + assert(ival != 0); + iso->nframes = Nisoframes / ival; + if(iso->nframes < 3) + error("uhci isoopen bug"); /* we need at least 3 tds */ 
+ iso->maxsize = ep->ntds * ep->maxpkt; + if(ctlr->load + ep->load > 800) + print("usb: ehci: bandwidth may be exceeded\n"); + ilock(ctlr); + ctlr->load += ep->load; + ctlr->isoload += ep->load; + ctlr->nreqs++; + dprint("ehci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload); + diprint("iso nframes %d pollival %uld ival %d maxpkt %uld ntds %d\n", + iso->nframes, ep->pollival, ival, ep->maxpkt, ep->ntds); + iunlock(ctlr); + if(ctlr->poll.does) + wakeup(&ctlr->poll); + + /* + * From here on this cannot raise errors + * unless we catch them and release here all memory allocated. + */ + assert(ep->maxpkt > 0 && ep->ntds > 0 && ep->ntds < 4); + assert(ep->maxpkt <= 1024); + iso->tdps = smalloc(sizeof(uintptr) * Nisoframes); + iso->data = smalloc(iso->nframes * tpf * ep->ntds * ep->maxpkt); + iso->td0frno = TRUNC(ctlr->opio->frno + 10, Nisoframes); + /* read: now; write: 1s ahead */ + + if(ep->dev->speed == Highspeed) + isohsinit(ep, iso); + else + isofsinit(ep, iso); + iso->tdu = iso->tdi = iso->itdps[iso->td0frno]; + iso->stdu = iso->stdi = iso->sitdps[iso->td0frno]; + coherence(); + + ilock(ctlr); + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + *iso->tdps[frno] = ctlr->frames[frno]; + frno = TRUNC(frno+ival, Nisoframes); + } + + /* + * Iso uses a virtual frame window of Nisoframes, and we must + * fill the actual ctlr frame array by placing ctlr->nframes/Nisoframes + * copies of the window in the frame array. 
+ */ + assert(ctlr->nframes >= Nisoframes && Nisoframes >= iso->nframes); + assert(Nisoframes >= Nintrleafs); + n = ctlr->nframes / Nisoframes; + for(w = 0; w < n; w++){ + frno = iso->td0frno; + woff = w * Nisoframes; + for(i = 0; i < iso->nframes ; i++){ + assert(woff+frno < ctlr->nframes); + assert(iso->tdps[frno] != nil); + if(ep->dev->speed == Highspeed) + ctlr->frames[woff+frno] = PADDR(iso->tdps[frno]) + |Litd; + else + ctlr->frames[woff+frno] = PADDR(iso->tdps[frno]) + |Lsitd; + coherence(); + frno = TRUNC(frno+ep->pollival, Nisoframes); + } + } + coherence(); + iso->next = ctlr->iso; + ctlr->iso = iso; + coherence(); + iso->state = Qdone; + iunlock(ctlr); + if(ehcidebug > 1 || iso->debug >1) + isodump(iso, 0); +} + +/* + * Allocate the endpoint and set it up for I/O + * in the controller. This must follow what's said + * in Ep regarding configuration, including perhaps + * the saved toggles (saved on a previous close of + * the endpoint data file by epclose). + */ +static void +epopen(Ep *ep) +{ + Ctlr *ctlr; + Ctlio *cio; + Qio *io; + int usbid; + + ctlr = ep->hp->aux; + deprint("ehci: epopen ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux != nil) + panic("ehci: epopen called with open ep"); + if(waserror()){ + free(ep->aux); + ep->aux = nil; + nexterror(); + } + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tiso: + ep->aux = smalloc(sizeof(Isoio)); + isoopen(ctlr, ep); + break; + case Tctl: + cio = ep->aux = smalloc(sizeof(Ctlio)); + cio->debug = ep->debug; + cio->ndata = -1; + cio->data = nil; + if(ep->dev->isroot != 0 && ep->nb == 0) /* root hub */ + break; + cio->qh = qhalloc(ctlr, ep, cio, "epc"); + break; + case Tbulk: + ep->pollival = 1; /* assume this; doesn't really matter */ + /* and fall... 
*/ + case Tintr: + io = ep->aux = smalloc(sizeof(Qio)*2); + io[OREAD].debug = io[OWRITE].debug = ep->debug; + usbid = (ep->nb&Epmax) << 7 | ep->dev->nb &Devmax; + assert(ep->pollival != 0); + if(ep->mode != OREAD){ + if(ep->toggle[OWRITE] != 0) + io[OWRITE].toggle = Tddata1; + else + io[OWRITE].toggle = Tddata0; + io[OWRITE].tok = Tdtokout; + io[OWRITE].usbid = usbid; + io[OWRITE].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + io[OWRITE].qh = qhalloc(ctlr, ep, io+OWRITE, "epw"); + } + if(ep->mode != OWRITE){ + if(ep->toggle[OREAD] != 0) + io[OREAD].toggle = Tddata1; + else + io[OREAD].toggle = Tddata0; + io[OREAD].tok = Tdtokin; + io[OREAD].usbid = usbid; + io[OREAD].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + io[OREAD].qh = qhalloc(ctlr, ep, io+OREAD, "epr"); + } + break; + } + coherence(); + if(ehcidebug>1 || ep->debug) + dump(ep->hp); + deprint("ehci: epopen done\n"); + poperror(); +} + +static void +cancelio(Ctlr *ctlr, Qio *io) +{ + Qh *qh; + + ilock(ctlr); + qh = io->qh; + if(qh == nil || qh->state == Qclose){ + iunlock(ctlr); + return; + } + dqprint("ehci: cancelio for qh %#p state %s\n", + qh, qhsname[qh->state]); + aborttds(qh); + qh->state = Qclose; + iunlock(ctlr); + while(waserror()) + ; + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + wakeup(io); + qlock(io); + /* wait for epio if running */ + if(io->qh == qh) + io->qh = nil; + qunlock(io); + + qhfree(ctlr, qh); +} + +static void +cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, ulong load) +{ + int frno, i, n, t, w, woff; + ulong *lp, *tp; + Isoio **il; + Itd *td; + Sitd *std; + + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + return; + } + ctlr->nreqs--; + if(iso->state != Qrun && iso->state != Qdone) + panic("bad iso state"); + iso->state = Qclose; + coherence(); + if(ctlr->isoload < load) + panic("ehci: low isoload"); + ctlr->isoload -= load; + ctlr->load -= load; + for(il = &ctlr->iso; *il != nil; il = &(*il)->next) + if(*il == iso) + break; + if(*il == nil) + 
panic("cancleiso: not found"); + *il = iso->next; + + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + tp = iso->tdps[frno]; + if(iso->hs != 0){ + td = iso->itdps[frno]; + for(t = 0; t < nelem(td->csw); t++) + td->csw[t] &= ~(Itdioc|Itdactive); + }else{ + std = iso->sitdps[frno]; + std->csw &= ~(Stdioc|Stdactive); + } + coherence(); + for(lp = &ctlr->frames[frno]; !(*lp & Lterm); + lp = &LPTR(*lp)[0]) + if(LPTR(*lp) == tp) + break; + if(*lp & Lterm) + panic("cancelisoio: td not found"); + *lp = tp[0]; + /* + * Iso uses a virtual frame window of Nisoframes, and we must + * restore pointers in copies of the window kept at ctlr->frames. + */ + if(lp == &ctlr->frames[frno]){ + n = ctlr->nframes / Nisoframes; + for(w = 1; w < n; w++){ + woff = w * Nisoframes; + ctlr->frames[woff+frno] = *lp; + } + } + coherence(); + frno = TRUNC(frno+pollival, Nisoframes); + } + iunlock(ctlr); + + /* + * wakeup anyone waiting for I/O and + * wait to be sure no I/O is in progress in the controller. + * and then wait to be sure episo* is no longer running. 
+ */ + wakeup(iso); + diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso); + tsleep(&up->sleep, return0, 0, 5); + qlock(iso); + qunlock(iso); + diprint("cancelisoio iso %#p releasing iso\n", iso); + + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + if(iso->hs != 0) + itdfree(iso->itdps[frno]); + else + sitdfree(iso->sitdps[frno]); + iso->tdps[frno] = nil; + frno = TRUNC(frno+pollival, Nisoframes); + } + free(iso->tdps); + iso->tdps = nil; + free(iso->data); + iso->data = nil; + coherence(); +} + +static void +epclose(Ep *ep) +{ + Qio *io; + Ctlio *cio; + Isoio *iso; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + deprint("ehci: epclose ep%d.%d\n", ep->dev->nb, ep->nb); + + if(ep->aux == nil) + panic("ehci: epclose called with closed ep"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + cancelio(ctlr, cio); + free(cio->data); + cio->data = nil; + break; + case Tintr: + case Tbulk: + io = ep->aux; + ep->toggle[OREAD] = ep->toggle[OWRITE] = 0; + if(ep->mode != OWRITE){ + cancelio(ctlr, &io[OREAD]); + if(io[OREAD].toggle == Tddata1) + ep->toggle[OREAD] = 1; + } + if(ep->mode != OREAD){ + cancelio(ctlr, &io[OWRITE]); + if(io[OWRITE].toggle == Tddata1) + ep->toggle[OWRITE] = 1; + } + coherence(); + break; + case Tiso: + iso = ep->aux; + cancelisoio(ctlr, iso, ep->pollival, ep->load); + break; + default: + panic("epclose: bad ttype"); + } + free(ep->aux); + ep->aux = nil; +} + +/* + * return the smallest i such that (1<<i) >= n, + * i.e. the exponent, not the power of 2 itself + */ +static int +flog2(int n) +{ + int i; + + for(i = 0; (1 << i) < n; i++) + ; + return i; +} + +/* + * build the periodic scheduling tree: + * framesize must be a multiple of the tree size + */ +static void +mkqhtree(Ctlr *ctlr) +{ + int i, n, d, o, leaf0, depth; + ulong leafs[Nintrleafs]; + Qh *qh; + Qh **tree; + Qtree *qt; + + depth = flog2(Nintrleafs); + n = (1 << (depth+1)) - 1; + qt = mallocz(sizeof(*qt), 1); + if(qt == nil) + panic("ehci: mkqhtree: no memory"); + qt->nel = n; + qt->depth = depth; + /* n elements of bw: size by the element, not by the pointer */ + qt->bw = mallocz(n * 
sizeof(*qt->bw), 1); + qt->root = tree = mallocz(n * sizeof(Qh *), 1); + if(qt->bw == nil || tree == nil) + panic("ehci: mkqhtree: no memory"); + for(i = 0; i < n; i++){ + tree[i] = qh = edalloc(); + if(qh == nil) + panic("ehci: mkqhtree: no memory"); + qh->nlink = qh->alink = qh->link = Lterm; + qh->csw = Tdhalt; + qh->state = Qidle; + coherence(); + if(i > 0) + qhlinkqh(tree[i], tree[(i-1)/2]); + } + ctlr->ntree = i; + dprint("ehci: tree: %d endpoints allocated\n", i); + + /* distribute leaves evenly round the frame list */ + leaf0 = n / 2; + for(i = 0; i < Nintrleafs; i++){ + o = 0; + for(d = 0; d < depth; d++){ + o <<= 1; + if(i & (1 << d)) + o |= 1; + } + if(leaf0 + o >= n){ + print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n); + break; + } + leafs[i] = PADDR(tree[leaf0 + o]) | Lqh; + } + assert((ctlr->nframes % Nintrleafs) == 0); + for(i = 0; i < ctlr->nframes; i += Nintrleafs){ + memmove(ctlr->frames + i, leafs, sizeof leafs); + coherence(); + } + ctlr->tree = qt; + coherence(); +} + +void +ehcimeminit(Ctlr *ctlr) +{ + int i, frsize; + Eopio *opio; + + opio = ctlr->opio; + frsize = ctlr->nframes * sizeof(ulong); + assert((frsize & 0xFFF) == 0); /* must be 4k aligned */ + ctlr->frames = mallocalign(frsize, frsize, 0, 0); + if(ctlr->frames == nil) + panic("ehci reset: no memory"); + + for (i = 0; i < ctlr->nframes; i++) + ctlr->frames[i] = Lterm; + opio->frbase = PADDR(ctlr->frames); + opio->frno = 0; + coherence(); + + qhalloc(ctlr, nil, nil, nil); /* init async list */ + mkqhtree(ctlr); /* init sync list */ + edfree(edalloc()); /* try to get some ones pre-allocated */ + + dprint("ehci %#p flb %#ux frno %#ux\n", + ctlr->capio, opio->frbase, opio->frno); +} + +static void +init(Hci *hp) +{ + Ctlr *ctlr; + Eopio *opio; + int i; + static int ctlrno; + + hp->highspeed = 1; + ctlr = hp->aux; + opio = ctlr->opio; + dprint("ehci %#p init\n", ctlr->capio); + + ilock(ctlr); + /* + * Unless we activate frroll interrupt + * some machines won't post other interrupts.
+	 */
+	opio->intr = Iusb|Ierr|Iportchg|Ihcerr|Iasync;
+	coherence();
+	opio->cmd |= Cpse;
+	coherence();
+	opio->cmd |= Case;
+	coherence();
+	ehcirun(ctlr, 1);
+
+	/* route all ports to us */
+	opio->config = Callmine;
+	coherence();
+
+	for (i = 0; i < hp->nports; i++)
+		opio->portsc[i] = Pspower;
+	iunlock(ctlr);
+	if(ehcidebug > 1)
+		dump(hp);
+	ctlrno++;
+}
+
+/* install this driver's entry points and type name in hp */
+void
+ehcilinkage(Hci *hp)
+{
+	hp->init = init;
+	hp->dump = dump;
+	hp->interrupt = interrupt;
+	hp->epopen = epopen;
+	hp->epclose = epclose;
+	hp->epread = epread;
+	hp->epwrite = epwrite;
+	hp->seprintep = seprintep;
+	hp->portenable = portenable;
+	hp->portreset = portreset;
+	hp->portstatus = portstatus;
+//	hp->shutdown = shutdown;
+//	hp->debug = setdebug;
+	hp->type = "ehci";
+}
diff -Nru /sys/src/9k/port/watermarks.c /sys/src/9k/port/watermarks.c
--- /sys/src/9k/port/watermarks.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/port/watermarks.c	Wed Dec 9 00:00:00 2015
@@ -0,0 +1,56 @@
+/*
+ * high-watermark measurements
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * reset *wp to all zeros, then record its ceiling and label.
+ * name is stored by reference, not copied.
+ */
+void
+initmark(Watermark *wp, int max, char *name)
+{
+	memset(wp, 0, sizeof *wp);
+	wp->max = max;
+	wp->name = name;
+}
+
+/*
+ * record a new current value, clamped to [0, wp->max].
+ * highwater tracks the largest value seen; hitmax counts
+ * each arrival at max from a lower value.
+ */
+void
+notemark(Watermark *wp, int val)
+{
+	/* enforce obvious limits */
+	if (val < 0)
+		val = 0;
+	else if (val > wp->max)
+		val = wp->max;
+
+	if (val > wp->highwater)
+		wp->highwater = val;
+	/*
+	 * counted outside the highwater update: nested inside it,
+	 * this test could succeed only once, capping hitmax at 1.
+	 */
+	if (val == wp->max && wp->curr < val)
+		wp->hitmax++;
+	wp->curr = val;
+}
+
+/*
+ * format a one-line summary of *wp into buf (bounded by ebuf);
+ * returns seprint's result.
+ */
+char *
+seprintmark(char *buf, char *ebuf, Watermark *wp)
+{
+	return seprint(buf, ebuf, "%s:\thighwater %d/%d curr %d hitmax %d\n",
+		wp->name, wp->highwater, wp->max, wp->curr, wp->hitmax);
+}
diff -Nru /sys/src/9k/root/blow.c /sys/src/9k/root/blow.c
--- /sys/src/9k/root/blow.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/root/blow.c	Wed Dec 9 00:00:00 2015
@@ -0,0 +1,297 @@
+#include <u.h>
+#include <libc.h>
+
+#define ESTR 256
+
+static void
+error(char* fmt, ...)
+{
+	va_list v;
+	char *e, estr[ESTR], *p;
+
+	va_start(v, fmt);
+	e = estr + ESTR;
+	p = seprint(estr, e, "%s: ", argv0);
+	p = vseprint(p, e, fmt, v);
+	p = seprint(p, e, "\n");
+	va_end(v);
+
+	write(2, estr, p-estr);
+}
+
+/*
+ * print "argv0: message\n" on fd 2 and exit with status "fatal".
+ * a newline is appended here, so callers need not supply one.
+ */
+static void
+fatal(char* fmt, ...)
+{
+	va_list v;
+	char *e, estr[ESTR], *p;
+
+	va_start(v, fmt);
+	e = estr + ESTR;
+	p = seprint(estr, e, "%s: ", argv0);
+	p = vseprint(p, e, fmt, v);
+	p = seprint(p, e, "\n");
+	va_end(v);
+
+	write(2, estr, p - estr);
+	exits("fatal");
+}
+
+/* print the usage line on fd 2 and exit with status "usage" */
+static void
+usage(void)
+{
+	char *e, estr[ESTR], *p;
+
+	e = estr + ESTR;
+	p = seprint(estr, e, "usage: %s"
+		" [whatever]"
+		"\n",
+		argv0);
+	write(2, estr, p-estr);
+	exits("usage");
+}
+
+/* the low w bits of v, shifted left to bit offset o */
+#define F(v, o, w)	(((v) & ((1<<(w))-1))<<(o))
+
+enum {
+	X		= 0,		/* dimension */
+	Y		= 1,
+	Z		= 2,
+	N		= 3,
+
+	Chunk		= 32,		/* granularity of FIFO */
+	Pchunk		= 8,		/* Chunks in a packet */
+
+	Quad		= 16,
+};
+
+/*
+ * Packet header. The hardware requires an 8-byte header
+ * of which the last two are reserved (they contain a sequence
+ * number and a header checksum inserted by the hardware).
+ * The hardware also requires the packet to be aligned on a
+ * 128-bit boundary for loading into the HUMMER.
+ */
+typedef struct Tpkt Tpkt;
+struct Tpkt {
+	u8int	sk;			/* Skip Checksum Control */
+	u8int	hint;			/* Hint|Dp|Pid0 */
+	u8int	size;			/* Size|Pid1|Dm|Dy|VC */
+	u8int	dst[N];			/* Destination Coordinates */
+	u8int	_6_[2];			/* reserved */
+	u8int	_8_[8];			/* protocol header */
+	u8int	payload[];
+};
+
+/*
+ * SKIP is a field in .sk giving the number of 2-bytes
+ * to skip from the top of the packet before including
+ * the packet bytes into the running checksum.
+ * SIZE is a field in .size giving the size of the
+ * packet in 32-byte 'chunks'.
+ */
+#define SKIP(n)		F(n, 1, 7)
+#define SIZE(n)		F(n, 5, 3)
+
+enum {
+	Sk		= 0x01,		/* Skip Checksum */
+
+	Pid0		= 0x01,		/* Destination Group FIFO MSb */
+	Dp		= 0x02,		/* Multicast Deposit */
+	Hzm		= 0x04,		/* Z- Hint */
+	Hzp		= 0x08,		/* Z+ Hint */
+	Hym		= 0x10,		/* Y- Hint */
+	Hyp		= 0x20,		/* Y+ Hint */
+	Hxm		= 0x40,		/* X- Hint */
+	Hxp		= 0x80,		/* X+ Hint */
+
+	Vcd0		= 0x00,		/* Dynamic 0 VC */
+	Vcd1		= 0x01,		/* Dynamic 1 VC */
+	Vcbn		= 0x02,		/* Deterministic Bubble VC */
+	Vcbp		= 0x03,		/* Deterministic Priority VC */
+	Dy		= 0x04,		/* Dynamic Routing */
+	Dm		= 0x08,		/* DMA Mode */
+	Pid1		= 0x10,		/* Destination Group FIFO LSb */
+};
+
+/*
+ * find the line of buf that starts with item and has the form
+ *	item: x A y B z C
+ * and store A, B, C in d[0..2].  each value must be in 0..255
+ * since d[] holds bytes.  returns 0 on success, -1 otherwise.
+ */
+static int
+torusparse(u8int d[3], char* item, char* buf)
+{
+	int n;
+	char *p;
+
+	if((p = strstr(buf, item)) == nil || (p != buf && *(p-1) != '\n'))
+		return -1;
+	n = strlen(item);
+	if(strlen(p) < n+sizeof(": x 0 y 0 z 0"))
+		return -1;
+	p += n+sizeof(": x ")-1;
+	if(strncmp(p-4, ": x ", 4) != 0)
+		return -1;
+	if((n = strtol(p, &p, 0)) < 0 || n > 255 || *p != ' ' || *(p+1) != 'y')
+		return -1;
+	d[0] = n;
+	if((n = strtol(p+2, &p, 0)) < 0 || n > 255 || *p != ' ' || *(p+1) != 'z')
+		return -1;
+	d[1] = n;
+	if((n = strtol(p+2, &p, 0)) < 0 || n > 255 || (*p != '\n' && *p != '\0'))
+		return -1;
+	d[2] = n;
+
+	return 0;
+}
+
+/*
+ * blow [-l length] [-m mhz] [-n count] [-p procs] x y z
+ *
+ * write count packets of length bytes to /dev/torus, addressed to
+ * torus node x.y.(z-1), and report the rate from the cycle counter
+ * assuming a clock of mhz MHz.  with -p, up to 4 processes are
+ * forked, each wired to a cpu; only rank 0 drives the trace device.
+ */
+void
+main(int argc, char* argv[])
+{
+	Tpkt *tpkt;
+	u8int d[N];
+	char buf[512], *p, *s;
+	uvlong r, start, stop;
+	int count, fd, i, length, mhz, n, x, y, z, oldstyle;
+	int tracefd;
+	int procs = 1;
+	int rank = 0;
+	int pri = 19;
+
+	count = 1;
+	oldstyle = 0;
+	length = Pchunk*Chunk;
+	mhz = 850;
+
+	/*
+	 * option values are parsed from what EARGF returns, not from
+	 * argv[0], so the attached form (-l512) works as well as the
+	 * separate one (-l 512).
+	 */
+	ARGBEGIN{
+	default:
+		usage();
+		break;
+	case 'l':
+		s = EARGF(usage());
+		if((n = strtol(s, &p, 0)) <= 0 || p == s || *p != 0)
+			usage();
+		if(n <= Chunk)
+			usage();
+		if(oldstyle){
+			if(n % Chunk)
+				usage();
+		}
+		length = n;
+		break;
+	case 'm':
+		s = EARGF(usage());
+		if((n = strtol(s, &p, 0)) <= 0 || p == s || *p != 0)
+			usage();
+		mhz = n;
+		break;
+	case 'n':
+		s = EARGF(usage());
+		if((n = strtol(s, &p, 0)) <= 0 || p == s || *p != 0)
+			usage();
+		count = n;
+		break;
+	case 'p':
+		s = EARGF(usage());
+		if((n = strtol(s, &p, 0)) <= 0 || n > 4 || p == s || *p != 0)
+			usage();
+		procs = n;
+		break;
+	}ARGEND;
+
+	if(argc != 3)
+		usage();
+	/* the offending argument is a string: print it with %s */
+	if((x = strtol(argv[0], &p, 0)) < 0 || *p != 0)
+		fatal("x invalid: %s\n", argv[0]);
+	if((y = strtol(argv[1], &p, 0)) < 0 || *p != 0)
+		fatal("y invalid: %s\n", argv[1]);
+	if((z = strtol(argv[2], &p, 0)) <= 0 || *p != 0)
+		fatal("z invalid: %s\n", argv[2]);
+	z -= 1;
+
+	if((fd = open("/dev/torusstatus", OREAD)) < 0)
+		fatal("open /dev/torusstatus: %r\n");
+	/* leave room for the terminating NUL stored below */
+	if((n = read(fd, buf, sizeof(buf)-1)) < 0)
+		fatal("read /dev/torusstatus: %r\n");
+	close(fd);
+	buf[n] = 0;
+
+	if(torusparse(d, "size", buf) < 0)
+		fatal("parse /dev/torusstatus: <%s>\n", buf);
+	if(x >= d[X] || y >= d[Y] || z >= d[Z])
+		fatal("destination out of range: %d.%d.%d >= %d.%d.%d",
+			x, y, z, d[X], d[Y], d[Z]);
+
+	if((tpkt = mallocalign(length, Chunk, 0, 0)) == nil)
+		fatal("mallocalign tpkt\n");
+	memset(tpkt, 0, length);
+
+	tpkt->sk = SKIP(4);
+	tpkt->hint = 0;
+	tpkt->size = SIZE(Pchunk-1)|Dy|Vcd0;
+	tpkt->dst[X] = x;
+	tpkt->dst[Y] = y;
+	tpkt->dst[Z] = z;
+
+	if((fd = open("/dev/torus", ORDWR)) < 0)
+		fatal("open /dev/torus: %r\n");
+
+	tracefd = open("/dev/tracectl", ORDWR);
+	if (tracefd < 0)
+		print("Warning: no trace device, no traces\n");
+
+	/* fork at bottom of loop since we are proc 0 */
+	for(i = 0; i < procs; i++) {
+		int me = getpid();
+		char *name = smprint("/proc/%d/ctl", me);
+		int procfd = open(name, ORDWR);
+		char *cmd;
+		int amt;
+		assert (procfd > 0);
+		free(name);
+		rank = i;
+		//print("Wired to %d\n", (rank+1)%4);
+		cmd = smprint("wired %d\n", (rank+1)%4);
+		amt = write(procfd, cmd, strlen(cmd));
+		assert(amt >= strlen(cmd));
+		free(cmd);
+
+		if (pri) {
+			//print("Pri to %d\n", pri);
+			cmd = smprint("fixedpri %d\n", pri);
+			amt = write(procfd, cmd, strlen(cmd));
+			assert(amt >= strlen(cmd));
+			free(cmd);
+		}
+		/* the ctl requests were made by the writes; don't keep the fd */
+		close(procfd);
+
+		/* the parent leaves the loop; the child goes round as the next rank */
+		if (i < procs-1)
+			if (fork())
+				break;
+	}
+
+
+	sleep(1000); /* sync up forked processes */
+
+	if (tracefd > 0 && rank == 0)
+		if (write(tracefd, "start", 6) < 6)
+			print("Warning: could not start trace device\n");
+
+	cycles(&start);
+	for(i = 0; i < count; i++){
+		n = pwrite(fd, tpkt, length, 0);
+		if(n < 0)
+			fatal("write /dev/torus: %r\n");
+		else if(n < length)
+			fatal("write /dev/torus: short write %d\n", n);
+	}
+	cycles(&stop);
+
+	/* we may chop some off but tough */
+	if (tracefd > 0 && rank == 0)
+		if(write(tracefd, "stop", 5) < 5)
+			print("Warning: could not stop trace device\n");
+
+	close(fd);
+
+	/* bytes * MHz / cycles = bytes per microsecond; widen before multiplying */
+	r = (uvlong)count*length;
+	r *= mhz;
+	r /= stop - start;
+
+	print("%d writes of %d in %llud cycles @ %dMHz = %llud MB/s\n",
+		count, length, stop - start, mhz, r);
+
+	exits(0);
+}
diff -Nru /sys/src/9k/root/common /sys/src/9k/root/common
--- /sys/src/9k/root/common	Thu Jan 1 00:00:00 1970
+++ /sys/src/9k/root/common	Wed Dec 9 00:00:00 2015
@@ -0,0 +1,303 @@
+#
+# The master for this file is /n/fs/lib/ndb/common
+#
+
+#
+# real dns root server ips
+#
+dom=A.ROOT-SERVERS.NET ip=198.41.0.4
+dom=B.ROOT-SERVERS.NET ip=128.9.0.107
+dom=C.ROOT-SERVERS.NET ip=192.33.4.12
+dom=D.ROOT-SERVERS.NET ip=128.8.10.90
+dom=E.ROOT-SERVERS.NET ip=192.203.230.10
+dom=F.ROOT-SERVERS.NET ip=192.5.5.241
+dom=G.ROOT-SERVERS.NET ip=192.112.36.4
+dom=H.ROOT-SERVERS.NET ip=128.63.2.53
+dom=I.ROOT-SERVERS.NET ip=192.36.148.17
+dom=J.ROOT-SERVERS.NET ip=198.41.0.10
+dom=K.ROOT-SERVERS.NET ip=193.0.14.129
+dom=L.ROOT-SERVERS.NET ip=198.32.64.12 # old
+	ip=199.7.83.42 # new, nov 2007
+dom=M.ROOT-SERVERS.NET ip=202.12.27.33
+
+dom=a.gtld-servers.net ip=192.5.6.30
+dom=b.gtld-servers.net ip=192.33.14.30
+dom=c.gtld-servers.net ip=192.26.92.30
+dom=d.gtld-servers.net ip=192.31.80.30
+dom=e.gtld-servers.net ip=192.12.94.30
+dom=f.gtld-servers.net ip=192.35.51.30
+dom=g.gtld-servers.net ip=192.42.93.30
+dom=h.gtld-servers.net ip=192.54.112.30
+dom=i.gtld-servers.net ip=192.43.172.30
+dom=j.gtld-servers.net ip=192.48.79.30
+dom=k.gtld-servers.net ip=192.52.178.30 +dom=l.gtld-servers.net ip=192.41.162.30 +dom=m.gtld-servers.net ip=192.55.83.30 + +# +# spam defense. unfortunately, arin doesn't give negative +# rcodes for these non-routable addresses. we'll do it for them. +# +dom=10.in-addr.arpa soa= # rfc1918 zones + dom=16.172.in-addr.arpa soa= + dom=17.172.in-addr.arpa soa= + dom=18.172.in-addr.arpa soa= + dom=19.172.in-addr.arpa soa= + dom=20.172.in-addr.arpa soa= + dom=21.172.in-addr.arpa soa= + dom=22.172.in-addr.arpa soa= + dom=23.172.in-addr.arpa soa= + dom=24.172.in-addr.arpa soa= + dom=25.172.in-addr.arpa soa= + dom=26.172.in-addr.arpa soa= + dom=27.172.in-addr.arpa soa= + dom=28.172.in-addr.arpa soa= + dom=29.172.in-addr.arpa soa= + dom=30.172.in-addr.arpa soa= + dom=31.172.in-addr.arpa soa= + dom=168.192.in-addr.arpa soa= + dom=0.in-addr.arpa soa= # rfc3330 zones + dom=127.in-addr.arpa soa= + dom=254.169.in-addr.arpa soa= + dom=2.0.192.in-addr.arpa soa= + dom=255.255.255.255.in-addr.arpa soa= + dom=d.f.ip6.arpa soa= # rfc4193 recommendation + dom=0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa soa= # rfc4291 zones + dom=1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa soa= + dom=8.e.f.ip6.arpa soa= + dom=9.e.f.ip6.arpa soa= + dom=a.e.f.ip6.arpa soa= + dom=b.e.f.ip6.arpa soa= + refresh=3600 ttl=3600 + ns=ns1.cs.bell-labs.com + ns=ns2.cs.bell-labs.com + +# +# ip protocol numbers +# +protocol=reserved ipv4proto=0 +protocol=icmp ipv4proto=1 +protocol=igmp ipv4proto=2 +protocol=ggp ipv4proto=3 +protocol=ip ipv4proto=4 +protocol=st ipv4proto=5 +protocol=tcp ipv4proto=6 +protocol=ucl ipv4proto=7 +protocol=egp ipv4proto=8 +protocol=igp ipv4proto=9 +protocol=bbn-rcc-mon ipv4proto=10 +protocol=nvp-ii ipv4proto=11 +protocol=pup ipv4proto=12 +protocol=argus ipv4proto=13 +protocol=emcon ipv4proto=14 +protocol=xnet ipv4proto=15 +protocol=chaos ipv4proto=16 +protocol=udp ipv4proto=17 +protocol=mux ipv4proto=18 +protocol=dcn-meas 
ipv4proto=19 +protocol=hmp ipv4proto=20 +protocol=prm ipv4proto=21 +protocol=xns-idp ipv4proto=22 +protocol=trunk-1 ipv4proto=23 +protocol=trunk-2 ipv4proto=24 +protocol=leaf-1 ipv4proto=25 +protocol=leaf-2 ipv4proto=26 +protocol=rdp ipv4proto=27 +protocol=irtp ipv4proto=28 +protocol=iso-tp4 ipv4proto=29 +protocol=netblt ipv4proto=30 +protocol=mfe-nsp ipv4proto=31 +protocol=merit-inp ipv4proto=32 +protocol=sep ipv4proto=33 +protocol=3pc ipv4proto=34 +protocol=idpr ipv4proto=35 +protocol=xtp ipv4proto=36 +protocol=ddp ipv4proto=37 +protocol=idpr-cmtp ipv4proto=38 +protocol=tp++ ipv4proto=39 +protocol=il ipv4proto=40 +protocol=sip ipv4proto=41 +protocol=sdrp ipv4proto=42 +protocol=sip-sr ipv4proto=43 +protocol=sip-frag ipv4proto=44 +protocol=idrp ipv4proto=45 +protocol=rsvp ipv4proto=46 +protocol=gre ipv4proto=47 +protocol=mhrp ipv4proto=48 +protocol=bna ipv4proto=49 +protocol=sipp-esp ipv4proto=50 +protocol=sipp-ah ipv4proto=51 +protocol=i-nlsp ipv4proto=52 +protocol=swipe ipv4proto=53 +protocol=nhrp ipv4proto=54 +protocol=any ipv4proto=61 +protocol=cftp ipv4proto=62 +protocol=any ipv4proto=63 +protocol=sat-expak ipv4proto=64 +protocol=kryptolan ipv4proto=65 +protocol=rvd ipv4proto=66 +protocol=ippc ipv4proto=67 +protocol=any ipv4proto=68 +protocol=sat-mon ipv4proto=69 +protocol=visa ipv4proto=70 +protocol=ipcv ipv4proto=71 +protocol=cpnx ipv4proto=72 +protocol=cphb ipv4proto=73 +protocol=wsn ipv4proto=74 +protocol=pvp ipv4proto=75 +protocol=br-sat-mon ipv4proto=76 +protocol=sun-nd ipv4proto=77 +protocol=wb-mon ipv4proto=78 +protocol=wb-expak ipv4proto=79 +protocol=iso-ip ipv4proto=80 +protocol=vmtp ipv4proto=81 +protocol=secure-vmtp ipv4proto=82 +protocol=vines ipv4proto=83 +protocol=ttp ipv4proto=84 +protocol=nsfnet-igp ipv4proto=85 +protocol=dgp ipv4proto=86 +protocol=tcf ipv4proto=87 +protocol=igrp ipv4proto=88 +protocol=ospfigp ipv4proto=89 protocol=ospf +protocol=sprite-rpc ipv4proto=90 +protocol=larp ipv4proto=91 +protocol=mtp ipv4proto=92 +protocol=ax.25 
ipv4proto=93 +protocol=ipip ipv4proto=94 +protocol=micp ipv4proto=95 +protocol=scc-sp ipv4proto=96 +protocol=etherip ipv4proto=97 +protocol=encap ipv4proto=98 +protocol=any ipv4proto=99 +protocol=gmtp ipv4proto=100 +protocol=rudp ipv4proto=254 # unofficial + +# +# services +# +tcp=cs port=1 +tcp=echo port=7 +tcp=discard port=9 +tcp=systat port=11 +tcp=daytime port=13 +tcp=netstat port=15 +tcp=chargen port=19 +tcp=ftp-data port=20 +tcp=ftp port=21 +tcp=ssh port=22 +tcp=telnet port=23 +tcp=smtp port=25 +tcp=time port=37 +tcp=whois port=43 +tcp=dns port=53 +tcp=domain port=53 +tcp=uucp port=64 +tcp=gopher port=70 +tcp=rje port=77 +tcp=finger port=79 +tcp=http port=80 +tcp=link port=87 +tcp=supdup port=95 +tcp=hostnames port=101 +tcp=iso-tsap port=102 +tcp=x400 port=103 +tcp=x400-snd port=104 +tcp=csnet-ns port=105 +tcp=pop-2 port=109 +tcp=pop3 port=110 +tcp=portmap port=111 +tcp=uucp-path port=117 +tcp=nntp port=119 +tcp=netbios port=139 +tcp=imap4 port=143 +tcp=imap port=143 +tcp=NeWS port=144 +tcp=print-srv port=170 +tcp=z39.50 port=210 +tcp=fsb port=400 +tcp=sysmon port=401 +tcp=proxy port=402 +tcp=proxyd port=404 +tcp=https port=443 +tcp=cifs port=445 +tcp=ssmtp port=465 +tcp=rexec port=512 restricted= +tcp=login port=513 restricted= +tcp=shell port=514 restricted= +tcp=printer port=515 +tcp=ncp port=524 +tcp=courier port=530 +tcp=cscan port=531 +tcp=uucp port=540 +tcp=snntp port=563 +tcp=9fs port=564 +tcp=whoami port=565 +tcp=guard port=566 +tcp=ticket port=567 +tcp=fmclient port=729 +tcp=imaps port=993 +tcp=pop3s port=995 +tcp=ingreslock port=1524 +tcp=pptp port=1723 +tcp=nfs port=2049 +tcp=webster port=2627 +tcp=weather port=3000 +tcp=sip port=5060 +tcp=sips port=5061 +tcp=secstore port=5356 +tcp=vnc-http port=5800 +tcp=vnc port=5900 +tcp=Xdisplay port=6000 +tcp=styx port=6666 +tcp=mpeg port=6667 +tcp=rstyx port=6668 +tcp=infdb port=6669 +tcp=infsigner port=6671 +tcp=infcsigner port=6672 +tcp=inflogin port=6673 +tcp=bandt port=7330 +tcp=face port=32000 
+tcp=dhashgate port=11978 +tcp=exportfs port=17007 +tcp=rexexec port=17009 +tcp=ncpu port=17010 +tcp=cpu port=17013 +tcp=venti port=17034 +tcp=wiki port=17035 +tcp=vica port=17036 + +udp=echo port=7 +udp=tacacs port=49 +udp=tftp port=69 +udp=bootpc port=68 +udp=bootp port=67 +udp=domain port=53 +udp=dns port=53 +udp=portmap port=111 +udp=ntp port=123 +udp=netbios-ns port=137 +udp=snmp port=161 +udp=ikev2 port=500 +udp=syslog port=514 +udp=rip port=520 +udp=dhcp6c port=546 +udp=dhcp6s port=547 +udp=nfs port=2049 +udp=bfs port=2201 +udp=virgil port=2202 +udp=sip port=5060 +udp=bandt2 port=7331 +udp=oradius port=1812 +udp=radius port=1812 +udp=dhash port=11977 +udp=ulctl port=12666 +udp=uldata port=12667 +udp=dldata port=12668 + +gre=ppp port=34827 + +# +# authdom declarations need to be visible on the inside network, +# even for outside machines. putting them here ensures +# their visibility everywhere. +# diff -Nru /sys/src/9k/root/cpu.c /sys/src/9k/root/cpu.c --- /sys/src/9k/root/cpu.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/cpu.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,1153 @@ +/* + * cpu.c - Make a connection to a cpu server + * + * Invoked by listen as 'cpu -R | -N service net netdir' + * by users as 'cpu [-h system] [-c cmd args ...]' + */ + +#include +#include +#include +#include +#include +#include + +#define Maxfdata 8192 +#define MaxStr 128 + +void remoteside(int); +void fatal(int, char*, ...); +void lclnoteproc(int); +void rmtnoteproc(void); +void catcher(void*, char*); +void usage(void); +void writestr(int, char*, char*, int); +int readstr(int, char*, int); +char *rexcall(int*, char*, char*); +int setamalg(char*); +char *keyspec = ""; + +int notechan; +int exportpid; +char *system; +int cflag; +int dbg; +char *user; +char *patternfile; + +char *srvname = "ncpu"; +char *exportfs = "/bin/exportfs"; +char *ealgs = "rc4_256 sha1"; + +/* message size for exportfs; may be larger so we can do big graphics in CPU window */ +int msgsize = Maxfdata+IOHDRSZ; + +/* 
authentication mechanisms */
+static int	netkeyauth(int);
+static int	netkeysrvauth(int, char*);
+static int	p9auth(int);
+static int	srvp9auth(int, char*);
+static int	noauth(int);
+static int	srvnoauth(int, char*);
+
+typedef struct AuthMethod AuthMethod;
+struct AuthMethod {
+	char	*name;			/* name of method */
+	int	(*cf)(int);		/* client side authentication */
+	int	(*sf)(int, char*);	/* server side authentication */
+} authmethod[] =
+{
+	{ "p9",		p9auth,		srvp9auth,},
+	{ "netkey",	netkeyauth,	netkeysrvauth,},
+	{ "none",	noauth,		srvnoauth,},
+	{ nil,	nil}
+};
+AuthMethod *am = authmethod;	/* default is p9 */
+
+char *p9authproto = "p9any";
+
+int setam(char*);
+
+void
+usage(void)
+{
+	fprint(2, "usage: cpu [-h system] [-u user] [-a authmethod] [-e 'crypt hash'] [-k keypattern] [-P patternfile] [-c cmd args ...]\n");
+	exits("usage");
+}
+
+/*
+ * client side: parse options, call the cpu server, send it the
+ * command (if any) and our working directory, then become exportfs
+ * serving our namespace over the connection.
+ * -R and -O run the server side instead (see remoteside).
+ */
+void
+main(int argc, char **argv)
+{
+	char dat[MaxStr], buf[MaxStr], cmd[MaxStr], *p, *err;
+	int ac, fd, ms, data;
+	char *av[10];
+
+	/* see if we should use a larger message size */
+	fd = open("/dev/draw", OREAD);
+	if(fd > 0){
+		ms = iounit(fd);
+		if(msgsize < ms+IOHDRSZ)
+			msgsize = ms+IOHDRSZ;
+		close(fd);
+	}
+
+	user = getuser();
+	if(user == nil)
+		fatal(1, "can't read user name");
+	ARGBEGIN{
+	case 'a':
+		p = EARGF(usage());
+		if(setam(p) < 0)
+			fatal(0, "unknown auth method %s", p);
+		break;
+	case 'e':
+		ealgs = EARGF(usage());
+		if(*ealgs == 0 || strcmp(ealgs, "clear") == 0)
+			ealgs = nil;
+		break;
+	case 'd':
+		dbg++;
+		break;
+	case 'f':
+		/* ignored but accepted for compatibility */
+		break;
+	case 'O':
+		p9authproto = "p9sk2";
+		remoteside(1);				/* From listen */
+		break;
+	case 'R':				/* From listen */
+		remoteside(0);
+		break;
+	case 'h':
+		system = EARGF(usage());
+		break;
+	case 'c':
+		cflag++;
+		cmd[0] = '!';
+		cmd[1] = '\0';
+		while(p = ARGF()) {
+			/* cmd is a fixed buffer: refuse what won't fit */
+			if(strlen(cmd)+1+strlen(p) >= sizeof cmd)
+				fatal(0, "command too long");
+			strcat(cmd, " ");
+			strcat(cmd, p);
+		}
+		break;
+	case 'k':
+		keyspec = smprint("%s %s", keyspec, EARGF(usage()));
+		break;
+	case 'P':
+		patternfile = EARGF(usage());
+		break;
+	case 'u':
+		user = EARGF(usage());
+		keyspec = smprint("%s user=%s", keyspec, user);
+		break;
+	default:
+		usage();
+	}ARGEND;
+
+
+	if(argc != 0)
+		usage();
+
+	if(system == nil) {
+		p = getenv("cpu");
+		if(p == 0)
+			fatal(0, "set $cpu");
+		system = p;
+	}
+
+	if(err = rexcall(&data, system, srvname))
+		fatal(1, "%s: %s", err, system);
+
+	/* Tell the remote side the command to execute and where our working directory is */
+	if(cflag)
+		writestr(data, cmd, "command", 0);
+	if(getwd(dat, sizeof(dat)) == 0)
+		writestr(data, "NO", "dir", 0);
+	else
+		writestr(data, dat, "dir", 0);
+
+	/* start up a process to pass along notes */
+	lclnoteproc(data);
+
+	/*
+	 *  Wait for the other end to execute and start our file service
+	 *  of /mnt/term
+	 */
+	if(readstr(data, buf, sizeof(buf)) < 0)
+		fatal(1, "waiting for FS: %r");
+	if(strncmp("FS", buf, 2) != 0) {
+		print("remote cpu: %s", buf);
+		exits(buf);
+	}
+
+	/* Begin serving the gnot namespace */
+	close(0);
+	dup(data, 0);
+	close(data);
+
+	sprint(buf, "%d", msgsize);
+	ac = 0;
+	av[ac++] = exportfs;
+	av[ac++] = "-m";
+	av[ac++] = buf;
+	if(dbg)
+		av[ac++] = "-d";
+	if(patternfile != nil){
+		av[ac++] = "-P";
+		av[ac++] = patternfile;
+	}
+	av[ac] = nil;
+	exec(exportfs, av);
+	fatal(1, "starting exportfs");
+}
+
+/*
+ * print "cpu: message" on fd 2, followed by ": %r" if syserr
+ * is non-zero, and exit with the message as status.
+ */
+void
+fatal(int syserr, char *fmt, ...)
+{
+	Fmt f;
+	char *str;
+	va_list arg;
+
+	fmtstrinit(&f);
+	fmtprint(&f, "cpu: ");
+	va_start(arg, fmt);
+	fmtvprint(&f, fmt, arg);
+	va_end(arg);
+	if(syserr)
+		fmtprint(&f, ": %r");
+	fmtprint(&f, "\n");
+	str = fmtstrflush(&f);
+	write(2, str, strlen(str));
+	exits(str);
+}
+
+char *negstr = "negotiating authentication method";
+
+char bug[256];
+
+/*
+ * start /bin/srvold9p with the end p[0] of a new pipe as its
+ * standard input and fd as its standard output; return the
+ * other end of the pipe.  fd is closed in the caller.
+ */
+int
+old9p(int fd)
+{
+	int p[2];
+
+	if(pipe(p) < 0)
+		fatal(1, "pipe");
+
+	switch(rfork(RFPROC|RFFDG|RFNAMEG)) {
+	case -1:
+		fatal(1, "rfork srvold9p");
+	case 0:
+		if(fd != 1){
+			dup(fd, 1);
+			close(fd);
+		}
+		if(p[0] != 0){
+			dup(p[0], 0);
+			close(p[0]);
+		}
+		close(p[1]);
+		if(0){
+			fd = open("/sys/log/cpu", OWRITE);
+			if(fd != 2){
+				dup(fd, 2);
+				close(fd);
+			}
+			execl("/bin/srvold9p", "srvold9p", "-ds", nil);
+		} else
+			execl("/bin/srvold9p", "srvold9p", "-s", nil);
+		fatal(1, "exec srvold9p");
+	default:
+		close(fd);
+		close(p[0]);
+	}
+	return p[1];
+}
+
+/* Invoked with stdin, stdout and stderr connected to the network connection */
+void
+remoteside(int old)
+{
+	char user[MaxStr], home[MaxStr], buf[MaxStr], xdir[MaxStr], cmd[MaxStr];
+	int i, n, fd, badchdir, gotcmd;
+
+	rfork(RFENVG);
+	putenv("service", "cpu");
+	fd = 0;
+
+	/* negotiate authentication mechanism */
+	n = readstr(fd, cmd, sizeof(cmd));
+	if(n < 0)
+		fatal(1, "authenticating");
+	if(setamalg(cmd) < 0){
+		writestr(fd, "unsupported auth method", nil, 0);
+		fatal(1, "bad auth method %s", cmd);
+	} else
+		writestr(fd, "", "", 1);
+
+	fd = (*am->sf)(fd, user);
+	if(fd < 0)
+		fatal(1, "srvauth");
+
+	/* Set environment values for the user */
+	putenv("user", user);
+	/* user may fill its buffer, so "/usr/" plus user must be bounded */
+	snprint(home, sizeof home, "/usr/%s", user);
+	putenv("home", home);
+
+	/* Now collect invoking cpu's current directory or possibly a command */
+	gotcmd = 0;
+	if(readstr(fd, xdir, sizeof(xdir)) < 0)
+		fatal(1, "dir/cmd");
+	if(xdir[0] == '!') {
+		strcpy(cmd, &xdir[1]);
+		gotcmd = 1;
+		if(readstr(fd, xdir, sizeof(xdir)) < 0)
+			fatal(1, "dir");
+	}
+
+	/* Establish the new process at the current working directory of the
+	 * gnot */
+	badchdir = 0;
+	if(strcmp(xdir, "NO") == 0)
+		chdir(home);
+	else if(chdir(xdir) < 0) {
+		badchdir = 1;
+		chdir(home);
+	}
+
+	/* Start the gnot serving its namespace */
+	writestr(fd, "FS", "FS", 0);
+	writestr(fd, "/", "exportfs dir", 0);
+
+	n = read(fd, buf, sizeof(buf));
+	if(n != 2 || buf[0] != 'O' || buf[1] != 'K')
+		exits("remote tree");
+
+	if(old)
+		fd = old9p(fd);
+
+	/* make sure buffers are big by doing fversion explicitly; pick a huge number; other side will trim */
+	strcpy(buf, VERSION9P);
+	if(fversion(fd, 64*1024, buf, sizeof buf) < 0)
+		exits("fversion failed");
+	if(mount(fd, -1, "/mnt/term", MCREATE|MREPL, "") < 0)
+		exits("mount failed");
+
+	close(fd);
+
+	/* the remote noteproc uses the mount so it must follow it */
+	rmtnoteproc();
+
+	for(i = 0; i < 3; i++)
+		close(i);
+
+	if(open("/mnt/term/dev/cons", OREAD) != 0)
+		exits("open stdin");
+	if(open("/mnt/term/dev/cons", OWRITE) != 1)
+		exits("open stdout");
+	dup(1, 2);
+
+	if(badchdir)
+		print("cpu: failed to chdir to '%s'\n", xdir);
+
+	if(gotcmd)
+		execl("/bin/rc", "rc", "-lc", cmd, nil);
+	else
+		execl("/bin/rc", "rc", "-li", nil);
+	fatal(1, "exec shell");
+}
+
+/*
+ * dial service on host, negotiate the authentication method and
+ * run the client side of it.  on success *fd is the connection
+ * and 0 is returned; otherwise a string naming the failed step.
+ */
+char*
+rexcall(int *fd, char *host, char *service)
+{
+	char *na;
+	char dir[MaxStr];
+	char err[ERRMAX];
+	char msg[MaxStr];
+	int n;
+
+	na = netmkaddr(host, 0, service);
+	if((*fd = dial(na, 0, dir, 0)) < 0)
+		return "can't dial";
+
+	/* negotiate authentication mechanism */
+	if(ealgs != nil)
+		snprint(msg, sizeof(msg), "%s %s", am->name, ealgs);
+	else
+		snprint(msg, sizeof(msg), "%s", am->name);
+	writestr(*fd, msg, negstr, 0);
+	n = readstr(*fd, err, sizeof err);
+	if(n < 0)
+		return negstr;
+	if(*err){
+		/* err came from the network: never use it as a format */
+		werrstr("%s", err);
+		return negstr;
+	}
+
+	/* authenticate */
+	*fd = (*am->cf)(*fd);
+	if(*fd < 0)
+		return "can't authenticate";
+	return 0;
+}
+
+void
+writestr(int fd, char *str, char *thing, int ignore)
+{
+	int l, n;
+
+	l = strlen(str);
+	n = write(fd, str, l+1);
+	
if(!ignore && n < 0)
+		fatal(1, "writing network: %s", thing);
+}
+
+/*
+ * read a NUL-terminated string from fd into str, one byte at a
+ * time.  returns 0 once the NUL has been read; -1 on read error,
+ * on end of file, or if len bytes arrive without a NUL.
+ */
+int
+readstr(int fd, char *str, int len)
+{
+	int n;
+
+	while(len) {
+		n = read(fd, str, 1);
+		if(n < 0)
+			return -1;
+		if(n == 0){
+			/* nothing was stored in *str; don't test it */
+			werrstr("hung up");
+			return -1;
+		}
+		if(*str == '\0')
+			return 0;
+		str++;
+		len--;
+	}
+	return -1;
+}
+
+static int
+readln(char *buf, int n)
+{
+	int i;
+	char *p;
+
+	n--;	/* room for \0 */
+	p = buf;
+	for(i=0; ichal, "challenge", 1);
+		if(readstr(fd, response, sizeof response) < 0)
+			return -1;
+		ch->resp = response;
+		ch->nresp = strlen(response);
+		if((ai = auth_response(ch)) != nil)
+			break;
+	}
+	auth_freechal(ch);
+	if(ai == nil)
+		return -1;
+	writestr(fd, "", "challenge", 1);
+	if(auth_chuid(ai, 0) < 0)
+		fatal(1, "newns");
+	auth_freeAI(ai);
+	return fd;
+}
+
+/* format the 10 bytes at f as 20 hex digits plus NUL into t */
+static void
+mksecret(char *t, uchar *f)
+{
+	sprint(t, "%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+		f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7], f[8], f[9]);
+}
+
+/*
+ *  plan9 authentication followed by rc4 encryption
+ *
+ *  key is laid out as 4 client random bytes, 8 bytes of the
+ *  shared secret, 4 server random bytes; its sha1 digest is
+ *  split into the two ssl secrets.  returns the fd to use for
+ *  the rest of the conversation, or -1.
+ */
+static int
+p9auth(int fd)
+{
+	uchar key[16];
+	uchar digest[SHA1dlen];
+	char fromclientsecret[21];
+	char fromserversecret[21];
+	int i;
+	AuthInfo *ai;
+
+	ai = auth_proxy(fd, auth_getkey, "proto=%q role=client %s", p9authproto, keyspec);
+	if(ai == nil)
+		return -1;
+	if(ealgs == nil){
+		auth_freeAI(ai);
+		return fd;
+	}
+	/* only key[4..11] is available for the secret */
+	if(ai->nsecret < 8){
+		werrstr("secret too short");
+		auth_freeAI(ai);
+		return -1;
+	}
+	memmove(key+4, ai->secret, 8);
+	auth_freeAI(ai);
+
+	/* exchange random numbers */
+	srand(truerand());
+	for(i = 0; i < 4; i++)
+		key[i] = rand();
+	if(write(fd, key, 4) != 4)
+		return -1;
+	if(readn(fd, key+12, 4) != 4)
+		return -1;
+
+	/* scramble into two secrets */
+	sha1(key, sizeof(key), digest, nil);
+	mksecret(fromclientsecret, digest);
+	mksecret(fromserversecret, digest+10);
+
+	/* set up encryption */
+	i = pushssl(fd, ealgs, fromclientsecret, fromserversecret, nil);
+	if(i < 0)
+		werrstr("can't establish ssl connection: %r");
+	return i;
+}
+
+static int
+noauth(int fd)
+{
+	ealgs = nil;
+	return fd;
+}
+
+static int
+srvnoauth(int fd, char *user)
+{
+	strecpy(user, user+MaxStr, getuser());
+	ealgs = nil;
+	return fd;
+}
+
+/*
+ * log the first n bytes at p to the "cpu" log as hex digits.
+ * output is cut off at what fits in the local buffer.
+ */
+void
+loghex(uchar *p, int n)
+{
+	char buf[100];
+	int i, max;
+
+	/* two digits per byte, plus the terminating NUL */
+	max = (sizeof buf - 1)/2;
+	if(n > max)
+		n = max;
+	buf[0] = 0;
+	for(i = 0; i < n; i++)
+		sprint(buf+2*i, "%2.2ux", p[i]);
+	syslog(0, "cpu", "%s", buf);
+}
+
+/*
+ * server side of p9auth: authenticate, become the client's user
+ * (copied to user, which must have room for MaxStr bytes), then
+ * set up encryption with the same key layout as p9auth.
+ */
+static int
+srvp9auth(int fd, char *user)
+{
+	uchar key[16];
+	uchar digest[SHA1dlen];
+	char fromclientsecret[21];
+	char fromserversecret[21];
+	int i;
+	AuthInfo *ai;
+
+	ai = auth_proxy(0, nil, "proto=%q role=server %s", p9authproto, keyspec);
+	if(ai == nil)
+		return -1;
+	if(auth_chuid(ai, nil) < 0){
+		auth_freeAI(ai);
+		return -1;
+	}
+	strecpy(user, user+MaxStr, ai->cuid);
+
+	if(ealgs == nil){
+		auth_freeAI(ai);
+		return fd;
+	}
+	/* only key[4..11] is available for the secret */
+	if(ai->nsecret < 8){
+		werrstr("secret too short");
+		auth_freeAI(ai);
+		return -1;
+	}
+	memmove(key+4, ai->secret, 8);
+	auth_freeAI(ai);
+
+	/* exchange random numbers */
+	srand(truerand());
+	for(i = 0; i < 4; i++)
+		key[i+12] = rand();
+	if(readn(fd, key, 4) != 4)
+		return -1;
+	if(write(fd, key+12, 4) != 4)
+		return -1;
+
+	/* scramble into two secrets */
+	sha1(key, sizeof(key), digest, nil);
+	mksecret(fromclientsecret, digest);
+	mksecret(fromserversecret, digest+10);
+
+	/* set up encryption */
+	i = pushssl(fd, ealgs, fromserversecret, fromclientsecret, nil);
+	if(i < 0)
+		werrstr("can't establish ssl connection: %r");
+	return i;
+}
+
+/*
+ *  set authentication mechanism
+ */
+int
+setam(char *name)
+{
+	for(am = authmethod; am->name != nil; am++)
+		if(strcmp(am->name, name) == 0)
+			return 0;
+	am = authmethod;
+	return -1;
+}
+
+/*
+ *  set authentication mechanism and encryption/hash algs
+ */
+int
+setamalg(char *s)
+{
+	ealgs = strchr(s, ' ');
+	if(ealgs != nil)
+		*ealgs++ = 0;
+	return setam(s);
+}
+
+char *rmtnotefile = "/mnt/term/dev/cpunote";
+
+/*
+ *  loop reading /mnt/term/dev/cpunote looking for notes.
+ *  The child returns to start the shell.
+ */
+void
+rmtnoteproc(void)
+{
+	int n, fd, pid, notepid;
+	char buf[256];
+
+	/* new proc returns to start shell */
+	pid = rfork(RFPROC|RFFDG|RFNOTEG|RFNAMEG|RFMEM);
+	switch(pid){
+	case -1:
+		syslog(0, "cpu", "cpu -R: can't start noteproc: %r");
+		return;
+	case 0:
+		return;
+	}
+
+	/* new proc reads notes from other side and posts them to shell */
+	switch(notepid = rfork(RFPROC|RFFDG|RFMEM)){
+	case -1:
+		syslog(0, "cpu", "cpu -R: can't start wait proc: %r");
+		_exits(0);
+	case 0:
+		fd = open(rmtnotefile, OREAD);
+		if(fd < 0){
+			syslog(0, "cpu", "cpu -R: can't open %s", rmtnotefile);
+			_exits(0);
+		}
+
+		for(;;){
+			n = read(fd, buf, sizeof(buf)-1);
+			if(n <= 0){
+				postnote(PNGROUP, pid, "hangup");
+				_exits(0);
+			}
+			buf[n] = 0;
+			postnote(PNGROUP, pid, buf);
+		}
+	}
+
+	/* original proc waits for shell proc to die and kills note proc */
+	for(;;){
+		n = waitpid();
+		if(n < 0 || n == pid)
+			break;
+	}
+	postnote(PNPROC, notepid, "kill");
+	_exits(0);
+}
+
+enum
+{
+	Qdir,
+	Qcpunote,
+
+	Nfid = 32,
+};
+
+struct {
+	char	*name;
+	Qid	qid;
+	ulong	perm;
+} fstab[] =
+{
+	[Qdir]		{ ".",		{Qdir, 0, QTDIR},	DMDIR|0555	},
+	[Qcpunote]	{ "cpunote",	{Qcpunote, 0},		0444		},
+};
+
+typedef struct Note Note;
+struct Note
+{
+	Note *next;
+	char msg[ERRMAX];
+};
+
+typedef struct Request Request;
+struct Request
+{
+	Request *next;
+	Fcall f;
+};
+
+typedef struct Fid Fid;
+struct Fid
+{
+	int	fid;
+	int	file;
+	int	omode;
+};
+Fid fids[Nfid];
+
+/* queues of notes and of read requests waiting for notes */
+struct {
+	Lock;
+	Note *nfirst, *nlast;
+	Request *rfirst, *rlast;
+} nfs;
+
+/*
+ * marshal f and write it to fd.  returns -1 (after closing fd)
+ * on a short write, 0 otherwise.
+ */
+int
+fsreply(int fd, Fcall *f)
+{
+	uchar buf[IOHDRSZ+Maxfdata];
+	int n;
+
+	if(dbg)
+		fprint(2, "<-%F\n", f);
+	n = convS2M(f, buf, sizeof buf);
+	if(n > 0){
+		if(write(fd, buf, n) != n){
+			close(fd);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/* match a note read request with a note, reply to the request */
+int
+kick(int fd)
+{
+	Request *rp;
+	Note *np;
+	int rv;
+
+	for(;;){
+		lock(&nfs);
+		rp = nfs.rfirst;
+		np = nfs.nfirst;
+		if(rp == nil || np == nil){
+			unlock(&nfs);
+			break;
+		}
+		nfs.rfirst = rp->next;
+		nfs.nfirst = np->next;
+		unlock(&nfs);
+
+		rp->f.type = Rread;
+		rp->f.count = strlen(np->msg);
+		rp->f.data = np->msg;
+		rv = fsreply(fd, &rp->f);
+		free(rp);
+		free(np);
+		if(rv < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * remove and free the queued read request with the given tag,
+ * if there is one.
+ */
+void
+flushreq(int tag)
+{
+	Request **l, *rp, *prev;
+
+	lock(&nfs);
+	prev = nil;
+	for(l = &nfs.rfirst; *l != nil; l = &(*l)->next){
+		rp = *l;
+		if(rp->f.tag == tag){
+			*l = rp->next;
+			/*
+			 * fsread appends through rlast whenever the queue is
+			 * non-empty, so it must never be left pointing at
+			 * the request freed here.
+			 */
+			if(nfs.rlast == rp)
+				nfs.rlast = prev;
+			unlock(&nfs);
+			free(rp);
+			return;
+		}
+		prev = rp;
+	}
+	unlock(&nfs);
+}
+
+/*
+ * return the table entry for fid, claiming the first unused
+ * entry (file < 0) if fid is not in the table; nil if full.
+ */
+Fid*
+getfid(int fid)
+{
+	int i, freefid;
+
+	freefid = -1;
+	for(i = 0; i < Nfid; i++){
+		if(freefid < 0 && fids[i].file < 0)
+			freefid = i;
+		if(fids[i].fid == fid)
+			return &fids[i];
+	}
+	if(freefid >= 0){
+		fids[freefid].fid = fid;
+		return &fids[freefid];
+	}
+	return nil;
+}
+
+/* reply to a stat request with the fstab entry for fid's file */
+int
+fsstat(int fd, Fid *fid, Fcall *f)
+{
+	Dir d;
+	uchar statbuf[256];
+
+	memset(&d, 0, sizeof(d));
+	d.name = fstab[fid->file].name;
+	d.uid = user;
+	d.gid = user;
+	d.muid = user;
+	d.qid = fstab[fid->file].qid;
+	d.mode = fstab[fid->file].perm;
+	d.atime = d.mtime = time(0);
+	f->stat = statbuf;
+	f->nstat = convD2M(&d, statbuf, sizeof statbuf);
+	return fsreply(fd, f);
+}
+
+/*
+ * a read of the directory returns the single cpunote entry
+ * (at offset 0 only); a read of cpunote is queued until a
+ * note is available (see kick).
+ */
+int
+fsread(int fd, Fid *fid, Fcall *f)
+{
+	Dir d;
+	uchar buf[256];
+	Request *rp;
+
+	switch(fid->file){
+	default:
+		return -1;
+	case Qdir:
+		if(f->offset == 0 && f->count >0){
+			memset(&d, 0, sizeof(d));
+			d.name = fstab[Qcpunote].name;
+			d.uid = user;
+			d.gid = user;
+			d.muid = user;
+			d.qid = fstab[Qcpunote].qid;
+			d.mode = fstab[Qcpunote].perm;
+			d.atime = d.mtime = time(0);
+			f->count = convD2M(&d, buf, sizeof buf);
+			f->data = (char*)buf;
+		} else
+			f->count = 0;
+		return fsreply(fd, f);
+	case Qcpunote:
+		rp = mallocz(sizeof(*rp), 1);
+		if(rp == nil)
+			return -1;
+		rp->f = *f;
+		lock(&nfs);
+		if(nfs.rfirst == nil)
+			nfs.rfirst = rp;
+		else
+			nfs.rlast->next = rp;
+		nfs.rlast = rp;
+		unlock(&nfs);
+		return kick(fd);
+	}
+}
+
+char Eperm[] = "permission denied";
+char Enofile[] = "out 
of files"; +char Enotdir[] = "not a directory"; + +void +notefs(int fd) +{ + uchar buf[IOHDRSZ+Maxfdata]; + int i, j, n, ncpunote; + char err[ERRMAX]; + Fcall f; + Fid *fid, *nfid; + int doreply; + + rfork(RFNOTEG); + fmtinstall('F', fcallfmt); + + for(n = 0; n < Nfid; n++){ + fids[n].file = -1; + fids[n].omode = -1; + } + + ncpunote = 0; + for(;;){ + n = read9pmsg(fd, buf, sizeof(buf)); + if(n <= 0){ + if(dbg) + fprint(2, "read9pmsg(%d) returns %d: %r\n", fd, n); + break; + } + if(convM2S(buf, n, &f) <= BIT16SZ) + break; + if(dbg) + fprint(2, "->%F\n", &f); + doreply = 1; + fid = getfid(f.fid); + if(fid == nil){ +nofids: + f.type = Rerror; + f.ename = Enofile; + fsreply(fd, &f); + continue; + } + switch(f.type++){ + default: + f.type = Rerror; + f.ename = "unknown type"; + break; + case Tflush: + flushreq(f.oldtag); + break; + case Tversion: + if(f.msize > IOHDRSZ+Maxfdata) + f.msize = IOHDRSZ+Maxfdata; + break; + case Tauth: + f.type = Rerror; + f.ename = "cpu: authentication not required"; + break; + case Tattach: + f.qid = fstab[Qdir].qid; + fid->file = Qdir; + break; + case Twalk: + nfid = nil; + if(f.newfid != f.fid){ + nfid = getfid(f.newfid); + if(nfid == nil) + goto nofids; + nfid->file = fid->file; + fid = nfid; + } + + f.ename = nil; + for(i=0; i MAXWELEM){ + f.type = Rerror; + f.ename = "too many name elements"; + break; + } + if(fid->file != Qdir){ + f.type = Rerror; + f.ename = Enotdir; + break; + } + if(strcmp(f.wname[i], "cpunote") == 0){ + fid->file = Qcpunote; + f.wqid[i] = fstab[Qcpunote].qid; + continue; + } + f.type = Rerror; + f.ename = err; + strcpy(err, "cpu: file \""); + for(j=0; j<=i; j++){ + if(strlen(err)+1+strlen(f.wname[j])+32 > sizeof err) + break; + if(j != 0) + strcat(err, "/"); + strcat(err, f.wname[j]); + } + strcat(err, "\" does not exist"); + break; + } + if(nfid != nil && (f.ename != nil || i < f.nwname)) + nfid ->file = -1; + if(f.type != Rerror) + f.nwqid = i; + break; + case Topen: + if(f.mode != OREAD){ + f.type = Rerror; + 
f.ename = Eperm; + } + fid->omode = f.mode; + if(fid->file == Qcpunote) + ncpunote++; + f.qid = fstab[fid->file].qid; + break; + case Tcreate: + f.type = Rerror; + f.ename = Eperm; + break; + case Tread: + if(fsread(fd, fid, &f) < 0) + goto err; + doreply = 0; + break; + case Twrite: + f.type = Rerror; + f.ename = Eperm; + break; + case Tclunk: + if(fid->omode != -1 && fid->file == Qcpunote){ + ncpunote--; + if(ncpunote == 0) /* remote side is done */ + goto err; + } + fid->file = -1; + fid->omode = -1; + break; + case Tremove: + f.type = Rerror; + f.ename = Eperm; + break; + case Tstat: + if(fsstat(fd, fid, &f) < 0) + goto err; + doreply = 0; + break; + case Twstat: + f.type = Rerror; + f.ename = Eperm; + break; + } + if(doreply) + if(fsreply(fd, &f) < 0) + break; + } +err: + if(dbg) + fprint(2, "notefs exiting: %r\n"); + werrstr("success"); + postnote(PNGROUP, exportpid, "kill"); + if(dbg) + fprint(2, "postnote PNGROUP %d: %r\n", exportpid); + close(fd); +} + +char notebuf[ERRMAX]; + +void +catcher(void*, char *text) +{ + int n; + + n = strlen(text); + if(n >= sizeof(notebuf)) + n = sizeof(notebuf)-1; + memmove(notebuf, text, n); + notebuf[n] = '\0'; + noted(NCONT); +} + +/* + * mount in /dev a note file for the remote side to read. 
+ */ +void +lclnoteproc(int netfd) +{ + Waitmsg *w; + Note *np; + int pfd[2]; + int pid; + + if(pipe(pfd) < 0){ + fprint(2, "cpu: can't start note proc: pipe: %r\n"); + return; + } + + /* new proc mounts and returns to start exportfs */ + switch(pid = rfork(RFPROC|RFNAMEG|RFFDG|RFMEM)){ + default: + exportpid = pid; + break; + case -1: + fprint(2, "cpu: can't start note proc: rfork: %r\n"); + return; + case 0: + close(pfd[0]); + if(mount(pfd[1], -1, "/dev", MBEFORE, "") < 0) + fprint(2, "cpu: can't mount note proc: %r\n"); + close(pfd[1]); + return; + } + + close(netfd); + close(pfd[1]); + + /* new proc listens for note file system rpc's */ + switch(rfork(RFPROC|RFNAMEG|RFMEM)){ + case -1: + fprint(2, "cpu: can't start note proc: rfork1: %r\n"); + _exits(0); + case 0: + notefs(pfd[0]); + _exits(0); + } + + /* original proc waits for notes */ + notify(catcher); + w = nil; + for(;;) { + *notebuf = 0; + free(w); + w = wait(); + if(w == nil) { + if(*notebuf == 0) + break; + np = mallocz(sizeof(Note), 1); + if(np != nil){ + strcpy(np->msg, notebuf); + lock(&nfs); + if(nfs.nfirst == nil) + nfs.nfirst = np; + else + nfs.nlast->next = np; + nfs.nlast = np; + unlock(&nfs); + kick(pfd[0]); + } + unlock(&nfs); + } else if(w->pid == exportpid) + break; + } + + if(w == nil) + exits(nil); + exits(0); +/* exits(w->msg); */ +} diff -Nru /sys/src/9k/root/mkfile /sys/src/9k/root/mkfile --- /sys/src/9k/root/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/mkfile Wed Dec 9 00:00:00 2015 @@ -0,0 +1,10 @@ + +#include + +#define ESTR 256 + +/* ping pong. You have to tell it if is ping or pong. you have to tell it the partner + * address. ping sends packet, waits in loop, replies with packet. + * pong is same except does not send an initial packet. always wire to core 1. + */ +static void +error(char* fmt, ...) 
+{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p-estr); +} + +static void +fatal(char* fmt, ...) +{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p - estr); + exits("fatal"); +} + +static void +usage(void) +{ + char *e, estr[ESTR], *p; + + e = estr + ESTR; + p = seprint(estr, e, "usage: %s" + " [whatever]" + "\n", + argv0); + write(2, estr, p-estr); + exits("usage"); +} + +#define F(v, o, w) (((v) & ((1<<(w))-1))<<(o)) + +enum { + X = 0, /* dimension */ + Y = 1, + Z = 2, + N = 3, + + Chunk = 32, /* granularity of FIFO */ + Pchunk = 7, /* Chunks in a packet */ + + Quad = 16, +}; + +/* + * Packet header. The hardware requires an 8-byte header + * of which the last two are reserved (they contain a sequence + * number and a header checksum inserted by the hardware). + * The hardware also requires the packet to be aligned on a + * 128-bit boundary for loading into the HUMMER. + */ +typedef struct Tpkt Tpkt; +struct Tpkt { + u8int sk; /* Skip Checksum Control */ + u8int hint; /* Hint|Dp|Pid0 */ + u8int size; /* Size|Pid1|Dm|Dy|VC */ + u8int dst[N]; /* Destination Coordinates */ + u8int _6_[2]; /* reserved */ + u8int _8_[8]; /* protocol header */ + u32int payload[]; +}; + +/* + * SKIP is a field in .sk giving the number of 2-bytes + * to skip from the top of the packet before including + * the packet bytes into the running checksum. + * SIZE is a field in .size giving the size of the + * packet in 32-byte 'chunks'. 
+ */ +#define SKIP(n) F(n, 1, 7) +#define SIZE(n) F(n, 5, 3) + +enum { + Sk = 0x01, /* Skip Checksum */ + + Pid0 = 0x01, /* Destination Group FIFO MSb */ + Dp = 0x02, /* Multicast Deposit */ + Hzm = 0x04, /* Z- Hint */ + Hzp = 0x08, /* Z+ Hint */ + Hym = 0x10, /* Y- Hint */ + Hyp = 0x20, /* Y+ Hint */ + Hxm = 0x40, /* X- Hint */ + Hxp = 0x80, /* X+ Hint */ + + Vcd0 = 0x00, /* Dynamic 0 VC */ + Vcd1 = 0x01, /* Dynamic 1 VC */ + Vcbn = 0x02, /* Deterministic Bubble VC */ + Vcbp = 0x03, /* Deterministic Priority VC */ + Dy = 0x04, /* Dynamic Routing */ + Dm = 0x08, /* DMA Mode */ + Pid1 = 0x10, /* Destination Group FIFO LSb */ +}; + +static int +torusparse(u8int d[3], char* item, char* buf) +{ + int n; + char *p; + + if((p = strstr(buf, item)) == nil || (p != buf && *(p-1) != '\n')) + return -1; + n = strlen(item); + if(strlen(p) < n+sizeof(": x 0 y 0 z 0")) + return -1; + p += n+sizeof(": x ")-1; + if(strncmp(p-4, ": x ", 4) != 0) + return -1; + if((n = strtol(p, &p, 0)) > 255 || *p != ' ' || *(p+1) != 'y') + return -1; + d[0] = n; + if((n = strtol(p+2, &p, 0)) > 255 || *p != ' ' || *(p+1) != 'z') + return -1; + d[1] = n; + if((n = strtol(p+2, &p, 0)) > 255 || (*p != '\n' && *p != '\0')) + return -1; + d[2] = n; + + return 0; +} + +void wire(int core, int pri) +{ + + int me = getpid(); + char *name = smprint("/proc/%d/ctl", me); + int procfd = open(name, ORDWR); + char *cmd; + int amt; + assert (procfd > 0); + print("Wired to %d\n", core); + cmd = smprint("wired %d\n", core); + amt = write(procfd, cmd, strlen(cmd)); + assert(amt >= strlen(cmd)); + + if (pri) { + print("Pri to %d\n", pri); + cmd = smprint("fixedpri %d\n", pri); + amt = write(procfd, cmd, strlen(cmd)); + assert(amt >= strlen(cmd)); + } +} + +void +send(int fd, void *tpkt, int length, u64int *x) +{ + u64int start, end; + cycles(&start); + int n = pwrite(fd, tpkt, length, 0); + cycles(&end); + *x = end - start; + if(n < 0) + fatal("write /dev/torus: %r\n", n); + else if(n < length) + fatal("write 
/dev/torus: short write %d\n", n); +} + +void +recv(int fd, void *rpkt, int length, u64int *x) +{ + int n; + u64int start, end; + cycles(&start); + n = pread(fd, rpkt, length, 0); + cycles(&end); + *x = end - start; + if(n < length) + fatal("read /dev/torus: %r\n", n); +} +static void +dumptpkt(Tpkt* tpkt, int hflag, int dflag) +{ + uchar *t; + int i, j, n; + char buf[512], *e, *p; + + n = ((tpkt->size>>5)+1) * Chunk; + + p = buf; + e = buf + sizeof(buf); + if(hflag){ + p = seprint(p, e, "Hw:"); + t = (uchar*)tpkt; + for(i = 0; i < 8; i++) + p = seprint(p, e, " %2.2ux", t[i]); + p = seprint(p, e, "\n"); + + p = seprint(p, e, "Sw:"); + t = (uchar*)tpkt->_8_; + for(i = 0; i < 8; i++) + p = seprint(p, e, " %#2.2ux", t[i]); + print("%s\n", buf); + + } + + if(!dflag) + return; + + n -= sizeof(Tpkt); + for(i = 0; i < n; i += 16){ + p = seprint(buf, e, "%4.4ux:", i); + for(j = 0; j < 16; j++) + seprint(p, e, " %2.2ux", tpkt->payload[i+j]); + print("%s\n", buf); + } +} + + +void +main(int argc, char* argv[]) +{ + Tpkt *tpkt, *rpkt; + u8int d[N]; + char buf[512], *p; + uvlong r, start, stop; + u64int *xtimes, *rtimes; + int count, fd, i, length, mhz, n, x, y, z; + int tracefd; + int rank; + int pri = 19; + count = 1; + length = Pchunk*Chunk; + mhz = 850; + + ARGBEGIN{ + default: + usage(); + break; + case 'l': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + if(n % Chunk) + usage(); + length = n; + break; + case 'm': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + mhz = n; + break; + case 'n': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + count = n; + break; + }ARGEND; + + if(argc != 4) + usage(); + if((x = strtol(argv[0], &p, 0)) < 0 || *p != 0) + fatal("x invalid: %s\n", argv[0]); + if((y = strtol(argv[1], &p, 0)) < 0 || *p != 0) + fatal("y invalid: %s\n", argv[1]); + if((z = strtol(argv[2], &p, 0)) <= 0 || *p 
!= 0) + fatal("z invalid: %s\n", argv[2]); + if((rank= strtol(argv[3], &p, 0)) < 0 || *p != 0) + fatal("rank invalid: %s\n", argv[3]); + z -= 1; + + if((fd = open("/dev/torusstatus", OREAD)) < 0) + fatal("open /dev/torusstatus: %r\n"); + if((n = read(fd, buf, sizeof(buf))) < 0) + fatal("read /dev/torusstatus: %r\n"); + close(fd); + buf[n] = 0; + + if(torusparse(d, "size", buf) < 0) + fatal("parse /dev/torusstatus: <%s>\n", buf); + if(x >= d[X] || y >= d[Y] || z >= d[Z]) + fatal("destination out of range: %d.%d.%d >= %d.%d.%d", + x, y, z, d[X], d[Y], d[Z]); + + if((tpkt = mallocalign(length, Chunk, 0, 0)) == nil) + fatal("mallocalign tpkt\n"); + memset(tpkt, 0, length); + + if((rpkt = mallocalign(length, Chunk, 0, 0)) == nil) + fatal("mallocalign rptk\n"); + memset(rpkt, 0, length); + + xtimes = malloc(sizeof(*xtimes)*count); + if (xtimes == nil) + fatal("malloc x\n"); + rtimes = malloc(sizeof(*xtimes)*count); + if (rtimes == nil) + fatal("malloc r\n"); + tpkt->sk = SKIP(4); + tpkt->hint = 0; + tpkt->size = SIZE(Pchunk-1)|Dy|Vcd0; + tpkt->dst[X] = x; + tpkt->dst[Y] = y; + tpkt->dst[Z] = z; + + if((fd = open("/dev/torus", ORDWR)) < 0) + fatal("open /dev/torus: %r\n"); + + tracefd = open("/dev/tracectl", ORDWR); + if (tracefd < 0) + print("Warning: no trace device, no traces\n"); + + wire(1, pri); + + if (tracefd > 0) + if (write(tracefd, "start", 6) < 6) + print("Warning: could not start trace device\n"); + + cycles(&start); + if (! rank){ + tpkt->payload[0] = 1; + send(fd, tpkt, length, &xtimes[0]); + } + + for(i = 0; i < count; i++){ + recv(fd, rpkt, length, &rtimes[i]); + if (rpkt->payload[0] != i + 1) + print("SEQ: Got %d expect %d\n", rpkt->payload[0], i); + tpkt->payload[0] = rpkt->payload[0]; + if (! 
rank) + tpkt->payload[0] ++; + send(fd, tpkt, length, &xtimes[i]); + } + + cycles(&stop); + + /* we may chop some off but tough */ + if (tracefd > 0) + if(write(tracefd, "stop", 5) < 5) + print("Warning: could not stop trace device\n"); +; + + close(fd); + + r = (count*length); + r *= mhz; + r /= stop - start; + + print("%d writes of %d in %llud cycles @ %dMHz = %llud MB/s\n", + count, length, stop - start, mhz, r); + print("xmit\n"); + for(i = 0; i < count; i++) + print("%d %lld\n", i, xtimes[i]); + print("recv\n"); + for(i = 0; i < count; i++) + print("%d %lld\n", i, rtimes[i]); + + exits(0); +} diff -Nru /sys/src/9k/root/profile /sys/src/9k/root/profile --- /sys/src/9k/root/profile Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/profile Wed Dec 9 00:00:00 2015 @@ -0,0 +1,13 @@ +#!/bin/rc + +switch($service){ +case terminal + prompt=('% ' ' ') + status='' +case cpu + prompt=($sysname'% ' ' ') + bind /mnt/term/dev/cons /dev/cons + bind /mnt/term/dev/consctl /dev/consctl + bind -a /mnt/term/dev /dev + status='' +} diff -Nru /sys/src/9k/root/rcmain /sys/src/9k/root/rcmain --- /sys/src/9k/root/rcmain Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/rcmain Wed Dec 9 00:00:00 2015 @@ -0,0 +1,36 @@ +# +# rcmain +# Plan 9 initial boot environment version +# +home=/ +ifs=' +' +prompt=('# ' ' ') +path=(. 
/bin) + +finit +fn sigexit +fn ps {@{ + cd /proc; + for(i in `{echo [1-9] [1-9][0-9] [1-9][0-9][0-9] [1-9][0-9][0-9][0-9] [1-9][0-9][0-9][0-9][0-9] [1-9][0-9][0-9][0-9][0-9]*|sed 's/\[.*\][ \*]//'}){ + for(f in $i^/status $i^/args) + >[2]/dev/null sed '' $f + }|sed -e '$!N;s/([^ ])$/\1/;ta' -e 'P;D;b' -e ':a;s/\n//' \ + |sed 's/ +/ /g;s/^([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+ +[^ ]+ +[^ ]+ +[^ ]+ +[^ ]+ +[^ ]+) +([^ ]+)+ ([^ ]+ +[^ ]+)(.*)/\2 '^$i^' \5K \3 \1 \7/' +}} +fn netstat {@{ + for(p in tcp udp){ + cd /net/$p; + for(i in `{echo [0-9] [1-9][0-9] [1-9][0-9][0-9] [1-9][0-9][0-9]*|sed 's/\[.*\][ \*]//'}){ + echo -n $p' '$i' *owner* '; + cat $i/status $i/local $i/remote \ + | sed -n -e :a -e '$!N; s/ .*//; s/!/ /; s/\n/ /; ta; + s/([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)/\1 \3 \5 \4/p' + } + } +}} + +status='' +if(! ~ $#* 0) . $* +. -i '#d/0' +exit $status diff -Nru /sys/src/9k/root/suck.c /sys/src/9k/root/suck.c --- /sys/src/9k/root/suck.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/suck.c Wed Dec 9 00:00:00 2015 @@ -0,0 +1,337 @@ +#include +#include + +#define ESTR 256 + +static void +error(char* fmt, ...) +{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p-estr); +} + +static void +fatal(char* fmt, ...) 
+{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p - estr); + exits("fatal"); +} + +static void +usage(void) +{ + char *e, estr[ESTR], *p; + + e = estr + ESTR; + p = seprint(estr, e, "usage: %s" + " [whatever]" + "\n", + argv0); + write(2, estr, p-estr); + exits("usage"); +} + +#define F(v, o, w) (((v) & ((1<<(w))-1))<<(o)) + +enum { + X = 0, /* dimension */ + Y = 1, + Z = 2, + N = 3, + + Chunk = 32, /* granularity of FIFO */ + Pchunk = 8, /* Chunks in a packet */ + + Quad = 16, +}; + +/* + * Packet header. The hardware requires an 8-byte header + * of which the last two are reserved (they contain a sequence + * number and a header checksum inserted by the hardware). + * The hardware also requires the packet to be aligned on a + * 128-bit boundary for loading into the HUMMER. + */ +typedef struct Tpkt Tpkt; +struct Tpkt { + u8int sk; /* Skip Checksum Control */ + u8int hint; /* Hint|Dp|Pid0 */ + u8int size; /* Size|Pid1|Dm|Dy|VC */ + u8int dst[N]; /* Destination Coordinates */ + u8int _6_[2]; /* reserved */ + u8int _8_[8]; /* protocol header */ + u8int payload[]; +}; + +/* + * SKIP is a field in .sk giving the number of 2-bytes + * to skip from the top of the packet before including + * the packet bytes into the running checksum. + * SIZE is a field in .size giving the size of the + * packet in 32-byte 'chunks'. 
/*
 * Find a line of the form "item: x A y B z C" in buf and store A, B, C
 * in d[0], d[1], d[2].
 *
 * item must begin a line (start of buf or just after '\n'); later
 * occurrences are tried if an earlier one is in the middle of a line.
 * Each number is parsed by strtol with base 0 and must lie in 0..255.
 * The line must end at '\n' or at the end of buf.
 * Returns 0 on success; -1 otherwise, leaving d untouched.
 */
static int
torusparse(u8int d[3], char* item, char* buf)
{
	static char axis[] = "xyz";
	u8int tmp[3];
	char *p, *q;
	long v;
	int i, n;

	n = strlen(item);
	if(n == 0)
		return -1;
	for(p = strstr(buf, item); p != nil; p = strstr(p+1, item))
		if((p == buf || p[-1] == '\n') && p[n] == ':')
			break;
	if(p == nil)
		return -1;

	p += n+1;	/* just past the ':' */
	for(i = 0; i < 3; i++){
		if(p[0] != ' ' || p[1] != axis[i])
			return -1;
		p += 2;
		v = strtol(p, &q, 0);
		/* q == p means strtol found no digits at all */
		if(q == p || v < 0 || v > 255)
			return -1;
		tmp[i] = v;
		p = q;
	}
	if(*p != '\n' && *p != '\0')
		return -1;

	for(i = 0; i < 3; i++)
		d[i] = tmp[i];
	return 0;
}
t[i]); + p = seprint(p, e, "\n"); +#endif /* notdef */ + + p = seprint(p, e, "Sw:"); + t = (uchar*)tpkt->_8_; + for(i = 0; i < 8; i++) + p = seprint(p, e, " %#2.2ux", t[i]); + print("%s\n", buf); + + } + + if(!dflag) + return; + + n -= sizeof(Tpkt); + for(i = 0; i < n; i += 16){ + p = seprint(buf, e, "%4.4ux:", i); + for(j = 0; j < 16; j++) + seprint(p, e, " %2.2ux", tpkt->payload[i+j]); + print("%s\n", buf); + } +} + +void +main(int argc, char* argv[]) +{ + Tpkt *tpkt; + u8int d[N]; + char buf[512], *p; + uvlong r, start, stop; + int count, dflag, fd, i, hflag, length, mhz, n; + int tracefd; + int procs = 1; + int rank = 0; + int pri = 19; + int oldstyle = 0; + + count = 1; + dflag = hflag = 0; + length = Pchunk*Chunk; + mhz = 850; + + ARGBEGIN{ + default: + usage(); + break; + case 'd': + dflag = 1; + break; + case 'h': + hflag = 1; + break; + case 'l': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + length = n; + if(n <= Chunk) + usage(); + if(oldstyle && length > Pchunk*Chunk){ + if(n % Chunk) + usage(); + n = (n + (Pchunk*Chunk)-1)/(Pchunk*Chunk); + length += (n-1) * sizeof(Tpkt); + } + break; + case 'm': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + mhz = n; + break; + case 'n': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + count = n; + break; + case 'o': + oldstyle = 1; + break; + case 'p': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || n > 4 || p == argv[0] || *p != 0) + usage(); + procs = n; + break; + }ARGEND; + + if((fd = open("/dev/torusstatus", OREAD)) < 0) + fatal("open /dev/torusstatus: %r\n"); + if((n = read(fd, buf, sizeof(buf))) < 0) + fatal("read /dev/torusstatus: %r\n"); + close(fd); + buf[n] = 0; + + if(torusparse(d, "addr", buf) < 0) + fatal("parse /dev/torusstatus: <%s>\n", buf); + print("addr: %d.%d.%d\n", d[X], d[Y], d[Z]); + if(torusparse(d, "size", buf) < 0) 
+ fatal("parse /dev/torusstatus: <%s>\n", buf); + print("size: %d.%d.%d\n", d[X], d[Y], d[Z]); + + if((tpkt = mallocalign(length, Chunk, 0, 0)) == nil) + fatal("mallocalign tpkt\n"); + + if((fd = open("/dev/torus", ORDWR)) < 0) + fatal("open /dev/torus: %r\n"); + + print("starting %d reads of %d\n", count, length); + + r = count*length; + + tracefd = open("/dev/tracectl", ORDWR); + + /* fork at bottom of loop since we are proc 0 */ + for(i = 0; i < procs; i++) { + int me = getpid(); + char *name = smprint("/proc/%d/ctl", me); + int procfd = open(name, ORDWR); + char *cmd; + int amt; + assert (procfd > 0); + rank = i; + //print("Wired to %d\n", rank); + cmd = smprint("wired %d\n", rank); + amt = write(procfd, cmd, strlen(cmd)); + assert(amt >= strlen(cmd)); + + if (pri) { + //print("Pri to %d\n", pri); + cmd = smprint("fixedpri %d\n", pri); + amt = write(procfd, cmd, strlen(cmd)); + assert(amt >= strlen(cmd)); + } + + if (i < procs-1) + if (fork()) + break; + } + + if (tracefd > 0 && rank == 0) + write(tracefd, "start", 6); + + cycles(&start); + for(i = 0; i < r; i += n){ + if((n = pread(fd, tpkt, length, 0)) < 0) + fatal("read /dev/torus: %r\n", n); + if(hflag || dflag) + dumptpkt(tpkt, hflag, dflag); + } + cycles(&stop); + + /* we may chop some off but tough */ + if (tracefd > 0 && rank == 0) + write(tracefd, "stop", 5); + + close(fd); + + r = (count*length); + r *= mhz; + r /= stop - start; + + print("%d reads in %llud cycles @ %dMHz = %llud MB/s\n", + i, stop - start, mhz, r); + + exits(0); +} diff -Nru /sys/src/9k/root/tcp23 /sys/src/9k/root/tcp23 --- /sys/src/9k/root/tcp23 Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/tcp23 Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2 @@ +#!/bin/rc +exec /bin/ip/telnetd -at $* diff -Nru /sys/src/9k/root/tcp564 /sys/src/9k/root/tcp564 --- /sys/src/9k/root/tcp564 Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/root/tcp564 Wed Dec 9 00:00:00 2015 @@ -0,0 +1,2 @@ +#!/bin/rc +exec exportfs -r / diff -Nru /sys/src/9k/words/CHANGES 
/sys/src/9k/words/CHANGES --- /sys/src/9k/words/CHANGES Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/words/CHANGES Wed Dec 9 00:00:00 2015 @@ -0,0 +1,31 @@ +Made + +- get rid of conf, by2pg and vars named m +- add acpi +- add colors to pages and cores +- fixes to exec and rfnomnt + +Quick changes + +- perf counters +- cleanup spurious interrupt handling +- /dev/config, if not there +- regression testing +- add ahci +- add monitor, waitwhile +- aoe from 386 to port + +Slow changes + +- revisit *apic, *ipi, and init code +- int types (look at GiB ull, eg) +- kprof +- sched and lock instrumentation for benchs +- new sched +- new mm +- add XCs + +Researchy changes + +- sems & tubes +- zero copy diff -Nru /sys/src/9k/words/boot.out /sys/src/9k/words/boot.out --- /sys/src/9k/words/boot.out Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/words/boot.out Wed Dec 9 00:00:00 2015 @@ -0,0 +1,300 @@ +/amd64/9k10cpu .. 470889+548528+177 +Plan 9 +asmalloc: 0x00000000001cb000@0x0000000000000000, type 1 +mmuinit: vmstart 0xfffffffff0000000 vmunused 0xfffffffff0235000 vmunmapped 0xfffffffff0400000 vmend 0xfffffffff4000000 +sys->pd 0x108003 0x108023 +l 3 0xffffff7fbfdfeff8 107023 +l 2 0xffffff7fbfdffff8 108023 +l 1 0xffffff7fbffffc00 e3 +l 1 0xffffff7fbffffc00 e3 +asmalloc: 0x0000000003c00000@0x0000000000400000, type 1 +pa 0x400000 mem 0x400000 +0xfffffffff0400000 l 1 +0xfffffffff0600000 l 1 +0xfffffffff0800000 l 1 +0xfffffffff0a00000 l 1 +0xfffffffff0c00000 l 1 +0xfffffffff0e00000 l 1 +0xfffffffff1000000 l 1 +0xfffffffff1200000 l 1 +0xfffffffff1400000 l 1 +0xfffffffff1600000 l 1 +0xfffffffff1800000 l 1 +0xfffffffff1a00000 l 1 +0xfffffffff1c00000 l 1 +0xfffffffff1e00000 l 1 +0xfffffffff2000000 l 1 +0xfffffffff2200000 l 1 +0xfffffffff2400000 l 1 +0xfffffffff2600000 l 1 +0xfffffffff2800000 l 1 +0xfffffffff2a00000 l 1 +0xfffffffff2c00000 l 1 +0xfffffffff2e00000 l 1 +0xfffffffff3000000 l 1 +0xfffffffff3200000 l 1 +0xfffffffff3400000 l 1 +0xfffffffff3600000 l 1 +0xfffffffff3800000 l 1 
+0xfffffffff3a00000 l 1 +0xfffffffff3c00000 l 1 +0xfffffffff3e00000 l 1 + 0x0000000004000000 0x00000000d7e60000 1 (3555065856) va 0xfffffe0004000000 + 0x0000000100000000 0x0000001028000000 1 (65095598080) va 0xfffffe0100000000 +asm: base 0x4000000 npage 137216; pm0: base 0x4000000 npage 137216 +asm: base 0x0 npage 0; pm1: base 0x0 npage 0 +asm: base 0x0 npage 0; pm2: base 0x0 npage 0 +asm: base 0x0 npage 0; pm3: base 0x0 npage 0 +asm: base 0x0 npage 0; pm4: base 0x0 npage 0 +asm: base 0x0 npage 0; pm5: base 0x0 npage 0 +asm: base 0x0 npage 0; pm6: base 0x0 npage 0 +asm: base 0x0 npage 0; pm7: base 0x0 npage 0 +asmmeminit: losing 0 pages +asmmeminit: losing 0 pages +asmmeminit: losing 0 pages +base 0xfffffffff023a000 ptr 0xfffffffff023a000 nunints 4048385 +vmap(0xe5c20, 48) +vmap(0xe5c20, 988) +vmap(0xfee00000, 1024) +vmap(0xfec00000, 1024) +vmap(0xfec20000, 1024) +apic16: hz 199999980 max 1999999 min 19999 div 10 +pcirouting: ignoring south bridge PCI.0.20.3 1002/439D +asmmapinit 0x00000000deffe400 0x0000000000000400 dev +asmalloc: 0x0000000000000400@0x00000000deffe400, type 0 +asmfree: 0x0000000006ffe400@0x00000000d8000000, type 0 +asmfree: 0x0000000000000400@0x00000000deffe400, type 5 +asmmapinit 0x00000000deffa000 0x0000000000001000 dev +asmalloc: 0x0000000000001000@0x00000000deffa000, type 0 +asmfree: 0x0000000006ffa000@0x00000000d8000000, type 0 +asmfree: 0x0000000000001000@0x00000000deffa000, type 5 +asmmapinit 0x00000000deffb000 0x0000000000001000 dev +asmalloc: 0x0000000000001000@0x00000000deffb000, type 0 +asmfree: 0x0000000000001000@0x00000000deffb000, type 5 +asmmapinit 0x00000000deffe800 0x0000000000000100 dev +asmalloc: 0x0000000000000100@0x00000000deffe800, type 0 +asmfree: 0x0000000000000100@0x00000000deffe800, type 5 +asmmapinit 0x00000000deffc000 0x0000000000001000 dev +asmalloc: 0x0000000000001000@0x00000000deffc000, type 0 +asmfree: 0x0000000000001000@0x00000000deffc000, type 5 +asmmapinit 0x00000000deffd000 0x0000000000001000 dev +asmalloc: 
0x0000000000001000@0x00000000deffd000, type 0 +asmfree: 0x0000000000001000@0x00000000deffd000, type 5 +asmmapinit 0x00000000deffec00 0x0000000000000100 dev +asmalloc: 0x0000000000000100@0x00000000deffec00, type 0 +asmfree: 0x0000000000000300@0x00000000deffe900, type 0 +asmfree: 0x0000000000000100@0x00000000deffec00, type 5 +asmmapinit 0x00000000defff000 0x0000000000001000 dev +asmalloc: 0x0000000000001000@0x00000000defff000, type 0 +asmfree: 0x0000000000000300@0x00000000deffed00, type 0 +asmfree: 0x0000000000001000@0x00000000defff000, type 5 +asmmapinit 0x00000000dff60000 0x0000000000020000 dev +asmalloc: 0x0000000000020000@0x00000000dff60000, type 0 +asmfree: 0x0000000000f60000@0x00000000df000000, type 0 +asmfree: 0x0000000000020000@0x00000000dff60000, type 5 +asmmapinit 0x00000000dff40000 0x0000000000020000 dev +asmalloc: 0x0000000000020000@0x00000000dff40000, type 0 +asmfree: 0x0000000000f40000@0x00000000df000000, type 0 +asmfree: 0x0000000000020000@0x00000000dff40000, type 5 +asmmapinit 0x00000000dff98000 0x0000000000004000 dev +asmalloc: 0x0000000000004000@0x00000000dff98000, type 0 +asmfree: 0x0000000000018000@0x00000000dff80000, type 0 +asmfree: 0x0000000000004000@0x00000000dff98000, type 5 +asmmapinit 0x00000000dffe0000 0x0000000000020000 dev +asmalloc: 0x0000000000020000@0x00000000dffe0000, type 0 +asmfree: 0x0000000000044000@0x00000000dff9c000, type 0 +asmfree: 0x0000000000020000@0x00000000dffe0000, type 5 +asmmapinit 0x00000000dffc0000 0x0000000000020000 dev +asmalloc: 0x0000000000020000@0x00000000dffc0000, type 0 +asmfree: 0x0000000000024000@0x00000000dff9c000, type 0 +asmfree: 0x0000000000020000@0x00000000dffc0000, type 5 +asmmapinit 0x00000000dff9c000 0x0000000000004000 dev +asmalloc: 0x0000000000004000@0x00000000dff9c000, type 0 +asmfree: 0x0000000000004000@0x00000000dff9c000, type 5 +asmmapinit 0x00000000dc000000 0x0000000001000000 dev +asmalloc: 0x0000000001000000@0x00000000dc000000, type 0 +asmfree: 0x0000000004000000@0x00000000d8000000, type 0 
+asmfree: 0x0000000001000000@0x00000000dc000000, type 5 +asmmapinit 0x00000000dfefc000 0x0000000000004000 dev +asmalloc: 0x0000000000004000@0x00000000dfefc000, type 0 +asmfree: 0x0000000000efc000@0x00000000df000000, type 0 +asmfree: 0x0000000000004000@0x00000000dfefc000, type 5 +asmmapinit 0x00000000df000000 0x0000000000800000 dev +asmalloc: 0x0000000000800000@0x00000000df000000, type 0 +asmfree: 0x0000000000800000@0x00000000df000000, type 5 +vmap(0xdff60000, 131072) +vmap(0xdffe0000, 131072) +#l0: igbepcie: 1000Mbps port 0xdff60000 irq 10: 003048ff2106 +#l1: igbepcie: 1000Mbps port 0xdffe0000 irq 7: 003048ff2107 +538M memory: 2M kernel data, 536M user, 536M swap +newpage called from 0xfffffffff0115141 +newpage called from 0xfffffffff01151b9 +Hello Squidboy 17 1 +mach1: 0xfffffffff1490000 pml4 0xfffffffff1490070 +Hello Squidboy 18 2 +mach2: 0xfffffffff149d000 pml4 0xfffffffff149d070 +Hello Squidboy 19 3 +mach3: 0xfffffffff14aa000 pml4 0xfffffffff14aa070 +Hello Squidboy 20 4 +mach4: 0xfffffffff14b7000 pml4 0xfffffffff14b7070 +Hello Squidboy 21 5 +mach5: 0xfffffffff14c4000 pml4 0xfffffffff14c4070 +Hello Squidboy 22 6 +mach6: 0xfffffffff14d1000 pml4 0xfffffffff14d1070 +Hello Squidboy 23 7 +mach7: 0xfffffffff14de000 pml4 0xfffffffff14de070 +Hello Squidboy 32 8 +mach8: 0xfffffffff14eb000 pml4 0xfffffffff14eb070 +Hello Squidboy 33 9 +mach9: 0xfffffffff14f8000 pml4 0xfffffffff14f8070 +Hello Squidboy 34 10 +mach10: 0xfffffffff1505000 pml4 0xfffffffff1505070 +Hello Squidboy 35 11 +mach11: 0xfffffffff1512000 pml4 0xfffffffff1512070 +Hello Squidboy 36 12 +mach12: 0xfffffffff151f000 pml4 0xfffffffff151f070 +Hello Squidboy 37 13 +mach13: 0xfffffffff152c000 pml4 0xfffffffff152c070 +Hello Squidboy 38 14 +mach14: 0xfffffffff1539000 pml4 0xfffffffff1539070 +Hello Squidboy 39 15 +mach15: 0xfffffffff1546000 pml4 0xfffffffff1546070 +Hello Squidboy 48 16 +mach16: 0xfffffffff1553000 pml4 0xfffffffff1553070 +Hello Squidboy 49 17 +mach17: 0xfffffffff1560000 pml4 0xfffffffff1560070 +Hello 
Squidboy 50 18 +mach18: 0xfffffffff156d000 pml4 0xfffffffff156d070 +Hello Squidboy 51 19 +mach19: 0xfffffffff157a000 pml4 0xfffffffff157a070 +Hello Squidboy 52 20 +mach20: 0xfffffffff1587000 pml4 0xfffffffff1587070 +Hello Squidboy 53 21 +mach21: 0xfffffffff1594000 pml4 0xfffffffff1594070 +Hello Squidboy 54 22 +mach22: 0xfffffffff15a1000 pml4 0xfffffffff15a1070 +Hello Squidboy 55 23 +mach23: 0xfffffffff15ae000 pml4 0xfffffffff15ae070 +Hello Squidboy 64 24 +mach24: 0xfffffffff15bb000 pml4 0xfffffffff15bb070 +Hello Squidboy 65 25 +mach25: 0xfffffffff15c8000 pml4 0xfffffffff15c8070 +Hello Squidboy 66 26 +mach26: 0xfffffffff15d5000 pml4 0xfffffffff15d5070 +Hello Squidboy 67 27 +mach27: 0xfffffffff15e2000 pml4 0xfffffffff15e2070 +Hello Squidboy 68 28 +mach28: 0xfffffffff15ef000 pml4 0xfffffffff15ef070 +Hello Squidboy 69 29 +mach29: 0xfffffffff15fc000 pml4 0xfffffffff15fc070 +Hello Squidboy 70 30 +mach30: 0xfffffffff1609000 pml4 0xfffffffff1609070 +Hello Squidboy 71 31 +mach31: 0xfffffffff1616000 pml4 0xfffffffff1616070 +mach 2 is go 0xfffffffff149d000 0xfffffffff1498000 fffffffff1497ff8 +cpu0: registers for *init* 1 +mach2: online +mach 3 is go 0xfffffffff14aa000 0xfffffffff14a5000 fffffffff14a4ff8 +mach 1 is go 0xfffffffff1490000 0xfffffffff148b000 fffffffff148aff8 +ax 0x0000000000000082 +mach1: online +mach3: online +bx 0x0000000000000000 +mach 4 is go 0xfffffffff14b7000 0xfffffffff14b2000 fffffffff14b1ff8 +cx 0x0000000000000000 +mach 5 is go 0xfffffffff14c4000 0xfffffffff14bf000 fffffffff14beff8 +dx 0xfffffffff010a080 +mach 18 is go 0xfffffffff156d000 0xfffffffff1568000 fffffffff1567ff8 +di 0x0000000000000000 +mach5: online +si 0xfffffffff023c500 +mach4: online +bp 0xfffffffff023c500 +mach 6 is go 0xfffffffff14d1000 0xfffffffff14cc000 fffffffff14cbff8 +r8 0xfffffffff01bb73e +mach 16 is go 0xfffffffff1553000 0xfffffffff154e000 fffffffff154dff8 +r9 0xfffffffff15f02c0 +mach 9 is go 0xfffffffff14f8000 0xfffffffff14f3000 fffffffff14f2ff8 +r10 0x0000000000000010 +mach18: 
online +r11 0x0000000000000000 +mach 7 is go 0xfffffffff14de000 0xfffffffff14d9000 fffffffff14d8ff8 +r12 0xfffffffff020117c +mach 8 is go 0xfffffffff14eb000 0xfffffffff14e6000 fffffffff14e5ff8 +r13 0x0000000000000000 +mach6: online +r14 0x0000000000000000 +mach7: online +r15 0x0000000000000000 +mach16: online +ds 0x0000 es 0x0000 fs 0x0000 gs 0x0000 +mach9: online +type 0x27 +mach 11 is go 0xfffffffff1512000 0xfffffffff150d000 fffffffff150cff8 +error 0xfffffffff011055e +mach 17 is go 0xfffffffff1560000 0xfffffffff155b000 fffffffff155aff8 +pc 0xfffffffff0114d7f +mach11: online +cs 0x8 +mach8: online +flags 0x246 +mach 19 is go 0xfffffffff157a000 0xfffffffff1575000 fffffffff1574ff8 +sp 0xfffffffff1482708 +mach 26 is go 0xfffffffff15d5000 0xfffffffff15d0000 fffffffff15cfff8 +ss 0x0 +mach 10 is go 0xfffffffff1505000 0xfffffffff1500000 fffffffff14ffff8 +type 0x27 +mach19: online +m 0xfffffffff010b000 +up 0xfffffffff023c500 +mach17: online +cr0 0x000000008001003b +mach10: online +cr2 0x0000000000000000 +mach 24 is go 0xfffffffff15bb000 0xfffffffff15b6000 fffffffff15b5ff8 +cr3 0x0000000000106000 +mach 25 is go 0xfffffffff15c8000 0xfffffffff15c3000 fffffffff15c2ff8 +vno 39: buggeration @ 0xfffffffff0114d7f... 
+mach26: online +spurious interrupt 39 +mach25: online +ibrk addr 0x4098a8 seg 3 base 0x408000 size 1 +mach24: online +mach 27 is go 0xfffffffff15e2000 0xfffffffff15dd000 fffffffff15dcff8 +ibrk addr 0x4098a8 newtop 0x40a000 newsize 2 +mach27: online +mach 20 is go 0xfffffffff1587000 0xfffffffff1582000 fffffffff1581ff8 +mach 12 is go 0xfffffffff151f000 0xfffffffff151a000 fffffffff1519ff8 +mach 14 is go 0xfffffffff1539000 0xfffffffff1534000 fffffffff1533ff8 +mach 15 is go 0xfffffffff1546000 0xfffffffff1541000 fffffffff1540ff8 +mach12: online +mach 23 is go 0xfffffffff15ae000 0xfffffffff15a9000 fffffffff15a8ff8 +mach 13 is go 0xfffffffff152c000 0xfffffffff1527000 fffffffff1526ff8 +mach14: online +mach 21 is go 0xfffffffff1594000 0xfffffffff158f000 fffffffff158eff8 +mach15: online +mach13: online +mach20: online +mach21: online +mach 28 is go 0xfffffffff15ef000 0xfffffffff15ea000 fffffffff15e9ff8 +mach 22 is go 0xfffffffff15a1000 0xfffffffff159c000 fffffffff159bff8 +mach23: online +mach 29 is go 0xfffffffff15fc000 0xfffffffff15f7000 fffffffff15f6ff8 +mach28: online +mach 31 is go 0xfffffffff1616000 0xfffffffff1611000 fffffffff1610ff8 +mach22: online +mach 30 is go 0xfffffffff1609000 0xfffffffff1604000 fffffffff1603ff8 +mach31: online +mach29: online +mach30: online +ibrk addr 0x407790 seg 3 base 0x405000 size 2 +ibrk addr 0x407790 newtop 0x408000 newsize 3 +ibrk addr 0x417858 seg 3 base 0x405000 size 3 +ibrk addr 0x417858 newtop 0x418000 newsize 19 +ibrk addr 0x418880 seg 3 base 0x405000 size 19 +ibrk addr 0x418880 newtop 0x419000 newsize 20 +ibrk addr 0x4115a0 seg 3 base 0x40f000 size 2 +ibrk addr 0x4115a0 newtop 0x412000 newsize 3 +can't open /boot/nvram: '/env/nvroff' file does not exist +authid: >>> q +aquamar% cp /dev/text /sys/src/9k/words/boot.out diff -Nru /sys/src/9k/words/mm /sys/src/9k/words/mm --- /sys/src/9k/words/mm Thu Jan 1 00:00:00 1970 +++ /sys/src/9k/words/mm Wed Dec 9 00:00:00 2015 @@ -0,0 +1,58 @@ +Notes on mm. 
+ +- initialization: + +asminit() + sys->pmstart set to roundup(end, pgsz) + sys->pmend = pmstart + [0, pmstart] allocated as type none + +multiboot() + asmmodinit() and + asmmapinit() + (prints later when called again by main) + asminsert() adjusts things (e.g. ignores low mem, overlaps...) + sys->pmend = largest addr seen +mmuinit() + archmmu() + m->pgszlg2[0], pgszmask[0], npgsz + cpuidinit() + same for [1] (2M) and [2] (1G) + sys->vmstart = KSEG0 + sys->vmunused = vmstart + pmstart rounded to 4KiB + i.e., pmstart in KSEG0 + sys->vmunmapped = KSEG0 + 4MiB + sys->vmend = vmstart + TMFM (64MiB kernel memory) + + NB: + mmuinit and walk call asmalloc for ptpgs + mmuput calls mallocalign and malloc + if later asmmeminit does not stop taking pages + for the kernel, asmalloc won't find memory and + mmu will fail to allocate. + +meminit() + asmmeminit() TODO: rename asmmeminit to be meminit()? + + asmalloc from vmunmapped [rnd up to 4KiB] to vmend, type none + TODO: change how this is done; do it directly + map up to vmend with 2MiB in KSEG0 + In clu, vmstart:vmend == 0:64M (0x4000000) + sys->vmunmapped is now vmend + + map all memory in asm at KSEG2, using larger pages if it can + In clu, from 64M (0x4000000) to 1G (0x40000000) + sets asm->base, kbase, npage to reflect the pages there + ialloclimit to ½ pages in the kernel memory (vmend - vmstart) + fills palloc pm according to base & npage + +mallocinit() + records tailnunits from sys->vmunused (end or pmstart in KSEG0) + to vmend (up to 64MiB) + nunits grows 128KiB at a time from tailnunits + +pageinit() + takes memory noted in palloc (by meminit) with npage > 0 and + splits banks according to acpi colors and + allocates a Page struct for each one. + But stops when 600MiB have been used, for now.