diff --git a/sys/src/cmd/ramcfs/bcache.c b/sys/src/cmd/ramcfs/bcache.c new file mode 100644 index 00000000..6f8935e2 --- /dev/null +++ b/sys/src/cmd/ramcfs/bcache.c @@ -0,0 +1,199 @@ +#include +#include +#include "cformat.h" +#include "lru.h" +#include "bcache.h" + +int +bcinit(Bcache *bc, char *mc, int bsize) +{ + Bbuf *b; + + /* + * allocate space for all buffers + * point all buffers into outer space + */ + bc->dfirst = 0; + bc->bsize = bsize; + bc->memcache = mc; + lruinit(bc); + for(b = bc->bb; b < &bc->bb[Nbcache]; b++){ + b->inuse = 0; + b->next = 0; + b->dirty = 0; + if(b->data == 0) + b->data = (char *)malloc(bc->bsize); + if(b->data == 0) + return -1; + lruadd(bc, b); + } + + return 0; +} + +/* + * Find a buffer for block b. If it's dirty, write it out. + */ +Bbuf * +bcfind(Bcache *bc, ulong bno) +{ + Bbuf *b; + + if(bno == Notabno) + error("bcfind: Notabno"); + bno &= ~Indbno; + + /* + * if we already have a buffer for this bno, use it + */ + for(b = bc->bb; b < &bc->bb[Nbcache]; b++) + if(b->inuse && b->bno==bno) + goto out; + + /* + * get least recently used block + */ + b = (Bbuf*)bc->lnext; +out: + /* + * if dirty, write it out + */ + if(b->dirty) + if(bcwrite(bc, b) < 0) + warning("writing dirty page"); + lruref(bc, b); + return b; +} + +/* + * allocate a buffer block for a block. it's guaranteed to be there till + * the next Nbcache bcread's. + */ +Bbuf * +bcalloc(Bcache *bc, ulong bno) +{ + Bbuf *b; + + b = bcfind(bc, bno); + bno &= ~Indbno; + b->bno = bno; + b->inuse = 1; + return b; +} + +/* + * read a block into a buffer cache. it's guaranteed to be there till + * the next Nbcache bcread's. 
+ */ +Bbuf * +bcread(Bcache *bc, ulong bno) +{ + Bbuf *b; + + b = bcfind(bc, bno); + bno &= ~Indbno; + if(b->bno!=bno || !b->inuse) + /* + * read in the one we really want + */ + if(bread(bc, bno, b->data) < 0){ + b->inuse = 0; + return 0; + } + b->bno = bno; + b->inuse = 1; + return b; +} + +/* + * mark a page dirty, if it's already dirty force a write + * + * N.B: ordering is important. + */ +void +bcmark(Bcache *bc, Bbuf *b) +{ + lruref(bc, b); + + if(b->dirty){ + bcwrite(bc, b); + return; + } + + b->dirty = 1; + if(bc->dfirst) + bc->dlast->next = b; + else + bc->dfirst = b; + bc->dlast = b; +} + +/* + * write out a page (and all preceding dirty ones) + */ +int +bcwrite(Bcache *bc, Bbuf *b) +{ + Bbuf *nb; + + /* + * write out all preceding pages + */ + while(nb = bc->dfirst){ + if(bwrite(bc, nb->bno, nb->data) < 0) + return -1; + nb->dirty = 0; + bc->dfirst = nb->next; + nb->next = 0; + if(nb == b) + return 0; + } + + /* + * write out this page + */ + if(bwrite(bc, b->bno, b->data) < 0) + return -1; + b->dirty = 0; + b->next = 0; + return 0; +} + +/* + * write out all dirty pages (in order) + */ +int +bcsync(Bcache *bc) +{ + if(bc->dfirst) + return bcwrite(bc, bc->dlast); + return 0; +} + +/* + * read a block from memory cache + */ +int +bread(Bcache *bc, ulong bno, void *buf) +{ + uvlong x = (uvlong)bno * bc->bsize; + + if (x > cachesize - bc->bsize) + return -1; + memmove(buf, bc->memcache + x, bc->bsize); + return 0; +} + +/* + * write a block to memory cache + */ +int +bwrite(Bcache *bc, ulong bno, void *buf) +{ + uvlong x = (uvlong)bno * bc->bsize; + + if (x > cachesize - bc->bsize) + return -1; + memmove(bc->memcache + x, buf, bc->bsize); + return 0; +} diff --git a/sys/src/cmd/ramcfs/bcache.h b/sys/src/cmd/ramcfs/bcache.h new file mode 100644 index 00000000..6e190e31 --- /dev/null +++ b/sys/src/cmd/ramcfs/bcache.h @@ -0,0 +1,45 @@ +typedef struct Bbuf Bbuf; +typedef struct Bcache Bcache; + +enum +{ + Nbcache= 32, /* number of blocks kept in pool */ +}; + 
+/*
+ * block cache descriptor
+ */
+struct Bbuf
+{
+	Lru;			/* must be first in struct */
+	ulong	bno;		/* block held; bcalloc/bcread store it with Indbno cleared */
+	int	inuse;		/* set once the buffer is assigned to block bno */
+	Bbuf	*next;		/* next in dirty list */
+	int	dirty;		/* set by bcmark, cleared by bcwrite */
+	char	*data;		/* block contents, bsize bytes (allocated in bcinit) */
+};
+
+/*
+ * the buffer cache
+ */
+struct Bcache
+{
+	Lru;
+	int	bsize;		/* block size in bytes */
+	char	*memcache;	/* memory cache base */
+	Bbuf	*dfirst;	/* dirty list */
+	Bbuf	*dlast;		/* tail of dirty list; only used while dfirst is set */
+	Bbuf	bb[Nbcache];	/* the buffers themselves */
+};
+
+int	bcinit(Bcache*, char*, int);
+Bbuf*	bcalloc(Bcache*, ulong);
+Bbuf*	bcread(Bcache*, ulong);
+void	bcmark(Bcache*, Bbuf*);
+int	bcwrite(Bcache*, Bbuf*);
+int	bcsync(Bcache*);
+int	bread(Bcache*, ulong, void*);
+int	bwrite(Bcache*, ulong, void*);
+int	bref(Bcache*, Bbuf*);
+void	error(char*, ...);
+void	warning(char*);
diff --git a/sys/src/cmd/ramcfs/cformat.h b/sys/src/cmd/ramcfs/cformat.h
new file mode 100644
index 00000000..b69e3ee8
--- /dev/null
+++ b/sys/src/cmd/ramcfs/cformat.h
@@ -0,0 +1,75 @@
+/*
+ * format of cache on disk
+ */
+typedef struct Dptr	Dptr;
+typedef struct Dahdr	Dahdr;
+typedef struct Dalloc	Dalloc;
+typedef struct Fphdr	Fphdr;
+typedef struct Fptr	Fptr;
+typedef struct Inode	Inode;
+typedef struct Dihdr	Dihdr;
+typedef struct Dinode	Dinode;
+
+enum
+{
+	Amagic= 	0xbebeefed,	/* allocation block magic */
+	Imagic=		0xbadc00ce,	/* inode block magic */
+	BtoUL=		8*sizeof(ulong),/* bits in a ulong */
+	CACHENAMELEN=	128		/* size of the name field in Dahdr */
+};
+#define	Indbno		0x80000000	/* indirect block */
+#define	Notabno		0xFFFFFFFF	/* not a block number */
+
+/*
+ * Allocation blocks at the beginning of the disk.
There are + * enough of these blocks to supply 1 bit for each block on the + * disk; + */ +struct Dahdr +{ + ulong magic; + ulong bsize; /* logical block size */ + char name[CACHENAMELEN]; + short nab; /* number of allocation blocks */ +}; +struct Dalloc +{ + Dahdr; + ulong bits[1]; +}; + +/* + * A pointer to disk data + */ +struct Dptr +{ + ulong fbno; /* file block number */ + ulong bno; /* disk block number */ + ushort start; /* offset into block of valid data */ + ushort end; /* offset into block after valid data */ +}; + +/* + * A file descriptor. + */ +struct Inode +{ + Qid qid; + vlong length; + Dptr ptr; /* pointer page */ + char inuse; +}; + +/* + * inode blocks (after allocation blocks) + */ +struct Dihdr +{ + ulong magic; + ulong nino; /* number of inodes */ +}; +struct Dinode +{ + Dihdr; + Inode inode[1]; +}; diff --git a/sys/src/cmd/ramcfs/cfs.c b/sys/src/cmd/ramcfs/cfs.c new file mode 100644 index 00000000..96f07d05 --- /dev/null +++ b/sys/src/cmd/ramcfs/cfs.c @@ -0,0 +1,914 @@ +#include +#include +#include +#include + +#include "cformat.h" +#include "lru.h" +#include "bcache.h" +#include "disk.h" +#include "inode.h" +#include "file.h" +#include "stats.h" + +enum +{ + Nfid= 10240, +}; + +/* maximum length of a file */ +enum { MAXLEN = ~0ULL >> 1 }; + +typedef struct Mfile Mfile; +typedef struct Ram Ram; +typedef struct P9fs P9fs; + +struct Mfile +{ + Qid qid; + char busy; +}; + +Mfile mfile[Nfid]; +Icache ic; +int debug, statson, noauth, openserver; +int readonly; /* flag: tree being cached is expected to not change */ + +struct P9fs +{ + int fd[2]; + Fcall rhdr; + Fcall thdr; + long len; + char *name; +}; + +P9fs c; /* client conversation */ +P9fs s; /* server conversation */ + +struct Cfsstat cfsstat, cfsprev; +char statbuf[2048]; +int statlen; + +#define MAXFDATA 8192 /* i/o size for read/write */ + +int messagesize = MAXFDATA+IOHDRSZ; + +uchar datasnd[MAXFDATA + IOHDRSZ]; +uchar datarcv[MAXFDATA + IOHDRSZ]; + +Qid rootqid; +Qid ctlqid = 
{0x5555555555555555LL, 0, 0}; + +ulong cachesize = 512 * 1024 * 1024; + +void rversion(void); +void rauth(Mfile*); +void rflush(void); +void rattach(Mfile*); +void rwalk(Mfile*); +void ropen(Mfile*); +void rcreate(Mfile*); +void rread(Mfile*); +void rwrite(Mfile*); +void rclunk(Mfile*); +void rremove(Mfile*); +void rstat(Mfile*); +void rwstat(Mfile*); +void error(char*, ...); +void warning(char*); +void mountinit(char*, char*); +void io(void); +void sendreply(char*); +void sendmsg(P9fs*, Fcall*); +void rcvmsg(P9fs*, Fcall*); +int delegate(void); +int askserver(void); +void cachesetup(int, char*, char*); +int ctltest(Mfile*); +void genstats(void); + +char *mname[]={ + [Tversion] "Tversion", + [Tauth] "Tauth", + [Tflush] "Tflush", + [Tattach] "Tattach", + [Twalk] "Twalk", + [Topen] "Topen", + [Tcreate] "Tcreate", + [Tclunk] "Tclunk", + [Tread] "Tread", + [Twrite] "Twrite", + [Tremove] "Tremove", + [Tstat] "Tstat", + [Twstat] "Twstat", + [Rversion] "Rversion", + [Rauth] "Rauth", + [Rerror] "Rerror", + [Rflush] "Rflush", + [Rattach] "Rattach", + [Rwalk] "Rwalk", + [Ropen] "Ropen", + [Rcreate] "Rcreate", + [Rclunk] "Rclunk", + [Rread] "Rread", + [Rwrite] "Rwrite", + [Rremove] "Rremove", + [Rstat] "Rstat", + [Rwstat] "Rwstat", + 0, +}; + +void +usage(void) +{ + fprint(2, "usage:\t%s -s [-dnrS] [-m size]\n", argv0); + fprint(2, "\t%s [-a netaddr | -F srv] [-dnrS] [-m size] [mntpt]\n", + argv0); + exits("usage"); +} + +void +main(int argc, char *argv[]) +{ + int std; + char *server, *mtpt; + + std = 0; + server = "tcp!pie"; + mtpt = "/n/ramcfs"; + + ARGBEGIN{ + case 'a': + server = EARGF(usage()); + break; + case 'd': + debug = 1; + break; + case 'F': + server = EARGF(usage()); + openserver = 1; + break; + case 'm': + cachesize = atoi(EARGF(usage())) * 1024 * 1024; + if (cachesize < 8 * 1024 * 1024 || + cachesize > 3750UL * 1024 * 1024) + sysfatal("implausible cache size %lud", cachesize); + break; + case 'n': + noauth = 1; + break; + case 'r': + readonly = 1; + break; + 
case 'S': + statson = 1; + break; + case 's': + std = 1; + break; + default: + usage(); + }ARGEND + if(argc && *argv) + mtpt = *argv; + + if(debug) + fmtinstall('F', fcallfmt); + + c.name = "client"; + s.name = "server"; + if(std){ + c.fd[0] = c.fd[1] = 1; + s.fd[0] = s.fd[1] = 0; + }else + mountinit(server, mtpt); + + cachesetup(1, nil, nil); + + switch(fork()){ + case 0: + io(); + exits(""); + case -1: + error("fork"); + default: + exits(""); + } +} + +void +cachesetup(int format, char *name, char *) +{ + int secsize; + int inodes; + int blocksize; + char *memcache; + + secsize = 512; /* only really matters for disks */ + blocksize = 4*1024; + inodes = 2*1024; + + memcache = malloc(cachesize); + if(memcache == nil) + error("can't allocate memory for cache: %r"); + + /* + * Always format. If we don't have a name, fall + * back to our old behavior of using "bootes" + */ + USED(format); + name = (name == nil? "bootes": name); + if(iformat(&ic, memcache, inodes, name, blocksize, secsize) < 0) + error("formatting failed"); +} + +void +mountinit(char *server, char *mountpoint) +{ + int err; + int p[2]; + + /* + * grab a channel and call up the file server + */ + if (openserver) { + s.fd[0] = open(server, ORDWR); + if(s.fd[0] < 0) + error("opening srv file %s: %r", server); + } else { + s.fd[0] = dial(netmkaddr(server, 0, "9fs"), 0, 0, 0); + if(s.fd[0] < 0) + error("dialing %s: %r", server); + } + s.fd[1] = s.fd[0]; + + /* + * mount onto name space + */ + if(pipe(p) < 0) + error("pipe failed"); + switch(fork()){ + case 0: + break; + default: + if (noauth) + err = mount(p[1], -1, mountpoint, MREPL|MCREATE|MCACHE, ""); + else + err = amount(p[1], mountpoint, MREPL|MCREATE|MCACHE, ""); + if (err < 0) + error("mount failed: %r"); + exits(0); + case -1: + error("fork failed\n"); +/*BUG: no wait!*/ + } + c.fd[0] = c.fd[1] = p[0]; +} + +void +io(void) +{ + int type; + Mfile *mf; + loop: + rcvmsg(&c, &c.thdr); + + type = c.thdr.type; + + if(statson){ + cfsstat.cm[type].n++; + 
cfsstat.cm[type].s = nsec();
+	}
+	mf = &mfile[c.thdr.fid];
+	switch(type){
+	default:
+		error("type");
+		break;
+	case Tversion:
+		rversion();
+		break;
+	case Tauth:
+		/*
+		 * rcvmsg range-checks only the fid field, so check
+		 * the client-supplied afid before indexing mfile
+		 */
+		if(c.thdr.afid >= Nfid)
+			error("auth afid out of range");
+		mf = &mfile[c.thdr.afid];
+		rauth(mf);
+		break;
+	case Tflush:
+		rflush();
+		break;
+	case Tattach:
+		rattach(mf);
+		break;
+	case Twalk:
+		rwalk(mf);
+		break;
+	case Topen:
+		ropen(mf);
+		break;
+	case Tcreate:
+		rcreate(mf);
+		break;
+	case Tread:
+		rread(mf);
+		break;
+	case Twrite:
+		rwrite(mf);
+		break;
+	case Tclunk:
+		rclunk(mf);
+		break;
+	case Tremove:
+		rremove(mf);
+		break;
+	case Tstat:
+		rstat(mf);
+		break;
+	case Twstat:
+		rwstat(mf);
+		break;
+	}
+	if(statson){
+		cfsstat.cm[type].t += nsec() -cfsstat.cm[type].s;
+	}
+	goto loop;
+}
+
+/*
+ * version: never use a message size larger than the client
+ * offers, then pass the request on to the server
+ */
+void
+rversion(void)
+{
+	if(messagesize > c.thdr.msize)
+		messagesize = c.thdr.msize;
+	c.thdr.msize = messagesize;	/* set downstream size */
+	delegate();
+}
+
+/*
+ * auth: if the server accepts, the afid's Mfile takes the
+ * returned aqid and becomes busy
+ */
+void
+rauth(Mfile *mf)
+{
+	if(mf->busy)
+		error("auth to used channel");
+
+	if(delegate() == 0){
+		mf->qid = s.rhdr.aqid;
+		mf->busy = 1;
+	}
+}
+
+void
+rflush(void)		/* synchronous so easy */
+{
+	sendreply(0);
+}
+
+/*
+ * attach: if the server accepts, remember the qid.  when stats
+ * are on, the first attach also records the root qid; statson
+ * is bumped from 1 to 2 so that this happens only once.
+ */
+void
+rattach(Mfile *mf)
+{
+	if(delegate() == 0){
+		mf->qid = s.rhdr.qid;
+		mf->busy = 1;
+		if (statson == 1){
+			statson++;
+			rootqid = mf->qid;
+		}
+	}
+}
+
+/*
+ * walk: with stats on, a one-element walk from the root to
+ * "cfsctl" is answered here.  otherwise newfid (if different
+ * from fid) is set up as a clone of fid and the walk is
+ * delegated to the server.  newfid comes from the client and
+ * is range-checked before it indexes mfile on either path.
+ */
+void
+rwalk(Mfile *mf)
+{
+	Mfile *nmf;
+
+	nmf = nil;
+	if(statson
+	  && mf->qid.type == rootqid.type && mf->qid.path == rootqid.path
+	  && c.thdr.nwname == 1 && strcmp(c.thdr.wname[0], "cfsctl") == 0){
+		/* This is the ctl file */
+		if(c.thdr.newfid >= Nfid)
+			error("clone nfid out of range");
+		nmf = &mfile[c.thdr.newfid];
+		if(c.thdr.newfid != c.thdr.fid && nmf->busy)
+			error("clone to used channel");
+		nmf->qid = ctlqid;
+		nmf->busy = 1;
+		c.rhdr.nwqid = 1;
+		c.rhdr.wqid[0] = ctlqid;
+		sendreply(0);
+		return;
+	}
+	if(c.thdr.newfid != c.thdr.fid){
+		if(c.thdr.newfid >= Nfid)
+			error("clone nfid out of range");
+		nmf = &mfile[c.thdr.newfid];
+		if(nmf->busy)
+			error("clone to used channel");
+		nmf->qid = mf->qid;
+		
nmf->busy = 1; + mf = nmf; /* Walk mf */ + } + + if(delegate() < 0){ /* complete failure */ + if(nmf) + nmf->busy = 0; + return; + } + + if(s.rhdr.nwqid == c.thdr.nwname){ /* complete success */ + if(s.rhdr.nwqid > 0) + mf->qid = s.rhdr.wqid[s.rhdr.nwqid-1]; + return; + } + + /* partial success; release fid */ + if(nmf) + nmf->busy = 0; +} + +void +ropen(Mfile *mf) +{ + if(statson && ctltest(mf)){ + /* Opening ctl file */ + if(c.thdr.mode != OREAD){ + sendreply("does not exist"); + return; + } + c.rhdr.qid = ctlqid; + c.rhdr.iounit = 0; + sendreply(0); + genstats(); + return; + } + if(delegate() == 0){ + mf->qid = s.rhdr.qid; + if(c.thdr.mode & OTRUNC) + iget(&ic, mf->qid); + } +} + +void +rcreate(Mfile *mf) +{ + if(statson && ctltest(mf)){ + sendreply("exists"); + return; + } + if(delegate() == 0){ + mf->qid = s.rhdr.qid; + mf->qid.vers++; + } +} + +void +rclunk(Mfile *mf) +{ + if(!mf->busy){ + sendreply(0); + return; + } + mf->busy = 0; + delegate(); +} + +void +rremove(Mfile *mf) +{ + if(statson && ctltest(mf)){ + sendreply("not removed"); + return; + } + mf->busy = 0; + delegate(); +} + +void +rread(Mfile *mf) +{ + int cnt, done; + long n; + vlong off, first; + char *cp; + char data[MAXFDATA]; + Ibuf *b; + + off = c.thdr.offset; + first = off; + cnt = c.thdr.count; + + if(statson && ctltest(mf)){ + if(cnt > statlen-off) + c.rhdr.count = statlen-off; + else + c.rhdr.count = cnt; + if((int)c.rhdr.count < 0){ + sendreply("eof"); + return; + } + c.rhdr.data = statbuf + off; + sendreply(0); + return; + } + if(mf->qid.type & (QTDIR|QTAUTH)){ + delegate(); + if (statson) { + cfsstat.ndirread++; + if(c.rhdr.count > 0){ + cfsstat.bytesread += c.rhdr.count; + cfsstat.bytesfromdirs += c.rhdr.count; + } + } + return; + } + + b = iget(&ic, mf->qid); + if(b == 0){ + DPRINT(2, "delegating read\n"); + delegate(); + if (statson){ + cfsstat.ndelegateread++; + if(c.rhdr.count > 0){ + cfsstat.bytesread += c.rhdr.count; + cfsstat.bytesfromserver += c.rhdr.count; + } + } + return; + 
} + + cp = data; + done = 0; + while(cnt>0 && !done){ + if(off >= b->inode.length){ + DPRINT(2, "offset %lld greater than length %lld\n", + off, b->inode.length); + break; + } + n = fread(&ic, b, cp, off, cnt); + if(n <= 0){ + n = -n; + if(n==0 || n>cnt) + n = cnt; + DPRINT(2, + "fetch %ld bytes of data from server at offset %lld\n", + n, off); + s.thdr.type = c.thdr.type; + s.thdr.fid = c.thdr.fid; + s.thdr.tag = c.thdr.tag; + s.thdr.offset = off; + s.thdr.count = n; + if(statson) + cfsstat.ndelegateread++; + if(askserver() < 0){ + sendreply(s.rhdr.ename); + return; + } + if(s.rhdr.count != n) + done = 1; + n = s.rhdr.count; + if(n == 0){ + /* end of file */ + if(b->inode.length > off){ + DPRINT(2, "file %llud.%ld, length %lld\n", + b->inode.qid.path, + b->inode.qid.vers, off); + b->inode.length = off; + } + break; + } + memmove(cp, s.rhdr.data, n); + fwrite(&ic, b, cp, off, n); + if (statson){ + cfsstat.bytestocache += n; + cfsstat.bytesfromserver += n; + } + }else{ + DPRINT(2, "fetched %ld bytes from cache\n", n); + if(statson) + cfsstat.bytesfromcache += n; + } + cnt -= n; + off += n; + cp += n; + } + c.rhdr.data = data; + c.rhdr.count = off - first; + if(statson) + cfsstat.bytesread += c.rhdr.count; + sendreply(0); +} + +void +rwrite(Mfile *mf) +{ + Ibuf *b; + char buf[MAXFDATA]; + + if(statson && ctltest(mf)){ + sendreply("read only"); + return; + } + if(mf->qid.type & (QTDIR|QTAUTH)){ + delegate(); + if(statson && c.rhdr.count > 0) + cfsstat.byteswritten += c.rhdr.count; + return; + } + + memmove(buf, c.thdr.data, c.thdr.count); + if(delegate() < 0) + return; + + if(s.rhdr.count > 0) + cfsstat.byteswritten += s.rhdr.count; + /* don't modify our cache for append-only data; always read from server*/ + if(mf->qid.type & QTAPPEND) + return; + b = iget(&ic, mf->qid); + if(b == 0) + return; + if (b->inode.length < c.thdr.offset + s.rhdr.count) + b->inode.length = c.thdr.offset + s.rhdr.count; + mf->qid.vers++; + if (s.rhdr.count != c.thdr.count) + syslog(0, 
"cfslog", "rhdr.count %ud, thdr.count %ud\n", + s.rhdr.count, c.thdr.count); + if(fwrite(&ic, b, buf, c.thdr.offset, s.rhdr.count) == s.rhdr.count){ + iinc(&ic, b); + if(statson) + cfsstat.bytestocache += s.rhdr.count; + } +} + +void +rstat(Mfile *mf) +{ + Dir d; + + if(statson && ctltest(mf)){ + genstats(); + d.qid = ctlqid; + d.mode = 0444; + d.length = statlen; /* would be nice to do better */ + d.name = "cfsctl"; + d.uid = "none"; + d.gid = "none"; + d.muid = "none"; + d.atime = time(nil); + d.mtime = d.atime; + c.rhdr.nstat = convD2M(&d, c.rhdr.stat, + sizeof c.rhdr - (c.rhdr.stat - (uchar*)&c.rhdr)); + sendreply(0); + return; + } + if(delegate() == 0){ + Ibuf *b; + + convM2D(s.rhdr.stat, s.rhdr.nstat , &d, nil); + mf->qid = d.qid; + b = iget(&ic, mf->qid); + if(b) + b->inode.length = d.length; + } +} + +void +rwstat(Mfile *mf) +{ + Ibuf *b; + + if(statson && ctltest(mf)){ + sendreply("read only"); + return; + } + delegate(); + if(b = iget(&ic, mf->qid)) + b->inode.length = MAXLEN; +} + +void +error(char *fmt, ...) +{ + va_list arg; + static char buf[2048]; + + va_start(arg, fmt); + vseprint(buf, buf+sizeof(buf), fmt, arg); + va_end(arg); + fprint(2, "%s: %s\n", argv0, buf); + exits("error"); +} + +void +warning(char *s) +{ + fprint(2, "%s: %s: %r\n", argv0, s); +} + +/* + * send a reply to the client + */ +void +sendreply(char *err) +{ + + if(err){ + c.rhdr.type = Rerror; + c.rhdr.ename = err; + }else{ + c.rhdr.type = c.thdr.type+1; + c.rhdr.fid = c.thdr.fid; + } + c.rhdr.tag = c.thdr.tag; + sendmsg(&c, &c.rhdr); +} + +/* + * send a request to the server, get the reply, and send that to + * the client + */ +int +delegate(void) +{ + int type; + + type = c.thdr.type; + if(statson){ + cfsstat.sm[type].n++; + cfsstat.sm[type].s = nsec(); + } + + sendmsg(&s, &c.thdr); + rcvmsg(&s, &s.rhdr); + + if(statson) + cfsstat.sm[type].t += nsec() - cfsstat.sm[type].s; + + sendmsg(&c, &s.rhdr); + return c.thdr.type+1 == s.rhdr.type ? 
0 : -1;
+}
+
+/*
+ * send a request to the server and get a reply
+ */
+int
+askserver(void)
+{
+	int type;
+
+	s.thdr.tag = c.thdr.tag;
+
+	type = s.thdr.type;
+	if(statson){
+		cfsstat.sm[type].n++;
+		cfsstat.sm[type].s = nsec();
+	}
+
+	sendmsg(&s, &s.thdr);
+	rcvmsg(&s, &s.rhdr);
+
+	if(statson)
+		cfsstat.sm[type].t += nsec() - cfsstat.sm[type].s;
+
+	return s.thdr.type+1 == s.rhdr.type ? 0 : -1;
+}
+
+/*
+ * send/receive messages with logging
+ */
+void
+sendmsg(P9fs *p, Fcall *f)
+{
+	DPRINT(2, "->%s: %F\n", p->name, f);
+
+	p->len = convS2M(f, datasnd, messagesize);
+	if(p->len <= 0)
+		error("convS2M");
+	if(write(p->fd[1], datasnd, p->len)!=p->len)
+		error("sendmsg");
+}
+
+/*
+ * print len bytes starting at p, in hex, on fd 2
+ */
+void
+dump(uchar *p, int len)
+{
+	fprint(2, "%d bytes", len);
+	while(len-- > 0)
+		fprint(2, " %.2ux", *p++);
+	fprint(2, "\n");
+}
+
+/*
+ * read one 9P message from p and unpack it into f.
+ * a read error, a malformed message or a fid that is
+ * out of range is fatal.
+ */
+void
+rcvmsg(P9fs *p, Fcall *f)
+{
+	int olen, rlen;
+
+	olen = p->len;
+	p->len = read9pmsg(p->fd[0], datarcv, sizeof(datarcv));
+	/*
+	 * format the message here: handing error() a preformatted
+	 * buffer as its format would reinterpret any % that the
+	 * system error string happened to contain
+	 */
+	if(p->len <= 0)
+		error("read9pmsg(%d)->%ld: %r", p->fd[0], p->len);
+
+	if((rlen = convM2S(datarcv, p->len, f)) != p->len)
+		error("rcvmsg format error, expected length %d, got %ld",
+			rlen, p->len);
+	if(f->fid >= Nfid){
+		fprint(2, "<-%s: %d %s on %d\n", p->name, f->type,
+			mname[f->type]?
mname[f->type]: "mystery", f->fid); + dump((uchar*)datasnd, olen); + dump((uchar*)datarcv, p->len); + error("rcvmsg fid out of range"); + } + DPRINT(2, "<-%s: %F\n", p->name, f); +} + +int +ctltest(Mfile *mf) +{ + return mf->busy && mf->qid.type == ctlqid.type && + mf->qid.path == ctlqid.path; +} + +void +genstats(void) +{ + int i; + char *p; + + p = statbuf; + + p += snprint(p, sizeof statbuf+statbuf-p, + " Client Server\n"); + p += snprint(p, sizeof statbuf+statbuf-p, + " #calls Δ ms/call Δ #calls Δ ms/call Δ\n"); + for (i = 0; i < nelem(cfsstat.cm); i++) + if(cfsstat.cm[i].n || cfsstat.sm[i].n) { + p += snprint(p, sizeof statbuf+statbuf-p, + "%7lud %7lud ", cfsstat.cm[i].n, + cfsstat.cm[i].n - cfsprev.cm[i].n); + if (cfsstat.cm[i].n) + p += snprint(p, sizeof statbuf+statbuf-p, + "%7.3f ", 0.000001*cfsstat.cm[i].t/ + cfsstat.cm[i].n); + else + p += snprint(p, sizeof statbuf+statbuf-p, + " "); + if(cfsstat.cm[i].n - cfsprev.cm[i].n) + p += snprint(p, sizeof statbuf+statbuf-p, + "%7.3f ", 0.000001* + (cfsstat.cm[i].t - cfsprev.cm[i].t)/ + (cfsstat.cm[i].n - cfsprev.cm[i].n)); + else + p += snprint(p, sizeof statbuf+statbuf-p, + " "); + p += snprint(p, sizeof statbuf+statbuf-p, + "%7lud %7lud ", cfsstat.sm[i].n, + cfsstat.sm[i].n - cfsprev.sm[i].n); + if (cfsstat.sm[i].n) + p += snprint(p, sizeof statbuf+statbuf-p, + "%7.3f ", 0.000001*cfsstat.sm[i].t/ + cfsstat.sm[i].n); + else + p += snprint(p, sizeof statbuf+statbuf-p, + " "); + if(cfsstat.sm[i].n - cfsprev.sm[i].n) + p += snprint(p, sizeof statbuf+statbuf-p, + "%7.3f ", 0.000001* + (cfsstat.sm[i].t - cfsprev.sm[i].t)/ + (cfsstat.sm[i].n - cfsprev.sm[i].n)); + else + p += snprint(p, sizeof statbuf+statbuf-p, + " "); + p += snprint(p, sizeof statbuf+statbuf-p, "%s\n", + mname[i]); + } + p += snprint(p, sizeof statbuf+statbuf-p, "%7lud %7lud ndirread\n", + cfsstat.ndirread, cfsstat.ndirread - cfsprev.ndirread); + p += snprint(p, sizeof statbuf+statbuf-p, "%7lud %7lud ndelegateread\n", + cfsstat.ndelegateread, 
cfsstat.ndelegateread - + cfsprev.ndelegateread); + p += snprint(p, sizeof statbuf+statbuf-p, "%7lud %7lud ninsert\n", + cfsstat.ninsert, cfsstat.ninsert - cfsprev.ninsert); + p += snprint(p, sizeof statbuf+statbuf-p, "%7lud %7lud ndelete\n", + cfsstat.ndelete, cfsstat.ndelete - cfsprev.ndelete); + p += snprint(p, sizeof statbuf+statbuf-p, "%7lud %7lud nupdate\n", + cfsstat.nupdate, cfsstat.nupdate - cfsprev.nupdate); + + p += snprint(p, sizeof statbuf+statbuf-p, "%7llud %7llud bytesread\n", + cfsstat.bytesread, cfsstat.bytesread - cfsprev.bytesread); + p += snprint(p, sizeof statbuf+statbuf-p, "%7llud %7llud byteswritten\n", + cfsstat.byteswritten, cfsstat.byteswritten - + cfsprev.byteswritten); + p += snprint(p, sizeof statbuf+statbuf-p, "%7llud %7llud bytesfromserver\n", + cfsstat.bytesfromserver, cfsstat.bytesfromserver - + cfsprev.bytesfromserver); + p += snprint(p, sizeof statbuf+statbuf-p, "%7llud %7llud bytesfromdirs\n", + cfsstat.bytesfromdirs, cfsstat.bytesfromdirs - + cfsprev.bytesfromdirs); + p += snprint(p, sizeof statbuf+statbuf-p, "%7llud %7llud bytesfromcache\n", + cfsstat.bytesfromcache, cfsstat.bytesfromcache - + cfsprev.bytesfromcache); + p += snprint(p, sizeof statbuf+statbuf-p, "%7llud %7llud bytestocache\n", + cfsstat.bytestocache, cfsstat.bytestocache - + cfsprev.bytestocache); + statlen = p - statbuf; + cfsprev = cfsstat; +} diff --git a/sys/src/cmd/ramcfs/disk.c b/sys/src/cmd/ramcfs/disk.c new file mode 100644 index 00000000..320ad708 --- /dev/null +++ b/sys/src/cmd/ramcfs/disk.c @@ -0,0 +1,344 @@ +#include +#include +#include "cformat.h" +#include "lru.h" +#include "bcache.h" +#include "disk.h" + +int icformat(Disk*, ulong); + +/* + * read in the disk structures, return -1 if the format + * is inconsistent. 
+ */ +int +dinit(Disk *d, char *mc, int psize, char *expname) +{ + ulong i; + uvlong length; + char buf[1024]; + Bbuf *b; + Dalloc *ba; + + /* + * get disk size + */ + length = cachesize; + + /* + * read first physical block to get logical block size, # of inodes, + * and # of allocation blocks + */ + memmove(buf, mc, sizeof buf); + + ba = (Dalloc*)buf; + if(ba->bsize <= 0){ + fprint(2, "dinit: bsize 0x%lux<= 0\n", ba->bsize); + return -1; + } + if((ba->bsize % psize) != 0){ + fprint(2, "dinit: logical bsize (%lud) not multiple of physical (%ud)\n", + ba->bsize, psize); + return -1; + } + d->bsize = ba->bsize; + d->nb = length/d->bsize; + d->b2b = (d->bsize - sizeof(Dahdr))*8; + d->nab = (d->nb+d->b2b-1)/d->b2b; + d->p2b = d->bsize/sizeof(Dptr); + strncpy(d->name, ba->name, sizeof d->name); + + if (expname != nil && strncmp(d->name, expname, sizeof d->name) != 0) { + /* Mismatch with recorded name; fail here to force a format */ + fprint(2, "%s: name mismatch\n", argv0); + return -1; + } + + /* + * check allocation blocks for consistency + */ + if(bcinit(d, mc, d->bsize) < 0){ + fprint(2, "%s: dinit: couldn't init block cache\n", argv0); + return -1; + } + for(i = 0; i < d->nab; i++){ + b = bcread(d, i); + if(b == 0){ + perror("dinit: read"); + return -1; + } + ba = (Dalloc*)b->data; + if(ba->magic != Amagic){ + fprint(2, "dinit: bad magic in alloc block %uld\n", i); + return -1; + } + if(d->bsize != ba->bsize){ + fprint(2, "dinit: bad bsize in alloc block %uld\n", i); + return -1; + } + if(d->nab != ba->nab){ + fprint(2, "dinit: bad nab in alloc block %uld\n", i); + return -1; + } + if(strncmp(d->name, ba->name, sizeof(d->name))){ + fprint(2, "dinit: bad name in alloc block %uld\n", i); + return -1; + } + } + return 0; +} + +/* + * format the allocated memory as a cache + */ +int +dformat(Disk *d, char *mc, char *name, ulong bsize, ulong psize) +{ + int i; + uvlong length; + Bbuf *b; + Dalloc *ba; + Dptr dptr; + + fprint(2, "formatting memory\n"); + + /* + * 
calculate basic numbers + */ + length = cachesize; + d->bsize = bsize; + if((d->bsize % psize) != 0){ + fprint(2, "%s: logical bsize not multiple of physical\n", argv0); + return -1; + } + d->nb = length/d->bsize; + d->b2b = (d->bsize - sizeof(Dahdr))*8; + d->nab = (d->nb+d->b2b-1)/d->b2b; + d->p2b = d->bsize/sizeof(Dptr); + + /* + * init allocation blocks + */ + if(bcinit(d, mc, d->bsize) < 0) + return -1; + for(i = 0; i < d->nab; i++){ + b = bcalloc(d, i); + if(b == 0){ + perror("cfs: bcalloc"); + return -1; + } + memset(b->data, 0, d->bsize); + ba = (Dalloc*)b->data; + ba->magic = Amagic; + ba->bsize = d->bsize; + ba->nab = d->nab; + strncpy(ba->name, name, sizeof(ba->name)); + bcmark(d, b); + } + + /* + * allocate allocation blocks + */ + for(i = 0; i < d->nab; i++) + if(dalloc(d, &dptr) == Notabno){ + fprint(2, "can't allocate allocation blocks\n"); + return -1; + } + + return bcsync(d); +} + +/* + * allocate a block from a bit vector page + * + * a return value of Notabno means no blocks left + */ +static ulong +_balloc(Dalloc *ba, ulong max) +{ + int len; /* number of valid words */ + ulong i; /* bit position in long */ + ulong m; /* 1<bits, e = p + len; p < e; p++) + if(*p != 0xFFFFFFFF) + break; + if(p == e) + return Notabno; + + /* + * find the first 0 bit + */ + v = *p; + for(m = 1, i = 0; i < BtoUL; i++, m <<= 1) + if((m|v) != v) + break; + + /* + * calculate block number + */ + i += (p - ba->bits)*BtoUL; + if(i >= max) + return Notabno; + + /* + * set bit to 1 + */ + *p = v | m; + return i; +} + +/* + * allocate a block + * + * return Notabno if none left + */ +ulong +dalloc(Disk *d, Dptr *p) +{ + ulong bno, max, rv; + Bbuf *b; + Dalloc *ba; + + max = d->nb; + for(bno = 0; bno < d->nab; bno++){ + b = bcread(d, bno); + ba = (Dalloc*)b->data; + rv = _balloc(ba, max > d->b2b ? 
d->b2b : max); + if(rv != Notabno){ + rv = bno*d->b2b + rv; + if(p){ + p->start = p->end = 0; + p->bno = rv; + } + bcmark(d, b); + return rv; + } + max -= d->b2b; + } + if(p) + p->bno = Notabno; + return Notabno; +} + +/* + * allocate a block of pointers + */ +ulong +dpalloc(Disk *d, Dptr *p) +{ + Bbuf *b; + Dptr *sp, *ep; + + if(dalloc(d, p) == Notabno) + return Notabno; + + /* + * allocate the page and invalidate all the + * pointers + */ + b = bcalloc(d, p->bno); + if(b == 0) + return -1; + sp = (Dptr*)b->data; + for(ep = sp + d->p2b; sp < ep; sp++){ + sp->bno = Notabno; + sp->start = sp->end = 0; + } + p->bno |= Indbno; + p->start = 0; + p->end = d->bsize; + + /* + * mark the page as dirty + */ + bcmark(d, b); + return 0; +} + +/* + * free a block + */ +int +_bfree(Disk *d, ulong i) +{ + ulong bno, m; + ulong *p; + Bbuf *b; + Dalloc *ba; + + /* + * get correct allocation block + */ + bno = i/d->b2b; + if(bno >= d->nab) + return -1; + b = bcread(d, bno); + if(b == 0) + return -1; + ba = (Dalloc*)b->data; + + /* + * change bit + */ + i -= bno*d->b2b; + p = ba->bits + (i/BtoUL); + m = 1<<(i%BtoUL); + *p &= ~m; + bcmark(d, b); + + return 0; +} + +/* + * free a block (or blocks) + */ +int +dfree(Disk *d, Dptr *dp) +{ + ulong bno; + Dptr *sp, *ep; + Bbuf *b; + + bno = dp->bno; + dp->bno = Notabno; + + /* + * nothing to free + */ + if(bno == Notabno) + return 0; + + /* + * direct pointer + */ + if((bno & Indbno) == 0) + return _bfree(d, bno); + + /* + * first indirect page + */ + bno &= ~Indbno; + _bfree(d, bno); + + /* + * then all the pages it points to + * + * DANGER: this algorithm may fail if there are more + * allocation blocks than block buffers + */ + b = bcread(d, bno); + if(b == 0) + return -1; + sp = (Dptr*)b->data; + for(ep = sp + d->p2b; sp < ep; sp++) + if(dfree(d, sp) < 0) + return -1; + return 0; +} diff --git a/sys/src/cmd/ramcfs/disk.h b/sys/src/cmd/ramcfs/disk.h new file mode 100644 index 00000000..e4e98a02 --- /dev/null +++ 
b/sys/src/cmd/ramcfs/disk.h @@ -0,0 +1,24 @@ +typedef struct Disk Disk; + +/* + * Reference to the disk (now memory cache) + */ +struct Disk +{ + Bcache; + ulong nb; /* number of blocks */ + ulong nab; /* number of allocation blocks */ + int b2b; /* allocation bits to a block */ + int p2b; /* Dptr's per page */ + char name[CACHENAMELEN]; +}; + +int dinit(Disk*, char*, int, char*); +int dformat(Disk*, char*, char*, ulong, ulong); +ulong dalloc(Disk*, Dptr*); +ulong dpalloc(Disk*, Dptr*); +int dfree(Disk*, Dptr*); + +extern int debug; + +#define DPRINT if(debug)fprint diff --git a/sys/src/cmd/ramcfs/file.c b/sys/src/cmd/ramcfs/file.c new file mode 100644 index 00000000..457a434e --- /dev/null +++ b/sys/src/cmd/ramcfs/file.c @@ -0,0 +1,298 @@ +#include +#include +#include "cformat.h" +#include "lru.h" +#include "bcache.h" +#include "disk.h" +#include "inode.h" +#include "file.h" + +/* + * merge data with that which already exists in a block + * + * we allow only one range per block, always use the new + * data if the ranges don't overlap. + */ +void +fmerge(Dptr *p, char *to, char *from, int start, int len) +{ + int end; + + end = start + len; + memmove(to+start, from, end-start); + + /* + * if ranges do not overlap... + */ + if(start>p->end || p->start>end){ + /* + * just use the new data + */ + p->start = start; + p->end = end; + } else { + /* + * merge ranges + */ + if(start < p->start) + p->start = start; + if(end > p->end) + p->end = end; + } + +} + +/* + * write a block (or less) of data onto a disk, follow it with any necessary + * pointer writes. + * + * N.B. ordering is everything + */ +int +fbwrite(Icache *ic, Ibuf *b, char *a, ulong off, int len) +{ + int wrinode; + ulong fbno; + Bbuf *dbb; /* data block */ + Bbuf *ibb; /* indirect block */ + Dptr *p; + Dptr t; + + fbno = off / ic->bsize; + p = &b->inode.ptr; + ibb = 0; + wrinode = 0; + + /* + * are there any pages for this inode? 
+ */ + if(p->bno == Notabno){ + wrinode = 1; + goto dowrite; + } + + /* + * is it an indirect block? + */ + if(p->bno & Indbno){ + ibb = bcread(ic, p->bno); + if(ibb == 0) + return -1; + p = (Dptr*)ibb->data; + p += fbno % ic->p2b; + goto dowrite; + } + + /* + * is it the wrong direct block? + */ + if((p->fbno%ic->p2b) != (fbno%ic->p2b)){ + /* + * yes, make an indirect block + */ + t = *p; + dpalloc(ic, p); + if(p->bno == Notabno){ + *p = t; + return -1; + } + ibb = bcalloc(ic, p->bno); + if(ibb == 0){ + *p = t; + return -1; + } + p = (Dptr*)ibb->data; + p += t.fbno % ic->p2b; + *p = t; + p = (Dptr*)ibb->data; + p += fbno % ic->p2b; + } + wrinode = 1; + +dowrite: + /* + * get the data block into the block cache + */ + if(p->bno == Notabno){ + /* + * create a new block + */ + dalloc(ic, p); + if(p->bno == Notabno) + return -1; /* no blocks left (maybe) */ + dbb = bcalloc(ic, p->bno); + } else { + /* + * use what's there + */ + dbb = bcread(ic, p->bno); + } + if(dbb == 0) + return -1; + + /* + * merge in the new data + */ + if(p->fbno != fbno){ + p->start = p->end = 0; + p->fbno = fbno; + } + fmerge(p, dbb->data, a, off % ic->bsize, len); + + /* + * write changed blocks back in the + * correct order + */ + bcmark(ic, dbb); + if(ibb) + bcmark(ic, ibb); + if(wrinode) + if(iwrite(ic, b) < 0) + return -1; + return len; +} + +/* + * write `n' bytes to the cache + * + * return number of bytes written + */ +long +fwrite(Icache *ic, Ibuf *b, char *a, ulong off, long n) +{ + int len; + long sofar; + + for(sofar = 0; sofar < n; sofar += len){ + len = ic->bsize - ((off+sofar)%ic->bsize); + if(len > n - sofar) + len = n - sofar; + if(fbwrite(ic, b, a+sofar, off+sofar, len) < 0) + return sofar; + } + return sofar; +} + +/* + * get a pointer to the next valid data at or after `off' + */ +Dptr * +fpget(Icache *ic, Ibuf *b, ulong off) +{ + ulong fbno; + long doff; + Bbuf *ibb; /* indirect block */ + Dptr *p, *p0, *pf; + + fbno = off / ic->bsize; + p = &b->inode.ptr; + + /* + * are 
there any pages for this inode? + */ + if(p->bno == Notabno) + return 0; + + /* + * if it's a direct block, life is easy? + */ + if(!(p->bno & Indbno)){ + /* + * a direct block, return p if it's at least past what we want + */ + if(p->fbno > fbno) + return p; + if(p->fbno < fbno) + return 0; + doff = off % ic->bsize; + if(doff>=p->start && doffend) + return p; + else + return 0; + } + + /* + * read the indirect block + */ + ibb = bcread(ic, p->bno); + if(ibb == 0) + return 0; + + /* + * find the next valid pointer + */ + p0 = (Dptr*)ibb->data; + pf = p0 + (fbno % ic->p2b); + if(pf->bno!=Notabno && pf->fbno==fbno){ + doff = off % ic->bsize; + if(doffend) + return pf; + } + for(p = pf+1; p < p0 + ic->p2b; p++){ + fbno++; + if(p->fbno==fbno && p->bno!=Notabno && p->startend) + return p; + } + for(p = p0; p < pf; p++){ + fbno++; + if(p->fbno==fbno && p->bno!=Notabno && p->startend) + return p; + } + return 0; +} + +/* + * read `n' bytes from the cache. + * + * if we hit a gap and we've read something, + * return number of bytes read so far. + * + * if we start with a gap, return minus the number of bytes + * to the next data. + * + * if there are no bytes cached, return 0. 
+ */ +long +fread(Icache *ic, Ibuf *b, char *a, ulong off, long n) +{ + int len, start; + long sofar, gap; + Dptr *p; + Bbuf *bb; + + for(sofar = 0; sofar < n; sofar += len, off += len){ + /* + * get pointer to next data + */ + len = n - sofar; + p = fpget(ic, b, off); + + /* + * if no more data, return what we have so far + */ + if(p == 0) + return sofar; + + /* + * if there's a gap, return the size of the gap + */ + gap = (ic->bsize*p->fbno + p->start) - off; + if(gap>0) + if(sofar == 0) + return -gap; + else + return sofar; + + /* + * return what we have + */ + bb = bcread(ic, p->bno); + if(bb == 0) + return sofar; + start = p->start - gap; + if(p->end - start < len) + len = p->end - start; + memmove(a + sofar, bb->data + start, len); + } + return sofar; +} diff --git a/sys/src/cmd/ramcfs/file.h b/sys/src/cmd/ramcfs/file.h new file mode 100644 index 00000000..244bca93 --- /dev/null +++ b/sys/src/cmd/ramcfs/file.h @@ -0,0 +1,5 @@ +void fmerge(Dptr*, char*, char*, int, int); +int fbwrite(Icache*, Ibuf*, char*, ulong, int); +long fwrite(Icache*, Ibuf*, char*, ulong, long); +Dptr* fpget(Icache*, Ibuf*, ulong); +long fread(Icache*, Ibuf*, char*, ulong, long); diff --git a/sys/src/cmd/ramcfs/inode.c b/sys/src/cmd/ramcfs/inode.c new file mode 100644 index 00000000..74d03254 --- /dev/null +++ b/sys/src/cmd/ramcfs/inode.c @@ -0,0 +1,412 @@ +#include +#include +#include "cformat.h" +#include "lru.h" +#include "bcache.h" +#include "disk.h" +#include "inode.h" +#include "stats.h" + +extern int readonly; + +/* + * read the inode blocks and make sure they + * haven't been trashed. + * + * make the in-core table of qid to inode mappings. + * N.B. this is just an array. we need a linear search to find + * a particular inode. this could be done faster. + * + * nab is the first inode block. 
+ */ +int +iinit(Icache *ic, char *mc, int psize, char* name) +{ + Ibuf *b; + Imap *m; + ulong ino; + Bbuf *bb; + Dinode *bi; + + /* + * get basic sizes and allocation info from disk + */ + if(dinit(ic, mc, psize, name) < 0) + return -1; + + /* + * read first inode block to get number of inodes + */ + bb = bcread(ic, ic->nab); + if(bb == 0){ + fprint(2, "iinit: can't read disk\n"); + return -1; + } + bi = (Dinode*)bb->data; + if(bi->nino==0 || bi->nino>2048){ + fprint(2, "iinit: bad nino\n"); + return -1; + } + ic->nino = bi->nino; + + /* + * set up sizing constants + */ + ic->i2b = (ic->bsize - sizeof(Dihdr))/sizeof(Inode); + ic->nib = (ic->nino + ic->i2b - 1)/ic->i2b; + + /* + * allocate the in-core qid/inode map, build it's lru + */ + if(ic->map) + free(ic->map); + ic->map = malloc(sizeof(Imap)*ic->nino); + if(ic->map == 0){ + fprint(2, "iinit: can't alloc map\n"); + return -1; + } + lruinit(&ic->mlru); + for(m = ic->map; m < &ic->map[ic->nino]; m++){ + m->inuse = 0; + m->b = 0; + lruadd(&ic->mlru, m); + } + + /* + * mark all cache buffers as empty, put them on the lru list + */ + lruinit(&ic->blru); + for(b = ic->ib; b < &ic->ib[Nicache]; b++){ + b->inuse = 0; + lruadd(&ic->blru, b); + } + + /* + * Read all inodes and + * build the in-core qid/inode map + */ + for(ino = 0; ino < ic->nino; ino++){ + b = iread(ic, ino); + if(b == 0){ + fprint(2, "iinit: can't read inode %ld\n", ino); + return -1; + } + if(b->inode.inuse){ + m = &ic->map[ino]; + m->inuse = 1; + m->qid = b->inode.qid; + lruref(&ic->mlru, m); + } + } + return 0; +} + +/* + * format the inode blocks + */ +int +iformat(Icache *ic, char *mc, ulong nino, char *name, int bsize, int psize) +{ + int nib; + ulong bno, i2b, i; + Bbuf *bb; + Dinode *bi; + + /* + * first format disk allocation + */ + if(dformat(ic, mc, name, bsize, psize) < 0) + return -1; + + fprint(2, "formatting inodes in memory\n"); + + i2b = (bsize - sizeof(Dihdr))/sizeof(Inode); + nib = (nino + i2b - 1)/i2b; + + for(bno = ic->nab; bno < 
ic->nab + nib; bno++){ + if(dalloc(ic, 0) == Notabno){ + fprint(2, "iformat: balloc failed\n"); + return -1; + } + bb = bcalloc(ic, bno); + if(bb == 0){ + fprint(2, "iformat: bcalloc failed\n"); + return -1; + } + bi = (Dinode*)bb->data; + bi->magic = Imagic; + bi->nino = nino; + for(i = 0; i < i2b; i++) + bi->inode[i].inuse = 0; + bcmark(ic, bb); + } + + bcsync(ic); + + return iinit(ic, mc, psize, name); +} + +/* + * allocate a cache buffer, use least recently used + */ +Ibuf* +ialloc(Icache *ic, ulong ino) +{ + Imap *m; + Ibuf *b; + + b = (Ibuf*)ic->blru.lnext; + if(b->inuse) + ic->map[b->ino].b = 0; + b->ino = ino; + b->inuse = 1; + m = &ic->map[ino]; + m->b = b; + return b; +} + +/* + * free a cache buffer + */ +void +ifree(Icache *ic, Ibuf *b) +{ + b->inuse = 0; + if(b->inuse) + ic->map[b->ino].b = 0; + lruderef(&ic->blru, b); +} + +/* + * get an inode into the cache. if no inode exists for this qid, create one + * from an unused qid/inode map. + */ +Ibuf * +iget(Icache *ic, Qid qid) +{ + Imap *m, *me; + Ibuf *b; + + /* + * find map entry with same qid.path + */ + for(m = ic->map, me = &ic->map[ic->nino]; m < me; m++) + if(m->inuse && m->qid.path==qid.path){ + if((m->qid.vers != qid.vers || qid.vers == 0) + && !readonly){ + /* + * our info is old or this is likely + * a synthetic file, so forget it + */ + DPRINT(2, "updating old file %llud.%lud\n", + qid.path, qid.vers); + m->qid = qid; + iupdate(ic, m - ic->map, qid); + } + break; + } + + /* + * if an already existing inode, just get it + */ + if(m != me) + return iread(ic, m - ic->map); + + /* + * create a new inode, throw out the least recently used inode + * if necessary + */ + m = (Imap*)ic->mlru.lnext; + if(m->inuse){ + DPRINT(2, "superceding file %llud.%ld by %llud.%ld\n", + m->qid.path, m->qid.vers, qid.path, qid.vers); + if(iremove(ic, m - ic->map) < 0) + return 0; + } + + if(statson) + cfsstat.ninsert++; + /* + * init inode and write to disk + */ + DPRINT(2, "new file %llud.%ld ino %ld\n", + 
qid.path, qid.vers, m - ic->map); + b = ialloc(ic, m - ic->map); + b->inode.inuse = m->inuse = 1; + b->inode.qid = qid; + b->inode.length = 0x7fffffffffffffffLL; + m->qid = qid; + b->inode.ptr.bno = Notabno; + iwrite(ic, b); + return b; +} + +/* + * read an inode into the cache + * + * ASSUMPTION: the inode is valid + */ +Ibuf* +iread(Icache *ic, ulong ino) +{ + Ibuf *b; + Imap *m; + ulong bno; + Bbuf *bb; + Dinode *bi; + + /* + * first see if we already have it in a cache entry + */ + m = &ic->map[ino]; + if(m->inuse && m->b){ + b = m->b; + goto out; + } + + /* + * read it + */ + b = ialloc(ic, ino); + bno = ic->nab + ino/ic->i2b; + bb = bcread(ic, bno); + if(bb == 0){ + ifree(ic, b); + return 0; + } + bi = (Dinode*)bb->data; + b->inode = bi->inode[ino % ic->i2b]; + + /* + * consistency check + */ + if(bi->nino!=ic->nino || bi->magic!=Imagic){ + fprint(2, "iread: inconsistent inode block\n"); + ifree(ic, b); + return 0; + } +out: + b->inuse = 1; + m->b = b; + if(b->inode.inuse) + lruref(&ic->mlru, m); + lruref(&ic->blru, b); + return b; +} + +/* + * write an inode back to disk + */ +int +iwrite(Icache *ic, Ibuf *b) +{ + ulong bno; + Bbuf *bb; + Dinode *bi; + + bno = ic->nab + b->ino/ic->i2b; + bb = bcread(ic, bno); + if(bb == 0) + return 0; + bi = (Dinode*)bb->data; + bi->inode[b->ino % ic->i2b] = b->inode; + bcmark(ic, bb); + lruref(&ic->mlru, &ic->map[b->ino]); + lruref(&ic->blru, b); + return 0; +} + +/* + * Forget what we know about an inode without removing it + * + * N.B: ordering of iwrite and dfree is important + */ +int +iupdate(Icache *ic, ulong ino, Qid qid) +{ + Ibuf *b; + Imap *m; + Dptr d; + + if(statson) + cfsstat.nupdate++; + b = iread(ic, ino); + if(b == 0) + return -1; + + /* + * update inode and map + */ + b->inode.qid = qid; + b->inode.length = 0x7fffffffffffffffLL; /* Set to maximum */ + m = &ic->map[ino]; + m->qid = qid; + + /* + * the free is not done if the write fails! 
+ * this is important + */ + d = b->inode.ptr; + b->inode.ptr.bno = Notabno; + if(iwrite(ic, b) < 0) + return -1; + dfree(ic, &d); + return 0; +} + +/* + * remove an inode + * + * N.B: ordering of iwrite and dfree is important + */ +int +iremove(Icache *ic, ulong ino) +{ + Ibuf *b; + Imap *m; + + if(statson) + cfsstat.ndelete++; + m = &ic->map[ino]; + + /* + * read in inode + */ + b = iread(ic, ino); + if(b == 0) + return -1; + + /* + * mark it unused on disk + */ + b->inode.inuse = 0; + if(iwrite(ic, b) < 0) + return -1; + + /* + * throw out it's data pages + */ + dfree(ic, &b->inode.ptr); + + /* + * free the inode buffer + */ + ifree(ic, b); + + /* + * make map entry least recently used + */ + lruderef(&ic->mlru, m); + return 0; +} + +/* + * increment our version number + */ +void +iinc(Icache *ic, Ibuf *b) +{ + b->inode.qid.vers++; + ic->map[b->ino].qid = b->inode.qid; + iwrite(ic, b); +} diff --git a/sys/src/cmd/ramcfs/inode.h b/sys/src/cmd/ramcfs/inode.h new file mode 100644 index 00000000..39c2a995 --- /dev/null +++ b/sys/src/cmd/ramcfs/inode.h @@ -0,0 +1,60 @@ +typedef struct Ibuf Ibuf; +typedef struct Imap Imap; +typedef struct Icache Icache; + +enum +{ + Nicache= 64, /* number of inodes kept in pool */ +}; + +/* + * a cached inode buffer + */ +struct Ibuf +{ + Lru; /* must be first in structure */ + int inuse; /* non-0 if in use */ + ulong ino; /* index into inode table */ + Inode inode; /* the inode contents */ +}; + +/* + * in-core qid to inode mapping + */ +struct Imap +{ + Lru; /* must be first in structure */ + Qid qid; + Ibuf *b; /* cache buffer */ + int inuse; /* non-0 if in use */ +}; + +/* + * the inode cache + */ +struct Icache +{ + Disk; + + int nino; /* number of inodes */ + ulong ib0; /* first inode block */ + int nib; /* number of inode blocks */ + int i2b; /* inodes to a block */ + + Ibuf ib[Nicache]; /* inode buffers */ + Lru blru; + + Imap *map; /* inode to qid mapping */ + Lru mlru; +}; + +Ibuf* ialloc(Icache*, ulong); +Ibuf* 
iget(Icache*, Qid); +Ibuf* iread(Icache*, ulong); +int iformat(Icache*, char*, ulong, char*, int, int); +int iinit(Icache*, char*, int, char*); +int iremove(Icache*, ulong); +int iupdate(Icache*, ulong, Qid); +int iwrite(Icache*, Ibuf*); +void ifree(Icache*, Ibuf*); +void iinc(Icache*, Ibuf*); diff --git a/sys/src/cmd/ramcfs/lru.c b/sys/src/cmd/ramcfs/lru.c new file mode 100644 index 00000000..55f6c6e8 --- /dev/null +++ b/sys/src/cmd/ramcfs/lru.c @@ -0,0 +1,75 @@ +/* + * lru lists are circular with a list head + * pointing to the start and end of the list + */ +#include +#include "lru.h" + +/* + * Create an lru chain of buffers + */ +void +lruinit(Lru *h) +{ + h->lprev = h->lnext = h; +} + +/* + * Add a member to an lru chain + */ +void +lruadd(Lru *h, Lru *m) +{ + h->lprev->lnext = m; + m->lprev = h->lprev; + h->lprev = m; + m->lnext = h; +} + +/* + * Move to end of lru list + */ +void +lruref(Lru *h, Lru *m) +{ + if(h->lprev == m) + return; /* alread at end of list */ + + /* + * remove from list + */ + m->lprev->lnext = m->lnext; + m->lnext->lprev = m->lprev; + + /* + * add in at end + */ + h->lprev->lnext = m; + m->lprev = h->lprev; + h->lprev = m; + m->lnext = h; +} + +/* + * Move to head of lru list + */ +void +lruderef(Lru *h, Lru *m) +{ + if(h->lnext == m) + return; /* alread at head of list */ + + /* + * remove from list + */ + m->lprev->lnext = m->lnext; + m->lnext->lprev = m->lprev; + + /* + * add in at head + */ + h->lnext->lprev = m; + m->lnext = h->lnext; + h->lnext = m; + m->lprev = h; +} diff --git a/sys/src/cmd/ramcfs/lru.h b/sys/src/cmd/ramcfs/lru.h new file mode 100644 index 00000000..b16f82e8 --- /dev/null +++ b/sys/src/cmd/ramcfs/lru.h @@ -0,0 +1,15 @@ +typedef struct Lruhead Lruhead; +typedef struct Lru Lru; + +struct Lru +{ + Lru *lprev; + Lru *lnext; +}; + +void lruinit(Lru*); +void lruadd(Lru*, Lru*); +void lruref(Lru*, Lru*); +void lruderef(Lru*, Lru*); + +extern ulong cachesize; diff --git a/sys/src/cmd/ramcfs/mkfile 
b/sys/src/cmd/ramcfs/mkfile new file mode 100644 index 00000000..5ea5789d --- /dev/null +++ b/sys/src/cmd/ramcfs/mkfile @@ -0,0 +1,41 @@ +