diff -Nru 0/amd64/include/ape/float.h 4/amd64/include/ape/float.h --- 0/amd64/include/ape/float.h Thu Jan 1 00:00:00 1970 +++ 4/amd64/include/ape/float.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,80 @@ +#ifndef __FLOAT +#define __FLOAT +/* IEEE, default rounding */ + +#define FLT_ROUNDS 1 +#define FLT_RADIX 2 + +#define FLT_DIG 6 +#define FLT_EPSILON 1.19209290e-07 +#define FLT_MANT_DIG 24 +#define FLT_MAX 3.40282347e+38 +#define FLT_MAX_10_EXP 38 +#define FLT_MAX_EXP 128 +#define FLT_MIN 1.17549435e-38 +#define FLT_MIN_10_EXP -37 +#define FLT_MIN_EXP -125 + +#define DBL_DIG 15 +#define DBL_EPSILON 2.2204460492503131e-16 +#define DBL_MANT_DIG 53 +#define DBL_MAX 1.797693134862315708145e+308 +#define DBL_MAX_10_EXP 308 +#define DBL_MAX_EXP 1024 +#define DBL_MIN 2.225073858507201383090233e-308 +#define DBL_MIN_10_EXP -307 +#define DBL_MIN_EXP -1021 +#define LDBL_MANT_DIG DBL_MANT_DIG +#define LDBL_EPSILON DBL_EPSILON +#define LDBL_DIG DBL_DIG +#define LDBL_MIN_EXP DBL_MIN_EXP +#define LDBL_MIN DBL_MIN +#define LDBL_MIN_10_EXP DBL_MIN_10_EXP +#define LDBL_MAX_EXP DBL_MAX_EXP +#define LDBL_MAX DBL_MAX +#define LDBL_MAX_10_EXP DBL_MAX_10_EXP + +typedef union FPdbleword FPdbleword; +union FPdbleword +{ + double x; + struct { /* little endian */ + long lo; + long hi; + }; +}; + +#ifdef _RESEARCH_SOURCE +/* define stuff needed for floating conversion */ +#define IEEE_8087 1 +#define Sudden_Underflow 1 +#endif +#ifdef _PLAN9_SOURCE +/* MXCSR */ +/* fcr */ +#define FPFTZ (1<<15) /* amd64 */ +#define FPINEX (1<<12) +#define FPUNFL (1<<11) +#define FPOVFL (1<<10) +#define FPZDIV (1<<9) +#define FPDNRM (1<<8) /* amd64 */ +#define FPINVAL (1<<7) +#define FPDAZ (1<<6) /* amd64 */ +#define FPRNR (0<<13) +#define FPRZ (3<<13) +#define FPRPINF (2<<13) +#define FPRNINF (1<<13) +#define FPRMASK (3<<13) +#define FPPEXT 0 +#define FPPSGL 0 +#define FPPDBL 0 +#define FPPMASK 0 +/* fsr */ +#define FPAINEX (1<<5) +#define FPAUNFL (1<<4) +#define FPAOVFL (1<<3) +#define FPAZDIV (1<<2) +#define 
FPADNRM (1<<1) /* not in plan 9 */ +#define FPAINVAL (1<<0) +#endif +#endif /* __FLOAT */ diff -Nru 0/amd64/include/ape/math.h 4/amd64/include/ape/math.h --- 0/amd64/include/ape/math.h Thu Jan 1 00:00:00 1970 +++ 4/amd64/include/ape/math.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,74 @@ +#ifndef __MATH +#define __MATH +#pragma lib "/$M/lib/ape/libap.a" + +/* a HUGE_VAL appropriate for IEEE double-precision */ +/* the correct value, 1.797693134862316e+308, causes a ken overflow */ +#define HUGE_VAL 1.79769313486231e+308 + +#ifdef __cplusplus +extern "C" { +#endif + +extern double acos(double); +extern double asin(double); +extern double atan(double); +extern double atan2(double, double); +extern double cos(double); +extern double sin(double); +extern double tan(double); +extern double cosh(double); +extern double sinh(double); +extern double tanh(double); +extern double exp(double); +extern double frexp(double, int *); +extern double ldexp(double, int); +extern double log(double); +extern double log10(double); +extern double modf(double, double *); +extern double pow(double, double); +extern double sqrt(double); +extern double ceil(double); +extern double fabs(double); +extern double floor(double); +extern double fmod(double, double); +extern double NaN(void); +extern int isNaN(double); +extern double Inf(int); +extern int isInf(double, int); + +#ifdef _RESEARCH_SOURCE +/* does >> treat left operand as unsigned ? 
*/ +#define Unsigned_Shifts 1 +#define M_E 2.7182818284590452354 /* e */ +#define M_LOG2E 1.4426950408889634074 /* log 2e */ +#define M_LOG10E 0.43429448190325182765 /* log 10e */ +#define M_LN2 0.69314718055994530942 /* log e2 */ +#define M_LN10 2.30258509299404568402 /* log e10 */ +#define M_PI 3.14159265358979323846 /* pi */ +#define M_PI_2 1.57079632679489661923 /* pi/2 */ +#define M_PI_4 0.78539816339744830962 /* pi/4 */ +#define M_1_PI 0.31830988618379067154 /* 1/pi */ +#define M_2_PI 0.63661977236758134308 /* 2/pi */ +#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ +#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ +#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ + +extern double hypot(double, double); +extern double erf(double); +extern double erfc(double); +extern double j0(double); +extern double y0(double); +extern double j1(double); +extern double y1(double); +extern double jn(int, double); +extern double yn(int, double); + +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* __MATH */ diff -Nru 0/amd64/include/ape/stdarg.h 4/amd64/include/ape/stdarg.h --- 0/amd64/include/ape/stdarg.h Thu Jan 1 00:00:00 1970 +++ 4/amd64/include/ape/stdarg.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,18 @@ +#ifndef __STDARG +#define __STDARG + +typedef char *va_list; + +#define va_start(list, start) list = (sizeof(start)<8 ? 
(char *)((long long *)&(start)+1) : \ +(char *)(&(start)+1)) +#define va_end(list) +#define va_arg(list, mode)\ + ((sizeof(mode) == 1)?\ + ((mode*)(list += 8))[-8]:\ + (sizeof(mode) == 2)?\ + ((mode*)(list += 8))[-4]:\ + (sizeof(mode) == 4)?\ + ((mode*)(list += 8))[-2]:\ + ((mode*)(list += sizeof(mode)))[-1]) + +#endif /* __STDARG */ diff -Nru 0/amd64/include/ape/ureg.h 4/amd64/include/ape/ureg.h --- 0/amd64/include/ape/ureg.h Thu Jan 1 00:00:00 1970 +++ 4/amd64/include/ape/ureg.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,38 @@ +#ifndef __UREG_H +#define __UREG_H +#if !defined(_PLAN9_SOURCE) + This header file is an extension to ANSI/POSIX +#endif + +struct Ureg { + unsigned long long ax; + unsigned long long bx; + unsigned long long cx; + unsigned long long dx; + unsigned long long si; + unsigned long long di; + unsigned long long bp; + unsigned long long r8; + unsigned long long r9; + unsigned long long r10; + unsigned long long r11; + unsigned long long r12; + unsigned long long r13; + unsigned long long r14; + unsigned long long r15; + + unsigned short ds; + unsigned short es; + unsigned short fs; + unsigned short gs; + + unsigned long long type; + unsigned long long error; /* error code (or zero) */ + unsigned long long ip; /* pc */ + unsigned long long cs; /* old context */ + unsigned long long flags; /* old flags */ + unsigned long long sp; /* sp */ + unsigned long long ss; /* old stack segment */ +}; + +#endif diff -Nru 0/sys/include/libc.h 4/sys/include/libc.h --- 0/sys/include/libc.h Wed Feb 6 07:09:44 2013 +++ 4/sys/include/libc.h Wed Feb 6 00:00:00 2013 @@ -366,6 +366,7 @@ extern char* getenv(char*); extern int getfields(char*, char**, int, int, char*); extern int gettokens(char *, char **, int, char *); +extern int getcoreno(int*); extern char* getuser(void); extern char* getwd(char*, int); extern int iounit(int); @@ -585,7 +586,19 @@ RFCENVG = (1<<11), RFCFDG = (1<<12), RFREND = (1<<13), - RFNOMNT = (1<<14) + RFNOMNT = (1<<14), + RFPREPAGE = (1<<15), + 
RFCPREPAGE = (1<<16), + RFCORE = (1<<17), + RFCCORE = (1<<18), +}; + +/* execac */ +enum +{ + EXTC = 0, /* exec on time-sharing */ + EXAC, /* want an AC for the exec'd image */ + EXXC, /* want an XC for the exec'd image */ }; typedef @@ -698,6 +711,92 @@ extern char* sysname(void); extern void werrstr(char*, ...); #pragma varargck argpos werrstr 1 + +extern int ziop(int*); + +/* + * Atomics + * (casul was known before as casl; we don't suppy a prototype + * so we could see the warnings and update the source; the function + * is still in libc). + */ +extern int cas(uint *p, int ov, int nv); +extern int casul(ulong *p, ulong ov, ulong nv); +extern int casp(void **p, void *ov, void *nv); +extern int cas32(u32int *p, u32int ov, u32int nv); +extern int cas64(u64int *p, u64int ov, u64int nv); +extern void mfence(void); + +/* + * Zero-copy I/O + */ + +typedef struct Zio Zio; +struct Zio +{ + void* data; + ulong size; +}; + +/* kernel interface */ +extern void ziofree(Zio io[], int nio); +extern int ziopread(int fd, Zio io[], int nio, usize count, vlong offset); +extern int ziowrite(int fd, Zio io[], int nio); +extern int zioread(int fd, Zio io[], int nio, usize count); +extern int ziopwrite(int fd, Zio io[], int nio, vlong offset); + + +/* + * NIX core types + */ +enum +{ + NIXTC = 0, + NIXKC, + NIXAC, +}; + +typedef struct Sem Sem; +struct Sem +{ + int tickets; + int waiting; + int going; + Lock; +}; + +/* + * NIX system calls and library functions. + */ +extern int execac(int, char*, char*[]); + +extern void initsem(Sem *, int); +extern int altsems(Sem *[], int); +extern int downsem(Sem *, int); +extern void upsem(Sem *); +extern int semtrytimes; + +/* + * Internal NIX system calls, used by library functions. 
+ */ +extern int semsleep(Sem*); +extern void semwakeup(Sem*); +extern int semalt(Sem*[], int); +extern void semstats(void); +extern int semdebug; + +/* + * Performance counters + */ +enum +{ + PmcOs = 1, + PmcUser = 2, + PmcEnable = 4, +}; + +extern int confpmc(int, int, int, char *); +extern uvlong rdpmc(int); extern char *argv0; #define ARGBEGIN for((argv0||(argv0=*argv)),argv++,argc--;\ diff -Nru 0/sys/include/seg.h 4/sys/include/seg.h --- 0/sys/include/seg.h Thu Jan 1 00:00:00 1970 +++ 4/sys/include/seg.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,10 @@ +#pragma lib "libseg.a" +#pragma src "/sys/src/libseg" + + +void* newseg(char*,uvlong, ulong); + + + + + diff -Nru 0/sys/include/tos.h 4/sys/include/tos.h --- 0/sys/include/tos.h Sat Aug 7 18:46:58 2004 +++ 4/sys/include/tos.h Mon Jul 29 00:00:00 2013 @@ -20,6 +20,12 @@ ulong clock; /* scratch space for kernel use (e.g., mips fp delay-slot execution) */ ulong kscr[4]; + + /* + * Fields below are not available on Plan 9 kernels. + */ + int nixtype; /* role of the core we are running at */ + int core; /* core we are running at */ /* top of stack is here */ }; diff -Nru 0/sys/include/trace.h 4/sys/include/trace.h --- 0/sys/include/trace.h Wed Feb 6 21:47:25 2013 +++ 4/sys/include/trace.h Thu Feb 7 00:00:00 2013 @@ -13,12 +13,14 @@ SInts, /* Interrupt start */ SInte, /* Interrupt end */ SUser, /* user event */ + SLock, /* blocked on a queue or lock */ Nevent, } Tevent; typedef struct Traceevent Traceevent; struct Traceevent { - ulong pid; - ulong etype; /* Event type */ - vlong time; /* time stamp */ + u32int pid; + u32int etype; /* Event type */ + u64int time; /* time stamp */ + u32int core; /* core number */ }; diff -Nru 0/sys/include/tube.h 4/sys/include/tube.h --- 0/sys/include/tube.h Thu Jan 1 00:00:00 1970 +++ 4/sys/include/tube.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,47 @@ +#pragma lib "libtube.a" +#pragma src "/sys/src/libtube" + +typedef struct Tube Tube; +typedef struct Talt Talt; + +/* + * Talt.op + */ +enum +{ + 
TSND = 0x6666, /* weird numbers, for debugging */ + TRCV, + TNBSND, + TNBRCV, + TNOP +}; + +struct Tube +{ + int msz; /* message size */ + int tsz; /* tube size (# of messages) */ + Sem nmsg; /* semaphore: # of messages in tube */ + Sem nhole; /* semaphore: # of free slots in tube */ + int hd; + int tl; +}; + +struct Talt +{ + Tube* t; + void* m; + int op; +}; + + + +extern void freetube(Tube *t); +extern int nbtrecv(Tube *t, void *p); +extern int nbtsend(Tube *t, void *p); +extern Tube* newtube(ulong msz, ulong n); +extern int talt(Talt a[], int na); +extern void trecv(Tube *t, void *p); +extern void tsend(Tube *t, void *p); +extern Tube* namedtube(char*,ulong,int, int); + +extern int namedtubedebug; diff -Nru 0/sys/man/2/exec 4/sys/man/2/exec --- 0/sys/man/2/exec Tue Feb 5 22:32:38 2013 +++ 4/sys/man/2/exec Wed Feb 6 00:00:00 2013 @@ -1,6 +1,6 @@ .TH EXEC 2 .SH NAME -exec, execl, _privates, _nprivates, _tos \- execute a file +exec, execl, execac, _privates, _nprivates, _tos \- execute a file .SH SYNOPSIS .B #include .br @@ -14,6 +14,9 @@ void* execl(char *name, ...) .PP .B +void* execac(int core, char *name, char* argv[]) +.PP +.B void **_privates; .PP .B @@ -92,6 +95,19 @@ in the call. The last argument to .I execl must be a null pointer. +.PP +.I Execac +is like +.I exec +except that it moves the process to an AC. When +.I core +is zero, the process is allocated to a TC. When +.I core +is positive, the process is allocated to that particular core number +(using an AC role). When +.I core +is less than zero, the kernel picks a suitable core (with the AC role) +for the process. .PP For a file beginning .BR #! , diff -Nru 0/sys/man/2/fork 4/sys/man/2/fork --- 0/sys/man/2/fork Tue Feb 5 22:32:38 2013 +++ 4/sys/man/2/fork Wed Feb 6 00:00:00 2013 @@ -42,7 +42,7 @@ and open files. 
.I Flags is the logical OR of some subset of -.TF RFCNAMEG +.TF RFCPREPAGE .TP .B RFPROC If set a new process is created; otherwise changes affect the @@ -130,6 +130,20 @@ Other segment types, in particular stack segments, will be unaffected. May be set only with .BR RFPROC . +.TP +.B RFPREPAGE +If set, the process (or the child) sets a flag so that any segments +attached in the future are prepaged. The flag is inherited by descendants. Segments +of the process making the call are also prepaged, but see the bugs section. +.TP +.B RFCPREPAGE +clears the previous flag. +.TP +.B RFCORE +If set, the process moves to an AC. +.TP +.B RFCCORE +If set, the process moves to a TC. .PD .PP File descriptors in a shared file descriptor table are kept @@ -164,3 +178,11 @@ .SH DIAGNOSTICS These functions set .IR errstr . +.SH BUGS +The +.B RFPREPAGE +flag combined with +.B RFPROC +should prepage all memory in the child process. As of now, it is not +doing so (it always prepages the memory of the caller process). The +flag to prepage further attachments is handled correctly in any case. diff -Nru 0/sys/man/2/getcoreno 4/sys/man/2/getcoreno --- 0/sys/man/2/getcoreno Thu Jan 1 00:00:00 1970 +++ 4/sys/man/2/getcoreno Wed Feb 6 00:00:00 2013 @@ -0,0 +1,28 @@ +.TH GETCORENO 2 +.SH NAME +getcoreno \- get core number and role +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.PP +.B +int getcoreno(int *rolep) +.SH DESCRIPTION +.I Getcoreno +returns the number for the core used by the process and, if +.I rolep +is not nil, fills the pointed-to integer with the core role, one of: +.I NIXTC +(time-sharing core), +.I NIXAC +(application core), and +.I NIXKC +(kernel core). +It uses +.I tos +and does not perform any system call or trap. 
+.SH SOURCE +.B /sys/src/libc/9sys/getcoreno.c +.SH SEE ALSO +.IR cons (3) diff -Nru 0/sys/man/2/pmc 4/sys/man/2/pmc --- 0/sys/man/2/pmc Thu Jan 1 00:00:00 1970 +++ 4/sys/man/2/pmc Wed Feb 6 00:00:00 2013 @@ -0,0 +1,47 @@ +.TH PMC 2 +.SH NAME +confpmc, rdpmc \- access to performance counters on this processor +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.PP +.B +int confpmc(int core, int index, int mode, char *desc) +.PP +.B +uvlong rdpmc(int index) +.SH DESCRIPTION +.I Confpmc +configures the counter +.I index +on core +.I core +setting the configuration according to the configuration string, +.I desc +which is architecture dependent, and the +.IR mode , +which can be an OR of three values: +.TP +.BR PmcOs +the counter only counts while in kernel space, +.TP +.BR PmcUser +the counter only counts while in user space, +.TP +.BR PmcEnable +enables the counter. +.PP +.I Rdpmc +reads the counter +.I index +if possible using special instructions or mapped memory and defaulting +to reading the corresponding file if unavailable. +.SH SOURCE +.B /sys/src/libc/amd64/rdpmc.s +.br +.B /sys/src/libc/port/confpmc.c +.br +.B /sys/src/libc/port/rdpmc.c +.SH SEE ALSO +.IR pmc (3) diff -Nru 0/sys/man/2/tsend 4/sys/man/2/tsend --- 0/sys/man/2/tsend Thu Jan 1 00:00:00 1970 +++ 4/sys/man/2/tsend Wed Feb 6 00:00:00 2013 @@ -0,0 +1,188 @@ +.TH TSEND 2 +.SH NAME +tsend, trecv, newtube, talt, nbtsend, nbtrecv \- optimistic user level channels +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.br +.B #include <tube.h> +.PP +.B +void freetube(Tube *t); +.PP +.B +int nbtrecv(Tube *t, void *p); +.PP +.B +int nbtsend(Tube *t, void *p); +.PP +.B +Tube* newtube(ulong msz, ulong n); +.PP +.B +int talt(Talt a[], int na); +.PP +.B +void trecv(Tube *t, void *p); +.PP +.B +void tsend(Tube *t, void *p); +.PP +.B +Tube* namedtube(char *name,ulong msz,int n, int mkit); +.PP +.B +int namedtubedebug; +.SH DESCRIPTION +These functions provide an abstraction similar to +.I Channels +as found in +.IR thread (2). 
+However, +.I Tubes +are always buffered and are optimistic: they will not require entering the +kernel to communicate if the operation may proceed, and they are to be used +between processes. +.PP +.I Newtube +creates a tube using +.I msz +as the element size, with buffering for +.I n +messages. Tubes cannot be unbuffered. +.PP +.I Freetube +releases the resources held by a tube. It may not be called while the tube is in use. +.PP +.I Tsend +and +.I nbtsend +send a message pointed to by +.I p +through a tube +.IR t . +They do not enter the kernel if the message may be sent without blocking. See +.IR upsem (2) +to learn how to tune whether busy waiting is performed and for how long. +If the message may not be sent without blocking (perhaps after performing a busy +wait), +.I tsend +enters the kernel and blocks until it may proceed. +.I Nbtsend +returns -1 in this case, and 0 otherwise. +.PP +.I Trecv +and +.I nbtrecv +are the counterparts of +.I tsend +and +.IR nbtsend . +They +receive a message from the tube +.I t +(copying it at the location pointed to by +.IR p ). +If there is a message in the tube (perhaps after performing a busy wait), they do not enter the kernel. +See +.IR upsem (2) +to learn how to tune the busy wait period. +If the operation may not proceed, +.I trecv +enters the kernel and blocks until it can, and +.I nbtrecv +returns -1. +.I Nbtrecv +returns 0 otherwise. +.PP +.I Talt +tries to perform any of the requests implied by the array +.I a +and blocks only if none of them may proceed. If an operation may proceed, +it does not block and does not enter the kernel (unless it implies waking up another process). +The return value is the index in +.I a +for the operation performed. 
The array contains +.I na +different +.I Talt +entries: +.EX +enum +{ + TSND, + TRCV, + TNBSND, + TNBRCV, + TNOP +}; + +typedef struct Talt Talt; +struct Talt +{ + Tube* t; + void* m; + int op; +}; +.EE +.PP +Each entry must have +.I t +pointing to a tube, +.I m +pointing to a message to be sent or received, and +.I op +containing one of +.I TSND +(for sending), +.I TRCV +(for receiving), +.I TNBSND +(for sending without blocking), +.I TNBRCV +(for receiving without blocking), +or +.I TNOP +(for ignoring the entry). +.PP +.I Namedtube +uses +.IR segment (2) +to hold tubes for sharing between otherwise unrelated processes. +The function locates a shared tube within a shared memory segment, identified by +.IR name . +When +.I mkit +is non zero, the requested tube (and the implied segment) is created if it does not +exist. Otherwise, the function returns nil upon failure. +.PP +The tube +.I name +must be of the form +.EX + segname!tubename +.EE +(or simply +.IR tubename ). +Here, +.I segname +identifies the shared memory segment storing the tubes. This segment should not +be written externally, or +.I namedtube +will fail. +.I Tubename +is a single word naming a tube within the segment. If the segment name is not supplied, +.B tubes +is assumed by default as the segment name. +.SH SOURCE +.B /sys/src/libtube + +.SH SEE ALSO +.IR fork (2), +.IR thread (2), +and +.IR upsem (2). +.SH DIAGNOSTICS +These functions set +.IR errstr . 
diff -Nru 0/sys/man/2/upsem 4/sys/man/2/upsem --- 0/sys/man/2/upsem Thu Jan 1 00:00:00 1970 +++ 4/sys/man/2/upsem Wed Feb 6 00:00:00 2013 @@ -0,0 +1,101 @@ +.TH UPSEM 2 +.SH NAME +upsem, downsem, altsems, initsem, semstats \- optimistic user level semaphores +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.PP +.B +void upsem(Sem *s); +.PP +.B +int downsem(Sem *s, int block); +.PP +.B +int altsems(Sem *ss[], int n); +.PP +.B +void initsem(Sem *s, int tickets); +.PP +.B +void semstats(void) +.PP +.B extern int semtrytimes; +.SH DESCRIPTION +.IR Upsem , +.IR downsem , +and +.I altsems +provide an interface for optimistic semaphores that work without entering the +kernel when they can proceed, and call the kernel only when it is really necessary +(e.g., to block or to unblock another process). +.PP +A semaphore is a struct shared among synchronizing processes. +Initialization is done by the user program by calling +.IR initsem . +The parameter +.I tickets +must be a natural number. It +sets the initial state of the semaphore. +After the initialization, only the +following functions should be used to operate on the semaphore. +.PP +.I Downsem +tries to acquire one unit from the semaphore. If it can proceed, the call works without +calling the kernel. When it cannot proceed, the global +.I semtrytimes +controls for how long (how many times) the function will try to acquire without entering the +kernel, doing a busy wait. +If this fails and block is set, the kernel is +entered to block the process until a ticket can be acquired. +If block is not set, the process does not enter the kernel and the function returns 0. +When a ticket is acquired, the function returns 1. +If the system call fails, it returns a +negative value. +.PP +.I Upsem +releases one ticket. +The call does not enter the kernel unless a process must be awakened. 
+.PP +.I Altsems +tries to perform a +.I downsem +in any one of the semaphores pointed to by pointers in +.I ss +(there are +.I n +entries in that array). After a busy wait determined by +.IR semtrytimes , +if no semaphore can be acquired, +the kernel is entered and the process blocks +until it can proceed. Otherwise, the +operation is performed without calling the kernel. +The function returns the semaphore that has been acquired. +If the operation fails, it returns a negative value. +.PP +.I Semstats +prints several statistics for debugging, and may be useful to learn if the +processes using the semaphores (any semaphore) did enter the kernel or not and +how many times they did. +.SH SOURCE +.B /sys/src/libc/9sys/upsem.c +and +.B /sys/src/9/port/syssem.c +.SH SEE ALSO +.IR fork (2), +.IR lock (2), +.IR rendezvous (2), +.IR segattach (2), +.IR thread (2), +and +.IR semacquire (2). +.SH DIAGNOSTICS +These functions set +.IR errstr . +If the semaphore's internal lock is corrupted (note that this +is indistinguishable from being extremely busy) the process +can get a suicide note. +.SH BUGS +.I Semalt +only can be used with semaphores located in the same shared segment. diff -Nru 0/sys/man/2/ziop 4/sys/man/2/ziop --- 0/sys/man/2/ziop Thu Jan 1 00:00:00 1970 +++ 4/sys/man/2/ziop Thu Feb 7 00:00:00 2013 @@ -0,0 +1,72 @@ +.TH ZIOP 2 +.SH NAME +ziop \- create a zero-copy interprocess pipe +.SH SYNOPSIS +.B #include +.br +.B #include +.PP +.B +int ziop(int fd[2]) +.SH DESCRIPTION +.I ZIOP +creates a buffered channel for zero-copy interprocess I/O communication. +Unlike conventional pipes, the buffers are supplied by the writers and +.I ziop +queues only buffer descriptors. +.PP +Two file descriptors are returned in +.IR fd . +Data written to +.B fd[1] +is available for reading from +.B fd[0] +and data written to +.B fd[0] +is available for reading from +.BR fd[1] . 
+.PP +After the pipe has been established, +cooperating processes +created by subsequent +.IR fork (2) +calls may pass data through the +pipe with +.I zioread +and +.I ziowrite +calls (also with +.I read +and +.I write +system calls). +.PP +Refer to +.IR zp (3) +for properties of the communication channel (Note, to the man page, not +to the infamous president). +.PP +When all the data has been read from a pipe and the writer has closed the pipe or exited, +.IR read (2) +will return 0 bytes. Writes to a pipe with no reader will generate a note +.BR "sys: write on closed pipe" . +.SH SOURCE +.B /sys/src/libc/9sys/ziop.c +.SH SEE ALSO +.IR intro (2), +.IR zioread (2), +.IR zp (3) +.SH DIAGNOSTICS +Sets +.IR errstr . +.SH BUGS +If a read or a write of a pipe is interrupted, some unknown +number of bytes may have been transferred. +.br +When a read from a pipe returns 0 bytes, it usually means end of file +but is indistinguishable from reading the result of an explicit +write of zero bytes. +.br +The writer is not flow-controlled, other than by the availability of +buffers. + diff -Nru 0/sys/man/2/zioread 4/sys/man/2/zioread --- 0/sys/man/2/zioread Thu Jan 1 00:00:00 1970 +++ 4/sys/man/2/zioread Thu Feb 7 00:00:00 2013 @@ -0,0 +1,93 @@ +.TH ZIOREAD 2 +.SH NAME +zioread, ziowrite, ziopread, ziopwrite, ziofree \- zero-copy read and write +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.PP +.nf +.ft L +typedef struct Zio Zio; +struct Zio +{ + void* data; + ulong size; +}; +.fi +.PP +.B +int ziopread(int fd, Zio io[], int nio, usize count, vlong offset); +.PP +.B +int ziopwrite(int fd, Zio io[], int nio, vlong offset); +.PP +.B +int zioread(int fd, Zio io[], int nio, usize count); +.PP +.B +int ziowrite(int fd, Zio io[], int nio); +.PP +.B +void ziofree(Zio io[], int nio); +.SH DESCRIPTION +These functions supplement the standard read and write operations of +.IR read (2) +with facilities for zero-copy I/O. 
+The set of I/O buffers used should be allocated within ZIO segments, see +.IR segment (3), +or data will be copied during I/O. But they can refer to any other segment as well. +.PP +.B Zio +structures passed as an argument represent a series of zero-copy buffers +for the call. For +.I ziowrite +they should refer to actual data. For +.I zioread +the system fills them to reflect where the read data stands. In both cases, +.I nio +is the number of entries in the +.I io +array, and +.I fd +is the file descriptor where to perform I/O. The +.I count +argument to +.I zioread +limits the total amount of data that may be retrieved. +The return value reflects the number of entries used in the +.I io +array. +.PP +.I Ziopread +and +.I ziopwrite +are similar to +.I zioread +and +.I ziowrite +but specify the offset where to read or write. +.PP +.I Ziofree +is a convenience system call to notify buffer owners that the buffers are no longer +in use. Using it is equivalent to locating the +.B free +files for the segments involved and then writing the +.I io +addresses there. +.SH SOURCE +.B /sys/src/libc/9sys/zioread.c +.br +.B /sys/src/libc/9sys/ziowrite.c +.br +.B /sys/src/nix/port/syszio.c +.SH SEE ALSO +.IR intro (2), +.IR read (2), +and +.IR segment (3). +.SH DIAGNOSTICS +These functions set +.IR errstr . +.SH BUGS +Experimental, just like everything else. diff -Nru 0/sys/man/3/cons 4/sys/man/3/cons --- 0/sys/man/3/cons Tue Feb 5 22:32:38 2013 +++ 4/sys/man/3/cons Wed Feb 6 00:00:00 2013 @@ -289,12 +289,15 @@ .PP The .B sysstat -file holds 10 numbers: +file holds 11 numbers: processor number, context switches, interrupts, system calls, page faults, -TLB faults, TLB purges, load average, idle time and time spent servicing interrupts. +TLB faults, TLB purges, load average, idle time, time spent servicing interrupts, and +scheduler number. 
The load average is in units of milli-CPUs and is decayed over time; idle time and interrupt time are percentage units; the others are total counts from boot time. +After these 11 numbers, the role for the processor (TC, AC, or KC) +is shown. If the machine is a multiprocessor, .B sysstat holds one line per processor. diff -Nru 0/sys/man/3/pmc 4/sys/man/3/pmc --- 0/sys/man/3/pmc Thu Jan 1 00:00:00 1970 +++ 4/sys/man/3/pmc Wed Feb 6 00:00:00 2013 @@ -0,0 +1,104 @@ +.TH PMC 3 +.SH NAME +pmc \- performance monitoring counters +.SH SYNOPSIS +.nf +.B bind '#ε' /dev + +.BI #ε/core0000/ctrdesc +.BI #ε/core0000/ctr00 +.BI #ε/core0000/ctr00ctl +.BI #ε/core0000/ctr01 +.BI #ε/core0000/ctr01ctl + ... +.BI #ε/core0001/ctr00 +.BI #ε/core0001/ctr00ctl +.BI #ε/core0001/ctr01 +.BI #ε/core0001/ctr01ctl + ... +.fi +.SH DESCRIPTION +.PP +The +.I pmc +device serves a two-level directory, giving +access to the hardware counters on the +different cores. +There is a directory per core, +.BR coreNNNN , +containing files +pertaining to that core. +.PP +The +.B ctrNN +files provide access to the value of the counters. +The corresponding control file configures the counter. +It accepts the following commands: +.TP +.B reset +Clears the configuration for the counter disabling any possible +feature it has. +.TP +.B user +and +.B nouser +Enable or disable whether the counter only +runs when the processor is in user space. +.TP +.B os +and +.B noos +Enable or disable whether the counter only +runs when the processor is in the kernel. +.TP +.BI "set configuration +Sets the counter configuration. The detail of what the configuration +means is left to the specific implementation of the counter. Reading the +.B ctrdesc +file gives back some common possible configurations for +this particular driver and processor, one per line. +.TP +.BI enable +Enables the counter which both makes it start running and enables +any other way to access the counter from user space, like special +instructions. 
+.PP +Any change to or from the counters or the configurations is seen +as soon as possible. In the worst case, it is guaranteed that the values +read or set are at least as fresh as the last time a process went in or out +of the kernel on that core. +.PP +Configure the counter 0 to count L2 misses in the kernel. +.EX + % bind '#ε' /dev + % cat /dev/ctrdesc + locked instr + SMI intr + data access + data miss + L1 DTLB miss + L2 DTLB miss + L1 DTLB hit + L2 hit + L2 miss + instr miss + L1 ITLB miss + L2 ITLB miss + DRAM access + L3 miss + echo reset > /dev/core0001/ctr00ctl + echo os > /dev/core0001/ctr00ctl + echo nouser > /dev/core0001/ctr00ctl + echo set L2 miss > /dev/core0001/ctr00ctl + echo 0 > /dev/core0001/ctr00 + echo enable > /dev/core0001/ctr00ctl + cat /dev/core0001/ctr00 + 0x00000000000003e5 + % +.EE +.SH "SEE ALSO +.IR pmc (2) +.SH SOURCE +.B /sys/src/nix/port/pmc.h +.B /sys/src/nix/port/devpmc.c +.B /sys/src/nix/k10/pmcio.c diff -Nru 0/sys/man/3/proc 4/sys/man/3/proc --- 0/sys/man/3/proc Tue Feb 5 22:32:38 2013 +++ 4/sys/man/3/proc Wed Feb 6 00:00:00 2013 @@ -7,6 +7,7 @@ .sp 0.3v .B /proc/trace .BI /proc/ n /args +.BI /proc/ n /core .BI /proc/ n /ctl .BI /proc/ n /fd .BI /proc/ n /fpregs @@ -55,6 +56,10 @@ state and only to user addresses or registers. .PP The read-only +.B core +file contains the AC number for the process, or 0 if the process is at a TC. +.PP +The read-only .B proc file contains the kernel per-process structure. @@ -136,6 +141,12 @@ in units of 1024 bytes .IP \- the base and current scheduling priority, each 11 character numbers +.IP \- +the number of traps for the process +.IP \- +the number of system calls made by the process +.IP \- +the number of inter-core calls made by the process. .PP The read-only .B args @@ -333,6 +344,17 @@ .B "wired\ \fIn Wire the process to processor .IR n . +.TP 10n +.B "core\ \fIn +Send the process to AC number +.IR n , +if +.I n +is greater than zero. Send it back to a TC if +.I n +is zero. 
Send the process to any AC if +.I n +is less than zero. .TP 10n .B trace Without an argument, toggle trace event generation for this process into diff -Nru 0/sys/man/3/segment 4/sys/man/3/segment --- 0/sys/man/3/segment Tue Feb 5 22:32:38 2013 +++ 4/sys/man/3/segment Wed Feb 6 00:00:00 2013 @@ -5,12 +5,10 @@ .nf .B bind '#g' /mnt/segment -.BI #g/ seg1 -.BI #g/ seg1 /ctl -.BI #g/ seg1 /data -.BI #g/ seg2 -.BI #g/ seg2 /ctl -.BI #g/ seg2 /data +.BI #g/ segn +.BI #g/ segn /ctl +.BI #g/ segn /data +.BI #g/ segn /free ... .fi .SH DESCRIPTION @@ -42,26 +40,37 @@ .I segdetach it. .PP -Within each segment directory are two files, -.B data +Within each segment directory are three files, +.BR data , +.BR ctl , and -.BR ctl . +.BR free . Reading and writing .B data affects the contents of the segment. Reading and writing .B ctl retrieves and sets the segment's properties. +The +.B free +file is used to manage deallocation of buffers for zero-copy. .PP -There is only one control message, which sets the segment's +There are only three control messages, which sets the segment's virtual address and length in bytes: .EX - va \fIaddress length\fP + addr \fIaddress length\fP + umsg \fIaddress length\fP + kmsg \fIaddress length\fP .EE +The first one creates a shared segment. The second one creates a shared segment +for use with zero-copy. The third one creates a shared segment for use with +zero copy, identified as the one for use by the kernel. See below for an explanation. .I Address is automatically rounded down to a page boundary and .I length is rounded up to end the segment at a page boundary. +If the address given is zero, the system picks a unique address +(system-wide) for the segment. The segment will reside at the same virtual address in all processes sharing it. When the segment @@ -69,7 +78,6 @@ .IR segattach, the address and length arguments are ignored in the call; they are defined only by the -.B va control message. 
Once the address and length are set, they cannot be reset. .PP @@ -89,6 +97,34 @@ is equivalent to the one performed when opening .B data with mode ORDWR. +.PP +Segments for zero-copy (either +.I umsg +or +.I kmsg +segments) make use of the +.B free +file to control data buffers for zero-copy. Each process exchanging zero-copy +buffers must attach at least to a +.I kmsg +segment, also known as a KZIO (Kernel zero-copy I/O) segment. It might also attach +to one or more +.I umsg +segments, also known as ZIO (zero-copy I/O) segments. ZIO system calls assume +that addresses contained in ZIO (or KZIO) segments may be exchanged for I/O without +requiring data copies. This is not so for addresses referring to any other segment. +.PP +Allocation of buffers within a ZIO segment is handled by the process that created +the segment. Other processes, and the kernel, are expected only to use such buffers, +but they do not allocate or deallocate buffers directly. Allocation of buffers within a KZIO +segment is handled only by the kernel. User processes are expected to use such buffers, but +not to allocate or deallocate them. +.PP +When a buffer is no longer in use, its virtual address can be written to the +.B free +file. As a result, the kernel will notify +any process reading that file of the address of the now-unused data buffer. If the +address refers to a KZIO segment, the kernel handles deallocation on its own. .SH EXAMPLE .PP Create a one megabyte segment at address 0x10000000: diff -Nru 0/sys/man/3/zp 4/sys/man/3/zp --- 0/sys/man/3/zp Thu Jan 1 00:00:00 1970 +++ 4/sys/man/3/zp Thu Feb 7 00:00:00 2013 @@ -0,0 +1,61 @@ +.TH ZP 3 +.SH NAME +zp \- two-way zero-copy interprocess communication +.SH SYNOPSIS +.B bind #∏ +.I dir +.nf + +.IB dir /data +.IB dir /data1 +.fi +.SH DESCRIPTION +.PP +An +.IR attach (5) +of this device allocates two new cross-connected I/O streams, +.IB dir /data +and +.IB dir /data1\f1.
+.PP +Data written to one channel becomes available for reading at +the other. Writes are atomic, no matter the size. Writes from +shared buffers, see +.IR segment (3), +queue data descriptors without copying any data. Writes from other +segments copy the data into a kernel ZIO segment. +.PP +Write boundaries are not preserved. A read terminates when the +buffer is full, if the first descriptor queued suffices to fill up the buffer +(in which case remaining data is copied into another shared buffer). +A read also terminates when copying from another descriptor would +overflow the buffer, in which case fewer bytes than requested may be +returned. +.PP +Writes from non-shared segments +are atomic only up to a certain size, typically 32768 bytes, to +avoid allocation of too large buffers within the kernel. +.PP +If there are multiple writers, each +.I write +is guaranteed to be available in a contiguous piece at the other +end of the pipe. +.PP +The +.IR zp (2) +system call performs an +.I attach +of this device and returns file descriptors to the new pipe's +.B data +and +.B data1 +files. +The files are open with mode +.BR ORDWR . +.SH "SEE ALSO" +.IR pipe (3), +.IR pipe (2), +and +.IR zp (2). +.SH SOURCE +.B /sys/src/9/port/devzp.c diff -Nru 0/sys/src/ape/lib/9/amd64/getcallerpc.s 4/sys/src/ape/lib/9/amd64/getcallerpc.s --- 0/sys/src/ape/lib/9/amd64/getcallerpc.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/9/amd64/getcallerpc.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,3 @@ +TEXT getcallerpc(SB), $0 + MOVQ -8(RARG), AX + RET diff -Nru 0/sys/src/ape/lib/9/amd64/getfcr.s 4/sys/src/ape/lib/9/amd64/getfcr.s --- 0/sys/src/ape/lib/9/amd64/getfcr.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/9/amd64/getfcr.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,38 @@ + +TEXT setfcr(SB), $4 + XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */ + ANDL $0xFFC0, RARG /* just the fcr bits */ + WAIT /* is this needed?
*/ + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET + +TEXT getfcr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVWLZX 0(SP), AX + ANDL $0xFFC0, AX + XORL $(0x3F<<7),AX + RET + +TEXT getfsr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $0x3F, AX + RET + +TEXT setfsr(SB), $4 + ANDL $0x3F, RARG + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/_seek.c 4/sys/src/ape/lib/ap/amd64/_seek.c --- 0/sys/src/ape/lib/ap/amd64/_seek.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/_seek.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,11 @@ +extern long __SEEK(long long*, int, long long, int); + +long long +_SEEK(int fd, long long o, int p) +{ + long long l; + + if(__SEEK(&l, fd, o, p) < 0) + l = -1; + return l; +} diff -Nru 0/sys/src/ape/lib/ap/amd64/cycles.s 4/sys/src/ape/lib/ap/amd64/cycles.s --- 0/sys/src/ape/lib/ap/amd64/cycles.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/cycles.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,5 @@ +TEXT _cycles(SB),1,$0 /* time stamp counter; cycles since power up */ + RDTSC + MOVL AX, 0(RARG) /* lo */ + MOVL DX, 4(RARG) /* hi */ + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/lock.c 4/sys/src/ape/lib/ap/amd64/lock.c --- 0/sys/src/ape/lib/ap/amd64/lock.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/lock.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,26 @@ +#define _LOCK_EXTENSION +#include "../plan9/sys9.h" +#include + +int tas(int*); + +void +lock(Lock *lk) +{ + while(tas(&lk->val)) + _SLEEP(0); +} + +int +canlock(Lock *lk) +{ + if(tas(&lk->val)) + return 0; + return 1; +} + +void +unlock(Lock *lk) +{ + lk->val = 0; +} diff -Nru 0/sys/src/ape/lib/ap/amd64/main9.s 4/sys/src/ape/lib/ap/amd64/main9.s --- 0/sys/src/ape/lib/ap/amd64/main9.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/main9.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,12 @@ + TEXT _main(SB), 1, $(3*8) + + CALL _envsetup(SB) + MOVL 
inargc-8(FP), RARG + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + MOVQ environ(SB), AX + MOVQ AX, 16(SP) + CALL main(SB) + MOVQ AX, RARG + CALL exit(SB) + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/main9p.s 4/sys/src/ape/lib/ap/amd64/main9p.s --- 0/sys/src/ape/lib/ap/amd64/main9p.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/main9p.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,45 @@ +#define NPRIVATES 16 + +GLOBL _tos(SB), $8 +GLOBL _privates(SB), $8 +GLOBL _nprivates(SB), $8 + +TEXT _mainp(SB), 1, $(3*8+NPRIVATES*8) + + /* _tos = arg */ + MOVQ AX, _tos(SB) + LEAQ 8(SP), AX + MOVQ AX, _privates(SB) + MOVQ $NPRIVATES, _nprivates(SB) + + /* _profmain(); */ + CALL _profmain(SB) + + /* _tos->prof.pp = _tos->prof.next; next is the second pointer, at offset 8 with 8-byte pointers */ + MOVQ _tos+0(SB),DX + MOVQ 8(DX),CX + MOVQ CX,(DX) + + CALL _envsetup(SB) + + /* main(argc, argv, environ); */ + MOVL inargc-8(FP), RARG + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + MOVQ environ(SB), AX + MOVQ AX, 16(SP) + CALL main(SB) + +loop: + MOVL AX, RARG + CALL exit(SB) + MOVQ $_profin(SB), AX /* force loading of profile */ + MOVL $0, AX + JMP loop + +TEXT _savearg(SB), 1, $0 + RET + +TEXT _callpc(SB), 1, $0 + MOVQ 8(RARG), AX + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/mkfile 4/sys/src/ape/lib/ap/amd64/mkfile --- 0/sys/src/ape/lib/ap/amd64/mkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/mkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,20 @@ +APE=/sys/src/ape +objtype=amd64 +<$APE/config +LIB=/$objtype/lib/ape/libap.a +OFILES=\ + _seek.$O\ + cycles.$O\ + lock.$O\ + main9.$O\ + main9p.$O\ + notetramp.$O\ + setjmp.$O\ + strchr.$O\ + strlen.$O\ + tas.$O\ + + +#include + +/* A stack to hold pcs when signals nest */ +#define MAXSIGSTACK 20 +typedef struct Pcstack Pcstack; +static struct Pcstack { + int sig; + void (*hdlr)(int, char*, Ureg*); + unsigned long long restorepc; + Ureg *u; +} pcstack[MAXSIGSTACK]; +static int nstack = 0; + +static void notecont(Ureg*, char*); + +void +_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u) +{ +
Pcstack *p; + + if(nstack >= MAXSIGSTACK) + _NOTED(1); /* nesting too deep; just do system default */ + p = &pcstack[nstack]; + p->restorepc = u->ip; + p->sig = sig; + p->hdlr = hdlr; + p->u = u; + nstack++; + u->ip = (unsigned long long) notecont; + _NOTED(2); /* NSAVE: clear note but hold state */ +} + +static void +notecont(Ureg *u, char *s) +{ + Pcstack *p; + void(*f)(int, char*, Ureg*); + + p = &pcstack[nstack-1]; + f = p->hdlr; + u->ip = p->restorepc; + nstack--; + (*f)(p->sig, s, u); + _NOTED(3); /* NRSTR */ +} + +#define JMPBUFPC 1 +#define JMPBUFSP 0 + +extern sigset_t _psigblocked; + +typedef struct { + sigset_t set; + sigset_t blocked; + unsigned long long jmpbuf[2]; +} sigjmp_buf_amd64; + +void +siglongjmp(sigjmp_buf j, int ret) +{ + struct Ureg *u; + sigjmp_buf_amd64 *jb; + + jb = (sigjmp_buf_amd64*)j; + + if(jb->set) + _psigblocked = jb->blocked; + if(nstack == 0 || pcstack[nstack-1].u->sp > jb->jmpbuf[JMPBUFSP]) + longjmp((void*)jb->jmpbuf, ret); + u = pcstack[nstack-1].u; + nstack--; + u->ax = ret; + if(ret == 0) + u->ax = 1; + u->ip = jb->jmpbuf[JMPBUFPC]; + u->sp = jb->jmpbuf[JMPBUFSP] + 8; + _NOTED(3); /* NRSTR */ +} diff -Nru 0/sys/src/ape/lib/ap/amd64/setjmp.s 4/sys/src/ape/lib/ap/amd64/setjmp.s --- 0/sys/src/ape/lib/ap/amd64/setjmp.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/setjmp.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,27 @@ +TEXT longjmp(SB), $0 + MOVL r+8(FP), AX + CMPL AX, $0 + JNE ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVL $1, AX /* bless their pointed heads */ +ok: + MOVQ 0(RARG), SP /* restore sp */ + MOVQ 8(RARG), BX /* put return pc on the stack */ + MOVQ BX, 0(SP) + RET + +TEXT setjmp(SB), $0 + MOVQ SP, 0(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT sigsetjmp(SB), $0 + MOVL savemask+8(FP), BX + MOVL BX, 0(RARG) + MOVL $_psigblocked(SB), 4(RARG) + MOVQ SP, 8(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ BX, 16(RARG) + 
MOVL $0, AX /* return 0 */ + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/strchr.s 4/sys/src/ape/lib/ap/amd64/strchr.s --- 0/sys/src/ape/lib/ap/amd64/strchr.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/strchr.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,38 @@ + TEXT strchr(SB), $0 + + MOVQ RARG, DI + MOVB c+8(FP), AX + CMPB AX, $0 + JEQ l2 /**/ + +/* + * char is not null + */ +l1: + MOVB (DI), BX + CMPB BX, $0 + JEQ ret0 + ADDQ $1, DI + CMPB AX, BX + JNE l1 + + MOVQ DI, AX + SUBQ $1, AX + RET + +/* + * char is null + */ +l2: + MOVQ $-1, CX + CLD + + REPN; SCASB + + MOVQ DI, AX + SUBQ $1, AX + RET + +ret0: + MOVQ $0, AX + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/strlen.s 4/sys/src/ape/lib/ap/amd64/strlen.s --- 0/sys/src/ape/lib/ap/amd64/strlen.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/strlen.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,16 @@ + TEXT strlen(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD +/* + * look for end of string + */ + + MOVQ RARG, DI + REPN; SCASB + + MOVQ DI, AX + SUBQ RARG, AX + SUBQ $1, AX + RET diff -Nru 0/sys/src/ape/lib/ap/amd64/tas.s 4/sys/src/ape/lib/ap/amd64/tas.s --- 0/sys/src/ape/lib/ap/amd64/tas.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/ape/lib/ap/amd64/tas.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,5 @@ +TEXT tas(SB),$0 + + MOVL $0xdeadead,AX + XCHGL AX,(RARG) + RET diff -Nru 0/sys/src/cmd/syscall/mkfile 4/sys/src/cmd/syscall/mkfile --- 0/sys/src/cmd/syscall/mkfile Tue Nov 12 17:31:04 2002 +++ 4/sys/src/cmd/syscall/mkfile Wed Feb 6 00:00:00 2013 @@ -19,6 +19,7 @@ tab.h: $SYSCALL sed '/#define._X[0-9_]/d; + /#define.NIXSYSCALL/d; /#define.NSYSCALL/d; s/#define.([A-Z0-9_][A-Z0-9_]*).*/ "\1", (int(*)(...))\1,/' $SYSCALL | tr A-Z a-z > tab.h diff -Nru 0/sys/src/cmd/trace.c 4/sys/src/cmd/trace.c --- 0/sys/src/cmd/trace.c Wed Feb 6 21:41:05 2013 +++ 4/sys/src/cmd/trace.c Wed Feb 6 00:00:00 2013 @@ -10,6 +10,24 @@ #include #include "trace.h" +#define GBIT8(p) ((p)[0]) +#define GBIT16(p) ((p)[0]|((p)[1]<<8)) +#define GBIT32(p) 
((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) +#define GBIT64(p) ((u32int)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) |\ + ((vlong)((p)[4]|((p)[5]<<8)|((p)[6]<<16)|((p)[7]<<24)) << 32)) + +#define PBIT8(p,v) (p)[0]=(v) +#define PBIT16(p,v) (p)[0]=(v);(p)[1]=(v)>>8 +#define PBIT32(p,v) (p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24 +#define PBIT64(p,v) (p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24;\ + (p)[4]=(v)>>32;(p)[5]=(v)>>40;(p)[6]=(v)>>48;(p)[7]=(v)>>56 + +#define BIT8SZ 1 +#define BIT16SZ 2 +#define BIT32SZ 4 +#define BIT64SZ 8 + +#pragma varargck type "t" uvlong #pragma varargck type "t" vlong #pragma varargck type "U" uvlong @@ -69,6 +87,7 @@ void drawtrace(void); int schedparse(char*, char*, char*); int timeconv(Fmt*); +static void tracefile(int); char *schedstatename[] = { [SAdmit] = "Admit", @@ -85,6 +104,7 @@ [SInte] = "Inte", [SUser] = "User", [SYield] = "Yield", + [SLock] = "Lock", }; struct { @@ -125,18 +145,26 @@ static void usage(void) { - fprint(2, "Usage: %s [-d profdev] [-w] [-v] [-t triggerproc] [processes]\n", argv0); + fprint(2, "Usage: %s [-f file [-g]] [-d profdev] [-w] [-v] [-t triggerproc] [processes]\n", argv0); exits(nil); } void threadmain(int argc, char **argv) { - int fd, i; + int fd, i, justfile, graph; char fname[80]; fmtinstall('t', timeconv); + justfile = graph = 0; ARGBEGIN { + case 'f': + justfile = 1; + profdev = EARGF(usage()); + break; + case 'g': + graph = 1; + break; case 'd': profdev = EARGF(usage()); break; @@ -153,7 +181,13 @@ usage(); } ARGEND; - + if(justfile){ + if(argc != 0) + usage(); + tracefile(graph); + exits(nil); + } + fname[sizeof fname - 1] = 0; for(i = 0; i < argc; i++){ snprint(fname, sizeof fname - 2, "/proc/%s/ctl", @@ -173,6 +207,118 @@ drawtrace(); } +static +struct{ + int pid; + int state; +}graphs[64]; + +static void +addtograph(Traceevent *t) +{ + int i, dead; + + dead = -1; + for(i = 0; i < nelem(graphs); i++){ + if(graphs[i].pid == t->pid){ + /* + * dead procs might get some 
sleep/wakeup events, keep them dead. + */ + if(graphs[i].state == SDead) + return; + break; + } + if(graphs[i].state == SDead) + dead = i; + if(graphs[i].pid == 0 || i == nelem(graphs)-1){ + if(dead >= 0) + i = dead; + graphs[i].pid = t->pid; + break; + } + } + if(i == nelem(graphs)) + return; + graphs[i].state = t->etype; +} + +static void +printgraph(Biobuf *bout, int pid, int core, uvlong time, char *sname) +{ + int i; + static char *schar[] = { + [SAdmit] = "!a", + [SSleep] = ".s", + [SDead] = "xd", + [SDeadline] = "??", + [SEdf] = "??", + [SExpel] = "??", + [SReady] = "!r", + [SRelease] = "??", + [SRun] = "|R", + [SSlice] = "??", + [SInts] = "!i", + [SInte] = "|e", + [SUser] = "|u", + [SYield] = "!y", + [SLock] = "!l", + }; + + Bprint(bout, "%20.20lld %02d %4d", time, core, pid); + for(i = 0; i < nelem(graphs); i++){ + if(graphs[i].pid == 0) + break; + if(graphs[i].pid != pid && graphs[i].state == SDead) + Bprint(bout, "\t "); + else + Bprint(bout, "\t%c", schar[graphs[i].state][0]); + if(graphs[i].pid == pid) + Bputc(bout, schar[graphs[i].state][1]); + } + Bprint(bout, "\t%s\n", sname); +} + +static void +tracefile(int graph) +{ + int logfd; + Traceevent t; + Biobuf bout; + uchar buf[BIT32SZ+BIT32SZ+BIT64SZ+BIT32SZ]; + uvlong t0; + + if((logfd = open(profdev, OREAD)) < 0) + sysfatal("%s: open: %r", profdev); + if(Binit(&bout, 1, OWRITE) < 0) + sysfatal("stdout: Binit: %r"); + if(graph) + Bprint(&bout, "#time core pid states\n"); + t0 = 0; + while(read(logfd, buf, sizeof buf) == sizeof buf){ + t.pid = GBIT32(buf); + t.etype = GBIT32(buf+BIT32SZ); + t.time = GBIT64(buf+BIT32SZ+BIT32SZ); + t.core = GBIT32(buf+BIT32SZ+BIT32SZ+BIT64SZ); + if(t.pid == 0) + continue; + if(t.etype >= nelem(schedstatename) || schedstatename[t.etype] == nil){ + fprint(2, "unknown state %ud\n", t.etype); + continue; + } + if(graph == 0) + Bprint(&bout, "%ud\t%-10.10s\t%ulld\t%ud\n", + t.pid, schedstatename[t.etype], t.time, t.core); + else{ + addtograph(&t); + if(t0 == 0) + t0 = t.time; 
+ printgraph(&bout, t.pid, t.core, t.time-t0, schedstatename[t.etype]); + } + } + Bterm(&bout); + close(logfd); +} + static void mkcol(int i, int c0, int c1, int c2) { @@ -263,8 +409,8 @@ s = now - t->tstart; if(t->tevents[SRelease]) snprint(buf, sizeof(buf), " per %t — avg: %t max: %t", - (vlong)(s/t->tevents[SRelease]), - (vlong)(t->runtime/t->tevents[SRelease]), + (uvlong)(s/t->tevents[SRelease]), + (uvlong)(t->runtime/t->tevents[SRelease]), t->runmax); else if((s /=1000000000LL) != 0) snprint(buf, sizeof(buf), " per 1s — avg: %t total: %t", @@ -538,7 +684,7 @@ } break; case SDead: -print("task died %ld %t %s\n", event->pid, event->time, schedstatename[event->etype & 0xffff]); +print("task died %d %t %s\n", event->pid, event->time, schedstatename[event->etype & 0xffff]); free(t->events); free(t->name); ntasks--; @@ -695,12 +841,12 @@ nevents = n / sizeof(Traceevent); for (ep = eventbuf; ep < eventbuf + nevents; ep++){ if ((ep->etype & 0xffff) >= Nevent){ - print("%ld %t Illegal event %ld\n", + print("%ud %t Illegal event %ud\n", ep->pid, ep->time, ep->etype & 0xffff); continue; } if (verbose) - print("%ld %t %s\n", + print("%ud %t %s\n", ep->pid, ep->time, schedstatename[ep->etype & 0xffff]); for(i = 0; i < ntasks; i++) diff -Nru 0/sys/src/libc/9sys/getcore.c 4/sys/src/libc/9sys/getcore.c --- 0/sys/src/libc/9sys/getcore.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/9sys/getcore.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,14 @@ +#include +#include +#include + +/* + * getcore() conflicts with tbl source. 
+ */ +int +getcoreno(int *type) +{ + if (type != nil) + *type = _tos->nixtype; + return _tos->core; +} diff -Nru 0/sys/src/libc/9sys/mkfile 4/sys/src/libc/9sys/mkfile --- 0/sys/src/libc/9sys/mkfile Wed Feb 6 07:04:01 2013 +++ 4/sys/src/libc/9sys/mkfile Wed Feb 6 00:00:00 2013 @@ -20,8 +20,9 @@ dirwstat.$O\ fcallfmt.$O\ fork.$O\ - getnetconninfo.$O\ + getcore.$O\ getenv.$O\ + getnetconninfo.$O\ getpid.$O\ getppid.$O\ getwd.$O\ @@ -47,13 +48,19 @@ times.$O\ tm2sec.$O\ truerand.$O\ + upsem.$O\ wait.$O\ waitpid.$O\ werrstr.$O\ write.$O\ writev.$O\ + zioread.$O\ + ziowrite.$O\ + ziop.$O\ -HFILES=/sys/include/libc.h +HFILES=\ + /sys/include/libc.h\ + /sys/include/tos.h\ UPDATE=\ mkfile\ diff -Nru 0/sys/src/libc/9sys/upsem.c 4/sys/src/libc/9sys/upsem.c --- 0/sys/src/libc/9sys/upsem.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/9sys/upsem.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,150 @@ +#include +#include "libc.h" + +typedef struct Cnt Cnt; /* debug */ +struct Cnt +{ + int uup; + int kup; + int udown; + int kdown; + int ualt; + int kalt; +}; + +#define dprint if(semdebug)fprint + +int semtrytimes = 100; +int semdebug; + +static Cnt c; + +/* + * For ACs, no backoff + */ +static void +nixaclock(Lock *lk) +{ + while(_tas(&lk->val)) + while(lk->val) + ; + return; +} + + +void +initsem(Sem *s, int val) +{ + memset(s, 0, sizeof(Sem)); + if(val > 0) + s->tickets = val; +} + +void +upsem(Sem *s) +{ + int nixtype; + + assert(s != nil); + + getcoreno(&nixtype); + + if(nixtype == NIXAC) + nixaclock(s); + else + lock(s); + + if(s->tickets == 0 && (s->waiting > 0 || s->going > 0)){ + unlock(s); + ainc(&c.kup); + semwakeup(s); + return; + } + s->tickets++; + unlock(s); + ainc(&c.uup); +} + +int +downsem(Sem *s, int block) +{ + int i; + int nixtype; + + assert(s != nil); + getcoreno(&nixtype); + + /* busy wait */ + for(i = 0; s->tickets == 0 && i < semtrytimes; i++) + ; + + if(nixtype == NIXAC) + nixaclock(s); + else + lock(s); + + if(! 
block && s->tickets == 0){ + unlock(s); + return 0; + } + if(s->tickets == 0){ + s->going++; + unlock(s); + ainc(&c.kdown); + if(semsleep(s) < 0) + return -1; + return 1; + } + s->tickets--; + unlock(s); + ainc(&c.udown); + return 1; +} + +static int nextindex; + +int +altsems(Sem *ss[], int n) +{ + int w, i; + ulong p; + + if(ss == nil || n <= 0){ + werrstr("altsems: bad args"); + return -1; + } + + i = 0; + p = (ulong) ainc(&nextindex); + + /* busy wait */ + for(w = 0; w < semtrytimes; w++){ + for(i = 0; i < n; i++) + if(ss[(p+i)%n]->tickets > 0) + break; + if(i < n) + break; + } + + p = (p+i)%n; + + for(i = 0; i < n; i++) + if(downsem(ss[(p+i)%n], 0) == 1){ + ainc(&c.ualt); + return (p+i)%n; + } + ainc(&c.kalt); + return semalt(ss, n); +} + +void +semstats(void) +{ + print("uup: %d\n", c.uup); + print("kup: %d\n", c.kup); + print("udown: %d\n", c.udown); + print("kdown: %d\n", c.kdown); + print("ualt: %d\n", c.ualt); + print("kalt: %d\n", c.kalt); +} + diff -Nru 0/sys/src/libc/9sys/ziop.c 4/sys/src/libc/9sys/ziop.c --- 0/sys/src/libc/9sys/ziop.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/9sys/ziop.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,18 @@ +#include +#include + +int +ziop(int fd[2]) +{ + if(bind("#∏", "/mnt/zp", MREPL|MCREATE) < 0) + return -1; + fd[0] = open("/mnt/zp/data", ORDWR); + if(fd[0] < 0) + return -1; + fd[1] = open("/mnt/zp/data1", ORDWR); + if(fd[1] < 0){ + close(fd[0]); + return -1; + } + return 0; +} diff -Nru 0/sys/src/libc/9sys/zioread.c 4/sys/src/libc/9sys/zioread.c --- 0/sys/src/libc/9sys/zioread.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/9sys/zioread.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,8 @@ +#include +#include + +int +zioread(int fd, Zio io[], int nio, usize count) +{ + return ziopread(fd, io, nio, count, -1LL); +} diff -Nru 0/sys/src/libc/9sys/ziowrite.c 4/sys/src/libc/9sys/ziowrite.c --- 0/sys/src/libc/9sys/ziowrite.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/9sys/ziowrite.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,8 @@ +#include +#include + 
+int +ziowrite(int fd, Zio io[], int nio) +{ + return ziopwrite(fd, io, nio, -1LL); +} diff -Nru 0/sys/src/libc/9syscall/sys.h 4/sys/src/libc/9syscall/sys.h --- 0/sys/src/libc/9syscall/sys.h Fri Jun 29 18:22:50 2012 +++ 4/sys/src/libc/9syscall/sys.h Thu Feb 7 00:00:00 2013 @@ -49,3 +49,11 @@ #define PREAD 50 #define PWRITE 51 #define TSEMACQUIRE 52 +#define SEMSLEEP 53 +#define SEMWAKEUP 54 +#define SEMALT 55 +#define EXECAC 56 +#define NIXSYSCALL 57 +#define ZIOPREAD 58 +#define ZIOPWRITE 59 +#define ZIOFREE 60 diff -Nru 0/sys/src/libc/amd64/_seek.c 4/sys/src/libc/amd64/_seek.c --- 0/sys/src/libc/amd64/_seek.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/_seek.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,14 @@ +#include +#include + +extern int _seek(vlong*, int, vlong, int); + +vlong +seek(int fd, vlong o, int p) +{ + vlong l; + + if(_seek(&l, fd, o, p) < 0) + l = -1LL; + return l; +} diff -Nru 0/sys/src/libc/amd64/argv0.s 4/sys/src/libc/amd64/argv0.s --- 0/sys/src/libc/amd64/argv0.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/argv0.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,4 @@ +GLOBL argv0(SB), $8 +GLOBL _tos(SB), $8 +GLOBL _privates(SB), $8 +GLOBL _nprivates(SB), $4 diff -Nru 0/sys/src/libc/amd64/atom.s 4/sys/src/libc/amd64/atom.s --- 0/sys/src/libc/amd64/atom.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/atom.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,66 @@ +TEXT ainc(SB), 1, $0 /* long ainc(long *); */ +ainclp: + MOVL (RARG), AX /* exp */ + MOVL AX, BX + INCL BX /* new */ + LOCK; CMPXCHGL BX, (RARG) + JNZ ainclp + MOVL BX, AX + RET + +TEXT adec(SB), 1, $0 /* long adec(long*); */ +adeclp: + MOVL (RARG), AX + MOVL AX, BX + DECL BX + LOCK; CMPXCHGL BX, (RARG) + JNZ adeclp + MOVL BX, AX + RET + +/* + * int cas32(u32int *p, u32int ov, u32int nv); + * int cas(uint *p, int ov, int nv); + * int casul(ulong *p, ulong ov, ulong nv); + */ + +TEXT cas32(SB), 1, $0 +TEXT cas(SB), 1, $0 +TEXT casul(SB), 1, $0 +TEXT casl(SB), 1, $0 /* back compat */ + MOVL exp+8(FP), AX + MOVL 
new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +/* + * int cas64(u64int *p, u64int ov, u64int nv); + * int casp(void **p, void *ov, void *nv); + */ + +TEXT cas64(SB), 1, $0 +TEXT casp(SB), 1, $0 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +/* + * void mfence(void); + */ +TEXT mfence(SB),0,$0 + MFENCE + RET + diff -Nru 0/sys/src/libc/amd64/cycles.s 4/sys/src/libc/amd64/cycles.s --- 0/sys/src/libc/amd64/cycles.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/cycles.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,5 @@ +TEXT cycles(SB),1,$0 /* time stamp counter; cycles since power up */ + RDTSC + MOVL AX, 0(RARG) /* lo */ + MOVL DX, 4(RARG) /* hi */ + RET diff -Nru 0/sys/src/libc/amd64/getcallerpc.s 4/sys/src/libc/amd64/getcallerpc.s --- 0/sys/src/libc/amd64/getcallerpc.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/getcallerpc.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,3 @@ +TEXT getcallerpc(SB), $0 + MOVQ -8(RARG), AX + RET diff -Nru 0/sys/src/libc/amd64/getfcr.s 4/sys/src/libc/amd64/getfcr.s --- 0/sys/src/libc/amd64/getfcr.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/getfcr.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,38 @@ + +TEXT setfcr(SB), $4 + XORL $(0x3F<<7),RARG /* bits are cleared in csr to enable them */ + ANDL $0xFFC0, RARG /* just the fcr bits */ + WAIT /* is this needed? 
*/ + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET + +TEXT getfcr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVWLZX 0(SP), AX + ANDL $0xFFC0, AX + XORL $(0x3F<<7),AX + RET + +TEXT getfsr(SB), $4 + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $0x3F, AX + RET + +TEXT setfsr(SB), $4 + ANDL $0x3F, RARG + WAIT + STMXCSR 0(SP) + MOVL 0(SP), AX + ANDL $~0x3F, AX + ORL RARG, AX + MOVL AX, 0(SP) + LDMXCSR 0(SP) + RET diff -Nru 0/sys/src/libc/amd64/main9.s 4/sys/src/libc/amd64/main9.s --- 0/sys/src/libc/amd64/main9.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/main9.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,19 @@ +#define NPRIVATES 16 + +TEXT _main(SB), 1, $(2*8+NPRIVATES*8) + MOVQ AX, _tos(SB) + LEAQ 16(SP), AX + MOVQ AX, _privates(SB) + MOVL $NPRIVATES, _nprivates(SB) + MOVL inargc-8(FP), RARG + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + CALL main(SB) + +loop: + MOVQ $_exits<>(SB), RARG + CALL exits(SB) + JMP loop + +DATA _exits<>+0(SB)/4, $"main" +GLOBL _exits<>+0(SB), $5 diff -Nru 0/sys/src/libc/amd64/main9p.s 4/sys/src/libc/amd64/main9p.s --- 0/sys/src/libc/amd64/main9p.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/main9p.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,41 @@ +#define NPRIVATES 16 + +TEXT _mainp(SB), 1, $(2*8+NPRIVATES*8) + MOVQ AX, _tos(SB) /* _tos = arg */ + LEAQ 16(SP), AX + MOVQ AX, _privates(SB) + MOVL $NPRIVATES, _nprivates(SB) + + CALL _profmain(SB) /* _profmain(); */ + + MOVQ _tos+0(SB), DX /* _tos->prof.pp = _tos->prof.next; */ + MOVQ 8(DX), CX + MOVQ CX, (DX) + + MOVL inargc-8(FP), RARG /* main(argc, argv); */ + LEAQ inargv+0(FP), AX + MOVQ AX, 8(SP) + CALL main(SB) + +loop: + MOVQ $_exits<>(SB), RARG + CALL exits(SB) + MOVQ $_profin(SB), AX /* force loading of profile */ + JMP loop + +TEXT _savearg(SB), 1, $0 + MOVQ RARG, AX + RET + +TEXT _saveret(SB), 1, $0 + RET + +TEXT _restorearg(SB), 1, $0 + RET /* we want RARG in RARG */ + +TEXT _callpc(SB), 1, $0 + MOVQ 8(RARG), AX + RET + +DATA _exits<>+0(SB)/4, 
$"main" +GLOBL _exits<>+0(SB), $5 diff -Nru 0/sys/src/libc/amd64/memccpy.s 4/sys/src/libc/amd64/memccpy.s --- 0/sys/src/libc/amd64/memccpy.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/memccpy.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,58 @@ + TEXT memccpy(SB),$0 + + MOVL n+24(FP), CX + CMPL CX, $0 + JEQ none + MOVQ p2+8(FP), DI + MOVBLZX c+16(FP), AX + CLD +/* + * find the character in the second string + */ + + REPN; SCASB + JEQ found + +/* + * if not found, set count to 'n' + */ +none: + MOVL $0, AX + MOVL n+24(FP), BX + JMP memcpy + +/* + * if found, set count to bytes thru character + */ +found: + MOVQ DI, AX + SUBQ p2+8(FP), AX + MOVQ AX, BX + ADDQ RARG, AX + +/* + * copy the memory + */ + +memcpy: + MOVQ RARG, DI + MOVQ p2+8(FP), SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, DX + ORQ SI, DX + ANDL $3, DX + JNE c3 + MOVL BX, CX + SHRQ $2, CX + REP; MOVSL +/* + * copy the rest, by bytes + */ + ANDL $3, BX +c3: + MOVL BX, CX + REP; MOVSB + + RET diff -Nru 0/sys/src/libc/amd64/memchr.s 4/sys/src/libc/amd64/memchr.s --- 0/sys/src/libc/amd64/memchr.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/memchr.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,23 @@ + TEXT memchr(SB),$0 + + MOVL n+16(FP), CX + CMPL CX, $0 + JEQ none + MOVQ RARG, DI + MOVBLZX c+8(FP), AX + CLD +/* + * SCASB is memchr instruction + */ + + REPN; SCASB + JEQ found + +none: + MOVL $0, AX + RET + +found: + MOVQ DI, AX + SUBQ $1, AX + RET diff -Nru 0/sys/src/libc/amd64/memcmp.s 4/sys/src/libc/amd64/memcmp.s --- 0/sys/src/libc/amd64/memcmp.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/memcmp.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,52 @@ + TEXT memcmp(SB),$0 + + MOVL n+16(FP), BX + CMPL BX, $0 + JEQ none + MOVQ RARG, DI + MOVQ p2+8(FP), SI + CLD + MOVQ DI, CX + ORQ SI, CX + ANDL $3, CX + JNE c3 +/* + * first by longs + */ + + MOVL BX, CX + SHRQ $2, CX + + REP; CMPSL + JNE found + +/* + * then by bytes + */ + ANDL $3, BX +c3: + MOVL BX, CX + REP; CMPSB + JNE found1 + +none: + MOVQ $0, AX + RET + 
+/* + * if long found, + * back up and look by bytes + */ +found: + MOVL $4, CX + SUBQ CX, DI + SUBQ CX, SI + REP; CMPSB + +found1: + JLS lt + MOVQ $-1, AX + RET +lt: + MOVQ $1, AX + RET diff -Nru 0/sys/src/libc/amd64/memcpy.s 4/sys/src/libc/amd64/memcpy.s --- 0/sys/src/libc/amd64/memcpy.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/memcpy.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,81 @@ +TEXT memcpy(SB), $0 + MOVQ RARG, DI + MOVQ DI, AX /* return value */ + MOVQ p2+8(FP), SI + MOVL n+16(FP), BX + CMPL BX, $0 + JGT _ok + JEQ _return /* nothing to do if n == 0 */ + MOVL $0, SI /* fault if n < 0 */ + +/* + * check and set for backwards: + * (p2 < p1) && ((p2+n) > p1) + */ +_ok: + CMPQ SI, DI + JGT _forward + JEQ _return /* nothing to do if p2 == p1 */ + MOVQ SI, DX + ADDQ BX, DX + CMPQ DX, DI + JGT _back + +/* + * copy whole longs if aligned + */ +_forward: + CLD + MOVQ SI, DX + ORQ DI, DX + ANDL $3, DX + JNE c3f + MOVQ BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL + +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ +c3f: + MOVL BX, CX + REP; MOVSB + + RET + +/* + * whole thing backwards has + * adjusted addresses + */ +_back: + ADDQ BX, DI + ADDQ BX, SI + STD + SUBQ $4, DI + SUBQ $4, SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, DX + ORQ SI, DX + ANDL $3, DX + JNE c3b + MOVL BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ + +c3b: + ADDQ $3, DI + ADDQ $3, SI + MOVL BX, CX + REP; MOVSB + +_return: + RET diff -Nru 0/sys/src/libc/amd64/memmove.s 4/sys/src/libc/amd64/memmove.s --- 0/sys/src/libc/amd64/memmove.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/memmove.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,81 @@ +TEXT memmove(SB), $0 + MOVQ RARG, DI + MOVQ DI, AX /* return value */ + MOVQ p2+8(FP), SI + MOVL n+16(FP), BX + CMPL BX, $0 + JGT _ok + JEQ _return /* nothing to do if n == 0 */ + MOVL $0, SI /* fault if n < 0 */ + +/* + * check and set for 
backwards: + * (p2 < p1) && ((p2+n) > p1) + */ +_ok: + CMPQ SI, DI + JGT _forward + JEQ _return /* nothing to do if p2 == p1 */ + MOVQ SI, DX + ADDQ BX, DX + CMPQ DX, DI + JGT _back + +/* + * copy whole longs if aligned + */ +_forward: + CLD + MOVQ SI, DX + ORQ DI, DX + ANDL $3, DX + JNE c3f + MOVQ BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL + +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ +c3f: + MOVL BX, CX + REP; MOVSB + + RET + +/* + * whole thing backwards has + * adjusted addresses + */ +_back: + ADDQ BX, DI + ADDQ BX, SI + STD + SUBQ $4, DI + SUBQ $4, SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, DX + ORQ SI, DX + ANDL $3, DX + JNE c3b + MOVL BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; MOVSL +/* + * copy the rest, by bytes + */ + JEQ _return /* flags set by above ANDL */ + +c3b: + ADDQ $3, DI + ADDQ $3, SI + MOVL BX, CX + REP; MOVSB + +_return: + RET diff -Nru 0/sys/src/libc/amd64/memset.s 4/sys/src/libc/amd64/memset.s --- 0/sys/src/libc/amd64/memset.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/memset.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,41 @@ + TEXT memset(SB),$0 + + CLD + MOVQ RARG, DI + MOVBLZX c+8(FP), AX + MOVL n+16(FP), BX +/* + * if not enough bytes, just set bytes + */ + CMPL BX, $9 + JLS c3 +/* + * if not aligned, just set bytes + */ + MOVQ RARG, CX + ANDL $3,CX + JNE c3 +/* + * build word in AX + */ + MOVB AL, AH + MOVL AX, CX + SHLL $16, CX + ORL CX, AX +/* + * set whole longs + */ +c1: + MOVQ BX, CX + SHRQ $2, CX + ANDL $3, BX + REP; STOSL +/* + * set the rest, by bytes + */ +c3: + MOVL BX, CX + REP; STOSB +ret: + MOVQ RARG,AX + RET diff -Nru 0/sys/src/libc/amd64/mkfile 4/sys/src/libc/amd64/mkfile --- 0/sys/src/libc/amd64/mkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/mkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,42 @@ +objtype=amd64 + +#include +#include + +void +notejmp(void *vr, jmp_buf j, int ret) +{ + struct Ureg *r = vr; + + r->ax = ret; + if(ret == 0) + r->ax = 1; + r->ip = 
j[JMPBUFPC]; + r->sp = j[JMPBUFSP] + 8; + noted(NCONT); +} diff -Nru 0/sys/src/libc/amd64/rdpmc.s 4/sys/src/libc/amd64/rdpmc.s --- 0/sys/src/libc/amd64/rdpmc.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/rdpmc.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,9 @@ +MODE $64 + +TEXT rdpmc(SB), 1, $-4 /* performance monitor counter */ + MOVL RARG, CX + RDPMC /* read CX performance counter */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET diff -Nru 0/sys/src/libc/amd64/setjmp.s 4/sys/src/libc/amd64/setjmp.s --- 0/sys/src/libc/amd64/setjmp.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/setjmp.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,17 @@ +TEXT longjmp(SB), $0 + MOVL r+8(FP), AX + CMPL AX, $0 + JNE ok /* ansi: "longjmp(0) => longjmp(1)" */ + MOVL $1, AX /* bless their pointed heads */ +ok: + MOVQ 0(RARG), SP /* restore sp */ + MOVQ 8(RARG), BX /* put return pc on the stack */ + MOVQ BX, 0(SP) + RET + +TEXT setjmp(SB), $0 + MOVQ SP, 0(RARG) /* store sp */ + MOVQ 0(SP), BX /* store return pc */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET diff -Nru 0/sys/src/libc/amd64/sqrt.s 4/sys/src/libc/amd64/sqrt.s --- 0/sys/src/libc/amd64/sqrt.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/sqrt.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,4 @@ +TEXT sqrt(SB), $0 + MOVSD a+0(FP), X0 + SQRTSD X0, X0 + RET diff -Nru 0/sys/src/libc/amd64/strcat.s 4/sys/src/libc/amd64/strcat.s --- 0/sys/src/libc/amd64/strcat.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/strcat.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,48 @@ + TEXT strcat(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD + +/* + * find length of second string + */ + + MOVQ p2+8(FP), DI + REPN; SCASB + + MOVQ DI, BX + SUBQ p2+8(FP), BX + +/* + * find end of first string + */ + + MOVQ RARG, DI + REPN; SCASB + +/* + * copy the memory + */ + SUBQ $1, DI + MOVQ p2+8(FP), SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, CX + ORQ SI, CX + ANDL $3, CX + JNE c3 + MOVQ BX, CX + SHRQ $2, 
CX + REP; MOVSL +/* + * copy the rest, by bytes + */ + ANDL $3, BX +c3: + MOVQ BX, CX + REP; MOVSB + + MOVQ RARG, AX + RET diff -Nru 0/sys/src/libc/amd64/strchr.s 4/sys/src/libc/amd64/strchr.s --- 0/sys/src/libc/amd64/strchr.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/strchr.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,38 @@ + TEXT strchr(SB), $0 + + MOVQ RARG, DI + MOVB c+8(FP), AX + CMPB AX, $0 + JEQ l2 /**/ + +/* + * char is not null + */ +l1: + MOVB (DI), BX + CMPB BX, $0 + JEQ ret0 + ADDQ $1, DI + CMPB AX, BX + JNE l1 + + MOVQ DI, AX + SUBQ $1, AX + RET + +/* + * char is null + */ +l2: + MOVQ $-1, CX + CLD + + REPN; SCASB + + MOVQ DI, AX + SUBQ $1, AX + RET + +ret0: + MOVQ $0, AX + RET diff -Nru 0/sys/src/libc/amd64/strcpy.s 4/sys/src/libc/amd64/strcpy.s --- 0/sys/src/libc/amd64/strcpy.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/strcpy.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,40 @@ + TEXT strcpy(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD +/* + * find end of second string + */ + + MOVQ p2+8(FP), DI + REPN; SCASB + + MOVQ DI, BX + SUBQ p2+8(FP), BX + +/* + * copy the memory + */ + MOVQ RARG, DI + MOVQ p2+8(FP), SI +/* + * copy whole longs, if aligned + */ + MOVQ DI, CX + ORQ SI, CX + ANDL $3, CX + JNE c3 + MOVQ BX, CX + SHRQ $2, CX + REP; MOVSL +/* + * copy the rest, by bytes + */ + ANDL $3, BX +c3: + MOVL BX, CX + REP; MOVSB + + MOVQ RARG, AX + RET diff -Nru 0/sys/src/libc/amd64/strlen.s 4/sys/src/libc/amd64/strlen.s --- 0/sys/src/libc/amd64/strlen.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/strlen.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,16 @@ + TEXT strlen(SB),$0 + + MOVL $0, AX + MOVQ $-1, CX + CLD +/* + * look for end of string + */ + + MOVQ RARG, DI + REPN; SCASB + + MOVQ DI, AX + SUBQ RARG, AX + SUBQ $1, AX + RET diff -Nru 0/sys/src/libc/amd64/tas.s 4/sys/src/libc/amd64/tas.s --- 0/sys/src/libc/amd64/tas.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/amd64/tas.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,8 @@ +/* + * The kernel and the libc use the same 
constant for TAS + */ +TEXT _tas(SB),$0 + + MOVL $0xdeaddead,AX + XCHGL AX,(RARG) + RET diff -Nru 0/sys/src/libc/port/confpmc.c 4/sys/src/libc/port/confpmc.c --- 0/sys/src/libc/port/confpmc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/port/confpmc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,47 @@ +#include <u.h> +#include <libc.h> + +int +confpmc(int core, int index, int mode, char *desc) +{ + int fd; + char name[32]; + + if(core < 0) + core = getcoreno(nil); + snprint(name, sizeof(name), "/dev/core%4.4d/ctr%2.2dctl", core, index); + + fd = open(name, OWRITE); + if (fd < 0) + return -1; + + if (fprint(fd, "reset\n") < 0) { + close(fd); + return -1; + } + + if (mode&PmcOs) + if (fprint(fd, "os\n") < 0) { + close(fd); + return -1; + } + if (mode&PmcUser) + if (fprint(fd, "user\n") < 0) { + close(fd); + return -1; + } + + if (desc != nil) + if (fprint(fd, "set %s\n", desc) < 0) { + close(fd); + return -1; + } + if (mode&PmcEnable) + if (fprint(fd, "enable\n") < 0) { + close(fd); + return -1; + } + + close(fd); + return 0; +} diff -Nru 0/sys/src/libc/port/mkfile 4/sys/src/libc/port/mkfile --- 0/sys/src/libc/port/mkfile Wed Feb 6 20:49:10 2013 +++ 4/sys/src/libc/port/mkfile Wed Feb 6 00:00:00 2013 @@ -17,6 +17,7 @@ cistrstr.c\ charstod.c\ cleanname.c\ + confpmc.c\ crypt.c\ ctype.c\ encodefmt.c\ @@ -58,6 +59,7 @@ qsort.c\ quote.c\ rand.c\ + rdpmc.c\ readn.c\ rune.c\ runestrcat.c\ diff -Nru 0/sys/src/libc/port/rdpmc.c 4/sys/src/libc/port/rdpmc.c --- 0/sys/src/libc/port/rdpmc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libc/port/rdpmc.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,30 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * Generic rdpmc: read performance counter `index' of the core
+ * reported by getcoreno() from /dev/coreNNNN/ctrNN and return its
+ * value.  Returns 0xcafebabe if the file cannot be opened or read.
+ */
+uvlong
+rdpmc(int index)
+{
+	int fd, n, core;
+	char path[32];		/* /dev/coreNNNN/ctrNN\0 needs at least 20 bytes */
+	char val[16+2+1];	/* 0x0000000000000000\0 */
+
+	core = getcoreno(nil);
+
+	snprint(path, sizeof(path), "/dev/core%4.4d/ctr%2.2ud", core, index);
+
+	fd = open(path, OREAD);
+	if (fd < 0)
+		return 0xcafebabe;
+	n = read(fd, val, sizeof(val) - 1);
+	close(fd);		/* on read errors too */
+	if (n < 0)
+		return 0xcafebabe;
+	val[n] = '\0';
+	/* atoi() would cut the value down to an int */
+	return strtoull(val, nil, 0);
+}
diff -Nru
0/sys/src/libmp/amd64/mkfile 4/sys/src/libmp/amd64/mkfile --- 0/sys/src/libmp/amd64/mkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libmp/amd64/mkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,20 @@ +objtype=amd64 += 2^32 * divisor */ + JHS _divovfl + CMPL BX,CX /* divisor == 0 */ + JE _divovfl + DIVL BX /* AX = DX:AX/BX */ + MOVL AX,0(DI) + RET + + /* return all 1's */ +_divovfl: + NOTL CX + MOVL CX,0(DI) + RET diff -Nru 0/sys/src/libmp/amd64/mpvecadd.s 4/sys/src/libmp/amd64/mpvecadd.s --- 0/sys/src/libmp/amd64/mpvecadd.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libmp/amd64/mpvecadd.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,54 @@ +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] + b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + */ +TEXT mpvecadd(SB),$0 + + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX +/* MOVL a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + SUBL CX,DX + MOVQ sum+32(FP),DI + XORL BP,BP /* this also sets carry to 0 */ + + /* skip addition if b is zero */ + TESTL CX,CX + JZ _add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ +_addloop1: + MOVL (SI)(BP*4), AX + ADCL (BX)(BP*4), AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop1 + +_add1: + /* jump if alen > blen */ + INCL DX + MOVL DX,CX + LOOP _addloop2 + + /* sum[alen] = carry */ +_addend: + JC _addcarry + MOVL $0,(DI)(BP*4) + RET +_addcarry: + MOVL $1,(DI)(BP*4) + RET + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */ +_addloop2: + MOVL (SI)(BP*4),AX + ADCL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _addloop2 + JMP _addend + diff -Nru 0/sys/src/libmp/amd64/mpvecdigmuladd.s 4/sys/src/libmp/amd64/mpvecdigmuladd.s --- 0/sys/src/libmp/amd64/mpvecdigmuladd.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libmp/amd64/mpvecdigmuladd.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,53 @@ +/* + * mpvecdigmul(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step look like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += 
carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b+n = SI - can't be BP + * p+n = DI - can't be BP + * i-n = BP + * m = BX + * oldhi = CX + * + */ +TEXT mpvecdigmuladd(SB),$0 + +/* MOVQ b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + MOVL CX,BP + NEGQ BP /* BP = -n */ + SHLL $2,CX + ADDQ CX,SI /* SI = b + n */ + ADDQ CX,DI /* DI = p + n */ + XORL CX,CX +_muladdloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL CX,AX /* lo += oldhi */ + JCC _muladdnocarry1 + INCL DX /* hi += carry */ +_muladdnocarry1: + ADDL AX,(DI)(BP*4) /* p[i] += lo */ + JCC _muladdnocarry2 + INCL DX /* hi += carry */ +_muladdnocarry2: + MOVL DX,CX /* oldhi = hi */ + INCQ BP /* i++ */ + JNZ _muladdloop + XORL AX,AX + ADDL CX,(DI)(BP*4) /* p[n] + oldhi */ + ADCL AX,AX /* return carry out of p[n] */ + RET diff -Nru 0/sys/src/libmp/amd64/mpvecdigmulsub.s 4/sys/src/libmp/amd64/mpvecdigmulsub.s --- 0/sys/src/libmp/amd64/mpvecdigmulsub.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libmp/amd64/mpvecdigmulsub.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,53 @@ +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step look like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * hi = DX - constrained by hardware + * lo = AX - constrained by hardware + * b = SI - can't be BP + * p = DI - can't be BP + * i = BP + * n = CX - constrained by LOOP instr + * m = BX + * oldhi = R8 + * + */ +TEXT mpvecdigmulsub(SB),$0 + +/* MOVL b+0(FP),SI */ + MOVQ RARG,SI + MOVL n+8(FP),CX + MOVL m+16(FP),BX + MOVQ p+24(FP),DI + XORL BP,BP + MOVL BP,R8 +_mulsubloop: + MOVL (SI)(BP*4),AX /* lo = b[i] */ + MULL BX /* hi, lo = b[i] * m */ + ADDL R8,AX /* lo += oldhi */ + JCC _mulsubnocarry1 + INCL DX /* hi += carry */ +_mulsubnocarry1: + SUBL AX,(DI)(BP*4) + JCC 
_mulsubnocarry2 + INCL DX /* hi += carry */ +_mulsubnocarry2: + MOVL DX,R8 + INCL BP + LOOP _mulsubloop + SUBL R8,(DI)(BP*4) + JCC _mulsubnocarry3 + MOVQ $-1,AX + RET +_mulsubnocarry3: + MOVQ $1,AX + RET diff -Nru 0/sys/src/libmp/amd64/mpvecsub.s 4/sys/src/libmp/amd64/mpvecsub.s --- 0/sys/src/libmp/amd64/mpvecsub.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libmp/amd64/mpvecsub.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,45 @@ +/* + * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff) + * + * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, diff has room for alen digits + */ +TEXT mpvecsub(SB),$0 + +/* MOVQ a+0(FP),SI */ + MOVQ RARG, SI + MOVQ b+16(FP),BX + MOVL alen+8(FP),DX + MOVL blen+24(FP),CX + MOVQ diff+32(FP),DI + SUBL CX,DX + XORL BP,BP /* this also sets carry to 0 */ + + /* skip subraction if b is zero */ + TESTL CX,CX + JZ _sub1 + + /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */ +_subloop1: + MOVL (SI)(BP*4),AX + SBBL (BX)(BP*4),AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop1 + +_sub1: + INCL DX + MOVL DX,CX + LOOP _subloop2 + RET + + /* diff[blen:alen-1] = a[blen:alen-1] - 0 */ +_subloop2: + MOVL (SI)(BP*4),AX + SBBL $0,AX + MOVL AX,(DI)(BP*4) + INCL BP + LOOP _subloop2 + RET + diff -Nru 0/sys/src/libsec/amd64/mkfile 4/sys/src/libsec/amd64/mkfile --- 0/sys/src/libsec/amd64/mkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libsec/amd64/mkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,11 @@ +objtype=amd64 + +#include +#include + +enum +{ + Maxname = 80, + Extra = 20, +}; +
+/*
+ * Attach the segment `name' of the #g device (bound on /mnt/seg)
+ * at address va with length len, creating it first when
+ * /mnt/seg/name does not exist yet.  Returns what segattach()
+ * returned, or nil if the segment could not be set up.
+ * A failing segattach() is fatal.
+ */
+void*
+newseg(char *name, uvlong va, ulong len)
+{
+	static int once;
+	int fd, cfd;
+	char sname[Maxname+Extra], *s;
+	void *p;
+
+print("newseg %s %#llx %#lx\n", name, va, len);
+	/* race, but ok; only remember the bind once it has worked */
+	if(once == 0){
+		if(bind("#g", "/mnt/seg", MREPL|MCREATE) < 0)
+			return nil;
+		once = 1;
+	}
+	/*
+	 * seprint returns a pointer to the NUL it wrote, which is
+	 * never past the last byte of the buffer: a result that
+	 * reaches that byte means the name was (or may have been)
+	 * truncated.
+	 */
+	s = seprint(sname, sname+Maxname, "/mnt/seg/%s", name);
+	if(s >= sname+Maxname-1){
+		werrstr("name too long");
+		return nil;
+	}
+	if(access(sname, AEXIST) < 0){
+		/* only 0 or a single set bit is accepted as address */
+		if(va & (va-1)){
+			werrstr("unusual virtual address");
+			return nil;
+		}
+		fd = create(sname, OREAD, 0640|DMDIR);
+		if(fd < 0)
+			return nil;
+		close(fd);
+		strecpy(s, sname+sizeof sname, "/ctl");	/* Extra leaves room for this */
+		cfd = open(sname, OWRITE);
+		*s = 0;
+		if(cfd < 0)
+			return nil;
+		if(fprint(cfd, "addr %#llux %#lux\n", va, len) < 0){
+			close(cfd);
+print("newseg %s ctl failed %r\n", name);
+			return nil;
+		}
+		close(cfd);	/* not used past this point */
+	}
+	p = segattach(SG_CEXEC, name, (void*)va, len);
+	if((uintptr)p == ~0)
+		sysfatal("segattach: %s %#llx, %r", name, va);
+	return p;
+}
+ diff -Nru 0/sys/src/libtube/mkfile 4/sys/src/libtube/mkfile --- 0/sys/src/libtube/mkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libtube/mkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,17 @@ +objtype=amd64 + +#include +#include +#include + +enum +{ + Tnamelen = 16, + Tbufsz = 128, + Tsegsz = 64 * 1024, +}; + + +typedef struct Ntube Ntube; +typedef struct Tdir Tdir; + +struct Tdir +{ + Lock; + void *end; + long avail; + char name[Tnamelen]; + Ntube *t; + Tdir *next; +}; + +struct Ntube +{ + char name[Tnamelen]; + Tube* t; + Ntube* next; +}; + +static Tdir *dir; +static Lock tlck; + +int namedtubedebug; + +#define dprint if(namedtubedebug)print + +static void +dumpdir(char *s) +{ + Tdir *tl; + Ntube *nt; + + if(namedtubedebug == 0) + return; + if(s == nil) + print("named tubes:\n"); + else + print("%s:\n", s); + for(tl = dir; tl != nil; tl = tl->next){ + print("\t%s at %#p\n", tl->name, tl->t ?
tl->t->t : nil); + for(nt = tl->t; nt != nil; nt = nt->next) + print("\t\t%s at %#p\n", nt->name, nt->t); + } +} + +static Tdir* +dirlookup(char *name, int mkit) +{ + Tdir *tl; + + dprint("dirlookup %s mk=%d\n", name, mkit); + lock(&tlck); + for(tl = dir; tl != nil; tl = tl->next) + if(strcmp(name, tl->name) == 0){ + break; + } + if(tl == nil && !mkit) + werrstr("segment not found"); + if(tl == nil && mkit){ + tl = newseg(name, 0, Tsegsz); + if(tl != nil){ + strncpy(tl->name, name, sizeof tl->name); + tl->end = &tl[1]; + tl->avail = Tsegsz; + tl->next = dir; + dir = tl; + } + dumpdir("after newseg"); + } + unlock(&tlck); + dprint("dirlookup %s: %#p\n", name, tl); + return tl; +} + +static Tube* +tubelookup(Tdir *dir, char *name, ulong elsz, ulong n, int mkit) +{ + Ntube *nt; + uchar *p; + + dprint("tubelookup %s elsz=%uld mk=%d\n", name, elsz, mkit); + if(elsz <= 0 || n <= 0){ + werrstr("bad argument"); + dprint("tubelookup %s: %r\n", name); + return nil; + } + lock(dir); + for(nt = dir->t; nt != nil; nt = nt->next) + if(strcmp(nt->name, name) == 0) + break; + if(nt == nil && !mkit){ + werrstr("tube not found"); + dprint("tubelookup %s: %r\n", name); + } + if(nt == nil && mkit){ + /* + * This may overflow the segment, and we'll trap in + * that case. 
+ */ + dir->avail -= sizeof *nt + sizeof(Tube) + n*elsz; + if(dir->avail < 0){ + unlock(dir); + werrstr("segment exhausted"); + dprint("tubelookup %s: %r\n", name); + return nil; + } + p = dir->end; + nt = dir->end; + p += sizeof *nt; + dir->end = p; + strncpy(nt->name, name, sizeof nt->name); + nt->t = dir->end; + p += sizeof(Tube) + n*elsz; + dir->end = p; + nt->t->msz = elsz; + nt->t->tsz = n; + initsem(&nt->t->nhole, n); + initsem(&nt->t->nmsg, 0); + nt->next = dir->t; + dir->t = nt; + } + unlock(dir); + if(nt == nil) + return nil; + if(nt->t->msz != elsz){ + werrstr("wrong element size"); + dprint("tubelookup %s %r\n", name); + return nil; + } + dprint("tubelookup %s: found at %#p\n", name, nt->t); + return nt->t; +} + +/* + * Return a tube for name segmentname!tubename, + * creating any of them if mkit is true and it is not found. + */ +Tube* +namedtube(char *name, ulong msz, int n, int mkit) +{ + char *dir, *tname; + Tdir *tl; + Tube *t; + + dumpdir("dir before namedtube"); + name = strdup(name); + if(name == nil) + return nil; + tname = utfrune(name, '!'); + if(tname == nil){ + dir = "tubes"; + tname = name; + }else{ + dir = name; + *tname++ = 0; + } + t = nil; + tl = dirlookup(dir, mkit); + if(tl != nil) + t = tubelookup(tl, tname, msz, n, mkit); + free(name); + return t; +} diff -Nru 0/sys/src/libtube/tube.c 4/sys/src/libtube/tube.c --- 0/sys/src/libtube/tube.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/libtube/tube.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,158 @@ +#include +#include +#include +/* + * Ten movies streaming across that, that Internet, and what happens to + * your own personal Internet? I just the other day got… an Internet was + * sent by my staff at 10 o'clock in the morning on Friday. I got it + * yesterday [Tuesday]. Why? Because it got tangled up with all these + * things going on the Internet commercially. + * + * […] They want to deliver vast amounts of information over the + * Internet. 
And again, the Internet is not something that you just dump + * something on. It's not a big truck. It's a series of tubes. And if + * you don't understand, those tubes can be filled and if they are + * filled, when you put your message in, it gets in line and it's going + * to be delayed by anyone that puts into that tube enormous amounts of + * material, enormous amounts of material. + * -- Ted Stevens, Alaskan Senator + */ + +enum{Block, Dontblock, Already}; /* xsend() nb argument */ + +static void +coherence(void) +{ +} + +Tube* +newtube(ulong msz, ulong n) +{ + Tube *t; + + t = mallocz(sizeof *t + (msz+1) * n, 1); + t->msz = msz; + t->tsz = n; + initsem(&t->nhole, n); + initsem(&t->nmsg, 0); + return t; +} + +void +freetube(Tube *t) +{ + free(t); +} + + +static int +xsend(Tube *t, void *p, int nb) +{ + int n; + uchar *c; + + assert(t != nil && p != nil); + if(nb != Already && downsem(&t->nhole, !nb) < 0) + return -1; + n = ainc(&t->tl) - 1; + n %= t->tsz; + c = (uchar*)&t[1]; + c += (1+t->msz) * n; + memmove(c+1, p, t->msz); + coherence(); + *c = 1; + upsem(&t->nmsg); + return 0; +} + +static int +xrecv(Tube *t, void *p, int nb) +{ + int n; + uchar *c; + + assert(t != nil && p != nil); + if(nb != Already && downsem(&t->nmsg, !nb) < 0) + return -1; + n = ainc(&t->hd) - 1; + n %= t->tsz; + c = (uchar*)&t[1]; + c += (1+t->msz) * n; + while(*c == 0) + ; /* could yield */ + memmove(p, c+1, t->msz); + coherence(); + *c = 0; + upsem(&t->nhole); + return 0; +} + +void +tsend(Tube *t, void *p) +{ + xsend(t, p, Block); +} + +void +trecv(Tube *t, void *p) +{ + xrecv(t, p, Block); +} + +int +nbtsend(Tube *t, void *p) +{ + return xsend(t, p, Dontblock); +} + +int +nbtrecv(Tube *t, void *p) +{ + return xrecv(t, p, Dontblock); +} + +int +talt(Talt a[], int na) +{ + int i, n; + Sem **ss; + + assert(a != nil && na > 0); + ss = malloc(sizeof(Sem*) * na); + n = 0; + for(i = 0; i < na; i++) + switch(a[i].op){ + case TSND: + ss[n++] = &a[i].t->nhole; + break; + case TRCV: + 
ss[n++] = &a[i].t->nmsg; + break; + case TNBSND: + if(nbtsend(a[i].t, a[i].m) != -1) + return i; + break; + case TNBRCV: + if(nbtrecv(a[i].t, a[i].m) != -1) + return i; + break; + } + if(n == 0) + return -1; + i = altsems(ss, n); + free(ss); + if(i < 0) + return -1; + switch(a[i].op){ + case TSND: + xsend(a[i].t, a[i].m, Already); + break; + case TRCV: + xrecv(a[i].t, a[i].m, Already); + break; + default: + sysfatal("talt"); + } + return i; +} + diff -Nru 0/sys/src/mkfile 4/sys/src/mkfile --- 0/sys/src/mkfile Wed Feb 6 22:25:47 2013 +++ 4/sys/src/mkfile Wed Feb 6 00:00:00 2013 @@ -30,9 +30,11 @@ libregexp\ libscribble\ libsec\ + libseg\ libstdio\ libsunrpc\ libthread\ + libtube\ libventi\ diff -Nru 0/sys/src/mkfile.proto 4/sys/src/mkfile.proto --- 0/sys/src/mkfile.proto Thu Sep 20 23:11:02 2012 +++ 4/sys/src/mkfile.proto Wed Feb 6 00:00:00 2013 @@ -2,8 +2,8 @@ # common mkfile parameters shared by all architectures # -OS=58qv -CPUS=arm 386 power mips +OS=568qv +CPUS=arm amd64 386 power mips CFLAGS=-FTVw LEX=lex YACC=yacc diff -Nru 0/sys/src/nix/386/aoe.h 4/sys/src/nix/386/aoe.h --- 0/sys/src/nix/386/aoe.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/aoe.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,78 @@ +/* + * ATA-over-Ethernet (AoE) protocol + */ +enum { + ACata, + ACconfig, +}; + +enum { + AQCread, + AQCtest, + AQCprefix, + AQCset, + AQCfset, +}; + +enum { + AEcmd = 1, + AEarg, + AEdev, + AEcfg, + AEver, +}; + +enum { + Aoetype = 0x88a2, + Aoesectsz = 512, /* standard sector size */ + Aoever = 1, + + AFerr = 1<<2, + AFrsp = 1<<3, + + AAFwrite= 1, + AAFext = 1<<6, +}; + +typedef struct { + uchar dst[Eaddrlen]; + uchar src[Eaddrlen]; + uchar type[2]; + uchar verflag; + uchar error; + uchar major[2]; + uchar minor; + uchar cmd; + uchar tag[4]; + uchar payload[]; +} Aoehdr; + +#define AOEHDRSZ offsetof(Aoehdr, payload[0]) + +typedef struct { + Aoehdr; + uchar aflag; + uchar errfeat; + uchar scnt; + uchar cmdstat; + uchar lba[6]; + uchar res[2]; + uchar payload[]; +} Aoeata; 
+ +#define AOEATASZ offsetof(Aoeata, payload[0]) + +typedef struct { + Aoehdr; + uchar bufcnt[2]; + uchar fwver[2]; + uchar scnt; + uchar verccmd; + uchar cslen[2]; + uchar payload[]; +} Aoeqc; + +#define AOEQCSZ offsetof(Aoeqc, payload[0]) + +extern char Echange[]; +extern char Enotup[]; diff -Nru 0/sys/src/nix/386/devether.c 4/sys/src/nix/386/devether.c --- 0/sys/src/nix/386/devether.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/devether.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,545 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" + +#include "etherif.h" + +static Ether *etherxx[MaxEther]; + +Chan* +etherattach(char* spec) +{ + ulong ctlrno; + char *p; + Chan *chan; + + ctlrno = 0; + if(spec && *spec){ + ctlrno = strtoul(spec, &p, 0); + if((ctlrno == 0 && p == spec) || *p || (ctlrno >= MaxEther)) + error(Ebadarg); + } + if(etherxx[ctlrno] == 0) + error(Enodev); + + chan = devattach('l', spec); + if(waserror()){ + chanfree(chan); + nexterror(); + } + chan->devno = ctlrno; + if(etherxx[ctlrno]->attach) + etherxx[ctlrno]->attach(etherxx[ctlrno]); + poperror(); + return chan; +} + +static Walkqid* +etherwalk(Chan* chan, Chan* nchan, char** name, int nname) +{ + return netifwalk(etherxx[chan->devno], chan, nchan, name, nname); +} + +static long +etherstat(Chan* chan, uchar* dp, long n) +{ + return netifstat(etherxx[chan->devno], chan, dp, n); +} + +static Chan* +etheropen(Chan* chan, int omode) +{ + return netifopen(etherxx[chan->devno], chan, omode); +} + +static void +ethercreate(Chan*, char*, int, int) +{ +} + +static void +etherclose(Chan* chan) +{ + netifclose(etherxx[chan->devno], chan); +} + +static long +etherread(Chan* chan, void* buf, long n, vlong off) +{ + Ether *ether; + ulong offset = off; + + ether = etherxx[chan->devno]; + if((chan->qid.type & QTDIR) == 0 && ether->ifstat){ + /* + * With some controllers it is necessary to reach + * 
into the chip to extract statistics. + */ + if(NETTYPE(chan->qid.path) == Nifstatqid) + return ether->ifstat(ether, buf, n, offset); + else if(NETTYPE(chan->qid.path) == Nstatqid) + ether->ifstat(ether, buf, 0, offset); + } + + return netifread(ether, chan, buf, n, offset); +} + +static Block* +etherbread(Chan* chan, long n, vlong offset) +{ + return netifbread(etherxx[chan->devno], chan, n, offset); +} + +static long +etherwstat(Chan* chan, uchar* dp, long n) +{ + return netifwstat(etherxx[chan->devno], chan, dp, n); +} + +static void +etherrtrace(Netfile* f, Etherpkt* pkt, int len) +{ + int i, n; + Block *bp; + + if(qwindow(f->iq) <= 0) + return; + if(len > 58) + n = 58; + else + n = len; + bp = iallocb(64); + if(bp == nil) + return; + memmove(bp->wp, pkt->d, n); + i = TK2MS(sys->ticks); + bp->wp[58] = len>>8; + bp->wp[59] = len; + bp->wp[60] = i>>24; + bp->wp[61] = i>>16; + bp->wp[62] = i>>8; + bp->wp[63] = i; + bp->wp += 64; + qpass(f->iq, bp); +} + +Block* +etheriq(Ether* ether, Block* bp, int fromwire) +{ + Etherpkt *pkt; + ushort type; + int len, multi, tome, fromme; + Netfile **ep, *f, **fp, *fx; + Block *xbp; + + ether->inpackets++; + + pkt = (Etherpkt*)bp->rp; + len = BLEN(bp); + type = (pkt->type[0]<<8)|pkt->type[1]; + fx = 0; + ep = ðer->f[Ntypes]; + + multi = pkt->d[0] & 1; + /* check for valid multicast addresses */ + if(multi && memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) != 0 && ether->prom == 0){ + if(!activemulti(ether, pkt->d, sizeof(pkt->d))){ + if(fromwire){ + freeb(bp); + bp = 0; + } + return bp; + } + } + + /* is it for me? */ + tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0; + fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0; + + /* + * Multiplex the packet to all the connections which want it. + * If the packet is not to be used subsequently (fromwire != 0), + * attempt to simply pass it into one of the connections, thereby + * saving a copy of the data (usual case hopefully). 
+ */ + for(fp = ether->f; fp < ep; fp++){ + if(f = *fp) + if(f->type == type || f->type < 0) + if(tome || multi || f->prom || f->bridge & 2){ + /* Don't want to hear bridged packets */ + if(f->bridge && !fromwire && !fromme) + continue; + if(!f->headersonly){ + if(fromwire && fx == 0) + fx = f; + else if(xbp = iallocb(len)){ + memmove(xbp->wp, pkt, len); + xbp->wp += len; + if(qpass(f->iq, xbp) < 0) + ether->soverflows++; + } + else + ether->soverflows++; + } + else + etherrtrace(f, pkt, len); + } + } + + if(fx){ + if(qpass(fx->iq, bp) < 0) + ether->soverflows++; + return 0; + } + if(fromwire){ + freeb(bp); + return 0; + } + + return bp; +} + +static int +etheroq(Ether* ether, Block* bp) +{ + int len, loopback, s; + Etherpkt *pkt; + + ether->outpackets++; + + /* + * Check if the packet has to be placed back onto the input queue, + * i.e. if it's a loopback or broadcast packet or the interface is + * in promiscuous mode. + * If it's a loopback packet indicate to etheriq that the data isn't + * needed and return, etheriq will pass-on or free the block. + * To enable bridging to work, only packets that were originated + * by this interface are fed back. 
+ */ + pkt = (Etherpkt*)bp->rp; + len = BLEN(bp); + loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0; + if(loopback || memcmp(pkt->d, ether->bcast, sizeof(pkt->d)) == 0 || ether->prom){ + s = splhi(); + etheriq(ether, bp, 0); + splx(s); + } + + if(!loopback){ + qbwrite(ether->oq, bp); + if(ether->transmit != nil) + ether->transmit(ether); + } else + freeb(bp); + + return len; +} + +static long +etherwrite(Chan* chan, void* buf, long n, vlong) +{ + Ether *ether; + Block *bp; + int nn, onoff; + Cmdbuf *cb; + + ether = etherxx[chan->devno]; + if(NETTYPE(chan->qid.path) != Ndataqid) { + nn = netifwrite(ether, chan, buf, n); + if(nn >= 0) + return nn; + cb = parsecmd(buf, n); + if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){ + if(cb->nf <= 1) + onoff = 1; + else + onoff = atoi(cb->f[1]); + qnoblock(ether->oq, onoff); + free(cb); + return n; + } + free(cb); + if(ether->ctl != nil) + return ether->ctl(ether, buf, n); + + error(Ebadctl); + } + + if(n > ether->mtu) + error(Etoobig); + if(n < ether->minmtu) + error(Etoosmall); + + bp = allocb(n); + if(waserror()){ + freeb(bp); + nexterror(); + } + memmove(bp->rp, buf, n); + if((ether->f[NETID(chan->qid.path)]->bridge & 2) == 0) + memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen); + poperror(); + bp->wp += n; + + return etheroq(ether, bp); +} + +static long +etherbwrite(Chan* chan, Block* bp, vlong) +{ + Ether *ether; + long n; + + n = BLEN(bp); + if(NETTYPE(chan->qid.path) != Ndataqid){ + if(waserror()) { + freeb(bp); + nexterror(); + } + n = etherwrite(chan, bp->rp, n, 0); + poperror(); + freeb(bp); + return n; + } + ether = etherxx[chan->devno]; + + if(n > ether->mtu){ + freeb(bp); + error(Etoobig); + } + if(n < ether->minmtu){ + freeb(bp); + error(Etoosmall); + } + + return etheroq(ether, bp); +} + +static struct { + char* type; + int (*reset)(Ether*); +} cards[MaxEther+1]; + +void +addethercard(char* t, int (*r)(Ether*)) +{ + static int ncard; + + if(ncard == MaxEther) + panic("too many ether cards"); + 
cards[ncard].type = t; + cards[ncard].reset = r; + ncard++; +} + +int +parseether(uchar *to, char *from) +{ + char nip[4]; + char *p; + int i; + + p = from; + for(i = 0; i < Eaddrlen; i++){ + if(*p == 0) + return -1; + nip[0] = *p++; + if(*p == 0) + return -1; + nip[1] = *p++; + nip[2] = 0; + to[i] = strtoul(nip, 0, 16); + if(*p == ':') + p++; + } + return 0; +} + +static Ether* +etherprobe(int cardno, int ctlrno) +{ + int i, j; + Ether *ether; + char buf[128], name[32]; + + ether = malloc(sizeof(Ether)); + memset(ether, 0, sizeof(Ether)); + ether->ctlrno = ctlrno; + ether->tbdf = BUSUNKNOWN; + ether->mbps = 10; + ether->minmtu = ETHERMINTU; + ether->mtu = ETHERMAXTU; + ether->maxmtu = ETHERMAXTU; + + if(cardno < 0){ + if(isaconfig("ether", ctlrno, ether) == 0){ + free(ether); + return nil; + } + for(cardno = 0; cards[cardno].type; cardno++){ + if(cistrcmp(cards[cardno].type, ether->type)) + continue; + for(i = 0; i < ether->nopt; i++){ + if(strncmp(ether->opt[i], "ea=", 3)) + continue; + if(parseether(ether->ea, ðer->opt[i][3])) + memset(ether->ea, 0, Eaddrlen); + } + break; + } + } + + if(cardno >= MaxEther || cards[cardno].type == nil){ + free(ether); + return nil; + } + if(cards[cardno].reset(ether) < 0){ + free(ether); + return nil; + } + + /* + * IRQ2 doesn't really exist, it's used to gang the interrupt + * controllers together. A device set to IRQ2 will appear on + * the second interrupt controller as IRQ9. + */ + if(ether->irq == 2) + ether->irq = 9; + snprint(name, sizeof(name), "ether%d", ctlrno); + + /* + * If ether->irq is <0, it is a hack to indicate no interrupt + * used by ethersink. 
+ */ + if(ether->irq >= 0) + intrenable(ether->irq, ether->interrupt, ether, ether->tbdf, name); + + i = sprint(buf, "#l%d: %s: %dMbps port %#p irq %d tu %d", + ctlrno, cards[cardno].type, ether->mbps, ether->port, ether->irq, ether->mtu); + if(ether->mem) + i += sprint(buf+i, " addr %#p", ether->mem); + if(ether->size) + i += sprint(buf+i, " size 0x%luX", ether->size); + i += sprint(buf+i, ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux", + ether->ea[0], ether->ea[1], ether->ea[2], + ether->ea[3], ether->ea[4], ether->ea[5]); + sprint(buf+i, "\n"); + print(buf); + + j = ether->mbps; + if(j > 1000) + j *= 10; + for(i = 0; j >= 100; i++) + j /= 10; + i = (128<oq == 0) + ether->oq = qopen(i, Qmsg, 0, 0); + if(ether->oq == 0) + panic("etherreset %s", name); + ether->alen = Eaddrlen; + memmove(ether->addr, ether->ea, Eaddrlen); + memset(ether->bcast, 0xFF, Eaddrlen); + + return ether; +} + +static void +etherreset(void) +{ + Ether *ether; + int cardno, ctlrno; + + for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){ + if((ether = etherprobe(-1, ctlrno)) == nil) + continue; + etherxx[ctlrno] = ether; + } + + if(getconf("*noetherprobe")) + return; + + cardno = ctlrno = 0; + while(cards[cardno].type != nil && ctlrno < MaxEther){ + if(etherxx[ctlrno] != nil){ + ctlrno++; + continue; + } + if((ether = etherprobe(cardno, ctlrno)) == nil){ + cardno++; + continue; + } + etherxx[ctlrno] = ether; + ctlrno++; + } +} + +static void +ethershutdown(void) +{ + char name[32]; + int i; + Ether *ether; + + for(i = 0; i < MaxEther; i++){ + ether = etherxx[i]; + if(ether == nil) + continue; + if(ether->shutdown == nil) { + print("#l%d: no shutdown function\n", i); + continue; + } + snprint(name, sizeof(name), "ether%d", i); + if(ether->irq >= 0){ + // intrdisable(ether->irq, ether->interrupt, ether, ether->tbdf, name); + } + (*ether->shutdown)(ether); + } +} + + +#define POLY 0xedb88320 + +/* really slow 32 bit crc for ethers */ +ulong +ethercrc(uchar *p, int len) +{ + int i, j; + ulong crc, b; + + crc 
= 0xffffffff; + for(i = 0; i < len; i++){ + b = *p++; + for(j = 0; j < 8; j++){ + crc = (crc>>1) ^ (((crc^b) & 1) ? POLY : 0); + b >>= 1; + } + } + return crc; +} + +Dev etherdevtab = { + 'l', + "ether", + + etherreset, + devinit, + ethershutdown, + etherattach, + etherwalk, + etherstat, + etheropen, + ethercreate, + etherclose, + etherread, + etherbread, + etherwrite, + etherbwrite, + devremove, + etherwstat, +}; diff -Nru 0/sys/src/nix/386/devrtc.c 4/sys/src/nix/386/devrtc.c --- 0/sys/src/nix/386/devrtc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/devrtc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,460 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * real time clock and non-volatile ram + */ + +enum { + Paddr= 0x70, /* address port */ + Pdata= 0x71, /* data port */ + + Seconds= 0x00, + Minutes= 0x02, + Hours= 0x04, + Mday= 0x07, + Month= 0x08, + Year= 0x09, + Status= 0x0A, + + Nvoff= 128, /* where usable nvram lives */ + Nvsize= 256, + + Nbcd= 6, +}; + +typedef struct Rtc Rtc; +struct Rtc +{ + int sec; + int min; + int hour; + int mday; + int mon; + int year; +}; + + +enum{ + Qdir = 0, + Qrtc, + Qnvram, +}; + +Dirtab rtcdir[]={ + ".", {Qdir, 0, QTDIR}, 0, 0555, + "nvram", {Qnvram, 0}, Nvsize, 0664, + "rtc", {Qrtc, 0}, 0, 0664, +}; + +static ulong rtc2sec(Rtc*); +static void sec2rtc(ulong, Rtc*); + +void +rtcinit(void) +{ + if(ioalloc(Paddr, 2, 0, "rtc/nvr") < 0) + panic("rtcinit: ioalloc failed"); +} + +static Chan* +rtcattach(char* spec) +{ + return devattach('r', spec); +} + +static Walkqid* +rtcwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, rtcdir, nelem(rtcdir), devgen); +} + +static long +rtcstat(Chan* c, uchar* dp, long n) +{ + return devstat(c, dp, n, rtcdir, nelem(rtcdir), devgen); +} + +static Chan* +rtcopen(Chan* c, int omode) +{ + omode = openmode(omode); + switch((ulong)c->qid.path){ + case Qrtc: + if(strcmp(up->user, eve)!=0 && 
omode!=OREAD)
+			error(Eperm);
+		break;
+	case Qnvram:
+		if(strcmp(up->user, eve)!=0)
+			error(Eperm);
+	}
+	return devopen(c, omode, rtcdir, nelem(rtcdir), devgen);
+}
+
+static void
+rtcclose(Chan*)
+{
+}
+
+#define GETBCD(o) ((bcdclock[o]&0xf) + 10*(bcdclock[o]>>4))
+
+/*
+ * Read the six BCD clock registers once and return the time as
+ * seconds since Jan 1 1970 (via rtc2sec).  The read is retried,
+ * up to 10000 times, while bit 0x80 of the Status register is set
+ * before or after the registers are sampled.
+ * NOTE(review): if the busy bit never clears, bcdclock is converted
+ * without ever having been written.
+ */
+static long
+rtcextract(void)
+{
+	uchar bcdclock[Nbcd];
+	Rtc rtc;
+	int i;
+
+	/* don't do the read until the clock is no longer busy */
+	for(i = 0; i < 10000; i++){
+		outb(Paddr, Status);
+		if(inb(Pdata) & 0x80)
+			continue;
+
+		/* read clock values */
+		outb(Paddr, Seconds);	bcdclock[0] = inb(Pdata);
+		outb(Paddr, Minutes);	bcdclock[1] = inb(Pdata);
+		outb(Paddr, Hours);	bcdclock[2] = inb(Pdata);
+		outb(Paddr, Mday);	bcdclock[3] = inb(Pdata);
+		outb(Paddr, Month);	bcdclock[4] = inb(Pdata);
+		outb(Paddr, Year);	bcdclock[5] = inb(Pdata);
+
+		outb(Paddr, Status);
+		if((inb(Pdata) & 0x80) == 0)
+			break;
+	}
+
+	/*
+	 *  convert from BCD
+	 */
+	rtc.sec = GETBCD(0);
+	rtc.min = GETBCD(1);
+	rtc.hour = GETBCD(2);
+	rtc.mday = GETBCD(3);
+	rtc.mon = GETBCD(4);
+	rtc.year = GETBCD(5);
+
+	/*
+	 *  the world starts jan 1 1970
+	 */
+	if(rtc.year < 70)
+		rtc.year += 2000;
+	else
+		rtc.year += 1900;
+	return rtc2sec(&rtc);
+}
+
+static Lock nvrtlock;
+
+/*
+ * Return the clock time in seconds since Jan 1 1970.
+ * The clock is sampled under nvrtlock until two consecutive
+ * samples agree, giving up (with a diagnostic) after 100 retries;
+ * the most recent sample is returned either way.
+ */
+long
+rtctime(void)
+{
+	int i;
+	long t, ot;
+
+	ilock(&nvrtlock);
+
+	/* loop till we get two reads in a row the same */
+	ot = rtcextract();
+	for(i = 0; i < 100; i++){
+		t = rtcextract();
+		if(t == ot)
+			break;
+		/* compare against the previous read, not the first one */
+		ot = t;
+	}
+	iunlock(&nvrtlock);
+
+	if(i == 100) print("we are boofheads\n");
+
+	return t;
+}
+
+static long
+rtcread(Chan* c, void* buf, long n, vlong off)
+{
+	ulong t;
+	char *a, *start;
+	ulong offset = off;
+
+	if(c->qid.type & QTDIR)
+		return devdirread(c, buf, n, rtcdir, nelem(rtcdir), devgen);
+
+	switch((ulong)c->qid.path){
+	case Qrtc:
+		t = rtctime();
+		n = readnum(offset, buf, n, t, 12);
+		return n;
+	case Qnvram:
+		if(n == 0)
+			return 0;
+		if(n > Nvsize)
+			n = Nvsize;
+		a = start = smalloc(n);
+
+		ilock(&nvrtlock);
+		for(t = offset; t < offset
+ n; t++){ + if(t >= Nvsize) + break; + outb(Paddr, Nvoff+t); + *a++ = inb(Pdata); + } + iunlock(&nvrtlock); + + if(waserror()){ + free(start); + nexterror(); + } + memmove(buf, start, t - offset); + poperror(); + + free(start); + return t - offset; + } + error(Ebadarg); + return 0; +} + +#define PUTBCD(n,o) bcdclock[o] = (n % 10) | (((n / 10) % 10)<<4) + +static long +rtcwrite(Chan* c, void* buf, long n, vlong off) +{ + int t; + char *a, *start; + Rtc rtc; + ulong secs; + uchar bcdclock[Nbcd]; + char *cp, *ep; + ulong offset = off; + + if(offset!=0) + error(Ebadarg); + + + switch((ulong)c->qid.path){ + case Qrtc: + /* + * read the time + */ + cp = ep = buf; + ep += n; + while(cp < ep){ + if(*cp>='0' && *cp<='9') + break; + cp++; + } + secs = strtoul(cp, 0, 0); + + /* + * convert to bcd + */ + sec2rtc(secs, &rtc); + PUTBCD(rtc.sec, 0); + PUTBCD(rtc.min, 1); + PUTBCD(rtc.hour, 2); + PUTBCD(rtc.mday, 3); + PUTBCD(rtc.mon, 4); + PUTBCD(rtc.year, 5); + + /* + * write the clock + */ + ilock(&nvrtlock); + outb(Paddr, Seconds); outb(Pdata, bcdclock[0]); + outb(Paddr, Minutes); outb(Pdata, bcdclock[1]); + outb(Paddr, Hours); outb(Pdata, bcdclock[2]); + outb(Paddr, Mday); outb(Pdata, bcdclock[3]); + outb(Paddr, Month); outb(Pdata, bcdclock[4]); + outb(Paddr, Year); outb(Pdata, bcdclock[5]); + iunlock(&nvrtlock); + return n; + case Qnvram: + if(n == 0) + return 0; + if(n > Nvsize) + n = Nvsize; + + start = a = smalloc(n); + if(waserror()){ + free(start); + nexterror(); + } + memmove(a, buf, n); + poperror(); + + ilock(&nvrtlock); + for(t = offset; t < offset + n; t++){ + if(t >= Nvsize) + break; + outb(Paddr, Nvoff+t); + outb(Pdata, *a++); + } + iunlock(&nvrtlock); + + free(start); + return t - offset; + } + error(Ebadarg); + return 0; +} + +Dev rtcdevtab = { + 'r', + "rtc", + + devreset, + rtcinit, + devshutdown, + rtcattach, + rtcwalk, + rtcstat, + rtcopen, + devcreate, + rtcclose, + rtcread, + devbread, + rtcwrite, + devbwrite, + devremove, + devwstat, +}; + +#define 
SEC2MIN 60L +#define SEC2HOUR (60L*SEC2MIN) +#define SEC2DAY (24L*SEC2HOUR) + +/* + * days per month plus days/year + */ +static int dmsize[] = +{ + 365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; +static int ldmsize[] = +{ + 366, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * return the days/month for the given year + */ +static int* +yrsize(int y) +{ + if((y%4) == 0 && ((y%100) != 0 || (y%400) == 0)) + return ldmsize; + else + return dmsize; +} + +/* + * compute seconds since Jan 1 1970 + */ +static ulong +rtc2sec(Rtc *rtc) +{ + ulong secs; + int i; + int *d2m; + + secs = 0; + + /* + * seconds per year + */ + for(i = 1970; i < rtc->year; i++){ + d2m = yrsize(i); + secs += d2m[0] * SEC2DAY; + } + + /* + * seconds per month + */ + d2m = yrsize(rtc->year); + for(i = 1; i < rtc->mon; i++) + secs += d2m[i] * SEC2DAY; + + secs += (rtc->mday-1) * SEC2DAY; + secs += rtc->hour * SEC2HOUR; + secs += rtc->min * SEC2MIN; + secs += rtc->sec; + + return secs; +} + +/* + * compute rtc from seconds since Jan 1 1970 + */ +static void +sec2rtc(ulong secs, Rtc *rtc) +{ + int d; + long hms, day; + int *d2m; + + /* + * break initial number into days + */ + hms = secs % SEC2DAY; + day = secs / SEC2DAY; + if(hms < 0) { + hms += SEC2DAY; + day -= 1; + } + + /* + * generate hours:minutes:seconds + */ + rtc->sec = hms % 60; + d = hms / 60; + rtc->min = d % 60; + d /= 60; + rtc->hour = d; + + /* + * year number + */ + if(day >= 0) + for(d = 1970; day >= *yrsize(d); d++) + day -= *yrsize(d); + else + for (d = 1970; day < 0; d--) + day += *yrsize(d-1); + rtc->year = d; + + /* + * generate month + */ + d2m = yrsize(rtc->year); + for(d = 1; day >= d2m[d]; d++) + day -= d2m[d]; + rtc->mday = day + 1; + rtc->mon = d; + + return; +} + +uchar +nvramread(int addr) +{ + uchar data; + + ilock(&nvrtlock); + outb(Paddr, addr); + data = inb(Pdata); + iunlock(&nvrtlock); + + return data; +} + +void +nvramwrite(int addr, uchar data) +{ + ilock(&nvrtlock); + outb(Paddr, addr); + 
outb(Pdata, data); + iunlock(&nvrtlock); +} diff -Nru 0/sys/src/nix/386/ether8169.c 4/sys/src/nix/386/ether8169.c --- 0/sys/src/nix/386/ether8169.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/ether8169.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1206 @@ +/* + * Realtek RTL8110S/8169S. + * Mostly there. There are some magic register values used + * which are not described in any datasheet or driver but seem + * to be necessary. + * No tuning has been done. Only tested on an RTL8110S, there + * are slight differences between the chips in the series so some + * tweaks may be needed. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +#include "../port/ethermii.h" +#include "../port/netif.h" + +#include "etherif.h" + +enum { /* registers */ + Idr0 = 0x00, /* MAC address */ + Mar0 = 0x08, /* Multicast address */ + Dtccr = 0x10, /* Dump Tally Counter Command */ + Tnpds = 0x20, /* Transmit Normal Priority Descriptors */ + Thpds = 0x28, /* Transmit High Priority Descriptors */ + Flash = 0x30, /* Flash Memory Read/Write */ + Erbcr = 0x34, /* Early Receive Byte Count */ + Ersr = 0x36, /* Early Receive Status */ + Cr = 0x37, /* Command Register */ + Tppoll = 0x38, /* Transmit Priority Polling */ + Imr = 0x3C, /* Interrupt Mask */ + Isr = 0x3E, /* Interrupt Status */ + Tcr = 0x40, /* Transmit Configuration */ + Rcr = 0x44, /* Receive Configuration */ + Tctr = 0x48, /* Timer Count */ + Mpc = 0x4C, /* Missed Packet Counter */ + Cr9346 = 0x50, /* 9346 Command Register */ + Config0 = 0x51, /* Configuration Register 0 */ + Config1 = 0x52, /* Configuration Register 1 */ + Config2 = 0x53, /* Configuration Register 2 */ + Config3 = 0x54, /* Configuration Register 3 */ + Config4 = 0x55, /* Configuration Register 4 */ + Config5 = 0x56, /* Configuration Register 5 */ + Timerint = 0x58, /* Timer Interrupt */ + Mulint = 0x5C, /* Multiple Interrupt Select */ + Phyar = 0x60, /* PHY Access */ + Tbicsr0 
= 0x64, /* TBI Control and Status */ + Tbianar = 0x68, /* TBI Auto-Negotiation Advertisment */ + Tbilpar = 0x6A, /* TBI Auto-Negotiation Link Partner */ + + Rms = 0xDA, /* Receive Packet Maximum Size */ + Cplusc = 0xE0, /* C+ Command */ + Rdsar = 0xE4, /* Receive Descriptor Start Address */ + Mtps = 0xEC, /* Max. Transmit Packet Size */ +}; + +enum { /* Dtccr */ + Cmd = 0x00000008, /* Command */ +}; + +enum { /* Cr */ + Te = 0x04, /* Transmitter Enable */ + Re = 0x08, /* Receiver Enable */ + Rst = 0x10, /* Software Reset */ +}; + +enum { /* Tppoll */ + Fswint = 0x01, /* Forced Software Interrupt */ + Npq = 0x40, /* Normal Priority Queue polling */ + Hpq = 0x80, /* High Priority Queue polling */ +}; + +enum { /* Imr/Isr */ + Rok = 0x0001, /* Receive OK */ + Rer = 0x0002, /* Receive Error */ + Tok = 0x0004, /* Transmit OK */ + Ter = 0x0008, /* Transmit Error */ + Rdu = 0x0010, /* Receive Descriptor Unavailable */ + Punlc = 0x0020, /* Packet Underrun or Link Change */ + Fovw = 0x0040, /* Receive FIFO Overflow */ + Tdu = 0x0080, /* Transmit Descriptor Unavailable */ + Swint = 0x0100, /* Software Interrupt */ + Timeout = 0x4000, /* Timer */ + Serr = 0x8000, /* System Error */ +}; + +enum { /* Tcr */ + MtxdmaSHIFT = 8, /* Max. 
DMA Burst Size */ + MtxdmaMASK = 0x00000700, + Mtxdmaunlimited = 0x00000700, + Acrc = 0x00010000, /* Append CRC (not) */ + Lbk0 = 0x00020000, /* Loopback Test 0 */ + Lbk1 = 0x00040000, /* Loopback Test 1 */ + Ifg2 = 0x00080000, /* Interframe Gap 2 */ + HwveridSHIFT = 23, /* Hardware Version ID */ + HwveridMASK = 0x7C800000, + Macv01 = 0x00000000, /* RTL8169 */ + Macv02 = 0x00800000, /* RTL8169S/8110S */ + Macv03 = 0x04000000, /* RTL8169S/8110S */ + Macv04 = 0x10000000, /* RTL8169SB/8110SB */ + Macv05 = 0x18000000, /* RTL8169SC/8110SC */ + Macv11 = 0x30000000, /* RTL8168B/8111B */ + Macv12 = 0x38000000, /* RTL8169B/8111B */ + Macv13 = 0x34000000, /* RTL8101E */ + Macv14 = 0x30800000, /* RTL8100E */ + Macv15 = 0x38800000, /* RTL8100E */ + Ifg0 = 0x01000000, /* Interframe Gap 0 */ + Ifg1 = 0x02000000, /* Interframe Gap 1 */ +}; + +enum { /* Rcr */ + Aap = 0x00000001, /* Accept All Packets */ + Apm = 0x00000002, /* Accept Physical Match */ + Am = 0x00000004, /* Accept Multicast */ + Ab = 0x00000008, /* Accept Broadcast */ + Ar = 0x00000010, /* Accept Runt */ + Aer = 0x00000020, /* Accept Error */ + Sel9356 = 0x00000040, /* 9356 EEPROM used */ + MrxdmaSHIFT = 8, /* Max. 
DMA Burst Size */ + MrxdmaMASK = 0x00000700, + Mrxdmaunlimited = 0x00000700, + RxfthSHIFT = 13, /* Receive Buffer Length */ + RxfthMASK = 0x0000E000, + Rxfth256 = 0x00008000, + Rxfthnone = 0x0000E000, + Rer8 = 0x00010000, /* Accept Error Packets > 8 bytes */ + MulERINT = 0x01000000, /* Multiple Early Interrupt Select */ +}; + +enum { /* Cr9346 */ + Eedo = 0x01, /* */ + Eedi = 0x02, /* */ + Eesk = 0x04, /* */ + Eecs = 0x08, /* */ + Eem0 = 0x40, /* Operating Mode */ + Eem1 = 0x80, +}; + +enum { /* Phyar */ + DataMASK = 0x0000FFFF, /* 16-bit GMII/MII Register Data */ + DataSHIFT = 0, + RegaddrMASK = 0x001F0000, /* 5-bit GMII/MII Register Address */ + RegaddrSHIFT = 16, + Flag = 0x80000000, /* */ +}; + +enum { /* Cplusc */ + Mulrw = 0x0008, /* PCI Multiple R/W Enable */ + Dac = 0x0010, /* PCI Dual Address Cycle Enable */ + Rxchksum = 0x0020, /* Receive Checksum Offload Enable */ + Rxvlan = 0x0040, /* Receive VLAN De-tagging Enable */ + Endian = 0x0200, /* Endian Mode */ +}; + +typedef struct D D; /* Transmit/Receive Descriptor */ +struct D { + u32int control; + u32int vlan; + u32int addrlo; + u32int addrhi; +}; + +enum { /* Transmit Descriptor control */ + TxflMASK = 0x0000FFFF, /* Transmit Frame Length */ + TxflSHIFT = 0, + Tcps = 0x00010000, /* TCP Checksum Offload */ + Udpcs = 0x00020000, /* UDP Checksum Offload */ + Ipcs = 0x00040000, /* IP Checksum Offload */ + Lgsen = 0x08000000, /* Large Send */ +}; + +enum { /* Receive Descriptor control */ + RxflMASK = 0x00003FFF, /* Receive Frame Length */ + RxflSHIFT = 0, + Tcpf = 0x00004000, /* TCP Checksum Failure */ + Udpf = 0x00008000, /* UDP Checksum Failure */ + Ipf = 0x00010000, /* IP Checksum Failure */ + Pid0 = 0x00020000, /* Protocol ID0 */ + Pid1 = 0x00040000, /* Protocol ID1 */ + Crce = 0x00080000, /* CRC Error */ + Runt = 0x00100000, /* Runt Packet */ + Res = 0x00200000, /* Receive Error Summary */ + Rwt = 0x00400000, /* Receive Watchdog Timer Expired */ + Fovf = 0x00800000, /* FIFO Overflow */ + Bovf = 
0x01000000, /* Buffer Overflow */ + Bar = 0x02000000, /* Broadcast Address Received */ + Pam = 0x04000000, /* Physical Address Matched */ + Mar = 0x08000000, /* Multicast Address Received */ +}; + +enum { /* General Descriptor control */ + Ls = 0x10000000, /* Last Segment Descriptor */ + Fs = 0x20000000, /* First Segment Descriptor */ + Eor = 0x40000000, /* End of Descriptor Ring */ + Own = 0x80000000, /* Ownership */ +}; + +/* + */ +enum { /* Ring sizes (<= 1024) */ + Ntd = 32, /* Transmit Ring */ + Nrd = 128, /* Receive Ring */ + + Mps = ROUNDUP(ETHERMAXTU+4, 128), +}; + +typedef struct Dtcc Dtcc; +struct Dtcc { + u64int txok; + u64int rxok; + u64int txer; + u32int rxer; + u16int misspkt; + u16int fae; + u32int tx1col; + u32int txmcol; + u64int rxokph; + u64int rxokbrd; + u32int rxokmu; + u16int txabt; + u16int txundrn; +}; + +enum { /* Variants */ + Rtl8100e = (0x8136<<16)|0x10EC, /* RTL810[01]E: pci -e */ + Rtl8169c = (0x0116<<16)|0x16EC, /* RTL8169C+ (USR997902) */ + Rtl8169sc = (0x8167<<16)|0x10EC, /* RTL8169SC */ + Rtl8168b = (0x8168<<16)|0x10EC, /* RTL8168B: pci-e */ + Rtl8169 = (0x8169<<16)|0x10EC, /* RTL8169 */ +}; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + int port; + Pcidev* pcidev; + Ctlr* next; + int active; + + QLock alock; /* attach */ + Lock ilock; /* init */ + int init; /* */ + + int pciv; /* */ + int macv; /* MAC version */ + int phyv; /* PHY version */ + int pcie; /* flag: pci-express device? */ + + uvlong mchash; /* multicast hash */ + + Mii* mii; + + Lock tlock; /* transmit */ + D* td; /* descriptor ring */ + Block** tb; /* transmit buffers */ + int ntd; + + int tdh; /* head - producer index (host) */ + int tdt; /* tail - consumer index (NIC) */ + int ntdfree; + int ntq; + + int mtps; /* Max. 
Transmit Packet Size */
+
+	Lock	rlock;			/* receive */
+	D*	rd;			/* descriptor ring */
+	Block**	rb;			/* receive buffers */
+	int	nrd;
+
+	int	rdh;			/* head - producer index (NIC) */
+	int	rdt;			/* tail - consumer index (host) */
+	int	nrdfree;
+
+	int	tcr;			/* transmit configuration register */
+	int	rcr;			/* receive configuration register */
+	int	imr;
+
+	QLock	slock;			/* statistics */
+	Dtcc*	dtcc;
+	uint	txdu;
+	uint	tcpf;
+	uint	udpf;
+	uint	ipf;
+	uint	fovf;
+	uint	ierrs;
+	uint	rer;
+	uint	rdu;
+	uint	punlc;
+	uint	fovw;
+	uint	mcast;
+} Ctlr;
+
+static Ctlr* rtl8169ctlrhead;
+static Ctlr* rtl8169ctlrtail;
+
+#define csr8r(c, r)	(inb((c)->port+(r)))
+#define csr16r(c, r)	(ins((c)->port+(r)))
+#define csr32r(c, r)	(inl((c)->port+(r)))
+#define csr8w(c, r, b)	(outb((c)->port+(r), (u8int)(b)))
+#define csr16w(c, r, w)	(outs((c)->port+(r), (u16int)(w)))
+#define csr32w(c, r, l)	(outl((c)->port+(r), (u32int)(l)))
+
+/*
+ * Read PHY register ra through the Phyar register.
+ * Only PHY address 1 is accepted.  The register address is written
+ * with Flag clear, then Phyar is polled (up to 2000 times, 100us
+ * apart) until Flag is set.  Returns the 16-bit data field, or -1
+ * if pa != 1 or Flag never came up.
+ */
+static int
+rtl8169miimir(Ctlr* ctlr, int pa, int ra)
+{
+	uint r;
+	int timeo;
+
+	if(pa != 1)
+		return -1;
+
+	r = (ra<<16) & RegaddrMASK;
+	csr32w(ctlr, Phyar, r);
+	delay(1);
+	for(timeo = 0; timeo < 2000; timeo++){
+		if((r = csr32r(ctlr, Phyar)) & Flag)
+			break;
+		microdelay(100);
+	}
+	if(!(r & Flag))
+		return -1;
+
+	return (r & DataMASK)>>DataSHIFT;
+}
+
+/*
+ * Write data to PHY register ra through the Phyar register.
+ * Only PHY address 1 is accepted.  The address and data are written
+ * with Flag set, then Phyar is polled until Flag reads back clear.
+ * Returns 0 on success, -1 if pa != 1 or the poll times out.
+ * NOTE(review): everything after the data shift was missing from
+ * this copy and is reconstructed as the mirror image of
+ * rtl8169miimir - confirm against the upstream driver.
+ */
+static int
+rtl8169miimiw(Ctlr* ctlr, int pa, int ra, int data)
+{
+	uint r;
+	int timeo;
+
+	if(pa != 1)
+		return -1;
+
+	r = Flag|((ra<<16) & RegaddrMASK)|((data<<DataSHIFT) & DataMASK);
+	csr32w(ctlr, Phyar, r);
+	delay(1);
+	for(timeo = 0; timeo < 2000; timeo++){
+		if(!((r = csr32r(ctlr, Phyar)) & Flag))
+			break;
+		microdelay(100);
+	}
+	if(r & Flag)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Read/write callback handed to miiattach: dispatches to
+ * rtl8169miimiw when write is non-zero, rtl8169miimir otherwise.
+ * NOTE(review): the signature is reconstructed from the surviving
+ * body and the miiattach call - confirm against ethermii.h.
+ */
+static int
+rtl8169miirw(Mii* mii, int write, int pa, int ra, int data)
+{
+	if(write)
+		return rtl8169miimiw(mii->ctlr, pa, ra, data);
+
+	return rtl8169miimir(mii->ctlr, pa, ra);
+}
+
+static Mii*
+rtl8169mii(Ctlr* ctlr)
+{
+	Mii* mii;
+	MiiPhy *phy;
+
+	/*
+	 * Link management.
+	 *
+	 * Get rev number out of Phyidr2 so can config properly.
+	 * There's probably more special stuff for Macv0[234] needed here.
+ */ + ctlr->phyv = rtl8169miimir(ctlr, 1, Phyidr2) & 0x0F; + if(ctlr->macv == Macv02){ + csr8w(ctlr, 0x82, 1); /* magic */ + rtl8169miimiw(ctlr, 1, 0x0B, 0x0000); /* magic */ + } + if((mii = miiattach(ctlr, (1<<1), rtl8169miirw)) == nil) + return nil; + + phy = mii->curphy; + print("oui %#ux phyno %d, macv = %#8.8ux phyv = %#4.4ux\n", + phy->oui, phy->phyno, ctlr->macv, ctlr->phyv); + + if(miistatus(mii) < 0){ + miireset(mii); + miiane(mii, ~0, ~0, ~0); + } + + return mii; +} + +static void +rtl8169promiscuous(void* arg, int on) +{ + Ether *edev; + Ctlr * ctlr; + + edev = arg; + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + if(on) + ctlr->rcr |= Aap; + else + ctlr->rcr &= ~Aap; + csr32w(ctlr, Rcr, ctlr->rcr); + iunlock(&ctlr->ilock); +} + +enum { + /* everyone else uses 0x04c11db7, but they both produce the same crc */ + Etherpolybe = 0x04c11db6, + Bytemask = (1<<8) - 1, +}; + +static ulong +ethercrcbe(uchar *addr, long len) +{ + int i, j; + ulong c, crc, carry; + + crc = ~0UL; + for (i = 0; i < len; i++) { + c = addr[i]; + for (j = 0; j < 8; j++) { + carry = ((crc & (1UL << 31))? 
1: 0) ^ (c & 1); + crc <<= 1; + c >>= 1; + if (carry) + crc = (crc ^ Etherpolybe) | carry; + } + } + return crc; +} + +static ulong +swabl(ulong l) +{ + return l>>24 | (l>>8) & (Bytemask<<8) | + (l<<8) & (Bytemask<<16) | l<<24; +} + +static void +rtl8169multicast(void* ether, uchar *eaddr, int add) +{ + Ether *edev; + Ctlr *ctlr; + + if (!add) + return; /* ok to keep receiving on old mcast addrs */ + + edev = ether; + ctlr = edev->ctlr; + ilock(&ctlr->ilock); + + ctlr->mchash |= 1ULL << (ethercrcbe(eaddr, Eaddrlen) >> 26); + + ctlr->rcr |= Am; + csr32w(ctlr, Rcr, ctlr->rcr); + + /* pci-e variants reverse the order of the hash byte registers */ + if (ctlr->pcie) { + csr32w(ctlr, Mar0, swabl(ctlr->mchash>>32)); + csr32w(ctlr, Mar0+4, swabl(ctlr->mchash)); + } else { + csr32w(ctlr, Mar0, ctlr->mchash); + csr32w(ctlr, Mar0+4, ctlr->mchash>>32); + } + + iunlock(&ctlr->ilock); +} + +static long +rtl8169ifstat(Ether* edev, void* a, long n, ulong offset) +{ + Ctlr *ctlr; + Dtcc *dtcc; + int timeo; + char *alloc, *e, *p; + + ctlr = edev->ctlr; + qlock(&ctlr->slock); + + alloc = nil; + if(waserror()){ + qunlock(&ctlr->slock); + free(alloc); + nexterror(); + } + + csr32w(ctlr, Dtccr+4, 0); + csr32w(ctlr, Dtccr, PCIWADDR(ctlr->dtcc)|Cmd); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Dtccr) & Cmd)) + break; + delay(1); + } + if(csr32r(ctlr, Dtccr) & Cmd) + error(Eio); + dtcc = ctlr->dtcc; + + edev->oerrs = dtcc->txer; + edev->crcs = dtcc->rxer; + edev->frames = dtcc->fae; + edev->buffs = dtcc->misspkt; + edev->overflows = ctlr->txdu+ctlr->rdu; + + if(n == 0){ + qunlock(&ctlr->slock); + poperror(); + return 0; + } + + if((alloc = malloc(READSTR)) == nil) + error(Enomem); + e = alloc+READSTR; + + p = seprint(alloc, e, "TxOk: %llud\n", dtcc->txok); + p = seprint(p, e, "RxOk: %llud\n", dtcc->rxok); + p = seprint(p, e, "TxEr: %llud\n", dtcc->txer); + p = seprint(p, e, "RxEr: %ud\n", dtcc->rxer); + p = seprint(p, e, "MissPkt: %ud\n", dtcc->misspkt); + p = seprint(p, 
e, "FAE: %ud\n", dtcc->fae); + p = seprint(p, e, "Tx1Col: %ud\n", dtcc->tx1col); + p = seprint(p, e, "TxMCol: %ud\n", dtcc->txmcol); + p = seprint(p, e, "RxOkPh: %llud\n", dtcc->rxokph); + p = seprint(p, e, "RxOkBrd: %llud\n", dtcc->rxokbrd); + p = seprint(p, e, "RxOkMu: %ud\n", dtcc->rxokmu); + p = seprint(p, e, "TxAbt: %ud\n", dtcc->txabt); + p = seprint(p, e, "TxUndrn: %ud\n", dtcc->txundrn); + + p = seprint(p, e, "txdu: %ud\n", ctlr->txdu); + p = seprint(p, e, "tcpf: %ud\n", ctlr->tcpf); + p = seprint(p, e, "udpf: %ud\n", ctlr->udpf); + p = seprint(p, e, "ipf: %ud\n", ctlr->ipf); + p = seprint(p, e, "fovf: %ud\n", ctlr->fovf); + p = seprint(p, e, "ierrs: %ud\n", ctlr->ierrs); + p = seprint(p, e, "rer: %ud\n", ctlr->rer); + p = seprint(p, e, "rdu: %ud\n", ctlr->rdu); + p = seprint(p, e, "punlc: %ud\n", ctlr->punlc); + p = seprint(p, e, "fovw: %ud\n", ctlr->fovw); + + p = seprint(p, e, "tcr: %#8.8ux\n", ctlr->tcr); + p = seprint(p, e, "rcr: %#8.8ux\n", ctlr->rcr); + p = seprint(p, e, "multicast: %ud\n", ctlr->mcast); + + if(ctlr->mii != nil && ctlr->mii->curphy != nil) + miidumpphy(ctlr->mii, p, e); + + n = readstr(offset, a, n, alloc); + + qunlock(&ctlr->slock); + poperror(); + free(alloc); + + return n; +} + +static void +rtl8169halt(Ctlr* ctlr) +{ + csr8w(ctlr, Cr, 0); + csr16w(ctlr, Imr, 0); + csr16w(ctlr, Isr, ~0); +} + +static int +rtl8169reset(Ctlr* ctlr) +{ + u32int r; + int timeo; + + /* + * Soft reset the controller. + */ + csr8w(ctlr, Cr, Rst); + for(r = timeo = 0; timeo < 1000; timeo++){ + r = csr8r(ctlr, Cr); + if(!(r & Rst)) + break; + delay(1); + } + rtl8169halt(ctlr); + + if(r & Rst) + return -1; + return 0; +} + +static void +rtl8169replenish(Ctlr* ctlr) +{ + D *d; + int rdt; + Block *bp; + + rdt = ctlr->rdt; + while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){ + d = &ctlr->rd[rdt]; + if(ctlr->rb[rdt] == nil){ + /* + * Simple allocation for now. + * This better be aligned on 8. 
+			 */
+			bp = iallocb(Mps);
+			if(bp == nil){
+				iprint("no available buffers\n");
+				break;
+			}
+			ctlr->rb[rdt] = bp;
+			d->addrlo = PCIWADDR(bp->rp);
+			d->addrhi = 0;
+		}
+		coherence();
+		d->control |= Own|Mps;
+		rdt = NEXT(rdt, ctlr->nrd);
+		ctlr->nrdfree++;
+	}
+	ctlr->rdt = rdt;
+}
+
+/*
+ * Program the controller from scratch while holding ctlr->ilock:
+ * halt it, load the station address, clear both descriptor rings,
+ * release and re-post the receive buffers, then apply the per-chip
+ * Cplusc/Mtps settings.  Returns -1, with the lock released, when
+ * ctlr->macv is not one of the recognised MAC versions.
+ */
+static int
+rtl8169init(Ether* edev)
+{
+	int i;
+	u32int r;
+	Block *bp;
+	Ctlr *ctlr;
+	/*
+	 * 16 bits: read with csr16r, carries bit 14 and is written
+	 * back with csr16w, so a u8int would drop the high byte.
+	 */
+	u16int cplusc;
+
+	ctlr = edev->ctlr;
+	ilock(&ctlr->ilock);
+
+	rtl8169halt(ctlr);
+
+	/*
+	 * MAC Address.
+	 * Must put chip into config register write enable mode.
+	 */
+	csr8w(ctlr, Cr9346, Eem1|Eem0);
+	r = (edev->ea[3]<<24)|(edev->ea[2]<<16)|(edev->ea[1]<<8)|edev->ea[0];
+	csr32w(ctlr, Idr0, r);
+	r = (edev->ea[5]<<8)|edev->ea[4];
+	csr32w(ctlr, Idr0+4, r);
+
+	/*
+	 * Transmitter.
+	 */
+	memset(ctlr->td, 0, sizeof(D)*ctlr->ntd);
+	ctlr->tdh = ctlr->tdt = 0;
+	ctlr->td[ctlr->ntd-1].control = Eor;
+
+	/*
+	 * Receiver.
+	 * Need to do something here about the multicast filter.
+	 */
+	memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd);
+	ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0;
+	ctlr->rd[ctlr->nrd-1].control = Eor;
+
+	for(i = 0; i < ctlr->nrd; i++){
+		if((bp = ctlr->rb[i]) != nil){
+			ctlr->rb[i] = nil;
+			freeb(bp);
+		}
+	}
+	rtl8169replenish(ctlr);
+	ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm;
+
+	/*
+	 * Mtps is in units of 128 except for the RTL8169
+	 * where it is 32. If using jumbo frames should be
+	 * set to 0x3F.
+	 * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst
+	 * settings in Tcr/Rcr; the (1<<14) is magic.
+	 */
+	ctlr->mtps = HOWMANY(Mps, 128);
+	cplusc = csr16r(ctlr, Cplusc) & ~(1<<14);
+	cplusc |= /*Rxchksum|*/Mulrw;
+	switch(ctlr->macv){
+	default:
+		/* don't return with interrupts off and the lock held */
+		iunlock(&ctlr->ilock);
+		return -1;
+	case Macv01:
+		ctlr->mtps = HOWMANY(Mps, 32);
+		break;
+	case Macv02:
+	case Macv03:
+		cplusc |= (1<<14);			/* magic */
+		break;
+	case Macv05:
+		/*
+		 * This is interpreted from clearly bogus code
+		 * in the manufacturer-supplied driver, it could
+		 * be wrong. Untested.
+ */ + r = csr8r(ctlr, Config2) & 0x07; + if(r == 0x01) /* 66MHz PCI */ + csr32w(ctlr, 0x7C, 0x0007FFFF); /* magic */ + else + csr32w(ctlr, 0x7C, 0x0007FF00); /* magic */ + pciclrmwi(ctlr->pcidev); + break; + case Macv13: + /* + * This is interpreted from clearly bogus code + * in the manufacturer-supplied driver, it could + * be wrong. Untested. + */ + pcicfgw8(ctlr->pcidev, 0x68, 0x00); /* magic */ + pcicfgw8(ctlr->pcidev, 0x69, 0x08); /* magic */ + break; + case Macv04: + case Macv11: + case Macv12: + case Macv14: + case Macv15: + break; + } + + /* + * Enable receiver/transmitter. + * Need to do this first or some of the settings below + * won't take. + */ + switch(ctlr->pciv){ + default: + csr8w(ctlr, Cr, Te|Re); + csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited); + csr32w(ctlr, Rcr, ctlr->rcr); + csr32w(ctlr, Mar0, 0); + csr32w(ctlr, Mar0+4, 0); + ctlr->mchash = 0; + case Rtl8169sc: + case Rtl8168b: + break; + } + + /* + * Interrupts. + * Disable Tdu|Tok for now, the transmit routine will tidy. + * Tdu means the NIC ran out of descriptors to send, so it + * doesn't really need to ever be on. + */ + csr32w(ctlr, Timerint, 0); + ctlr->imr = Serr|Timeout|Fovw|Punlc|Rdu|Ter|Rer|Rok; + csr16w(ctlr, Imr, ctlr->imr); + + /* + * Clear missed-packet counter; + * initial early transmit threshold value; + * set the descriptor ring base addresses; + * set the maximum receive packet size; + * no early-receive interrupts. + */ + csr32w(ctlr, Mpc, 0); + csr8w(ctlr, Mtps, ctlr->mtps); + csr32w(ctlr, Tnpds+4, 0); + csr32w(ctlr, Tnpds, PCIWADDR(ctlr->td)); + csr32w(ctlr, Rdsar+4, 0); + csr32w(ctlr, Rdsar, PCIWADDR(ctlr->rd)); + csr16w(ctlr, Rms, Mps); + r = csr16r(ctlr, Mulint) & 0xF000; + csr16w(ctlr, Mulint, r); + csr16w(ctlr, Cplusc, cplusc); + + /* + * Set configuration. 
+	 */
+	switch(ctlr->pciv){
+	default:
+		break;
+	case Rtl8169sc:
+		csr16w(ctlr, 0xE2, 0);			/* magic */
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		break;
+	case Rtl8168b:
+	case Rtl8169c:
+		csr16w(ctlr, 0xE2, 0);			/* magic */
+		csr16w(ctlr, Cplusc, 0x2000);		/* magic */
+		csr8w(ctlr, Cr, Te|Re);
+		csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+		csr32w(ctlr, Rcr, ctlr->rcr);
+		csr16w(ctlr, Rms, 0x0800);
+		csr8w(ctlr, Mtps, 0x3F);
+		break;
+	}
+	ctlr->tcr = csr32r(ctlr, Tcr);
+	csr8w(ctlr, Cr9346, 0);
+
+	iunlock(&ctlr->ilock);
+
+//	rtl8169mii(ctlr);
+
+	return 0;
+}
+
+/*
+ * Attach entry point.  On the first call (ctlr->init == 0), under
+ * ctlr->alock, allocate the transmit and receive descriptor rings,
+ * their Block* arrays and the tally-counter buffer, then run
+ * rtl8169init.  If any allocation fails everything is released and
+ * Enomem is raised with the lock dropped.  Afterwards poll
+ * miistatus for up to 350 tries, 10ms apart, and print the PHY state.
+ * NOTE(review): the return value of rtl8169init is ignored here.
+ */
+static void
+rtl8169attach(Ether* edev)
+{
+	int timeo;
+	Ctlr *ctlr;
+	MiiPhy *phy;
+
+	ctlr = edev->ctlr;
+	qlock(&ctlr->alock);
+	if(ctlr->init == 0){
+		ctlr->td = mallocalign(sizeof(D)*Ntd, 256, 0, 0);
+		ctlr->tb = malloc(Ntd*sizeof(Block*));
+		ctlr->ntd = Ntd;
+		ctlr->rd = mallocalign(sizeof(D)*Nrd, 256, 0, 0);
+		ctlr->rb = malloc(Nrd*sizeof(Block*));
+		ctlr->nrd = Nrd;
+		ctlr->dtcc = mallocalign(sizeof(Dtcc), 64, 0, 0);
+		if(ctlr->td == nil || ctlr->tb == nil || ctlr->rd == nil
+		|| ctlr->rb == nil || ctlr->dtcc == nil){
+			/*
+			 * rtl8169init would dereference the nil rings;
+			 * undo the partial allocation so a later attach
+			 * can try again.
+			 */
+			free(ctlr->td);
+			free(ctlr->tb);
+			free(ctlr->rd);
+			free(ctlr->rb);
+			free(ctlr->dtcc);
+			ctlr->td = nil;
+			ctlr->tb = nil;
+			ctlr->rd = nil;
+			ctlr->rb = nil;
+			ctlr->dtcc = nil;
+			qunlock(&ctlr->alock);
+			error(Enomem);
+		}
+		rtl8169init(edev);
+		ctlr->init = 1;
+	}
+	qunlock(&ctlr->alock);
+
+	/*
+	 * Wait for link to be ready.
+	 */
+	for(timeo = 0; timeo < 350; timeo++){
+		if(miistatus(ctlr->mii) == 0)
+			break;
+		tsleep(&up->sleep, return0, 0, 10);
+	}
+	phy = ctlr->mii->curphy;
+	print("%s: speed %d fd %d link %d rfc %d tfc %d\n",
+		edev->name, phy->speed, phy->fd, phy->link, phy->rfc, phy->tfc);
+}
+
+static void
+rtl8169link(Ether* edev)
+{
+	int limit;
+	Ctlr *ctlr;
+	MiiPhy *phy;
+
+	ctlr = edev->ctlr;
+
+	/*
+	 * Maybe the link changed - do we care very much?
+	 * Could stall transmits if no link, maybe?
+ */ + if(ctlr->mii == nil || ctlr->mii->curphy == nil) + return; + + phy = ctlr->mii->curphy; + if(miistatus(ctlr->mii) < 0){ + iprint("%slink n: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + edev->link = 0; + return; + } + edev->link = 1; + + limit = 256*1024; + if(phy->speed == 10){ + edev->mbps = 10; + limit = 65*1024; + } + else if(phy->speed == 100) + edev->mbps = 100; + else if(phy->speed == 1000) + edev->mbps = 1000; + iprint("%slink y: speed %d fd %d link %d rfc %d tfc %d\n", + edev->name, phy->speed, phy->fd, phy->link, + phy->rfc, phy->tfc); + + if(edev->oq != nil) + qsetlimit(edev->oq, limit); +} + +static void +rtl8169transmit(Ether* edev) +{ + D *d; + Block *bp; + Ctlr *ctlr; + int control, x; + + ctlr = edev->ctlr; + + ilock(&ctlr->tlock); + for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT(x, ctlr->ntd)){ + d = &ctlr->td[x]; + if((control = d->control) & Own) + break; + + /* + * Check errors and log here. + */ + USED(control); + + /* + * Free it up. + * Need to clean the descriptor here? Not really. + * Simple freeb for now (no chain and freeblist). + * Use ntq count for now. 
+		 */
+		freeb(ctlr->tb[x]);
+		ctlr->tb[x] = nil;
+		d->control &= Eor;
+
+		ctlr->ntq--;
+	}
+	ctlr->tdh = x;
+
+	/*
+	 * Queue new packets from edev->oq, always leaving one
+	 * descriptor unused.
+	 */
+	x = ctlr->tdt;
+	while(ctlr->ntq < (ctlr->ntd-1)){
+		if((bp = qget(edev->oq)) == nil)
+			break;
+
+		d = &ctlr->td[x];
+		d->addrlo = PCIWADDR(bp->rp);
+		d->addrhi = 0;
+		ctlr->tb[x] = bp;
+		/* address fields must be visible before Own is handed over */
+		coherence();
+		d->control |= Own|Fs|Ls|((BLEN(bp)<<TxflSHIFT) & TxflMASK);
+
+		x = NEXT(x, ctlr->ntd);
+		ctlr->ntq++;
+	}
+	if(x != ctlr->tdt){
+		/* something was queued: kick the normal priority queue */
+		ctlr->tdt = x;
+		csr8w(ctlr, Tppoll, Npq);
+	}
+	else if(ctlr->ntq >= (ctlr->ntd-1))
+		ctlr->txdu++;
+
+	iunlock(&ctlr->tlock);
+}
+
+/*
+ * Walk the receive ring from ctlr->rdh until a descriptor still
+ * marked Own is found.  Single-descriptor frames without the Res
+ * error summary bit are passed to etheriq, with checksum-ok flags
+ * set according to the Pid bits; anything else is dropped.
+ */
+static void
+rtl8169receive(Ether* edev)
+{
+	D *d;
+	int rdh;
+	Block *bp;
+	Ctlr *ctlr;
+	u32int control;
+
+	ctlr = edev->ctlr;
+
+	rdh = ctlr->rdh;
+	for(;;){
+		d = &ctlr->rd[rdh];
+
+		if(d->control & Own)
+			break;
+
+		control = d->control;
+		if((control & (Fs|Ls|Res)) == (Fs|Ls)){
+			bp = ctlr->rb[rdh];
+			ctlr->rb[rdh] = nil;
+			/* presumably the -4 drops the trailing CRC - TODO confirm */
+			bp->wp = bp->rp + ((control & RxflMASK)>>RxflSHIFT)-4;
+			bp->next = nil;
+
+			if(control & Fovf)
+				ctlr->fovf++;
+			if(control & Mar)
+				ctlr->mcast++;
+
+			switch(control & (Pid1|Pid0)){
+			default:
+				break;
+			case Pid0:
+				if(control & Tcpf){
+					ctlr->tcpf++;
+					break;
+				}
+				bp->flag |= Btcpck;
+				break;
+			case Pid1:
+				if(control & Udpf){
+					ctlr->udpf++;
+					break;
+				}
+				bp->flag |= Budpck;
+				break;
+			case Pid1|Pid0:
+				if(control & Ipf){
+					ctlr->ipf++;
+					break;
+				}
+				bp->flag |= Bipck;
+				break;
+			}
+			etheriq(edev, bp, 1);
+		}
+		else{
+			/*
+			 * Error stuff here.
+ print("control %#8.8ux\n", control); + */ + } + d->control &= Eor; + ctlr->nrdfree--; + rdh = NEXT(rdh, ctlr->nrd); + + if(ctlr->nrdfree < ctlr->nrd/2) + rtl8169replenish(ctlr); + } + ctlr->rdh = rdh; +} + +static void +rtl8169interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + u32int isr; + + edev = arg; + ctlr = edev->ctlr; + + while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){ + csr16w(ctlr, Isr, isr); + if((isr & ctlr->imr) == 0) + break; + if(isr & (Fovw|Punlc|Rdu|Rer|Rok)){ + rtl8169receive(edev); + if(!(isr & (Punlc|Rok))) + ctlr->ierrs++; + if(isr & Rer) + ctlr->rer++; + if(isr & Rdu) + ctlr->rdu++; + if(isr & Punlc) + ctlr->punlc++; + if(isr & Fovw) + ctlr->fovw++; + isr &= ~(Fovw|Rdu|Rer|Rok); + } + + if(isr & (Tdu|Ter|Tok)){ + rtl8169transmit(edev); + isr &= ~(Tdu|Ter|Tok); + } + + if(isr & Punlc){ + rtl8169link(edev); + isr &= ~Punlc; + } + + /* + * Some of the reserved bits get set sometimes... + */ + if(isr & (Serr|Timeout|Tdu|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok)) + panic("rtl8169interrupt: imr %#4.4ux isr %#4.4ux\n", + csr16r(ctlr, Imr), isr); + } +} + +static void +rtl8169pci(void) +{ + Pcidev *p; + Ctlr *ctlr; + int i, port, pcie; + + p = nil; + while(p = pcimatch(p, 0, 0)){ + if(p->ccrb != 0x02 || p->ccru != 0) + continue; + + pcie = 0; + switch(i = ((p->did<<16)|p->vid)){ + default: + continue; + case Rtl8100e: /* RTL810[01]E ? 
*/ + case Rtl8168b: /* RTL8168B */ + pcie = 1; + break; + case Rtl8169c: /* RTL8169C */ + case Rtl8169sc: /* RTL8169SC */ + case Rtl8169: /* RTL8169 */ + break; + case (0xC107<<16)|0x1259: /* Corega CG-LAPCIGT */ + i = Rtl8169; + break; + } + + port = p->mem[0].bar & ~0x01; + if(ioalloc(port, p->mem[0].size, 0, "rtl8169") < 0){ + print("rtl8169: port %#ux in use\n", port); + continue; + } + + ctlr = malloc(sizeof(Ctlr)); + ctlr->port = port; + ctlr->pcidev = p; + ctlr->pciv = i; + ctlr->pcie = pcie; + + if(pcigetpms(p) > 0){ + pcisetpms(p, 0); + + for(i = 0; i < 6; i++) + pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar); + pcicfgw8(p, PciINTL, p->intl); + pcicfgw8(p, PciLTR, p->ltr); + pcicfgw8(p, PciCLS, p->cls); + pcicfgw16(p, PciPCR, p->pcr); + } + + if(rtl8169reset(ctlr)){ + iofree(port); + free(ctlr); + continue; + } + + /* + * Extract the chip hardware version, + * needed to configure each properly. + */ + ctlr->macv = csr32r(ctlr, Tcr) & HwveridMASK; + if((ctlr->mii = rtl8169mii(ctlr)) == nil){ + iofree(port); + free(ctlr); + continue; + } + + pcisetbme(p); + + if(rtl8169ctlrhead != nil) + rtl8169ctlrtail->next = ctlr; + else + rtl8169ctlrhead = ctlr; + rtl8169ctlrtail = ctlr; + } +} + +static int +rtl8169pnp(Ether* edev) +{ + u32int r; + Ctlr *ctlr; + uchar ea[Eaddrlen]; + + if(rtl8169ctlrhead == nil) + rtl8169pci(); + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. + */ + for(ctlr = rtl8169ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 100; + + /* + * Check if the adapter's station address is to be overridden. + * If not, read it from the device and set in edev->ea. 
+ */ + memset(ea, 0, Eaddrlen); + if(memcmp(ea, edev->ea, Eaddrlen) == 0){ + r = csr32r(ctlr, Idr0); + edev->ea[0] = r; + edev->ea[1] = r>>8; + edev->ea[2] = r>>16; + edev->ea[3] = r>>24; + r = csr32r(ctlr, Idr0+4); + edev->ea[4] = r; + edev->ea[5] = r>>8; + } + + edev->attach = rtl8169attach; + edev->transmit = rtl8169transmit; + edev->interrupt = rtl8169interrupt; + edev->ifstat = rtl8169ifstat; + + edev->arg = edev; + edev->promiscuous = rtl8169promiscuous; + edev->multicast = rtl8169multicast; +// edev->shutdown = rtl8169shutdown; + + rtl8169link(edev); + + return 0; +} + +void +ether8169link(void) +{ + addethercard("rtl8169", rtl8169pnp); +} diff -Nru 0/sys/src/nix/386/ether82557.c 4/sys/src/nix/386/ether82557.c --- 0/sys/src/nix/386/ether82557.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/ether82557.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1333 @@ +/* + * Intel 82557 Fast Ethernet PCI Bus LAN Controller + * as found on the Intel EtherExpress PRO/100B. This chip is full + * of smarts, unfortunately they're not all in the right place. + * To do: + * the PCI scanning code could be made common to other adapters; + * auto-negotiation, full-duplex; + * optionally use memory-mapped registers; + * detach for PCI reset problems (also towards loadable drivers). 
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "../port/netif.h"
+
+#include "etherif.h"
+#include "io.h"
+
+enum {
+	Nrfd		= 64,		/* receive frame area */
+	Ncb		= 64,		/* maximum control blocks queued */
+
+	NullPointer	= 0xFFFFFFFF,	/* 82557 NULL pointer */
+};
+
+enum {					/* CSR */
+	Status		= 0x00,		/* byte or word (word includes Ack) */
+	Ack		= 0x01,		/* byte */
+	CommandR	= 0x02,		/* byte or word (word includes Interrupt) */
+	Interrupt	= 0x03,		/* byte */
+	General		= 0x04,		/* dword */
+	Port		= 0x08,		/* dword */
+	Fcr		= 0x0C,		/* Flash control register */
+	Ecr		= 0x0E,		/* EEPROM control register */
+	Mcr		= 0x10,		/* MDI control register */
+	Gstatus		= 0x1D,		/* General status register */
+};
+
+enum {					/* Status */
+	RUidle		= 0x0000,
+	RUsuspended	= 0x0004,
+	RUnoresources	= 0x0008,
+	RUready		= 0x0010,
+	RUrbd		= 0x0020,	/* bit */
+	RUstatus	= 0x003F,	/* mask */
+
+	CUidle		= 0x0000,
+	CUsuspended	= 0x0040,
+	CUactive	= 0x0080,
+	CUstatus	= 0x00C0,	/* mask */
+
+	StatSWI		= 0x0400,	/* SoftWare generated Interrupt */
+	StatMDI		= 0x0800,	/* MDI r/w done */
+	StatRNR		= 0x1000,	/* Receive unit Not Ready */
+	StatCNA		= 0x2000,	/* Command unit Not Active (Active->Idle) */
+	StatFR		= 0x4000,	/* Finished Receiving */
+	StatCX		= 0x8000,	/* Command eXecuted */
+	StatTNO		= 0x8000,	/* Transmit NOT OK */
+};
+
+enum {					/* Command (byte) */
+	CUnop		= 0x00,
+	CUstart		= 0x10,
+	CUresume	= 0x20,
+	LoadDCA		= 0x40,		/* Load Dump Counters Address */
+	DumpSC		= 0x50,		/* Dump Statistical Counters */
+	LoadCUB		= 0x60,		/* Load CU Base */
+	ResetSA		= 0x70,		/* Dump and Reset Statistical Counters */
+
+	RUstart		= 0x01,
+	RUresume	= 0x02,
+	RUabort		= 0x04,
+	LoadHDS		= 0x05,		/* Load Header Data Size */
+	LoadRUB		= 0x06,		/* Load RU Base */
+	RBDresume	= 0x07,		/* Resume frame reception */
+};
+
+enum {					/* Interrupt (byte) */
+	InterruptM	= 0x01,		/* interrupt Mask */
+	InterruptSI	= 0x02,		/* Software generated Interrupt */
+};
+
+enum {					/* Ecr */
+	EEsk		= 0x01,		/* serial clock */
+	EEcs		= 0x02,		/* chip select */
+	EEdi		= 0x04,		/* serial data in */
+	EEdo		= 0x08,		/* serial data out */
+
+	EEstart		= 0x04,		/* start bit */
+	EEread		= 0x02,		/* read opcode */
+};
+
+enum {					/* Mcr */
+	MDIread		= 0x08000000,	/* read opcode */
+	MDIwrite	= 0x04000000,	/* write opcode */
+	MDIready	= 0x10000000,	/* ready bit */
+	MDIie		= 0x20000000,	/* interrupt enable */
+};
+/*
+ * Receive frame descriptor with the frame data held inline after the
+ * header; rfdalloc() below allocates one per Block.
+ */
+typedef struct Rfd {
+	int	field;			/* status/control bits, see "field" enum below */
+	ulong	link;			/* physical address of the next Rfd */
+	ulong	rbd;			/* always set to NullPointer by rfdalloc */
+	ushort	count;			/* low 14 bits: bytes received; see "count" enum */
+	ushort	size;			/* set to sizeof(Etherpkt) by rfdalloc */
+
+	uchar	data[1700];
+} Rfd;
+
+enum {					/* field */
+	RfdCollision	= 0x00000001,
+	RfdIA		= 0x00000002,	/* IA match */
+	RfdRxerr	= 0x00000010,	/* PHY character error */
+	RfdType		= 0x00000020,	/* Type frame */
+	RfdRunt		= 0x00000080,
+	RfdOverrun	= 0x00000100,
+	RfdBuffer	= 0x00000200,
+	RfdAlignment	= 0x00000400,
+	RfdCRC		= 0x00000800,
+
+	RfdOK		= 0x00002000,	/* frame received OK */
+	RfdC		= 0x00008000,	/* reception Complete */
+	RfdSF		= 0x00080000,	/* Simplified or Flexible (1) Rfd */
+	RfdH		= 0x00100000,	/* Header RFD */
+
+	RfdI		= 0x20000000,	/* Interrupt after completion */
+	RfdS		= 0x40000000,	/* Suspend after completion */
+	RfdEL		= 0x80000000,	/* End of List */
+};
+
+enum {					/* count */
+	RfdF		= 0x4000,
+	RfdEOF		= 0x8000,
+};
+/*
+ * Command block for the transmit/command ring built by ctlrinit().
+ * The union holds either raw command data (IAS, Configure) or the
+ * fields txstart() fills in for a transmit.
+ */
+typedef struct Cb Cb;
+typedef struct Cb {
+	ushort	status;
+	ushort	command;
+	ulong	link;			/* physical address of the next Cb */
+	union {
+		uchar	data[24];	/* CbIAS + CbConfigure */
+		struct {
+			ulong	tbd;
+			ushort	count;
+			uchar	threshold;
+			uchar	number;
+
+			ulong	tba;
+			ushort	tbasz;
+			ushort	pad;
+		};
+	};
+
+	Block*	bp;			/* Block being transmitted, freed on completion */
+	Cb*	next;			/* next Cb in the ring (virtual address) */
+} Cb;
+
+enum {					/* action command */
+	CbU		= 0x1000,	/* transmit underrun */
+	CbOK		= 0x2000,	/* DMA completed OK */
+	CbC		= 0x8000,	/* execution Complete */
+
+	CbNOP		= 0x0000,
+	CbIAS		= 0x0001,	/* Individual Address Setup */
+	CbConfigure	= 0x0002,
+	CbMAS		= 0x0003,	/* Multicast Address Setup */
+	CbTransmit	= 0x0004,
+	CbDump		= 0x0006,
+	CbDiagnose	= 0x0007,
+	CbCommand	= 0x0007,	/* mask */
+
+	CbSF		= 0x0008,	/* Flexible-mode CbTransmit */
+
+	CbI		= 0x2000,	/* Interrupt after completion */
+	CbS		= 0x4000,	/* Suspend after completion */
+	CbEL		= 0x8000,	/* End of List */
+};
+
+enum {					/* CbTransmit count */
+	CbEOF		= 0x8000,
+};
+/*
+ * Per-controller state.
+ */
+typedef struct Ctlr Ctlr;
+typedef struct Ctlr {
+	Lock	slock;			/* attach */
+	int	state;
+
+	int	port;
+	Pcidev*	pcidev;
+	Ctlr*	next;
+	int	active;
+
+	int	eepromsz;		/* address size in bits */
+	ushort*	eeprom;
+
+	Lock	miilock;
+
+	int	tick;
+
+	Lock	rlock;			/* registers */
+	int	command;		/* last command issued */
+
+	Block*	rfdhead;		/* receive side */
+	Block*	rfdtail;
+	int	nrfd;
+
+	Lock	cblock;			/* transmit side */
+	int	action;
+	int	nop;
+	uchar	configdata[24];
+	int	threshold;
+	int	ncb;
+	Cb*	cbr;
+	Cb*	cbhead;
+	Cb*	cbtail;
+	int	cbq;
+	int	cbqmax;
+	int	cbqmaxhw;
+
+	Lock	dlock;			/* dump statistical counters */
+	ulong	dump[17];
+} Ctlr;
+
+static Ctlr* ctlrhead;
+static Ctlr* ctlrtail;
+/*
+ * Default configure-command data; ctlrinit() copies it into each
+ * Ctlr, where configure() and reset() then adjust individual bytes.
+ */
+static uchar configdata[24] = {
+	0x16,				/* byte count */
+	0x08,				/* Rx/Tx FIFO limit */
+	0x00,				/* adaptive IFS */
+	0x00,
+	0x00,				/* Rx DMA maximum byte count */
+//	0x80,				/* Tx DMA maximum byte count */
+	0x00,				/* Tx DMA maximum byte count */
+	0x32,				/* !late SCB, CNA interrupts */
+	0x03,				/* discard short Rx frames */
+	0x00,				/* 503/MII */
+
+	0x00,
+	0x2E,				/* normal operation, NSAI */
+	0x00,				/* linear priority */
+	0x60,				/* inter-frame spacing */
+	0x00,
+	0xF2,
+	0xC8,				/* 503, promiscuous mode off */
+	0x00,
+	0x40,
+	0xF3,				/* transmit padding enable */
+	0x80,				/* full duplex pin enable */
+	0x3F,				/* no Multi IA */
+	0x05,				/* no Multi Cast ALL */
+};
+/* register access through I/O ports, relative to ctlr->port */
+#define csr8r(c, r)	(inb((c)->port+(r)))
+#define csr16r(c, r)	(ins((c)->port+(r)))
+#define csr32r(c, r)	(inl((c)->port+(r)))
+#define csr8w(c, r, b)	(outb((c)->port+(r), (int)(b)))
+#define csr16w(c, r, w)	(outs((c)->port+(r), (ushort)(w)))
+#define csr32w(c, r, l)	(outl((c)->port+(r), (ulong)(l)))
+/*
+ * Issue command c to the chip under ctlr->rlock.
+ * Waits (up to 100 polls, 1us apart) for the command register to read
+ * zero; on timeout records ctlr->command = -1, prints a diagnostic and
+ * gives up.  For the commands that take an argument, v is written to
+ * the General register before the command byte.
+ */
+static void
+command(Ctlr* ctlr, int c, int v)
+{
+	int timeo;
+
+	ilock(&ctlr->rlock);
+
+	/*
+	 * Only back-to-back CUresume can be done
+	 * without waiting for any previous command to complete.
+	 * This should be the common case.
+	 * Unfortunately there's a chip errata where back-to-back
+	 * CUresumes can be lost, the fix is to always wait.
+	if(c == CUresume && ctlr->command == CUresume){
+		csr8w(ctlr, CommandR, c);
+		iunlock(&ctlr->rlock);
+		return;
+	}
+	 */
+
+	for(timeo = 0; timeo < 100; timeo++){
+		if(!csr8r(ctlr, CommandR))
+			break;
+		microdelay(1);
+	}
+	if(timeo >= 100){
+		ctlr->command = -1;
+		iunlock(&ctlr->rlock);
+		iprint("i82557: command %#ux %#ux timeout\n", c, v);
+		return;
+	}
+
+	switch(c){
+
+	case CUstart:
+	case LoadDCA:
+	case LoadCUB:
+	case RUstart:
+	case LoadHDS:
+	case LoadRUB:
+		csr32w(ctlr, General, v);
+		break;
+
+	/*
+	case CUnop:
+	case CUresume:
+	case DumpSC:
+	case ResetSA:
+	case RUresume:
+	case RUabort:
+	 */
+	default:
+		break;
+	}
+	csr8w(ctlr, CommandR, c);
+	ctlr->command = c;
+
+	iunlock(&ctlr->rlock);
+}
+/*
+ * Allocate a Block holding an initialised Rfd whose link field is
+ * the given value.  Returns nil if iallocb fails.
+ */
+static Block*
+rfdalloc(ulong link)
+{
+	Block *bp;
+	Rfd *rfd;
+
+	if(bp = iallocb(sizeof(Rfd))){
+		rfd = (Rfd*)bp->rp;
+		rfd->field = 0;
+		rfd->link = link;
+		rfd->rbd = NullPointer;
+		rfd->count = 0;
+		rfd->size = sizeof(Etherpkt);
+	}
+
+	return bp;
+}
+/*
+ * Kernel process started by attach(); arg is the Ether.
+ * Wakes every 4 seconds.
+ */
+static void
+watchdog(void* arg)
+{
+	Ether *ether;
+	Ctlr *ctlr;
+	static void txstart(Ether*);
+
+	ether = arg;
+	for(;;){
+		tsleep(&up->sleep, return0, 0, 4000);
+
+		/*
+		 * Hmmm. This doesn't seem right. Currently
+		 * the device can't be disabled but it may be in
+		 * the future.
+		 */
+		ctlr = ether->ctlr;
+		if(ctlr == nil || ctlr->state == 0){
+			print("%s: exiting\n", up->text);
+			pexit("disabled", 0);
+		}
+
+		ilock(&ctlr->cblock);
+		if(ctlr->tick++){	/* tick is zeroed by interrupt() on receive activity */
+			ctlr->action = CbMAS;
+			txstart(ether);
+		}
+		iunlock(&ctlr->cblock);
+	}
+}
+/*
+ * First attach only (ctlr->state == 0): write 0 to the Interrupt
+ * register, start the receive unit at the head of the Rfd ring and,
+ * unless the EEPROM says it is unnecessary, start the watchdog kproc.
+ */
+static void
+attach(Ether* ether)
+{
+	Ctlr *ctlr;
+	char name[KNAMELEN];
+
+	ctlr = ether->ctlr;
+	lock(&ctlr->slock);
+	if(ctlr->state == 0){
+		ilock(&ctlr->rlock);
+		csr8w(ctlr, Interrupt, 0);
+		iunlock(&ctlr->rlock);
+		command(ctlr, RUstart, PADDR(ctlr->rfdhead->rp));
+		ctlr->state = 1;
+
+		/*
+		 * Start the watchdog timer for the receive lockup errata
+		 * unless the EEPROM compatibility word indicates it may be
+		 * omitted.
+		 */
+		if((ctlr->eeprom[0x03] & 0x0003) != 0x0003){
+			snprint(name, KNAMELEN, "#l%dwatchdog", ether->ctlrno);
+			kproc(name, watchdog, ether);
+		}
+	}
+	unlock(&ctlr->slock);
+}
+/*
+ * Refresh the error counters in ether from a statistics dump and,
+ * when n is non-zero, format the counters, EEPROM contents and PHY
+ * registers as text and return the part selected by offset and n.
+ */
+static long
+ifstat(Ether* ether, void* a, long n, ulong offset)
+{
+	char *p;
+	int i, len, phyaddr;
+	Ctlr *ctlr;
+	ulong dump[17];
+
+	ctlr = ether->ctlr;
+	lock(&ctlr->dlock);
+
+	/*
+	 * Start the command then
+	 * wait for completion status,
+	 * should be 0xA005.
+ */ + ctlr->dump[16] = 0; + command(ctlr, DumpSC, 0); + while(ctlr->dump[16] == 0) + ; + + ether->oerrs = ctlr->dump[1]+ctlr->dump[2]+ctlr->dump[3]; + ether->crcs = ctlr->dump[10]; + ether->frames = ctlr->dump[11]; + ether->buffs = ctlr->dump[12]+ctlr->dump[15]; + ether->overflows = ctlr->dump[13]; + + if(n == 0){ + unlock(&ctlr->dlock); + return 0; + } + + memmove(dump, ctlr->dump, sizeof(dump)); + unlock(&ctlr->dlock); + + p = malloc(READSTR); + len = snprint(p, READSTR, "transmit good frames: %lud\n", dump[0]); + len += snprint(p+len, READSTR-len, "transmit maximum collisions errors: %lud\n", dump[1]); + len += snprint(p+len, READSTR-len, "transmit late collisions errors: %lud\n", dump[2]); + len += snprint(p+len, READSTR-len, "transmit underrun errors: %lud\n", dump[3]); + len += snprint(p+len, READSTR-len, "transmit lost carrier sense: %lud\n", dump[4]); + len += snprint(p+len, READSTR-len, "transmit deferred: %lud\n", dump[5]); + len += snprint(p+len, READSTR-len, "transmit single collisions: %lud\n", dump[6]); + len += snprint(p+len, READSTR-len, "transmit multiple collisions: %lud\n", dump[7]); + len += snprint(p+len, READSTR-len, "transmit total collisions: %lud\n", dump[8]); + len += snprint(p+len, READSTR-len, "receive good frames: %lud\n", dump[9]); + len += snprint(p+len, READSTR-len, "receive CRC errors: %lud\n", dump[10]); + len += snprint(p+len, READSTR-len, "receive alignment errors: %lud\n", dump[11]); + len += snprint(p+len, READSTR-len, "receive resource errors: %lud\n", dump[12]); + len += snprint(p+len, READSTR-len, "receive overrun errors: %lud\n", dump[13]); + len += snprint(p+len, READSTR-len, "receive collision detect errors: %lud\n", dump[14]); + len += snprint(p+len, READSTR-len, "receive short frame errors: %lud\n", dump[15]); + len += snprint(p+len, READSTR-len, "nop: %d\n", ctlr->nop); + if(ctlr->cbqmax > ctlr->cbqmaxhw) + ctlr->cbqmaxhw = ctlr->cbqmax; + len += snprint(p+len, READSTR-len, "cbqmax: %d\n", ctlr->cbqmax); + 
ctlr->cbqmax = 0; + len += snprint(p+len, READSTR-len, "threshold: %d\n", ctlr->threshold); + + len += snprint(p+len, READSTR-len, "eeprom:"); + for(i = 0; i < (1<eepromsz); i++){ + if(i && ((i & 0x07) == 0)) + len += snprint(p+len, READSTR-len, "\n "); + len += snprint(p+len, READSTR-len, " %4.4ux", ctlr->eeprom[i]); + } + + if((ctlr->eeprom[6] & 0x1F00) && !(ctlr->eeprom[6] & 0x8000)){ + phyaddr = ctlr->eeprom[6] & 0x00FF; + len += snprint(p+len, READSTR-len, "\nphy %2d:", phyaddr); + for(i = 0; i < 6; i++){ + static int miir(Ctlr*, int, int); + + len += snprint(p+len, READSTR-len, " %4.4ux", + miir(ctlr, phyaddr, i)); + } + } + + snprint(p+len, READSTR-len, "\n"); + n = readstr(offset, a, n, p); + free(p); + + return n; +} + +static void +txstart(Ether* ether) +{ + Ctlr *ctlr; + Block *bp; + Cb *cb; + + ctlr = ether->ctlr; + while(ctlr->cbq < (ctlr->ncb-1)){ + cb = ctlr->cbhead->next; + if(ctlr->action == 0){ + bp = qget(ether->oq); + if(bp == nil) + break; + + cb->command = CbS|CbSF|CbTransmit; + cb->tbd = PADDR(&cb->tba); + cb->count = 0; + cb->threshold = ctlr->threshold; + cb->number = 1; + cb->tba = PADDR(bp->rp); + cb->bp = bp; + cb->tbasz = BLEN(bp); + } + else if(ctlr->action == CbConfigure){ + cb->command = CbS|CbConfigure; + memmove(cb->data, ctlr->configdata, sizeof(ctlr->configdata)); + ctlr->action = 0; + } + else if(ctlr->action == CbIAS){ + cb->command = CbS|CbIAS; + memmove(cb->data, ether->ea, Eaddrlen); + ctlr->action = 0; + } + else if(ctlr->action == CbMAS){ + cb->command = CbS|CbMAS; + memset(cb->data, 0, sizeof(cb->data)); + ctlr->action = 0; + } + else{ + print("#l%d: action %#ux\n", ether->ctlrno, ctlr->action); + ctlr->action = 0; + break; + } + cb->status = 0; + + coherence(); + ctlr->cbhead->command &= ~CbS; + ctlr->cbhead = cb; + ctlr->cbq++; + } + + /* + * Workaround for some broken HUB chips + * when connected at 10Mb/s half-duplex. 
+	 */
+	if(ctlr->nop){
+		command(ctlr, CUnop, 0);
+		microdelay(1);
+	}
+	command(ctlr, CUresume, 0);
+
+	if(ctlr->cbq > ctlr->cbqmax)
+		ctlr->cbqmax = ctlr->cbq;
+}
+/*
+ * Set or clear the promiscuous-mode related bits in the controller's
+ * configuration bytes and queue a Configure command via txstart().
+ */
+static void
+configure(Ether* ether, int promiscuous)
+{
+	Ctlr *ctlr;
+
+	ctlr = ether->ctlr;
+	ilock(&ctlr->cblock);
+	if(promiscuous){
+		ctlr->configdata[6] |= 0x80;		/* Save Bad Frames */
+		//ctlr->configdata[6] &= ~0x40;		/* !Discard Overrun Rx Frames */
+		ctlr->configdata[7] &= ~0x01;		/* !Discard Short Rx Frames */
+		ctlr->configdata[15] |= 0x01;		/* Promiscuous mode */
+		ctlr->configdata[18] &= ~0x01;		/* (!Padding enable?), !stripping enable */
+		ctlr->configdata[21] |= 0x08;		/* Multi Cast ALL */
+	}
+	else{
+		ctlr->configdata[6] &= ~0x80;
+		//ctlr->configdata[6] |= 0x40;
+		ctlr->configdata[7] |= 0x01;
+		ctlr->configdata[15] &= ~0x01;
+		ctlr->configdata[18] |= 0x01;		/* 0x03? */
+		ctlr->configdata[21] &= ~0x08;
+	}
+	ctlr->action = CbConfigure;
+	txstart(ether);
+	iunlock(&ctlr->cblock);
+}
+/*
+ * Ether promiscuous entry point; arg is the Ether.
+ */
+static void
+promiscuous(void* arg, int on)
+{
+	configure(arg, on);
+}
+/*
+ * Ether multicast entry point.  The address itself is ignored.
+ */
+static void
+multicast(void* ether, uchar *addr, int add)
+{
+	USED(addr);
+	/*
+	 * TODO: if (add) add addr to list of mcast addrs in controller
+	 *	else remove addr from list of mcast addrs in controller
+	 * enable multicast input (see CbMAS) instead of promiscuous mode.
+	 */
+	if (add)
+		configure(ether, 1);	/* removal (add == 0) changes nothing */
+}
+/*
+ * Ether transmit entry point: run txstart() under ctlr->cblock.
+ */
+static void
+transmit(Ether* ether)
+{
+	Ctlr *ctlr;
+
+	ctlr = ether->ctlr;
+	ilock(&ctlr->cblock);
+	txstart(ether);
+	iunlock(&ctlr->cblock);
+}
+/*
+ * Process every completed Rfd (RfdC set) starting at ctlr->rfdhead,
+ * passing good frames to etheriq() and advancing the ring.
+ */
+static void
+receive(Ether* ether)
+{
+	Rfd *rfd;
+	Ctlr *ctlr;
+	int count;
+	Block *bp, *pbp, *xbp;
+
+	ctlr = ether->ctlr;
+	bp = ctlr->rfdhead;
+	for(rfd = (Rfd*)bp->rp; rfd->field & RfdC; rfd = (Rfd*)bp->rp){
+		/*
+		 * If it's an OK receive frame
+		 * 1) save the count
+		 * 2) if it's small, try to allocate a block and copy
+		 *    the data, then adjust the necessary fields for reuse;
+		 * 3) if it's big, try to allocate a new Rfd and if
+		 *    successful
+		 *	adjust the received buffer pointers for the
+		 *	  actual data received;
+		 *	initialise the replacement buffer to point to
+		 *	  the next in the ring;
+		 *	initialise bp to point to the replacement;
+		 * 4) if there's a good packet, pass it on for disposal.
+		 */
+		if(rfd->field & RfdOK){
+			pbp = nil;
+			count = rfd->count & 0x3FFF;
+			if((count < ETHERMAXTU/4) && (pbp = iallocb(count))){
+				memmove(pbp->rp, bp->rp+offsetof(Rfd, data[0]), count);
+				pbp->wp = pbp->rp + count;
+
+				rfd->count = 0;
+				rfd->field = 0;
+			}
+			else if(xbp = rfdalloc(rfd->link)){
+				bp->rp += offsetof(Rfd, data[0]);
+				bp->wp = bp->rp + count;
+
+				xbp->next = bp->next;
+				bp->next = 0;
+
+				pbp = bp;
+				bp = xbp;
+			}
+			if(pbp != nil)
+				etheriq(ether, pbp, 1);
+		}
+		else{
+			rfd->count = 0;
+			rfd->field = 0;
+		}
+
+		/*
+		 * The ring tail pointer follows the head with one
+		 * unused buffer in between to defeat hardware prefetch;
+		 * once the tail pointer has been bumped on to the next
+		 * and the new tail has the Suspend bit set, it can be
+		 * removed from the old tail buffer.
+		 * As a replacement for the current head buffer may have
+		 * been allocated above, ensure that the new tail points
+		 * to it (next and link).
+		 */
+		rfd = (Rfd*)ctlr->rfdtail->rp;
+		ctlr->rfdtail = ctlr->rfdtail->next;
+		ctlr->rfdtail->next = bp;
+		((Rfd*)ctlr->rfdtail->rp)->link = PADDR(bp->rp);
+		((Rfd*)ctlr->rfdtail->rp)->field |= RfdS;
+		coherence();
+		rfd->field &= ~RfdS;
+
+		/*
+		 * Finally done with the current (possibly replaced)
+		 * head, move on to the next and maintain the sentinel
+		 * between tail and head.
+		 */
+		ctlr->rfdhead = bp->next;
+		bp = ctlr->rfdhead;
+	}
+}
+/*
+ * Interrupt handler; arg is the Ether.
+ * Reads and acknowledges the status word, then services receive,
+ * receiver-not-ready and command-unit-not-active events until no
+ * handled status bit remains set.
+ */
+static void
+interrupt(Ureg*, void* arg)
+{
+	Cb* cb;
+	Ctlr *ctlr;
+	Ether *ether;
+	int status;
+
+	ether = arg;
+	ctlr = ether->ctlr;
+
+	for(;;){
+		ilock(&ctlr->rlock);
+		status = csr16r(ctlr, Status);
+		csr8w(ctlr, Ack, (status>>8) & 0xFF);	/* write the high status byte back to Ack */
+		iunlock(&ctlr->rlock);
+
+		if(!(status & (StatCX|StatFR|StatCNA|StatRNR|StatMDI|StatSWI)))
+			break;
+
+		/*
+		 * If the watchdog timer for the receiver lockup errata is running,
+		 * let it know the receiver is active.
+		 */
+		if(status & (StatFR|StatRNR)){
+			ilock(&ctlr->cblock);
+			ctlr->tick = 0;
+			iunlock(&ctlr->cblock);
+		}
+
+		if(status & StatFR){
+			receive(ether);
+			status &= ~StatFR;
+		}
+
+		if(status & StatRNR){
+			command(ctlr, RUresume, 0);
+			status &= ~StatRNR;
+		}
+
+		if(status & StatCNA){
+			ilock(&ctlr->cblock);
+
+			/* reclaim completed control blocks from the tail of the ring */
+			cb = ctlr->cbtail;
+			while(ctlr->cbq){
+				if(!(cb->status & CbC))
+					break;
+				if(cb->bp){
+					freeb(cb->bp);
+					cb->bp = nil;
+				}
+				/* raise the transmit threshold after an underrun */
+				if((cb->status & CbU) && ctlr->threshold < 0xE0)
+					ctlr->threshold++;
+
+				ctlr->cbq--;
+				cb = cb->next;
+			}
+			ctlr->cbtail = cb;
+
+			txstart(ether);
+			iunlock(&ctlr->cblock);
+
+			status &= ~StatCNA;
+		}
+
+		/* any handled-class bit still set here (CX, MDI, SWI) is unexpected */
+		if(status & (StatCX|StatFR|StatCNA|StatRNR|StatMDI|StatSWI))
+			panic("#l%d: status %#ux\n", ether->ctlrno, status);
+	}
+}
+/*
+ * Build the receive frame ring and the transmit control block ring
+ * and load the default configuration into ctlr.
+ */
+static void
+ctlrinit(Ctlr* ctlr)
+{
+	int i;
+	Block *bp;
+	Rfd *rfd;
+	ulong link;
+
+	/*
+	 * Create the Receive Frame Area (RFA) as a ring of allocated
+	 * buffers.
+ * A sentinel buffer is maintained between the last buffer in + * the ring (marked with RfdS) and the head buffer to defeat the + * hardware prefetch of the next RFD and allow dynamic buffer + * allocation. + */ + link = NullPointer; + for(i = 0; i < Nrfd; i++){ + bp = rfdalloc(link); + if(ctlr->rfdhead == nil) + ctlr->rfdtail = bp; + bp->next = ctlr->rfdhead; + ctlr->rfdhead = bp; + link = PADDR(bp->rp); + } + ctlr->rfdtail->next = ctlr->rfdhead; + rfd = (Rfd*)ctlr->rfdtail->rp; + rfd->link = PADDR(ctlr->rfdhead->rp); + rfd->field |= RfdS; + ctlr->rfdhead = ctlr->rfdhead->next; + + /* + * Create a ring of control blocks for the + * transmit side. + */ + ilock(&ctlr->cblock); + ctlr->cbr = malloc(ctlr->ncb*sizeof(Cb)); + for(i = 0; i < ctlr->ncb; i++){ + ctlr->cbr[i].status = CbC|CbOK; + ctlr->cbr[i].command = CbS|CbNOP; + ctlr->cbr[i].link = PADDR(&ctlr->cbr[NEXT(i, ctlr->ncb)].status); + ctlr->cbr[i].next = &ctlr->cbr[NEXT(i, ctlr->ncb)]; + } + ctlr->cbhead = ctlr->cbr; + ctlr->cbtail = ctlr->cbr; + ctlr->cbq = 0; + + memmove(ctlr->configdata, configdata, sizeof(configdata)); + ctlr->threshold = 80; + ctlr->tick = 0; + + iunlock(&ctlr->cblock); +} + +static int +miir(Ctlr* ctlr, int phyadd, int regadd) +{ + int mcr, timo; + + lock(&ctlr->miilock); + csr32w(ctlr, Mcr, MDIread|(phyadd<<21)|(regadd<<16)); + mcr = 0; + for(timo = 64; timo; timo--){ + mcr = csr32r(ctlr, Mcr); + if(mcr & MDIready) + break; + microdelay(1); + } + unlock(&ctlr->miilock); + + if(mcr & MDIready) + return mcr & 0xFFFF; + + return -1; +} + +static int +miiw(Ctlr* ctlr, int phyadd, int regadd, int data) +{ + int mcr, timo; + + lock(&ctlr->miilock); + csr32w(ctlr, Mcr, MDIwrite|(phyadd<<21)|(regadd<<16)|(data & 0xFFFF)); + mcr = 0; + for(timo = 64; timo; timo--){ + mcr = csr32r(ctlr, Mcr); + if(mcr & MDIready) + break; + microdelay(1); + } + unlock(&ctlr->miilock); + + if(mcr & MDIready) + return 0; + + return -1; +} + +static int +hy93c46r(Ctlr* ctlr, int r) +{ + int data, i, op, size; + + 
/* + * Hyundai HY93C46 or equivalent serial EEPROM. + * This sequence for reading a 16-bit register 'r' + * in the EEPROM is taken straight from Section + * 3.3.4.2 of the Intel 82557 User's Guide. + */ +reread: + csr16w(ctlr, Ecr, EEcs); + op = EEstart|EEread; + for(i = 2; i >= 0; i--){ + data = (((op>>i) & 0x01)<<2)|EEcs; + csr16w(ctlr, Ecr, data); + csr16w(ctlr, Ecr, data|EEsk); + microdelay(1); + csr16w(ctlr, Ecr, data); + microdelay(1); + } + + /* + * First time through must work out the EEPROM size. + */ + if((size = ctlr->eepromsz) == 0) + size = 8; + + for(size = size-1; size >= 0; size--){ + data = (((r>>size) & 0x01)<<2)|EEcs; + csr16w(ctlr, Ecr, data); + csr16w(ctlr, Ecr, data|EEsk); + delay(1); + csr16w(ctlr, Ecr, data); + microdelay(1); + if(!(csr16r(ctlr, Ecr) & EEdo)) + break; + } + + data = 0; + for(i = 15; i >= 0; i--){ + csr16w(ctlr, Ecr, EEcs|EEsk); + microdelay(1); + if(csr16r(ctlr, Ecr) & EEdo) + data |= (1<eepromsz == 0){ + ctlr->eepromsz = 8-size; + ctlr->eeprom = malloc((1<eepromsz)*sizeof(ushort)); + goto reread; + } + + return data; +} + +static void +i82557pci(void) +{ + Pcidev *p; + Ctlr *ctlr; + int i, nop, port; + + p = nil; + nop = 0; + while(p = pcimatch(p, 0x8086, 0)){ + switch(p->did){ + default: + continue; + case 0x1031: /* Intel 82562EM */ + case 0x1050: /* Intel 82562EZ */ + case 0x1039: /* Intel 82801BD PRO/100 VE */ + case 0x103A: /* Intel 82562 PRO/100 VE */ + case 0x103D: /* Intel 82562 PRO/100 VE */ + case 0x1064: /* Intel 82562 PRO/100 VE */ + case 0x2449: /* Intel 82562ET */ + case 0x27DC: /* Intel 82801G PRO/100 VE */ + nop = 1; + /*FALLTHROUGH*/ + case 0x1209: /* Intel 82559ER */ + case 0x1229: /* Intel 8255[789] */ + case 0x1030: /* Intel 82559 InBusiness 10/100 */ + break; + } + + if(pcigetpms(p) > 0){ + pcisetpms(p, 0); + + for(i = 0; i < 6; i++) + pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar); + pcicfgw8(p, PciINTL, p->intl); + pcicfgw8(p, PciLTR, p->ltr); + pcicfgw8(p, PciCLS, p->cls); + pcicfgw16(p, PciPCR, p->pcr); + 
} + + /* + * bar[0] is the memory-mapped register address (4KB), + * bar[1] is the I/O port register address (32 bytes) and + * bar[2] is for the flash ROM (1MB). + */ + port = p->mem[1].bar & ~0x01; + if(ioalloc(port, p->mem[1].size, 0, "i82557") < 0){ + print("i82557: port %#ux in use\n", port); + continue; + } + + ctlr = malloc(sizeof(Ctlr)); + ctlr->port = port; + ctlr->pcidev = p; + ctlr->nop = nop; + + if(ctlrhead != nil) + ctlrtail->next = ctlr; + else + ctlrhead = ctlr; + ctlrtail = ctlr; + + pcisetbme(p); + } +} + +static char* mediatable[9] = { + "10BASE-T", /* TP */ + "10BASE-2", /* BNC */ + "10BASE-5", /* AUI */ + "100BASE-TX", + "10BASE-TFD", + "100BASE-TXFD", + "100BASE-T4", + "100BASE-FX", + "100BASE-FXFD", +}; + +static int +scanphy(Ctlr* ctlr) +{ + int i, oui, x; + + for(i = 0; i < 32; i++){ + if((oui = miir(ctlr, i, 2)) == -1 || oui == 0 || oui == 0xFFFF) + continue; + oui <<= 6; + x = miir(ctlr, i, 3); + oui |= x>>10; + //print("phy%d: oui %#ux reg1 %#ux\n", i, oui, miir(ctlr, i, 1)); + + ctlr->eeprom[6] = i; + if(oui == 0xAA00) + ctlr->eeprom[6] |= 0x07<<8; + else if(oui == 0x80017){ + if(x & 0x01) + ctlr->eeprom[6] |= 0x0A<<8; + else + ctlr->eeprom[6] |= 0x04<<8; + } + return i; + } + return -1; +} + +static void +shutdown(Ether* ether) +{ + Ctlr *ctlr = ether->ctlr; + + csr32w(ctlr, Port, 0); + delay(1); + csr8w(ctlr, Interrupt, InterruptM); +} + + +static int +reset(Ether* ether) +{ + int anar, anlpar, bmcr, bmsr, i, k, medium, phyaddr, x; + unsigned short sum; + uchar ea[Eaddrlen]; + Ctlr *ctlr; + + if(ctlrhead == nil) + i82557pci(); + + /* + * Any adapter matches if no ether->port is supplied, + * otherwise the ports must match. + */ + for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(ether->port == 0 || ether->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + /* + * Initialise the Ctlr structure. 
+ * Perform a software reset after which should ensure busmastering + * is still enabled. The EtherExpress PRO/100B appears to leave + * the PCI configuration alone (see the 'To do' list above) so punt + * for now. + * Load the RUB and CUB registers for linear addressing (0). + */ + ether->ctlr = ctlr; + ether->port = ctlr->port; + ether->irq = ctlr->pcidev->intl; + ether->tbdf = ctlr->pcidev->tbdf; + + ilock(&ctlr->rlock); + csr32w(ctlr, Port, 0); + delay(1); + csr8w(ctlr, Interrupt, InterruptM); + iunlock(&ctlr->rlock); + + command(ctlr, LoadRUB, 0); + command(ctlr, LoadCUB, 0); + command(ctlr, LoadDCA, PADDR(ctlr->dump)); + + /* + * Initialise the receive frame, transmit ring and configuration areas. + */ + ctlr->ncb = Ncb; + ctlrinit(ctlr); + + /* + * Read the EEPROM. + * Do a dummy read first to get the size + * and allocate ctlr->eeprom. + */ + hy93c46r(ctlr, 0); + sum = 0; + for(i = 0; i < (1<eepromsz); i++){ + x = hy93c46r(ctlr, i); + ctlr->eeprom[i] = x; + sum += x; + } + if(sum != 0xBABA) + print("#l%d: EEPROM checksum - %#4.4ux\n", ether->ctlrno, sum); + + /* + * Eeprom[6] indicates whether there is a PHY and whether + * it's not 10Mb-only, in which case use the given PHY address + * to set any PHY specific options and determine the speed. + * Unfortunately, sometimes the EEPROM is blank except for + * the ether address and checksum; in this case look at the + * controller type and if it's am 82558 or 82559 it has an + * embedded PHY so scan for that. + * If no PHY, assume 82503 (serial) operation. 
+ */ + if((ctlr->eeprom[6] & 0x1F00) && !(ctlr->eeprom[6] & 0x8000)) + phyaddr = ctlr->eeprom[6] & 0x00FF; + else + switch(ctlr->pcidev->rid){ + case 0x01: /* 82557 A-step */ + case 0x02: /* 82557 B-step */ + case 0x03: /* 82557 C-step */ + default: + phyaddr = -1; + break; + case 0x04: /* 82558 A-step */ + case 0x05: /* 82558 B-step */ + case 0x06: /* 82559 A-step */ + case 0x07: /* 82559 B-step */ + case 0x08: /* 82559 C-step */ + case 0x09: /* 82559ER A-step */ + phyaddr = scanphy(ctlr); + break; + } + if(phyaddr >= 0){ + /* + * Resolve the highest common ability of the two + * link partners. In descending order: + * 0x0100 100BASE-TX Full Duplex + * 0x0200 100BASE-T4 + * 0x0080 100BASE-TX + * 0x0040 10BASE-T Full Duplex + * 0x0020 10BASE-T + */ + anar = miir(ctlr, phyaddr, 0x04); + anlpar = miir(ctlr, phyaddr, 0x05) & 0x03E0; + anar &= anlpar; + bmcr = 0; + if(anar & 0x380) + bmcr = 0x2000; + if(anar & 0x0140) + bmcr |= 0x0100; + + switch((ctlr->eeprom[6]>>8) & 0x001F){ + + case 0x04: /* DP83840 */ + case 0x0A: /* DP83840A */ + /* + * The DP83840[A] requires some tweaking for + * reliable operation. + * The manual says bit 10 should be unconditionally + * set although it supposedly only affects full-duplex + * operation (an & 0x0140). + */ + x = miir(ctlr, phyaddr, 0x17) & ~0x0520; + x |= 0x0420; + for(i = 0; i < ether->nopt; i++){ + if(cistrcmp(ether->opt[i], "congestioncontrol")) + continue; + x |= 0x0100; + break; + } + miiw(ctlr, phyaddr, 0x17, x); + + /* + * If the link partner can't autonegotiate, determine + * the speed from elsewhere. + */ + if(anlpar == 0){ + miir(ctlr, phyaddr, 0x01); + bmsr = miir(ctlr, phyaddr, 0x01); + x = miir(ctlr, phyaddr, 0x19); + if((bmsr & 0x0004) && !(x & 0x0040)) + bmcr = 0x2000; + } + break; + + case 0x07: /* Intel 82555 */ + /* + * Auto-negotiation may fail if the other end is + * a DP83840A and the cable is short. 
+ */ + miir(ctlr, phyaddr, 0x01); + bmsr = miir(ctlr, phyaddr, 0x01); + if((miir(ctlr, phyaddr, 0) & 0x1000) && !(bmsr & 0x0020)){ + miiw(ctlr, phyaddr, 0x1A, 0x2010); + x = miir(ctlr, phyaddr, 0); + miiw(ctlr, phyaddr, 0, 0x0200|x); + for(i = 0; i < 3000; i++){ + delay(1); + if(miir(ctlr, phyaddr, 0x01) & 0x0020) + break; + } + miiw(ctlr, phyaddr, 0x1A, 0x2000); + + anar = miir(ctlr, phyaddr, 0x04); + anlpar = miir(ctlr, phyaddr, 0x05) & 0x03E0; + anar &= anlpar; + bmcr = 0; + if(anar & 0x380) + bmcr = 0x2000; + if(anar & 0x0140) + bmcr |= 0x0100; + } + break; + } + + /* + * Force speed and duplex if no auto-negotiation. + */ + if(anlpar == 0){ + medium = -1; + for(i = 0; i < ether->nopt; i++){ + for(k = 0; k < nelem(mediatable); k++){ + if(cistrcmp(mediatable[k], ether->opt[i])) + continue; + medium = k; + break; + } + + switch(medium){ + default: + break; + + case 0x00: /* 10BASE-T */ + case 0x01: /* 10BASE-2 */ + case 0x02: /* 10BASE-5 */ + bmcr &= ~(0x2000|0x0100); + ctlr->configdata[19] &= ~0x40; + break; + + case 0x03: /* 100BASE-TX */ + case 0x06: /* 100BASE-T4 */ + case 0x07: /* 100BASE-FX */ + ctlr->configdata[19] &= ~0x40; + bmcr |= 0x2000; + break; + + case 0x04: /* 10BASE-TFD */ + bmcr = (bmcr & ~0x2000)|0x0100; + ctlr->configdata[19] |= 0x40; + break; + + case 0x05: /* 100BASE-TXFD */ + case 0x08: /* 100BASE-FXFD */ + bmcr |= 0x2000|0x0100; + ctlr->configdata[19] |= 0x40; + break; + } + } + if(medium != -1) + miiw(ctlr, phyaddr, 0x00, bmcr); + } + + if(bmcr & 0x2000) + ether->mbps = 100; + + ctlr->configdata[8] = 1; + ctlr->configdata[15] &= ~0x80; + } + else{ + ctlr->configdata[8] = 0; + ctlr->configdata[15] |= 0x80; + } + + /* + * Workaround for some broken HUB chips when connected at 10Mb/s + * half-duplex. + * This is a band-aid, but as there's no dynamic auto-negotiation + * code at the moment, only deactivate the workaround code in txstart + * if the link is 100Mb/s. 
+ */ + if(ether->mbps != 10) + ctlr->nop = 0; + + /* + * Load the chip configuration and start it off. + */ + if(ether->oq == 0) + ether->oq = qopen(256*1024, Qmsg, 0, 0); + configure(ether, 0); + command(ctlr, CUstart, PADDR(&ctlr->cbr->status)); + + /* + * Check if the adapter's station address is to be overridden. + * If not, read it from the EEPROM and set in ether->ea prior to loading + * the station address with the Individual Address Setup command. + */ + memset(ea, 0, Eaddrlen); + if(memcmp(ea, ether->ea, Eaddrlen) == 0){ + for(i = 0; i < Eaddrlen/2; i++){ + x = ctlr->eeprom[i]; + ether->ea[2*i] = x; + ether->ea[2*i+1] = x>>8; + } + } + + ilock(&ctlr->cblock); + ctlr->action = CbIAS; + txstart(ether); + iunlock(&ctlr->cblock); + + /* + * Linkage to the generic ethernet driver. + */ + ether->attach = attach; + ether->transmit = transmit; + ether->interrupt = interrupt; + ether->ifstat = ifstat; + ether->shutdown = shutdown; + + ether->promiscuous = promiscuous; + ether->multicast = multicast; + ether->arg = ether; + + return 0; +} + +void +ether82557link(void) +{ + addethercard("i82557", reset); +} diff -Nru 0/sys/src/nix/386/ether82563.c 4/sys/src/nix/386/ether82563.c --- 0/sys/src/nix/386/ether82563.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/ether82563.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1736 @@ +/* + * Intel Gigabit Ethernet PCI-Express Controllers. + * 8256[36], 8257[12], 82573[ev] + * 82575eb + * Pretty basic, does not use many of the chip smarts. + * The interrupt mitigation tuning for each chip variant + * is probably different. The reset/initialisation + * sequence needs straightened out. Doubt the PHY code + * for the 82575eb is right. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" + +#include "etherif.h" +#include "io.h" + +/* + * these are in the order they appear in the manual, not numeric order. 
+ * It was too hard to find them in the book. Ref 21489, rev 2.6
+ */
+
+/*
+ * Register offsets below are byte offsets into the mapped register
+ * window; csr32w divides them by 4 to index ctlr->nic.
+ * The entries following Statistics (Gorcl ... Nstatistics) are instead
+ * counted in 32-bit words relative to Statistics; i82563ifstat reads
+ * them as Statistics + i*4.
+ */
+enum {
+	/* General */
+
+	Ctrl		= 0x0000,	/* Device Control */
+	Status		= 0x0008,	/* Device Status */
+	Eec		= 0x0010,	/* EEPROM/Flash Control/Data */
+	Eerd		= 0x0014,	/* EEPROM Read */
+	Ctrlext		= 0x0018,	/* Extended Device Control */
+	Fla		= 0x001c,	/* Flash Access */
+	Mdic		= 0x0020,	/* MDI Control */
+	Seresctl	= 0x0024,	/* Serdes ana */
+	Fcal		= 0x0028,	/* Flow Control Address Low */
+	Fcah		= 0x002C,	/* Flow Control Address High */
+	Fct		= 0x0030,	/* Flow Control Type */
+	Kumctrlsta	= 0x0034,	/* MAC-PHY Interface */
+	Vet		= 0x0038,	/* VLAN EtherType */
+	Fcttv		= 0x0170,	/* Flow Control Transmit Timer Value */
+	Txcw		= 0x0178,	/* Transmit Configuration Word */
+	Rxcw		= 0x0180,	/* Receive Configuration Word */
+	Ledctl		= 0x0E00,	/* LED control */
+	Pba		= 0x1000,	/* Packet Buffer Allocation */
+	Pbs		= 0x1008,	/* Packet Buffer Size */
+
+	/* Interrupt */
+
+	Icr		= 0x00C0,	/* Interrupt Cause Read */
+	Itr		= 0x00c4,	/* Interrupt Throttling Rate */
+	Ics		= 0x00C8,	/* Interrupt Cause Set */
+	Ims		= 0x00D0,	/* Interrupt Mask Set/Read */
+	Imc		= 0x00D8,	/* Interrupt mask Clear */
+	Iam		= 0x00E0,	/* Interrupt acknowledge Auto Mask */
+
+	/* Receive */
+
+	Rctl		= 0x0100,	/* Control */
+	Ert		= 0x2008,	/* Early Receive Threshold (573[EVL] only) */
+	Fcrtl		= 0x2160,	/* Flow Control RX Threshold Low */
+	Fcrth		= 0x2168,	/* Flow Control Rx Threshold High */
+	Psrctl		= 0x2170,	/* Packet Split Receive Control */
+	Rdbal		= 0x2800,	/* Rdesc Base Address Low Queue 0 */
+	Rdbah		= 0x2804,	/* Rdesc Base Address High Queue 0 */
+	Rdlen		= 0x2808,	/* Descriptor Length Queue 0 */
+	Rdh		= 0x2810,	/* Descriptor Head Queue 0 */
+	Rdt		= 0x2818,	/* Descriptor Tail Queue 0 */
+	Rdtr		= 0x2820,	/* Descriptor Timer Ring */
+	Rxdctl		= 0x2828,	/* Descriptor Control */
+	Radv		= 0x282C,	/* Interrupt Absolute Delay Timer */
+	Rdbal1		= 0x2900,	/* Rdesc Base Address Low Queue 1 */
+	Rdbah1		= 0x2904,	/* Rdesc Base Address High Queue 1 */
+	Rdlen1		= 0x2908,	/* Descriptor Length Queue 1 */
+	Rdh1		= 0x2910,	/* Descriptor Head Queue 1 */
+	Rdt1		= 0x2918,	/* Descriptor Tail Queue 1 */
+	Rxdctl1		= 0x2928,	/* Descriptor Control Queue 1 */
+	Rsrpd		= 0x2c00,	/* Small Packet Detect */
+	Raid		= 0x2c08,	/* ACK interrupt delay */
+	Cpuvec		= 0x2c10,	/* CPU Vector */
+	Rxcsum		= 0x5000,	/* Checksum Control */
+	Rfctl		= 0x5008,	/* Filter Control */
+	Mta		= 0x5200,	/* Multicast Table Array */
+	Ral		= 0x5400,	/* Receive Address Low */
+	Rah		= 0x5404,	/* Receive Address High */
+	Vfta		= 0x5600,	/* VLAN Filter Table Array */
+	Mrqc		= 0x5818,	/* Multiple Receive Queues Command */
+	Rssim		= 0x5864,	/* RSS Interrupt Mask */
+	Rssir		= 0x5868,	/* RSS Interrupt Request */
+	Reta		= 0x5c00,	/* Redirection Table */
+	Rssrk		= 0x5c80,	/* RSS Random Key */
+
+	/* Transmit */
+
+	Tctl		= 0x0400,	/* Transmit Control */
+	Tipg		= 0x0410,	/* Transmit IPG */
+	Tkabgtxd	= 0x3004,	/* glci afe band gap transmit ref data, or something */
+	Tdbal		= 0x3800,	/* Tdesc Base Address Low */
+	Tdbah		= 0x3804,	/* Tdesc Base Address High */
+	Tdlen		= 0x3808,	/* Descriptor Length */
+	Tdh		= 0x3810,	/* Descriptor Head */
+	Tdt		= 0x3818,	/* Descriptor Tail */
+	Tidv		= 0x3820,	/* Interrupt Delay Value */
+	Txdctl		= 0x3828,	/* Descriptor Control */
+	Tadv		= 0x382C,	/* Interrupt Absolute Delay Timer */
+	Tarc0		= 0x3840,	/* Arbitration Counter Queue 0 */
+	Tdbal1		= 0x3900,	/* Descriptor Base Low Queue 1 */
+	Tdbah1		= 0x3904,	/* Descriptor Base High Queue 1 */
+	Tdlen1		= 0x3908,	/* Descriptor Length Queue 1 */
+	Tdh1		= 0x3910,	/* Descriptor Head Queue 1 */
+	Tdt1		= 0x3918,	/* Descriptor Tail Queue 1 */
+	Txdctl1		= 0x3928,	/* Descriptor Control 1 */
+	Tarc1		= 0x3940,	/* Arbitration Counter Queue 1 */
+
+	/* Statistics */
+
+	Statistics	= 0x4000,	/* Start of Statistics Area */
+	Gorcl		= 0x88/4,	/* Good Octets Received Count */
+	Gotcl		= 0x90/4,	/* Good Octets Transmitted Count */
+	Torl		= 0xC0/4,	/* Total Octets Received */
+	Totl		= 0xC8/4,	/* Total Octets Transmitted */
+	Nstatistics	= 0x124/4,
+};
+
+enum {					/* Ctrl */
+	GIOmd		= 1<<2,		/* GIO master disable */
+	Lrst		= 1<<3,		/* link reset */
+	Slu		= 1<<6,		/* Set Link Up */
+	SspeedMASK	= 3<<8,		/* Speed Selection */
+	SspeedSHIFT	= 8,
+	Sspeed10	= 0x00000000,	/* 10Mb/s */
+	Sspeed100	= 0x00000100,	/* 100Mb/s */
+	Sspeed1000	= 0x00000200,	/* 1000Mb/s */
+	Frcspd		= 1<<11,	/* Force Speed */
+	Frcdplx		= 1<<12,	/* Force Duplex */
+	SwdpinsloMASK	= 0x003C0000,	/* Software Defined Pins - lo nibble */
+	SwdpinsloSHIFT	= 18,
+	SwdpioloMASK	= 0x03C00000,	/* Software Defined Pins - I or O */
+	SwdpioloSHIFT	= 22,
+	Devrst		= 1<<26,	/* Device Reset */
+	Rfce		= 1<<27,	/* Receive Flow Control Enable */
+	Tfce		= 1<<28,	/* Transmit Flow Control Enable */
+	Vme		= 1<<30,	/* VLAN Mode Enable */
+	Phyrst		= 1<<31,	/* Phy Reset */
+};
+
+enum {					/* Status */
+	Lu		= 1<<1,		/* Link Up */
+	Lanid		= 3<<2,		/* mask for Lan ID. */
+	Txoff		= 1<<4,		/* Transmission Paused */
+	Tbimode		= 1<<5,		/* TBI Mode Indication */
+	Phyra		= 1<<10,	/* PHY Reset Asserted */
+	GIOme		= 1<<19,	/* GIO Master Enable Status */
+};
+
+enum {					/* Eerd */
+	EEstart		= 1<<0,		/* Start Read */
+	EEdone		= 1<<1,		/* Read done */
+};
+
+enum {					/* Ctrlext */
+	Asdchk		= 1<<12,	/* ASD Check */
+	Eerst		= 1<<13,	/* EEPROM Reset */
+	Spdbyps		= 1<<15,	/* Speed Select Bypass */
+};
+
+enum {					/* EEPROM content offsets */
+	Ea		= 0x00,		/* Ethernet Address */
+	Cf		= 0x03,		/* Compatibility Field */
+	Icw1		= 0x0A,		/* Initialization Control Word 1 */
+	Sid		= 0x0B,		/* Subsystem ID */
+	Svid		= 0x0C,		/* Subsystem Vendor ID */
+	Did		= 0x0D,		/* Device ID */
+	Vid		= 0x0E,		/* Vendor ID */
+	Icw2		= 0x0F,		/* Initialization Control Word 2 */
+};
+
+enum {					/* Mdic */
+	MDIdMASK	= 0x0000FFFF,	/* Data */
+	MDIdSHIFT	= 0,
+	MDIrMASK	= 0x001F0000,	/* PHY Register Address */
+	MDIrSHIFT	= 16,
+	MDIpMASK	= 0x03E00000,	/* PHY Address */
+	MDIpSHIFT	= 21,
+	MDIwop		= 0x04000000,	/* Write Operation */
+	MDIrop		= 0x08000000,	/* Read Operation */
+	MDIready	= 0x10000000,
/* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* phy interface registers */ + Phyctl = 0, /* phy ctl */ + Physsr = 17, /* phy secondary status */ + Phyier = 18, /* 82573 phy interrupt enable */ + Phyisr = 19, /* 82563 phy interrupt status */ + Phylhr = 19, /* 8257[12] link health */ + + Rtlink = 1<<10, /* realtime link status */ + Phyan = 1<<11, /* phy has auto-negotiated */ + + /* Phyctl bits */ + Ran = 1<<9, /* restart auto-negotiation */ + Ean = 1<<12, /* enable auto-negotiation */ + + /* 82573 Phyier bits */ + Lscie = 1<<10, /* link status changed ie */ + Ancie = 1<<11, /* auto-negotiation complete ie */ + Spdie = 1<<14, /* speed changed ie */ + Panie = 1<<15, /* phy auto-negotiation error ie */ + + /* Phylhr/Phyisr bits */ + Anf = 1<<6, /* lhr: auto-negotiation fault */ + Ane = 1<<15, /* isr: auto-negotiation error */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfg = 0x00000400, /* Receiving /C/ ordered sets */ + Gpi0 = 0x00000800, /* General Purpose Interrupts */ + Gpi1 = 0x00001000, + Gpi2 = 0x00002000, + Gpi3 = 0x00004000, + Ack = 0x00020000, /* Receive ACK frame */ +}; + +enum { /* Txcw */ + TxcwFd = 0x00000020, /* Full Duplex */ + TxcwHd = 0x00000040, /* Half Duplex */ + TxcwPauseMASK = 0x00000180, /* Pause */ + TxcwPauseSHIFT = 7, + TxcwPs = 1<nic+((r)/4))) +#define csr32w(c, r, v) (*((c)->nic+((r)/4)) = (v)) + +static Ctlr* i82563ctlrhead; +static Ctlr* i82563ctlrtail; + +static Lock i82563rblock; /* free receive Blocks */ +static Block* i82563rbpool; + +static char* statistics[] = { 
+ "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-mtu Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + "Management Packets Rx", + "Management Packets Drop", + "Management Packets Tx", + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-mtu Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", + "Interrupt Assertion", + "Interrupt Rx Pkt Timer", + "Interrupt Rx Abs Timer", + "Interrupt Tx Pkt Timer", + "Interrupt Tx Abs Timer", + "Interrupt Tx Queue Empty", + "Interrupt Tx Desc Low", + "Interrupt Rx Min", + "Interrupt Rx Overrun", +}; + +static long +i82563ifstat(Ether* edev, void* a, long n, ulong offset) +{ + Ctlr *ctlr; + char *s, *p, *e, *stat; + int i, r; + uvlong tuvl, ruvl; + + 
ctlr = edev->ctlr;
+	qlock(&ctlr->slock);
+	p = s = malloc(2*READSTR);
+	if(s == nil){
+		/* never hand a nil buffer to seprint; drop the lock before raising */
+		qunlock(&ctlr->slock);
+		error(Enomem);
+	}
+	e = p + 2*READSTR;
+
+	/*
+	 * Walk the statistics block one 32-bit register at a time.
+	 * Every register is read, even those with no name in statistics[];
+	 * only named, non-zero totals are printed.
+	 */
+	for(i = 0; i < Nstatistics; i++){
+		r = csr32r(ctlr, Statistics + i*4);
+		if((stat = statistics[i]) == nil)
+			continue;
+		switch(i){
+		case Gorcl:
+		case Gotcl:
+		case Torl:
+		case Totl:
+			/*
+			 * 64-bit counters: register i is the low word and
+			 * register i+1 the high word.  r is a signed int, so
+			 * widen it through uint; a plain assignment would
+			 * sign-extend and corrupt the high word whenever
+			 * bit 31 of the low word is set.
+			 */
+			ruvl = (uint)r;
+			ruvl += (uvlong)csr32r(ctlr, Statistics+(i+1)*4) << 32;
+			tuvl = ruvl;
+			tuvl += ctlr->statistics[i];
+			tuvl += (uvlong)ctlr->statistics[i+1] << 32;
+			if(tuvl == 0)
+				continue;
+			ctlr->statistics[i] = tuvl;
+			ctlr->statistics[i+1] = tuvl >> 32;
+			p = seprint(p, e, "%s: %llud %llud\n", stat, tuvl, ruvl);
+			i++;		/* the high word has been consumed */
+			break;
+
+		default:
+			/* accumulate into the running total kept in ctlr */
+			ctlr->statistics[i] += r;
+			if(ctlr->statistics[i] == 0)
+				continue;
+			p = seprint(p, e, "%s: %ud %ud\n", stat,
+				ctlr->statistics[i], r);
+			break;
+		}
+	}
+
+	/* driver-side counters and a snapshot of a few control registers */
+	p = seprint(p, e, "lintr: %ud %ud\n", ctlr->lintr, ctlr->lsleep);
+	p = seprint(p, e, "rintr: %ud %ud\n", ctlr->rintr, ctlr->rsleep);
+	p = seprint(p, e, "tintr: %ud %ud\n", ctlr->tintr, ctlr->txdw);
+	p = seprint(p, e, "ixcs: %ud %ud %ud\n", ctlr->ixsm, ctlr->ipcs, ctlr->tcpcs);
+	p = seprint(p, e, "rdtr: %ud\n", ctlr->rdtr);
+	p = seprint(p, e, "radv: %ud\n", ctlr->radv);
+	p = seprint(p, e, "ctrl: %.8ux\n", csr32r(ctlr, Ctrl));
+	p = seprint(p, e, "ctrlext: %.8ux\n", csr32r(ctlr, Ctrlext));
+	p = seprint(p, e, "status: %.8ux\n", csr32r(ctlr, Status));
+	p = seprint(p, e, "txcw: %.8ux\n", csr32r(ctlr, Txcw));
+	p = seprint(p, e, "txdctl: %.8ux\n", csr32r(ctlr, Txdctl));
+	p = seprint(p, e, "pba: %.8ux\n", ctlr->pba);
+
+	p = seprint(p, e, "speeds: 10:%ud 100:%ud 1000:%ud ?:%ud\n",
+		ctlr->speeds[0], ctlr->speeds[1], ctlr->speeds[2], ctlr->speeds[3]);
+	p = seprint(p, e, "type: %s\n", tname[ctlr->type]);
+
+//	p = seprint(p, e, "eeprom:");
+//	for(i = 0; i < 0x40; i++){
+//		if(i && ((i & 7) == 0))
+//			p = seprint(p, e, "\n ");
+//		p = seprint(p, e, " %4.4ux", ctlr->eeprom[i]);
+//	}
+//	p = seprint(p, e, "\n");
+
+	USED(p);
+	n = readstr(offset, a, n, s);
+	free(s);
+	qunlock(&ctlr->slock);
+
+	return n;
+}
+
+/* indices of the control messages accepted by i82563ctl */
+enum {
+	CMrdtr,
+	CMradv,
+};
+
+/* each message is a keyword plus one argument (2 fields) */
+static Cmdtab i82563ctlmsg[] = {
+	CMrdtr,	"rdtr",	2,
+	CMradv,	"radv",	2,
+};
+
+/*
+ * Handle a control message: "rdtr n" or "radv n" stores n in the
+ * controller state and writes it to the Rdtr or Radv register.
+ * n must parse as a number and must not exceed 0xFFFF, otherwise
+ * Ebadarg is raised.  The parsed command buffer is freed on both
+ * the normal and the error path.  Returns n, the byte count given.
+ */
+static long
+i82563ctl(Ether* edev, void* buf, long n)
+{
+	ulong v;
+	char *p;
+	Ctlr *ctlr;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	if((ctlr = edev->ctlr) == nil)
+		error(Enonexist);
+
+	cb = parsecmd(buf, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	ct = lookupcmd(cb, i82563ctlmsg, nelem(i82563ctlmsg));
+	switch(ct->index){
+	case CMrdtr:
+		v = strtoul(cb->f[1], &p, 0);
+		if(p == cb->f[1] || v > 0xFFFF)
+			error(Ebadarg);
+		ctlr->rdtr = v;
+		csr32w(ctlr, Rdtr, v);
+		break;
+	case CMradv:
+		v = strtoul(cb->f[1], &p, 0);
+		if(p == cb->f[1] || v > 0xFFFF)
+			error(Ebadarg);
+		ctlr->radv = v;
+		csr32w(ctlr, Radv, v);
+		break;
+	}
+	free(cb);
+	poperror();
+
+	return n;
+}
+
+/*
+ * Turn promiscuous reception on or off by setting or clearing
+ * Upe|Mpe in Rctl; the MoMASK field is cleared in either case.
+ */
+static void
+i82563promiscuous(void* arg, int on)
+{
+	int rctl;
+	Ctlr *ctlr;
+	Ether *edev;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+
+	rctl = csr32r(ctlr, Rctl);
+	rctl &= ~MoMASK;
+	if(on)
+		rctl |= Upe|Mpe;
+	else
+		rctl &= ~(Upe|Mpe);
+	csr32w(ctlr, Rctl, rctl);
+}
+
+static void
+i82563multicast(void* arg, uchar* addr, int on)
+{
+	int bit, x;
+	Ctlr *ctlr;
+	Ether *edev;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+
+	/* x selects the word of ctlr->mta, bit the bit within it */
+	x = addr[5]>>1;
+	if(ctlr->type == i82566)
+		x &= 31;
+	bit = ((addr[5] & 1)<<4)|(addr[4]>>4);
+	/*
+	 * multiple ether addresses can hash to the same filter bit,
+	 * so it's never safe to clear a filter bit.
+	 * if we want to clear filter bits, we need to keep track of
+	 * all the multicast addresses in use, clear all the filter bits,
+	 * then set the ones corresponding to in-use addresses.
+ */ + if(on) + ctlr->mta[x] |= 1<mta[x] &= ~(1<mta[x]); +} + +static Block* +i82563rballoc(void) +{ + Block *bp; + + ilock(&i82563rblock); + if((bp = i82563rbpool) != nil){ + i82563rbpool = bp->next; + bp->next = nil; +// ainc(&bp->ref); + } + iunlock(&i82563rblock); + + return bp; +} + +static void +i82563rbfree(Block* b) +{ + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, 4*KiB); + b->flag &= ~(Bpktck|Btcpck|Budpck|Bipck); + ilock(&i82563rblock); + b->next = i82563rbpool; + i82563rbpool = b; + iunlock(&i82563rblock); +} + +static void +i82563im(Ctlr* ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static void +i82563txinit(Ctlr* ctlr) +{ + int i, r; + Block *bp; + + csr32w(ctlr, Tctl, 0x0F<tdba)); + csr32w(ctlr, Tdbah, 0); + csr32w(ctlr, Tdlen, ctlr->ntd * sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + for(i = 0; i < ctlr->ntd; i++){ + if((bp = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(bp); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + csr32w(ctlr, Tidv, 128); + r = csr32r(ctlr, Txdctl); + r &= ~(WthreshMASK|PthreshSHIFT); + r |= 4<type == i82575 || ctlr->type == i82576) + r |= Qenable; + csr32w(ctlr, Tadv, 64); + csr32w(ctlr, Txdctl, r); + r = csr32r(ctlr, Tctl); + r |= Ten; + csr32w(ctlr, Tctl, r); +// if(ctlr->type == i82671) +// csr32w(ctlr, Tarc0, csr32r(ctlr, Tarc0) | 7<<24); /* yb sez? 
*/
+}
+
+/*
+ * Advance a ring index by one, wrapping with mask m (ring size - 1).
+ * NOTE(review): the mask only wraps correctly if the ring sizes
+ * (ntd, nrd) are powers of two; their definitions are not visible here.
+ */
+#define Next(x, m)	(((x)+1) & (m))
+
+/*
+ * Reclaim transmit descriptors the chip has marked done (Tdd):
+ * free the Block attached to each, clear its status, and advance
+ * the software head.  Returns the new head, also stored in c->tdh.
+ */
+static int
+i82563cleanup(Ctlr *c)
+{
+	Block *b;
+	int tdh, m, n;
+
+	tdh = c->tdh;
+	m = c->ntd-1;
+	while(c->tdba[n = Next(tdh, m)].status & Tdd){
+		tdh = n;
+		if((b = c->tb[tdh]) != nil){
+			c->tb[tdh] = nil;
+			freeb(b);
+		}else
+			iprint("82563 tx underrun!\n");
+		c->tdba[tdh].status = 0;
+	}
+
+	return c->tdh = tdh;
+}
+
+/*
+ * Move packets from the output queue onto the transmit ring.
+ * Completed descriptors are reclaimed first.  If the ring fills,
+ * the Txdw interrupt is enabled and filling stops.  The tail
+ * register is written only if the tail moved.
+ */
+static void
+i82563transmit(Ether* edev)
+{
+	Td *td;
+	Block *bp;
+	Ctlr *ctlr;
+	int tdh, tdt, m;
+
+	ctlr = edev->ctlr;
+
+	qlock(&ctlr->tlock);
+
+	/*
+	 * Free any completed packets
+	 */
+	tdh = i82563cleanup(ctlr);
+
+	/*
+	 * Try to fill the ring back up.
+	 */
+	tdt = ctlr->tdt;
+	m = ctlr->ntd-1;
+	for(;;){
+		if(Next(tdt, m) == tdh){
+			/* ring full: ask for an interrupt on descriptor write-back */
+			ctlr->txdw++;
+			i82563im(ctlr, Txdw);
+			break;
+		}
+		if((bp = qget(edev->oq)) == nil)
+			break;
+		td = &ctlr->tdba[tdt];
+		td->addr[0] = PCIWADDR(bp->rp);
+		td->control = Ide|Rs|Ifcs|Teop|BLEN(bp);
+		ctlr->tb[tdt] = bp;
+		tdt = Next(tdt, m);
+	}
+	if(ctlr->tdt != tdt){
+		ctlr->tdt = tdt;
+		csr32w(ctlr, Tdt, tdt);
+	}
+	qunlock(&ctlr->tlock);
+}
+
+/*
+ * Hand fresh receive buffers to the chip: fill descriptors from the
+ * software tail up to, but not including, the slot before the head,
+ * then publish the new tail in Rdt.  Stops early if a slot still
+ * holds a Block or the buffer pool is empty.
+ */
+static void
+i82563replenish(Ctlr* ctlr)
+{
+	Rd *rd;
+	int rdt, m;
+	Block *bp;
+
+	rdt = ctlr->rdt;
+	m = ctlr->nrd-1;
+	while(Next(rdt, m) != ctlr->rdh){
+		rd = &ctlr->rdba[rdt];
+		if(ctlr->rb[rdt] != nil){
+			/* this is the receive ring: the slot was never drained */
+			iprint("82563: rx overrun\n");
+			break;
+		}
+		bp = i82563rballoc();
+		if(bp == nil){
+			iprint("82563: no available buffers\n");
+			break;
+		}
+		ctlr->rb[rdt] = bp;
+		rd->addr[0] = PCIWADDR(bp->rp);
+//		rd->addr[1] = 0;
+		rd->status = 0;
+		ctlr->rdfree++;
+		rdt = Next(rdt, m);
+	}
+	ctlr->rdt = rdt;
+	csr32w(ctlr, Rdt, rdt);
+}
+
+static void
+i82563rxinit(Ctlr* ctlr)
+{
+	Block *bp;
+	int i, r, rctl;
+
+	/* choose the receive-control buffer size bits from the buffer size */
+	if(ctlr->rbsz <= 2048)
+		rctl = Dpf|Bsize2048|Bam|RdtmsHALF;
+	else if(ctlr->rbsz <= 8192)
+		rctl = Lpe|Dpf|Bsize8192|Bsex|Bam|RdtmsHALF|Secrc;
+	else if(ctlr->rbsz <= 12*1024){
+		/* flexible size is given in KB, rounded up */
+		i = ctlr->rbsz / 1024;
+		if(ctlr->rbsz % 1024)
+			i++;
+		rctl = Lpe|Dpf|BsizeFlex*i|Bam|RdtmsHALF|Secrc;
+	}
+	else
+		rctl =
Lpe|Dpf|Bsize16384|Bsex|Bam|RdtmsHALF|Secrc; + + if(ctlr->type == i82575 || ctlr->type == i82576){ + /* + * Setting Qenable in Rxdctl does not + * appear to stick unless Ren is on. + */ + csr32w(ctlr, Rctl, Ren|rctl); + r = csr32r(ctlr, Rxdctl); + r |= Qenable; + csr32w(ctlr, Rxdctl, r); + } + csr32w(ctlr, Rctl, rctl); + + if(ctlr->type == i82573) + csr32w(ctlr, Ert, 1024/8); + + if(ctlr->type == i82566) + csr32w(ctlr, Pbs, 16); + + csr32w(ctlr, Rdbal, PCIWADDR(ctlr->rdba)); + csr32w(ctlr, Rdbah, 0); + csr32w(ctlr, Rdlen, ctlr->nrd * sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + ctlr->rdtr = 25; + ctlr->radv = 500; + csr32w(ctlr, Rdtr, ctlr->rdtr); + csr32w(ctlr, Radv, ctlr->radv); + + for(i = 0; i < ctlr->nrd; i++){ + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + } + i82563replenish(ctlr); + + if(ctlr->type == i82575 || ctlr->type == i82576){ + /* + * See comment above for Qenable. + * Could shuffle the code? + */ + r = csr32r(ctlr, Rxdctl); + r &= ~(WthreshSHIFT|PthreshSHIFT); + r |= (2<rim != 0; +} + +static void +i82563rproc(void* arg) +{ + Rd *rd; + Block *bp; + Ctlr *ctlr; + int r, m, rdh, rim; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + i82563rxinit(ctlr); + r = csr32r(ctlr, Rctl); + r |= Ren; + csr32w(ctlr, Rctl, r); + m = ctlr->nrd-1; + + for(;;){ + i82563im(ctlr, Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + ctlr->rsleep++; +// coherence(); + sleep(&ctlr->rrendez, i82563rim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + rim = ctlr->rim; + ctlr->rim = 0; + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + bp = ctlr->rb[rdh]; + if((rd->status & Reop) && rd->errors == 0){ + bp->wp += rd->length; + bp->lim = bp->wp; /* lie like a dog. 
*/ + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flag |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). + */ + ctlr->tcpcs++; + bp->flag |= Btcpck|Budpck; + } + bp->checksum = rd->checksum; + bp->flag |= Bpktck; + } + etheriq(edev, bp, 1); + } else + freeb(bp); + ctlr->rb[rdh] = nil; + + rd->status = 0; + ctlr->rdfree--; + ctlr->rdh = rdh = Next(rdh, m); + if(ctlr->nrd-ctlr->rdfree >= 32 || (rim & Rxdmt0)) + i82563replenish(ctlr); + } + } +} + +static int +i82563lim(void* c) +{ + return ((Ctlr*)c)->lim != 0; +} + +static int speedtab[] = { + 10, 100, 1000, 0 +}; + +static uint +phyread(Ctlr *c, int reg) +{ + uint phy, i; + + csr32w(c, Mdic, MDIrop | 1<ctlr; + + if(c->type == i82573 && (phy = phyread(c, Phyier)) != ~0) + phywrite(c, Phyier, phy | Lscie | Ancie | Spdie | Panie); + for(;;){ + phy = phyread(c, Physsr); + if(phy == ~0) + goto next; + i = (phy>>14) & 3; + + switch(c->type){ + case i82563: + a = phyread(c, Phyisr) & Ane; + break; + case i82571: + case i82572: + a = phyread(c, Phylhr) & Anf; + i = (i-1) & 3; + break; + default: + a = 0; + break; + } + if(a) + phywrite(c, Phyctl, phyread(c, Phyctl) | Ran | Ean); + e->link = (phy & Rtlink) != 0; + if(e->link){ + c->speeds[i]++; + e->mbps = speedtab[i]; + } +next: + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +static void +i82563tproc(void *v) +{ + Ether *e; + Ctlr *c; + + e = v; + c = e->ctlr; + for(;;){ + sleep(&c->trendez, return0, 0); + i82563transmit(e); + } +} + +static void +i82563attach(Ether* edev) +{ + Block *bp; + Ctlr *ctlr; + char name[KNAMELEN]; + + ctlr = edev->ctlr; + qlock(&ctlr->alock); + if(ctlr->attached){ + qunlock(&ctlr->alock); + return; + } + + ctlr->nrd = Nrd; + ctlr->ntd = Ntd; + + if(waserror()){ + while(ctlr->nrb > 0){ + bp = i82563rballoc(); + bp->free = nil; + 
freeb(bp); + ctlr->nrb--; + } + free(ctlr->tb); + ctlr->tb = nil; + free(ctlr->rb); + ctlr->rb = nil; + free(ctlr->tdba); + ctlr->tdba = nil; + free(ctlr->rdba); + ctlr->rdba = nil; + qunlock(&ctlr->alock); + nexterror(); + } + + if((ctlr->rdba = mallocalign(ctlr->nrd*sizeof(Rd), 128, 0, 0)) == nil) + error(Enomem); + if((ctlr->tdba = mallocalign(ctlr->ntd*sizeof(Td), 128, 0, 0)) == nil) + error(Enomem); + if((ctlr->rb = malloc(ctlr->nrd*sizeof(Block*))) == nil) + error(Enomem); + if((ctlr->tb = malloc(ctlr->ntd*sizeof(Block*))) == nil) + error(Enomem); + + for(ctlr->nrb = 0; ctlr->nrb < Nrb; ctlr->nrb++){ + if((bp = allocb(ctlr->rbsz + 4*KiB)) == nil) + break; + bp->free = i82563rbfree; + freeb(bp); + } + + ctlr->attached = 1; + + snprint(name, sizeof name, "#l%dl", edev->ctlrno); + kproc(name, i82563lproc, edev); + + snprint(name, sizeof name, "#l%dr", edev->ctlrno); + kproc(name, i82563rproc, edev); + + snprint(name, sizeof name, "#l%dt", edev->ctlrno); + kproc(name, i82563tproc, edev); + + i82563txinit(ctlr); + + qunlock(&ctlr->alock); + poperror(); +} + +static void +i82563interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + int icr, im; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + + for(icr = csr32r(ctlr, Icr); icr & ctlr->im; icr = csr32r(ctlr, Icr)){ + if(icr & Lsc){ + im &= ~Lsc; + ctlr->lim = icr & Lsc; + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack)){ + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + ctlr->tintr++; + wakeup(&ctlr->trendez); + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); +} + +static int +i82563detach(Ctlr* ctlr) +{ + int r, timeo; + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal 
registers. + */ + csr32w(ctlr, Imc, ~0); + csr32w(ctlr, Rctl, 0); + csr32w(ctlr, Tctl, 0); + + delay(10); + + r = csr32r(ctlr, Ctrl); + if(ctlr->type == i82566) + r |= Phyrst; + csr32w(ctlr, Ctrl, Devrst | r); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrl) & Devrst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrl) & Devrst) + return -1; + + r = csr32r(ctlr, Ctrlext); + csr32w(ctlr, Ctrlext, r|Eerst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrlext) & Eerst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrlext) & Eerst) + return -1; + + csr32w(ctlr, Imc, ~0); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!csr32r(ctlr, Icr)) + break; + delay(1); + } + if(csr32r(ctlr, Icr)) + return -1; + + /* + * Balance Rx/Tx packet buffer. + * No need to set PBA register unless using jumbo, defaults to 32KB + * for receive. If it is changed, then have to do a MAC reset, + * and need to do that at the the right time as it will wipe stuff. 
+	 */
+	if(ctlr->rbsz > 8192 && (ctlr->type == i82563 || ctlr->type == i82571 ||
+	    ctlr->type == i82572)){
+		/* split the packet buffer evenly: average the two 16-bit halves of Pba */
+		ctlr->pba = csr32r(ctlr, Pba);
+		r = ctlr->pba >> 16;
+		r += ctlr->pba & 0xffff;
+		r >>= 1;
+		csr32w(ctlr, Pba, r);
+	} else if(ctlr->type == i82573 && ctlr->rbsz > 1514)
+		csr32w(ctlr, Pba, 14);
+	ctlr->pba = csr32r(ctlr, Pba);
+
+	r = csr32r(ctlr, Ctrl);
+	csr32w(ctlr, Ctrl, Slu|r);
+
+	return 0;
+}
+
+/* shutdown hook: reset the chip, ignoring the result of the reset */
+static void
+i82563shutdown(Ether* ether)
+{
+	i82563detach(ether->ctlr);
+}
+
+/*
+ * Read one 16-bit EEPROM word through the Eerd register.
+ * NOTE(review): this spins without bound until EEdone is set and has
+ * no way to report failure; a device that never completes the read
+ * hangs the caller.  Bounding it needs an error convention agreed
+ * with the callers.
+ */
+static ushort
+eeread(Ctlr *ctlr, int adr)
+{
+	csr32w(ctlr, Eerd, EEstart | adr << 2);
+	while ((csr32r(ctlr, Eerd) & EEdone) == 0)
+		;
+	return csr32r(ctlr, Eerd) >> 16;
+}
+
+/*
+ * Copy the first 0x40 EEPROM words into ctlr->eeprom and return
+ * their 16-bit sum, which the caller compares against the expected
+ * checksum.
+ */
+static int
+eeload(Ctlr *ctlr)
+{
+	ushort sum;
+	int data, adr;
+
+	sum = 0;
+	for (adr = 0; adr < 0x40; adr++) {
+		data = eeread(ctlr, adr);
+		ctlr->eeprom[adr] = data;
+		sum += data;
+	}
+	return sum;
+}
+
+/*
+ * Prepare the flash interface for a new cycle.
+ * Fails (-1) if Fvalid is clear or if Scip is still set after
+ * 10 polls 1ms apart; returns 0 once no cycle is in progress.
+ * Fcerr|Ael are written back to the status register first
+ * (presumably write-one-to-clear -- confirm against the chipset
+ * flash documentation).
+ */
+static int
+fcycle(Ctlr *, Flash *f)
+{
+	ushort s, i;
+
+	s = f->reg[Fsts];
+	if((s&Fvalid) == 0)
+		return -1;
+	f->reg[Fsts] |= Fcerr | Ael;
+	for(i = 0; i < 10; i++){
+		if((s&Scip) == 0)
+			return 0;
+		delay(1);
+		s = f->reg[Fsts];
+	}
+	return -1;
+}
+
+enum {
+	Nflspin		= 1000000,	/* polls of Fdone before a flash read is abandoned */
+};
+
+/*
+ * Read the 16-bit word at flash byte address ladr.
+ * Returns the word, or -1 if the interface is not ready, the cycle
+ * does not complete within Nflspin polls, or the status register
+ * reports Fcerr or Ael afterwards.
+ */
+static int
+fread(Ctlr *c, Flash *f, int ladr)
+{
+	ushort s;
+	int n;
+
+	delay(1);
+	if(fcycle(c, f) == -1)
+		return -1;
+	f->reg[Fsts] |= Fdone;
+	f->reg32[Faddr] = ladr;
+
+	/* setup flash control register */
+	s = f->reg[Fctl];
+	s &= ~(0x1f << 8);
+	s |= (2-1) << 8;		/* 2 bytes */
+	s &= ~(2*Flcycle);		/* read */
+	f->reg[Fctl] = s | Fgo;
+
+	/* bounded wait: a wedged flash cycle must not hang the caller */
+	for(n = 0; (f->reg[Fsts] & Fdone) == 0; n++)
+		if(n >= Nflspin)
+			return -1;
+	if(f->reg[Fsts] & (Fcerr|Ael))
+		return -1;
+	return f->reg32[Fdata] & 0xffff;
+}
+
+/*
+ * Load the first 0x40 NVM words from flash into c->eeprom.
+ * The flash registers are mapped from PCI region 1 for the duration
+ * of the call.  Returns the 16-bit sum of the words, or -1 if the
+ * mapping or any flash read fails.  A partial sum is never returned,
+ * because i82563reset accepts a sum of 0 as a valid checksum.
+ */
+static int
+fload(Ctlr *c)
+{
+	ulong data, io, r, adr;
+	ushort sum;
+	Flash f;
+
+	io = c->pcidev->mem[1].bar & ~0x0f;
+	f.reg = vmap(io, c->pcidev->mem[1].size);
+	if(f.reg == nil)
+		return -1;
+	f.reg32 = (void*)f.reg;
+	f.sz = f.reg32[Bfpr];
+	/* base of the NVM image: low 13 bits of Bfpr, in 4K units */
+	r = f.sz & 0x1fff;
+	if(csr32r(c, Eec) & (1<<22))
+		++r;
+	r <<= 12;
+
+	sum = 0;
+	for (adr = 0; adr < 0x40; adr++) {
+		data = fread(c, &f, r + adr*2);
+		if(data == -1){
+			vunmap(f.reg, c->pcidev->mem[1].size);
+			return -1;
+		}
+		c->eeprom[adr] = data;
+		sum += data;
+	}
+	vunmap(f.reg, c->pcidev->mem[1].size);
+	return sum;
+}
+
+/*
+ * Reset the controller and load its station address.
+ * Resets the chip, reads the NVM image (from flash on the i82566,
+ * from EEPROM otherwise) and checks its checksum, derives the MAC
+ * address, programs receive address 0, clears the other 15 receive
+ * addresses and the multicast table, and sets the flow-control
+ * registers.  Returns 0 on success, -1 on failure.
+ */
+static int
+i82563reset(Ctlr *ctlr)
+{
+	int i, r;
+
+	if(i82563detach(ctlr))
+		return -1;
+	if(ctlr->type == i82566)
+		r = fload(ctlr);
+	else
+		r = eeload(ctlr);
+	if (r != 0 && r != 0xBABA){
+		print("%s: bad EEPROM checksum - %#.4ux\n",
+			tname[ctlr->type], r);
+		return -1;
+	}
+
+	/* each EEPROM word holds two address bytes, low byte first */
+	for(i = 0; i < Eaddrlen/2; i++){
+		ctlr->ra[2*i] = ctlr->eeprom[Ea+i];
+		ctlr->ra[2*i+1] = ctlr->eeprom[Ea+i] >> 8;
+	}
+	r = (csr32r(ctlr, Status) & Lanid) >> 2;
+	ctlr->ra[5] += r;		/* ea ctlr[1] = ea ctlr[0]+1 */
+
+	r = ctlr->ra[3]<<24 | ctlr->ra[2]<<16 | ctlr->ra[1]<<8 | ctlr->ra[0];
+	csr32w(ctlr, Ral, r);
+	r = 0x80000000 | ctlr->ra[5]<<8 | ctlr->ra[4];
+	csr32w(ctlr, Rah, r);
+	for(i = 1; i < 16; i++){
+		csr32w(ctlr, Ral+i*8, 0);
+		csr32w(ctlr, Rah+i*8, 0);
+	}
+	memset(ctlr->mta, 0, sizeof(ctlr->mta));
+	for(i = 0; i < 128; i++)
+		csr32w(ctlr, Mta + i*4, 0);
+
+	/*
+	 * Does autonegotiation affect this manual setting?
+	 * The correct values here should depend on the PBA value
+	 * and maximum frame length, no?
+	 * ctlr->fcrt[lh] are never set so default to 0.
+	 */
+	csr32w(ctlr, Fcal, 0x00C28001);
+	csr32w(ctlr, Fcah, 0x0100);
+	csr32w(ctlr, Fct, 0x8808);
+	csr32w(ctlr, Fcttv, 0x0100);
+
+	csr32w(ctlr, Fcrtl, ctlr->fcrtl);
+	csr32w(ctlr, Fcrth, ctlr->fcrth);
+
+	return 0;
+}
+
+/*
+ * Scan the PCI bus for supported Intel (vendor 0x8086) controllers.
+ * For each match: map register region 0, allocate and fill in a Ctlr,
+ * reset the chip, enable bus mastering, and append the Ctlr to the
+ * i82563ctlrhead/tail list.  A device that cannot be mapped,
+ * allocated or reset is skipped with its mapping released.
+ */
+static void
+i82563pci(void)
+{
+	int type;
+	ulong io;
+	void *mem;
+	Pcidev *p;
+	Ctlr *ctlr;
+
+	p = nil;
+	while(p = pcimatch(p, 0x8086, 0)){
+		/* translate the PCI device id into a controller type */
+		switch(p->did){
+		default:
+			continue;
+		case 0x1096:
+		case 0x10ba:
+			type = i82563;
+			break;
+		case 0x1049:		/* mm */
+		case 0x104a:		/* dm */
+		case 0x104d:		/* v */
+		case 0x10bd:		/* dm */
+			type = i82566;
+			break;
+		case 0x10a4:
+		case 0x105e:
+			type = i82571;
+			break;
+		case 0x10b9:		/* sic, 82572 */
+			type = i82572;
+			break;
+		case 0x108b:		/* e */
+		case 0x108c:		/* e (iamt) */
+		case 0x109a:		/* l */
+			type = i82573;
+			break;
+		case 0x10a7:		/* 82575eb */
+			type = i82575;
+			break;
+		case 0x10c9:		/* 82576 copper */
+		case 0x10e6:		/* 82576 fiber */
+		case 0x10e7:		/* 82576 serdes */
+			type = i82576;
+			break;
+		}
+
+		io = p->mem[0].bar & ~0x0F;
+		mem = vmap(io, p->mem[0].size);
+		if(mem == nil){
+			print("%s: can't map %.8lux\n", tname[type], io);
+			continue;
+		}
+		ctlr = malloc(sizeof(Ctlr));
+		if(ctlr == nil){
+			/* out of memory: release the mapping and skip this device */
+			print("%s: can't allocate memory\n", tname[type]);
+			vunmap(mem, p->mem[0].size);
+			continue;
+		}
+		ctlr->port = io;
+		ctlr->pcidev = p;
+		ctlr->type = type;
+		ctlr->rbsz = rbtab[type];
+		ctlr->nic = mem;
+
+		if(i82563reset(ctlr)){
+			vunmap(mem, p->mem[0].size);
+			free(ctlr);
+			continue;
+		}
+		pcisetbme(p);
+
+		if(i82563ctlrhead != nil)
+			i82563ctlrtail->next = ctlr;
+		else
+			i82563ctlrhead = ctlr;
+		i82563ctlrtail = ctlr;
+	}
+}
+
+static int
+pnp(Ether* edev, int type)
+{
+	Ctlr *ctlr;
+	static int done;
+
+	/* scan the PCI bus only on the first call */
+	if(!done) {
+		i82563pci();
+		done = 1;
+	}
+
+	/*
+	 * Any adapter matches if no edev->port is supplied,
+	 * otherwise the ports must match.
+ */ + for(ctlr = i82563ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(type != Iany && ctlr->type != type) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->maxmtu = ctlr->rbsz; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. + */ + edev->attach = i82563attach; + edev->transmit = i82563transmit; + edev->interrupt = i82563interrupt; + edev->ifstat = i82563ifstat; + edev->ctl = i82563ctl; + + edev->arg = edev; + edev->promiscuous = i82563promiscuous; + edev->shutdown = i82563shutdown; + edev->multicast = i82563multicast; + + return 0; +} + +static int +anypnp(Ether *e) +{ + return pnp(e, Iany); +} + +static int +i82563pnp(Ether *e) +{ + return pnp(e, i82563); +} + +static int +i82566pnp(Ether *e) +{ + return pnp(e, i82566); +} + +static int +i82571pnp(Ether *e) +{ + return pnp(e, i82571); +} + +static int +i82572pnp(Ether *e) +{ + return pnp(e, i82572); +} + +static int +i82573pnp(Ether *e) +{ + return pnp(e, i82573); +} + +static int +i82575pnp(Ether *e) +{ + return pnp(e, i82575); +} + +static int +i82576pnp(Ether *e) +{ + return pnp(e, i82576); +} + +void +ether82563link(void) +{ + /* recognise lots of model numbers for debugging assistance */ + addethercard("i82563", i82563pnp); + addethercard("i82566", i82566pnp); + addethercard("i82571", i82571pnp); + addethercard("i82572", i82572pnp); + addethercard("i82573", i82573pnp); + addethercard("i82575", i82575pnp); + addethercard("i82576", i82576pnp); + addethercard("igbepcie", anypnp); +} diff -Nru 0/sys/src/nix/386/etherigbe.c 4/sys/src/nix/386/etherigbe.c --- 0/sys/src/nix/386/etherigbe.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/etherigbe.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2067 @@ +/* + * Intel 8254[340]NN 
Gigabit Ethernet PCI Controllers + * as found on the Intel PRO/1000 series of adapters: + * 82543GC Intel PRO/1000 T + * 82544EI Intel PRO/1000 XT + * 82540EM Intel PRO/1000 MT + * 82541[GP]I + * 82547GI + * 82546GB + * 82546EB + * To Do: + * finish autonegotiation code; + * integrate fiber stuff back in (this ONLY handles + * the CAT5 cards at the moment); + * add checksum-offload; + * add tuning control via ctl file; + * this driver is little-endian specific. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/ethermii.h" +#include "../port/netif.h" + +#include "etherif.h" +#include "io.h" + +enum { + i82542 = (0x1000<<16)|0x8086, + i82543gc = (0x1004<<16)|0x8086, + i82544ei = (0x1008<<16)|0x8086, + i82544eif = (0x1009<<16)|0x8086, + i82544gc = (0x100d<<16)|0x8086, + i82540em = (0x100E<<16)|0x8086, + i82540eplp = (0x101E<<16)|0x8086, + i82545em = (0x100F<<16)|0x8086, + i82545gmc = (0x1026<<16)|0x8086, + i82547ei = (0x1019<<16)|0x8086, + i82547gi = (0x1075<<16)|0x8086, + i82541ei = (0x1013<<16)|0x8086, + i82541gi = (0x1076<<16)|0x8086, + i82541gi2 = (0x1077<<16)|0x8086, + i82541pi = (0x107c<<16)|0x8086, + i82546gb = (0x1079<<16)|0x8086, + i82546eb = (0x1010<<16)|0x8086, +}; + +enum { + Ctrl = 0x00000000, /* Device Control */ + Ctrldup = 0x00000004, /* Device Control Duplicate */ + Status = 0x00000008, /* Device Status */ + Eecd = 0x00000010, /* EEPROM/Flash Control/Data */ + Ctrlext = 0x00000018, /* Extended Device Control */ + Mdic = 0x00000020, /* MDI Control */ + Fcal = 0x00000028, /* Flow Control Address Low */ + Fcah = 0x0000002C, /* Flow Control Address High */ + Fct = 0x00000030, /* Flow Control Type */ + Icr = 0x000000C0, /* Interrupt Cause Read */ + Ics = 0x000000C8, /* Interrupt Cause Set */ + Ims = 0x000000D0, /* Interrupt Mask Set/Read */ + Imc = 0x000000D8, /* Interrupt mask Clear */ + Rctl = 0x00000100, /* Receive Control */ + Fcttv = 0x00000170, /* Flow 
Control Transmit Timer Value */ + Txcw = 0x00000178, /* Transmit Configuration Word */ + Rxcw = 0x00000180, /* Receive Configuration Word */ + /* on the oldest cards (8254[23]), the Mta register is at 0x200 */ + Tctl = 0x00000400, /* Transmit Control */ + Tipg = 0x00000410, /* Transmit IPG */ + Tbt = 0x00000448, /* Transmit Burst Timer */ + Ait = 0x00000458, /* Adaptive IFS Throttle */ + Fcrtl = 0x00002160, /* Flow Control RX Threshold Low */ + Fcrth = 0x00002168, /* Flow Control Rx Threshold High */ + Rdfh = 0x00002410, /* Receive data fifo head */ + Rdft = 0x00002418, /* Receive data fifo tail */ + Rdfhs = 0x00002420, /* Receive data fifo head saved */ + Rdfts = 0x00002428, /* Receive data fifo tail saved */ + Rdfpc = 0x00002430, /* Receive data fifo packet count */ + Rdbal = 0x00002800, /* Rd Base Address Low */ + Rdbah = 0x00002804, /* Rd Base Address High */ + Rdlen = 0x00002808, /* Receive Descriptor Length */ + Rdh = 0x00002810, /* Receive Descriptor Head */ + Rdt = 0x00002818, /* Receive Descriptor Tail */ + Rdtr = 0x00002820, /* Receive Descriptor Timer Ring */ + Rxdctl = 0x00002828, /* Receive Descriptor Control */ + Radv = 0x0000282C, /* Receive Interrupt Absolute Delay Timer */ + Txdmac = 0x00003000, /* Transfer DMA Control */ + Ett = 0x00003008, /* Early Transmit Control */ + Tdfh = 0x00003410, /* Transmit data fifo head */ + Tdft = 0x00003418, /* Transmit data fifo tail */ + Tdfhs = 0x00003420, /* Transmit data Fifo Head saved */ + Tdfts = 0x00003428, /* Transmit data fifo tail saved */ + Tdfpc = 0x00003430, /* Transmit data Fifo packet count */ + Tdbal = 0x00003800, /* Td Base Address Low */ + Tdbah = 0x00003804, /* Td Base Address High */ + Tdlen = 0x00003808, /* Transmit Descriptor Length */ + Tdh = 0x00003810, /* Transmit Descriptor Head */ + Tdt = 0x00003818, /* Transmit Descriptor Tail */ + Tidv = 0x00003820, /* Transmit Interrupt Delay Value */ + Txdctl = 0x00003828, /* Transmit Descriptor Control */ + Tadv = 0x0000382C, /* Transmit Interrupt 
Absolute Delay Timer */ + + Statistics = 0x00004000, /* Start of Statistics Area */ + Gorcl = 0x88/4, /* Good Octets Received Count */ + Gotcl = 0x90/4, /* Good Octets Transmitted Count */ + Torl = 0xC0/4, /* Total Octets Received */ + Totl = 0xC8/4, /* Total Octets Transmitted */ + Nstatistics = 64, + + Rxcsum = 0x00005000, /* Receive Checksum Control */ + Mta = 0x00005200, /* Multicast Table Array */ + Ral = 0x00005400, /* Receive Address Low */ + Rah = 0x00005404, /* Receive Address High */ + Manc = 0x00005820, /* Management Control */ +}; + +enum { /* Ctrl */ + Bem = 0x00000002, /* Big Endian Mode */ + Prior = 0x00000004, /* Priority on the PCI bus */ + Lrst = 0x00000008, /* Link Reset */ + Asde = 0x00000020, /* Auto-Speed Detection Enable */ + Slu = 0x00000040, /* Set Link Up */ + Ilos = 0x00000080, /* Invert Loss of Signal (LOS) */ + SspeedMASK = 0x00000300, /* Speed Selection */ + SspeedSHIFT = 8, + Sspeed10 = 0x00000000, /* 10Mb/s */ + Sspeed100 = 0x00000100, /* 100Mb/s */ + Sspeed1000 = 0x00000200, /* 1000Mb/s */ + Frcspd = 0x00000800, /* Force Speed */ + Frcdplx = 0x00001000, /* Force Duplex */ + SwdpinsloMASK = 0x003C0000, /* Software Defined Pins - lo nibble */ + SwdpinsloSHIFT = 18, + SwdpioloMASK = 0x03C00000, /* Software Defined Pins - I or O */ + SwdpioloSHIFT = 22, + Devrst = 0x04000000, /* Device Reset */ + Rfce = 0x08000000, /* Receive Flow Control Enable */ + Tfce = 0x10000000, /* Transmit Flow Control Enable */ + Vme = 0x40000000, /* VLAN Mode Enable */ +}; + +/* + * can't find Tckok nor Rbcok in any Intel docs, + * but even 82543gc docs define Lanid. + */ +enum { /* Status */ + Lu = 0x00000002, /* Link Up */ + Lanid = 0x0000000C, /* mask for Lan ID. 
(function id) */ +// Tckok = 0x00000004, /* Transmit clock is running */ +// Rbcok = 0x00000008, /* Receive clock is running */ + Txoff = 0x00000010, /* Transmission Paused */ + Tbimode = 0x00000020, /* TBI Mode Indication */ + LspeedMASK = 0x000000C0, /* Link Speed Setting */ + LspeedSHIFT = 6, + Lspeed10 = 0x00000000, /* 10Mb/s */ + Lspeed100 = 0x00000040, /* 100Mb/s */ + Lspeed1000 = 0x00000080, /* 1000Mb/s */ + Mtxckok = 0x00000400, /* MTX clock is running */ + Pci66 = 0x00000800, /* PCI Bus speed indication */ + Bus64 = 0x00001000, /* PCI Bus width indication */ + Pcixmode = 0x00002000, /* PCI-X mode */ + PcixspeedMASK = 0x0000C000, /* PCI-X bus speed */ + PcixspeedSHIFT = 14, + Pcix66 = 0x00000000, /* 50-66MHz */ + Pcix100 = 0x00004000, /* 66-100MHz */ + Pcix133 = 0x00008000, /* 100-133MHz */ +}; + +enum { /* Ctrl and Status */ + Fd = 0x00000001, /* Full-Duplex */ + AsdvMASK = 0x00000300, + AsdvSHIFT = 8, + Asdv10 = 0x00000000, /* 10Mb/s */ + Asdv100 = 0x00000100, /* 100Mb/s */ + Asdv1000 = 0x00000200, /* 1000Mb/s */ +}; + +enum { /* Eecd */ + Sk = 0x00000001, /* Clock input to the EEPROM */ + Cs = 0x00000002, /* Chip Select */ + Di = 0x00000004, /* Data Input to the EEPROM */ + Do = 0x00000008, /* Data Output from the EEPROM */ + Areq = 0x00000040, /* EEPROM Access Request */ + Agnt = 0x00000080, /* EEPROM Access Grant */ + Eepresent = 0x00000100, /* EEPROM Present */ + Eesz256 = 0x00000200, /* EEPROM is 256 words not 64 */ + Eeszaddr = 0x00000400, /* EEPROM size for 8254[17] */ + Spi = 0x00002000, /* EEPROM is SPI not Microwire */ +}; + +enum { /* Ctrlext */ + Gpien = 0x0000000F, /* General Purpose Interrupt Enables */ + SwdpinshiMASK = 0x000000F0, /* Software Defined Pins - hi nibble */ + SwdpinshiSHIFT = 4, + SwdpiohiMASK = 0x00000F00, /* Software Defined Pins - I or O */ + SwdpiohiSHIFT = 8, + Asdchk = 0x00001000, /* ASD Check */ + Eerst = 0x00002000, /* EEPROM Reset */ + Ips = 0x00004000, /* Invert Power State */ + Spdbyps = 0x00008000, /* Speed Select 
Bypass */ +}; + +enum { /* EEPROM content offsets */ + Ea = 0x00, /* Ethernet Address */ + Cf = 0x03, /* Compatibility Field */ + Pba = 0x08, /* Printed Board Assembly number */ + Icw1 = 0x0A, /* Initialization Control Word 1 */ + Sid = 0x0B, /* Subsystem ID */ + Svid = 0x0C, /* Subsystem Vendor ID */ + Did = 0x0D, /* Device ID */ + Vid = 0x0E, /* Vendor ID */ + Icw2 = 0x0F, /* Initialization Control Word 2 */ +}; + +enum { /* Mdic */ + MDIdMASK = 0x0000FFFF, /* Data */ + MDIdSHIFT = 0, + MDIrMASK = 0x001F0000, /* PHY Register Address */ + MDIrSHIFT = 16, + MDIpMASK = 0x03E00000, /* PHY Address */ + MDIpSHIFT = 21, + MDIwop = 0x04000000, /* Write Operation */ + MDIrop = 0x08000000, /* Read Operation */ + MDIready = 0x10000000, /* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rd Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfg = 0x00000400, /* Receiving /C/ ordered sets */ + Gpi0 = 0x00000800, /* General Purpose Interrupts */ + Gpi1 = 0x00001000, + Gpi2 = 0x00002000, + Gpi3 = 0x00004000, +}; + +/* + * The Mdic register isn't implemented on the 82543GC, + * the software defined pins are used instead. + * These definitions work for the Intel PRO/1000 T Server Adapter. + * The direction pin bits are read from the EEPROM. 
+ */ +enum { + Mdd = ((1<<2)<nic+((r)/4))) +#define csr32w(c, r, v) (*((c)->nic+((r)/4)) = (v)) + +static Ctlr* igbectlrhead; +static Ctlr* igbectlrtail; + +static Lock igberblock; /* free receive Blocks */ +static Block* igberbpool; /* receive Blocks for all igbe controllers */ + +static char* statistics[Nstatistics] = { + "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-1522 Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + nil, + nil, + nil, + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-1522 Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", +}; + +static long +igbeifstat(Ether* edev, void* a, long n, ulong offset) +{ + Ctlr *ctlr; + char *p, *s; + int i, l, r; + uvlong tuvl, 
ruvl; + + ctlr = edev->ctlr; + qlock(&ctlr->slock); + p = malloc(READSTR); + if(p == nil) { + qunlock(&ctlr->slock); + error(Enomem); + } + l = 0; + for(i = 0; i < Nstatistics; i++){ + r = csr32r(ctlr, Statistics+i*4); + if((s = statistics[i]) == nil) + continue; + switch(i){ + case Gorcl: + case Gotcl: + case Torl: + case Totl: + ruvl = r; + ruvl += ((uvlong)csr32r(ctlr, Statistics+(i+1)*4))<<32; + tuvl = ruvl; + tuvl += ctlr->statistics[i]; + tuvl += ((uvlong)ctlr->statistics[i+1])<<32; + if(tuvl == 0) + continue; + ctlr->statistics[i] = tuvl; + ctlr->statistics[i+1] = tuvl>>32; + l += snprint(p+l, READSTR-l, "%s: %llud %llud\n", + s, tuvl, ruvl); + i++; + break; + + default: + ctlr->statistics[i] += r; + if(ctlr->statistics[i] == 0) + continue; + l += snprint(p+l, READSTR-l, "%s: %ud %ud\n", + s, ctlr->statistics[i], r); + break; + } + } + + l += snprint(p+l, READSTR-l, "lintr: %ud %ud\n", + ctlr->lintr, ctlr->lsleep); + l += snprint(p+l, READSTR-l, "rintr: %ud %ud\n", + ctlr->rintr, ctlr->rsleep); + l += snprint(p+l, READSTR-l, "tintr: %ud %ud\n", + ctlr->tintr, ctlr->txdw); + l += snprint(p+l, READSTR-l, "ixcs: %ud %ud %ud\n", + ctlr->ixsm, ctlr->ipcs, ctlr->tcpcs); + l += snprint(p+l, READSTR-l, "rdtr: %ud\n", ctlr->rdtr); + l += snprint(p+l, READSTR-l, "Ctrlext: %08x\n", csr32r(ctlr, Ctrlext)); + + l += snprint(p+l, READSTR-l, "eeprom:"); + for(i = 0; i < 0x40; i++){ + if(i && ((i & 0x07) == 0)) + l += snprint(p+l, READSTR-l, "\n "); + l += snprint(p+l, READSTR-l, " %4.4uX", ctlr->eeprom[i]); + } + l += snprint(p+l, READSTR-l, "\n"); + + if(ctlr->mii != nil && ctlr->mii->curphy != nil){ + l += snprint(p+l, READSTR-l, "phy: "); + for(i = 0; i < NMiiPhyr; i++){ + if(i && ((i & 0x07) == 0)) + l += snprint(p+l, READSTR-l, "\n "); + r = miimir(ctlr->mii, i); + l += snprint(p+l, READSTR-l, " %4.4uX", r); + } + snprint(p+l, READSTR-l, "\n"); + } + n = readstr(offset, a, n, p); + free(p); + qunlock(&ctlr->slock); + + return n; +} + +enum { + CMrdtr, +}; + +static 
Cmdtab igbectlmsg[] = { + CMrdtr, "rdtr", 2, +}; + +static long +igbectl(Ether* edev, void* buf, long n) +{ + int v; + char *p; + Ctlr *ctlr; + Cmdbuf *cb; + Cmdtab *ct; + + if((ctlr = edev->ctlr) == nil) + error(Enonexist); + + cb = parsecmd(buf, n); + if(waserror()){ + free(cb); + nexterror(); + } + + ct = lookupcmd(cb, igbectlmsg, nelem(igbectlmsg)); + switch(ct->index){ + case CMrdtr: + v = strtol(cb->f[1], &p, 0); + if(v < 0 || p == cb->f[1] || v > 0xFFFF) + error(Ebadarg); + ctlr->rdtr = v; + csr32w(ctlr, Rdtr, Fpd|v); + break; + } + free(cb); + poperror(); + + return n; +} + +static void +igbepromiscuous(void* arg, int on) +{ + int rctl; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + rctl = csr32r(ctlr, Rctl); + rctl &= ~MoMASK; + rctl |= Mo47b36; + if(on) + rctl |= Upe|Mpe; + else + rctl &= ~(Upe|Mpe); + csr32w(ctlr, Rctl, rctl|Mpe); /* temporarily keep Mpe on */ +} + +static void +igbemulticast(void* arg, uchar* addr, int add) +{ + int bit, x; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + x = addr[5]>>1; + bit = ((addr[5] & 1)<<4)|(addr[4]>>4); + /* + * multiple ether addresses can hash to the same filter bit, + * so it's never safe to clear a filter bit. + * if we want to clear filter bits, we need to keep track of + * all the multicast addresses in use, clear all the filter bits, + * then set the ones corresponding to in-use addresses. 
+ */ + if(add) + ctlr->mta[x] |= 1<mta[x] &= ~(1<mta[x]); +} + +static Block* +igberballoc(void) +{ + Block *bp; + + ilock(&igberblock); + if((bp = igberbpool) != nil){ + igberbpool = bp->next; + bp->next = nil; + /* _xinc(&bp->ref); prevent bp from being freed */ + } + iunlock(&igberblock); + + return bp; +} + +static void +igberbfree(Block* bp) +{ + bp->rp = bp->lim - Rbsz; + bp->wp = bp->rp; + bp->flag &= ~(Bipck | Budpck | Btcpck | Bpktck); + + ilock(&igberblock); + bp->next = igberbpool; + igberbpool = bp; + iunlock(&igberblock); +} + +static void +igbeim(Ctlr* ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static int +igbelim(void* ctlr) +{ + return ((Ctlr*)ctlr)->lim != 0; +} + +static void +igbelproc(void* arg) +{ + Ctlr *ctlr; + Ether *edev; + MiiPhy *phy; + int ctrl, r; + + edev = arg; + ctlr = edev->ctlr; + for(;;){ + if(ctlr->mii == nil || ctlr->mii->curphy == nil) + continue; + + /* + * To do: + * logic to manage status change, + * this is incomplete but should work + * one time to set up the hardware. + * + * MiiPhy.speed, etc. should be in Mii. + */ + if(miistatus(ctlr->mii) < 0) + //continue; + goto enable; + + phy = ctlr->mii->curphy; + ctrl = csr32r(ctlr, Ctrl); + + switch(ctlr->id){ + case i82543gc: + case i82544ei: + case i82544eif: + default: + if(!(ctrl & Asde)){ + ctrl &= ~(SspeedMASK|Ilos|Fd); + ctrl |= Frcdplx|Frcspd; + if(phy->speed == 1000) + ctrl |= Sspeed1000; + else if(phy->speed == 100) + ctrl |= Sspeed100; + if(phy->fd) + ctrl |= Fd; + } + break; + + case i82540em: + case i82540eplp: + case i82547gi: + case i82541gi: + case i82541gi2: + case i82541pi: + break; + } + + /* + * Collision Distance. 
+ */ + r = csr32r(ctlr, Tctl); + r &= ~ColdMASK; + if(phy->fd) + r |= 64<rfc) + ctrl |= Rfce; + if(phy->tfc) + ctrl |= Tfce; + csr32w(ctlr, Ctrl, ctrl); + +enable: + ctlr->lim = 0; + igbeim(ctlr, Lsc); + + ctlr->lsleep++; + sleep(&ctlr->lrendez, igbelim, ctlr); + } +} + +static void +igbetxinit(Ctlr* ctlr) +{ + int i, r; + Block *bp; + + csr32w(ctlr, Tctl, (0x0F<id){ + default: + r = 6; + break; + case i82543gc: + case i82544ei: + case i82544eif: + case i82544gc: + case i82540em: + case i82540eplp: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82547ei: + case i82547gi: + r = 8; + break; + } + csr32w(ctlr, Tipg, (6<<20)|(8<<10)|r); + csr32w(ctlr, Ait, 0); + csr32w(ctlr, Txdmac, 0); + + csr32w(ctlr, Tdbal, PCIWADDR(ctlr->tdba)); + csr32w(ctlr, Tdbah, 0); + csr32w(ctlr, Tdlen, ctlr->ntd*sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + + for(i = 0; i < ctlr->ntd; i++){ + if((bp = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(bp); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + ctlr->tdfree = ctlr->ntd; + + csr32w(ctlr, Tidv, 128); + r = (4<id){ + default: + break; + case i82540em: + case i82540eplp: + case i82547gi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82541gi: + case i82541gi2: + case i82541pi: + r = csr32r(ctlr, Txdctl); + r &= ~WthreshMASK; + r |= Gran|(4<ctlr; + + ilock(&ctlr->tlock); + + /* + * Free any completed packets + */ + tdh = ctlr->tdh; + while(NEXT(tdh, ctlr->ntd) != csr32r(ctlr, Tdh)){ + if((bp = ctlr->tb[tdh]) != nil){ + ctlr->tb[tdh] = nil; + freeb(bp); + } + memset(&ctlr->tdba[tdh], 0, sizeof(Td)); + tdh = NEXT(tdh, ctlr->ntd); + } + ctlr->tdh = tdh; + + /* + * Try to fill the ring back up. 
+ */ + tdt = ctlr->tdt; + while(NEXT(tdt, ctlr->ntd) != tdh){ + if((bp = qget(edev->oq)) == nil) + break; + td = &ctlr->tdba[tdt]; + td->addr[0] = PCIWADDR(bp->rp); + td->control = ((BLEN(bp) & LenMASK)<control |= Dext|Ifcs|Teop|DtypeDD; + ctlr->tb[tdt] = bp; + tdt = NEXT(tdt, ctlr->ntd); + if(NEXT(tdt, ctlr->ntd) == tdh){ + td->control |= Rs; + ctlr->txdw++; + ctlr->tdt = tdt; + csr32w(ctlr, Tdt, tdt); + igbeim(ctlr, Txdw); + break; + } + ctlr->tdt = tdt; + csr32w(ctlr, Tdt, tdt); + } + + iunlock(&ctlr->tlock); +} + +static void +igbereplenish(Ctlr* ctlr) +{ + Rd *rd; + int rdt; + Block *bp; + + rdt = ctlr->rdt; + while(NEXT(rdt, ctlr->nrd) != ctlr->rdh){ + rd = &ctlr->rdba[rdt]; + if(ctlr->rb[rdt] == nil){ + bp = igberballoc(); + if(bp == nil){ + iprint("#l%d: igbereplenish: no available buffers\n", + ctlr->edev->ctlrno); + break; + } + ctlr->rb[rdt] = bp; + rd->addr[0] = PCIWADDR(bp->rp); + rd->addr[1] = 0; + } + coherence(); + rd->status = 0; + rdt = NEXT(rdt, ctlr->nrd); + ctlr->rdfree++; + } + ctlr->rdt = rdt; + csr32w(ctlr, Rdt, rdt); +} + +static void +igberxinit(Ctlr* ctlr) +{ + int i; + Block *bp; + + /* temporarily keep Mpe on */ + csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF|Mpe); + + csr32w(ctlr, Rdbal, PCIWADDR(ctlr->rdba)); + csr32w(ctlr, Rdbah, 0); + csr32w(ctlr, Rdlen, ctlr->nrd*sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + ctlr->rdtr = 0; + csr32w(ctlr, Rdtr, Fpd|0); + + for(i = 0; i < ctlr->nrd; i++){ + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + } + igbereplenish(ctlr); + + switch(ctlr->id){ + case i82540em: + case i82540eplp: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82547gi: + csr32w(ctlr, Radv, 64); + break; + } + csr32w(ctlr, Rxdctl, (8<rim != 0; +} + +static void +igberproc(void* arg) +{ + Rd *rd; + Block *bp; + Ctlr *ctlr; + int r, rdh; + Ether *edev; + + edev = arg; + ctlr = 
edev->ctlr; + + igberxinit(ctlr); + r = csr32r(ctlr, Rctl); + r |= Ren; + csr32w(ctlr, Rctl, r); + + for(;;){ + ctlr->rim = 0; + igbeim(ctlr, Rxt0|Rxo|Rxdmt0|Rxseq); + ctlr->rsleep++; + sleep(&ctlr->rrendez, igberim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + if((rd->status & Reop) && rd->errors == 0){ + bp = ctlr->rb[rdh]; + ctlr->rb[rdh] = nil; + bp->wp += rd->length; + bp->next = nil; + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flag |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). + */ + ctlr->tcpcs++; + bp->flag |= Btcpck|Budpck; + } + bp->checksum = rd->checksum; + bp->flag |= Bpktck; + } + etheriq(edev, bp, 1); + } + else if(ctlr->rb[rdh] != nil){ + freeb(ctlr->rb[rdh]); + ctlr->rb[rdh] = nil; + } + + memset(rd, 0, sizeof(Rd)); + coherence(); + ctlr->rdfree--; + rdh = NEXT(rdh, ctlr->nrd); + } + ctlr->rdh = rdh; + + if(ctlr->rdfree < ctlr->nrd/2 || (ctlr->rim & Rxdmt0)) + igbereplenish(ctlr); + } +} + +static void +igbeattach(Ether* edev) +{ + Block *bp; + Ctlr *ctlr; + char name[KNAMELEN]; + + ctlr = edev->ctlr; + ctlr->edev = edev; /* point back to Ether* */ + qlock(&ctlr->alock); + if(ctlr->alloc != nil){ /* already allocated? 
*/ + qunlock(&ctlr->alock); + return; + } + + ctlr->tb = nil; + ctlr->rb = nil; + ctlr->alloc = nil; + ctlr->nrb = 0; + if(waserror()){ + while(ctlr->nrb > 0){ + bp = igberballoc(); + bp->free = nil; + freeb(bp); + ctlr->nrb--; + } + free(ctlr->tb); + ctlr->tb = nil; + free(ctlr->rb); + ctlr->rb = nil; + free(ctlr->alloc); + ctlr->alloc = nil; + qunlock(&ctlr->alock); + nexterror(); + } + + ctlr->nrd = Nrd; + ctlr->ntd = Ntd; + ctlr->alloc = malloc(ctlr->nrd*sizeof(Rd)+ctlr->ntd*sizeof(Td) + 127); + if(ctlr->alloc == nil) { + print("igbe: can't allocate ctlr->alloc\n"); + error(Enomem); + } + ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 128); + ctlr->tdba = (Td*)(ctlr->rdba+ctlr->nrd); + + ctlr->rb = malloc(ctlr->nrd*sizeof(Block*)); + ctlr->tb = malloc(ctlr->ntd*sizeof(Block*)); + if (ctlr->rb == nil || ctlr->tb == nil) { + print("igbe: can't allocate ctlr->rb or ctlr->tb\n"); + error(Enomem); + } + + for(ctlr->nrb = 0; ctlr->nrb < Nrb; ctlr->nrb++){ + if((bp = allocb(Rbsz)) == nil) + break; + bp->free = igberbfree; + freeb(bp); + } + + snprint(name, KNAMELEN, "#l%dlproc", edev->ctlrno); + kproc(name, igbelproc, edev); + + snprint(name, KNAMELEN, "#l%drproc", edev->ctlrno); + kproc(name, igberproc, edev); + + igbetxinit(ctlr); + + qunlock(&ctlr->alock); + poperror(); +} + +static void +igbeinterrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + int icr, im, txdw; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + txdw = 0; + + while((icr = csr32r(ctlr, Icr) & ctlr->im) != 0){ + if(icr & Lsc){ + im &= ~Lsc; + ctlr->lim = icr & Lsc; + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq)){ + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq); + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + txdw++; + ctlr->tintr++; + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); + + if(txdw) + 
igbetransmit(edev); +} + +static int +i82543mdior(Ctlr* ctlr, int n) +{ + int ctrl, data, i, r; + + /* + * Read n bits from the Management Data I/O Interface. + */ + ctrl = csr32r(ctlr, Ctrl); + r = (ctrl & ~Mddo)|Mdco; + data = 0; + for(i = n-1; i >= 0; i--){ + if(csr32r(ctlr, Ctrl) & Mdd) + data |= (1<= 0; i--){ + if(bits & (1<ctlr; + + /* + * MII Management Interface Read. + * + * Preamble; + * ST+OP+PHYAD+REGAD; + * TA + 16 data bits. + */ + i82543mdiow(ctlr, 0xFFFFFFFF, 32); + i82543mdiow(ctlr, 0x1800|(pa<<5)|ra, 14); + data = i82543mdior(ctlr, 18); + + if(data & 0x10000) + return -1; + + return data & 0xFFFF; +} + +static int +i82543miimiw(Mii* mii, int pa, int ra, int data) +{ + Ctlr *ctlr; + + ctlr = mii->ctlr; + + /* + * MII Management Interface Write. + * + * Preamble; + * ST+OP+PHYAD+REGAD+TA + 16 data bits; + * Z. + */ + i82543mdiow(ctlr, 0xFFFFFFFF, 32); + data &= 0xFFFF; + data |= (0x05<<(5+5+2+16))|(pa<<(5+2+16))|(ra<<(2+16))|(0x02<<16); + i82543mdiow(ctlr, data, 32); + + return 0; +} + +static int +igbemiimir(Mii* mii, int pa, int ra) +{ + Ctlr *ctlr; + int mdic, timo; + + ctlr = mii->ctlr; + + csr32w(ctlr, Mdic, MDIrop|(pa<ctlr; + + data &= MDIdMASK; + csr32w(ctlr, Mdic, MDIwop|(pa<mii = malloc(sizeof(Mii))) == nil) + return -1; + ctlr->mii->ctlr = ctlr; + + ctrl = csr32r(ctlr, Ctrl); + ctrl |= Slu; + + switch(ctlr->id){ + case i82543gc: + ctrl |= Frcdplx|Frcspd; + csr32w(ctlr, Ctrl, ctrl); + + /* + * The reset pin direction (Mdro) should already + * be set from the EEPROM load. + * If it's not set this configuration is unexpected + * so bail. 
+ */ + r = csr32r(ctlr, Ctrlext); + if(!(r & Mdro)) + return -1; + csr32w(ctlr, Ctrlext, r); + delay(20); + r = csr32r(ctlr, Ctrlext); + r &= ~Mdr; + csr32w(ctlr, Ctrlext, r); + delay(20); + r = csr32r(ctlr, Ctrlext); + r |= Mdr; + csr32w(ctlr, Ctrlext, r); + delay(20); + + rw = i82543miirw; + break; + case i82544ei: + case i82544eif: + case i82544gc: + case i82540em: + case i82540eplp: + case i82547ei: + case i82547gi: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + ctrl &= ~(Frcdplx|Frcspd); + csr32w(ctlr, Ctrl, ctrl); + rw = i82543miirw; + break; + default: + free(ctlr->mii); + ctlr->mii = nil; + return -1; + } + + if(ctlr->mii = miiattach(ctlr, ~0, rw)){ + free(ctlr->mii); + ctlr->mii = nil; + return -1; + } + // print("oui %X phyno %d\n", phy->oui, phy->phyno); + + /* + * 8254X-specific PHY registers not in 802.3: + * 0x10 PHY specific control + * 0x14 extended PHY specific control + * Set appropriate values then reset the PHY to have + * changes noted. 
+ */ + switch(ctlr->id){ + case i82547gi: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + break; + default: + r = miimir(ctlr->mii, 16); + r |= 0x0800; /* assert CRS on Tx */ + r |= 0x0060; /* auto-crossover all speeds */ + r |= 0x0002; /* polarity reversal enabled */ + miimiw(ctlr->mii, 16, r); + + r = miimir(ctlr->mii, 20); + r |= 0x0070; /* +25MHz clock */ + r &= ~0x0F00; + r |= 0x0100; /* 1x downshift */ + miimiw(ctlr->mii, 20, r); + + miireset(ctlr->mii); + p = 0; + if(ctlr->txcw & TxcwPs) + p |= AnaP; + if(ctlr->txcw & TxcwAs) + p |= AnaAP; + miiane(ctlr->mii, ~0, p, ~0); + break; + } + return 0; +} + +static int +at93c46io(Ctlr* ctlr, char* op, int data) +{ + char *lp, *p; + int i, loop, eecd, r; + + eecd = csr32r(ctlr, Eecd); + + r = 0; + loop = -1; + lp = nil; + for(p = op; *p != '\0'; p++){ + switch(*p){ + default: + return -1; + case ' ': + continue; + case ':': /* start of loop */ + loop = strtol(p+1, &lp, 0)-1; + lp--; + if(p == lp) + loop = 7; + p = lp; + continue; + case ';': /* end of loop */ + if(lp == nil) + return -1; + loop--; + if(loop >= 0) + p = lp; + else + lp = nil; + continue; + case 'C': /* assert clock */ + eecd |= Sk; + break; + case 'c': /* deassert clock */ + eecd &= ~Sk; + break; + case 'D': /* next bit in 'data' byte */ + if(loop < 0) + return -1; + if(data & (1<= 0) + r |= (i<= 0) + return -1; + return r; +} + +static int +at93c46r(Ctlr* ctlr) +{ + ushort sum; + char rop[20]; + int addr, areq, bits, data, eecd, i; + + eecd = csr32r(ctlr, Eecd); + if(eecd & Spi){ + print("igbe: SPI EEPROM access not implemented\n"); + return 0; + } + if(eecd & (Eeszaddr|Eesz256)) + bits = 8; + else + bits = 6; + + sum = 0; + + switch(ctlr->id){ + default: + areq = 0; + break; + case i82540em: + case i82540eplp: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + case i82547ei: + case 
i82547gi: + areq = 1; + csr32w(ctlr, Eecd, eecd|Areq); + for(i = 0; i < 1000; i++){ + if((eecd = csr32r(ctlr, Eecd)) & Agnt) + break; + microdelay(5); + } + if(!(eecd & Agnt)){ + print("igbe: not granted EEPROM access\n"); + goto release; + } + break; + } + snprint(rop, sizeof(rop), "S :%dDCc;", bits+3); + + for(addr = 0; addr < 0x40; addr++){ + /* + * Read a word at address 'addr' from the Atmel AT93C46 + * 3-Wire Serial EEPROM or compatible. The EEPROM access is + * controlled by 4 bits in Eecd. See the AT93C46 datasheet + * for protocol details. + */ + if(at93c46io(ctlr, rop, (0x06<eeprom[addr] = data; + sum += data; + } + +release: + if(areq) + csr32w(ctlr, Eecd, eecd & ~Areq); + return sum; +} + +static int +igbedetach(Ctlr* ctlr) +{ + int r, timeo; + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal registers. + */ + csr32w(ctlr, Imc, ~0); + csr32w(ctlr, Rctl, 0); + csr32w(ctlr, Tctl, 0); + + delay(10); + + csr32w(ctlr, Ctrl, Devrst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrl) & Devrst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrl) & Devrst) + return -1; + r = csr32r(ctlr, Ctrlext); + csr32w(ctlr, Ctrlext, r|Eerst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrlext) & Eerst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrlext) & Eerst) + return -1; + + switch(ctlr->id){ + default: + break; + case i82540em: + case i82540eplp: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82547gi: + case i82546gb: + case i82546eb: + r = csr32r(ctlr, Manc); + r &= ~Arpen; + csr32w(ctlr, Manc, r); + break; + } + + csr32w(ctlr, Imc, ~0); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!csr32r(ctlr, Icr)) + break; + delay(1); + } + if(csr32r(ctlr, Icr)) + return -1; + + return 0; +} + +static void +igbeshutdown(Ether* ether) +{ + igbedetach(ether->ctlr); 
+} + +static int +igbereset(Ctlr* ctlr) +{ + int ctrl, i, pause, r, swdpio, txcw; + + if(igbedetach(ctlr)) + return -1; + + /* + * Read the EEPROM, validate the checksum + * then get the device back to a power-on state. + */ + if((r = at93c46r(ctlr)) != 0xBABA){ + print("igbe: bad EEPROM checksum - 0x%4.4uX\n", r); + return -1; + } + + /* + * Snarf and set up the receive addresses. + * There are 16 addresses. The first should be the MAC address. + * The others are cleared and not marked valid (MS bit of Rah). + */ + if ((ctlr->id == i82546gb || ctlr->id == i82546eb) && + BUSFNO(ctlr->pcidev->tbdf) == 1) + ctlr->eeprom[Ea+2] += 0x100; /* second interface */ + if(ctlr->id == i82541gi && ctlr->eeprom[Ea] == 0xFFFF) + ctlr->eeprom[Ea] = 0xD000; + for(i = Ea; i < Eaddrlen/2; i++){ + ctlr->ra[2*i] = ctlr->eeprom[i]; + ctlr->ra[2*i+1] = ctlr->eeprom[i]>>8; + } + /* lan id seems to vary on 82543gc; don't use it */ + if (ctlr->id != i82543gc) { + r = (csr32r(ctlr, Status) & Lanid) >> 2; + ctlr->ra[5] += r; /* ea ctlr[1] = ea ctlr[0]+1 */ + } + + r = (ctlr->ra[3]<<24)|(ctlr->ra[2]<<16)|(ctlr->ra[1]<<8)|ctlr->ra[0]; + csr32w(ctlr, Ral, r); + r = 0x80000000|(ctlr->ra[5]<<8)|ctlr->ra[4]; + csr32w(ctlr, Rah, r); + for(i = 1; i < 16; i++){ + csr32w(ctlr, Ral+i*8, 0); + csr32w(ctlr, Rah+i*8, 0); + } + + /* + * Clear the Multicast Table Array. + * It's a 4096 bit vector accessed as 128 32-bit registers. + */ + memset(ctlr->mta, 0, sizeof(ctlr->mta)); + for(i = 0; i < 128; i++) + csr32w(ctlr, Mta+i*4, 0); + + /* + * Just in case the Eerst didn't load the defaults + * (doesn't appear to fully on the 82543GC), do it manually. 
+ */ + if (ctlr->id == i82543gc) { + txcw = csr32r(ctlr, Txcw); + txcw &= ~(TxcwAne|TxcwPauseMASK|TxcwFd); + ctrl = csr32r(ctlr, Ctrl); + ctrl &= ~(SwdpioloMASK|Frcspd|Ilos|Lrst|Fd); + + if(ctlr->eeprom[Icw1] & 0x0400){ + ctrl |= Fd; + txcw |= TxcwFd; + } + if(ctlr->eeprom[Icw1] & 0x0200) + ctrl |= Lrst; + if(ctlr->eeprom[Icw1] & 0x0010) + ctrl |= Ilos; + if(ctlr->eeprom[Icw1] & 0x0800) + ctrl |= Frcspd; + swdpio = (ctlr->eeprom[Icw1] & 0x01E0)>>5; + ctrl |= swdpio<eeprom[Icw2] & 0x00F0)>>4; + if(ctlr->eeprom[Icw1] & 0x1000) + ctrl |= Ips; + ctrl |= swdpio<eeprom[Icw2] & 0x0800) + txcw |= TxcwAne; + pause = (ctlr->eeprom[Icw2] & 0x3000)>>12; + txcw |= pause<fcrtl = 0x00002000; + ctlr->fcrth = 0x00004000; + txcw |= TxcwAs|TxcwPs; + break; + case 0: + ctlr->fcrtl = 0x00002000; + ctlr->fcrth = 0x00004000; + break; + case 2: + ctlr->fcrtl = 0; + ctlr->fcrth = 0; + txcw |= TxcwAs; + break; + } + ctlr->txcw = txcw; + csr32w(ctlr, Txcw, txcw); + } + + + /* + * Flow control - values from the datasheet. 
+ */ + csr32w(ctlr, Fcal, 0x00C28001); + csr32w(ctlr, Fcah, 0x00000100); + csr32w(ctlr, Fct, 0x00008808); + csr32w(ctlr, Fcttv, 0x00000100); + + csr32w(ctlr, Fcrtl, ctlr->fcrtl); + csr32w(ctlr, Fcrth, ctlr->fcrth); + + if(!(csr32r(ctlr, Status) & Tbimode) && igbemii(ctlr) < 0) + return -1; + + return 0; +} + +enum { + CACHELINESZ = 32, +}; + +static void +igbepci(void) /* walk the PCI devices; map, reset and append each supported 8254x to the igbectlrhead list */ +{ + int cls; + Pcidev *p; + Ctlr *ctlr; + void *mem; + + p = nil; + while(p = pcimatch(p, 0, 0)){ + if(p->ccrb != 0x02 || p->ccru != 0) /* only class code 0x02, subclass 0 */ + continue; + + switch((p->did<<16)|p->vid){ /* same did<<16|vid encoding as the i825xx enum */ + default: + continue; + case i82543gc: + case i82544ei: + case i82544eif: + case i82544gc: + case i82547ei: + case i82547gi: + case i82540em: + case i82540eplp: + case i82541ei: + case i82541gi: + case i82541gi2: + case i82541pi: + case i82545em: + case i82545gmc: + case i82546gb: + case i82546eb: + break; + } + + mem = vmap(p->mem[0].bar & ~0x0F, p->mem[0].size); + if(mem == nil){ + print("igbe: can't map %8.8luX\n", p->mem[0].bar); + continue; + } + cls = pcicfgr8(p, PciCLS); + switch(cls){ + default: + print("igbe: unexpected CLS - %d\n", cls*4); + break; + case 0x00: + case 0xFF: + /* bogus value; use a sane default */ + cls = CACHELINESZ/sizeof(long); + pcicfgw8(p, PciCLS, cls); + break; /* go on with the default just written; mem stays owned by the ctlr set up below */ + case 0x08: + case 0x10: + break; + } + ctlr = malloc(sizeof(Ctlr)); + if(ctlr == nil) { + vunmap(mem, p->mem[0].size); + error(Enomem); + } + ctlr->port = p->mem[0].bar & ~0x0F; + ctlr->pcidev = p; + ctlr->id = (p->did<<16)|p->vid; + ctlr->cls = cls*4; + ctlr->nic = mem; + + if(igbereset(ctlr)){ /* non-zero means the reset failed: release everything and try the next device */ + free(ctlr); + vunmap(mem, p->mem[0].size); + continue; + } + pcisetbme(p); + + if(igbectlrhead != nil) + igbectlrtail->next = ctlr; + else + igbectlrhead = ctlr; + igbectlrtail = ctlr; + } +} + +static int +igbepnp(Ether* edev) +{ + Ctlr *ctlr; + + if(igbectlrhead == nil) + igbepci(); + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. 
+ */ + for(ctlr = igbectlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. + */ + edev->attach = igbeattach; + edev->transmit = igbetransmit; + edev->interrupt = igbeinterrupt; + edev->ifstat = igbeifstat; + edev->ctl = igbectl; + + edev->arg = edev; + edev->promiscuous = igbepromiscuous; + edev->shutdown = igbeshutdown; + edev->multicast = igbemulticast; + + return 0; +} + +void +etherigbelink(void) +{ + addethercard("i82543", igbepnp); + addethercard("igbe", igbepnp); +} + diff -Nru 0/sys/src/nix/386/etherm10g.c 4/sys/src/nix/386/etherm10g.c --- 0/sys/src/nix/386/etherm10g.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/etherm10g.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1637 @@ +/* + * myricom 10 Gb ethernet driver + * © 2007 erik quanstrom, coraid + * + * the card is big endian. + * we use u64int rather than uintptr to hold addresses so that + * we don't get "warning: stupid shift" on 32-bit architectures. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" + +#include "etherif.h" +#include "io.h" + +#ifndef KiB +#define KiB 1024u /* Kibi 0x0000000000000400 */ +#define MiB 1048576u /* Mebi 0x0000000000100000 */ +#endif /* KiB */ + +#define dprint(...) if(debug) print(__VA_ARGS__) +#define pcicapdbg(...) 
+#define malign(n) mallocalign((n), 4*KiB, 0, 0) + +#include "etherm10g2k.i" +#include "etherm10g4k.i" + +static int debug = 0; +static char Etimeout[] = "timeout"; + +enum { + Epromsz = 256, + Maxslots= 1024, + Align = 4096, + Maxmtu = 9000, + Noconf = 0xffffffff, + + Fwoffset= 1*MiB, + Cmdoff = 0xf80000, /* command port offset */ + Fwsubmt = 0xfc0000, /* firmware submission command port offset */ + Rdmaoff = 0xfc01c0, /* rdma command port offset */ +}; + +enum { + CZero, + Creset, + Cversion, + + CSintrqdma, /* issue these before Cetherup */ + CSbigsz, /* in bytes bigsize = 2^n */ + CSsmallsz, + + CGsendoff, + CGsmallrxoff, + CGbigrxoff, + CGirqackoff, + CGirqdeassoff, + CGsendrgsz, + CGrxrgsz, + + CSintrqsz, /* 2^n */ + Cetherup, /* above parameters + mtu/mac addr must be set first. */ + Cetherdn, + + CSmtu, /* below may be issued live */ + CGcoaloff, /* in µs */ + CSstatsrate, /* in µs */ + CSstatsdma, + + Cpromisc, + Cnopromisc, + CSmac, + + Cenablefc, + Cdisablefc, + + Cdmatest, /* address in d[0-1], d[2]=length */ + + Cenableallmc, + Cdisableallmc, + + CSjoinmc, + CSleavemc, + Cleaveallmc, + + CSstatsdma2, /* adds (unused) multicast stats */ +}; + +typedef union { + uint i[2]; + uchar c[8]; +} Cmd; + +typedef ulong Slot; +typedef struct { + u16int cksum; + u16int len; +} Slotparts; + +enum { + SFsmall = 1, + SFfirst = 2, + SFalign = 4, + SFnotso = 16, +}; + +typedef struct { + u32int high; + u32int low; + u16int hdroff; + u16int len; + uchar pad; + uchar nrdma; + uchar chkoff; + uchar flags; +} Send; + +typedef struct { + QLock; + Send *lanai; /* tx ring (cksum+len in lanai memory) */ + Send *host; /* tx ring (data in our memory) */ + Block **bring; +// uchar *wcfifo; /* what the heck is a w/c fifo? 
*/ + int size; /* of buffers in the z8's memory */ + u32int segsz; + uint n; /* rxslots */ + uint m; /* mask; rxslots must be a power of two */ + uint i; /* number of segments (not frames) queued */ + uint cnt; /* number of segments sent by the card */ + + ulong npkt; + vlong nbytes; +} Tx; + +typedef struct { + Lock; + Block *head; + uint size; /* buffer size of each block */ + uint n; /* n free buffers */ + uint cnt; +} Bpool; + +static Bpool smpool = { .size = 128, }; +static Bpool bgpool = { .size = Maxmtu, }; + +typedef struct { + Bpool *pool; /* free buffers */ + u32int *lanai; /* rx ring; we have no permanent host shadow */ + Block **host; /* called "info" in myricom driver */ +// uchar *wcfifo; /* cmd submission fifo */ + uint m; + uint n; /* rxslots */ + uint i; + uint cnt; /* number of buffers allocated (lifetime) */ + uint allocfail; +} Rx; + +/* dma mapped. unix network byte order. */ +typedef struct { + uchar txcnt[4]; + uchar linkstat[4]; + uchar dlink[4]; + uchar derror[4]; + uchar drunt[4]; + uchar doverrun[4]; + uchar dnosm[4]; + uchar dnobg[4]; + uchar nrdma[4]; + uchar txstopped; + uchar down; + uchar updated; + uchar valid; +} Stats; + +enum { + Detached, + Attached, + Runed, +}; + +typedef struct { + Slot *entry; + u64int busaddr; + uint m; + uint n; + uint i; +} Done; + +typedef struct Ctlr Ctlr; +typedef struct Ctlr { + QLock; + int state; + int kprocs; + u64int port; + Pcidev* pcidev; + Ctlr* next; + int active; + int id; /* do we need this? 
*/ + + uchar ra[Eaddrlen]; + + int ramsz; + uchar *ram; + + u32int *irqack; + u32int *irqdeass; + u32int *coal; + + char eprom[Epromsz]; + ulong serial; /* unit serial number */ + + QLock cmdl; + Cmd *cmd; /* address of command return */ + u64int cprt; /* bus address of command */ + + u64int boot; /* boot address */ + + Done done; + Tx tx; + Rx sm; + Rx bg; + Stats *stats; + u64int statsprt; + + Rendez rxrendez; + Rendez txrendez; + + int msi; + u32int linkstat; + u32int nrdma; +} Ctlr; + +static Ctlr *ctlrs; + +enum { + PciCapPMG = 0x01, /* power management */ + PciCapAGP = 0x02, + PciCapVPD = 0x03, /* vital product data */ + PciCapSID = 0x04, /* slot id */ + PciCapMSI = 0x05, + PciCapCHS = 0x06, /* compact pci hot swap */ + PciCapPCIX = 0x07, + PciCapHTC = 0x08, /* hypertransport irq conf */ + PciCapVND = 0x09, /* vendor specific information */ + PciCapHSW = 0x0C, /* hot swap */ + PciCapPCIe = 0x10, + PciCapMSIX = 0x11, +}; + +enum { + PcieAERC = 1, + PcieVC, + PcieSNC, + PciePBC, +}; + +enum { + AercCCR = 0x18, /* control register */ +}; + +enum { + PcieCTL = 8, + PcieLCR = 12, + PcieMRD = 0x7000, /* maximum read size */ +}; + +static int +pcicap(Pcidev *p, int cap) +{ + int i, c, off; + + pcicapdbg("pcicap: %x:%d\n", p->vid, p->did); + off = 0x34; /* 0x14 for cardbus */ + for(i = 48; i--; ){ + pcicapdbg("\t" "loop %x\n", off); + off = pcicfgr8(p, off); + pcicapdbg("\t" "pcicfgr8 %x\n", off); + if(off < 0x40) + break; + off &= ~3; + c = pcicfgr8(p, off); + pcicapdbg("\t" "pcicfgr8 %x\n", c); + if(c == 0xff) + break; + if(c == cap) + return off; + off++; + } + return 0; +} + +/* + * this function doesn't work because pcicgr32 doesn't have access + * to the pcie extended configuration space. 
+ */ +static int +pciecap(Pcidev *p, int cap) +{ + uint off, i; + + off = 0x100; + while(((i = pcicfgr32(p, off))&0xffff) != cap){ + off = i >> 20; + print("pciecap offset = %ud\n", off); + if(off < 0x100 || off >= 4*KiB - 1) + return 0; + } + print("pciecap found = %ud\n", off); + return off; +} + +static int +setpcie(Pcidev *p) +{ + int off; + + /* set 4k writes */ + off = pcicap(p, PciCapPCIe); + if(off < 64) + return -1; + off += PcieCTL; + pcicfgw16(p, off, (pcicfgr16(p, off) & ~PcieMRD) | 5<<12); + return 0; +} + +static int +whichfw(Pcidev *p) +{ + char *s; + int i, off, lanes, ecrc; + u32int cap; + + /* check the number of configured lanes. */ + off = pcicap(p, PciCapPCIe); + if(off < 64) + return -1; + off += PcieLCR; + cap = pcicfgr16(p, off); + lanes = (cap>>4) & 0x3f; + + /* check AERC register. we need it on. */ + off = pciecap(p, PcieAERC); + print("%d offset\n", off); + cap = 0; + if(off != 0){ + off += AercCCR; + cap = pcicfgr32(p, off); + print("%ud cap\n", cap); + } + ecrc = (cap>>4) & 0xf; + /* if we don't like the aerc, kick it here. 
*/ + + print("m10g %d lanes; ecrc=%d; ", lanes, ecrc); + if(s = getconf("myriforce")){ + i = atoi(s); + if(i != 4*KiB || i != 2*KiB) + i = 2*KiB; + print("fw=%d [forced]\n", i); + return i; + } + if(lanes <= 4){ + print("fw = 4096 [lanes]\n"); + return 4*KiB; + } + if(ecrc & 10){ + print("fw = 4096 [ecrc set]\n"); + return 4*KiB; + } + print("fw = 4096 [default]\n"); + return 4*KiB; +} + +static int +parseeprom(Ctlr *c) +{ + int i, j, k, l, bits; + char *s; + + dprint("m10g eprom:\n"); + s = c->eprom; + bits = 3; + for(i = 0; s[i] && i < Epromsz; i++){ + l = strlen(s+i); + dprint("\t%s\n", s+i); + if(strncmp(s+i, "MAC=", 4) == 0 && l == 4+12+5){ + bits ^= 1; + j = i + 4; + for(k = 0; k < 6; k++) + c->ra[k] = strtoul(s+j+3*k, 0, 16); + }else if(strncmp(s+i, "SN=", 3) == 0){ + bits ^= 2; + c->serial = atoi(s+i+3); + } + i += l; + } + if(bits) + return -1; + return 0; +} + +static u16int +pbit16(u16int i) +{ + u16int j; + uchar *p; + + p = (uchar*)&j; + p[1] = i; + p[0] = i>>8; + return j; +} + +static u16int +gbit16(uchar i[2]) +{ + u16int j; + + j = i[1]; + j |= i[0]<<8; + return j; +} + +static u32int +pbit32(u32int i) +{ + u32int j; + uchar *p; + + p = (uchar*)&j; + p[3] = i; + p[2] = i>>8; + p[1] = i>>16; + p[0] = i>>24; + return j; +} + +static u32int +gbit32(uchar i[4]) +{ + u32int j; + + j = i[3]; + j |= i[2]<<8; + j |= i[1]<<16; + j |= i[0]<<24; + return j; +} + +static void +prepcmd(uint *cmd, int i) +{ + while(i-- > 0) + cmd[i] = pbit32(cmd[i]); +} + +/* + * the command looks like this (int 32bit integers) + * cmd type + * addr (low) + * addr (high) + * pad (used for dma testing) + * response (high) + * response (low) + * 40 byte = 5 int pad. 
+ */
+
+/*
+ * Issue command `type' with 64-bit argument `data': a 16-word
+ * buffer is copied to c->ram + Cmdoff and c->cmd is polled up to
+ * 15 times, sleeping between polls, for the reply.  Returns the
+ * first reply word (gbit32 of cmd->c); raises Etimeout if no reply
+ * arrives.  c->cmdl is held for the whole exchange and is released
+ * on every exit, including errors raised while waiting.
+ */
+u32int
+cmd(Ctlr *c, int type, u64int data)
+{
+	u32int buf[16], i;
+	Cmd *cmd;
+
+	qlock(&c->cmdl);
+	cmd = c->cmd;
+	cmd->i[1] = Noconf;
+	memset(buf, 0, sizeof buf);
+	buf[0] = type;
+	buf[1] = data;
+	buf[2] = data >> 32;
+	buf[4] = c->cprt >> 32;
+	buf[5] = c->cprt;
+	prepcmd(buf, 6);
+	coherence();
+	memmove(c->ram + Cmdoff, buf, sizeof buf);
+
+	if(waserror()){
+		/* timeout below, or an error raised inside tsleep */
+		qunlock(&c->cmdl);
+		nexterror();
+	}
+	for(i = 0; i < 15; i++){
+		if(cmd->i[1] != Noconf){
+			i = gbit32(cmd->c);
+			/* look at the reply while we still own it */
+			if(cmd->i[1] != 0)
+				dprint("[%ux]", i);
+			poperror();
+			qunlock(&c->cmdl);
+			return i;
+		}
+		tsleep(&up->sleep, return0, 0, 1);
+	}
+	iprint("m10g: cmd timeout [%ux %ux] cmd=%d\n",
+		cmd->i[0], cmd->i[1], type);
+	error(Etimeout);	/* the handler above drops cmdl */
+	return ~0;	/* silence! */
+}
+
+/*
+ * As cmd(), but the argument is the 6-byte ethernet address m,
+ * packed as m[0..3] in word 1 and m[4..5] in word 2.
+ */
+u32int
+maccmd(Ctlr *c, int type, uchar *m)
+{
+	u32int buf[16], i;
+	Cmd *cmd;
+
+	qlock(&c->cmdl);
+	cmd = c->cmd;
+	cmd->i[1] = Noconf;
+	memset(buf, 0, sizeof buf);
+	buf[0] = type;
+	buf[1] = m[0]<<24 | m[1]<<16 | m[2]<<8 | m[3];
+	buf[2] = m[4]<< 8 | m[5];
+	buf[4] = c->cprt >> 32;
+	buf[5] = c->cprt;
+	prepcmd(buf, 6);
+	coherence();
+	memmove(c->ram + Cmdoff, buf, sizeof buf);
+
+	if(waserror()){
+		/* timeout below, or an error raised inside tsleep */
+		qunlock(&c->cmdl);
+		nexterror();
+	}
+	for(i = 0; i < 15; i++){
+		if(cmd->i[1] != Noconf){
+			i = gbit32(cmd->c);
+			/* look at the reply while we still own it */
+			if(cmd->i[1] != 0)
+				dprint("[%ux]", i);
+			poperror();
+			qunlock(&c->cmdl);
+			return i;
+		}
+		tsleep(&up->sleep, return0, 0, 1);
+	}
+	iprint("m10g: maccmd timeout [%ux %ux] cmd=%d\n",
+		cmd->i[0], cmd->i[1], type);
+	error(Etimeout);	/* the handler above drops cmdl */
+	return ~0;	/* silence!
*/ +} + +/* remove this garbage after testing */ +enum { + DMAread = 0x10000, + DMAwrite= 0x1, +}; + +u32int +dmatestcmd(Ctlr *c, int type, u64int addr, int len) +{ + u32int buf[16], i; + + memset(buf, 0, sizeof buf); + memset(c->cmd, Noconf, sizeof *c->cmd); + buf[0] = Cdmatest; + buf[1] = addr; + buf[2] = addr >> 32; + buf[3] = len * type; + buf[4] = c->cprt >> 32; + buf[5] = c->cprt; + prepcmd(buf, 6); + coherence(); + memmove(c->ram + Cmdoff, buf, sizeof buf); + + if(waserror()) + nexterror(); + for(i = 0; i < 15; i++){ + if(c->cmd->i[1] != Noconf){ + i = gbit32(c->cmd->c); + if(i == 0) + error(Eio); + poperror(); + return i; + } + tsleep(&up->sleep, return0, 0, 5); + } + error(Etimeout); + return ~0; /* silence! */ +} + +u32int +rdmacmd(Ctlr *c, int on) +{ + u32int buf[16], i; + + memset(buf, 0, sizeof buf); + c->cmd->i[0] = 0; + coherence(); + buf[0] = c->cprt >> 32; + buf[1] = c->cprt; + buf[2] = Noconf; + buf[3] = c->cprt >> 32; + buf[4] = c->cprt; + buf[5] = on; + prepcmd(buf, 6); + memmove(c->ram + Rdmaoff, buf, sizeof buf); + + if(waserror()) + nexterror(); + for(i = 0; i < 20; i++){ + if(c->cmd->i[0] == Noconf){ + poperror(); + return gbit32(c->cmd->c); + } + tsleep(&up->sleep, return0, 0, 1); + } + error(Etimeout); + iprint("m10g: rdmacmd timeout\n"); + return ~0; /* silence! */ +} + +static int +loadfw(Ctlr *c, int *align) +{ + uint *f, *s, sz; + int i; + + if((*align = whichfw(c->pcidev)) == 4*KiB){ + f = (u32int*)fw4k; + sz = sizeof fw4k; + }else{ + f = (u32int*)fw2k; + sz = sizeof fw2k; + } + + s = (u32int*)(c->ram + Fwoffset); + for(i = 0; i < sz / 4; i++) + s[i] = f[i]; + return sz & ~3; +} + +static int +bootfw(Ctlr *c) +{ + int i, sz, align; + uint buf[16]; + Cmd* cmd; + + if((sz = loadfw(c, &align)) == 0) + return 0; + dprint("bootfw %d bytes ... 
", sz); + cmd = c->cmd; + + memset(buf, 0, sizeof buf); + c->cmd->i[0] = 0; + coherence(); + buf[0] = c->cprt >> 32; /* upper dma target address */ + buf[1] = c->cprt; /* lower */ + buf[2] = Noconf; /* writeback */ + buf[3] = Fwoffset + 8, + buf[4] = sz - 8; + buf[5] = 8; + buf[6] = 0; + prepcmd(buf, 7); + coherence(); + memmove(c->ram + Fwsubmt, buf, sizeof buf); + + for(i = 0; i < 20; i++){ + if(cmd->i[0] == Noconf) + break; + delay(1); + } + dprint("[%ux %ux]", gbit32(cmd->c), gbit32(cmd->c+4)); + if(i == 20){ + print("m10g: cannot load fw\n"); + return -1; + } + dprint("\n"); + c->tx.segsz = align; + return 0; +} + +static int +kickthebaby(Pcidev *p, Ctlr *c) +{ + /* don't kick the baby! */ + u32int code; + + pcicfgw8(p, 0x10 + c->boot, 0x3); + pcicfgw32(p, 0x18 + c->boot, 0xfffffff0); + code = pcicfgr32(p, 0x14 + c->boot); + + dprint("reboot status = %ux\n", code); + if(code != 0xfffffff0) + return -1; + return 0; +} + +typedef struct { + uchar len[4]; + uchar type[4]; + char version[128]; + uchar globals[4]; + uchar ramsz[4]; + uchar specs[4]; + uchar specssz[4]; +} Fwhdr; + +enum { + Tmx = 0x4d582020, + Tpcie = 0x70636965, + Teth = 0x45544820, + Tmcp0 = 0x4d435030, +}; + +static char * +fwtype(u32int type) +{ + switch(type){ + case Tmx: + return "mx"; + case Tpcie: + return "PCIe"; + case Teth: + return "eth"; + case Tmcp0: + return "mcp0"; + } + return "*GOK*"; +} + +static int +chkfw(Ctlr *c) +{ + uintptr off; + Fwhdr *h; + u32int type; + + off = gbit32(c->ram+0x3c); + dprint("firmware %llux\n", (u64int)off); + if((off&3) || off + sizeof *h > c->ramsz){ + print("!m10g: bad firmware %llux\n", (u64int)off); + return -1; + } + h = (Fwhdr*)(c->ram + off); + type = gbit32(h->type); + dprint("\t" "type %s\n", fwtype(type)); + dprint("\t" "vers %s\n", h->version); + dprint("\t" "ramsz %ux\n", gbit32(h->ramsz)); + if(type != Teth){ + print("!m10g: bad card type %s\n", fwtype(type)); + return -1; + } + + return bootfw(c) || rdmacmd(c, 0); +} + +static int 
+reset(Ether *e, Ctlr *c) +{ + u32int i, sz; + + if(waserror()){ + print("m10g: reset error\n"); + nexterror(); + return -1; + } + + chkfw(c); + cmd(c, Creset, 0); + + cmd(c, CSintrqsz, c->done.n * sizeof *c->done.entry); + cmd(c, CSintrqdma, c->done.busaddr); + c->irqack = (u32int*)(c->ram + cmd(c, CGirqackoff, 0)); + /* required only if we're not doing msi? */ + c->irqdeass = (u32int*)(c->ram + cmd(c, CGirqdeassoff, 0)); + /* this is the driver default, why fiddle with this? */ + c->coal = (u32int*)(c->ram + cmd(c, CGcoaloff, 0)); + *c->coal = pbit32(25); + + dprint("dma stats:\n"); + rdmacmd(c, 1); + sz = c->tx.segsz; + i = dmatestcmd(c, DMAread, c->done.busaddr, sz); + print("\t" "read: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff)); + i = dmatestcmd(c, DMAwrite, c->done.busaddr, sz); + print("\t" "write: %ud MB/s\n", ((i>>16)*sz*2)/(i&0xffff)); + i = dmatestcmd(c, DMAwrite|DMAread, c->done.busaddr, sz); + print("\t" "r/w: %ud MB/s\n", ((i>>16)*sz*2*2)/(i&0xffff)); + memset(c->done.entry, 0, c->done.n * sizeof *c->done.entry); + + maccmd(c, CSmac, c->ra); +// cmd(c, Cnopromisc, 0); + cmd(c, Cenablefc, 0); + e->maxmtu = Maxmtu; + cmd(c, CSmtu, e->maxmtu); + dprint("CSmtu %d...\n", e->maxmtu); + + poperror(); + return 0; +} + +static void +ctlrfree(Ctlr *c) +{ + /* free up all the Block*s, too */ + free(c->tx.host); + free(c->sm.host); + free(c->bg.host); + free(c->cmd); + free(c->done.entry); + free(c->stats); + free(c); +} + +static int +setmem(Pcidev *p, Ctlr *c) +{ + u32int i; + u64int raddr; + Done *d; + void *mem; + + c->tx.segsz = 2048; + c->ramsz = 2*MiB - (2*48*KiB + 32*KiB) - 0x100; + if(c->ramsz > p->mem[0].size) + return -1; + + raddr = p->mem[0].bar & ~0x0F; + mem = vmap(raddr, p->mem[0].size); + if(mem == nil){ + print("m10g: can't map %8.8lux\n", p->mem[0].bar); + return -1; + } + dprint("%llux <- vmap(mem[0].size = %ux)\n", raddr, p->mem[0].size); + c->port = raddr; + c->ram = mem; + c->cmd = malign(sizeof *c->cmd); + c->cprt = PCIWADDR(c->cmd); + + d = 
&c->done; + d->n = Maxslots; + d->m = d->n - 1; + i = d->n * sizeof *d->entry; + d->entry = malign(i); + memset(d->entry, 0, i); + d->busaddr = PCIWADDR(d->entry); + + c->stats = malign(sizeof *c->stats); + memset(c->stats, 0, sizeof *c->stats); + c->statsprt = PCIWADDR(c->stats); + + memmove(c->eprom, c->ram + c->ramsz - Epromsz, Epromsz-2); + return setpcie(p) || parseeprom(c); +} + +static Rx* +whichrx(Ctlr *c, int sz) +{ + if(sz <= smpool.size) + return &c->sm; + return &c->bg; +} + +static Block* +balloc(Rx* rx) +{ + Block *b; + + ilock(rx->pool); + if((b = rx->pool->head) != nil){ + rx->pool->head = b->next; + b->next = nil; + rx->pool->n--; + } + iunlock(rx->pool); + return b; +} + +static void +smbfree(Block *b) +{ + Bpool *p; + + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, 4*KiB); + b->flag &= ~(Bpktck|Btcpck|Budpck|Bipck); + + p = &smpool; + ilock(p); + b->next = p->head; + p->head = b; + p->n++; + p->cnt++; + iunlock(p); +} + +static void +bgbfree(Block *b) +{ + Bpool *p; + + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, 4*KiB); + b->flag &= ~(Bpktck|Btcpck|Budpck|Bipck); + + p = &bgpool; + ilock(p); + b->next = p->head; + p->head = b; + p->n++; + p->cnt++; + iunlock(p); +} + +static void +replenish(Rx *rx) +{ + u32int buf[16], i, idx, e; + Bpool *p; + Block *b; + + p = rx->pool; + if(p->n < 8) + return; + memset(buf, 0, sizeof buf); + e = (rx->i - rx->cnt) & ~7; + e += rx->n; + while(p->n >= 8 && e){ + idx = rx->cnt & rx->m; + for(i = 0; i < 8; i++){ + b = balloc(rx); + buf[i*2] = pbit32((u64int)PCIWADDR(b->wp) >> 32); + buf[i*2+1] = pbit32(PCIWADDR(b->wp)); + rx->host[idx+i] = b; + assert(b); + } + memmove(rx->lanai + 2*idx, buf, sizeof buf); + coherence(); + rx->cnt += 8; + e -= 8; + } + if(e && p->n > 7+1) + print("should panic? 
pool->n = %d\n", p->n); +} + +/* + * future: + * if (c->mtrr >= 0) { + * c->tx.wcfifo = c->ram+0x200000; + * c->sm.wcfifo = c->ram+0x300000; + * c->bg.wcfifo = c->ram+0x340000; + * } + */ + +static int +nextpow(int j) +{ + int i; + + for(i = 0; j > (1 << i); i++) + ; + return 1 << i; +} + +static void* +emalign(int sz) +{ + void *v; + + v = malign(sz); + if(v == nil) + error(Enomem); + memset(v, 0, sz); + return v; +} + +static void +open0(Ether *e, Ctlr *c) +{ + Block *b; + int i, sz, entries; + + entries = cmd(c, CGsendrgsz, 0) / sizeof *c->tx.lanai; + c->tx.lanai = (Send*)(c->ram + cmd(c, CGsendoff, 0)); + c->tx.host = emalign(entries * sizeof *c->tx.host); + c->tx.bring = emalign(entries * sizeof *c->tx.bring); + c->tx.n = entries; + c->tx.m = entries-1; + + entries = cmd(c, CGrxrgsz, 0)/8; + c->sm.pool = &smpool; + cmd(c, CSsmallsz, c->sm.pool->size); + c->sm.lanai = (u32int*)(c->ram + cmd(c, CGsmallrxoff, 0)); + c->sm.n = entries; + c->sm.m = entries-1; + c->sm.host = emalign(entries * sizeof *c->sm.host); + + c->bg.pool = &bgpool; + c->bg.pool->size = nextpow(2 + e->maxmtu); /* 2-byte alignment pad */ + cmd(c, CSbigsz, c->bg.pool->size); + c->bg.lanai = (u32int*)(c->ram + cmd(c, CGbigrxoff, 0)); + c->bg.n = entries; + c->bg.m = entries-1; + c->bg.host = emalign(entries * sizeof *c->bg.host); + + sz = c->sm.pool->size + 4*KiB; + for(i = 0; i < c->sm.n; i++){ + if((b = allocb(sz)) == 0) + break; + b->free = smbfree; + freeb(b); + } + sz = c->bg.pool->size + 4*KiB; + for(i = 0; i < c->bg.n; i++){ + if((b = allocb(sz)) == 0) + break; + b->free = bgbfree; + freeb(b); + } + + cmd(c, CSstatsdma, c->statsprt); + c->linkstat = ~0; + c->nrdma = 15; + + cmd(c, Cetherup, 0); +} + +static Block* +nextblock(Ctlr *c) +{ + uint i; + u16int l, k; + Block *b; + Done *d; + Rx *rx; + Slot *s; + Slotparts *sp; + + d = &c->done; + s = d->entry; + i = d->i & d->m; + sp = (Slotparts *)(s + i); + l = sp->len; + if(l == 0) + return 0; + k = sp->cksum; + s[i] = 0; + d->i++; + l = 
gbit16((uchar*)&l); +//dprint("nextb: i=%d l=%d\n", d->i, l); + rx = whichrx(c, l); + if(rx->i >= rx->cnt){ + iprint("m10g: overrun\n"); + return 0; + } + i = rx->i & rx->m; + b = rx->host[i]; + rx->host[i] = 0; + if(b == 0){ + iprint("m10g: error rx to no block. memory is hosed.\n"); + return 0; + } + rx->i++; + + b->flag |= Bipck|Btcpck|Budpck; + b->checksum = k; + b->rp += 2; + b->wp += 2+l; + b->lim = b->wp; /* lie like a dog. */ + return b; +} + +static int +rxcansleep(void *v) +{ + Ctlr *c; + Slot *s; + Slotparts *sp; + Done *d; + + c = v; + d = &c->done; + s = c->done.entry; + sp = (Slotparts *)(s + (d->i & d->m)); + if(sp->len != 0) + return -1; + c->irqack[0] = pbit32(3); + return 0; +} + +static void +m10rx(void *v) +{ + Ether *e; + Ctlr *c; + Block *b; + + e = v; + c = e->ctlr; + for(;;){ + replenish(&c->sm); + replenish(&c->bg); + sleep(&c->rxrendez, rxcansleep, c); + while(b = nextblock(c)) + etheriq(e, b, 1); + } +} + +static void +txcleanup(Tx *tx, u32int n) +{ + Block *b; + uint j, l, m; + + if(tx->npkt == n) + return; + l = 0; + m = tx->m; + /* + * if tx->cnt == tx->i, yet tx->npkt == n-1, we just + * caught ourselves and myricom card updating. 
+ */ + for(;; tx->cnt++){ + j = tx->cnt & tx->m; + if(b = tx->bring[j]){ + tx->bring[j] = 0; + tx->nbytes += BLEN(b); + freeb(b); + if(++tx->npkt == n) + return; + } + if(tx->cnt == tx->i) + return; + if(l++ == m){ + iprint("tx ovrun: %ud %uld\n", n, tx->npkt); + return; + } + } +} + +static int +txcansleep(void *v) +{ + Ctlr *c; + + c = v; + if(c->tx.cnt != c->tx.i && c->tx.npkt != gbit32(c->stats->txcnt)) + return -1; + return 0; +} + +static void +txproc(void *v) +{ + Ether *e; + Ctlr *c; + Tx *tx; + + e = v; + c = e->ctlr; + tx = &c->tx; + for(;;){ + sleep(&c->txrendez, txcansleep, c); + txcleanup(tx, gbit32(c->stats->txcnt)); + } +} + +static void +submittx(Tx *tx, int n) +{ + Send *l, *h; + int i0, i, m; + + m = tx->m; + i0 = tx->i & m; + l = tx->lanai; + h = tx->host; + for(i = n-1; i >= 0; i--) + memmove(l+(i + i0 & m), h+(i + i0 & m), sizeof *h); + tx->i += n; +// coherence(); +} + +static int +nsegments(Block *b, int segsz) +{ + uintptr bus, end, slen, len; + int i; + + bus = PCIWADDR(b->rp); + i = 0; + for(len = BLEN(b); len; len -= slen){ + end = bus + segsz & ~(segsz-1); + slen = end - bus; + if(slen > len) + slen = len; + bus += slen; + i++; + } + return i; +} + +static void +m10gtransmit(Ether *e) +{ + u16int slen; + u32int i, cnt, rdma, nseg, count, end, bus, len, segsz; + uchar flags; + Block *b; + Ctlr *c; + Send *s, *s0, *s0m8; + Tx *tx; + + c = e->ctlr; + tx = &c->tx; + segsz = tx->segsz; + + qlock(tx); + count = 0; + s = tx->host + (tx->i & tx->m); + cnt = tx->cnt; + s0 = tx->host + (cnt & tx->m); + s0m8 = tx->host + ((cnt - 8) & tx->m); + i = tx->i; + for(; s >= s0 || s < s0m8; i += nseg){ + if((b = qget(e->oq)) == nil) + break; + flags = SFfirst|SFnotso; + if((len = BLEN(b)) < 1520) + flags |= SFsmall; + rdma = nseg = nsegments(b, segsz); + bus = PCIWADDR(b->rp); + for(; len; len -= slen){ + end = bus + segsz & ~(segsz-1); + slen = end - bus; + if(slen > len) + slen = len; + s->low = pbit32(bus); + s->len = pbit16(slen); + s->nrdma = rdma; + 
s->flags = flags; + + bus += slen; + if(++s == tx->host + tx->n) + s = tx->host; + count++; + flags &= ~SFfirst; + rdma = 1; + } + tx->bring[i + nseg - 1 & tx->m] = b; + if(1 || count > 0){ + submittx(tx, count); + count = 0; + cnt = tx->cnt; + s0 = tx->host + (cnt & tx->m); + s0m8 = tx->host + ((cnt - 8) & tx->m); + } + } + qunlock(tx); +} + +static void +checkstats(Ether *e, Ctlr *c, Stats *s) +{ + u32int i; + + if(s->updated == 0) + return; + + i = gbit32(s->linkstat); + if(c->linkstat != i){ + e->link = i; + if(c->linkstat = i) + dprint("m10g: link up\n"); + else + dprint("m10g: link down\n"); + } + i = gbit32(s->nrdma); + if(i != c->nrdma){ + dprint("m10g: rdma timeout %d\n", i); + c->nrdma = i; + } +} + +static void +waitintx(Ctlr *c) +{ + int i; + + for(i = 0; i < 1024*1024; i++){ + if(c->stats->valid == 0) + break; + coherence(); + } +} + +static void +m10ginterrupt(Ureg *, void *v) +{ + Ether *e; + Ctlr *c; + + e = v; + c = e->ctlr; + + if(c->state != Runed || c->stats->valid == 0) /* not ready for us? 
*/ + return; + + if(c->stats->valid & 1) + wakeup(&c->rxrendez); + if(gbit32(c->stats->txcnt) != c->tx.npkt) + wakeup(&c->txrendez); + if(c->msi == 0) + *c->irqdeass = 0; + else + c->stats->valid = 0; + waitintx(c); + checkstats(e, c, c->stats); + c->irqack[1] = pbit32(3); +} + +static void +m10gattach(Ether *e) +{ + Ctlr *c; + char name[12]; + + dprint("m10gattach\n"); + + qlock(e->ctlr); + c = e->ctlr; + if(c->state != Detached){ + qunlock(c); + return; + } + if(waserror()){ + c->state = Detached; + qunlock(c); + nexterror(); + } + reset(e, c); + c->state = Attached; + open0(e, c); + if(c->kprocs == 0){ + c->kprocs++; + snprint(name, sizeof name, "#l%drxproc", e->ctlrno); + kproc(name, m10rx, e); + snprint(name, sizeof name, "#l%dtxproc", e->ctlrno); + kproc(name, txproc, e); + } + c->state = Runed; + qunlock(c); + poperror(); +} + +static int +m10gdetach(Ctlr *c) +{ + dprint("m10gdetach\n"); +// reset(e->ctlr); + vunmap(c->ram, c->pcidev->mem[0].size); + ctlrfree(c); + return -1; +} + +static int +lstcount(Block *b) +{ + int i; + + i = 0; + for(; b; b = b->next) + i++; + return i; +} + +static long +m10gifstat(Ether *e, void *v, long n, ulong off) +{ + int l, lim; + char *p; + Ctlr *c; + Stats s; + + c = e->ctlr; + lim = 2*READSTR-1; + p = malloc(lim+1); + l = 0; + /* no point in locking this because this is done via dma. 
*/ + memmove(&s, c->stats, sizeof s); + + // l += + snprint(p+l, lim, + "txcnt = %ud\n" "linkstat = %ud\n" "dlink = %ud\n" + "derror = %ud\n" "drunt = %ud\n" "doverrun = %ud\n" + "dnosm = %ud\n" "dnobg = %ud\n" "nrdma = %ud\n" + "txstopped = %ud\n" "down = %ud\n" "updated = %ud\n" + "valid = %ud\n\n" + "tx pkt = %uld\n" "tx bytes = %lld\n" + "tx cnt = %ud\n" "tx n = %ud\n" "tx i = %ud\n" + "sm cnt = %ud\n" "sm i = %ud\n" "sm n = %ud\n" + "sm lst = %ud\n" + "bg cnt = %ud\n" "bg i = %ud\n" "bg n = %ud\n" + "bg lst = %ud\n" + "segsz = %ud\n" "coal = %d\n", + gbit32(s.txcnt), gbit32(s.linkstat), gbit32(s.dlink), + gbit32(s.derror), gbit32(s.drunt), gbit32(s.doverrun), + gbit32(s.dnosm), gbit32(s.dnobg), gbit32(s.nrdma), + s.txstopped, s.down, s.updated, s.valid, + c->tx.npkt, c->tx.nbytes, + c->tx.cnt, c->tx.n, c->tx.i, + c->sm.cnt, c->sm.i, c->sm.pool->n, lstcount(c->sm.pool->head), + c->bg.cnt, c->bg.i, c->bg.pool->n, lstcount(c->bg.pool->head), + c->tx.segsz, gbit32((uchar*)c->coal)); + + n = readstr(off, v, n, p); + free(p); + return n; +} + +//static void +//summary(Ether *e) +//{ +// char *buf; +// int n, i, j; +// +// if(e == 0) +// return; +// buf = malloc(n=250); +// if(buf == 0) +// return; +// +// snprint(buf, n, "oq\n"); +// qsummary(e->oq, buf+3, n-3-1); +// iprint("%s", buf); +// +// if(e->f) for(i = 0; e->f[i]; i++){ +// j = snprint(buf, n, "f%d %d\n", i, e->f[i]->type); +// qsummary(e->f[i]->in, buf+j, n-j-1); +// print("%s", buf); +// } +// +// free(buf); +//} + +static void +rxring(Ctlr *c) +{ + Done *d; + Slot *s; + Slotparts *sp; + int i; + + d = &c->done; + s = d->entry; + for(i = 0; i < d->n; i++) { + sp = (Slotparts *)(s + i); + if(sp->len) + iprint("s[%d] = %d\n", i, sp->len); + } +} + +enum { + CMdebug, + CMcoal, + CMwakeup, + CMtxwakeup, + CMqsummary, + CMrxring, +}; + +static Cmdtab ctab[] = { + CMdebug, "debug", 2, + CMcoal, "coal", 2, + CMwakeup, "wakeup", 1, + CMtxwakeup, "txwakeup", 1, +// CMqsummary, "q", 1, + CMrxring, "rxring", 1, +}; 
+
+/*
+ * Handle a write to the interface's ctl file.  The request is
+ * parsed against ctab:
+ *	debug on|...	turn dprint output on ("on") or off (anything else)
+ *	coal n		store n (0..1000) through the card's coal pointer
+ *	wakeup		wake the rx kproc
+ *	txwakeup	wake the tx kproc
+ *	rxring		print the non-empty done-ring slots
+ * Returns n; raises an error on a bad request.
+ */
+static long
+m10gctl(Ether *e, void *v, long n)
+{
+	int i;
+	Cmdbuf *c;
+	Cmdtab *t;
+
+	dprint("m10gctl\n");
+	if(e->ctlr == nil)
+		error(Enonexist);
+
+	c = parsecmd(v, n);
+	if(waserror()){
+		free(c);
+		nexterror();
+	}
+	t = lookupcmd(c, ctab, nelem(ctab));
+	switch(t->index){
+	case CMdebug:
+		debug = (strcmp(c->f[1], "on") == 0);
+		break;
+	case CMcoal:
+		i = atoi(c->f[1]);
+		if(i < 0 || i > 1000)
+			error(Ebadarg);
+		*((Ctlr*)e->ctlr)->coal = pbit32(i);
+		break;
+	case CMwakeup:
+		wakeup(&((Ctlr*)e->ctlr)->rxrendez);	/* you're kidding, right? */
+		break;
+	case CMtxwakeup:
+		wakeup(&((Ctlr*)e->ctlr)->txrendez);	/* you're kidding, right? */
+		break;
+//	case CMqsummary:
+//		summary(e);
+//		break;
+	case CMrxring:
+		rxring(e->ctlr);
+		break;
+	default:
+		error(Ebadarg);
+	}
+	free(c);
+	poperror();
+	return n;
+}
+
+/* shutdown (also installed as detach): hand the controller to m10gdetach */
+static void
+m10gshutdown(Ether *e)
+{
+	dprint("m10gshutdown\n");
+	m10gdetach(e->ctlr);
+}
+
+/* send Cpromisc or Cnopromisc to the card according to `on' */
+static void
+m10gpromiscuous(void *v, int on)
+{
+	Ether *e;
+	int i;
+
+	dprint("m10gpromiscuous\n");
+	e = v;
+	if(on)
+		i = Cpromisc;
+	else
+		i = Cnopromisc;
+	cmd(e->ctlr, i, 0);
+}
+
+static int mcctab[] = { CSleavemc, CSjoinmc };
+static char *mcntab[] = { "leave", "join" };
+
+/*
+ * Join (on != 0) or leave (on == 0) the multicast group ea.
+ * A non-zero reply from the card is reported but not raised.
+ */
+static void
+m10gmulticast(void *v, uchar *ea, int on)
+{
+	Ether *e;
+	int i;
+
+	dprint("m10gmulticast\n");
+	e = v;
+	on = on != 0;	/* index for the two-entry tables above */
+	if((i = maccmd(e->ctlr, mcctab[on], ea)) != 0)
+		print("m10g: can't %s %E: %d\n", mcntab[on], ea, i);
+}
+
+/*
+ * Find every PCI device with vid 0x14c1, did 0x0008, allocate and
+ * set up a Ctlr for it with setmem() and append it to the ctlrs
+ * list.  A device whose setup fails is released and skipped.
+ */
+static void
+m10gpci(void)
+{
+	Pcidev *p;
+	Ctlr *t, *c;
+
+	t = 0;
+	for(p = 0; p = pcimatch(p, 0x14c1, 0x0008); ){
+		c = malloc(sizeof *c);
+		if(c == nil)
+			continue;
+		memset(c, 0, sizeof *c);
+		c->pcidev = p;
+		c->id = p->did<<16 | p->vid;
+		c->boot = pcicap(p, PciCapVND);
+//		kickthebaby(p, c);
+		pcisetbme(p);
+		/*
+		 * setmem returns -1 for its own failures but ends with
+		 * `setpcie(p) || parseeprom(c)', which is 1 on failure,
+		 * so test for any non-zero result, not just -1.
+		 */
+		if(setmem(p, c) != 0){
+			print("m10g failed\n");
+			/* c was zeroed, so ram is nil unless setmem mapped it */
+			if(c->ram != nil)
+				vunmap(c->ram, p->mem[0].size);
+			ctlrfree(c);	/* frees cmd, done ring, stats and c */
+			continue;
+		}
+		if(t)
+			t->next = c;
+		else
+			ctlrs = c;
+		t = c;
+	}
+}
+
+static int
+m10gpnp(Ether *e)
+{
+	Ctlr *c;
+
+	if(ctlrs == nil)
+		m10gpci();
+ + for(c = ctlrs; c != nil; c = c->next) + if(c->active) + continue; + else if(e->port == 0 || e->port == c->port) + break; + if(c == nil) + return -1; + c->active = 1; + + e->ctlr = c; + e->port = c->port; + e->irq = c->pcidev->intl; + e->tbdf = c->pcidev->tbdf; + e->mbps = 10000; + memmove(e->ea, c->ra, Eaddrlen); + + e->attach = m10gattach; + e->detach = m10gshutdown; + e->transmit = m10gtransmit; + e->interrupt = m10ginterrupt; + e->ifstat = m10gifstat; + e->ctl = m10gctl; +// e->power = m10gpower; + e->shutdown = m10gshutdown; + + e->arg = e; + e->promiscuous = m10gpromiscuous; + e->multicast = m10gmulticast; + + return 0; +} + +void +etherm10glink(void) +{ + addethercard("m10g", m10gpnp); +} diff -Nru 0/sys/src/nix/386/kbd.c 4/sys/src/nix/386/kbd.c --- 0/sys/src/nix/386/kbd.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/kbd.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,639 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "io.h" + +enum { + Data= 0x60, /* data port */ + + Status= 0x64, /* status port */ + Inready= 0x01, /* input character ready */ + Outbusy= 0x02, /* output busy */ + Sysflag= 0x04, /* system flag */ + Cmddata= 0x08, /* cmd==0, data==1 */ + Inhibit= 0x10, /* keyboard/mouse inhibited */ + Minready= 0x20, /* mouse character ready */ + Rtimeout= 0x40, /* general timeout */ + Parity= 0x80, + + Cmd= 0x64, /* command port (write only) */ + + Spec= 0xF800, /* Unicode private space */ + PF= Spec|0x20, /* num pad function key */ + View= Spec|0x00, /* view (shift window up) */ + KF= 0xF000, /* function key (begin Unicode private space) */ + Shift= Spec|0x60, + Break= Spec|0x61, + Ctrl= Spec|0x62, + Latin= Spec|0x63, + Caps= Spec|0x64, + Num= Spec|0x65, + Middle= Spec|0x66, + Altgr= Spec|0x67, + Kmouse= Spec|0x100, + No= 0x00, /* peter */ + + Home= KF|13, + Up= KF|14, + Pgup= KF|15, + Print= KF|16, + Left= KF|17, + Right= KF|18, + End= KF|24, + Down= View, + Pgdown= KF|19, 
+ Ins= KF|20, + Del= 0x7F, + Scroll= KF|21, + + Nscan= 128, +}; + +/* + * The codes at 0x79 and 0x81 are produed by the PFU Happy Hacking keyboard. + * A 'standard' keyboard doesn't produce anything above 0x58. + */ +Rune kbtab[Nscan] = +{ +[0x00] No, 0x1b, '1', '2', '3', '4', '5', '6', +[0x08] '7', '8', '9', '0', '-', '=', '\b', '\t', +[0x10] 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', +[0x18] 'o', 'p', '[', ']', '\n', Ctrl, 'a', 's', +[0x20] 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', +[0x28] '\'', '`', Shift, '\\', 'z', 'x', 'c', 'v', +[0x30] 'b', 'n', 'm', ',', '.', '/', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, Scroll, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, View, No, Up, No, No, No, No, +}; + +Rune kbtabshift[Nscan] = +{ +[0x00] No, 0x1b, '!', '@', '#', '$', '%', '^', +[0x08] '&', '*', '(', ')', '_', '+', '\b', '\t', +[0x10] 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', +[0x18] 'O', 'P', '{', '}', '\n', Ctrl, 'A', 'S', +[0x20] 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', +[0x28] '"', '~', Shift, '|', 'Z', 'X', 'C', 'V', +[0x30] 'B', 'N', 'M', '<', '>', '?', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, Scroll, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, Up, No, No, No, No, +}; + +Rune kbtabesc1[Nscan] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, 
No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Altgr, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, No, No, No, No, No, +}; + +Rune kbtabaltgr[Nscan] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Altgr, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, No, No, No, No, No, +}; + +Rune kbtabctrl[] = +{ +[0x00] No, '', '', '', '', '', '', '', +[0x08] '', '', '', '', ' ', '', '\b', '\t', +[0x10] '', '', '', '', '', '', '', '\t', +[0x18] '', '', '', '', '\n', Ctrl, '', '', +[0x20] '', '', '', '\b', '\n', ' ', ' ', '', +[0x28] '', No, Shift, '', '', '', '', '', +[0x30] '', '', ' ', ' ', '', '', Shift, '\n', +[0x38] Latin, No, Ctrl, '', '', '', '', '', +[0x40] '', '', ' ', ' ', '', '', '', '', +[0x48] '', '', ' ', '', '', '', ' ', '', +[0x50] '', '', '', '', No, No, No, '', +[0x58] ' ', No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, '', No, '\b', No, No, No, No, +}; + +enum +{ + /* controller command byte */ + Cscs1= (1<<6), /* scan code set 1 
*/ + Cauxdis= (1<<5), /* mouse disable */ + Ckbddis= (1<<4), /* kbd disable */ + Csf= (1<<2), /* system flag */ + Cauxint= (1<<1), /* mouse interrupt enable */ + Ckbdint= (1<<0), /* kbd interrupt enable */ +}; + +static Queue *kbdq; + +int mouseshifted; +void (*kbdmouse)(int); +static int nokbd = 1; + +static Lock i8042lock; +static uchar ccc; +static void (*auxputc)(int, int); + +/* + * wait for output no longer busy + */ +static int +outready(void) +{ + int tries; + + for(tries = 0; (inb(Status) & Outbusy); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * wait for input + */ +static int +inready(void) +{ + int tries; + + for(tries = 0; !(inb(Status) & Inready); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * ask 8042 to reset the machine + */ +void +i8042reset(void) +{ + ushort *s = KADDR(0x472); + int i, x; + + if(nokbd) + return; + + *s = 0x1234; /* BIOS warm-boot flag */ + + /* + * newer reset the machine command + */ + outready(); + outb(Cmd, 0xFE); + outready(); + + /* + * Pulse it by hand (old somewhat reliable) + */ + x = 0xDF; + for(i = 0; i < 5; i++){ + x ^= 1; + outready(); + outb(Cmd, 0xD1); + outready(); + outb(Data, x); /* toggle reset */ + delay(100); + } +} + +int +i8042auxcmd(int cmd) +{ + unsigned int c; + int tries; + + c = 0; + tries = 0; + + ilock(&i8042lock); + do{ + if(tries++ > 2) + break; + if(outready() < 0) + break; + outb(Cmd, 0xD4); + if(outready() < 0) + break; + outb(Data, cmd); + if(outready() < 0) + break; + if(inready() < 0) + break; + c = inb(Data); + } while(c == 0xFE || c == 0); + iunlock(&i8042lock); + + if(c != 0xFA){ + print("i8042: %2.2ux returned to the %2.2ux command\n", c, cmd); + return -1; + } + return 0; +} + +int +i8042auxcmds(uchar *cmd, int ncmd) +{ + int i; + + ilock(&i8042lock); + for(i=0; i sizeof kbtab){ + c |= keyup; + if(c != 0xFF) /* these come fairly often: CAPSLOCK U Y */ + print("unknown key %ux\n", c); + return; + } + + if(kbscan.esc1){ + 
c = kbtabesc1[c]; + kbscan.esc1 = 0; + } else if(kbscan.esc2){ + kbscan.esc2--; + return; + } else if(kbscan.shift) + c = kbtabshift[c]; + else if(kbscan.altgr) + c = kbtabaltgr[c]; + else if(kbscan.ctl) + c = kbtabctrl[c]; + else + c = kbtab[c]; + + if(kbscan.caps && c<='z' && c>='a') + c += 'A' - 'a'; + + /* + * keyup only important for shifts + */ + if(keyup){ + switch(c){ + case Latin: + kbscan.alt = 0; + break; + case Shift: + kbscan.shift = 0; + mouseshifted = 0; + break; + case Ctrl: + kbscan.ctl = 0; + break; + case Altgr: + kbscan.altgr = 0; + break; + case Kmouse|1: + case Kmouse|2: + case Kmouse|3: + case Kmouse|4: + case Kmouse|5: + kbscan.buttons &= ~(1<<(c-Kmouse-1)); + if(kbdmouse) + kbdmouse(kbscan.buttons); + break; + } + return; + } + + /* + * normal character + */ + if(!(c & (Spec|KF))){ + if(kbscan.ctl) + if(kbscan.alt && c == Del) + exit(0); + if(!kbscan.collecting){ + kbdputc(kbdq, c); + return; + } + kbscan.kc[kbscan.nk++] = c; + c = latin1(kbscan.kc, kbscan.nk); + if(c < -1) /* need more keystrokes */ + return; + if(c != -1) /* valid sequence */ + kbdputc(kbdq, c); + else /* dump characters */ + for(i=0; i 0 && (c = inb(Status)) & (Outbusy | Inready)) { + if(c & Inready) + inb(Data); + delay(1); + } + if (try <= 0) { + print(initfailed); + return; + } + + /* get current controller command byte */ + outb(Cmd, 0x20); + if(inready() < 0){ + print("i8042: kbdinit can't read ccc\n"); + ccc = 0; + } else + ccc = inb(Data); + + /* enable kbd xfers and interrupts */ + ccc &= ~Ckbddis; + ccc |= Csf | Ckbdint | Cscs1; + if(outready() < 0) { + print(initfailed); + return; + } + + nokbd = 0; + + /* disable mouse */ + if (outbyte(Cmd, 0x60) < 0 || outbyte(Data, ccc) < 0) + print("i8042: kbdinit mouse disable failed\n"); +} + +void +kbdenable(void) +{ + kbdq = qopen(4*1024, 0, 0, 0); + if(kbdq == nil) + panic("kbdinit"); + qnoblock(kbdq, 1); + addkbdq(kbdq, -1); + + ioalloc(Data, 1, 0, "kbd"); + ioalloc(Cmd, 1, 0, "kbd"); + + intrenable(IrqKBD, i8042intr, 
0, BUSUNKNOWN, "kbd");
+}
+
+/*
+ * The scan code translation tables, in the order of the map
+ * numbers taken by kbdputmap and handed back by kbdgetmap.
+ */
+static Rune *kbdmaps[] = {
+	kbtab,
+	kbtabshift,
+	kbtabesc1,
+	kbtabaltgr,
+	kbtabctrl,
+};
+
+/*
+ * Set entry scanc of translation table m to rune r.
+ * An out-of-range scan code or table number raises Ebadarg
+ * (error() does not return).
+ */
+void
+kbdputmap(ushort m, ushort scanc, Rune r)
+{
+	if(scanc >= Nscan || m >= nelem(kbdmaps))
+		error(Ebadarg);
+	kbdmaps[m][scanc] = r;
+}
+
+/*
+ * Split offset into a table number (*t) and a scan code (*sc)
+ * and fetch the corresponding rune into *r.
+ * Returns 1 when *r was filled in and 0 when offset lies beyond
+ * the last table; a negative offset raises Ebadarg.
+ */
+int
+kbdgetmap(int offset, int *t, int *sc, Rune *r)
+{
+	*t = offset/Nscan;
+	*sc = offset%Nscan;
+	if(*t < 0 || *sc < 0)
+		error(Ebadarg);
+	if(*t >= nelem(kbdmaps))
+		return 0;
+	*r = kbdmaps[*t][*sc];
+	return 1;
+}
diff -Nru 0/sys/src/nix/386/pci.c 4/sys/src/nix/386/pci.c
--- 0/sys/src/nix/386/pci.c Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/386/pci.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,758 @@
+/*
+ * PCI support code.
+ * Needs a massive rewrite.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "io.h"
+
+enum
+{
+	PciADDR		= 0xCF8,		/* CONFIG_ADDRESS */
+	PciDATA		= 0xCFC,		/* CONFIG_DATA */
+
+	Maxfn		= 7,
+	Maxdev		= 31,
+	Maxbus		= 255,
+
+	/* command register */
+	IOen		= (1<<0),
+	MEMen		= (1<<1),
+	MASen		= (1<<2),
+	MemWrInv	= (1<<4),
+	PErrEn		= (1<<6),
+	SErrEn		= (1<<8),
+
+	Write,
+	Read,
+};
+
+static Lock pcicfglock;
+static Lock pcicfginitlock;
+static int pcicfgmode = -1;
+static Pcidev* pciroot;
+static Pcidev* pcilist;
+static Pcidev* pcitail;
+
+static char* bustypes[] = {
+	"CBUSI",
+	"CBUSII",
+	"EISA",
+	"FUTURE",
+	"INTERN",
+	"ISA",
+	"MBI",
+	"MBII",
+	"MCA",
+	"MPI",
+	"MPSA",
+	"NUBUS",
+	"PCI",
+	"PCMCIA",
+	"TC",
+	"VL",
+	"VME",
+	"XPRESS",
+};
+
+static int pcicfgrw(int, int, int, int, int);
+
+/*
+ * Format routine for the 'T' verb: prints a tbdf as
+ * "bustype.bus.device.function", using the bus type's name from
+ * bustypes[] when there is one and its number otherwise.
+ * Any other verb yields "(tbdfconv)".
+ *
+ * The text is a short name (or one integer) followed by three
+ * integers, so it is built in a small stack buffer; there is no
+ * allocation that can fail, and the tbdf argument is therefore
+ * always consumed for 'T'.  The bus name is printed through "%s"
+ * rather than being used as the format itself.
+ */
+static int
+tbdffmt(Fmt* fmt)
+{
+	char buf[64];
+	int l;
+	uint type, tbdf;
+
+	if(fmt->r != 'T')
+		return fmtstrcpy(fmt, "(tbdfconv)");
+
+	tbdf = va_arg(fmt->args, uint);
+	type = BUSTYPE(tbdf);
+	if(type < nelem(bustypes))
+		l = snprint(buf, sizeof buf, "%s", bustypes[type]);
+	else
+		l = snprint(buf, sizeof buf, "%d", type);
+	snprint(buf+l, sizeof buf-l, ".%d.%d.%d",
+		BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf));
+
+	return fmtstrcpy(fmt, buf);
+}
+
+/*
+ * Size the base address register at config offset rno of p:
+ * save the register, write 0xFFFFFFF0, read back which bits
+ * stuck, restore the saved value and return the two's complement
+ * of the read-back with its low four bits cleared.
+ * When bit 0 of the saved value is set the upper 16 bits of the
+ * read-back are forced to ones first.
+ */
+static ulong
+pcibarsize(Pcidev *p, int rno)
+{
+	ulong v, size;
+
+	v = pcicfgr32(p, rno);
+	pcicfgw32(p, rno, 0xFFFFFFF0);
+	size = pcicfgr32(p, rno);
+	if(v & 1)
+		size |= 0xFFFF0000;
+	pcicfgw32(p, rno, v);
+
+	return -(size & ~0x0F);
+}
+
+static int
+pcilscan(int bno, Pcidev** list)
+{
+	Pcidev *p, *head, *tail;
+	int dno, fno, i, hdt, l, maxfno, maxubn, sbn, tbdf, ubn;
+
+	maxubn = bno;
+	head = nil;
+	tail = nil;
+	for(dno = 0; dno <= Maxdev; dno++){
+		maxfno = 0;
+		for(fno = 0; fno <= maxfno; fno++){
+			/*
+			 * For this possible device, form the
+			 * bus+device+function
triplet needed to address it
+			 * and try to read the vendor and device ID.
+			 * If successful, allocate a device struct and
+			 * start to fill it in with some useful information
+			 * from the device's configuration space.
+			 */
+			tbdf = MKBUS(BusPCI, bno, dno, fno);
+			l = pcicfgrw(tbdf, PciVID, 0, Read, 4);
+			if(l == 0xFFFFFFFF || l == 0)
+				continue;
+			/*
+			 * The new Pcidev is filled in and linked into
+			 * the lists straight away, so an allocation
+			 * failure must be caught here rather than
+			 * showing up as a nil dereference.
+			 */
+			p = malloc(sizeof(*p));
+			if(p == nil)
+				panic("pcilscan: no memory");
+			p->tbdf = tbdf;
+			p->vid = l;
+			p->did = l>>16;
+
+			/* append to the flat list of all devices */
+			if(pcilist != nil)
+				pcitail->list = p;
+			else
+				pcilist = p;
+			pcitail = p;
+
+			p->pcr = pcicfgr16(p, PciPCR);
+			p->rid = pcicfgr8(p, PciRID);
+			p->ccrp = pcicfgr8(p, PciCCRp);
+			p->ccru = pcicfgr8(p, PciCCRu);
+			p->ccrb = pcicfgr8(p, PciCCRb);
+			p->cls = pcicfgr8(p, PciCLS);
+			p->ltr = pcicfgr8(p, PciLTR);
+
+			p->intl = pcicfgr8(p, PciINTL);
+
+			/*
+			 * If the device is a multi-function device adjust the
+			 * loop count so all possible functions are checked.
+			 */
+			hdt = pcicfgr8(p, PciHDT);
+			if(hdt & 0x80)
+				maxfno = Maxfn;
+
+			/*
+			 * If appropriate, read the base address registers
+			 * and work out the sizes.
+			 */
+			switch(p->ccrb) {
+			default:
+				if((hdt & 0x7F) != 0)
+					break;
+				for(i = 0; i < nelem(p->mem); i++) {
+					p->mem[i].bar = pcicfgr32(p, PciBAR0+4*i);
+					p->mem[i].size = pcibarsize(p, PciBAR0+4*i);
+				}
+				break;
+
+			case 0x00:
+			case 0x05:		/* memory controller */
+			case 0x06:		/* bridge device */
+				break;
+			}
+
+			/* append to the list of devices on this bus */
+			if(head != nil)
+				tail->link = p;
+			else
+				head = p;
+			tail = p;
+		}
+	}
+
+	*list = head;
+	for(p = head; p != nil; p = p->link){
+		/*
+		 * Find PCI-PCI bridges and recursively descend the tree.
+		 */
+		if(p->ccrb != 0x06 || p->ccru != 0x04)
+			continue;
+
+		/*
+		 * If the secondary or subordinate bus number is not
+		 * initialised try to do what the PCI BIOS should have
+		 * done and fill in the numbers as the tree is descended.
+		 * On the way down the subordinate bus number is set to
+		 * the maximum as it's not known how many buses are behind
+		 * this one; the final value is set on the way back up.
+ */ + sbn = pcicfgr8(p, PciSBN); + ubn = pcicfgr8(p, PciUBN); + + if(sbn == 0 || ubn == 0) { + print("%T: unconfigured bridge\n", p->tbdf); + + sbn = maxubn+1; + /* + * Make sure memory, I/O and master enables are + * off, set the primary, secondary and subordinate + * bus numbers and clear the secondary status before + * attempting to scan the secondary bus. + * + * Initialisation of the bridge should be done here. + */ + pcicfgw32(p, PciPCR, 0xFFFF0000); + pcicfgw32(p, PciPBN, Maxbus<<16 | sbn<<8 | bno); + pcicfgw16(p, PciSPSR, 0xFFFF); + maxubn = pcilscan(sbn, &p->bridge); + pcicfgw32(p, PciPBN, maxubn<<16 | sbn<<8 | bno); + } + else { + /* + * You can't go back. + * This shouldn't be possible, but the + * Iwill DK8-HTX seems to have subordinate + * bus numbers which get smaller on the + * way down. Need to look more closely at + * this. + */ + if(ubn > maxubn) + maxubn = ubn; + pcilscan(sbn, &p->bridge); + } + } + + return maxubn; +} + +static uchar +pIIxget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 0x60, 0x61, 0x62, 0x63 */ + pirq = pcicfgr8(router, link); + return (pirq < 16)? pirq: 0; +} + +static void +pIIxset(Pcidev *router, uchar link, uchar irq) +{ + pcicfgw8(router, link, irq); +} + +static uchar +viaget(Pcidev *router, uchar link) +{ + uchar pirq; + + /* link should be 1, 2, 3, 5 */ + pirq = (link < 6)? pcicfgr8(router, 0x55 + (link>>1)): 0; + + return (link & 1)? (pirq >> 4): (pirq & 15); +} + +static void +viaset(Pcidev *router, uchar link, uchar irq) +{ + uchar pirq; + + pirq = pcicfgr8(router, 0x55 + (link >> 1)); + pirq &= (link & 1)? 0x0f: 0xf0; + pirq |= (link & 1)? 
(irq << 4): (irq & 15); + pcicfgw8(router, 0x55 + (link>>1), pirq); +} + +typedef struct Bridge Bridge; +struct Bridge +{ + ushort vid; + ushort did; + uchar (*get)(Pcidev *, uchar); + void (*set)(Pcidev *, uchar, uchar); +}; + +static Bridge southbridges[] = { + { 0x8086, 0xffff, pIIxget, pIIxset }, // Intel * + { 0x1106, 0x3227, viaget, viaset }, // Viatech VT8237 + + { 0x1022, 0x746B, nil, nil }, // AMD 8111 + { 0x10DE, 0x00D1, nil, nil }, // NVIDIA nForce 3 + { 0x1166, 0x0200, nil, nil }, // ServerWorks ServerSet III LE + { 0x1002, 0x4377, nil, nil }, // ATI Radeon Xpress 200M +}; + +typedef struct Slot Slot; +struct Slot { + uchar bus; // Pci bus number + uchar dev; // Pci device number + uchar maps[12]; // Avoid structs! Link and mask. + uchar slot; // Add-in/built-in slot + uchar reserved; +}; + +typedef struct Router Router; +struct Router { + uchar signature[4]; // Routing table signature + uchar version[2]; // Version number + uchar size[2]; // Total table size + uchar bus; // Interrupt router bus number + uchar devfn; // Router's devfunc + uchar pciirqs[2]; // Exclusive PCI irqs + uchar compat[4]; // Compatible PCI interrupt router + uchar miniport[4]; // Miniport data + uchar reserved[11]; + uchar checksum; +}; + + +static void +pcirouting(void) +{ + uchar *p, pin, irq, link, *map; + int size, i, fn, tbdf; + Bridge *southbridge; + Pcidev *sbpci, *pci; + Router *r; + Slot *e; + + // Search for PCI interrupt routing table in BIOS + for(p = (uchar *)KADDR(0xf0000); p < (uchar *)KADDR(0xfffff); p += 16) + if(p[0] == '$' && p[1] == 'P' && p[2] == 'I' && p[3] == 'R') + break; + + if(p >= (uchar *)KADDR(0xfffff)) + return; + + r = (Router *)p; + + if(0) + print("PCI interrupt routing table version %d.%d at %.6llux\n", + r->version[0], r->version[1], (uintptr)r & 0xfffff); + + tbdf = (BusPCI << 24)|(r->bus << 16)|(r->devfn << 8); + sbpci = pcimatchtbdf(tbdf); + if(sbpci == nil) { + print("pcirouting: Cannot find south bridge %T\n", tbdf); + return; + } + + 
for(i = 0; i != nelem(southbridges); i++) + if(sbpci->vid == southbridges[i].vid + && (sbpci->did == southbridges[i].did || southbridges[i].did == 0xffff)) + break; + + if(i == nelem(southbridges)) { + print("pcirouting: ignoring south bridge %T %.4ux/%.4ux\n", tbdf, sbpci->vid, sbpci->did); + return; + } + southbridge = &southbridges[i]; + if(southbridge->get == nil || southbridge->set == nil) + return; + + size = (r->size[1] << 8)|r->size[0]; + for(e = (Slot *)&r[1]; (uchar *)e < p + size; e++) { + if(0){ + print("%.2ux/%.2ux %.2ux: ", e->bus, e->dev, e->slot); + for (i = 0; i != 4; i++) { + uchar *m = &e->maps[i * 3]; + print("[%d] %.2ux %.4ux ", + i, m[0], (m[2] << 8)|m[1]); + } + print("\n"); + } + + for(fn = 0; fn <= Maxfn; fn++) { + tbdf = MKBUS(BusPCI, e->bus, e->dev, fn); + pci = pcimatchtbdf(tbdf); + if(pci == nil) + continue; + pin = pcicfgr8(pci, PciINTP); + if(pin == 0 || pin == 0xff) + continue; + + map = &e->maps[(pin - 1) * 3]; + link = map[0]; + irq = southbridge->get(sbpci, link); + if(irq == 0 || irq == pci->intl) + continue; + if(pci->intl != 0 && pci->intl != 0xFF) { + print("pcirouting: BIOS workaround: %T at pin %d link %d irq %d -> %d\n", + tbdf, pin, link, irq, pci->intl); + southbridge->set(sbpci, link, pci->intl); + continue; + } + print("pcirouting: %T at pin %d link %d irq %d\n", tbdf, pin, link, irq); + pcicfgw8(pci, PciINTL, irq); + pci->intl = irq; + } + } +} + +static void +pcireservemem(void) +{ + int i; + Pcidev *p; + + for(p = nil; p = pcimatch(p, 0, 0); ) + for(i=0; imem); i++) + if(p->mem[i].bar && (p->mem[i].bar&1) == 0) + asmmapinit(p->mem[i].bar&~0x0F, p->mem[i].size, 5); +} + +static void +pcicfginit(void) +{ + int sbno, bno, n; + Pcidev **list, *p; + + if(pcicfgmode != -1) + return; + lock(&pcicfginitlock); + if(pcicfgmode != -1){ + unlock(&pcicfginitlock); + return; + } + + fmtinstall('T', tbdffmt); + + /* + * Try to determine if PCI Mode1 configuration implemented. 
+ * (Bits [30:24] of PciADDR must be 0, according to the spec.) + * Mode2 won't appear in 64-bit machines. + */ + n = inl(PciADDR); + if(!(n & 0x7F000000)){ + outl(PciADDR, 0x80000000); + outb(PciADDR+3, 0); + if(inl(PciADDR) & 0x80000000) + pcicfgmode = 1; + } + outl(PciADDR, n); + + if(pcicfgmode < 0){ + unlock(&pcicfginitlock); + return; + } + + list = &pciroot; + for(bno = 0; bno <= Maxbus; bno++) { + sbno = bno; + bno = pcilscan(bno, list); + + while(*list) + list = &(*list)->link; + if(sbno != 0) + continue; + /* + * If we have found a PCI-to-Cardbus bridge, make sure + * it has no valid mappings anymore. + */ + for(p = pciroot; p != nil; p = p->link){ + if (p->ccrb == 6 && p->ccru == 7) { + /* reset the cardbus */ + pcicfgw16(p, PciBCR, 0x40 | pcicfgr16(p, PciBCR)); + delay(50); + } + } + } + + if(pciroot != nil && getconf("*nopcirouting") == nil) + pcirouting(); + pcireservemem(); + unlock(&pcicfginitlock); + + if(getconf("*pcihinv")) + pcihinv(nil); +} + +static int +pcicfgrw(int tbdf, int r, int data, int rw, int w) +{ + int o, x, er; + + pcicfginit(); + if(pcicfgmode != 1) + return -1; + if(BUSDNO(tbdf) > Maxdev) + return -1; + + lock(&pcicfglock); + o = r & 4-w; + er = r&0xfc | (r & 0xf00)<<16; + outl(PciADDR, 0x80000000|BUSBDF(tbdf)|er); + if(rw == Read){ + x = -1; + switch(w){ + case 1: + x = inb(PciDATA+o); + break; + case 2: + x = ins(PciDATA+o); + break; + case 4: + x = inl(PciDATA+o); + break; + } + }else{ + x = 0; + switch(w){ + case 1: + outb(PciDATA+o, data); + break; + case 2: + outs(PciDATA+o, data); + break; + case 4: + outl(PciDATA+o, data); + break; + } + } +// outl(PciADDR, 0); + unlock(&pcicfglock); + + return x; +} + +int +pcicfgr8(Pcidev *p, int rno) +{ + return pcicfgrw(p->tbdf, rno, 0, Read, 1); +} + +void +pcicfgw8(Pcidev *p, int rno, int data) +{ + pcicfgrw(p->tbdf, rno, data, Write, 1); +} + +int +pcicfgr16(Pcidev *p, int rno) +{ + return pcicfgrw(p->tbdf, rno, 0, Read, 2); +} + +void +pcicfgw16(Pcidev *p, int rno, int data) +{ + 
pcicfgrw(p->tbdf, rno, data, Write, 2);
+}
+
+/* read the 32-bit configuration register rno of p */
+int
+pcicfgr32(Pcidev *p, int rno)
+{
+	return pcicfgrw(p->tbdf, rno, 0, Read, 4);
+}
+
+/* write data to the 32-bit configuration register rno of p */
+void
+pcicfgw32(Pcidev *p, int rno, int data)
+{
+	pcicfgrw(p->tbdf, rno, data, Write, 4);
+}
+
+/*
+ * Iterate over the list of all devices: return the first device
+ * after prev (or from the start of the list when prev is nil)
+ * whose vendor and device ids match vid and did, where 0 matches
+ * anything.  Returns nil when the list is exhausted.
+ */
+Pcidev*
+pcimatch(Pcidev* prev, int vid, int did)
+{
+	Pcidev *p;
+
+	pcicfginit();
+	if(prev == nil)
+		p = pcilist;
+	else
+		p = prev->list;
+	while(p != nil){
+		if((vid == 0 || p->vid == vid) && (did == 0 || p->did == did))
+			return p;
+		p = p->list;
+	}
+	return nil;
+}
+
+/* return the device whose tbdf equals tbdf, or nil if there is none */
+Pcidev*
+pcimatchtbdf(int tbdf)
+{
+	Pcidev *p;
+
+	p = nil;
+	while((p = pcimatch(p, 0, 0)) != nil)
+		if(p->tbdf == tbdf)
+			return p;
+	return nil;
+}
+
+/*
+ * Print one line per device on the bus list starting at p,
+ * then do the same for every bus hanging off a bridge in it.
+ */
+static void
+pcilhinv(Pcidev* p)
+{
+	int i;
+	Pcidev *t, *b;
+
+	for(t = p; t != nil; t = t->link) {
+		print("%d %2d/%d %.2ux %.2ux %.2ux %.4ux %.4ux %3d ",
+			BUSBNO(t->tbdf), BUSDNO(t->tbdf), BUSFNO(t->tbdf),
+			t->ccrb, t->ccru, t->ccrp, t->vid, t->did, t->intl);
+
+		/* only the base address registers that have a size */
+		for(i = 0; i < nelem(t->mem); i++)
+			if(t->mem[i].size != 0)
+				print("%d:%.8lux %d ", i, t->mem[i].bar, t->mem[i].size);
+		if(t->ioa.bar || t->ioa.size)
+			print("ioa:%.8lux %d ", t->ioa.bar, t->ioa.size);
+		if(t->mema.bar || t->mema.size)
+			print("mema:%.8lux %d ", t->mema.bar, t->mema.size);
+		if(t->bridge)
+			print("->%d", BUSBNO(t->bridge->tbdf));
+		print("\n");
+	}
+	for(b = p; b != nil; b = b->link)
+		if(b->bridge != nil)
+			pcilhinv(b->bridge);
+}
+
+/*
+ * Print the device inventory below p; with p nil, print a
+ * heading and the whole tree from pciroot.
+ */
+void
+pcihinv(Pcidev* p)
+{
+	pcicfginit();
+	lock(&pcicfginitlock);
+	if(p == nil){
+		print("bus dev type vid did intl memory\n");
+		p = pciroot;
+	}
+	pcilhinv(p);
+	unlock(&pcicfginitlock);
+}
+
+/* clear bus mastering on every device that is not a bridge */
+void
+pcireset(void)
+{
+	Pcidev *p;
+
+	p = nil;
+	while((p = pcimatch(p, 0, 0)) != nil){
+		/* don't mess with the bridges */
+		if(p->ccrb == 0x06)
+			continue;
+		pciclrbme(p);
+	}
+}
+
+/* set the bus master enable bit in the command register of p */
+void
+pcisetbme(Pcidev* p)
+{
+	p->pcr = p->pcr | MASen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+/* clear the bus master enable bit in the command register of p */
+void
+pciclrbme(Pcidev* p)
+{
+	p->pcr = p->pcr & ~MASen;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+/* set the MemWrInv bit in the command register of p */
+void
+pcisetmwi(Pcidev* p)
+{
+	p->pcr = p->pcr | MemWrInv;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+void
+pciclrmwi(Pcidev* p)
+{
+	p->pcr &= ~MemWrInv;
+	pcicfgw16(p, PciPCR, p->pcr);
+}
+
+/*
+ * Search the capability list of p for capability id cap.
+ * Returns the configuration space offset of the matching entry,
+ * or -1 when the device reports no capability list, has an
+ * unexpected header type, or the list ends (or looks corrupt)
+ * before cap is found.  At most 48 entries are examined.
+ */
+int
+pcicap(Pcidev *p, int cap)
+{
+	int i, c, off;
+
+	/* status register bit 4 has capabilities */
+	if((pcicfgr16(p, PciPSR) & 1<<4) == 0)
+		return -1;
+	switch(pcicfgr8(p, PciHDT) & 0x7f){
+	default:
+		return -1;
+	case 0:				/* etc */
+	case 1:				/* pci to pci bridge */
+		off = 0x34;
+		break;
+	case 2:				/* cardbus bridge */
+		off = 0x14;
+		break;
+	}
+	for(i = 48; i--;){
+		/* off addresses the byte holding the next entry's offset */
+		off = pcicfgr8(p, off);
+		if(off < 0x40 || (off & 3))
+			break;
+		off &= ~3;
+		c = pcicfgr8(p, off);
+		if(c == 0xff)
+			break;
+		if(c == cap)
+			return off;
+		off++;
+	}
+	return -1;
+}
+
+/* register offsets within the power management capability */
+enum {
+	Pmgcap	= 2,		/* capabilities; 2 bytes*/
+	Pmgctl	= 4,		/* ctl/status; 2 bytes */
+	Pmgbrg	= 6,		/* bridge support */
+	Pmgdata	= 7,
+};
+
+/*
+ * Return the current power state of p (the low two bits of the
+ * power management ctl/status register), or -1 if p has no power
+ * management capability.
+ */
+int
+pcigetpms(Pcidev* p)
+{
+	int ptr;
+
+	if((ptr = pcicap(p, PciCapPMG)) == -1)
+		return -1;
+	return pcicfgr16(p, ptr+Pmgctl) & 0x0003;
+}
+
+/*
+ * Put p into power state `state' (0-3).
+ * Returns the previous state, or -1 without touching the device
+ * when p has no power management capability, state is out of
+ * range, or state is 1 or 2 and the capabilities register does
+ * not advertise it (bits 0x0200 and 0x0400 respectively).
+ */
+int
+pcisetpms(Pcidev* p, int state)
+{
+	int pmc, pmcsr, ptr;
+
+	if((ptr = pcicap(p, PciCapPMG)) == -1)
+		return -1;
+
+	pmc = pcicfgr16(p, ptr+Pmgcap);
+	pmcsr = pcicfgr16(p, ptr+Pmgctl);
+
+	switch(state){
+	default:
+		return -1;
+	case 0:
+		break;
+	case 1:
+		if(!(pmc & 0x0200))
+			return -1;
+		break;
+	case 2:
+		if(!(pmc & 0x0400))
+			return -1;
+		break;
+	case 3:
+		break;
+	}
+	/* write back to the register pmcsr was read from */
+	pcicfgw16(p, ptr+Pmgctl, (pmcsr & ~3) | state);
+	return pmcsr & 3;
+}
diff -Nru 0/sys/src/nix/386/random.c 4/sys/src/nix/386/random.c
--- 0/sys/src/nix/386/random.c Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/386/random.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,137 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+struct Rb
+{
+	QLock;
+	Rendez	producer;
+	Rendez	consumer;
+	ulong	randomcount;
+	uchar	buf[1024];
+	uchar	*ep;
+	uchar	*rp;
+	uchar	*wp;
+	uchar	next;
+	uchar	wakeme;
+	ushort	bits;
+	ulong	randn;
+} rb;
+
+static int
+rbnotfull(void*)
+{
+	int i;
+
+	i = rb.rp - rb.wp;
+	return i != 1 && i != (1 - sizeof(rb.buf));
+} + +static int +rbnotempty(void*) +{ + return rb.wp != rb.rp; +} + +static void +genrandom(void*) +{ + up->basepri = PriNormal; + up->priority = up->basepri; + + for(;;){ + for(;;) + if(++rb.randomcount > 100000) + break; + if(anyhigher()) + sched(); + if(!rbnotfull(0)) + sleep(&rb.producer, rbnotfull, 0); + } +} + +/* + * produce random bits in a circular buffer + */ +static void +randomclock(void) +{ + if(rb.randomcount == 0 || !rbnotfull(0)) + return; + + rb.bits = (rb.bits<<2) ^ rb.randomcount; + rb.randomcount = 0; + + rb.next++; + if(rb.next != 8/2) + return; + rb.next = 0; + + *rb.wp ^= rb.bits; + if(rb.wp+1 == rb.ep) + rb.wp = rb.buf; + else + rb.wp = rb.wp+1; + + if(rb.wakeme) + wakeup(&rb.consumer); +} + +void +randominit(void) +{ + /* Frequency close but not equal to HZ */ + addclock0link(randomclock, 13); + rb.ep = rb.buf + sizeof(rb.buf); + rb.rp = rb.wp = rb.buf; + kproc("genrandom", genrandom, 0); +} + +/* + * consume random bytes from a circular buffer + */ +ulong +randomread(void *xp, ulong n) +{ + uchar *e, *p; + ulong x; + + p = xp; + + if(waserror()){ + qunlock(&rb); + nexterror(); + } + + qlock(&rb); + for(e = p + n; p < e; ){ + if(rb.wp == rb.rp){ + rb.wakeme = 1; + wakeup(&rb.producer); + sleep(&rb.consumer, rbnotempty, 0); + rb.wakeme = 0; + continue; + } + + /* + * beating clocks will be precictable if + * they are synchronized. Use a cheap pseudo + * random number generator to obscure any cycles. + */ + x = rb.randn*1103515245 ^ *rb.rp; + *p++ = rb.randn = x; + + if(rb.rp+1 == rb.ep) + rb.rp = rb.buf; + else + rb.rp = rb.rp+1; + } + qunlock(&rb); + poperror(); + + wakeup(&rb.producer); + + return n; +} diff -Nru 0/sys/src/nix/386/uarti8250.c 4/sys/src/nix/386/uarti8250.c --- 0/sys/src/nix/386/uarti8250.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/uarti8250.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,785 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * 8250 UART and compatibles. 
+ */ +enum { + Uart0 = 0x3F8, /* COM1 */ + Uart0IRQ = 4, + Uart1 = 0x2F8, /* COM2 */ + Uart1IRQ = 3, + + UartFREQ = 1843200, +}; + +enum { /* registers */ + Rbr = 0, /* Receiver Buffer (RO) */ + Thr = 0, /* Transmitter Holding (WO) */ + Ier = 1, /* Interrupt Enable */ + Iir = 2, /* Interrupt Identification (RO) */ + Fcr = 2, /* FIFO Control (WO) */ + Lcr = 3, /* Line Control */ + Mcr = 4, /* Modem Control */ + Lsr = 5, /* Line Status */ + Msr = 6, /* Modem Status */ + Scr = 7, /* Scratch Pad */ + Dll = 0, /* Divisor Latch LSB */ + Dlm = 1, /* Divisor Latch MSB */ +}; + +enum { /* Ier */ + Erda = 0x01, /* Enable Received Data Available */ + Ethre = 0x02, /* Enable Thr Empty */ + Erls = 0x04, /* Enable Receiver Line Status */ + Ems = 0x08, /* Enable Modem Status */ +}; + +enum { /* Iir */ + Ims = 0x00, /* Ms interrupt */ + Ip = 0x01, /* Interrupt Pending (not) */ + Ithre = 0x02, /* Thr Empty */ + Irda = 0x04, /* Received Data Available */ + Irls = 0x06, /* Receiver Line Status */ + Ictoi = 0x0C, /* Character Time-out Indication */ + IirMASK = 0x3F, + Ifena = 0xC0, /* FIFOs enabled */ +}; + +enum { /* Fcr */ + FIFOena = 0x01, /* FIFO enable */ + FIFOrclr = 0x02, /* clear Rx FIFO */ + FIFOtclr = 0x04, /* clear Tx FIFO */ + FIFO1 = 0x00, /* Rx FIFO trigger level 1 byte */ + FIFO4 = 0x40, /* 4 bytes */ + FIFO8 = 0x80, /* 8 bytes */ + FIFO14 = 0xC0, /* 14 bytes */ +}; + +enum { /* Lcr */ + Wls5 = 0x00, /* Word Length Select 5 bits/byte */ + Wls6 = 0x01, /* 6 bits/byte */ + Wls7 = 0x02, /* 7 bits/byte */ + Wls8 = 0x03, /* 8 bits/byte */ + WlsMASK = 0x03, + Stb = 0x04, /* 2 stop bits */ + Pen = 0x08, /* Parity Enable */ + Eps = 0x10, /* Even Parity Select */ + Stp = 0x20, /* Stick Parity */ + Brk = 0x40, /* Break */ + Dlab = 0x80, /* Divisor Latch Access Bit */ +}; + +enum { /* Mcr */ + Dtr = 0x01, /* Data Terminal Ready */ + Rts = 0x02, /* Ready To Send */ + Out1 = 0x04, /* no longer in use */ + Ie = 0x08, /* IRQ Enable */ + Dm = 0x10, /* Diagnostic Mode loopback */ +}; + 
+enum {					/* Lsr */
+	Dr		= 0x01,		/* Data Ready */
+	Oe		= 0x02,		/* Overrun Error */
+	Pe		= 0x04,		/* Parity Error */
+	Fe		= 0x08,		/* Framing Error */
+	Bi		= 0x10,		/* Break Interrupt */
+	Thre		= 0x20,		/* Thr Empty */
+	Temt		= 0x40,		/* Transmitter Empty */
+	FIFOerr		= 0x80,		/* error in receiver FIFO */
+};
+
+enum {					/* Msr */
+	Dcts		= 0x01,		/* Delta Cts */
+	Ddsr		= 0x02,		/* Delta Dsr */
+	Teri		= 0x04,		/* Trailing Edge of Ri */
+	Ddcd		= 0x08,		/* Delta Dcd */
+	Cts		= 0x10,		/* Clear To Send */
+	Dsr		= 0x20,		/* Data Set Ready */
+	Ri		= 0x40,		/* Ring Indicator */
+	Dcd		= 0x80,		/* Data Carrier Detect */
+};
+
+typedef struct Ctlr {
+	int	io;
+	int	irq;
+	int	tbdf;
+	int	iena;
+	void*	vector;
+	int	poll;
+
+	uchar	sticky[8];
+
+	Lock;
+	int	hasfifo;
+	int	checkfifo;
+	int	fena;
+} Ctlr;
+
+extern PhysUart i8250physuart;
+
+static Ctlr i8250ctlr[2] = {
+{	.io	= Uart0,
+	.irq	= Uart0IRQ,
+	.tbdf	= -1,
+	.poll	= 0, },
+
+{	.io	= Uart1,
+	.irq	= Uart1IRQ,
+	.tbdf	= -1,
+	.poll	= 0, },
+};
+
+static Uart i8250uart[2] = {
+{	.regs	= &i8250ctlr[0],
+	.name	= "COM1",
+	.freq	= UartFREQ,
+	.phys	= &i8250physuart,
+	.special= 0,
+	.next	= &i8250uart[1], },
+
+{	.regs	= &i8250ctlr[1],
+	.name	= "COM2",
+	.freq	= UartFREQ,
+	.phys	= &i8250physuart,
+	.special= 0,
+	.next	= nil, },
+};
+
+/*
+ * Register access: csr8r reads register r, csr8w writes v
+ * or'ed with the sticky bits remembered for r, csr8o writes
+ * v alone.
+ */
+#define csr8r(c, r)	inb((c)->io+(r))
+#define csr8w(c, r, v)	outb((c)->io+(r), (c)->sticky[(r)]|(v))
+#define csr8o(c, r, v)	outb((c)->io+(r), (v))
+
+/*
+ * Format the line settings, error counters and modem status of
+ * uart as text and copy the part selected by offset and n into
+ * buf.  Returns the number of bytes copied, or 0 if no memory
+ * was available for the text.
+ */
+static long
+i8250status(Uart* uart, void* buf, long n, long offset)
+{
+	char *p;
+	Ctlr *ctlr;
+	uchar ier, lcr, mcr, msr;
+
+	ctlr = uart->regs;
+	p = malloc(READSTR);
+	if(p == nil)
+		return 0;	/* nothing to report beats formatting into nil */
+	mcr = ctlr->sticky[Mcr];
+	msr = csr8r(ctlr, Msr);
+	ier = ctlr->sticky[Ier];
+	lcr = ctlr->sticky[Lcr];
+	snprint(p, READSTR,
+		"b%d c%d d%d e%d l%d m%d p%c r%d s%d i%d\n"
+		"dev(%d) type(%d) framing(%d) overruns(%d) "
+		"berr(%d) serr(%d)%s%s%s%s\n",
+
+		uart->baud,
+		uart->hup_dcd,
+		(msr & Dsr) != 0,
+		uart->hup_dsr,
+		(lcr & WlsMASK) + 5,
+		(ier & Ems) != 0,
+		(lcr & Pen) ? ((lcr & Eps) ? 'e': 'o'): 'n',
+		(mcr & Rts) != 0,
+		(lcr & Stb) ? 2: 1,
+		ctlr->fena,
+
+		uart->dev,
+		uart->type,
+		uart->ferr,
+		uart->oerr,
+		uart->berr,
+		uart->serr,
+		(msr & Cts) ? " cts": "",
+		(msr & Dsr) ? " dsr": "",
+		(msr & Dcd) ? " dcd": "",
+		(msr & Ri) ? " ring": ""
+	);
+	n = readstr(offset, buf, n, p);
+	free(p);
+
+	return n;
+}
+
+/*
+ * Set the receive FIFO trigger level (0 turns the FIFOs off;
+ * anything other than 0, 1, 4 or 8 selects 14 bytes).
+ * Does nothing on a controller without FIFOs.
+ */
+static void
+i8250fifo(Uart* uart, int level)
+{
+	Ctlr *ctlr;
+
+	ctlr = uart->regs;
+	if(ctlr->hasfifo == 0)
+		return;
+
+	/*
+	 * Changing the FIFOena bit in Fcr flushes data
+	 * from both receive and transmit FIFOs; there's
+	 * no easy way to guarantee not losing data on
+	 * the receive side, but it's possible to wait until
+	 * the transmitter is really empty.
+	 */
+	ilock(ctlr);
+	while(!(csr8r(ctlr, Lsr) & Temt))
+		;
+
+	/*
+	 * Set the trigger level, default is the max.
+	 * value.
+	 * Some UARTs require FIFOena to be set before
+	 * other bits can take effect, so set it twice.
+	 */
+	ctlr->fena = level;
+	switch(level){
+	case 0:
+		break;
+	case 1:
+		level = FIFO1|FIFOena;
+		break;
+	case 4:
+		level = FIFO4|FIFOena;
+		break;
+	case 8:
+		level = FIFO8|FIFOena;
+		break;
+	default:
+		level = FIFO14|FIFOena;
+		break;
+	}
+	csr8w(ctlr, Fcr, level);
+	csr8w(ctlr, Fcr, level);
+	iunlock(ctlr);
+}
+
+static void
+i8250dtr(Uart* uart, int on)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * Toggle DTR.
+	 */
+	ctlr = uart->regs;
+	if(on)
+		ctlr->sticky[Mcr] |= Dtr;
+	else
+		ctlr->sticky[Mcr] &= ~Dtr;
+	csr8w(ctlr, Mcr, 0);
+}
+
+static void
+i8250rts(Uart* uart, int on)
+{
+	Ctlr *ctlr;
+
+	/*
+	 * Toggle RTS.
+ */ + ctlr = uart->regs; + if(on) + ctlr->sticky[Mcr] |= Rts; + else + ctlr->sticky[Mcr] &= ~Rts; + csr8w(ctlr, Mcr, 0); +} + +static void +i8250modemctl(Uart* uart, int on) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + ilock(&uart->tlock); + if(on){ + ctlr->sticky[Ier] |= Ems; + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + uart->modem = 1; + uart->cts = csr8r(ctlr, Msr) & Cts; + } + else{ + ctlr->sticky[Ier] &= ~Ems; + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + uart->modem = 0; + uart->cts = 1; + } + iunlock(&uart->tlock); + + /* modem needs fifo */ + (*uart->phys->fifo)(uart, on); +} + +static int +i8250parity(Uart* uart, int parity) +{ + int lcr; + Ctlr *ctlr; + + ctlr = uart->regs; + lcr = ctlr->sticky[Lcr] & ~(Eps|Pen); + + switch(parity){ + case 'e': + lcr |= Eps|Pen; + break; + case 'o': + lcr |= Pen; + break; + case 'n': + break; + default: + return -1; + } + ctlr->sticky[Lcr] = lcr; + csr8w(ctlr, Lcr, 0); + + uart->parity = parity; + + return 0; +} + +static int +i8250stop(Uart* uart, int stop) +{ + int lcr; + Ctlr *ctlr; + + ctlr = uart->regs; + lcr = ctlr->sticky[Lcr] & ~Stb; + + switch(stop){ + case 1: + break; + case 2: + lcr |= Stb; + break; + default: + return -1; + } + ctlr->sticky[Lcr] = lcr; + csr8w(ctlr, Lcr, 0); + + uart->stop = stop; + + return 0; +} + +static int +i8250bits(Uart* uart, int bits) +{ + int lcr; + Ctlr *ctlr; + + ctlr = uart->regs; + lcr = ctlr->sticky[Lcr] & ~WlsMASK; + + switch(bits){ + case 5: + lcr |= Wls5; + break; + case 6: + lcr |= Wls6; + break; + case 7: + lcr |= Wls7; + break; + case 8: + lcr |= Wls8; + break; + default: + return -1; + } + ctlr->sticky[Lcr] = lcr; + csr8w(ctlr, Lcr, 0); + + uart->bits = bits; + + return 0; +} + +static int +i8250baud(Uart* uart, int baud) +{ + ulong bgc; + Ctlr *ctlr; + + /* + * Set the Baud rate by calculating and setting the Baud rate + * Generator Constant. This will work with fairly non-standard + * Baud rates. 
+ */ + if(uart->freq == 0 || baud <= 0) + return -1; + bgc = (uart->freq+8*baud-1)/(16*baud); + + ctlr = uart->regs; + csr8w(ctlr, Lcr, Dlab); + csr8o(ctlr, Dlm, bgc>>8); + csr8o(ctlr, Dll, bgc); + csr8w(ctlr, Lcr, 0); + + uart->baud = baud; + + return 0; +} + +static void +i8250break(Uart* uart, int ms) +{ + Ctlr *ctlr; + + /* + * Send a break. + */ + if(ms <= 0) + ms = 200; + + ctlr = uart->regs; + csr8w(ctlr, Lcr, Brk); + tsleep(&up->sleep, return0, 0, ms); + csr8w(ctlr, Lcr, 0); +} + +static void +i8250kick(Uart* uart) +{ + int i; + Ctlr *ctlr; + + if(uart->cts == 0 || uart->blocked) + return; + + /* + * 128 here is an arbitrary limit to make sure + * we don't stay in this loop too long. If the + * chip's output queue is longer than 128, too + * bad -- presotto + */ + ctlr = uart->regs; + for(i = 0; i < 128; i++){ + if(!(csr8r(ctlr, Lsr) & Thre)) + break; + if(uart->op >= uart->oe && uartstageoutput(uart) == 0) + break; + csr8o(ctlr, Thr, *(uart->op++)); + } +} + +static void +i8250interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Uart *uart; + int iir, lsr, old, r; + + uart = arg; + + ctlr = uart->regs; + for(iir = csr8r(ctlr, Iir); !(iir & Ip); iir = csr8r(ctlr, Iir)){ + switch(iir & IirMASK){ + case Ims: /* Ms interrupt */ + r = csr8r(ctlr, Msr); + if(r & Dcts){ + ilock(&uart->tlock); + old = uart->cts; + uart->cts = r & Cts; + if(old == 0 && uart->cts) + uart->ctsbackoff = 2; + iunlock(&uart->tlock); + } + if(r & Ddsr){ + old = r & Dsr; + if(uart->hup_dsr && uart->dsr && !old) + uart->dohup = 1; + uart->dsr = old; + } + if(r & Ddcd){ + old = r & Dcd; + if(uart->hup_dcd && uart->dcd && !old) + uart->dohup = 1; + uart->dcd = old; + } + break; + case Ithre: /* Thr Empty */ + uartkick(uart); + break; + case Irda: /* Received Data Available */ + case Irls: /* Receiver Line Status */ + case Ictoi: /* Character Time-out Indication */ + /* + * Consume any received data. 
+ * If the received byte came in with a break, + * parity or framing error, throw it away; + * overrun is an indication that something has + * already been tossed. + */ + while((lsr = csr8r(ctlr, Lsr)) & Dr){ + if(lsr & (FIFOerr|Oe)) + uart->oerr++; + if(lsr & Pe) + uart->perr++; + if(lsr & Fe) + uart->ferr++; + r = csr8r(ctlr, Rbr); + if(!(lsr & (Bi|Fe|Pe))) + uartrecv(uart, r); + } + break; + + default: + iprint("weird uart interrupt 0x%2.2uX\n", iir); + break; + } + } +} + +static void +i8250disable(Uart* uart) +{ + Ctlr *ctlr; + + /* + * Turn off DTR and RTS, disable interrupts and fifos. + */ + (*uart->phys->dtr)(uart, 0); + (*uart->phys->rts)(uart, 0); + (*uart->phys->fifo)(uart, 0); + + ctlr = uart->regs; + ctlr->sticky[Ier] = 0; + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + + if(ctlr->iena != 0){ + if(intrdisable(ctlr->vector) == 0) + ctlr->iena = 0; + } +} + +static void +i8250enable(Uart* uart, int ie) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + + /* + * Check if there is a FIFO. + * Changing the FIFOena bit in Fcr flushes data + * from both receive and transmit FIFOs; there's + * no easy way to guarantee not losing data on + * the receive side, but it's possible to wait until + * the transmitter is really empty. + * Also, reading the Iir outwith i8250interrupt() + * can be dangerous, but this should only happen + * once, before interrupts are enabled. + */ + ilock(ctlr); + if(!ctlr->checkfifo){ + /* + * Wait until the transmitter is really empty. + */ + while(!(csr8r(ctlr, Lsr) & Temt)) + ; + csr8w(ctlr, Fcr, FIFOena); + if(csr8r(ctlr, Iir) & Ifena) + ctlr->hasfifo = 1; + csr8w(ctlr, Fcr, 0); + ctlr->checkfifo = 1; + } + iunlock(ctlr); + + /* + * Enable interrupts and turn on DTR and RTS. + * Be careful if this is called to set up a polled serial line + * early on not to try to enable interrupts as interrupt- + * -enabling mechanisms might not be set up yet. 
+ */ + if(ie){ + if(ctlr->iena == 0 && !ctlr->poll){ + ctlr->vector = intrenable(ctlr->irq, i8250interrupt, uart, ctlr->tbdf, uart->name); + ctlr->iena = 1; + } + ctlr->sticky[Ier] = Ethre|Erda; + ctlr->sticky[Mcr] |= Ie; + } + else{ + ctlr->sticky[Ier] = 0; + ctlr->sticky[Mcr] = 0; + } + csr8w(ctlr, Ier, ctlr->sticky[Ier]); + csr8w(ctlr, Mcr, ctlr->sticky[Mcr]); + + (*uart->phys->dtr)(uart, 1); + (*uart->phys->rts)(uart, 1); + + /* + * During startup, the i8259 interrupt controller is reset. + * This may result in a lost interrupt from the i8250 uart. + * The i8250 thinks the interrupt is still outstanding and does not + * generate any further interrupts. The workaround is to call the + * interrupt handler to clear any pending interrupt events. + * Note: this must be done after setting Ier. + */ + if(ie) + i8250interrupt(nil, uart); +} + +void* +i8250alloc(int io, int irq, int tbdf) +{ + Ctlr *ctlr; + + if((ctlr = malloc(sizeof(Ctlr))) != nil){ + ctlr->io = io; + ctlr->irq = irq; + ctlr->tbdf = tbdf; + } + + return ctlr; +} + +static Uart* +i8250pnp(void) +{ + int i; + Ctlr *ctlr; + Uart *head, *uart; + + head = i8250uart; + for(i = 0; i < nelem(i8250uart); i++){ + /* + * Does it exist? + * Should be able to write/read the Scratch Pad + * and reserve the I/O space. 
+ */ + uart = &i8250uart[i]; + ctlr = uart->regs; + csr8o(ctlr, Scr, 0x55); + if(csr8r(ctlr, Scr) == 0x55) + continue; + if(ioalloc(ctlr->io, 8, 0, uart->name) < 0) + continue; + if(uart == head) + head = uart->next; + else + (uart-1)->next = uart->next; + } + + return head; +} + +static int +i8250getc(Uart* uart) +{ + Ctlr *ctlr; + + ctlr = uart->regs; + while(!(csr8r(ctlr, Lsr) & Dr)) + delay(1); + return csr8r(ctlr, Rbr); +} + +static void +i8250putc(Uart* uart, int c) +{ + int i; + Ctlr *ctlr; + + ctlr = uart->regs; + for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++) + delay(1); + csr8o(ctlr, Thr, c); + for(i = 0; !(csr8r(ctlr, Lsr) & Thre) && i < 128; i++) + delay(1); +} + +static void +i8250poll(Uart* uart) +{ + Ctlr *ctlr; + + /* + * If PhysUart has a non-nil .poll member, this + * routine will be called from the uartclock timer. + * If the Ctlr .poll member is non-zero, when the + * Uart is enabled interrupts will not be enabled + * and the result is polled input and output. + * Not very useful here, but ports to new hardware + * or simulators can use this to get serial I/O + * without setting up the interrupt mechanism. + */ + ctlr = uart->regs; + if(ctlr->iena || !ctlr->poll) + return; + i8250interrupt(nil, uart); +} + +PhysUart i8250physuart = { + .name = "i8250", + .pnp = i8250pnp, + .enable = i8250enable, + .disable = i8250disable, + .kick = i8250kick, + .dobreak = i8250break, + .baud = i8250baud, + .bits = i8250bits, + .stop = i8250stop, + .parity = i8250parity, + .modemctl = i8250modemctl, + .rts = i8250rts, + .dtr = i8250dtr, + .status = i8250status, + .fifo = i8250fifo, + .getc = i8250getc, + .putc = i8250putc, + .poll = i8250poll, +}; + +Uart* +i8250console(char* cfg) +{ + int i; + Uart *uart; + Ctlr *ctlr; + char *cmd, *p; + + /* + * Before i8250pnp() is run can only set the console + * to 0 or 1 because those are the only uart structs which + * will be the same before and after that. 
+ */ + if((p = getconf("console")) == nil && (p = cfg) == nil) + return nil; + i = strtoul(p, &cmd, 0); + if(p == cmd) + return nil; + if((uart = uartconsole(i, cmd)) != nil){ + consuart = uart; + return uart; + } + switch(i){ + default: + return nil; + case 0: + uart = &i8250uart[0]; + break; + case 1: + uart = &i8250uart[1]; + break; + } + + /* + * Does it exist? + * Should be able to write/read + * the Scratch Pad. + */ + ctlr = uart->regs; + csr8o(ctlr, Scr, 0x55); + if(csr8r(ctlr, Scr) != 0x55) + return nil; + + (*uart->phys->enable)(uart, 0); + uartctl(uart, "b9600 l8 pn s1 i1"); + if(*cmd != '\0') + uartctl(uart, cmd); + + consuart = uart; + uart->console = 1; + + return uart; +} diff -Nru 0/sys/src/nix/386/uartpci.c 4/sys/src/nix/386/uartpci.c --- 0/sys/src/nix/386/uartpci.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/386/uartpci.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,180 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" + +extern PhysUart i8250physuart; +extern PhysUart pciphysuart; +extern void* i8250alloc(int, int, int); + +static Uart* +uartpci(int ctlrno, Pcidev* p, int barno, int n, int freq, char* name) +{ + int i, io; + void *ctlr; + char buf[64]; + Uart *head, *uart; + + io = p->mem[barno].bar & ~0x01; + snprint(buf, sizeof(buf), "%s%d", pciphysuart.name, ctlrno); + if(ioalloc(io, p->mem[barno].size, 0, buf) < 0){ + print("uartpci: I/O 0x%uX in use\n", io); + return nil; + } + + head = uart = malloc(sizeof(Uart)*n); + + for(i = 0; i < n; i++){ + ctlr = i8250alloc(io, p->intl, p->tbdf); + io += 8; + if(ctlr == nil) + continue; + + uart->regs = ctlr; + snprint(buf, sizeof(buf), "%s.%8.8uX", name, p->tbdf); + kstrdup(&uart->name, buf); + uart->freq = freq; + uart->phys = &i8250physuart; + if(uart != head) + (uart-1)->next = uart; + uart++; + } + + return head; +} + +static Uart* +uartpcipnp(void) +{ + Pcidev *p; + char *name; + int ctlrno, n, subid; + Uart *head, *tail, *uart; + + /* + * 
Loop through all PCI devices looking for simple serial + * controllers (ccrb == 0x07) and configure the ones which + * are familiar. All suitable devices are configured to + * simply point to the generic i8250 driver. + */ + head = tail = nil; + ctlrno = 0; + for(p = pcimatch(nil, 0, 0); p != nil; p = pcimatch(p, 0, 0)){ + if(p->ccrb != 0x07 || p->ccru > 2) + continue; + + switch((p->did<<16)|p->vid){ + default: + continue; + case (0x9835<<16)|0x9710: /* StarTech PCI2S550 */ + uart = uartpci(ctlrno, p, 0, 1, 1843200, "PCI2S550-0"); + if(uart == nil) + continue; + uart->next = uartpci(ctlrno, p, 1, 1, 1843200, "PCI2S550-1"); + break; + case (0x950A<<16)|0x1415: /* Oxford Semi OX16PCI954 */ + /* + * These are common devices used by 3rd-party + * manufacturers. + * Must check the subsystem VID and DID for correct + * match, mostly to get the clock frequency right. + */ + subid = pcicfgr16(p, PciSVID); + subid |= pcicfgr16(p, PciSID)<<16; + switch(subid){ + default: + continue; + case (0x2000<<16)|0x131F:/* SIIG CyberSerial PCIe */ + uart = uartpci(ctlrno, p, 0, 1, 18432000, "CyberSerial-1S"); + if(uart == nil) + continue; + break; + } + break; + case (0x9501<<16)|0x1415: /* Oxford Semi OX16PCI954 */ + /* + * These are common devices used by 3rd-party + * manufacturers. + * Should check the subsystem VID and DID for correct + * match, mostly to get the clock frequency right. + */ + subid = pcicfgr16(p, PciSVID); + subid |= pcicfgr16(p, PciSID)<<16; + switch(subid){ + default: + continue; + case (0<<16)|0x1415: /* StarTech PCI4S550 */ + uart = uartpci(ctlrno, p, 0, 1, 18432000, "PCI4S550-0"); + if(uart == nil) + continue; + break; + } + break; + case (0x9050<<16)|0x10B5: /* Perle PCI-Fast4 series */ + case (0x9030<<16)|0x10B5: /* Perle Ultraport series */ + /* + * These devices consist of a PLX bridge (the above + * PCI VID+DID) behind which are some 16C654 UARTs. + * Must check the subsystem VID and DID for correct + * match. 
+ */ + subid = pcicfgr16(p, PciSVID); + subid |= pcicfgr16(p, PciSID)<<16; + switch(subid){ + default: + continue; + case (0x0011<<16)|0x12E0: /* Perle PCI-Fast16 */ + n = 16; + name = "PCI-Fast16"; + break; + case (0x0021<<16)|0x12E0: /* Perle PCI-Fast8 */ + n = 8; + name = "PCI-Fast8"; + break; + case (0x0031<<16)|0x12E0: /* Perle PCI-Fast4 */ + n = 4; + name = "PCI-Fast4"; + break; + case (0x0021<<16)|0x155F: /* Perle Ultraport8 */ + n = 8; + name = "Ultraport8"; /* 16C754 UARTs */ + break; + } + uart = uartpci(ctlrno, p, 2, n, 7372800, name); + if(uart == nil) + continue; + break; + } + + if(head != nil) + tail->next = uart; + else + head = uart; + for(tail = uart; tail->next != nil; tail = tail->next) + ; + ctlrno++; + } + + return head; +} + +PhysUart pciphysuart = { + .name = "UartPCI", + .pnp = uartpcipnp, + .enable = nil, + .disable = nil, + .kick = nil, + .dobreak = nil, + .baud = nil, + .bits = nil, + .stop = nil, + .parity = nil, + .modemctl = nil, + .rts = nil, + .dtr = nil, + .status = nil, + .fifo = nil, +}; diff -Nru 0/sys/src/nix/bench/1/kern 4/sys/src/nix/bench/1/kern --- 0/sys/src/nix/bench/1/kern Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/1/kern Wed Feb 6 00:00:00 2013 @@ -0,0 +1,22 @@ +#!/bin/rc + +rfork ne + +# import rc functions popular among scripts, e.g. fail +# +. 
../tools + +# we might have a source file here and do something like: +# to override the source used for this benchmark +bind proc.c ../../port/proc.c + +cp /cfg/pxe/003048ff2106 pxeorig +cp 003048ff2106 /cfg/pxe + +# we might change the std source like this: +# sed 's/initialTCs = [0-9]+/initialTCs = 16/' < ../../k10/main.c >main.c +# bind main.c ../../k10/main.c + +cd /sys/src/nix/k10 +mk clean +mk install diff -Nru 0/sys/src/nix/bench/1/output 4/sys/src/nix/bench/1/output --- 0/sys/src/nix/bench/1/output Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/1/output Wed Feb 6 00:00:00 2013 @@ -0,0 +1,20 @@ +# sleep 2 +0.00u 0.02s 2.02r rc -c sleep 2 +0.00u 0.03s 2.02r rc -c sleep 2 +0.00u 0.02s 2.02r rc -c sleep 2 +0.00u 0.01s 2.03r rc -c sleep 2 +0.00u 0.03s 2.02r rc -c sleep 2 +0.00u 0.02s 2.02r rc -c sleep 2 +0.00u 0.02s 2.02r rc -c sleep 2 +0.00u 0.04s 2.02r rc -c sleep 2 +0.00u 0.02s 2.02r rc -c sleep 2 +0.00u 0.04s 2.03r rc -c sleep 2 +times 0 0.025 2.022 +#cat /dev/debug +steal 1 +donate 0 +locks 965080 +glare 6450 +inglare 8840 +qlock 73101 +qlockq 66 diff -Nru 0/sys/src/nix/bench/1/runbench 4/sys/src/nix/bench/1/runbench --- 0/sys/src/nix/bench/1/runbench Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/1/runbench Wed Feb 6 00:00:00 2013 @@ -0,0 +1,26 @@ +#!/bin/rc + +# +# kernel as of /n/nixdump/2012/0119/sys/src/nix/bench +# Single sched, 32 TCs. +# Time to make a kernel +# + +rfork ne + +# restore the pxe file we saved +cp pxeorig /cfg/pxe/003048ff2106 + +# import rc functions popular among scripts, e.g. fail +# +. ../tools + +# How much time does it take to make a kernel +../Time 'cd /sys/src/nix/k10 ; mk clean ; mk' + + +# What's the value for measures taken from the kernel? +# echo '#cat /dev/debug' +# cat /dev/debug +# NB: this is an example. 
/dev/debug is reported already by Time + diff -Nru 0/sys/src/nix/bench/Benchs 4/sys/src/nix/bench/Benchs --- 0/sys/src/nix/bench/Benchs Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/Benchs Wed Feb 6 00:00:00 2013 @@ -0,0 +1,15 @@ +#!/bin/rc + +# change the boot sequence to run benchs. + +# remove output from all benchs: +# rm -f [0-9]*/^(koutput output FAIL KMESG) k[0-9]*/^(koutput output FAIL KMESG) + +# remove output from some benchs, to rerun them: +# for(t in 93 94 95) rm -f $t/^(koutput output FAIL KMESG) + + +# arrange for them to run after rebooting +cp runbenchs /cfg/$sysname/runbenchs +runbenchs + diff -Nru 0/sys/src/nix/bench/Locks 4/sys/src/nix/bench/Locks --- 0/sys/src/nix/bench/Locks Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/Locks Wed Feb 6 00:00:00 2013 @@ -0,0 +1,29 @@ +#!/bin/rc +rfork e +cmd=$1 +echo '#' $1 +echo measuring $1... >'#c/cons' +rm -f /tmp/bench.time +rc -c $cmd >'#c/cons' # warm cache and be able to see the output of the cmd... +# +# collect stats. A single run this time. +# +echo >/dev/sysstat +bind -a '#W' /dev +echo start >/dev/wsctl +time rc -c $cmd >/dev/null >>[2]/tmp/bench.time +echo stop >/dev/wsctl +cp /dev/wsdata /tmp/wsdata +cp /dev/sysstat /tmp/sysstat +tail -1 /tmp/bench.time +tail -1 /tmp/bench.time >'#c/cons' +sed 's/[sur]//g' $d/003048ff2106}} + +Then run ./Benchs + +for(x in `{seq 30}){ d=`{echo $x + 67|hoc} ; nc=`{echo $x + 2 | hoc}; echo dir $d echo $nc cores ; ; sed 's/ck 2$/ck '^$nc^'/' < 67/003048ff2106 >$d/003048ff2106} + +Beware that many benchs are made assuming the kernel is implemented in a +certain way, at least, those depending on a particular kernel. +That means that, for example, if you clear benchs 1-99, you might have to +rely on /n/nixdump/2012/0123/sys/src/nix sources; otherwise the kernel might +not compile, or you might be measuring something else. + +In short: feel free to clear only the benchmarks you are responsible for. +You should know what you are measuring in any case. 
diff -Nru 0/sys/src/nix/bench/Time 4/sys/src/nix/bench/Time --- 0/sys/src/nix/bench/Time Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/Time Wed Feb 6 00:00:00 2013 @@ -0,0 +1,23 @@ +#!/bin/rc +rfork e +cmd=$1 +echo '#' $1 +echo measuring $1... >'#c/cons' +rm -f /tmp/bench.time +rc -c $cmd >'#c/cons' # warm cache and be able to see the output of the cmd... +echo >/dev/sysstat +for(i in `{seq 10}){ + time rc -c $cmd >/dev/null >>[2]/tmp/bench.time + tail -1 /tmp/bench.time + tail -1 /tmp/bench.time >'#c/cons' +} +cp /dev/sysstat /tmp/sysstat +sed 's/[sur]//g' koutput >[2=1]){ + touch FAIL + fail bench $t failed + } + reboot + } + if(test -x kern) + cp /dev/kmesg KMESG + if(! runbench>output >[2=1]){ + touch FAIL + fail bench $t failed + } + echo bench $t ok + } + } + cd .. +} +if(! ~ $#failed 0) + echo benchs $failed failed +if not + echo all benchs done + +rm /cfg/$sysname/runbenchs diff -Nru 0/sys/src/nix/bench/tools 4/sys/src/nix/bench/tools --- 0/sys/src/nix/bench/tools Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/bench/tools Wed Feb 6 00:00:00 2013 @@ -0,0 +1,37 @@ +fn fail { + echo $* >[1=2] + exit fail +} + +fn log { + echo $* + echo $* >/dev/cons +} + + +fn repeatforallnumcores { + test -e kern || fail no kernel bench + test -e 003048ff2106 || fail no pxe file + test -e koutput || fail no koutput + test -e output || fail no output + + NC=`{grep '^bootfile' 003048ff2106 | awk '{print $NF}'} + if(~ $NC 32){ + cp output output.32 + cp KMESG KMESG.32 + exit '' + } + @{ + NNC=`{echo $NC + 1|hoc} + mv 003048ff2106 003048ff2106_ + sed 's/-ck .*/-ck '^$NNC^' '^$NNC^'/' < 003048ff2106_ >003048ff2106 + + mv output output.$NC + mv KMESG KMESG.$NC + cp 003048ff2106 /cfg/pxe +# mv koutput koutput.$NC + echo reboot to run `{pwd} with $NNC cores... 
+ reboot + @} >'#c/cons' >[2]'#c/cons' + status='' +} diff -Nru 0/sys/src/nix/boot/aux.c 4/sys/src/nix/boot/aux.c --- 0/sys/src/nix/boot/aux.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/aux.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,184 @@ +#include +#include +#include <../boot/boot.h> + +/* +int +plumb(char *dir, char *dest, int *efd, char *here) +{ + char buf[128]; + char name[128]; + int n; + + sprint(name, "%s/clone", dir); + efd[0] = open(name, ORDWR); + if(efd[0] < 0) + return -1; + n = read(efd[0], buf, sizeof(buf)-1); + if(n < 0){ + close(efd[0]); + return -1; + } + buf[n] = 0; + sprint(name, "%s/%s/data", dir, buf); + if(here){ + sprint(buf, "announce %s", here); + if(sendmsg(efd[0], buf) < 0){ + close(efd[0]); + return -1; + } + } + sprint(buf, "connect %s", dest); + if(sendmsg(efd[0], buf) < 0){ + close(efd[0]); + return -1; + } + efd[1] = open(name, ORDWR); + if(efd[1] < 0){ + close(efd[0]); + return -1; + } + return efd[1]; +} + */ + +int +sendmsg(int fd, char *msg) +{ + int n; + + n = strlen(msg); + if(write(fd, msg, n) != n) + return -1; + return 0; +} + +void +warning(char *s) +{ + char buf[ERRMAX]; + + buf[0] = '\0'; + errstr(buf, sizeof buf); + fprint(2, "boot: %s: %s\n", s, buf); +} + +void +fatal(char *s) +{ + char buf[ERRMAX]; + + buf[0] = '\0'; + errstr(buf, sizeof buf); + fprint(2, "boot: %s: %s\n", s, buf); + exits(0); +} + +int +readfile(char *name, char *buf, int len) +{ + int f, n; + + buf[0] = 0; + f = open(name, OREAD); + if(f < 0) + return -1; + n = read(f, buf, len-1); + if(n >= 0) + buf[n] = 0; + close(f); + return 0; +} + +int +writefile(char *name, char *buf, int len) +{ + int f, n; + + f = open(name, OWRITE); + if(f < 0) + return -1; + n = write(f, buf, len); + close(f); + return (n != len) ? 
-1 : 0; +} + +void +setenv(char *name, char *val) +{ + int f; + char ename[64]; + + snprint(ename, sizeof ename, "#e/%s", name); + f = create(ename, 1, 0666); + if(f < 0){ + fprint(2, "create %s: %r\n", ename); + return; + } + write(f, val, strlen(val)); + close(f); +} + +void +srvcreate(char *name, int fd) +{ + char *srvname; + int f; + char buf[64]; + + srvname = strrchr(name, '/'); + if(srvname) + srvname++; + else + srvname = name; + + snprint(buf, sizeof buf, "#s/%s", srvname); + f = create(buf, 1, 0666); + if(f < 0) + fatal(buf); + sprint(buf, "%d", fd); + if(write(f, buf, strlen(buf)) != strlen(buf)) + fatal("write"); + close(f); +} + +void +catchint(void *a, char *note) +{ + USED(a); + if(strcmp(note, "alarm") == 0) + noted(NCONT); + noted(NDFLT); +} + +int +outin(char *prompt, char *def, int len) +{ + int n; + char buf[256]; + + if(len >= sizeof buf) + len = sizeof(buf)-1; + + if(cpuflag){ + notify(catchint); + alarm(15*1000); + } + print("%s[%s]: ", prompt, *def ? def : "no default"); + memset(buf, 0, sizeof buf); + n = read(0, buf, len); + if(cpuflag){ + alarm(0); + notify(0); + } + + if(n < 0){ + print("\n"); + return 1; + } + if(n > 1){ + buf[n-1] = 0; + strcpy(def, buf); + } + return n; +} diff -Nru 0/sys/src/nix/boot/boot.c 4/sys/src/nix/boot/boot.c --- 0/sys/src/nix/boot/boot.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/boot.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,326 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +char cputype[64]; +char sys[2*64]; +char reply[256]; +int printcol; +int mflag; +int fflag; +int kflag; + +char *bargv[Nbarg]; +int bargc; + +static void swapproc(void); +static Method *rootserver(char*); +static void usbinit(void); +static void kbmap(void); + +void +boot(int argc, char *argv[]) +{ + int fd, afd; + Method *mp; + char *cmd, cmdbuf[64], *iargv[16]; + char rootbuf[64]; + int islocal, ishybrid; + char *rp, *rsp, *rdparts; + int iargc, n; + char buf[32]; + AuthInfo *ai; + + fmtinstall('r', errfmt); + 
+ bind("#c", "/dev", MBEFORE); + open("/dev/cons", OREAD); + open("/dev/cons", OWRITE); + open("/dev/cons", OWRITE); + /* + * init will reinitialize its namespace. + * #ec gets us plan9.ini settings (*var variables). + */ + bind("#ec", "/env", MREPL); + bind("#e", "/env", MBEFORE|MCREATE); + bind("#s", "/srv", MREPL|MCREATE); + if(0){ + print("argc=%d\n", argc); + for(fd = 0; fd < argc; fd++) + print("%#p %s ", argv[fd], argv[fd]); + print("\n"); + } + + ARGBEGIN{ + case 'k': + kflag = 1; + break; + case 'm': + mflag = 1; + break; + case 'f': + fflag = 1; + break; + }ARGEND + + readfile("#e/cputype", cputype, sizeof(cputype)); + + /* + * set up usb keyboard, mouse and disk, if any. + */ + usbinit(); + + /* + * pick a method and initialize it + */ + if(method[0].name == nil) + fatal("no boot methods"); + mp = rootserver(argc ? *argv : 0); + (*mp->config)(mp); + islocal = strcmp(mp->name, "local") == 0; + ishybrid = strcmp(mp->name, "hybrid") == 0; + + /* + * load keymap if it is there. + */ + kbmap(); + bind("#æ", "/dev", MAFTER); /* nvram could be here */ + bind("#S", "/dev", MAFTER); /* nvram could be here */ + + rdparts = getenv("rdparts"); + if(rdparts) + readparts(); + free(rdparts); + + + /* + * authentication agent + */ + authentication(cpuflag); + +print("connect..."); + /* + * connect to the root file system + */ + fd = (*mp->connect)(); + if(fd < 0) + fatal("can't connect to file server"); + if(!islocal && !ishybrid){ + if(cfs) + fd = (*cfs)(fd); + } +print("\n"); + print("version..."); + buf[0] = '\0'; + n = fversion(fd, 0, buf, sizeof buf); + if(n < 0) + fatal("can't init 9P"); + srvcreate("boot", fd); + + /* + * create the name space, mount the root fs + */ + if(bind("/", "/", MREPL) < 0) + fatal("bind /"); + rp = getenv("rootspec"); + if(rp == nil) + rp = ""; + + afd = fauth(fd, rp); + if(afd >= 0){ + ai = auth_proxy(afd, auth_getkey, "proto=p9any role=client"); + if(ai == nil) + print("authentication failed (%r), trying mount anyways\n"); + } + 
if(mount(fd, afd, "/root", MREPL|MCREATE, rp) < 0) + fatal("mount /"); + rsp = rp; + rp = getenv("rootdir"); + if(rp == nil) + rp = rootdir; + if(bind(rp, "/", MAFTER|MCREATE) < 0){ + if(strncmp(rp, "/root", 5) == 0){ + fprint(2, "boot: couldn't bind $rootdir=%s to root: %r\n", rp); + fatal("second bind /"); + } + snprint(rootbuf, sizeof rootbuf, "/root/%s", rp); + rp = rootbuf; + if(bind(rp, "/", MAFTER|MCREATE) < 0){ + fprint(2, "boot: couldn't bind $rootdir=%s to root: %r\n", rp); + if(strcmp(rootbuf, "/root//plan9") == 0){ + fprint(2, "**** warning: remove rootdir=/plan9 entry from plan9.ini\n"); + rp = "/root"; + if(bind(rp, "/", MAFTER|MCREATE) < 0) + fatal("second bind /"); + }else + fatal("second bind /"); + } + } + close(fd); + setenv("rootdir", rp); + + settime(islocal, afd, rsp); + if(afd > 0) + close(afd); + + cmd = getenv("init"); + if(cmd == nil){ + sprint(cmdbuf, "/%s/init -%s%s", cputype, + cpuflag ? "c" : "t", mflag ? "m" : ""); + cmd = cmdbuf; + } + iargc = tokenize(cmd, iargv, nelem(iargv)-1); + cmd = iargv[0]; + + /* make iargv[0] basename(iargv[0]) */ + if(iargv[0] = strrchr(iargv[0], '/')) + iargv[0]++; + else + iargv[0] = cmd; + + iargv[iargc] = nil; + + exec(cmd, iargv); + fatal(cmd); +} + +static Method* +findmethod(char *a) +{ + Method *mp; + int i, j; + char *cp; + + if((i = strlen(a)) == 0) + return nil; + cp = strchr(a, '!'); + if(cp) + i = cp - a; + for(mp = method; mp->name; mp++){ + j = strlen(mp->name); + if(j > i) + j = i; + if(strncmp(a, mp->name, j) == 0) + break; + } + if(mp->name) + return mp; + return nil; +} + +/* + * ask user from whence cometh the root file system + */ +static Method* +rootserver(char *arg) +{ + char prompt[256]; + Method *mp; + char *cp; + int n; + + /* look for required reply */ + readfile("#e/nobootprompt", reply, sizeof(reply)); + if(reply[0]){ + mp = findmethod(reply); + if(mp) + goto HaveMethod; + print("boot method %s not found\n", reply); + reply[0] = 0; + } + + /* make list of methods */ + mp = 
method; + n = sprint(prompt, "root is from (%s", mp->name); + for(mp++; mp->name; mp++) + n += sprint(prompt+n, ", %s", mp->name); + sprint(prompt+n, ")"); + + /* create default reply */ + readfile("#e/bootargs", reply, sizeof(reply)); + if(reply[0] == 0 && arg != 0) + strcpy(reply, arg); + if(reply[0]){ + mp = findmethod(reply); + if(mp == 0) + reply[0] = 0; + } + if(reply[0] == 0) + strcpy(reply, method->name); + + /* parse replies */ + do{ + outin(prompt, reply, sizeof(reply)); + mp = findmethod(reply); + }while(mp == nil); + +HaveMethod: + bargc = tokenize(reply, bargv, Nbarg-2); + bargv[bargc] = nil; + cp = strchr(reply, '!'); + if(cp) + strcpy(sys, cp+1); + return mp; +} + +static void +swapproc(void) +{ + int fd; + + fd = open("#c/swap", OWRITE); + if(fd < 0){ + warning("opening #c/swap"); + return; + } + if(write(fd, "start", 5) <= 0) + warning("starting swap kproc"); + close(fd); +} + +static void +usbinit(void) +{ + static char usbd[] = "/boot/usbd"; + + if(access("#u/usb/ctl", 0) >= 0 && bind("#u", "/dev", MAFTER) >= 0 && + access(usbd, AEXIST) >= 0) + run(usbd, nil); +} + +static void +kbmap(void) +{ + char *f; + int n, in, out; + char buf[1024]; + + f = getenv("kbmap"); + if(f == nil) + return; + if(bind("#κ", "/dev", MAFTER) < 0){ + warning("can't bind #κ"); + return; + } + + in = open(f, OREAD); + if(in < 0){ + warning("can't open kbd map"); + return; + } + out = open("/dev/kbmap", OWRITE); + if(out < 0) { + warning("can't open /dev/kbmap"); + close(in); + return; + } + while((n = read(in, buf, sizeof(buf))) > 0) + if(write(out, buf, n) != n){ + warning("write to /dev/kbmap failed"); + break; + } + close(in); + close(out); +} diff -Nru 0/sys/src/nix/boot/boot.h 4/sys/src/nix/boot/boot.h --- 0/sys/src/nix/boot/boot.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/boot.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,73 @@ +typedef struct Method Method; +struct Method +{ + char *name; + void (*config)(Method*); + int (*connect)(void); + char *arg; +}; +enum +{ + 
Statsz= 256, + Nbarg= 16, +}; + +extern void authentication(int); +extern char* bootdisk; +extern char* rootdir; +extern int (*cfs)(int); +extern int cpuflag; +extern char cputype[]; +extern int fflag; +extern int kflag; +extern Method method[]; +extern void (*pword)(int, Method*); +extern char sys[]; +extern uchar hostkey[]; +extern uchar statbuf[Statsz]; +extern int bargc; +extern char *bargv[Nbarg]; + +extern int readparts(void); + +/* libc equivalent */ +extern int cache(int); +extern char* checkkey(Method*, char*, char*); +extern void fatal(char*); +extern void getpasswd(char*, int); +extern void key(int, Method*); +extern int outin(char*, char*, int); +extern int plumb(char*, char*, int*, char*); +extern int readfile(char*, char*, int); +extern long readn(int, void*, long); +extern void run(char *file, ...); +extern int sendmsg(int, char*); +extern void setenv(char*, char*); +extern void settime(int, int, char*); +extern void srvcreate(char*, int); +extern void warning(char*); +extern int writefile(char*, char*, int); +extern void boot(int, char **); +extern void doauthenticate(int, Method*); +extern int parsefields(char*, char**, int, char*); + +/* methods */ +extern void configil(Method*); +extern int connectil(void); + +extern void configtcp(Method*); +extern int connecttcp(void); + +extern void configlocal(Method*); +extern int connectlocal(void); + +extern void configpaq(Method*); +extern int connectpaq(void); + +extern void configembed(Method*); +extern int connectembed(void); + +extern void configip(int, char**, int); + +/* hack for passing authentication address */ +extern char *authaddr; diff -Nru 0/sys/src/nix/boot/bootauth.c 4/sys/src/nix/boot/bootauth.c --- 0/sys/src/nix/boot/bootauth.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/bootauth.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +char *authaddr; +static void glenda(void); + +void +authentication(int cpuflag) +{ + char 
*argv[16], **av; + int ac; + + if(access("/boot/factotum", AEXEC) < 0){ + glenda(); + return; + } + + /* start agent */ + ac = 0; + av = argv; + av[ac++] = "factotum"; + if(getenv("debugfactotum")) + av[ac++] = "-p"; +// av[ac++] = "-d"; /* debug traces */ +// av[ac++] = "-D"; /* 9p messages */ + if(cpuflag) + av[ac++] = "-S"; + else + av[ac++] = "-u"; + av[ac++] = "-sfactotum"; + if(authaddr != nil){ + av[ac++] = "-a"; + av[ac++] = authaddr; + } + av[ac] = 0; + switch(fork()){ + case -1: + fatal("starting factotum"); + case 0: + exec("/boot/factotum", av); + fatal("execing /boot/factotum"); + default: + break; + } + + /* wait for agent to really be there */ + while(access("/mnt/factotum", 0) < 0) + sleep(250); + + if(cpuflag) + return; +} + +static void +glenda(void) +{ + int fd; + char *s; + + s = getenv("user"); + if(s == nil) + s = "glenda"; + + fd = open("#c/hostowner", OWRITE); + if(fd >= 0){ + if(write(fd, s, strlen(s)) != strlen(s)) + fprint(2, "setting #c/hostowner to %s: %r\n", s); + close(fd); + } +} diff -Nru 0/sys/src/nix/boot/bootcache.c 4/sys/src/nix/boot/bootcache.c --- 0/sys/src/nix/boot/bootcache.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/bootcache.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,80 @@ +#include +#include +#include <../boot/boot.h> + +uchar statbuf[Statsz]; + +int +cache(int fd) +{ + int argc, i, p[2]; + char *argv[5], bd[32], buf[256], partition[64], *pp; + + if(stat("/boot/cfs", statbuf, sizeof statbuf) < 0) + return fd; + + *partition = 0; + + bind("#S", "/dev", MAFTER); + readfile("#e/cfs", buf, sizeof(buf)); + if(*buf){ + argc = tokenize(buf, argv, 4); + for(i = 0; i < argc; i++){ + if(strcmp(argv[i], "off") == 0) + return fd; + else if(stat(argv[i], statbuf, sizeof statbuf) >= 0){ + strncpy(partition, argv[i], sizeof(partition)-1); + partition[sizeof(partition)-1] = 0; + } + } + } + + if(*partition == 0){ + readfile("#e/bootdisk", bd, sizeof(bd)); + if(*bd){ + if(pp = strchr(bd, ':')) + *pp = 0; + /* damned artificial intelligence 
*/ + i = strlen(bd); + if(strcmp("disk", &bd[i-4]) == 0) + bd[i-4] = 0; + else if(strcmp("fs", &bd[i-2]) == 0) + bd[i-2] = 0; + else if(strcmp("fossil", &bd[i-6]) == 0) + bd[i-6] = 0; + sprint(partition, "%scache", bd); + if(stat(partition, statbuf, sizeof statbuf) < 0) + *bd = 0; + } + if(*bd == 0){ + sprint(partition, "%scache", bootdisk); + if(stat(partition, statbuf, sizeof statbuf) < 0) + return fd; + } + } + + print("cfs..."); + if(pipe(p)<0) + fatal("pipe"); + switch(fork()){ + case -1: + fatal("fork"); + case 0: + close(p[1]); + dup(fd, 0); + close(fd); + dup(p[0], 1); + close(p[0]); + if(fflag) + execl("/boot/cfs", "bootcfs", "-rs", "-f", partition, 0); + else + execl("/boot/cfs", "bootcfs", "-s", "-f", partition, 0); + break; + default: + close(p[0]); + close(fd); + fd = p[1]; + break; + } + return fd; +} diff -Nru 0/sys/src/nix/boot/bootip.c 4/sys/src/nix/boot/bootip.c --- 0/sys/src/nix/boot/bootip.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/bootip.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,220 @@ +#include +#include +#include + +#include "boot.h" + +static uchar fsip[IPaddrlen]; + uchar auip[IPaddrlen]; +static char mpoint[32]; + +static int isvalidip(uchar*); +static void netndb(char*, uchar*); +static void netenv(char*, uchar*); + + +void +configip(int bargc, char **bargv, int needfs) +{ + Waitmsg *w; + int argc, pid; + char **arg, **argv, buf[32], *p; + + fmtinstall('I', eipfmt); + fmtinstall('M', eipfmt); + fmtinstall('E', eipfmt); + + arg = malloc((bargc+1) * sizeof(char*)); + if(arg == nil) + fatal("malloc"); + memmove(arg, bargv, bargc * sizeof(char*)); + arg[bargc] = 0; + +print("ipconfig..."); + argc = bargc; + argv = arg; + strcpy(mpoint, "/net"); + ARGBEGIN { + case 'x': + p = ARGF(); + if(p != nil) + snprint(mpoint, sizeof(mpoint), "/net%s", p); + break; + case 'g': + case 'b': + case 'h': + case 'm': + p = ARGF(); + USED(p); + break; + } ARGEND; + + /* bind in an ip interface */ + if(bind("#I", mpoint, MAFTER) < 0) + fatal("bind #I\n"); + 
if(access("#l0", 0) == 0 && bind("#l0", mpoint, MAFTER) < 0) + print("bind #l0: %r\n"); + if(access("#l1", 0) == 0 && bind("#l1", mpoint, MAFTER) < 0) + print("bind #l1: %r\n"); + if(access("#l2", 0) == 0 && bind("#l2", mpoint, MAFTER) < 0) + print("bind #l2: %r\n"); + if(access("#l3", 0) == 0 && bind("#l3", mpoint, MAFTER) < 0) + print("bind #l3: %r\n"); + werrstr(""); + + /* let ipconfig configure the ip interface */ + switch(pid = fork()){ + case -1: + fatal("fork configuring ip"); + case 0: + exec("/boot/ipconfig", arg); + fatal("execing /ipconfig"); + default: + break; + } + + /* wait for ipconfig to finish */ + for(;;){ + w = wait(); + if(w != nil && w->pid == pid){ + if(w->msg[0] != 0) + fatal(w->msg); + free(w); + break; + } else if(w == nil) + fatal("configuring ip"); + free(w); + } + + if(!needfs) + return; + + /* if we didn't get a file and auth server, query user */ + netndb("fs", fsip); + if(!isvalidip(fsip)) + netenv("fs", fsip); + while(!isvalidip(fsip)){ + buf[0] = 0; + outin("filesystem IP address", buf, sizeof(buf)); + if (parseip(fsip, buf) == -1) + fprint(2, "configip: can't parse fs ip %s\n", buf); + } + + netndb("auth", auip); + if(!isvalidip(auip)) + netenv("auth", auip); + while(!isvalidip(auip)){ + buf[0] = 0; + outin("authentication server IP address", buf, sizeof(buf)); + if (parseip(auip, buf) == -1) + fprint(2, "configip: can't parse auth ip %s\n", buf); + } +} + +static void +setauthaddr(char *proto, int port) +{ + char buf[128]; + + snprint(buf, sizeof buf, "%s!%I!%d", proto, auip, port); + authaddr = strdup(buf); +} + +void +configtcp(Method*) +{ + configip(bargc, bargv, 1); + setauthaddr("tcp", 567); +} + +int +connecttcp(void) +{ + int fd; + char buf[64]; + + snprint(buf, sizeof buf, "tcp!%I!564", fsip); + fd = dial(buf, 0, 0, 0); + if (fd < 0) + werrstr("dial %s: %r", buf); + return fd; +} + +void +configil(Method*) +{ + configip(bargc, bargv, 1); + setauthaddr("tcp", 567); +} + +int +connectil(void) +{ + char buf[64]; + + 
snprint(buf, sizeof buf, "il!%I!17008", fsip); + return dial(buf, 0, 0, 0); +} + +static int +isvalidip(uchar *ip) +{ + if(ipcmp(ip, IPnoaddr) == 0) + return 0; + if(ipcmp(ip, v4prefix) == 0) + return 0; + return 1; +} + +static void +netenv(char *attr, uchar *ip) +{ + int fd, n; + char buf[128]; + + ipmove(ip, IPnoaddr); + snprint(buf, sizeof(buf), "#e/%s", attr); + fd = open(buf, OREAD); + if(fd < 0) + return; + + n = read(fd, buf, sizeof(buf)-1); + if(n <= 0) + return; + buf[n] = 0; + if (parseip(ip, buf) == -1) + fprint(2, "netenv: can't parse ip %s\n", buf); +} + +static void +netndb(char *attr, uchar *ip) +{ + int fd, n, c; + char buf[1024]; + char *p; + + ipmove(ip, IPnoaddr); + snprint(buf, sizeof(buf), "%s/ndb", mpoint); + fd = open(buf, OREAD); + if(fd < 0) + return; + n = read(fd, buf, sizeof(buf)-1); + close(fd); + if(n <= 0) + return; + buf[n] = 0; + n = strlen(attr); + for(p = buf; ; p++){ + p = strstr(p, attr); + if(p == nil) + break; + c = *(p-1); + if(*(p + n) == '=' && (p == buf || c == '\n' || c == ' ' || c == '\t')){ + p += n+1; + if (parseip(ip, p) == -1) + fprint(2, "netndb: can't parse ip %s\n", p); + return; + } + } + return; +} diff -Nru 0/sys/src/nix/boot/doauthenticate.c 4/sys/src/nix/boot/doauthenticate.c --- 0/sys/src/nix/boot/doauthenticate.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/doauthenticate.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,126 @@ +#include +#include +#include +#include "../boot/boot.h" + +static char *pbmsg = "AS protocol botch"; +static char *ccmsg = "can't connect to AS"; + +long +readn(int fd, void *buf, long len) +{ + int m, n; + char *p; + + p = buf; + for(n = 0; n < len; n += m){ + m = read(fd, p+n, len-n); + if(m <= 0) + return -1; + } + return n; +} + +static char* +fromauth(Method *mp, char *trbuf, char *tbuf) +{ + int afd; + char t; + char *msg; + static char error[2*ERRMAX]; + + if(mp->auth == 0) + fatal("no method for accessing auth server"); + afd = (*mp->auth)(); + if(afd < 0) { + sprint(error, "%s: %r", 
ccmsg); + return error; + } + + if(write(afd, trbuf, TICKREQLEN) < 0 || read(afd, &t, 1) != 1){ + close(afd); + sprint(error, "%s: %r", pbmsg); + return error; + } + switch(t){ + case AuthOK: + msg = 0; + if(readn(afd, tbuf, 2*TICKETLEN) < 0) { + sprint(error, "%s: %r", pbmsg); + msg = error; + } + break; + case AuthErr: + if(readn(afd, error, ERRMAX) < 0) { + sprint(error, "%s: %r", pbmsg); + msg = error; + } + else { + error[ERRMAX-1] = 0; + msg = error; + } + break; + default: + msg = pbmsg; + break; + } + + close(afd); + return msg; +} + +void +doauthenticate(int fd, Method *mp) +{ + char *msg; + char trbuf[TICKREQLEN]; + char tbuf[2*TICKETLEN]; + + print("session..."); + if(fsession(fd, trbuf, sizeof trbuf) < 0) + fatal("session command failed"); + + /* no authentication required? */ + memset(tbuf, 0, 2*TICKETLEN); + if(trbuf[0] == 0) + return; + + /* try getting to an auth server */ + print("getting ticket..."); + msg = fromauth(mp, trbuf, tbuf); + print("authenticating..."); + if(msg == 0) + if(fauth(fd, tbuf) >= 0) + return; + + /* didn't work, go for the security hole */ + fprint(2, "no authentication server (%s), using your key as server key\n", msg); +} + +char* +checkkey(Method *mp, char *name, char *key) +{ + char *msg; + Ticketreq tr; + Ticket t; + char trbuf[TICKREQLEN]; + char tbuf[TICKETLEN]; + + memset(&tr, 0, sizeof tr); + tr.type = AuthTreq; + strcpy(tr.authid, name); + strcpy(tr.hostid, name); + strcpy(tr.uid, name); + convTR2M(&tr, trbuf); + msg = fromauth(mp, trbuf, tbuf); + if(msg == ccmsg){ + fprint(2, "boot: can't contact auth server, passwd unchecked\n"); + return 0; + } + if(msg) + return msg; + convM2T(tbuf, &t, key); + if(t.num == AuthTc && strcmp(name, t.cuid)==0) + return 0; + return "no match"; +} diff -Nru 0/sys/src/nix/boot/embed.c 4/sys/src/nix/boot/embed.c --- 0/sys/src/nix/boot/embed.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/embed.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,74 @@ +#include +#include +#include <../boot/boot.h> + 
+static char *paqfile; + +void +configembed(Method *m) +{ + if(*sys == '/' || *sys == '#'){ + /* + * if the user specifies the disk in the boot cmd or + * 'root is from' prompt, use it + */ + paqfile = sys; + } else if(m->arg){ + /* + * a default is supplied when the kernel is made + */ + paqfile = m->arg; + } +} + +int +connectembed(void) +{ + int i, p[2]; + Dir *dir; + char **arg, **argp; + + dir = dirstat("/boot/paqfs"); + if(dir == nil) + return -1; + free(dir); + + dir = dirstat(paqfile); + if(dir == nil || dir->mode & DMDIR) + return -1; + free(dir); + + print("paqfs..."); + if(bind("#c", "/dev", MREPL) < 0) + fatal("bind #c"); + if(bind("#p", "/proc", MREPL) < 0) + fatal("bind #p"); + if(pipe(p)<0) + fatal("pipe"); + switch(fork()){ + case -1: + fatal("fork"); + case 0: + arg = malloc((bargc+5)*sizeof(char*)); + argp = arg; + *argp++ = "/boot/paqfs"; + *argp++ = "-iv"; + *argp++ = paqfile; + for(i=1; i +#include +#include <../boot/boot.h> + +void +getpasswd(char *p, int len) +{ + char c; + int i, n, fd; + + fd = open("#c/consctl", OWRITE); + if(fd < 0) + fatal("can't open consctl; please reboot"); + write(fd, "rawon", 5); + Prompt: + print("password: "); + n = 0; + for(;;){ + do{ + i = read(0, &c, 1); + if(i < 0) + fatal("can't read cons; please reboot"); + }while(i == 0); + switch(c){ + case '\n': + p[n] = '\0'; + close(fd); + print("\n"); + return; + case '\b': + if(n > 0) + n--; + break; + case 'u' - 'a' + 1: /* cntrl-u */ + print("\n"); + goto Prompt; + default: + if(n < len - 1) + p[n++] = c; + break; + } + } +} diff -Nru 0/sys/src/nix/boot/local.c 4/sys/src/nix/boot/local.c --- 0/sys/src/nix/boot/local.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/local.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,275 @@ +#include +#include +#include <../boot/boot.h> + +static char diskname[64]; +static char *disk; +static char **args; + +void +configlocal(Method *mp) +{ + char *p; + int n; + + if(*sys == '/' || *sys == '#'){ + /* + * if the user specifies the disk in the 
boot cmd or + * 'root is from' prompt, use it + */ + disk = sys; + } else if(strncmp(argv0, "dksc(0,", 7) == 0){ + /* + * on many mips arg0 of the boot command specifies the + * scsi logical unit number + */ + p = strchr(argv0, ','); + n = strtoul(p+1, 0, 10); + sprint(diskname, "#w%d/sd%dfs", n, n); + disk = diskname; + } else if(mp->arg){ + /* + * a default is supplied when the kernel is made + */ + disk = mp->arg; + } else if(*bootdisk){ + /* + * an environment variable from a pc's plan9.ini or + * from the mips nvram or generated by the kernel + * is the last resort. + */ + disk = bootdisk; + } + + /* if we've decided on one, pass it on to all programs */ + if(disk) + setenv("bootdisk", disk); + + USED(mp); +} + +int +connectlocalkfs(void) +{ + int i, pid, fd, p[2]; + char partition[64]; + char *dev; + char **arg, **argp; + Dir *d; + + if(stat("/boot/kfs", statbuf, sizeof statbuf) < 0) + return -1; + + dev = disk ? disk : bootdisk; + snprint(partition, sizeof partition, "%sfs", dev); + fd = open(partition, OREAD); + if(fd < 0){ + strcpy(partition, dev); + fd = open(partition, OREAD); + if(fd < 0) + return -1; + } + /* + * can't do this check -- might be some other server posing as kfs. 
+ * + memset(buf, 0, sizeof buf); + pread(fd, buf, 512, 0); + close(fd); + if(memcmp(buf+256, "kfs wren device\n", 16) != 0){ + if(strstr(partition, "/fs")) + print("no kfs file system found on %s\n", partition); + return -1; + } + * + */ + d = dirfstat(fd); + close(fd); + if(d == nil) + return -1; + if(d->mode&DMDIR){ + free(d); + return -1; + } + free(d); + + print("kfs..."); + if(pipe(p)<0) + fatal("pipe"); + switch(pid = fork()){ + case -1: + fatal("fork"); + case 0: + arg = malloc((bargc+5)*sizeof(char*)); + argp = arg; + *argp++ = "kfs"; + *argp++ = "-f"; + *argp++ = partition; + *argp++ = "-s"; + for(i=1; i= 0){ + print("venti..."); + memset(buf, 0, sizeof buf); + pread(fd, buf, 512, 248*1024); + close(fd); + if(memcmp(buf, "venti config\n", 13) != 0){ + print("no venti config found on %s\n", f[0]); + return -1; + } + if(stat("/boot/venti", statbuf, sizeof statbuf) < 0){ + print("/boot/venti does not exist\n"); + return -1; + } + switch(nf){ + case 1: + f[1] = "tcp!127.1!17034"; + case 2: + f[2] = "tcp!127.1!8000"; + } + configloopback(); + run("/boot/venti", "-c", f[0], "-a", f[1], "-h", f[2], 0); + /* + * If the announce address is tcp!*!foo, then set + * $venti to tcp!127.1!foo instead, which is actually dialable. + */ + if((p = strstr(f[1], "!*!")) != 0){ + *p = 0; + snprint(buf, sizeof buf, "%s!127.1!%s", f[1], p+3); + f[1] = buf; + } + setenv("venti", f[1]); + }else{ + /* set up the network so we can talk to the venti server */ + /* this is such a crock. 
*/ + configip(nf, f, 0); + setenv("venti", f[0]); + } + } + + /* start fossil */ + print("fossil(%s)...", partition); + run("/boot/fossil", "-f", partition, "-c", "srv -A fboot", "-c", "srv -p fscons", 0); + fd = open("#s/fboot", ORDWR); + if(fd < 0){ + print("open #s/fboot: %r\n"); + return -1; + } + remove("#s/fboot"); /* we'll repost as #s/boot */ + return fd; +} + +int +connectlocal(void) +{ + int fd; + + if(bind("#c", "/dev", MREPL) < 0) + fatal("bind #c"); + if(bind("#p", "/proc", MREPL) < 0) + fatal("bind #p"); + bind("#S", "/dev", MAFTER); + bind("#k", "/dev", MAFTER); + bind("#æ", "/dev", MAFTER); + + if((fd = connectlocalfossil()) < 0) + if((fd = connectlocalkfs()) < 0) + return -1; + return fd; +} diff -Nru 0/sys/src/nix/boot/nopsession.c 4/sys/src/nix/boot/nopsession.c --- 0/sys/src/nix/boot/nopsession.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/nopsession.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,52 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +static Fcall hdr; + +static void +rpc(int fd, int type) +{ + int n, l; + char buf[128], *p; + + hdr.type = type; + hdr.tag = NOTAG; + n = convS2M(&hdr, buf); + if(write(fd, buf, n) != n) + fatal("write rpc"); + + print("..."); + p = buf; + l = 0; + while(l < 3) { + n = read(fd, p, 3); + if(n <= 0) + fatal("read rpc"); + if(n == 2 && l == 0 && buf[0] == 'O' && buf[1] == 'K') + continue; + p += n; + l += n; + } + if(convM2S(buf, &hdr, n) == 0){ + print("%ux %ux %ux\n", buf[0], buf[1], buf[2]); + fatal("rpc format"); + } + if(hdr.tag != NOTAG) + fatal("rpc tag not NOTAG"); + if(hdr.type == Rerror){ + print("error %s;", hdr.ename); + fatal("remote error"); + } + if(hdr.type != type+1) + fatal("not reply"); +} + +void +nop(int fd) +{ + print("nop"); + rpc(fd, Tnop); +} diff -Nru 0/sys/src/nix/boot/paq.c 4/sys/src/nix/boot/paq.c --- 0/sys/src/nix/boot/paq.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/paq.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,67 @@ +#include +#include +#include 
<../boot/boot.h> + +char *fparts[] = +{ + "add bootldr 0x0000000 0x0040000", + "add params 0x0040000 0x0080000", + "add kernel 0x0080000 0x0140000", + "add user 0x0140000 0x0200000", + "add ramdisk 0x0200000 0x0600000", +}; + +void +configpaq(Method*) +{ + int fd; + int i; + + if(bind("#F", "/dev", MAFTER) < 0) + fatal("bind #c"); + if(bind("#p", "/proc", MREPL) < 0) + fatal("bind #p"); + fd = open("/dev/flash/flashctl", OWRITE); + if(fd < 0) + fatal("opening flashctl"); + for(i = 0; i < nelem(fparts); i++) + if(fprint(fd, fparts[i]) < 0) + fatal(fparts[i]); + close(fd); +} + +int +connectpaq(void) +{ + int p[2]; + char **arg, **argp; + + print("paq..."); + if(pipe(p)<0) + fatal("pipe"); + switch(fork()){ + case -1: + fatal("fork"); + case 0: + arg = malloc(10*sizeof(char*)); + argp = arg; + *argp++ = "paqfs"; + *argp++ = "-v"; + *argp++ = "-i"; + *argp++ = "/dev/flash/ramdisk"; + *argp = 0; + + dup(p[0], 0); + dup(p[1], 1); + close(p[0]); + close(p[1]); + exec("/boot/paqfs", arg); + fatal("can't exec paqfs"); + default: + break; + } + waitpid(); + + close(p[1]); + return p[0]; +} diff -Nru 0/sys/src/nix/boot/parts.c 4/sys/src/nix/boot/parts.c --- 0/sys/src/nix/boot/parts.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/parts.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,612 @@ +/* + * read disk partition tables, intended for early use on systems + * that don't use 9load. borrowed from 9load. 
+ */ + +#include +#include +#include +#include +#include +#include "../boot/boot.h" + +typedef struct Fs Fs; +#include "/sys/src/boot/pc/dosfs.h" + +#define GSHORT(p) (((p)[1]<<8)|(p)[0]) +#define GLONG(p) ((GSHORT((p)+2)<<16)|GSHORT(p)) + +#define trace 0 + +int debugboot = 1; + +enum { + parttrace = 0, + + Npart = 64, + SDnpart = Npart, + + Maxsec = 2048, + Cdsec = 2048, + Normsec = 512, /* disks */ + + NAMELEN = 256, /* hack */ +}; + +typedef struct SDpart SDpart; +typedef struct SDunit SDunit; + +typedef struct SDpart { + uvlong start; + uvlong end; + char name[NAMELEN]; + int valid; +} SDpart; + +typedef struct SDunit { + int ctl; /* fds */ + int data; + + char name[NAMELEN]; + + uvlong sectors; + ulong secsize; + SDpart* part; + int npart; /* of valid partitions */ +} SDunit; + +static uchar *mbrbuf, *partbuf; + +static void +sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end) +{ + SDpart *pp; + int i, partno; + + if(parttrace) + print("add %d %s %s %lld %lld\n", unit->npart, unit->name, name, start, end); + /* + * Check name not already used + * and look for a free slot. + */ + if(unit->part != nil){ + partno = -1; + for(i = 0; i < SDnpart; i++){ + pp = &unit->part[i]; + if(!pp->valid){ + if(partno == -1) + partno = i; + break; + } + if(strcmp(name, pp->name) == 0){ + if(pp->start == start && pp->end == end){ + if(parttrace) + print("already present\n"); + return; + } + } + } + }else{ + if((unit->part = malloc(sizeof(SDpart)*SDnpart)) == nil){ + if(parttrace) + print("malloc failed\n"); + return; + } + partno = 0; + } + + /* + * Check there is a free slot and size and extent are valid. + */ + if(partno == -1 || start > end || end > unit->sectors){ + print("cannot add %s!%s [%llud,%llud) to disk [0,%llud): %s\n", + unit->name, name, start, end, unit->sectors, + partno==-1 ? 
"no free partitions" : "partition boundaries out of range"); + return; + } + pp = &unit->part[partno]; + pp->start = start; + pp->end = end; + strncpy(pp->name, name, NAMELEN); + pp->valid = 1; + unit->npart++; + + /* update devsd's in-memory partition table */ + if (fprint(unit->ctl, "part %s %lld %lld\n", name, start, end) < 0) + fprint(2, "can't update %s's devsd partition table for %s: %r\n", + unit->name, name); + if (debugboot) + print("part %s %lld %lld\n", name, start, end); +} + +static long +sdread(SDunit *unit, SDpart *pp, void* va, long len, vlong off) +{ + long l, secsize; + uvlong bno, nb; + + /* + * Check the request is within partition bounds. + */ + secsize = unit->secsize; + if (secsize == 0) + sysfatal("sdread: zero sector size"); + bno = off/secsize + pp->start; + nb = (off+len+secsize-1)/secsize + pp->start - bno; + if(bno+nb > pp->end) + nb = pp->end - bno; + if(bno >= pp->end || nb == 0) + return 0; + + seek(unit->data, bno * secsize, 0); + assert(va); /* "sdread" */ + l = read(unit->data, va, len); + if (l < 0) + return 0; + return l; +} + +static int +sdreadblk(SDunit *unit, SDpart *part, void *a, vlong off, int mbr) +{ + uchar *b; + + assert(a); /* sdreadblk */ + if(sdread(unit, part, a, unit->secsize, off) != unit->secsize){ + if(trace) + print("%s: read %lud at %lld failed\n", unit->name, + unit->secsize, (vlong)part->start*unit->secsize+off); + return -1; + } + b = a; + if(mbr && (b[0x1FE] != 0x55 || b[0x1FF] != 0xAA)){ + if(trace) + print("%s: bad magic %.2ux %.2ux at %lld\n", + unit->name, b[0x1FE], b[0x1FF], + (vlong)part->start*unit->secsize+off); + return -1; + } + return 0; +} + +/* + * read partition table. The partition table is just ascii strings. + */ +#define MAGIC "plan9 partitions" +static void +oldp9part(SDunit *unit) +{ + SDpart *pp; + char *field[3], *line[Npart+1]; + ulong n; + uvlong start, end; + int i; + + /* + * We have some partitions already. 
+ */ + pp = &unit->part[unit->npart]; + + /* + * We prefer partition tables on the second to last sector, + * but some old disks use the last sector instead. + */ + strcpy(pp->name, "partition"); + pp->start = unit->sectors - 2; + pp->end = unit->sectors - 1; + + if(debugboot) + print("oldp9part %s\n", unit->name); + if(sdreadblk(unit, pp, partbuf, 0, 0) < 0) + return; + + if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) { + /* not found on 2nd last sector; look on last sector */ + pp->start++; + pp->end++; + if(sdreadblk(unit, pp, partbuf, 0, 0) < 0) + return; + if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) + return; + print("%s: using old plan9 partition table on last sector\n", unit->name); + }else + print("%s: using old plan9 partition table on 2nd-to-last sector\n", unit->name); + + /* we found a partition table, so add a partition partition */ + unit->npart++; + partbuf[unit->secsize-1] = '\0'; + + /* + * parse partition table + */ + n = gettokens((char*)partbuf, line, Npart+1, "\n"); + if(n && strncmp(line[0], MAGIC, sizeof(MAGIC)-1) == 0){ + for(i = 1; i < n && unit->npart < SDnpart; i++){ + if(gettokens(line[i], field, 3, " ") != 3) + break; + start = strtoull(field[1], 0, 0); + end = strtoull(field[2], 0, 0); + if(start >= end || end > unit->sectors) + break; + sdaddpart(unit, field[0], start, end); + } + } +} + +static SDpart* +sdfindpart(SDunit *unit, char *name) +{ + int i; + + if(parttrace) + print("findpart %d %s %s: ", unit->npart, unit->name, name); + for(i=0; inpart; i++) { + if(parttrace) + print("%s...", unit->part[i].name); + if(strcmp(unit->part[i].name, name) == 0){ + if(parttrace) + print("\n"); + return &unit->part[i]; + } + } + if(parttrace) + print("not found\n"); + return nil; +} + +/* + * look for a plan 9 partition table on drive `unit' in the second + * sector (sector 1) of partition `name'. + * if found, add the partitions defined in the table. 
+ */ +static void +p9part(SDunit *unit, char *name) +{ + SDpart *p; + char *field[4], *line[Npart+1]; + uvlong start, end; + int i, n; + + if(debugboot) + print("p9part %s %s\n", unit->name, name); + p = sdfindpart(unit, name); + if(p == nil) + return; + + if(sdreadblk(unit, p, partbuf, unit->secsize, 0) < 0) + return; + partbuf[unit->secsize-1] = '\0'; + + if(strncmp((char*)partbuf, "part ", 5) != 0) + return; + + n = gettokens((char*)partbuf, line, Npart+1, "\n"); + if(n == 0) + return; + for(i = 0; i < n && unit->npart < SDnpart; i++){ + if(strncmp(line[i], "part ", 5) != 0) + break; + if(gettokens(line[i], field, 4, " ") != 4) + break; + start = strtoull(field[2], 0, 0); + end = strtoull(field[3], 0, 0); + if(start >= end || end > unit->sectors) + break; + sdaddpart(unit, field[1], p->start+start, p->start+end); + } +} + +static int +isdos(int t) +{ + return t==FAT12 || t==FAT16 || t==FATHUGE || t==FAT32 || t==FAT32X; +} + +static int +isextend(int t) +{ + return t==EXTEND || t==EXTHUGE || t==LEXTEND; +} + +/* + * Fetch the first dos and all plan9 partitions out of the MBR partition table. + * We return -1 if we did not find a plan9 partition. + */ +static int +mbrpart(SDunit *unit) +{ + Dospart *dp; + uvlong taboffset, start, end; + uvlong firstxpart, nxtxpart; + int havedos, i, nplan9; + char name[10]; + + taboffset = 0; + dp = (Dospart*)&mbrbuf[0x1BE]; + { + /* get the MBR (allowing for DMDDO) */ + if(sdreadblk(unit, &unit->part[0], mbrbuf, + (vlong)taboffset * unit->secsize, 1) < 0) + return -1; + for(i=0; i<4; i++) + if(dp[i].type == DMDDO) { + if(trace) + print("DMDDO partition found\n"); + taboffset = 63; + if(sdreadblk(unit, &unit->part[0], mbrbuf, + (vlong)taboffset * unit->secsize, 1) < 0) + return -1; + i = -1; /* start over */ + } + } + + /* + * Read the partitions, first from the MBR and then + * from successive extended partition tables. 
+ */ + nplan9 = 0; + havedos = 0; + firstxpart = 0; + for(;;) { + if(sdreadblk(unit, &unit->part[0], mbrbuf, + (vlong)taboffset * unit->secsize, 1) < 0) + return -1; + if(trace) { + if(firstxpart) + print("%s ext %llud ", unit->name, taboffset); + else + print("%s mbr ", unit->name); + } + nxtxpart = 0; + for(i=0; i<4; i++) { + if(trace) + print("dp %d...", dp[i].type); + start = taboffset+GLONG(dp[i].start); + end = start+GLONG(dp[i].len); + + if(dp[i].type == PLAN9) { + if(nplan9 == 0) + strcpy(name, "plan9"); + else + sprint(name, "plan9.%d", nplan9); + sdaddpart(unit, name, start, end); + p9part(unit, name); + nplan9++; + } + + /* + * We used to take the active partition (and then the first + * when none are active). We have to take the first here, + * so that the partition we call ``dos'' agrees with the + * partition disk/fdisk calls ``dos''. + */ + if(havedos==0 && isdos(dp[i].type)){ + havedos = 1; + sdaddpart(unit, "dos", start, end); + } + + /* nxtxpart is relative to firstxpart (or 0), not taboffset */ + if(isextend(dp[i].type)){ + nxtxpart = start-taboffset+firstxpart; + if(trace) + print("link %llud...", nxtxpart); + } + } + if(trace) + print("\n"); + + if(!nxtxpart) + break; + if(!firstxpart) + firstxpart = nxtxpart; + taboffset = nxtxpart; + } + return nplan9 ? 0 : -1; +} + +/* + * To facilitate booting from CDs, we create a partition for + * the boot floppy image embedded in a bootable CD. 
+ */ +static int +part9660(SDunit *unit) +{ + uchar buf[Maxsec]; + ulong a, n; + uchar *p; + + if(unit->secsize != Cdsec) + return -1; + + if(sdread(unit, &unit->part[0], buf, Cdsec, 17*Cdsec) < 0) + return -1; + + if(buf[0] || strcmp((char*)buf+1, "CD001\x01EL TORITO SPECIFICATION") != 0) + return -1; + + + p = buf+0x47; + a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + + if(sdread(unit, &unit->part[0], buf, Cdsec, a*Cdsec) < 0) + return -1; + + if(memcmp(buf, "\x01\x00\x00\x00", 4) != 0 + || memcmp(buf+30, "\x55\xAA", 2) != 0 + || buf[0x20] != 0x88) + return -1; + + p = buf+0x28; + a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + + switch(buf[0x21]){ + case 0x01: + n = 1200*1024; + break; + case 0x02: + n = 1440*1024; + break; + case 0x03: + n = 2880*1024; + break; + default: + return -1; + } + n /= Cdsec; + + print("found partition %s!cdboot; %lud+%lud\n", unit->name, a, n); + sdaddpart(unit, "cdboot", a, a+n); + return 0; +} + +enum { + NEW = 1<<0, + OLD = 1<<1 +}; + +/* + * read unit->data to look for partition tables. + * if found, stash partitions in environment and write them to ctl too. + */ +static void +partition(SDunit *unit) +{ + int type; + char *p; + + if(unit->part == 0) + return; + + if(part9660(unit) == 0) + return; + + p = getenv("partition"); + if(p != nil && strncmp(p, "new", 3) == 0) + type = NEW; + else if(p != nil && strncmp(p, "old", 3) == 0) + type = OLD; + else + type = NEW|OLD; + + if(mbrbuf == nil) { + mbrbuf = malloc(Maxsec); + partbuf = malloc(Maxsec); + if(mbrbuf==nil || partbuf==nil) { + free(mbrbuf); + free(partbuf); + partbuf = mbrbuf = nil; + return; + } + } + + /* + * there might be no mbr (e.g. on a very large device), so look for + * a bare plan 9 partition table if mbrpart fails. 
+ */ + if((type & NEW) && mbrpart(unit) >= 0){ + /* nothing to do */ + } + else if (type & NEW) + p9part(unit, "data"); + else if(type & OLD) + oldp9part(unit); +} + +static void +rdgeom(SDunit *unit) +{ + char *line; + char *flds[5]; + Biobuf bb; + Biobuf *bp; + static char geom[] = "geometry "; + + bp = &bb; + seek(unit->ctl, 0, 0); + Binit(bp, unit->ctl, OREAD); + while((line = Brdline(bp, '\n')) != nil){ + line[Blinelen(bp) - 1] = '\0'; + if (strncmp(line, geom, sizeof geom - 1) == 0) + break; + } + if (line != nil && tokenize(line, flds, nelem(flds)) >= 3) { + unit->sectors = atoll(flds[1]); + unit->secsize = atoll(flds[2]); + } + Bterm(bp); + seek(unit->ctl, 0, 0); +} + +static void +setpartitions(char *name, int ctl, int data) +{ + SDunit sdunit; + SDunit *unit; + SDpart *part0; + + unit = &sdunit; + memset(unit, 0, sizeof *unit); + unit->ctl = ctl; + unit->data = data; + + unit->secsize = Normsec; /* default: won't work for CDs */ + unit->sectors = ~0ull; + rdgeom(unit); + strncpy(unit->name, name, sizeof unit->name); + unit->part = mallocz(sizeof(SDpart) * SDnpart, 1); + + part0 = &unit->part[0]; + part0->end = unit->sectors - 1; + strcpy(part0->name, "data"); + part0->valid = 1; + unit->npart++; + + mbrbuf = malloc(Maxsec); + partbuf = malloc(Maxsec); + partition(unit); + free(unit->part); +} + +/* + * read disk partition tables so that readnvram via factotum + * can see them. 
+ */ +int +readparts(void) +{ + int i, n, ctl, data, fd; + char *name, *ctlname, *dataname; + Dir *dir; + + fd = open("/dev", OREAD); + if(fd < 0) + return -1; + n = dirreadall(fd, &dir); + close(fd); + + for(i = 0; i < n; i++) { + name = dir[i].name; + if (strncmp(name, "sd", 2) != 0) + continue; + + ctlname = smprint("/dev/%s/ctl", name); + dataname = smprint("/dev/%s/data", name); + if (ctlname == nil || dataname == nil) { + free(ctlname); + free(dataname); + continue; + } + + ctl = open(ctlname, ORDWR); + data = open(dataname, OREAD); + free(ctlname); + free(dataname); + + if (ctl >= 0 && data >= 0) + setpartitions(dataname, ctl, data); + close(ctl); + close(data); + } + free(dir); + return 0; +} diff -Nru 0/sys/src/nix/boot/printstub.c 4/sys/src/nix/boot/printstub.c --- 0/sys/src/nix/boot/printstub.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/printstub.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,22 @@ +#include +#include + +static Lock fmtl; + +void +_fmtlock(void) +{ + lock(&fmtl); +} + +void +_fmtunlock(void) +{ + unlock(&fmtl); +} + +int +_efgfmt(Fmt*) +{ + return -1; +} diff -Nru 0/sys/src/nix/boot/settime.c 4/sys/src/nix/boot/settime.c --- 0/sys/src/nix/boot/settime.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/boot/settime.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,149 @@ +#include +#include +#include +#include +#include "../boot/boot.h" + +static long lusertime(char*); + +char *timeserver = "#s/boot"; + +void +settime(int islocal, int afd, char *rp) +{ + int n, f; + int timeset; + Dir dir[2]; + char timebuf[64]; + + print("time..."); + timeset = 0; + if(islocal){ + /* + * set the time from the real time clock + */ + f = open("#r/rtc", ORDWR); + if(f >= 0){ + if((n = read(f, timebuf, sizeof(timebuf)-1)) > 0){ + timebuf[n] = '\0'; + timeset = 1; + } + close(f); + }else do{ + strcpy(timebuf, "yymmddhhmm[ss]"); + outin("\ndate/time ", timebuf, sizeof(timebuf)); + }while((timeset=lusertime(timebuf)) <= 0); + } + if(timeset == 0){ + /* + * set the time from the access 
time of the root + */ + f = open(timeserver, ORDWR); + if(f < 0) + return; + if(mount(f, afd, "/tmp", MREPL, rp) < 0){ + warning("settime mount"); + close(f); + return; + } + close(f); + if(stat("/tmp", statbuf, sizeof statbuf) < 0) + fatal("stat"); + convM2D(statbuf, sizeof statbuf, &dir[0], (char*)&dir[1]); + sprint(timebuf, "%ld", dir[0].atime); + unmount(0, "/tmp"); + } + + f = open("#c/time", OWRITE); + if(write(f, timebuf, strlen(timebuf)) < 0) + warning("can't set #c/time"); + close(f); + print("\n"); +} + +#define SEC2MIN 60L +#define SEC2HOUR (60L*SEC2MIN) +#define SEC2DAY (24L*SEC2HOUR) + +int +g2(char **pp) +{ + int v; + + v = 10*((*pp)[0]-'0') + (*pp)[1]-'0'; + *pp += 2; + return v; +} + +/* + * days per month plus days/year + */ +static int dmsize[] = +{ + 365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; +static int ldmsize[] = +{ + 366, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 +}; + +/* + * return the days/month for the given year + */ +static int * +yrsize(int y) +{ + + if((y%4) == 0 && ((y%100) != 0 || (y%400) == 0)) + return ldmsize; + else + return dmsize; +} + +/* + * compute seconds since Jan 1 1970 + */ +static long +lusertime(char *argbuf) +{ + char *buf; + ulong secs; + int i, y, m; + int *d2m; + + buf = argbuf; + i = strlen(buf); + if(i != 10 && i != 12) + return -1; + secs = 0; + y = g2(&buf); + m = g2(&buf); + if(y < 70) + y += 2000; + else + y += 1900; + + /* + * seconds per year + */ + for(i = 1970; i < y; i++){ + d2m = yrsize(i); + secs += d2m[0] * SEC2DAY; + } + + /* + * seconds per month + */ + d2m = yrsize(y); + for(i = 1; i < m; i++) + secs += d2m[i] * SEC2DAY; + + secs += (g2(&buf)-1) * SEC2DAY; + secs += g2(&buf) * SEC2HOUR; + secs += g2(&buf) * SEC2MIN; + if(*buf) + secs += g2(&buf); + + sprint(argbuf, "%ld", secs); + return secs; +} diff -Nru 0/sys/src/nix/ip/arp.c 4/sys/src/nix/ip/arp.c --- 0/sys/src/nix/ip/arp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/arp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,680 @@ +#include 
"u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +/* + * address resolution tables + */ + +enum +{ + NHASH = (1<<6), + NCACHE = 256, + + AOK = 1, + AWAIT = 2, +}; + +char *arpstate[] = +{ + "UNUSED", + "OK", + "WAIT", +}; + +/* + * one per Fs + */ +struct Arp +{ + QLock; + Fs *f; + Arpent *hash[NHASH]; + Arpent cache[NCACHE]; + Arpent *rxmt; + Proc *rxmitp; /* neib sol re-transmit proc */ + Rendez rxmtq; + Block *dropf, *dropl; +}; + +char *Ebadarp = "bad arp"; + +#define haship(s) ((s)[IPaddrlen-1]%NHASH) + +extern int ReTransTimer = RETRANS_TIMER; +static void rxmitproc(void *v); + +void +arpinit(Fs *f) +{ + f->arp = smalloc(sizeof(Arp)); + f->arp->f = f; + f->arp->rxmt = nil; + f->arp->dropf = f->arp->dropl = nil; + kproc("rxmitproc", rxmitproc, f->arp); +} + +/* + * create a new arp entry for an ip address. + */ +static Arpent* +newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt) +{ + uint t; + Block *next, *xp; + Arpent *a, *e, *f, **l; + int empty; + + /* find oldest entry */ + e = &arp->cache[NCACHE]; + a = arp->cache; + t = a->utime; + for(f = a; f < e; f++){ + if(f->utime < t){ + t = f->utime; + a = f; + } + } + + /* dump waiting packets */ + xp = a->hold; + a->hold = nil; + + if(isv4(a->ip)){ + while(xp){ + next = xp->list; + freeblist(xp); + xp = next; + } + } + else { /* queue icmp unreachable for rxmitproc later on, w/o arp lock */ + if(xp){ + if(arp->dropl == nil) + arp->dropf = xp; + else + arp->dropl->list = xp; + + for(next = xp->list; next; next = next->list) + xp = next; + arp->dropl = xp; + wakeup(&arp->rxmtq); + } + } + + /* take out of current chain */ + l = &arp->hash[haship(a->ip)]; + for(f = *l; f; f = f->hash){ + if(f == a){ + *l = a->hash; + break; + } + l = &f->hash; + } + + /* insert into new chain */ + l = &arp->hash[haship(ip)]; + a->hash = *l; + *l = a; + + memmove(a->ip, ip, sizeof(a->ip)); + a->utime = NOW; + a->ctime = 0; + 
a->type = ifc->medium; + + a->rtime = NOW + ReTransTimer; + a->rxtsrem = MAX_MULTICAST_SOLICIT; + a->ifc = ifc; + a->ifcid = ifc->ifcid; + + /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */ + if(!ipismulticast(a->ip) && addrxt){ + l = &arp->rxmt; + empty = (*l==nil); + + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + for(f = *l; f; f = f->nextrxt){ + l = &f->nextrxt; + } + *l = a; + if(empty) + wakeup(&arp->rxmtq); + } + + a->nextrxt = nil; + + return a; +} + +/* called with arp qlocked */ + +void +cleanarpent(Arp *arp, Arpent *a) +{ + Arpent *f, **l; + + a->utime = 0; + a->ctime = 0; + a->type = 0; + a->state = 0; + + /* take out of current chain */ + l = &arp->hash[haship(a->ip)]; + for(f = *l; f; f = f->hash){ + if(f == a){ + *l = a->hash; + break; + } + l = &f->hash; + } + + /* take out of re-transmit chain */ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + a->nextrxt = nil; + a->hash = nil; + a->hold = nil; + a->last = nil; + a->ifc = nil; +} + +/* + * fill in the media address if we have it. Otherwise return an + * Arpent that represents the state of the address resolution FSM + * for ip. Add the packet to be sent onto the list of packets + * waiting for ip->mac to be resolved. 
+ */ +Arpent* +arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac) +{ + int hash; + Arpent *a; + Medium *type; + uchar v6ip[IPaddrlen]; + + if(version == V4){ + v4tov6(v6ip, ip); + ip = v6ip; + } + + qlock(arp); + hash = haship(ip); + type = ifc->medium; + for(a = arp->hash[hash]; a; a = a->hash){ + if(memcmp(ip, a->ip, sizeof(a->ip)) == 0) + if(type == a->type) + break; + } + + if(a == nil){ + a = newarp6(arp, ip, ifc, (version != V4)); + a->state = AWAIT; + } + a->utime = NOW; + if(a->state == AWAIT){ + if(bp != nil){ + if(a->hold) + a->last->list = bp; + else + a->hold = bp; + a->last = bp; + bp->list = nil; + } + return a; /* return with arp qlocked */ + } + + memmove(mac, a->mac, a->type->maclen); + + /* remove old entries */ + if(NOW - a->ctime > 15*60*1000) + cleanarpent(arp, a); + + qunlock(arp); + return nil; +} + +/* + * called with arp locked + */ +void +arprelease(Arp *arp, Arpent*) +{ + qunlock(arp); +} + +/* + * Copy out the mac address from the Arpent. Return the + * block waiting to get sent to this mac address. 
+ * + * called with arp locked + */ +Block* +arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac) +{ + Block *bp; + Arpent *f, **l; + + if(!isv4(a->ip)){ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + } + + memmove(a->mac, mac, type->maclen); + a->type = type; + a->state = AOK; + a->utime = NOW; + bp = a->hold; + a->hold = nil; + qunlock(arp); + + return bp; +} + +void +arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh) +{ + Arp *arp; + Route *r; + Arpent *a, *f, **l; + Ipifc *ifc; + Medium *type; + Block *bp, *next; + uchar v6ip[IPaddrlen]; + + arp = fs->arp; + + if(n != 6){ +// print("arp: len = %d\n", n); + return; + } + + switch(version){ + case V4: + r = v4lookup(fs, ip, nil); + v4tov6(v6ip, ip); + ip = v6ip; + break; + case V6: + r = v6lookup(fs, ip, nil); + break; + default: + panic("arpenter: version %d", version); + return; /* to supress warnings */ + } + + if(r == nil){ +// print("arp: no route for entry\n"); + return; + } + + ifc = r->ifc; + type = ifc->medium; + + qlock(arp); + for(a = arp->hash[haship(ip)]; a; a = a->hash){ + if(a->type != type || (a->state != AWAIT && a->state != AOK)) + continue; + + if(ipcmp(a->ip, ip) == 0){ + a->state = AOK; + memmove(a->mac, mac, type->maclen); + + if(version == V6){ + /* take out of re-transmit chain */ + l = &arp->rxmt; + for(f = *l; f; f = f->nextrxt){ + if(f == a){ + *l = a->nextrxt; + break; + } + l = &f->nextrxt; + } + } + + a->ifc = ifc; + a->ifcid = ifc->ifcid; + bp = a->hold; + a->hold = nil; + if(version == V4) + ip += IPv4off; + a->utime = NOW; + a->ctime = a->utime; + qunlock(arp); + + while(bp){ + next = bp->list; + if(ifc != nil){ + if(waserror()){ + runlock(ifc); + nexterror(); + } + rlock(ifc); + if(ifc->medium != nil) + ifc->medium->bwrite(ifc, bp, version, ip); + else + freeb(bp); + runlock(ifc); + poperror(); + } else + freeb(bp); + bp = next; + } + return; + } + } + + if(refresh == 0){ + a = 
newarp6(arp, ip, ifc, 0);
+		a->state = AOK;
+		a->type = type;
+		a->ctime = NOW;
+		memmove(a->mac, mac, type->maclen);
+	}
+
+	qunlock(arp);
+}
+
+/*
+ * handle a write of len bytes at s to the arp file.
+ * at most sizeof(buf)-1 bytes are looked at and one trailing
+ * newline is dropped.  the text is split on blanks into at
+ * most four fields; the first field selects the command:
+ *	flush			empty the whole cache
+ *	add ip mac		enter ip->mac, medium found by routing ip
+ *	add medium ip mac	enter ip->mac for the named medium
+ *	del ip			remove the entry for ip
+ * anything else, including a request with no fields at all,
+ * is rejected with error().  returns the (possibly truncated) len.
+ */
+int
+arpwrite(Fs *fs, char *s, int len)
+{
+	int n;
+	Route *r;
+	Arp *arp;
+	Block *bp;
+	Arpent *a, *fl, **l;
+	Medium *type;
+	char *f[4], buf[256];
+	uchar ip[IPaddrlen], mac[MAClen];
+
+	arp = fs->arp;
+
+	if(len == 0)
+		error(Ebadarp);
+	if(len >= sizeof(buf))
+		len = sizeof(buf)-1;
+	strncpy(buf, s, len);
+	buf[len] = 0;
+	if(len > 0 && buf[len-1] == '\n')
+		buf[len-1] = 0;
+
+	n = getfields(buf, f, 4, 1, " ");
+	/* a request of only blanks or a bare newline leaves f[0] unset */
+	if(n < 1)
+		error(Ebadarp);
+	if(strcmp(f[0], "flush") == 0){
+		qlock(arp);
+		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+			a->hash = nil;
+			a->state = 0;
+			a->utime = 0;
+			while(a->hold != nil){
+				bp = a->hold->list;
+				freeblist(a->hold);
+				a->hold = bp;
+			}
+		}
+		memset(arp->hash, 0, sizeof(arp->hash));
+		/* clear all pkts on these lists (rxmt, dropf/l) */
+		arp->rxmt = nil;
+		arp->dropf = nil;
+		arp->dropl = nil;
+		qunlock(arp);
+	} else if(strcmp(f[0], "add") == 0){
+		switch(n){
+		default:
+			error(Ebadarg);
+		case 3:
+			parseip(ip, f[1]);
+			if(isv4(ip))
+				r = v4lookup(fs, ip+IPv4off, nil);
+			else
+				r = v6lookup(fs, ip, nil);
+			if(r == nil)
+				error("Destination unreachable");
+			type = r->ifc->medium;
+			n = parsemac(mac, f[2], type->maclen);
+			break;
+		case 4:
+			type = ipfindmedium(f[1]);
+			if(type == nil)
+				error(Ebadarp);
+			parseip(ip, f[2]);
+			n = parsemac(mac, f[3], type->maclen);
+			break;
+		}
+
+		if(type->ares == nil)
+			error(Ebadarp);
+
+		type->ares(fs, V6, ip, mac, n, 0);
+	} else if(strcmp(f[0], "del") == 0){
+		if(n != 2)
+			error(Ebadarg);
+
+		parseip(ip, f[1]);
+		qlock(arp);
+
+		/* unlink the entry from its hash chain */
+		l = &arp->hash[haship(ip)];
+		for(a = *l; a; a = a->hash){
+			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+				*l = a->hash;
+				break;
+			}
+			l = &a->hash;
+		}
+
+		if(a){
+			/* take out of re-transmit chain */
+			l = &arp->rxmt;
+			for(fl = *l; fl; fl = fl->nextrxt){
+				if(fl == a){
+					
*l = a->nextrxt; + break; + } + l = &fl->nextrxt; + } + + a->nextrxt = nil; + a->hash = nil; + a->hold = nil; + a->last = nil; + a->ifc = nil; + memset(a->ip, 0, sizeof(a->ip)); + memset(a->mac, 0, sizeof(a->mac)); + } + qunlock(arp); + } else + error(Ebadarp); + + return len; +} + +enum +{ + Alinelen= 90, +}; + +char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n"; + +static void +convmac(char *p, uchar *mac, int n) +{ + while(n-- > 0) + p += sprint(p, "%2.2ux", *mac++); +} + +int +arpread(Arp *arp, char *p, ulong offset, int len) +{ + Arpent *a; + int n; + char mac[2*MAClen+1]; + + if(offset % Alinelen) + return 0; + + offset = offset/Alinelen; + len = len/Alinelen; + + n = 0; + for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){ + if(a->state == 0) + continue; + if(offset > 0){ + offset--; + continue; + } + len--; + qlock(arp); + convmac(mac, a->mac, a->type->maclen); + n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac); + qunlock(arp); + } + + return n; +} + +extern int +rxmitsols(Arp *arp) +{ + uint sflag; + Block *next, *xp; + Arpent *a, *b, **l; + Fs *f; + uchar ipsrc[IPaddrlen]; + Ipifc *ifc = nil; + long nrxt; + + qlock(arp); + f = arp->f; + + a = arp->rxmt; + if(a==nil){ + nrxt = 0; + goto dodrops; /* return nrxt; */ + } + nrxt = a->rtime - NOW; + if(nrxt > 3*ReTransTimer/4) + goto dodrops; /* return nrxt; */ + + for(; a; a = a->nextrxt){ + ifc = a->ifc; + assert(ifc != nil); + if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){ + xp = a->hold; + a->hold = nil; + + if(xp){ + if(arp->dropl == nil) + arp->dropf = xp; + else + arp->dropl->list = xp; + } + + cleanarpent(arp, a); + } + else + break; + } + if(a == nil) + goto dodrops; + + qunlock(arp); /* for icmpns */ + if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) + icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); + + runlock(ifc); + qlock(arp); + + /* put to the end of re-transmit chain */ + l = &arp->rxmt; + for(b = *l; b; b = b->nextrxt){ + if(b 
== a){ + *l = a->nextrxt; + break; + } + l = &b->nextrxt; + } + for(b = *l; b; b = b->nextrxt){ + l = &b->nextrxt; + } + *l = a; + a->rxtsrem--; + a->nextrxt = nil; + a->rtime = NOW + ReTransTimer; + + a = arp->rxmt; + if(a==nil) + nrxt = 0; + else + nrxt = a->rtime - NOW; + +dodrops: + xp = arp->dropf; + arp->dropf = nil; + arp->dropl = nil; + qunlock(arp); + + for(; xp; xp = next){ + next = xp->list; + icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1); + } + + return nrxt; + +} + +static int +rxready(void *v) +{ + Arp *arp = (Arp *) v; + int x; + + x = ((arp->rxmt != nil) || (arp->dropf != nil)); + + return x; +} + +static void +rxmitproc(void *v) +{ + Arp *arp = v; + long wakeupat; + + arp->rxmitp = up; + //print("arp rxmitproc started\n"); + if(waserror()){ + arp->rxmitp = 0; + pexit("hangup", 1); + } + for(;;){ + wakeupat = rxmitsols(arp); + if(wakeupat == 0) + sleep(&arp->rxmtq, rxready, v); + else if(wakeupat > ReTransTimer/4) + tsleep(&arp->rxmtq, return0, 0, wakeupat); + } +} + diff -Nru 0/sys/src/nix/ip/chandial.c 4/sys/src/nix/ip/chandial.c --- 0/sys/src/nix/ip/chandial.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/chandial.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,124 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +typedef struct DS DS; +static Chan* call(char*, char*, DS*); +static void _dial_string_parse(char*, DS*); + +enum +{ + Maxstring= 128, +}; + +struct DS +{ + char buf[Maxstring]; /* dist string */ + char *netdir; + char *proto; + char *rem; + char *local; /* other args */ + char *dir; + Chan **ctlp; +}; + +/* + * the dialstring is of the form '[/net/]proto!dest' + */ +Chan* +chandial(char *dest, char *local, char *dir, Chan **ctlp) +{ + DS ds; + char clone[Maxpath]; + + ds.local = local; + ds.dir = dir; + ds.ctlp = ctlp; + + _dial_string_parse(dest, &ds); + if(ds.netdir == 0) + ds.netdir = "/net"; + + /* no connection server, don't translate */ + 
snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto); + return call(clone, ds.rem, &ds); +} + +static Chan* +call(char *clone, char *dest, DS *ds) +{ + int n; + Chan *dchan, *cchan; + char name[Maxpath], data[Maxpath], *p; + + cchan = namec(clone, Aopen, ORDWR, 0); + + /* get directory name */ + if(waserror()){ + cclose(cchan); + nexterror(); + } + n = cchan->dev->read(cchan, name, sizeof(name)-1, 0); + name[n] = 0; + for(p = name; *p == ' '; p++) + ; + sprint(name, "%lud", strtoul(p, 0, 0)); + p = strrchr(clone, '/'); + *p = 0; + if(ds->dir) + snprint(ds->dir, Maxpath, "%s/%s", clone, name); + snprint(data, sizeof(data), "%s/%s/data", clone, name); + + /* connect */ + if(ds->local) + snprint(name, sizeof(name), "connect %s %s", dest, ds->local); + else + snprint(name, sizeof(name), "connect %s", dest); + cchan->dev->write(cchan, name, strlen(name), 0); + + /* open data connection */ + dchan = namec(data, Aopen, ORDWR, 0); + if(ds->ctlp) + *ds->ctlp = cchan; + else + cclose(cchan); + poperror(); + return dchan; + +} + +/* + * parse a dial string + */ +static void +_dial_string_parse(char *str, DS *ds) +{ + char *p, *p2; + + strncpy(ds->buf, str, Maxstring); + ds->buf[Maxstring-1] = 0; + + p = strchr(ds->buf, '!'); + if(p == 0) { + ds->netdir = 0; + ds->proto = "net"; + ds->rem = ds->buf; + } else { + if(*ds->buf != '/' && *ds->buf != '#'){ + ds->netdir = 0; + ds->proto = ds->buf; + } else { + for(p2 = p; *p2 != '/'; p2--) + ; + *p2++ = 0; + ds->netdir = ds->buf; + ds->proto = p2; + } + *p = 0; + ds->rem = p + 1; + } +} diff -Nru 0/sys/src/nix/ip/devip.c 4/sys/src/nix/ip/devip.c --- 0/sys/src/nix/ip/devip.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/devip.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1416 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +enum +{ + Qtopdir= 1, /* top level directory */ + Qtopbase, + Qarp= Qtopbase, + Qbootp, + Qndb, + 
Qiproute, + Qipselftab, + Qlog, + + Qprotodir, /* directory for a protocol */ + Qprotobase, + Qclone= Qprotobase, + Qstats, + + Qconvdir, /* directory for a conversation */ + Qconvbase, + Qctl= Qconvbase, + Qdata, + Qerr, + Qlisten, + Qlocal, + Qremote, + Qstatus, + Qsnoop, + + Logtype= 5, + Masktype= (1<> Shiftconv) & Maskconv ) +#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto ) +#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<devno]->p[PROTO(c->qid)]->conv[CONV(c->qid)]; + if(cv->owner == nil) + kstrdup(&cv->owner, eve); + mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE); + + switch(i) { + default: + return -1; + case Qctl: + devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp); + return 1; + case Qdata: + devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp); + return 1; + case Qerr: + devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp); + return 1; + case Qlisten: + devdir(c, q, "listen", 0, cv->owner, cv->perm, dp); + return 1; + case Qlocal: + p = "local"; + break; + case Qremote: + p = "remote"; + break; + case Qsnoop: + if(strcmp(cv->p->name, "ipifc") != 0) + return -1; + devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp); + return 1; + case Qstatus: + p = "status"; + break; + } + devdir(c, q, p, 0, cv->owner, 0444, dp); + return 1; +} + +static int +ip2gen(Chan *c, int i, Dir *dp) +{ + Qid q; + + switch(i) { + case Qclone: + mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE); + devdir(c, q, "clone", 0, network, 0666, dp); + return 1; + case Qstats: + mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE); + devdir(c, q, "stats", 0, network, 0444, dp); + return 1; + } + return -1; +} + +static int +ip1gen(Chan *c, int i, Dir *dp) +{ + Qid q; + char *p; + int prot; + int len = 0; + Fs *f; + extern ulong kerndate; + + f = ipfs[c->devno]; + + prot = 0666; + mkqid(&q, QID(0, 0, i), 0, QTFILE); + switch(i) { + default: + return -1; + case Qarp: + p = "arp"; + prot = 0664; + break; + case Qbootp: + p = "bootp"; + break; + 
case Qndb: + p = "ndb"; + len = strlen(f->ndb); + q.vers = f->ndbvers; + break; + case Qiproute: + p = "iproute"; + prot = 0664; + break; + case Qipselftab: + p = "ipselftab"; + prot = 0444; + break; + case Qlog: + p = "log"; + break; + } + devdir(c, q, p, len, network, prot, dp); + if(i == Qndb && f->ndbmtime > kerndate) + dp->mtime = f->ndbmtime; + return 1; +} + +static int +ipgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp) +{ + Qid q; + Conv *cv; + Fs *f; + + f = ipfs[c->devno]; + + switch(TYPE(c->qid)) { + case Qtopdir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR); + sprint(up->genbuf, "#I%ud", c->devno); + devdir(c, q, up->genbuf, 0, network, 0555, dp); + return 1; + } + if(s < f->np) { + if(f->p[s]->connect == nil) + return 0; /* protocol with no user interface */ + mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR); + devdir(c, q, f->p[s]->name, 0, network, 0555, dp); + return 1; + } + s -= f->np; + return ip1gen(c, s+Qtopbase, dp); + case Qarp: + case Qbootp: + case Qndb: + case Qlog: + case Qiproute: + case Qipselftab: + return ip1gen(c, TYPE(c->qid), dp); + case Qprotodir: + if(s == DEVDOTDOT){ + mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR); + sprint(up->genbuf, "#I%ud", c->devno); + devdir(c, q, up->genbuf, 0, network, 0555, dp); + return 1; + } + if(s < f->p[PROTO(c->qid)]->ac) { + cv = f->p[PROTO(c->qid)]->conv[s]; + sprint(up->genbuf, "%d", s); + mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR); + devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp); + return 1; + } + s -= f->p[PROTO(c->qid)]->ac; + return ip2gen(c, s+Qprotobase, dp); + case Qclone: + case Qstats: + return ip2gen(c, TYPE(c->qid), dp); + case Qconvdir: + if(s == DEVDOTDOT){ + s = PROTO(c->qid); + mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR); + devdir(c, q, f->p[s]->name, 0, network, 0555, dp); + return 1; + } + return ip3gen(c, s+Qconvbase, dp); + case Qctl: + case Qdata: + case Qerr: + case Qlisten: + case Qlocal: + case Qremote: + case Qstatus: + case Qsnoop: + return ip3gen(c, 
TYPE(c->qid), dp); + } + return -1; +} + +static void +ipreset(void) +{ + nullmediumlink(); + pktmediumlink(); + + fmtinstall('i', eipfmt); + fmtinstall('I', eipfmt); + fmtinstall('E', eipfmt); + fmtinstall('V', eipfmt); + fmtinstall('M', eipfmt); +} + +static Fs* +ipgetfs(int dev) +{ + extern void (*ipprotoinit[])(Fs*); + Fs *f; + int i; + + if(dev >= Nfs) + return nil; + + qlock(&fslock); + if(ipfs[dev] == nil){ + f = smalloc(sizeof(Fs)); + ip_init(f); + arpinit(f); + netloginit(f); + for(i = 0; ipprotoinit[i]; i++) + ipprotoinit[i](f); + f->dev = dev; + ipfs[dev] = f; + } + qunlock(&fslock); + + return ipfs[dev]; +} + +IPaux* +newipaux(char *owner, char *tag) +{ + IPaux *a; + int n; + + a = smalloc(sizeof(*a)); + kstrdup(&a->owner, owner); + memset(a->tag, ' ', sizeof(a->tag)); + n = strlen(tag); + if(n > sizeof(a->tag)) + n = sizeof(a->tag); + memmove(a->tag, tag, n); + return a; +} + +#define ATTACHER(c) (((IPaux*)((c)->aux))->owner) + +static Chan* +ipattach(char* spec) +{ + Chan *c; + int devno; + + devno = atoi(spec); + if(devno >= Nfs) + error("bad specification"); + + ipgetfs(devno); + c = devattach('I', spec); + mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR); + c->devno = devno; + + c->aux = newipaux(commonuser(), "none"); + + return c; +} + +static Walkqid* +ipwalk(Chan* c, Chan *nc, char **name, int nname) +{ + IPaux *a = c->aux; + Walkqid* w; + + w = devwalk(c, nc, name, nname, nil, 0, ipgen); + if(w != nil && w->clone != nil) + w->clone->aux = newipaux(a->owner, a->tag); + return w; +} + +static long +ipstat(Chan* c, uchar* db, long n) +{ + return devstat(c, db, n, nil, 0, ipgen); +} + +static int +incoming(void* arg) +{ + Conv *conv; + + conv = arg; + return conv->incall != nil; +} + +static int m2p[] = { + [OREAD] 4, + [OWRITE] 2, + [ORDWR] 6 +}; + +static Chan* +ipopen(Chan* c, int omode) +{ + Conv *cv, *nc; + Proto *p; + int perm; + Fs *f; + + perm = m2p[omode&3]; + + f = ipfs[c->devno]; + + switch(TYPE(c->qid)) { + default: + break; + case Qndb: + 
if(omode & (OWRITE|OTRUNC) && !iseve()) + error(Eperm); + if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC)) + f->ndb[0] = 0; + break; + case Qlog: + netlogopen(f); + break; + case Qiproute: + case Qarp: + if(omode != OREAD && !iseve()) + error(Eperm); + break; + case Qtopdir: + case Qprotodir: + case Qconvdir: + case Qstatus: + case Qremote: + case Qlocal: + case Qstats: + case Qbootp: + case Qipselftab: + if(omode != OREAD) + error(Eperm); + break; + case Qsnoop: + if(omode != OREAD) + error(Eperm); + p = f->p[PROTO(c->qid)]; + cv = p->conv[CONV(c->qid)]; + if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve()) + error(Eperm); + incref(&cv->snoopers); + break; + case Qclone: + p = f->p[PROTO(c->qid)]; + qlock(p); + if(waserror()){ + qunlock(p); + nexterror(); + } + cv = Fsprotoclone(p, ATTACHER(c)); + qunlock(p); + poperror(); + if(cv == nil) { + error(Enodev); + break; + } + mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE); + break; + case Qdata: + case Qctl: + case Qerr: + p = f->p[PROTO(c->qid)]; + qlock(p); + cv = p->conv[CONV(c->qid)]; + qlock(cv); + if(waserror()) { + qunlock(cv); + qunlock(p); + nexterror(); + } + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if((perm & cv->perm) != perm) + error(Eperm); + + } + cv->inuse++; + if(cv->inuse == 1){ + kstrdup(&cv->owner, ATTACHER(c)); + cv->perm = 0660; + } + qunlock(cv); + qunlock(p); + poperror(); + break; + case Qlisten: + cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)]; + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if((perm & cv->perm) != perm) + error(Eperm); + + } + + if(cv->state != Announced) + error("not announced"); + + if(waserror()){ + closeconv(cv); + nexterror(); + } + qlock(cv); + cv->inuse++; + qunlock(cv); + + nc = nil; + while(nc == nil) { + /* give up if we got a hangup */ + if(qisclosed(cv->rq)) + error("listen hungup"); + + qlock(&cv->listenq); + if(waserror()) { + qunlock(&cv->listenq); + 
nexterror(); + } + + /* wait for a connect */ + sleep(&cv->listenr, incoming, cv); + + qlock(cv); + nc = cv->incall; + if(nc != nil){ + cv->incall = nc->next; + mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE); + kstrdup(&cv->owner, ATTACHER(c)); + } + qunlock(cv); + + qunlock(&cv->listenq); + poperror(); + } + closeconv(cv); + poperror(); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +ipcreate(Chan*, char*, int, int) +{ + error(Eperm); +} + +static void +ipremove(Chan*) +{ + error(Eperm); +} + +static long +ipwstat(Chan *c, uchar *dp, long n) +{ + Dir d; + Conv *cv; + Fs *f; + Proto *p; + + f = ipfs[c->devno]; + switch(TYPE(c->qid)) { + default: + error(Eperm); + break; + case Qctl: + case Qdata: + break; + } + + n = convM2D(dp, n, &d, nil); + if(n > 0){ + p = f->p[PROTO(c->qid)]; + cv = p->conv[CONV(c->qid)]; + if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0) + error(Eperm); + if(d.uid[0]) + kstrdup(&cv->owner, d.uid); + cv->perm = d.mode & 0777; + } + return n; +} + +void +closeconv(Conv *cv) +{ + Conv *nc; + Ipmulti *mp; + + qlock(cv); + + if(--cv->inuse > 0) { + qunlock(cv); + return; + } + + /* close all incoming calls since no listen will ever happen */ + for(nc = cv->incall; nc; nc = cv->incall){ + cv->incall = nc->next; + closeconv(nc); + } + cv->incall = nil; + + kstrdup(&cv->owner, network); + cv->perm = 0660; + + while((mp = cv->multi) != nil) + ipifcremmulti(cv, mp->ma, mp->ia); + + cv->r = nil; + cv->rgen = 0; + cv->p->close(cv); + cv->state = Idle; + qunlock(cv); +} + +static void +ipclose(Chan* c) +{ + Fs *f; + + f = ipfs[c->devno]; + switch(TYPE(c->qid)) { + default: + break; + case Qlog: + if(c->flag & COPEN) + netlogclose(f); + break; + case Qdata: + case Qctl: + case Qerr: + if(c->flag & COPEN) + closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]); + break; + case Qsnoop: + if(c->flag & COPEN) + decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers); + break; + } + 
free(((IPaux*)c->aux)->owner); + free(c->aux); +} + +enum +{ + Statelen= 32*1024, +}; + +static long +ipread(Chan *ch, void *a, long n, vlong off) +{ + Conv *c; + Proto *x; + char *buf, *p; + long offset, rv; + Fs *f; + + f = ipfs[ch->devno]; + + p = a; + offset = off; + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qtopdir: + case Qprotodir: + case Qconvdir: + return devdirread(ch, a, n, 0, 0, ipgen); + case Qarp: + return arpread(f->arp, a, offset, n); + case Qbootp: + return bootpread(a, offset, n); + case Qndb: + return readstr(offset, a, n, f->ndb); + case Qiproute: + return routeread(f, a, offset, n); + case Qipselftab: + return ipselftabread(f, a, offset, n); + case Qlog: + return netlogread(f, a, offset, n); + case Qctl: + buf = smalloc(16); + sprint(buf, "%lud", CONV(ch->qid)); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qremote: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + if(x->remote == nil) { + sprint(buf, "%I!%d\n", c->raddr, c->rport); + } else { + (*x->remote)(c, buf, Statelen-2); + } + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qlocal: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + if(x->local == nil) { + sprint(buf, "%I!%d\n", c->laddr, c->lport); + } else { + (*x->local)(c, buf, Statelen-2); + } + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qstatus: + buf = smalloc(Statelen); + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + (*x->state)(c, buf, Statelen-2); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + case Qdata: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->rq, a, n); + case Qerr: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->eq, a, n); + case Qsnoop: + c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)]; + return qread(c->sq, a, n); + case Qstats: + x = f->p[PROTO(ch->qid)]; + if(x->stats == nil) + error("stats not 
implemented"); + buf = smalloc(Statelen); + (*x->stats)(x, buf, Statelen); + rv = readstr(offset, p, n, buf); + free(buf); + return rv; + } +} + +static Block* +ipbread(Chan* ch, long n, vlong offset) +{ + Conv *c; + Proto *x; + Fs *f; + + switch(TYPE(ch->qid)){ + case Qdata: + f = ipfs[ch->devno]; + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + return qbread(c->rq, n); + default: + return devbread(ch, n, offset); + } +} + +/* + * set local address to be that of the ifc closest to remote address + */ +static void +setladdr(Conv* c) +{ + findlocalip(c->p->f, c->laddr, c->raddr); +} + +/* + * set a local port making sure the quad of raddr,rport,laddr,lport is unique + */ +char* +setluniqueport(Conv* c, int lport) +{ + Proto *p; + Conv *xp; + int x; + + p = c->p; + + qlock(p); + for(x = 0; x < p->nc; x++){ + xp = p->conv[x]; + if(xp == nil) + break; + if(xp == c) + continue; + if((xp->state == Connected || xp->state == Announced) + && xp->lport == lport + && xp->rport == c->rport + && ipcmp(xp->raddr, c->raddr) == 0 + && ipcmp(xp->laddr, c->laddr) == 0){ + qunlock(p); + return "address in use"; + } + } + c->lport = lport; + qunlock(p); + return nil; +} + + +/* + * pick a local port and set it + */ +void +setlport(Conv* c) +{ + Proto *p; + ushort *pp; + int x, found; + + p = c->p; + if(c->restricted) + pp = &p->nextrport; + else + pp = &p->nextport; + qlock(p); + for(;;(*pp)++){ + /* + * Fsproto initialises p->nextport to 0 and the restricted + * ports (p->nextrport) to 600. + * Restricted ports must lie between 600 and 1024. + * For the initial condition or if the unrestricted port number + * has wrapped round, select a random port between 5000 and 1<<15 + * to start at. 
+ */ + if(c->restricted){ + if(*pp >= 1024) + *pp = 600; + } + else while(*pp < 5000) + *pp = nrand(1<<15); + + found = 0; + for(x = 0; x < p->nc; x++){ + if(p->conv[x] == nil) + break; + if(p->conv[x]->lport == *pp){ + found = 1; + break; + } + } + if(!found) + break; + } + c->lport = (*pp)++; + qunlock(p); +} + +/* + * set a local address and port from a string of the form + * [address!]port[!r] + */ +char* +setladdrport(Conv* c, char* str, int announcing) +{ + char *p; + char *rv; + ushort lport; + uchar addr[IPaddrlen]; + + rv = nil; + + /* + * ignore restricted part if it exists. it's + * meaningless on local ports. + */ + p = strchr(str, '!'); + if(p != nil){ + *p++ = 0; + if(strcmp(p, "r") == 0) + p = nil; + } + + c->lport = 0; + if(p == nil){ + if(announcing) + ipmove(c->laddr, IPnoaddr); + else + setladdr(c); + p = str; + } else { + if(strcmp(str, "*") == 0) + ipmove(c->laddr, IPnoaddr); + else { + parseip(addr, str); + if(ipforme(c->p->f, addr)) + ipmove(c->laddr, addr); + else + return "not a local IP address"; + } + } + + /* one process can get all connections */ + if(announcing && strcmp(p, "*") == 0){ + if(!iseve()) + error(Eperm); + return setluniqueport(c, 0); + } + + lport = atoi(p); + if(lport <= 0) + setlport(c); + else + rv = setluniqueport(c, lport); + return rv; +} + +static char* +setraddrport(Conv* c, char* str) +{ + char *p; + + p = strchr(str, '!'); + if(p == nil) + return "malformed address"; + *p++ = 0; + parseip(c->raddr, str); + c->rport = atoi(p); + p = strchr(p, '!'); + if(p){ + if(strstr(p, "!r") != nil) + c->restricted = 1; + } + return nil; +} + +/* + * called by protocol connect routine to set addresses + */ +char* +Fsstdconnect(Conv *c, char *argv[], int argc) +{ + char *p; + + switch(argc) { + default: + return "bad args to connect"; + case 2: + p = setraddrport(c, argv[1]); + if(p != nil) + return p; + setladdr(c); + setlport(c); + break; + case 3: + p = setraddrport(c, argv[1]); + if(p != nil) + return p; + p = 
setladdrport(c, argv[2], 0); + if(p != nil) + return p; + } + + if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) + || ipcmp(c->raddr, IPnoaddr) == 0) + c->ipversion = V4; + else + c->ipversion = V6; + + return nil; +} +/* + * initiate connection and sleep till its set up + */ +static int +connected(void* a) +{ + return ((Conv*)a)->state == Connected; +} +static void +connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(c->state != 0) + error(Econinuse); + c->state = Connecting; + c->cerr[0] = '\0'; + if(x->connect == nil) + error("connect not supported"); + p = x->connect(c, cb->f, cb->nf); + if(p != nil) + error(p); + + qunlock(c); + if(waserror()){ + qlock(c); + nexterror(); + } + sleep(&c->cr, connected, c); + qlock(c); + poperror(); + + if(c->cerr[0] != '\0') + error(c->cerr); +} + +/* + * called by protocol announce routine to set addresses + */ +char* +Fsstdannounce(Conv* c, char* argv[], int argc) +{ + memset(c->raddr, 0, sizeof(c->raddr)); + c->rport = 0; + switch(argc){ + default: + break; + case 2: + return setladdrport(c, argv[1], 1); + } + return "bad args to announce"; +} + +/* + * initiate announcement and sleep till its set up + */ +static int +announced(void* a) +{ + return ((Conv*)a)->state == Announced; +} +static void +announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(c->state != 0) + error(Econinuse); + c->state = Announcing; + c->cerr[0] = '\0'; + if(x->announce == nil) + error("announce not supported"); + p = x->announce(c, cb->f, cb->nf); + if(p != nil) + error(p); + + qunlock(c); + if(waserror()){ + qlock(c); + nexterror(); + } + sleep(&c->cr, announced, c); + qlock(c); + poperror(); + + if(c->cerr[0] != '\0') + error(c->cerr); +} + +/* + * called by protocol bind routine to set addresses + */ +char* +Fsstdbind(Conv* c, char* argv[], int argc) +{ + switch(argc){ + default: + break; + case 2: + return setladdrport(c, argv[1], 0); + } + return "bad args to bind"; +} + 
+static void +bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb) +{ + char *p; + + if(x->bind == nil) + p = Fsstdbind(c, cb->f, cb->nf); + else + p = x->bind(c, cb->f, cb->nf); + if(p != nil) + error(p); +} + +static void +tosctlmsg(Conv *c, Cmdbuf *cb) +{ + if(cb->nf < 2) + c->tos = 0; + else + c->tos = atoi(cb->f[1]); +} + +static void +ttlctlmsg(Conv *c, Cmdbuf *cb) +{ + if(cb->nf < 2) + c->ttl = MAXTTL; + else + c->ttl = atoi(cb->f[1]); +} + +static long +ipwrite(Chan* ch, void *v, long n, vlong off) +{ + Conv *c; + Proto *x; + char *p; + Cmdbuf *cb; + uchar ia[IPaddrlen], ma[IPaddrlen]; + Fs *f; + char *a; + ulong offset = off; + + a = v; + f = ipfs[ch->devno]; + + switch(TYPE(ch->qid)){ + default: + error(Eperm); + case Qdata: + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + + if(c->wq == nil) + error(Eperm); + + qwrite(c->wq, a, n); + break; + case Qarp: + return arpwrite(f, a, n); + case Qiproute: + return routewrite(f, ch, a, n); + case Qlog: + netlogctl(f, a, n); + return n; + case Qndb: + return ndbwrite(f, a, offset, n); + break; + case Qctl: + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + cb = parsecmd(a, n); + + qlock(c); + if(waserror()) { + qunlock(c); + free(cb); + nexterror(); + } + if(cb->nf < 1) + error("short control request"); + if(strcmp(cb->f[0], "connect") == 0) + connectctlmsg(x, c, cb); + else if(strcmp(cb->f[0], "announce") == 0) + announcectlmsg(x, c, cb); + else if(strcmp(cb->f[0], "bind") == 0) + bindctlmsg(x, c, cb); + else if(strcmp(cb->f[0], "ttl") == 0) + ttlctlmsg(c, cb); + else if(strcmp(cb->f[0], "tos") == 0) + tosctlmsg(c, cb); + else if(strcmp(cb->f[0], "ignoreadvice") == 0) + c->ignoreadvice = 1; + else if(strcmp(cb->f[0], "addmulti") == 0){ + if(cb->nf < 2) + error("addmulti needs interface address"); + if(cb->nf == 2){ + if(!ipismulticast(c->raddr)) + error("addmulti for a non multicast address"); + parseip(ia, cb->f[1]); + ipifcaddmulti(c, c->raddr, ia); + } else { + parseip(ma, cb->f[2]); + 
if(!ipismulticast(ma)) + error("addmulti for a non multicast address"); + parseip(ia, cb->f[1]); + ipifcaddmulti(c, ma, ia); + } + } else if(strcmp(cb->f[0], "remmulti") == 0){ + if(cb->nf < 2) + error("remmulti needs interface address"); + if(!ipismulticast(c->raddr)) + error("remmulti for a non multicast address"); + parseip(ia, cb->f[1]); + ipifcremmulti(c, c->raddr, ia); + } else if(x->ctl != nil) { + p = x->ctl(c, cb->f, cb->nf); + if(p != nil) + error(p); + } else + error("unknown control request"); + qunlock(c); + free(cb); + poperror(); + } + return n; +} + +static long +ipbwrite(Chan* ch, Block* bp, vlong offset) +{ + Conv *c; + Proto *x; + Fs *f; + int n; + + switch(TYPE(ch->qid)){ + case Qdata: + f = ipfs[ch->devno]; + x = f->p[PROTO(ch->qid)]; + c = x->conv[CONV(ch->qid)]; + + if(c->wq == nil) + error(Eperm); + + if(bp->next) + bp = concatblock(bp); + n = BLEN(bp); + qbwrite(c->wq, bp); + return n; + default: + return devbwrite(ch, bp, offset); + } +} + +Dev ipdevtab = { + 'I', + "ip", + + ipreset, + devinit, + devshutdown, + ipattach, + ipwalk, + ipstat, + ipopen, + ipcreate, + ipclose, + ipread, + ipbread, + ipwrite, + ipbwrite, + ipremove, + ipwstat, +}; + +int +Fsproto(Fs *f, Proto *p) +{ + if(f->np >= Maxproto) + return -1; + + p->f = f; + + if(p->ipproto > 0){ + if(f->t2p[p->ipproto] != nil) + return -1; + f->t2p[p->ipproto] = p; + } + + p->qid.type = QTDIR; + p->qid.path = QID(f->np, 0, Qprotodir); + p->conv = malloc(sizeof(Conv*)*(p->nc+1)); + if(p->conv == nil) + panic("Fsproto"); + + p->x = f->np; + p->nextport = 0; + p->nextrport = 600; + f->p[f->np++] = p; + + return 0; +} + +/* + * return true if this protocol is + * built in + */ +int +Fsbuiltinproto(Fs* f, uchar proto) +{ + return f->t2p[proto] != nil; +} + +/* + * called with protocol locked + */ +Conv* +Fsprotoclone(Proto *p, char *user) +{ + Conv *c, **pp, **ep; + +retry: + c = nil; + ep = &p->conv[p->nc]; + for(pp = p->conv; pp < ep; pp++) { + c = *pp; + if(c == nil){ + c = 
malloc(sizeof(Conv)); + if(c == nil) + error(Enomem); + qlock(c); + c->p = p; + c->x = pp - p->conv; + if(p->ptclsize != 0){ + c->ptcl = malloc(p->ptclsize); + if(c->ptcl == nil) { + free(c); + error(Enomem); + } + } + *pp = c; + p->ac++; + c->eq = qopen(1024, Qmsg, 0, 0); + (*p->create)(c); + break; + } + if(canqlock(c)){ + /* + * make sure both processes and protocol + * are done with this Conv + */ + if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0)) + break; + + qunlock(c); + } + } + if(pp >= ep) { + if(p->gc != nil && (*p->gc)(p)) + goto retry; + return nil; + } + + c->inuse = 1; + kstrdup(&c->owner, user); + c->perm = 0660; + c->state = Idle; + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->r = nil; + c->rgen = 0; + c->lport = 0; + c->rport = 0; + c->restricted = 0; + c->ttl = MAXTTL; + qreopen(c->rq); + qreopen(c->wq); + qreopen(c->eq); + + qunlock(c); + return c; +} + +int +Fsconnected(Conv* c, char* msg) +{ + if(msg != nil && *msg != '\0') + strncpy(c->cerr, msg, ERRMAX-1); + + switch(c->state){ + + case Announcing: + c->state = Announced; + break; + + case Connecting: + c->state = Connected; + break; + } + + wakeup(&c->cr); + return 0; +} + +Proto* +Fsrcvpcol(Fs* f, uchar proto) +{ + if(f->ipmux) + return f->ipmux; + else + return f->t2p[proto]; +} + +Proto* +Fsrcvpcolx(Fs *f, uchar proto) +{ + return f->t2p[proto]; +} + +/* + * called with protocol locked + */ +Conv* +Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version) +{ + Conv *nc; + Conv **l; + int i; + + qlock(c); + i = 0; + for(l = &c->incall; *l; l = &(*l)->next) + i++; + if(i >= Maxincall) { + qunlock(c); + return nil; + } + + /* find a free conversation */ + nc = Fsprotoclone(c->p, network); + if(nc == nil) { + qunlock(c); + return nil; + } + ipmove(nc->raddr, raddr); + nc->rport = rport; + ipmove(nc->laddr, laddr); + nc->lport = lport; + nc->next = nil; + *l = nc; + nc->state = Connected; + nc->ipversion = version; + + qunlock(c); 
+ + wakeup(&c->listenr); + + return nc; +} + +long +ndbwrite(Fs *f, char *a, ulong off, int n) +{ + if(off > strlen(f->ndb)) + error(Eio); + if(off+n >= sizeof(f->ndb)) + error(Eio); + memmove(f->ndb+off, a, n); + f->ndb[off+n] = 0; + f->ndbvers++; + f->ndbmtime = seconds(); + return n; +} + +ulong +scalednconv(void) +{ + if(cpuserver && conf.npage*PGSZ >= 128*MB) + return Nchans*4; + return Nchans; +} diff -Nru 0/sys/src/nix/ip/ethermedium.c 4/sys/src/nix/ip/ethermedium.c --- 0/sys/src/nix/ip/ethermedium.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ethermedium.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,785 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +typedef struct Etherhdr Etherhdr; +struct Etherhdr +{ + uchar d[6]; + uchar s[6]; + uchar t[2]; +}; + +static uchar ipbroadcast[IPaddrlen] = { + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, + 0xff,0xff,0xff,0xff, +}; + +static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + +static void etherread4(void *a); +static void etherread6(void *a); +static void etherbind(Ipifc *ifc, int argc, char **argv); +static void etherunbind(Ipifc *ifc); +static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip); +static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia); +static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia); +static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac); +static void sendarp(Ipifc *ifc, Arpent *a); +static void sendgarp(Ipifc *ifc, uchar*); +static int multicastea(uchar *ea, uchar *ip); +static void recvarpproc(void*); +static void resolveaddr6(Ipifc *ifc, Arpent *a); +static void etherpref2addr(uchar *pref, uchar *ea); + +Medium ethermedium = +{ +.name= "ether", +.hsize= 14, +.mintu= 60, +.maxtu= 1514, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= etherbwrite, +.addmulti= etheraddmulti, +.remmulti= 
etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +Medium fbemedium = +{ +.name= "fbe", +.hsize= 14, +.mintu= 60, +.maxtu= 4000, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= etherbwrite, +.addmulti= etheraddmulti, +.remmulti= etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +Medium gbemedium = +{ +.name= "gbe", +.hsize= 14, +.mintu= 60, +.maxtu= 9014, +.maclen= 6, +.bind= etherbind, +.unbind= etherunbind, +.bwrite= etherbwrite, +.addmulti= etheraddmulti, +.remmulti= etherremmulti, +.ares= arpenter, +.areg= sendgarp, +.pref2addr= etherpref2addr, +}; + +typedef struct Etherrock Etherrock; +struct Etherrock +{ + Fs *f; /* file system we belong to */ + Proc *arpp; /* arp process */ + Proc *read4p; /* reading process (v4)*/ + Proc *read6p; /* reading process (v6)*/ + Chan *mchan4; /* Data channel for v4 */ + Chan *achan; /* Arp channel */ + Chan *cchan4; /* Control channel for v4 */ + Chan *mchan6; /* Data channel for v6 */ + Chan *cchan6; /* Control channel for v6 */ +}; + +/* + * ethernet arp request + */ +enum +{ + ETARP = 0x0806, + ETIP4 = 0x0800, + ETIP6 = 0x86DD, + ARPREQUEST = 1, + ARPREPLY = 2, +}; + +typedef struct Etherarp Etherarp; +struct Etherarp +{ + uchar d[6]; + uchar s[6]; + uchar type[2]; + uchar hrd[2]; + uchar pro[2]; + uchar hln; + uchar pln; + uchar op[2]; + uchar sha[6]; + uchar spa[4]; + uchar tha[6]; + uchar tpa[4]; +}; + +static char *nbmsg = "nonblocking"; + +/* + * called to bind an IP ifc to an ethernet device + * called with ifc wlock'd + */ +static void +etherbind(Ipifc *ifc, int argc, char **argv) +{ + Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6, *schan; + char addr[Maxpath]; //char addr[2*KNAMELEN]; + char dir[Maxpath]; //char dir[2*KNAMELEN]; + char *buf; + int n; + char *ptr; + Etherrock *er; + + if(argc < 2) + error(Ebadarg); + + mchan4 = cchan4 = achan = mchan6 = cchan6 = nil; + buf = nil; + if(waserror()){ + if(mchan4 != nil) + 
cclose(mchan4); + if(cchan4 != nil) + cclose(cchan4); + if(achan != nil) + cclose(achan); + if(mchan6 != nil) + cclose(mchan6); + if(cchan6 != nil) + cclose(cchan6); + if(buf != nil) + free(buf); + nexterror(); + } + + /* + * open ipv4 converstation + * + * the dial will fail if the type is already open on + * this device. + */ + snprint(addr, sizeof(addr), "%s!0x800", argv[2]); + mchan4 = chandial(addr, nil, dir, &cchan4); + + /* + * make it non-blocking + */ + cchan4->dev->write(cchan4, nbmsg, strlen(nbmsg), 0); + + /* + * get mac address and speed + */ + snprint(addr, sizeof(addr), "%s/stats", argv[2]); + buf = smalloc(512); + schan = namec(addr, Aopen, OREAD, 0); + if(waserror()){ + cclose(schan); + nexterror(); + } + n = schan->dev->read(schan, buf, 511, 0); + cclose(schan); + poperror(); + buf[n] = 0; + + ptr = strstr(buf, "addr: "); + if(!ptr) + error(Eio); + ptr += 6; + parsemac(ifc->mac, ptr, 6); + + ptr = strstr(buf, "mbps: "); + if(ptr){ + ptr += 6; + ifc->mbps = atoi(ptr); + } else + ifc->mbps = 100; + + /* + * open arp conversation + */ + snprint(addr, sizeof(addr), "%s!0x806", argv[2]); + achan = chandial(addr, nil, nil, nil); + + /* + * open ipv6 conversation + * + * the dial will fail if the type is already open on + * this device. 
+ */ + snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); + mchan6 = chandial(addr, nil, dir, &cchan6); + + /* + * make it non-blocking + */ + cchan6->dev->write(cchan6, nbmsg, strlen(nbmsg), 0); + + er = smalloc(sizeof(*er)); + er->mchan4 = mchan4; + er->cchan4 = cchan4; + er->achan = achan; + er->mchan6 = mchan6; + er->cchan6 = cchan6; + er->f = ifc->conv->p->f; + ifc->arg = er; + + free(buf); + poperror(); + + kproc("etherread4", etherread4, ifc); + kproc("recvarpproc", recvarpproc, ifc); + kproc("etherread6", etherread6, ifc); +} + +/* + * called with ifc wlock'd + */ +static void +etherunbind(Ipifc *ifc) +{ + Etherrock *er = ifc->arg; + + if(er->read4p) + postnote(er->read4p, 1, "unbind", NUser); + if(er->read6p) + postnote(er->read6p, 1, "unbind", NUser); + if(er->arpp) + postnote(er->arpp, 1, "unbind", NUser); + + /* wait for readers to die */ + while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0) + tsleep(&up->sleep, return0, 0, 300); + + if(er->mchan4 != nil) + cclose(er->mchan4); + if(er->achan != nil) + cclose(er->achan); + if(er->cchan4 != nil) + cclose(er->cchan4); + if(er->mchan6 != nil) + cclose(er->mchan6); + if(er->cchan6 != nil) + cclose(er->cchan6); + + free(er); +} + +/* + * called by ipoput with a single block to write with ifc rlock'd + */ +static void +etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip) +{ + Etherhdr *eh; + Arpent *a; + uchar mac[6]; + Etherrock *er = ifc->arg; + + /* get mac address of destination */ + a = arpget(er->f->arp, bp, version, ifc, ip, mac); + if(a){ + /* check for broadcast or multicast */ + bp = multicastarp(er->f, a, ifc->medium, mac); + if(bp==nil){ + switch(version){ + case V4: + sendarp(ifc, a); + break; + case V6: + resolveaddr6(ifc, a); + break; + default: + panic("etherbwrite: version %d", version); + } + return; + } + } + + /* make it a single block with space for the ether header */ + bp = padblock(bp, ifc->medium->hsize); + if(bp->next) + bp = concatblock(bp); + if(BLEN(bp) < ifc->mintu) + 
bp = adjustblock(bp, ifc->mintu); + eh = (Etherhdr*)bp->rp; + + /* copy in mac addresses and ether type */ + memmove(eh->s, ifc->mac, sizeof(eh->s)); + memmove(eh->d, mac, sizeof(eh->d)); + + switch(version){ + case V4: + eh->t[0] = 0x08; + eh->t[1] = 0x00; + er->mchan4->dev->bwrite(er->mchan4, bp, 0); + break; + case V6: + eh->t[0] = 0x86; + eh->t[1] = 0xDD; + er->mchan6->dev->bwrite(er->mchan6, bp, 0); + break; + default: + panic("etherbwrite2: version %d", version); + } + ifc->out++; +} + + +/* + * process to read from the ethernet + */ +static void +etherread4(void *a) +{ + Ipifc *ifc; + Block *bp; + Etherrock *er; + + ifc = a; + er = ifc->arg; + er->read4p = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->read4p = 0; + pexit("hangup", 1); + } + for(;;){ + bp = er->mchan4->dev->bread(er->mchan4, ifc->maxtu, 0); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + bp->rp += ifc->medium->hsize; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + + +/* + * process to read from the ethernet, IPv6 + */ +static void +etherread6(void *a) +{ + Ipifc *ifc; + Block *bp; + Etherrock *er; + + ifc = a; + er = ifc->arg; + er->read6p = up; /* hide identity under a rock for unbind */ + if(waserror()){ + er->read6p = 0; + pexit("hangup", 1); + } + for(;;){ + bp = er->mchan6->dev->bread(er->mchan6, ifc->maxtu, 0); + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + bp->rp += ifc->medium->hsize; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput6(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +static void +etheraddmulti(Ipifc *ifc, uchar *a, uchar *) +{ + uchar mac[6]; + char buf[64]; + Etherrock *er = ifc->arg; + int version; + + version = multicastea(mac, a); + sprint(buf, "addmulti %E", mac); + switch(version){ + case V4: + 
er->cchan4->dev->write(er->cchan4, buf, strlen(buf), 0); + break; + case V6: + er->cchan6->dev->write(er->cchan6, buf, strlen(buf), 0); + break; + default: + panic("etheraddmulti: version %d", version); + } +} + +static void +etherremmulti(Ipifc *ifc, uchar *a, uchar *) +{ + uchar mac[6]; + char buf[64]; + Etherrock *er = ifc->arg; + int version; + + version = multicastea(mac, a); + sprint(buf, "remmulti %E", mac); + switch(version){ + case V4: + er->cchan4->dev->write(er->cchan4, buf, strlen(buf), 0); + break; + case V6: + er->cchan6->dev->write(er->cchan6, buf, strlen(buf), 0); + break; + default: + panic("etherremmulti: version %d", version); + } +} + +/* + * send an ethernet arp + * (only v4, v6 uses the neighbor discovery, rfc1970) + */ +static void +sendarp(Ipifc *ifc, Arpent *a) +{ + int n; + Block *bp; + Etherarp *e; + Etherrock *er = ifc->arg; + + /* don't do anything if it's been less than a second since the last */ + if(NOW - a->ctime < 1000){ + arprelease(er->f->arp, a); + return; + } + + /* remove all but the last message */ + while((bp = a->hold) != nil){ + if(bp == a->last) + break; + a->hold = bp->list; + freeblist(bp); + } + + /* try to keep it around for a second more */ + a->ctime = NOW; + arprelease(er->f->arp, a); + + n = sizeof(Etherarp); + if(n < a->type->mintu) + n = a->type->mintu; + bp = allocb(n); + memset(bp->rp, 0, n); + e = (Etherarp*)bp->rp; + memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa)); + ipv4local(ifc, e->spa); + memmove(e->sha, ifc->mac, sizeof(e->sha)); + memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */ + memmove(e->s, ifc->mac, sizeof(e->s)); + + hnputs(e->type, ETARP); + hnputs(e->hrd, 1); + hnputs(e->pro, ETIP4); + e->hln = sizeof(e->sha); + e->pln = sizeof(e->spa); + hnputs(e->op, ARPREQUEST); + bp->wp += n; + + er->achan->dev->bwrite(er->achan, bp, 0); +} + +static void +resolveaddr6(Ipifc *ifc, Arpent *a) +{ + int sflag; + Block *bp; + Etherrock *er = ifc->arg; + uchar ipsrc[IPaddrlen]; + + /* don't do anything 
if it's been less than a second since the last */ + if(NOW - a->ctime < ReTransTimer){ + arprelease(er->f->arp, a); + return; + } + + /* remove all but the last message */ + while((bp = a->hold) != nil){ + if(bp == a->last) + break; + a->hold = bp->list; + freeblist(bp); + } + + /* try to keep it around for a second more */ + a->ctime = NOW; + a->rtime = NOW + ReTransTimer; + if(a->rxtsrem <= 0) { + arprelease(er->f->arp, a); + return; + } + + a->rxtsrem--; + arprelease(er->f->arp, a); + + if(sflag = ipv6anylocal(ifc, ipsrc)) + icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); +} + +/* + * send a gratuitous arp to refresh arp caches + */ +static void +sendgarp(Ipifc *ifc, uchar *ip) +{ + int n; + Block *bp; + Etherarp *e; + Etherrock *er = ifc->arg; + + /* don't arp for our initial non address */ + if(ipcmp(ip, IPnoaddr) == 0) + return; + + n = sizeof(Etherarp); + if(n < ifc->medium->mintu) + n = ifc->medium->mintu; + bp = allocb(n); + memset(bp->rp, 0, n); + e = (Etherarp*)bp->rp; + memmove(e->tpa, ip+IPv4off, sizeof(e->tpa)); + memmove(e->spa, ip+IPv4off, sizeof(e->spa)); + memmove(e->sha, ifc->mac, sizeof(e->sha)); + memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */ + memmove(e->s, ifc->mac, sizeof(e->s)); + + hnputs(e->type, ETARP); + hnputs(e->hrd, 1); + hnputs(e->pro, ETIP4); + e->hln = sizeof(e->sha); + e->pln = sizeof(e->spa); + hnputs(e->op, ARPREQUEST); + bp->wp += n; + + er->achan->dev->bwrite(er->achan, bp, 0); +} + +static void +recvarp(Ipifc *ifc) +{ + int n; + Block *ebp, *rbp; + Etherarp *e, *r; + uchar ip[IPaddrlen]; + static uchar eprinted[4]; + Etherrock *er = ifc->arg; + + ebp = er->achan->dev->bread(er->achan, ifc->maxtu, 0); + if(ebp == nil) + return; + + e = (Etherarp*)ebp->rp; + switch(nhgets(e->op)) { + default: + break; + + case ARPREPLY: + /* check for machine using my ip address */ + v4tov6(ip, e->spa); + if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){ + if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){ + 
print("arprep: 0x%E/0x%E also has ip addr %V\n", + e->s, e->sha, e->spa); + break; + } + } + + /* make sure we're not entering broadcast addresses */ + if(ipcmp(ip, ipbroadcast) == 0 || + !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){ + print("arprep: 0x%E/0x%E cannot register broadcast address %I\n", + e->s, e->sha, e->spa); + break; + } + + arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0); + break; + + case ARPREQUEST: + /* don't answer arps till we know who we are */ + if(ifc->lifc == 0) + break; + + /* check for machine using my ip or ether address */ + v4tov6(ip, e->spa); + if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){ + if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){ + if (memcmp(eprinted, e->spa, sizeof(e->spa))){ + /* print only once */ + print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa); + memmove(eprinted, e->spa, sizeof(e->spa)); + } + } + } else { + if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){ + print("arpreq: %V also has ether addr %E\n", e->spa, e->sha); + break; + } + } + + /* refresh what we know about sender */ + arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1); + + /* answer only requests for our address or systems we're proxying for */ + v4tov6(ip, e->tpa); + if(!iplocalonifc(ifc, ip)) + if(!ipproxyifc(er->f, ifc, ip)) + break; + + n = sizeof(Etherarp); + if(n < ifc->mintu) + n = ifc->mintu; + rbp = allocb(n); + r = (Etherarp*)rbp->rp; + memset(r, 0, sizeof(Etherarp)); + hnputs(r->type, ETARP); + hnputs(r->hrd, 1); + hnputs(r->pro, ETIP4); + r->hln = sizeof(r->sha); + r->pln = sizeof(r->spa); + hnputs(r->op, ARPREPLY); + memmove(r->tha, e->sha, sizeof(r->tha)); + memmove(r->tpa, e->spa, sizeof(r->tpa)); + memmove(r->sha, ifc->mac, sizeof(r->sha)); + memmove(r->spa, e->tpa, sizeof(r->spa)); + memmove(r->d, e->sha, sizeof(r->d)); + memmove(r->s, ifc->mac, sizeof(r->s)); + rbp->wp += n; + + er->achan->dev->bwrite(er->achan, rbp, 0); + } + freeb(ebp); +} + +static void +recvarpproc(void *v) +{ + Ipifc *ifc 
= v; + Etherrock *er = ifc->arg; + + er->arpp = up; + if(waserror()){ + er->arpp = 0; + pexit("hangup", 1); + } + for(;;) + recvarp(ifc); +} + +static int +multicastea(uchar *ea, uchar *ip) +{ + int x; + + switch(x = ipismulticast(ip)){ + case V4: + ea[0] = 0x01; + ea[1] = 0x00; + ea[2] = 0x5e; + ea[3] = ip[13] & 0x7f; + ea[4] = ip[14]; + ea[5] = ip[15]; + break; + case V6: + ea[0] = 0x33; + ea[1] = 0x33; + ea[2] = ip[12]; + ea[3] = ip[13]; + ea[4] = ip[14]; + ea[5] = ip[15]; + break; + } + return x; +} + +/* + * fill in an arp entry for broadcast or multicast + * addresses. Return the first queued packet for the + * IP address. + */ +static Block* +multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac) +{ + /* is it broadcast? */ + switch(ipforme(f, a->ip)){ + case Runi: + return nil; + case Rbcast: + memset(mac, 0xff, 6); + return arpresolve(f->arp, a, medium, mac); + default: + break; + } + + /* if multicast, fill in mac */ + switch(multicastea(mac, a->ip)){ + case V4: + case V6: + return arpresolve(f->arp, a, medium, mac); + } + + /* let arp take care of it */ + return nil; +} + +void +ethermediumlink(void) +{ + addipmedium(ðermedium); + addipmedium(&fbemedium); + addipmedium(&gbemedium); +} + + +static void +etherpref2addr(uchar *pref, uchar *ea) +{ + pref[8] = ea[0] | 0x2; + pref[9] = ea[1]; + pref[10] = ea[2]; + pref[11] = 0xFF; + pref[12] = 0xFE; + pref[13] = ea[3]; + pref[14] = ea[4]; + pref[15] = ea[5]; +} diff -Nru 0/sys/src/nix/ip/gre.c 4/sys/src/nix/ip/gre.c --- 0/sys/src/nix/ip/gre.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/gre.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,281 @@ +/* + * Generic Routing Encapsulation over IPv4, rfc1702 + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + GRE_IPONLY = 12, /* size of ip header */ + GRE_IPPLUSGRE = 12, /* minimum size of GRE header */ + IP_GREPROTO = 47, + + GRErxms = 200, + GREtickms = 100, + 
GREmaxxmit = 10, +}; + +typedef struct GREhdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar len[2]; /* packet length (including headers) */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar proto; /* Protocol */ + uchar cksum[2]; /* checksum */ + uchar src[4]; /* Ip source */ + uchar dst[4]; /* Ip destination */ + + /* gre header */ + uchar flags[2]; + uchar eproto[2]; /* encapsulation protocol */ +} GREhdr; + +typedef struct GREpriv GREpriv; +struct GREpriv +{ + int raw; /* Raw GRE mode */ + + /* non-MIB stats */ + ulong csumerr; /* checksum errors */ + ulong lenerr; /* short packet */ +}; + +static void grekick(void *x, Block *bp); + +static char* +greconnect(Conv *c, char **argv, int argc) +{ + Proto *p; + char *err; + Conv *tc, **cp, **ecp; + + err = Fsstdconnect(c, argv, argc); + if(err != nil) + return err; + + /* make sure noone's already connected to this other sys */ + p = c->p; + qlock(p); + ecp = &p->conv[p->nc]; + for(cp = p->conv; cp < ecp; cp++){ + tc = *cp; + if(tc == nil) + break; + if(tc == c) + continue; + if(tc->rport == c->rport && ipcmp(tc->raddr, c->raddr) == 0){ + err = "already connected to that addr/proto"; + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + break; + } + } + qunlock(p); + + if(err != nil) + return err; + Fsconnected(c, nil); + + return nil; +} + +static void +grecreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(grekick, c); +} + +static int +grestate(Conv *c, char *state, int n) +{ + USED(c); + return snprint(state, n, "%s\n", "Datagram"); +} + +static char* +greannounce(Conv*, char**, int) +{ + return "pktifc does not support announce"; +} + +static void +greclose(Conv *c) +{ + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; +} + +static void +grekick(void *x, Block *bp) +{ + Conv 
*c = x; + GREhdr *ghp; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + + if(bp == nil) + return; + + /* Make space to fit ip header (gre header already there) */ + bp = padblock(bp, GRE_IPONLY); + if(bp == nil) + return; + + /* make sure the message has a GRE header */ + bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE); + if(bp == nil) + return; + + ghp = (GREhdr *)(bp->rp); + ghp->vihl = IP_VER4; + + if(!((GREpriv*)c->p->priv)->raw){ + v4tov6(raddr, ghp->dst); + if(ipcmp(raddr, v4prefix) == 0) + memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen); + v4tov6(laddr, ghp->src); + if(ipcmp(laddr, v4prefix) == 0){ + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */ + memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen); + } + hnputs(ghp->eproto, c->rport); + } + + ghp->proto = IP_GREPROTO; + ghp->frag[0] = 0; + ghp->frag[1] = 0; + + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +static void +greiput(Proto *gre, Ipifc*, Block *bp) +{ + int len; + GREhdr *ghp; + Conv *c, **p; + ushort eproto; + uchar raddr[IPaddrlen]; + GREpriv *gpriv; + + gpriv = gre->priv; + ghp = (GREhdr*)(bp->rp); + + v4tov6(raddr, ghp->src); + eproto = nhgets(ghp->eproto); + qlock(gre); + + /* Look for a conversation structure for this port and address */ + c = nil; + for(p = gre->conv; *p; p++) { + c = *p; + if(c->inuse == 0) + continue; + if(c->rport == eproto && + (gpriv->raw || ipcmp(c->raddr, raddr) == 0)) + break; + } + + if(*p == nil) { + qunlock(gre); + freeblist(bp); + return; + } + + qunlock(gre); + + /* + * Trim the packet down to data size + */ + len = nhgets(ghp->len) - GRE_IPONLY; + if(len < GRE_IPPLUSGRE){ + freeblist(bp); + return; + } + bp = trimblock(bp, GRE_IPONLY, len); + if(bp == nil){ + gpriv->lenerr++; + return; + } + + /* + * Can't delimit packet so pull it all into one block. 
+ */ + if(qlen(c->rq) > 64*1024) + freeblist(bp); + else{ + bp = concatblock(bp); + if(bp == 0) + panic("greiput"); + qpass(c->rq, bp); + } +} + +int +grestats(Proto *gre, char *buf, int len) +{ + GREpriv *gpriv; + + gpriv = gre->priv; + + return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr); +} + +char* +grectl(Conv *c, char **f, int n) +{ + GREpriv *gpriv; + + gpriv = c->p->priv; + if(n == 1){ + if(strcmp(f[0], "raw") == 0){ + gpriv->raw = 1; + return nil; + } + else if(strcmp(f[0], "cooked") == 0){ + gpriv->raw = 0; + return nil; + } + } + return "unknown control request"; +} + +void +greinit(Fs *fs) +{ + Proto *gre; + + gre = smalloc(sizeof(Proto)); + gre->priv = smalloc(sizeof(GREpriv)); + gre->name = "gre"; + gre->connect = greconnect; + gre->announce = greannounce; + gre->state = grestate; + gre->create = grecreate; + gre->close = greclose; + gre->rcv = greiput; + gre->ctl = grectl; + gre->advise = nil; + gre->stats = grestats; + gre->ipproto = IP_GREPROTO; + gre->nc = 64; + gre->ptclsize = 0; + + Fsproto(fs, gre); +} diff -Nru 0/sys/src/nix/ip/icmp.c 4/sys/src/nix/ip/icmp.c --- 0/sys/src/nix/ip/icmp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/icmp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,492 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +typedef struct Icmp { + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar ipcksum[2]; /* Header checksum */ + uchar src[4]; /* Ip source */ + uchar dst[4]; /* Ip destination */ + uchar type; + uchar code; + uchar cksum[2]; + uchar icmpid[2]; + uchar seq[2]; + uchar data[1]; +} Icmp; + +enum { /* Packet Types */ + EchoReply = 0, + Unreachable = 3, + SrcQuench = 4, + Redirect = 5, + EchoRequest = 8, + 
TimeExceed = 11, + InParmProblem = 12, + Timestamp = 13, + TimestampReply = 14, + InfoRequest = 15, + InfoReply = 16, + AddrMaskRequest = 17, + AddrMaskReply = 18, + + Maxtype = 18, +}; + +enum +{ + MinAdvise = 24, /* minimum needed for us to advise another protocol */ +}; + +char *icmpnames[Maxtype+1] = +{ +[EchoReply] "EchoReply", +[Unreachable] "Unreachable", +[SrcQuench] "SrcQuench", +[Redirect] "Redirect", +[EchoRequest] "EchoRequest", +[TimeExceed] "TimeExceed", +[InParmProblem] "InParmProblem", +[Timestamp] "Timestamp", +[TimestampReply] "TimestampReply", +[InfoRequest] "InfoRequest", +[InfoReply] "InfoReply", +[AddrMaskRequest] "AddrMaskRequest", +[AddrMaskReply] "AddrMaskReply", +}; + +enum { + IP_ICMPPROTO = 1, + ICMP_IPSIZE = 20, + ICMP_HDRSIZE = 8, +}; + +enum +{ + InMsgs, + InErrors, + OutMsgs, + CsumErrs, + LenErrs, + HlenErrs, + + Nstats, +}; + +static char *statnames[Nstats] = +{ +[InMsgs] "InMsgs", +[InErrors] "InErrors", +[OutMsgs] "OutMsgs", +[CsumErrs] "CsumErrs", +[LenErrs] "LenErrs", +[HlenErrs] "HlenErrs", +}; + +typedef struct Icmppriv Icmppriv; +struct Icmppriv +{ + ulong stats[Nstats]; + + /* message counts */ + ulong in[Maxtype+1]; + ulong out[Maxtype+1]; +}; + +static void icmpkick(void *x, Block*); + +static void +icmpcreate(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(icmpkick, c); +} + +extern char* +icmpconnect(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, e); + + return nil; +} + +extern int +icmpstate(Conv *c, char *state, int n) +{ + USED(c); + return snprint(state, n, "%s qin %d qout %d\n", + "Datagram", + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0 + ); +} + +extern char* +icmpannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + + return nil; +} + +extern void +icmpclose(Conv *c) +{ + qclose(c->rq); + qclose(c->wq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; +} + +static void +icmpkick(void *x, Block *bp) +{ + Conv *c = x; + Icmp *p; + Icmppriv *ipriv; + + if(bp == nil) + return; + + if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){ + freeblist(bp); + return; + } + p = (Icmp *)(bp->rp); + p->vihl = IP_VER4; + ipriv = c->p->priv; + if(p->type <= Maxtype) + ipriv->out[p->type]++; + + v6tov4(p->dst, c->raddr); + v6tov4(p->src, c->laddr); + p->proto = IP_ICMPPROTO; + hnputs(p->icmpid, c->lport); + memset(p->cksum, 0, sizeof(p->cksum)); + hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE)); + ipriv->stats[OutMsgs]++; + ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +extern void +icmpttlexceeded(Fs *f, uchar *ia, Block *bp) +{ + Block *nbp; + Icmp *p, *np; + + p = (Icmp *)bp->rp; + + netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src); + nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); + nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8; + np = (Icmp *)nbp->rp; + np->vihl = IP_VER4; + memmove(np->dst, p->src, sizeof(np->dst)); + v6tov4(np->src, ia); + memmove(np->data, bp->rp, ICMP_IPSIZE + 8); + np->type = TimeExceed; + np->code = 0; + np->proto = IP_ICMPPROTO; + hnputs(np->icmpid, 0); + hnputs(np->seq, 0); + memset(np->cksum, 0, sizeof(np->cksum)); + hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE)); + ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil); + +} + +static void +icmpunreachable(Fs *f, Block *bp, int code, int seq) +{ + Block *nbp; + Icmp *p, *np; + int i; + uchar addr[IPaddrlen]; + + p = (Icmp *)bp->rp; + + /* only do this for unicast sources and destinations */ + v4tov6(addr, p->dst); + i = 
ipforme(f, addr); + if((i&Runi) == 0) + return; + v4tov6(addr, p->src); + i = ipforme(f, addr); + if(i != 0 && (i&Runi) == 0) + return; + + netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src); + nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); + nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8; + np = (Icmp *)nbp->rp; + np->vihl = IP_VER4; + memmove(np->dst, p->src, sizeof(np->dst)); + memmove(np->src, p->dst, sizeof(np->src)); + memmove(np->data, bp->rp, ICMP_IPSIZE + 8); + np->type = Unreachable; + np->code = code; + np->proto = IP_ICMPPROTO; + hnputs(np->icmpid, 0); + hnputs(np->seq, seq); + memset(np->cksum, 0, sizeof(np->cksum)); + hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE)); + ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmpnoconv(Fs *f, Block *bp) +{ + icmpunreachable(f, bp, 3, 0); +} + +extern void +icmpcantfrag(Fs *f, Block *bp, int mtu) +{ + icmpunreachable(f, bp, 4, mtu); +} + +static void +goticmpkt(Proto *icmp, Block *bp) +{ + Conv **c, *s; + Icmp *p; + uchar dst[IPaddrlen]; + ushort recid; + + p = (Icmp *) bp->rp; + v4tov6(dst, p->src); + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid) + if(ipcmp(s->raddr, dst) == 0){ + bp = concatblock(bp); + if(bp != nil) + qpass(s->rq, bp); + return; + } + } + freeblist(bp); +} + +static Block * +mkechoreply(Block *bp) +{ + Icmp *q; + uchar ip[4]; + + q = (Icmp *)bp->rp; + q->vihl = IP_VER4; + memmove(ip, q->src, sizeof(q->dst)); + memmove(q->src, q->dst, sizeof(q->src)); + memmove(q->dst, ip, sizeof(q->dst)); + q->type = EchoReply; + memset(q->cksum, 0, sizeof(q->cksum)); + hnputs(q->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE)); + + return bp; +} + +static char *unreachcode[] = +{ +[0] "net unreachable", +[1] "host unreachable", +[2] "protocol unreachable", +[3] "port unreachable", +[4] "fragmentation needed and DF set", +[5] "source route failed", +}; + +static void 
+icmpiput(Proto *icmp, Ipifc*, Block *bp) +{ + int n, iplen; + Icmp *p; + Block *r; + Proto *pr; + char *msg; + char m2[128]; + Icmppriv *ipriv; + + ipriv = icmp->priv; + + ipriv->stats[InMsgs]++; + + p = (Icmp *)bp->rp; + netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code); + n = blocklen(bp); + if(n < ICMP_IPSIZE+ICMP_HDRSIZE){ + ipriv->stats[InErrors]++; + ipriv->stats[HlenErrs]++; + netlog(icmp->f, Logicmp, "icmp hlen %d\n", n); + goto raise; + } + iplen = nhgets(p->length); + if(iplen > n || (iplen % 1)){ + ipriv->stats[LenErrs]++; + ipriv->stats[InErrors]++; + netlog(icmp->f, Logicmp, "icmp length error n %d iplen %d\n", + n, iplen); + goto raise; + } + if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){ + ipriv->stats[InErrors]++; + ipriv->stats[CsumErrs]++; + netlog(icmp->f, Logicmp, "icmp checksum error n %d iplen %d\n", + n, iplen); + goto raise; + } + if(p->type <= Maxtype) + ipriv->in[p->type]++; + + switch(p->type) { + case EchoRequest: + if (iplen < n) + bp = trimblock(bp, 0, iplen); + r = mkechoreply(bp); + ipriv->out[EchoReply]++; + ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil); + break; + case Unreachable: + if(p->code > 5) + msg = unreachcode[1]; + else + msg = unreachcode[p->code]; + + bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE; + if(blocklen(bp) < MinAdvise){ + ipriv->stats[LenErrs]++; + goto raise; + } + p = (Icmp *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, msg); + return; + } + + bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; + goticmpkt(icmp, bp); + break; + case TimeExceed: + if(p->code == 0){ + sprint(m2, "ttl exceeded at %V", p->src); + + bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE; + if(blocklen(bp) < MinAdvise){ + ipriv->stats[LenErrs]++; + goto raise; + } + p = (Icmp *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, m2); + return; + } + bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE; + } + + goticmpkt(icmp, bp); + break; + default: + 
goticmpkt(icmp, bp); + break; + } + return; + +raise: + freeblist(bp); +} + +void +icmpadvise(Proto *icmp, Block *bp, char *msg) +{ + Conv **c, *s; + Icmp *p; + uchar dst[IPaddrlen]; + ushort recid; + + p = (Icmp *) bp->rp; + v4tov6(dst, p->dst); + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid) + if(ipcmp(s->raddr, dst) == 0){ + qhangup(s->rq, msg); + qhangup(s->wq, msg); + break; + } + } + freeblist(bp); +} + +int +icmpstats(Proto *icmp, char *buf, int len) +{ + Icmppriv *priv; + char *p, *e; + int i; + + priv = icmp->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]); + for(i = 0; i <= Maxtype; i++){ + if(icmpnames[i]) + p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]); + else + p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]); + } + return p - buf; +} + +void +icmpinit(Fs *fs) +{ + Proto *icmp; + + icmp = smalloc(sizeof(Proto)); + icmp->priv = smalloc(sizeof(Icmppriv)); + icmp->name = "icmp"; + icmp->connect = icmpconnect; + icmp->announce = icmpannounce; + icmp->state = icmpstate; + icmp->create = icmpcreate; + icmp->close = icmpclose; + icmp->rcv = icmpiput; + icmp->stats = icmpstats; + icmp->ctl = nil; + icmp->advise = icmpadvise; + icmp->gc = nil; + icmp->ipproto = IP_ICMPPROTO; + icmp->nc = 128; + icmp->ptclsize = 0; + + Fsproto(fs, icmp); +} diff -Nru 0/sys/src/nix/ip/icmp6.c 4/sys/src/nix/ip/icmp6.c --- 0/sys/src/nix/ip/icmp6.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/icmp6.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,899 @@ +/* + * Internet Control Message Protocol for IPv6 + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" +#include "ipv6.h" + +enum +{ + InMsgs6, + InErrors6, + OutMsgs6, + CsumErrs6, + LenErrs6, + HlenErrs6, + HoplimErrs6, + IcmpCodeErrs6, + TargetErrs6, + OptlenErrs6, + 
AddrmxpErrs6, + RouterAddrErrs6, + + Nstats6, +}; + +enum { + ICMP_USEAD6 = 40, +}; + +enum { + Oflag = 1<<5, + Sflag = 1<<6, + Rflag = 1<<7, +}; + +enum { + /* ICMPv6 types */ + EchoReply = 0, + UnreachableV6 = 1, + PacketTooBigV6 = 2, + TimeExceedV6 = 3, + SrcQuench = 4, + ParamProblemV6 = 4, + Redirect = 5, + EchoRequest = 8, + TimeExceed = 11, + InParmProblem = 12, + Timestamp = 13, + TimestampReply = 14, + InfoRequest = 15, + InfoReply = 16, + AddrMaskRequest = 17, + AddrMaskReply = 18, + EchoRequestV6 = 128, + EchoReplyV6 = 129, + RouterSolicit = 133, + RouterAdvert = 134, + NbrSolicit = 135, + NbrAdvert = 136, + RedirectV6 = 137, + + Maxtype6 = 137, +}; + +typedef struct ICMPpkt ICMPpkt; +typedef struct IPICMP IPICMP; +typedef struct Ndpkt Ndpkt; +typedef struct NdiscC NdiscC; + +struct ICMPpkt { + uchar type; + uchar code; + uchar cksum[2]; + uchar icmpid[2]; + uchar seq[2]; +}; + +struct IPICMP { + Ip6hdr; + ICMPpkt; +}; + +struct NdiscC +{ + IPICMP; + uchar target[IPaddrlen]; +}; + +struct Ndpkt +{ + NdiscC; + uchar otype; + uchar olen; /* length in units of 8 octets(incl type, code), + * 1 for IEEE 802 addresses */ + uchar lnaddr[6]; /* link-layer address */ +}; + +typedef struct Icmppriv6 +{ + ulong stats[Nstats6]; + + /* message counts */ + ulong in[Maxtype6+1]; + ulong out[Maxtype6+1]; +} Icmppriv6; + +typedef struct Icmpcb6 +{ + QLock; + uchar headers; +} Icmpcb6; + +char *icmpnames6[Maxtype6+1] = +{ +[EchoReply] "EchoReply", +[UnreachableV6] "UnreachableV6", +[PacketTooBigV6] "PacketTooBigV6", +[TimeExceedV6] "TimeExceedV6", +[SrcQuench] "SrcQuench", +[Redirect] "Redirect", +[EchoRequest] "EchoRequest", +[TimeExceed] "TimeExceed", +[InParmProblem] "InParmProblem", +[Timestamp] "Timestamp", +[TimestampReply] "TimestampReply", +[InfoRequest] "InfoRequest", +[InfoReply] "InfoReply", +[AddrMaskRequest] "AddrMaskRequest", +[AddrMaskReply] "AddrMaskReply", +[EchoRequestV6] "EchoRequestV6", +[EchoReplyV6] "EchoReplyV6", +[RouterSolicit] "RouterSolicit", 
+[RouterAdvert] "RouterAdvert", +[NbrSolicit] "NbrSolicit", +[NbrAdvert] "NbrAdvert", +[RedirectV6] "RedirectV6", +}; + +static char *statnames6[Nstats6] = +{ +[InMsgs6] "InMsgs", +[InErrors6] "InErrors", +[OutMsgs6] "OutMsgs", +[CsumErrs6] "CsumErrs", +[LenErrs6] "LenErrs", +[HlenErrs6] "HlenErrs", +[HoplimErrs6] "HoplimErrs", +[IcmpCodeErrs6] "IcmpCodeErrs", +[TargetErrs6] "TargetErrs", +[OptlenErrs6] "OptlenErrs", +[AddrmxpErrs6] "AddrmxpErrs", +[RouterAddrErrs6] "RouterAddrErrs", +}; + +static char *unreachcode[] = +{ +[icmp6_no_route] "no route to destination", +[icmp6_ad_prohib] "comm with destination administratively prohibited", +[icmp6_unassigned] "icmp unreachable: unassigned error code (2)", +[icmp6_adr_unreach] "address unreachable", +[icmp6_port_unreach] "port unreachable", +[icmp6_unkn_code] "icmp unreachable: unknown code", +}; + +static void icmpkick6(void *x, Block *bp); + +static void +icmpcreate6(Conv *c) +{ + c->rq = qopen(64*1024, Qmsg, 0, c); + c->wq = qbypass(icmpkick6, c); +} + +static void +set_cksum(Block *bp) +{ + IPICMP *p = (IPICMP *)(bp->rp); + + hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */ + hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN); + p->proto = 0; + p->ttl = ICMPv6; /* ttl gets set later */ + hnputs(p->cksum, 0); + hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp))); + p->proto = ICMPv6; +} + +static Block * +newIPICMP(int packetlen) +{ + Block *nbp; + + nbp = allocb(packetlen); + nbp->wp += packetlen; + memset(nbp->rp, 0, packetlen); + return nbp; +} + +void +icmpadvise6(Proto *icmp, Block *bp, char *msg) +{ + ushort recid; + Conv **c, *s; + IPICMP *p; + + p = (IPICMP *)bp->rp; + recid = nhgets(p->icmpid); + + for(c = icmp->conv; *c; c++) { + s = *c; + if(s->lport == recid && ipcmp(s->raddr, p->dst) == 0){ + qhangup(s->rq, msg); + qhangup(s->wq, msg); + break; + } + } + freeblist(bp); +} + +static void +icmpkick6(void *x, Block *bp) +{ + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Conv *c = x; + IPICMP *p; + Icmppriv6 
*ipriv = c->p->priv; + Icmpcb6 *icb = (Icmpcb6*)c->ptcl; + + if(bp == nil) + return; + + if(icb->headers==6) { + /* get user specified addresses */ + bp = pullupblock(bp, ICMP_USEAD6); + if(bp == nil) + return; + bp->rp += 8; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + bp = padblock(bp, sizeof(Ip6hdr)); + } + + if(blocklen(bp) < sizeof(IPICMP)){ + freeblist(bp); + return; + } + p = (IPICMP *)(bp->rp); + if(icb->headers == 6) { + ipmove(p->dst, raddr); + ipmove(p->src, laddr); + } else { + ipmove(p->dst, c->raddr); + ipmove(p->src, c->laddr); + hnputs(p->icmpid, c->lport); + } + + set_cksum(bp); + p->vcf[0] = 0x06 << 4; + if(p->type <= Maxtype6) + ipriv->out[p->type]++; + ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil); +} + +char* +icmpctl6(Conv *c, char **argv, int argc) +{ + Icmpcb6 *icb; + + icb = (Icmpcb6*) c->ptcl; + if(argc==1 && strcmp(argv[0], "headers")==0) { + icb->headers = 6; + return nil; + } + return "unknown control request"; +} + +static void +goticmpkt6(Proto *icmp, Block *bp, int muxkey) +{ + ushort recid; + uchar *addr; + Conv **c, *s; + IPICMP *p = (IPICMP *)bp->rp; + + if(muxkey == 0) { + recid = nhgets(p->icmpid); + addr = p->src; + } else { + recid = muxkey; + addr = p->dst; + } + + for(c = icmp->conv; *c; c++){ + s = *c; + if(s->lport == recid && ipcmp(s->raddr, addr) == 0){ + bp = concatblock(bp); + if(bp != nil) + qpass(s->rq, bp); + return; + } + } + + freeblist(bp); +} + +static Block * +mkechoreply6(Block *bp, Ipifc *ifc) +{ + uchar addr[IPaddrlen]; + IPICMP *p = (IPICMP *)(bp->rp); + + ipmove(addr, p->src); + if(!isv6mcast(p->dst)) + ipmove(p->src, p->dst); + else if (!ipv6anylocal(ifc, p->src)) + return nil; + ipmove(p->dst, addr); + p->type = EchoReplyV6; + set_cksum(bp); + return bp; +} + +/* + * sends out an ICMPv6 neighbor solicitation + * suni == SRC_UNSPEC or SRC_UNI, + * tuni == TARG_MULTI => multicast for address resolution, + * and tuni == TARG_UNI => neighbor reachability. 
+ */ +extern void +icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac) +{ + Block *nbp; + Ndpkt *np; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + nbp = newIPICMP(sizeof(Ndpkt)); + np = (Ndpkt*) nbp->rp; + + if(suni == SRC_UNSPEC) + memmove(np->src, v6Unspecified, IPaddrlen); + else + memmove(np->src, src, IPaddrlen); + + if(tuni == TARG_UNI) + memmove(np->dst, targ, IPaddrlen); + else + ipv62smcast(np->dst, targ); + + np->type = NbrSolicit; + np->code = 0; + memmove(np->target, targ, IPaddrlen); + if(suni != SRC_UNSPEC) { + np->otype = SRC_LLADDR; + np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */ + memmove(np->lnaddr, mac, sizeof(np->lnaddr)); + } else + nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC); + + set_cksum(nbp); + np = (Ndpkt*)nbp->rp; + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[NbrSolicit]++; + netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ); + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +/* + * sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags. 
+ */ +extern void +icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags) +{ + Block *nbp; + Ndpkt *np; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + nbp = newIPICMP(sizeof(Ndpkt)); + np = (Ndpkt*)nbp->rp; + + memmove(np->src, src, IPaddrlen); + memmove(np->dst, dst, IPaddrlen); + + np->type = NbrAdvert; + np->code = 0; + np->icmpid[0] = flags; + memmove(np->target, targ, IPaddrlen); + + np->otype = TARGET_LLADDR; + np->olen = 1; + memmove(np->lnaddr, mac, sizeof(np->lnaddr)); + + set_cksum(nbp); + np = (Ndpkt*) nbp->rp; + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[NbrAdvert]++; + netlog(f, Logicmp, "sending neighbor advertisement %I\n", src); + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free) +{ + int osz = BLEN(bp); + int sz = MIN(sizeof(IPICMP) + osz, v6MINTU); + Block *nbp; + IPICMP *np; + Ip6hdr *p; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + p = (Ip6hdr *)bp->rp; + + if(isv6mcast(p->src)) + goto clean; + + nbp = newIPICMP(sz); + np = (IPICMP *)nbp->rp; + + rlock(ifc); + if(ipv6anylocal(ifc, np->src)) + netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", + p->src, p->dst); + else { + netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", + p->src, p->dst); + freeblist(nbp); + if(free) + goto clean; + else + return; + } + + memmove(np->dst, p->src, IPaddrlen); + np->type = UnreachableV6; + np->code = code; + memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP)); + set_cksum(nbp); + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[UnreachableV6]++; + + if(free) + ipiput6(f, ifc, nbp); + else { + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); + return; + } + +clean: + runlock(ifc); + freeblist(bp); +} + +extern void +icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp) +{ + int osz = BLEN(bp); + int sz = MIN(sizeof(IPICMP) + osz, v6MINTU); + Block *nbp; + IPICMP *np; + Ip6hdr *p; + Proto *icmp = 
f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + p = (Ip6hdr *)bp->rp; + + if(isv6mcast(p->src)) + return; + + nbp = newIPICMP(sz); + np = (IPICMP *) nbp->rp; + + if(ipv6anylocal(ifc, np->src)) + netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", + p->src, p->dst); + else { + netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", + p->src, p->dst); + return; + } + + memmove(np->dst, p->src, IPaddrlen); + np->type = TimeExceedV6; + np->code = 0; + memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP)); + set_cksum(nbp); + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[TimeExceedV6]++; + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +extern void +icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp) +{ + int osz = BLEN(bp); + int sz = MIN(sizeof(IPICMP) + osz, v6MINTU); + Block *nbp; + IPICMP *np; + Ip6hdr *p; + Proto *icmp = f->t2p[ICMPv6]; + Icmppriv6 *ipriv = icmp->priv; + + p = (Ip6hdr *)bp->rp; + + if(isv6mcast(p->src)) + return; + + nbp = newIPICMP(sz); + np = (IPICMP *)nbp->rp; + + if(ipv6anylocal(ifc, np->src)) + netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", + p->src, p->dst); + else { + netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", + p->src, p->dst); + return; + } + + memmove(np->dst, p->src, IPaddrlen); + np->type = PacketTooBigV6; + np->code = 0; + hnputl(np->icmpid, ifc->maxtu - ifc->medium->hsize); + memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP)); + set_cksum(nbp); + np->ttl = HOP_LIMIT; + np->vcf[0] = 0x06 << 4; + ipriv->out[PacketTooBigV6]++; + ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil); +} + +/* + * RFC 2461, pages 39-40, pages 57-58. 
+ */ +static int +valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) +{ + int sz, osz, unsp, n, ttl, iplen; + int pktsz = BLEN(bp); + uchar *packet = bp->rp; + IPICMP *p = (IPICMP *) packet; + Ndpkt *np; + + USED(ifc); + n = blocklen(bp); + if(n < sizeof(IPICMP)) { + ipriv->stats[HlenErrs6]++; + netlog(icmp->f, Logicmp, "icmp hlen %d\n", n); + goto err; + } + + iplen = nhgets(p->ploadlen); + if(iplen > n-IPV6HDR_LEN || (iplen % 1)) { + ipriv->stats[LenErrs6]++; + netlog(icmp->f, Logicmp, "icmp length %d\n", iplen); + goto err; + } + + /* Rather than construct explicit pseudoheader, overwrite IPv6 header */ + if(p->proto != ICMPv6) { + /* This code assumes no extension headers!!! */ + netlog(icmp->f, Logicmp, "icmp error: extension header\n"); + goto err; + } + memset(packet, 0, 4); + ttl = p->ttl; + p->ttl = p->proto; + p->proto = 0; + if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) { + ipriv->stats[CsumErrs6]++; + netlog(icmp->f, Logicmp, "icmp checksum error\n"); + goto err; + } + p->proto = p->ttl; + p->ttl = ttl; + + /* additional tests for some pkt types */ + if (p->type == NbrSolicit || p->type == NbrAdvert || + p->type == RouterAdvert || p->type == RouterSolicit || + p->type == RedirectV6) { + if(p->ttl != HOP_LIMIT) { + ipriv->stats[HoplimErrs6]++; + goto err; + } + if(p->code != 0) { + ipriv->stats[IcmpCodeErrs6]++; + goto err; + } + + switch (p->type) { + case NbrSolicit: + case NbrAdvert: + np = (Ndpkt*) p; + if(isv6mcast(np->target)) { + ipriv->stats[TargetErrs6]++; + goto err; + } + if(optexsts(np) && np->olen == 0) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + + if (p->type == NbrSolicit && + ipcmp(np->src, v6Unspecified) == 0) + if(!issmcast(np->dst) || optexsts(np)) { + ipriv->stats[AddrmxpErrs6]++; + goto err; + } + + if(p->type == NbrAdvert) + if(isv6mcast(np->dst) && + (nhgets(np->icmpid) & Sflag)){ + ipriv->stats[AddrmxpErrs6]++; + goto err; + } + break; + + case RouterAdvert: + if(pktsz - sizeof(Ip6hdr) < 16) { + 
ipriv->stats[HlenErrs6]++; + goto err; + } + if(!islinklocal(p->src)) { + ipriv->stats[RouterAddrErrs6]++; + goto err; + } + sz = sizeof(IPICMP) + 8; + while (sz+1 < pktsz) { + osz = packet[sz+1]; + if(osz <= 0) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + sz += 8*osz; + } + break; + + case RouterSolicit: + if(pktsz - sizeof(Ip6hdr) < 8) { + ipriv->stats[HlenErrs6]++; + goto err; + } + unsp = (ipcmp(p->src, v6Unspecified) == 0); + sz = sizeof(IPICMP) + 8; + while (sz+1 < pktsz) { + osz = packet[sz+1]; + if(osz <= 0 || + (unsp && packet[sz] == SRC_LLADDR)) { + ipriv->stats[OptlenErrs6]++; + goto err; + } + sz += 8*osz; + } + break; + + case RedirectV6: + /* to be filled in */ + break; + + default: + goto err; + } + } + return 1; +err: + ipriv->stats[InErrors6]++; + return 0; +} + +static int +targettype(Fs *f, Ipifc *ifc, uchar *target) +{ + Iplifc *lifc; + int t; + + rlock(ifc); + if(ipproxyifc(f, ifc, target)) { + runlock(ifc); + return Tuniproxy; + } + + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(lifc->local, target) == 0) { + t = (lifc->tentative)? 
Tunitent: Tunirany; + runlock(ifc); + return t; + } + + runlock(ifc); + return 0; +} + +static void +icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp) +{ + int refresh = 1; + char *msg, m2[128]; + uchar pktflags; + uchar *packet = bp->rp; + uchar lsrc[IPaddrlen]; + Block *r; + IPICMP *p = (IPICMP *)packet; + Icmppriv6 *ipriv = icmp->priv; + Iplifc *lifc; + Ndpkt* np; + Proto *pr; + + if(!valid(icmp, ipifc, bp, ipriv) || p->type > Maxtype6) + goto raise; + + ipriv->in[p->type]++; + + switch(p->type) { + case EchoRequestV6: + r = mkechoreply6(bp, ipifc); + if(r == nil) + goto raise; + ipriv->out[EchoReply]++; + ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil); + break; + + case UnreachableV6: + if(p->code > 4) + msg = unreachcode[icmp6_unkn_code]; + else + msg = unreachcode[p->code]; + + bp->rp += sizeof(IPICMP); + if(blocklen(bp) < 8){ + ipriv->stats[LenErrs6]++; + goto raise; + } + p = (IPICMP *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr != nil && pr->advise != nil) { + (*pr->advise)(pr, bp, msg); + return; + } + + bp->rp -= sizeof(IPICMP); + goticmpkt6(icmp, bp, 0); + break; + + case TimeExceedV6: + if(p->code == 0){ + sprint(m2, "ttl exceeded at %I", p->src); + + bp->rp += sizeof(IPICMP); + if(blocklen(bp) < 8){ + ipriv->stats[LenErrs6]++; + goto raise; + } + p = (IPICMP *)bp->rp; + pr = Fsrcvpcolx(icmp->f, p->proto); + if(pr && pr->advise) { + (*pr->advise)(pr, bp, m2); + return; + } + bp->rp -= sizeof(IPICMP); + } + + goticmpkt6(icmp, bp, 0); + break; + + case RouterAdvert: + case RouterSolicit: + /* using lsrc as a temp, munge hdr for goticmp6 */ + if (0) { + memmove(lsrc, p->src, IPaddrlen); + memmove(p->src, p->dst, IPaddrlen); + memmove(p->dst, lsrc, IPaddrlen); + } + goticmpkt6(icmp, bp, p->type); + break; + + case NbrSolicit: + np = (Ndpkt*) p; + pktflags = 0; + switch (targettype(icmp->f, ipifc, np->target)) { + case Tunirany: + pktflags |= Oflag; + /* fall through */ + + case Tuniproxy: + if(ipcmp(np->src, v6Unspecified) != 0) { + arpenter(icmp->f, 
V6, np->src, np->lnaddr, + 8*np->olen-2, 0); + pktflags |= Sflag; + } + if(ipv6local(ipifc, lsrc)) + icmpna(icmp->f, lsrc, + (ipcmp(np->src, v6Unspecified) == 0? + v6allnodesL: np->src), + np->target, ipifc->mac, pktflags); + else + freeblist(bp); + break; + + case Tunitent: + /* not clear what needs to be done. send up + * an icmp mesg saying don't use this address? */ + default: + freeblist(bp); + } + break; + + case NbrAdvert: + np = (Ndpkt*) p; + + /* + * if the target address matches one of the local interface + * addresses and the local interface address has tentative bit + * set, insert into ARP table. this is so the duplicate address + * detection part of ipconfig can discover duplication through + * the arp table. + */ + lifc = iplocalonifc(ipifc, np->target); + if(lifc && lifc->tentative) + refresh = 0; + arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, + refresh); + freeblist(bp); + break; + + case PacketTooBigV6: + default: + goticmpkt6(icmp, bp, 0); + break; + } + return; +raise: + freeblist(bp); +} + +int +icmpstats6(Proto *icmp6, char *buf, int len) +{ + Icmppriv6 *priv; + char *p, *e; + int i; + + priv = icmp6->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats6; i++) + p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]); + for(i = 0; i <= Maxtype6; i++) + if(icmpnames6[i]) + p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], + priv->in[i], priv->out[i]); +/* else + p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], + priv->out[i]); + */ + return p - buf; +} + + +/* import from icmp.c */ +extern int icmpstate(Conv *c, char *state, int n); +extern char* icmpannounce(Conv *c, char **argv, int argc); +extern char* icmpconnect(Conv *c, char **argv, int argc); +extern void icmpclose(Conv *c); + +void +icmp6init(Fs *fs) +{ + Proto *icmp6 = smalloc(sizeof(Proto)); + + icmp6->priv = smalloc(sizeof(Icmppriv6)); + icmp6->name = "icmpv6"; + icmp6->connect = icmpconnect; + icmp6->announce = icmpannounce; + icmp6->state = icmpstate; + 
icmp6->create = icmpcreate6; + icmp6->close = icmpclose; + icmp6->rcv = icmpiput6; + icmp6->stats = icmpstats6; + icmp6->ctl = icmpctl6; + icmp6->advise = icmpadvise6; + icmp6->gc = nil; + icmp6->ipproto = ICMPv6; + icmp6->nc = 16; + icmp6->ptclsize = sizeof(Icmpcb6); + + Fsproto(fs, icmp6); +} diff -Nru 0/sys/src/nix/ip/il.c 4/sys/src/nix/ip/il.c --- 0/sys/src/nix/ip/il.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/il.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1408 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum /* Connection state */ +{ + Ilclosed, + Ilsyncer, + Ilsyncee, + Ilestablished, + Illistening, + Ilclosing, + Ilopening, /* only for file server */ +}; + +char *ilstates[] = +{ + "Closed", + "Syncer", + "Syncee", + "Established", + "Listen", + "Closing", + "Opening", /* only for file server */ +}; + +enum /* Packet types */ +{ + Ilsync, + Ildata, + Ildataquery, + Ilack, + Ilquery, + Ilstate, + Ilclose, +}; + +char *iltype[] = +{ + "sync", + "data", + "dataquery", + "ack", + "query", + "state", + "close" +}; + +enum +{ + Seconds = 1000, + Iltickms = 50, /* time base */ + AckDelay = 2*Iltickms, /* max time twixt message rcvd & ack sent */ + MaxTimeout = 30*Seconds, /* max time between rexmit */ + QueryTime = 10*Seconds, /* time between subsequent queries */ + DeathTime = 30*QueryTime, + + MaxRexmit = 16, /* max retransmissions before hangup */ + Defaultwin = 20, + + LogAGain = 3, + AGain = 1< 1){ + p = strstr(argv[1], "!fasttimeout"); + if(p != nil){ + *p = 0; + fast = 1; + } + } + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + return ilstart(c, IL_CONNECT, fast); +} + +static int +ilstate(Conv *c, char *state, int n) +{ + Ilcb *ic; + + ic = (Ilcb*)(c->ptcl); + return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n", + ilstates[ic->state], + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0, + ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain, + ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt); +} + +static int +ilinuse(Conv *c) +{ + Ilcb *ic; + + ic = (Ilcb*)(c->ptcl); + return ic->state != Ilclosed; + +} + +/* called with c locked */ +static char* +ilannounce(Conv *c, char **argv, int argc) +{ + char *e; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + e = ilstart(c, IL_LISTEN, 0); + if(e != nil) + return e; + Fsconnected(c, nil); + + return nil; +} + +void +illocalclose(Conv *c) +{ + Ilcb *ic; + Ilpriv *ipriv; + + ipriv = c->p->priv; + ic = (Ilcb*)c->ptcl; + ic->state = Ilclosed; + iphtrem(&ipriv->ht, c); + ipmove(c->laddr, IPnoaddr); + c->lport = 0; +} + +static void +ilclose(Conv *c) +{ + Ilcb *ic; + + ic = (Ilcb*)c->ptcl; + + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + + switch(ic->state) { + case Ilclosing: + case Ilclosed: + break; + case Ilsyncer: + case Ilsyncee: + case Ilestablished: + ic->state = Ilclosing; + ilsettimeout(ic); + ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0); + break; + case Illistening: + illocalclose(c); + break; + } + ilfreeq(ic); +} + +void +ilkick(void *x, Block *bp) +{ + Conv *c = x; + Ilhdr *ih; + Ilcb *ic; + int dlen; + ulong id, ack; + Fs *f; + Ilpriv *priv; + + f = c->p->f; + priv = c->p->priv; + ic = (Ilcb*)c->ptcl; + + if(bp == nil) + return; + + switch(ic->state) { + case Ilclosed: + case Illistening: + case Ilclosing: + freeblist(bp); + qhangup(c->rq, nil); + return; + } + + dlen = blocklen(bp); + + /* Make space to fit il & ip */ + bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE); + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->frag[0] = 0; + ih->frag[1] = 0; + v6tov4(ih->dst, c->raddr); + v6tov4(ih->src, c->laddr); + ih->proto = IP_ILPROTO; + + /* Il fields */ + hnputs(ih->illen, dlen+IL_HDRSIZE); + hnputs(ih->ilsrc, c->lport); + hnputs(ih->ildst, c->rport); + + qlock(&ic->ackq); + id = ic->next++; + hnputl(ih->ilid, id); + ack = 
ic->recvd; + hnputl(ih->ilack, ack); + ic->acksent = ack; + ic->acktime = NOW + AckDelay; + ih->iltype = Ildata; + ih->ilspec = 0; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + /* Checksum of ilheader plus data (not ip & no pseudo header) */ + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE)); + + ilackq(ic, bp); + qunlock(&ic->ackq); + + /* Start the round trip timer for this packet if the timer is free */ + if(ic->rttack == 0) { + ic->rttack = id; + ic->rttstart = fastticks(nil); + ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE; + } + + if(later(NOW, ic->timeout, nil)) + ilsettimeout(ic); + ipoput4(f, bp, 0, c->ttl, c->tos, c); + priv->stats[OutMsgs]++; +} + +static void +ilcreate(Conv *c) +{ + c->rq = qopen(Maxrq, 0, 0, c); + c->wq = qbypass(ilkick, c); +} + +int +ilxstats(Proto *il, char *buf, int len) +{ + Ilpriv *priv; + char *p, *e; + int i; + + priv = il->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]); + return p - buf; +} + +void +ilackq(Ilcb *ic, Block *bp) +{ + Block *np; + int n; + + n = blocklen(bp); + + /* Enqueue a copy on the unacked queue in case this one gets lost */ + np = copyblock(bp, n); + if(ic->unacked) + ic->unackedtail->list = np; + else + ic->unacked = np; + ic->unackedtail = np; + np->list = nil; + ic->unackedbytes += n; +} + +static +void +ilrttcalc(Ilcb *ic, Block *bp) +{ + int rtt, tt, pt, delay, rate; + + rtt = fastticks(nil) - ic->rttstart; + rtt = (rtt*scalemul)/scalediv; + delay = ic->delay; + rate = ic->rate; + + /* Guard against zero wrap */ + if(rtt > 120000 || rtt < 0) + return; + + /* this block had to be transmitted after the one acked so count its size */ + ic->rttlen += blocklen(bp) + IL_IPSIZE + IL_HDRSIZE; + + if(ic->rttlen < 256){ + /* guess fixed delay as rtt of small packets */ + delay += rtt - (delay>>LogAGain); + if(delay < AGain) + delay = AGain; + ic->delay = delay; + } else { + /* if packet took longer than avg rtt 
delay, recalc rate */ + tt = rtt - (delay>>LogAGain); + if(tt > 0){ + rate += ic->rttlen/tt - (rate>>LogAGain); + if(rate < AGain) + rate = AGain; + ic->rate = rate; + } + } + + /* mdev */ + pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain); + ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain); + + if(rtt > ic->maxrtt) + ic->maxrtt = rtt; +} + +void +ilackto(Ilcb *ic, ulong ackto, Block *bp) +{ + Ilhdr *h; + ulong id; + + if(ic->rttack == ackto) + ilrttcalc(ic, bp); + + /* Cancel if we've passed the packet we were interested in */ + if(ic->rttack <= ackto) + ic->rttack = 0; + + qlock(&ic->ackq); + while(ic->unacked) { + h = (Ilhdr *)ic->unacked->rp; + id = nhgetl(h->ilid); + if(ackto < id) + break; + + bp = ic->unacked; + ic->unacked = bp->list; + bp->list = nil; + ic->unackedbytes -= blocklen(bp); + freeblist(bp); + ic->rexmit = 0; + ilsettimeout(ic); + } + qunlock(&ic->ackq); +} + +void +iliput(Proto *il, Ipifc*, Block *bp) +{ + char *st; + Ilcb *ic; + Ilhdr *ih; + uchar raddr[IPaddrlen]; + uchar laddr[IPaddrlen]; + ushort sp, dp, csum; + int plen, illen; + Conv *new, *s; + Ilpriv *ipriv; + + ipriv = il->priv; + + ih = (Ilhdr *)bp->rp; + plen = blocklen(bp); + if(plen < IL_IPSIZE+IL_HDRSIZE){ + netlog(il->f, Logil, "il: hlenerr\n"); + ipriv->stats[HlenErrs]++; + goto raise; + } + + illen = nhgets(ih->illen); + if(illen+IL_IPSIZE > plen){ + netlog(il->f, Logil, "il: lenerr\n"); + ipriv->stats[LenErrs]++; + goto raise; + } + + sp = nhgets(ih->ildst); + dp = nhgets(ih->ilsrc); + v4tov6(raddr, ih->src); + v4tov6(laddr, ih->dst); + + if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) { + if(ih->iltype > Ilclose) + st = "?"; + else + st = iltype[ih->iltype]; + ipriv->stats[CsumErrs]++; + netlog(il->f, Logil, "il: cksum %ux %s, pkt(%ux id %ud ack %I/%d->%d)\n", + csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); + goto raise; + } + + qlock(il); + s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp); + if(s == nil){ + if(ih->iltype == Ilsync) + ilreject(il->f, ih); 
/* no listener */ + qunlock(il); + goto raise; + } + + ic = (Ilcb*)s->ptcl; + if(ic->state == Illistening){ + if(ih->iltype != Ilsync){ + qunlock(il); + if(ih->iltype > Ilclose) + st = "?"; + else + st = iltype[ih->iltype]; + ilreject(il->f, ih); /* no channel and not sync */ + netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n", + st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp); + goto raise; + } + + new = Fsnewcall(s, raddr, dp, laddr, sp, V4); + if(new == nil){ + qunlock(il); + netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp); + ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0); + goto raise; + } + s = new; + + ic = (Ilcb*)s->ptcl; + + ic->conv = s; + ic->state = Ilsyncee; + ilcbinit(ic); + ic->rstart = nhgetl(ih->ilid); + iphtadd(&ipriv->ht, s); + } + + qlock(s); + qunlock(il); + if(waserror()){ + qunlock(s); + nexterror(); + } + ilprocess(s, ih, bp); + qunlock(s); + poperror(); + return; +raise: + freeblist(bp); +} + +void +_ilprocess(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + ulong id, ack; + Ilpriv *priv; + + id = nhgetl(h->ilid); + ack = nhgetl(h->ilack); + + ic = (Ilcb*)s->ptcl; + + ic->lastrecv = NOW; + ic->querytime = NOW + QueryTime; + priv = s->p->priv; + priv->stats[InMsgs]++; + + switch(ic->state) { + default: + netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state); + case Ilclosed: + freeblist(bp); + break; + case Ilsyncer: + switch(h->iltype) { + default: + break; + case Ilsync: + if(ack != ic->start) + ilhangup(s, "connection rejected"); + else { + ic->recvd = id; + ic->rstart = id; + ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0); + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + Fsconnected(s, nil); + ilpullup(s); + } + break; + case Ilclose: + if(ack == ic->start) + ilhangup(s, "connection rejected"); + break; + } + freeblist(bp); + break; + case Ilsyncee: + switch(h->iltype) { + default: + break; + case Ilsync: + if(id != ic->rstart || ack != 0){ + 
illocalclose(s); + } else { + ic->recvd = id; + ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0); + } + break; + case Ilack: + if(ack == ic->start) { + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + ilpullup(s); + } + break; + case Ildata: + if(ack == ic->start) { + ic->state = Ilestablished; + ic->fasttimeout = 0; + ic->rexmit = 0; + goto established; + } + break; + case Ilclose: + if(ack == ic->start) + ilhangup(s, "remote close"); + break; + } + freeblist(bp); + break; + case Ilestablished: + established: + switch(h->iltype) { + case Ilsync: + if(id != ic->rstart) + ilhangup(s, "remote close"); + else + ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0); + freeblist(bp); + break; + case Ildata: + /* + * avoid consuming all the mount rpc buffers in the + * system. if the input queue is too long, drop this + * packet. + */ + if (s->rq && qwindow(s->rq) <= 0) { + priv->stats[DroppedMsgs]++; + freeblist(bp); + break; + } + + ilackto(ic, ack, bp); + iloutoforder(s, h, bp); + ilpullup(s); + break; + case Ildataquery: + ilackto(ic, ack, bp); + iloutoforder(s, h, bp); + ilpullup(s); + ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec); + break; + case Ilack: + ilackto(ic, ack, bp); + freeblist(bp); + break; + case Ilquery: + ilackto(ic, ack, bp); + ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec); + freeblist(bp); + break; + case Ilstate: + if(ack >= ic->rttack) + ic->rttack = 0; + ilackto(ic, ack, bp); + if(h->ilspec > Nqt) + h->ilspec = 0; + if(ic->qt[h->ilspec] > ack){ + ilrexmit(ic); + ilsettimeout(ic); + } + freeblist(bp); + break; + case Ilclose: + freeblist(bp); + if(ack < ic->start || ack > ic->next) + break; + ic->recvd = id; + ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0); + ic->state = Ilclosing; + ilsettimeout(ic); + ilfreeq(ic); + break; + } + break; + case Illistening: + freeblist(bp); + break; + case Ilclosing: + switch(h->iltype) { + case Ilclose: + ic->recvd = id; + ilsendctl(s, nil, Ilclose, ic->next, 
ic->recvd, 0); + if(ack == ic->next) + ilhangup(s, nil); + break; + default: + break; + } + freeblist(bp); + break; + } +} + +void +ilrexmit(Ilcb *ic) +{ + Ilhdr *h; + Block *nb; + Conv *c; + ulong id; + Ilpriv *priv; + + nb = nil; + qlock(&ic->ackq); + if(ic->unacked) + nb = copyblock(ic->unacked, blocklen(ic->unacked)); + qunlock(&ic->ackq); + + if(nb == nil) + return; + + h = (Ilhdr*)nb->rp; + h->vihl = IP_VER4; + + h->iltype = Ildataquery; + hnputl(h->ilack, ic->recvd); + h->ilspec = ilnextqt(ic); + h->ilsum[0] = 0; + h->ilsum[1] = 0; + hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen))); + + c = ic->conv; + id = nhgetl(h->ilid); + netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd, + ic->rexmit, ic->timeout, + c->raddr, c->lport, c->rport); + + ilbackoff(ic); + + ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c); + + /* statistics */ + ic->rxtot++; + priv = c->p->priv; + priv->rexmit++; +} + +/* DEBUG */ +void +ilprocess(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + + ic = (Ilcb*)s->ptcl; + + USED(ic); + netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %d ack %ud %ud->%ud) ", + ilstates[ic->state], ic->rstart, ic->recvd, ic->start, + ic->next, iltype[h->iltype], nhgetl(h->ilid), + nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst)); + + _ilprocess(s, h, bp); + + netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next); +} + +void +ilhangup(Conv *s, char *msg) +{ + Ilcb *ic; + int callout; + + netlog(s->p->f, Logil, "il: hangup! 
%I %d/%d: %s\n", s->raddr, + s->lport, s->rport, msg?msg:"no reason"); + + ic = (Ilcb*)s->ptcl; + callout = ic->state == Ilsyncer; + illocalclose(s); + + qhangup(s->rq, msg); + qhangup(s->wq, msg); + + if(callout) + Fsconnected(s, msg); +} + +void +ilpullup(Conv *s) +{ + Ilcb *ic; + Ilhdr *oh; + Block *bp; + ulong oid, dlen; + Ilpriv *ipriv; + + ic = (Ilcb*)s->ptcl; + if(ic->state != Ilestablished) + return; + + qlock(&ic->outo); + while(ic->outoforder) { + bp = ic->outoforder; + oh = (Ilhdr*)bp->rp; + oid = nhgetl(oh->ilid); + if(oid <= ic->recvd) { + ic->outoforder = bp->list; + freeblist(bp); + continue; + } + if(oid != ic->recvd+1){ + ipriv = s->p->priv; + ipriv->stats[OutOfOrder]++; + break; + } + + ic->recvd = oid; + ic->outoforder = bp->list; + + bp->list = nil; + dlen = nhgets(oh->illen)-IL_HDRSIZE; + bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen); + /* + * Upper levels don't know about multiple-block + * messages so copy all into one (yick). + */ + bp = concatblock(bp); + if(bp == 0) + panic("ilpullup"); + bp = packblock(bp); + if(bp == 0) + panic("ilpullup2"); + qpassnolim(s->rq, bp); + } + qunlock(&ic->outo); +} + +void +iloutoforder(Conv *s, Ilhdr *h, Block *bp) +{ + Ilcb *ic; + uchar *lid; + Block *f, **l; + ulong id, newid; + Ilpriv *ipriv; + + ipriv = s->p->priv; + ic = (Ilcb*)s->ptcl; + bp->list = nil; + + id = nhgetl(h->ilid); + /* Window checks */ + if(id <= ic->recvd || id > ic->recvd+ic->window) { + netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n", + id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport); + freeblist(bp); + return; + } + + /* Packet is acceptable so sort onto receive queue for pullup */ + qlock(&ic->outo); + if(ic->outoforder == nil) + ic->outoforder = bp; + else { + l = &ic->outoforder; + for(f = *l; f; f = f->list) { + lid = ((Ilhdr*)(f->rp))->ilid; + newid = nhgetl(lid); + if(id <= newid) { + if(id == newid) { + ipriv->stats[DupMsg]++; + ipriv->stats[DupBytes] += blocklen(bp); + 
qunlock(&ic->outo); + freeblist(bp); + return; + } + bp->list = f; + *l = bp; + qunlock(&ic->outo); + return; + } + l = &f->list; + } + *l = bp; + } + qunlock(&ic->outo); +} + +void +ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec) +{ + Ilhdr *ih; + Ilcb *ic; + Block *bp; + int ttl, tos; + + bp = allocb(IL_IPSIZE+IL_HDRSIZE); + bp->wp += IL_IPSIZE+IL_HDRSIZE; + + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->proto = IP_ILPROTO; + hnputs(ih->illen, IL_HDRSIZE); + ih->frag[0] = 0; + ih->frag[1] = 0; + if(inih) { + hnputl(ih->dst, nhgetl(inih->src)); + hnputl(ih->src, nhgetl(inih->dst)); + hnputs(ih->ilsrc, nhgets(inih->ildst)); + hnputs(ih->ildst, nhgets(inih->ilsrc)); + hnputl(ih->ilid, nhgetl(inih->ilack)); + hnputl(ih->ilack, nhgetl(inih->ilid)); + ttl = MAXTTL; + tos = DFLTTOS; + } + else { + v6tov4(ih->dst, ipc->raddr); + v6tov4(ih->src, ipc->laddr); + hnputs(ih->ilsrc, ipc->lport); + hnputs(ih->ildst, ipc->rport); + hnputl(ih->ilid, id); + hnputl(ih->ilack, ack); + ic = (Ilcb*)ipc->ptcl; + ic->acksent = ack; + ic->acktime = NOW; + ttl = ipc->ttl; + tos = ipc->tos; + } + ih->iltype = type; + ih->ilspec = ilspec; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE)); + +if(ipc==nil) + panic("ipc is nil caller is %#p", getcallerpc(&ipc)); +if(ipc->p==nil) + panic("ipc->p is nil"); + + netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n", + iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack), + nhgets(ih->ilsrc), nhgets(ih->ildst)); + + ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc); +} + +void +ilreject(Fs *f, Ilhdr *inih) +{ + Ilhdr *ih; + Block *bp; + + bp = allocb(IL_IPSIZE+IL_HDRSIZE); + bp->wp += IL_IPSIZE+IL_HDRSIZE; + + ih = (Ilhdr *)(bp->rp); + ih->vihl = IP_VER4; + + /* Ip fields */ + ih->proto = IP_ILPROTO; + hnputs(ih->illen, IL_HDRSIZE); + ih->frag[0] = 0; + ih->frag[1] = 0; + hnputl(ih->dst, nhgetl(inih->src)); + hnputl(ih->src, 
nhgetl(inih->dst)); + hnputs(ih->ilsrc, nhgets(inih->ildst)); + hnputs(ih->ildst, nhgets(inih->ilsrc)); + hnputl(ih->ilid, nhgetl(inih->ilack)); + hnputl(ih->ilack, nhgetl(inih->ilid)); + ih->iltype = Ilclose; + ih->ilspec = 0; + ih->ilsum[0] = 0; + ih->ilsum[1] = 0; + + if(ilcksum) + hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE)); + + ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil); +} + +void +ilsettimeout(Ilcb *ic) +{ + ulong pt; + + pt = (ic->delay>>LogAGain) + + ic->unackedbytes/(ic->rate>>LogAGain) + + (ic->mdev>>(LogDGain-1)) + + AckDelay; + if(pt > MaxTimeout) + pt = MaxTimeout; + ic->timeout = NOW + pt; +} + +void +ilbackoff(Ilcb *ic) +{ + ulong pt; + int i; + + pt = (ic->delay>>LogAGain) + + ic->unackedbytes/(ic->rate>>LogAGain) + + (ic->mdev>>(LogDGain-1)) + + AckDelay; + for(i = 0; i < ic->rexmit; i++) + pt = pt + (pt>>1); + if(pt > MaxTimeout) + pt = MaxTimeout; + ic->timeout = NOW + pt; + + if(ic->fasttimeout) + ic->timeout = NOW+Iltickms; + + ic->rexmit++; +} + +// complain if two numbers not within an hour of each other +#define Tfuture (1000*60*60) +int +later(ulong t1, ulong t2, char *x) +{ + int dt; + + dt = t1 - t2; + if(dt > 0) { + if(x != nil && dt > Tfuture) + print("%s: way future %d\n", x, dt); + return 1; + } + if(dt < -Tfuture) { + if(x != nil) + print("%s: way past %d\n", x, -dt); + return 1; + } + return 0; +} + +void +ilackproc(void *x) +{ + Ilcb *ic; + Conv **s, *p; + Proto *il; + + il = x; + +loop: + tsleep(&up->sleep, return0, 0, Iltickms); + for(s = il->conv; s && *s; s++) { + p = *s; + ic = (Ilcb*)p->ptcl; + + switch(ic->state) { + case Ilclosed: + case Illistening: + break; + case Ilclosing: + if(later(NOW, ic->timeout, "timeout0")) { + if(ic->rexmit > MaxRexmit){ + ilhangup(p, nil); + break; + } + ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0); + ilbackoff(ic); + } + break; + + case Ilsyncee: + case Ilsyncer: + if(later(NOW, ic->timeout, "timeout1")) { + if(ic->rexmit > MaxRexmit){ + ilhangup(p, etime); + break; + } + 
ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0); + ilbackoff(ic); + } + break; + + case Ilestablished: + if(ic->recvd != ic->acksent) + if(later(NOW, ic->acktime, "acktime")) + ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0); + + if(later(NOW, ic->querytime, "querytime")){ + if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){ + netlog(il->f, Logil, "il: hangup: deathtime\n"); + ilhangup(p, etime); + break; + } + ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); + ic->querytime = NOW + QueryTime; + } + + if(ic->unacked != nil) + if(later(NOW, ic->timeout, "timeout2")) { + if(ic->rexmit > MaxRexmit){ + netlog(il->f, Logil, "il: hangup: too many rexmits\n"); + ilhangup(p, etime); + break; + } + ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); + ic->rxquery++; + ilbackoff(ic); + } + break; + } + } + goto loop; +} + +void +ilcbinit(Ilcb *ic) +{ + ic->start = nrand(0x1000000); + ic->next = ic->start+1; + ic->recvd = 0; + ic->window = Defaultwin; + ic->unackedbytes = 0; + ic->unacked = nil; + ic->outoforder = nil; + ic->rexmit = 0; + ic->rxtot = 0; + ic->rxquery = 0; + ic->qtx = 1; + ic->fasttimeout = 0; + + /* timers */ + ic->delay = DefRtt<<LogAGain; + ic->mdev = DefRtt<<LogDGain; + ic->rate = DefByteRate<<LogAGain; + ic->querytime = NOW + QueryTime; + ic->lastrecv = NOW; /* or we'll timeout right away */ + ilsettimeout(ic); +} + +char* +ilstart(Conv *c, int type, int fasttimeout) +{ + Ilcb *ic; + Ilpriv *ipriv; + char kpname[KNAMELEN]; + + ipriv = c->p->priv; + + if(ipriv->ackprocstarted == 0){ + qlock(&ipriv->apl); + if(ipriv->ackprocstarted == 0){ + sprint(kpname, "#I%dilack", c->p->f->dev); + kproc(kpname, ilackproc, c->p); + ipriv->ackprocstarted = 1; + } + qunlock(&ipriv->apl); + } + + ic = (Ilcb*)c->ptcl; + ic->conv = c; + + if(ic->state != Ilclosed) + return nil; + + ilcbinit(ic); + + if(fasttimeout){ + /* timeout if we can't connect quickly */ + ic->fasttimeout = 1; + ic->timeout = NOW+Iltickms; + ic->rexmit = MaxRexmit - 4; + }; + + switch(type) { + default: + netlog(c->p->f, 
Logil, "il: start: type %d\n", type); + break; + case IL_LISTEN: + ic->state = Illistening; + iphtadd(&ipriv->ht, c); + break; + case IL_CONNECT: + ic->state = Ilsyncer; + iphtadd(&ipriv->ht, c); + ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0); + break; + } + + return nil; +} + +void +ilfreeq(Ilcb *ic) +{ + Block *bp, *next; + + qlock(&ic->ackq); + for(bp = ic->unacked; bp; bp = next) { + next = bp->list; + freeblist(bp); + } + ic->unacked = nil; + qunlock(&ic->ackq); + + qlock(&ic->outo); + for(bp = ic->outoforder; bp; bp = next) { + next = bp->list; + freeblist(bp); + } + ic->outoforder = nil; + qunlock(&ic->outo); +} + +void +iladvise(Proto *il, Block *bp, char *msg) +{ + Ilhdr *h; + Ilcb *ic; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource; + Conv *s, **p; + + h = (Ilhdr*)(bp->rp); + + v4tov6(dest, h->dst); + v4tov6(source, h->src); + psource = nhgets(h->ilsrc); + + + /* Look for a connection, unfortunately the destination port is missing */ + qlock(il); + for(p = il->conv; *p; p++) { + s = *p; + if(s->lport == psource) + if(ipcmp(s->laddr, source) == 0) + if(ipcmp(s->raddr, dest) == 0){ + qunlock(il); + ic = (Ilcb*)s->ptcl; + switch(ic->state){ + case Ilsyncer: + ilhangup(s, msg); + break; + } + freeblist(bp); + return; + } + } + qunlock(il); + freeblist(bp); +} + +int +ilnextqt(Ilcb *ic) +{ + int x; + + qlock(&ic->ackq); + x = ic->qtx; + if(++x > Nqt) + x = 1; + ic->qtx = x; + ic->qt[x] = ic->next-1; /* highest xmitted packet */ + ic->qt[0] = ic->qt[x]; /* compatibility with old implementations */ + qunlock(&ic->ackq); + + return x; +} + +/* calculate scale constants that converts fast ticks to ms (more or less) */ +static void +inittimescale(void) +{ + uvlong hz; + + fastticks(&hz); + if(hz > 1000){ + scalediv = hz/1000; + scalemul = 1; + } else { + scalediv = 1; + scalemul = 1000/hz; + } +} + +void +ilinit(Fs *f) +{ + Proto *il; + + inittimescale(); + + il = smalloc(sizeof(Proto)); + il->priv = smalloc(sizeof(Ilpriv)); + il->name = "il"; + 
il->connect = ilconnect; + il->announce = ilannounce; + il->state = ilstate; + il->create = ilcreate; + il->close = ilclose; + il->rcv = iliput; + il->ctl = nil; + il->advise = iladvise; + il->stats = ilxstats; + il->inuse = ilinuse; + il->gc = nil; + il->ipproto = IP_ILPROTO; + il->nc = scalednconv(); + il->ptclsize = sizeof(Ilcb); + Fsproto(f, il); +} diff -Nru 0/sys/src/nix/ip/inferno.c 4/sys/src/nix/ip/inferno.c --- 0/sys/src/nix/ip/inferno.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/inferno.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,41 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" + +/* + * some hacks for commonality twixt inferno and plan9 + */ + +char* +commonuser(void) +{ + return up->user; +} + +Chan* +commonfdtochan(int fd, int mode, int a, int b) +{ + return fdtochan(fd, mode, a, b); +} + +char* +commonerror(void) +{ + return up->errstr; +} + +char* +bootp(Ipifc*) +{ + return "unimplmented"; +} + +int +bootpread(char*, ulong, int) +{ + return 0; +} diff -Nru 0/sys/src/nix/ip/ip.c 4/sys/src/nix/ip/ip.c --- 0/sys/src/nix/ip/ip.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ip.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,805 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +typedef struct Ip4hdr Ip4hdr; +typedef struct IP IP; +typedef struct Fragment4 Fragment4; +typedef struct Fragment6 Fragment6; +typedef struct Ipfrag Ipfrag; + +enum +{ + IP4HDR = 20, /* sizeof(Ip4hdr) */ + IP6HDR = 40, /* sizeof(Ip6hdr) */ + IP_HLEN4 = 0x05, /* Header length in words */ + IP_DF = 0x4000, /* Don't fragment */ + IP_MF = 0x2000, /* More fragments */ + IP6FHDR = 8, /* sizeof(Fraghdr6) */ + IP_MAX = 64*1024, /* Maximum Internet packet size */ +}; + +#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0) + +struct Ip4hdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of 
service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* ip->identification */ + uchar frag[2]; /* Fragment information */ + uchar ttl; /* Time to live */ + uchar proto; /* Protocol */ + uchar cksum[2]; /* Header checksum */ + uchar src[4]; /* IP source */ + uchar dst[4]; /* IP destination */ +}; + +/* MIB II counters */ +enum +{ + Forwarding, + DefaultTTL, + InReceives, + InHdrErrors, + InAddrErrors, + ForwDatagrams, + InUnknownProtos, + InDiscards, + InDelivers, + OutRequests, + OutDiscards, + OutNoRoutes, + ReasmTimeout, + ReasmReqds, + ReasmOKs, + ReasmFails, + FragOKs, + FragFails, + FragCreates, + + Nstats, +}; + +struct Fragment4 +{ + Block* blist; + Fragment4* next; + ulong src; + ulong dst; + ushort id; + ulong age; +}; + +struct Fragment6 +{ + Block* blist; + Fragment6* next; + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; + uint id; + ulong age; +}; + +struct Ipfrag +{ + ushort foff; + ushort flen; +}; + +/* an instance of IP */ +struct IP +{ + ulong stats[Nstats]; + + QLock fraglock4; + Fragment4* flisthead4; + Fragment4* fragfree4; + Ref id4; + + QLock fraglock6; + Fragment6* flisthead6; + Fragment6* fragfree6; + Ref id6; + + int iprouting; /* true if we route like a gateway */ +}; + +static char *statnames[] = +{ +[Forwarding] "Forwarding", +[DefaultTTL] "DefaultTTL", +[InReceives] "InReceives", +[InHdrErrors] "InHdrErrors", +[InAddrErrors] "InAddrErrors", +[ForwDatagrams] "ForwDatagrams", +[InUnknownProtos] "InUnknownProtos", +[InDiscards] "InDiscards", +[InDelivers] "InDelivers", +[OutRequests] "OutRequests", +[OutDiscards] "OutDiscards", +[OutNoRoutes] "OutNoRoutes", +[ReasmTimeout] "ReasmTimeout", +[ReasmReqds] "ReasmReqds", +[ReasmOKs] "ReasmOKs", +[ReasmFails] "ReasmFails", +[FragOKs] "FragOKs", +[FragFails] "FragFails", +[FragCreates] "FragCreates", +}; + +#define BLKIP(xp) ((Ip4hdr*)((xp)->rp)) +/* + * This sleazy macro relies on the media header size being + * larger than sizeof(Ipfrag). 
ipreassemble checks this is true + */ +#define BKFG(xp) ((Ipfrag*)((xp)->base)) + +ushort ipcsum(uchar*); +Block* ip4reassemble(IP*, int, Block*, Ip4hdr*); +void ipfragfree4(IP*, Fragment4*); +Fragment4* ipfragallo4(IP*); + + +void +ip_init_6(Fs *f) +{ + v6params *v6p; + + v6p = smalloc(sizeof(v6params)); + + v6p->rp.mflag = 0; /* default not managed */ + v6p->rp.oflag = 0; + v6p->rp.maxraint = 600000; /* millisecs */ + v6p->rp.minraint = 200000; + v6p->rp.linkmtu = 0; /* no mtu sent */ + v6p->rp.reachtime = 0; + v6p->rp.rxmitra = 0; + v6p->rp.ttl = MAXTTL; + v6p->rp.routerlt = 3*(v6p->rp.maxraint); + + v6p->hp.rxmithost = 1000; /* v6 RETRANS_TIMER */ + + v6p->cdrouter = -1; + + f->v6p = v6p; + +} + +void +initfrag(IP *ip, int size) +{ + Fragment4 *fq4, *eq4; + Fragment6 *fq6, *eq6; + + ip->fragfree4 = (Fragment4*)malloc(sizeof(Fragment4) * size); + if(ip->fragfree4 == nil) + panic("initfrag"); + + eq4 = &ip->fragfree4[size]; + for(fq4 = ip->fragfree4; fq4 < eq4; fq4++) + fq4->next = fq4+1; + + ip->fragfree4[size-1].next = nil; + + ip->fragfree6 = (Fragment6*)malloc(sizeof(Fragment6) * size); + if(ip->fragfree6 == nil) + panic("initfrag"); + + eq6 = &ip->fragfree6[size]; + for(fq6 = ip->fragfree6; fq6 < eq6; fq6++) + fq6->next = fq6+1; + + ip->fragfree6[size-1].next = nil; +} + +void +ip_init(Fs *f) +{ + IP *ip; + + ip = smalloc(sizeof(IP)); + initfrag(ip, 100); + f->ip = ip; + + ip_init_6(f); +} + +void +iprouting(Fs *f, int on) +{ + f->ip->iprouting = on; + if(f->ip->iprouting==0) + f->ip->stats[Forwarding] = 2; + else + f->ip->stats[Forwarding] = 1; +} + +int +ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c) +{ + Ipifc *ifc; + uchar *gate; + ulong fragoff; + Block *xp, *nb; + Ip4hdr *eh, *feh; + int lid, len, seglen, chunk, dlen, blklen, offset, medialen; + Route *r, *sr; + IP *ip; + int rv = 0; + + ip = f->ip; + + /* Fill out the ip header */ + eh = (Ip4hdr*)(bp->rp); + + ip->stats[OutRequests]++; + + /* Number of uchars in data and ip header to 
write */ + len = blocklen(bp); + + if(gating){ + chunk = nhgets(eh->length); + if(chunk > len){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "short gated packet\n"); + goto free; + } + if(chunk < len) + len = chunk; + } + if(len >= IP_MAX){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "exceeded ip max size %V\n", eh->dst); + goto free; + } + + r = v4lookup(f, eh->dst, c); + if(r == nil){ + ip->stats[OutNoRoutes]++; + netlog(f, Logip, "no interface %V\n", eh->dst); + rv = -1; + goto free; + } + + ifc = r->ifc; + if(r->type & (Rifc|Runi)) + gate = eh->dst; + else + if(r->type & (Rbcast|Rmulti)) { + gate = eh->dst; + sr = v4lookup(f, eh->src, nil); + if(sr != nil && (sr->type & Runi)) + ifc = sr->ifc; + } + else + gate = r->v4.gate; + + if(!gating) + eh->vihl = IP_VER4|IP_HLEN4; + eh->ttl = ttl; + if(!gating) + eh->tos = tos; + + if(!canrlock(ifc)) + goto free; + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->medium == nil) + goto raise; + + /* If we don't need to fragment just send it */ + medialen = ifc->maxtu - ifc->medium->hsize; + if(len <= medialen) { + if(!gating) + hnputs(eh->id, incref(&ip->id4)); + hnputs(eh->length, len); + if(!gating){ + eh->frag[0] = 0; + eh->frag[1] = 0; + } + eh->cksum[0] = 0; + eh->cksum[1] = 0; + hnputs(eh->cksum, ipcsum(&eh->vihl)); + ifc->medium->bwrite(ifc, bp, V4, gate); + runlock(ifc); + poperror(); + return 0; + } + +if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst); + + if(eh->frag[0] & (IP_DF>>8)){ + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + icmpcantfrag(f, bp, medialen); + netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst); + goto raise; + } + + seglen = (medialen - IP4HDR) & ~7; + if(seglen < 8){ + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%V seglen < 8\n", eh->dst); + goto raise; + } + + dlen = len - IP4HDR; + xp = bp; + if(gating) + lid = nhgets(eh->id); + else + lid = incref(&ip->id4); + + offset = IP4HDR; + while(xp != nil && 
offset && offset >= BLEN(xp)) { + offset -= BLEN(xp); + xp = xp->next; + } + xp->rp += offset; + + if(gating) + fragoff = nhgets(eh->frag)<<3; + else + fragoff = 0; + dlen += fragoff; + for(; fragoff < dlen; fragoff += seglen) { + nb = allocb(IP4HDR+seglen); + feh = (Ip4hdr*)(nb->rp); + + memmove(nb->wp, eh, IP4HDR); + nb->wp += IP4HDR; + + if((fragoff + seglen) >= dlen) { + seglen = dlen - fragoff; + hnputs(feh->frag, fragoff>>3); + } + else + hnputs(feh->frag, (fragoff>>3)|IP_MF); + + hnputs(feh->length, seglen + IP4HDR); + hnputs(feh->id, lid); + + /* Copy up the data area */ + chunk = seglen; + while(chunk) { + if(!xp) { + ip->stats[OutDiscards]++; + ip->stats[FragFails]++; + freeblist(nb); + netlog(f, Logip, "!xp: chunk %d\n", chunk); + goto raise; + } + blklen = chunk; + if(BLEN(xp) < chunk) + blklen = BLEN(xp); + memmove(nb->wp, xp->rp, blklen); + nb->wp += blklen; + xp->rp += blklen; + chunk -= blklen; + if(xp->rp == xp->wp) + xp = xp->next; + } + + feh->cksum[0] = 0; + feh->cksum[1] = 0; + hnputs(feh->cksum, ipcsum(&feh->vihl)); + ifc->medium->bwrite(ifc, nb, V4, gate); + ip->stats[FragCreates]++; + } + ip->stats[FragOKs]++; +raise: + runlock(ifc); + poperror(); +free: + freeblist(bp); + return rv; +} + +void +ipiput4(Fs *f, Ipifc *ifc, Block *bp) +{ + int hl; + int hop, tos, proto, olen; + Ip4hdr *h; + Proto *p; + ushort frag; + int notforme; + uchar *dp, v6dst[IPaddrlen]; + IP *ip; + Route *r; + + if(BLKIPVER(bp) != IP_VER4) { + ipiput6(f, ifc, bp); + return; + } + + ip = f->ip; + ip->stats[InReceives]++; + + /* + * Ensure we have all the header info in the first + * block. Make life easier for other protocols by + * collecting up to the first 64 bytes in the first block. 
+ */ + if(BLEN(bp) < 64) { + hl = blocklen(bp); + if(hl < IP4HDR) + hl = IP4HDR; + if(hl > 64) + hl = 64; + bp = pullupblock(bp, hl); + if(bp == nil) + return; + } + + h = (Ip4hdr*)(bp->rp); + + /* dump anything whose header doesn't checksum */ + if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) { + ip->stats[InHdrErrors]++; + netlog(f, Logip, "ip: checksum error %V\n", h->src); + freeblist(bp); + return; + } + v4tov6(v6dst, h->dst); + notforme = ipforme(f, v6dst) == 0; + + /* Check header length and version */ + if((h->vihl&0x0F) != IP_HLEN4) { + hl = (h->vihl&0xF)<<2; + if(hl < (IP_HLEN4<<2)) { + ip->stats[InHdrErrors]++; + netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl); + freeblist(bp); + return; + } + /* If this is not routed strip off the options */ + if(notforme == 0) { + olen = nhgets(h->length); + dp = bp->rp + (hl - (IP_HLEN4<<2)); + memmove(dp, h, IP_HLEN4<<2); + bp->rp = dp; + h = (Ip4hdr*)(bp->rp); + h->vihl = (IP_VER4|IP_HLEN4); + hnputs(h->length, olen-hl+(IP_HLEN4<<2)); + } + } + + /* route */ + if(notforme) { + Conv conv; + + if(!ip->iprouting){ + freeb(bp); + return; + } + + /* don't forward to source's network */ + conv.r = nil; + r = v4lookup(f, h->dst, &conv); + if(r == nil || r->ifc == ifc){ + ip->stats[OutDiscards]++; + freeblist(bp); + return; + } + + /* don't forward if packet has timed out */ + hop = h->ttl; + if(hop < 1) { + ip->stats[InHdrErrors]++; + icmpttlexceeded(f, ifc->lifc->local, bp); + freeblist(bp); + return; + } + + /* reassemble if the interface expects it */ +if(r->ifc == nil) panic("nil route rfc"); + if(r->ifc->reassemble){ + frag = nhgets(h->frag); + if(frag) { + h->tos = 0; + if(frag & IP_MF) + h->tos = 1; 
+ bp = ip4reassemble(ip, frag, bp, h); + if(bp == nil) + return; + h = (Ip4hdr*)(bp->rp); + } + + /* don't let any frag info go up the stack */ + h->frag[0] = 0; + h->frag[1] = 0; + + proto = h->proto; + p = Fsrcvpcol(f, proto); + if(p != nil && p->rcv != nil) { + ip->stats[InDelivers]++; + (*p->rcv)(p, ifc, bp); + return; + } + ip->stats[InDiscards]++; + ip->stats[InUnknownProtos]++; + freeblist(bp); +} + +int +ipstats(Fs *f, char *buf, int len) +{ + IP *ip; + char *p, *e; + int i; + + ip = f->ip; + ip->stats[DefaultTTL] = MAXTTL; + + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]); + return p - buf; +} + +Block* +ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih) +{ + int fend; + ushort id; + Fragment4 *f, *fnext; + ulong src, dst; + Block *bl, **l, *last, *prev; + int ovlap, len, fragsize, pktposn; + + src = nhgetl(ih->src); + dst = nhgetl(ih->dst); + id = nhgets(ih->id); + + /* + * block lists are too hard, pullupblock into a single block + */ + if(bp->next){ + bp = pullupblock(bp, blocklen(bp)); + ih = (Ip4hdr*)(bp->rp); + } + + qlock(&ip->fraglock4); + + /* + * find a reassembly queue for this fragment + */ + for(f = ip->flisthead4; f; f = fnext){ + fnext = f->next; /* because ipfragfree4 changes the list */ + if(f->src == src && f->dst == dst && f->id == id) + break; + if(f->age < NOW){ + ip->stats[ReasmTimeout]++; + ipfragfree4(ip, f); + } + } + + /* + * if this isn't a fragmented packet, accept it + * and get rid of any fragments that might go + * with it. 
+ */ + if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) { + if(f != nil) { + ipfragfree4(ip, f); + ip->stats[ReasmFails]++; + } + qunlock(&ip->fraglock4); + return bp; + } + + if(bp->base+sizeof(Ipfrag) >= bp->rp){ + bp = padblock(bp, sizeof(Ipfrag)); + bp->rp += sizeof(Ipfrag); + } + + BKFG(bp)->foff = offset<<3; + BKFG(bp)->flen = nhgets(ih->length)-IP4HDR; + + /* First fragment allocates a reassembly queue */ + if(f == nil) { + f = ipfragallo4(ip); + f->id = id; + f->src = src; + f->dst = dst; + + f->blist = bp; + + qunlock(&ip->fraglock4); + ip->stats[ReasmReqds]++; + return nil; + } + + /* + * find the new fragment's position in the queue + */ + prev = nil; + l = &f->blist; + bl = f->blist; + while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) { + prev = bl; + l = &bl->next; + bl = bl->next; + } + + /* Check overlap of a previous fragment - trim away as necessary */ + if(prev) { + ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff; + if(ovlap > 0) { + if(ovlap >= BKFG(bp)->flen) { + freeblist(bp); + qunlock(&ip->fraglock4); + return nil; + } + BKFG(prev)->flen -= ovlap; + } + } + + /* Link onto assembly queue */ + bp->next = *l; + *l = bp; + + /* Check to see if succeeding segments overlap */ + if(bp->next) { + l = &bp->next; + fend = BKFG(bp)->foff + BKFG(bp)->flen; + /* Take completely covered segments out */ + while(*l) { + ovlap = fend - BKFG(*l)->foff; + if(ovlap <= 0) + break; + if(ovlap < BKFG(*l)->flen) { + BKFG(*l)->flen -= ovlap; + BKFG(*l)->foff += ovlap; + /* move up ih hdrs */ + memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR); + (*l)->rp += ovlap; + break; + } + last = (*l)->next; + (*l)->next = nil; + freeblist(*l); + *l = last; + } + } + + /* + * look for a complete packet. if we get to a fragment + * without IP_MF set, we're done. 
+ */ + pktposn = 0; + for(bl = f->blist; bl; bl = bl->next) { + if(BKFG(bl)->foff != pktposn) + break; + if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) { + bl = f->blist; + len = nhgets(BLKIP(bl)->length); + bl->wp = bl->rp + len; + + /* Pullup all the fragment headers and + * return a complete packet + */ + for(bl = bl->next; bl; bl = bl->next) { + fragsize = BKFG(bl)->flen; + len += fragsize; + bl->rp += IP4HDR; + bl->wp = bl->rp + fragsize; + } + + bl = f->blist; + f->blist = nil; + ipfragfree4(ip, f); + ih = BLKIP(bl); + hnputs(ih->length, len); + qunlock(&ip->fraglock4); + ip->stats[ReasmOKs]++; + return bl; + } + pktposn += BKFG(bl)->flen; + } + qunlock(&ip->fraglock4); + return nil; +} + +/* + * ipfragfree4 - Free a list of fragments - assume hold fraglock4 + */ +void +ipfragfree4(IP *ip, Fragment4 *frag) +{ + Fragment4 *fl, **l; + + if(frag->blist) + freeblist(frag->blist); + + frag->src = 0; + frag->id = 0; + frag->blist = nil; + + l = &ip->flisthead4; + for(fl = *l; fl; fl = fl->next) { + if(fl == frag) { + *l = frag->next; + break; + } + l = &fl->next; + } + + frag->next = ip->fragfree4; + ip->fragfree4 = frag; + +} + +/* + * ipfragallo4 - allocate a reassembly queue - assume hold fraglock4 + */ +Fragment4 * +ipfragallo4(IP *ip) +{ + Fragment4 *f; + + while(ip->fragfree4 == nil) { + /* free last entry on fraglist */ + for(f = ip->flisthead4; f->next; f = f->next) + ; + ipfragfree4(ip, f); + } + f = ip->fragfree4; + ip->fragfree4 = f->next; + f->next = ip->flisthead4; + ip->flisthead4 = f; + f->age = NOW + 30000; + + return f; +} + +ushort +ipcsum(uchar *addr) +{ + int len; + ulong sum; + + sum = 0; + len = (addr[0]&0xf)<<2; + + while(len > 0) { + sum += addr[0]<<8 | addr[1] ; + len -= 2; + addr += 2; + } + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + + return (sum^0xffff); +} diff -Nru 0/sys/src/nix/ip/ip.h 4/sys/src/nix/ip/ip.h --- 0/sys/src/nix/ip/ip.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ip.h Wed Feb 6 00:00:00 2013 
@@ -0,0 +1,645 @@ +typedef struct Conv Conv; +typedef struct Fs Fs; +typedef union Hwaddr Hwaddr; +typedef struct IP IP; +typedef struct IPaux IPaux; +typedef struct Ipself Ipself; +typedef struct Ipselftab Ipselftab; +typedef struct Iplink Iplink; +typedef struct Iplifc Iplifc; +typedef struct Ipmulti Ipmulti; +typedef struct Ipifc Ipifc; +typedef struct Iphash Iphash; +typedef struct Ipht Ipht; +typedef struct Netlog Netlog; +typedef struct Medium Medium; +typedef struct Proto Proto; +typedef struct Arpent Arpent; +typedef struct Arp Arp; +typedef struct Route Route; + +typedef struct Routerparams Routerparams; +typedef struct Hostparams Hostparams; +typedef struct v6router v6router; +typedef struct v6params v6params; + +#pragma incomplete Arp +#pragma incomplete Ipself +#pragma incomplete Ipselftab +#pragma incomplete IP +#pragma incomplete Netlog + +enum +{ + Addrlen= 64, + Maxproto= 20, + Nhash= 64, + Maxincall= 128, + Nchans= 1024, + MAClen= 16, /* longest mac address */ + + MAXTTL= 255, + DFLTTOS= 0, + + IPaddrlen= 16, + IPv4addrlen= 4, + IPv4off= 12, + IPllen= 4, + + /* ip versions */ + V4= 4, + V6= 6, + IP_VER4= 0x40, + IP_VER6= 0x60, + + /* 2^Lroot trees in the root table */ + Lroot= 10, + + Maxpath = 64, +}; + +enum +{ + Idle= 0, + Announcing= 1, + Announced= 2, + Connecting= 3, + Connected= 4, +}; + +/* + * one per conversation directory + */ +struct Conv +{ + QLock; + + int x; /* conversation index */ + Proto* p; + + int restricted; /* remote port is restricted */ + uint ttl; /* max time to live */ + uint tos; /* type of service */ + int ignoreadvice; /* don't terminate connection on icmp errors */ + + uchar ipversion; + uchar laddr[IPaddrlen]; /* local IP address */ + uchar raddr[IPaddrlen]; /* remote IP address */ + ushort lport; /* local port number */ + ushort rport; /* remote port number */ + + char *owner; /* protections */ + int perm; + int inuse; /* opens of listen/data/ctl */ + int length; + int state; + + /* udp specific */ + int headers; /* 
data src/dst headers in udp */ + int reliable; /* true if reliable udp */ + + Conv* incall; /* calls waiting to be listened for */ + Conv* next; + + Queue* rq; /* queued data waiting to be read */ + Queue* wq; /* queued data waiting to be written */ + Queue* eq; /* returned error packets */ + Queue* sq; /* snooping queue */ + Ref snoopers; /* number of processes with snoop open */ + + QLock car; + Rendez cr; + char cerr[ERRMAX]; + + QLock listenq; + Rendez listenr; + + Ipmulti *multi; /* multicast bindings for this interface */ + + void* ptcl; /* protocol specific stuff */ + + Route *r; /* last route used */ + ulong rgen; /* routetable generation for *r */ +}; + +struct Medium +{ + char *name; + int hsize; /* medium header size */ + int mintu; /* default min mtu */ + int maxtu; /* default max mtu */ + int maclen; /* mac address length */ + void (*bind)(Ipifc*, int, char**); + void (*unbind)(Ipifc*); + void (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip); + + /* for arming interfaces to receive multicast */ + void (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia); + void (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia); + + /* process packets written to 'data' */ + void (*pktin)(Fs *f, Ipifc *ifc, Block *bp); + + /* routes for router boards */ + void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int); + void (*remroute)(Ipifc *ifc, int, uchar*, uchar*); + void (*flushroutes)(Ipifc *ifc); + + /* for routing multicast groups */ + void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia); + void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia); + + /* address resolution */ + void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */ + void (*areg)(Ipifc*, uchar*); /* register */ + + /* v6 address generation */ + void (*pref2addr)(uchar *pref, uchar *ea); + + int unbindonclose; /* if non-zero, unbind on last close */ +}; + +/* logical interface associated with a physical one */ +struct Iplifc +{ + uchar local[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar 
remote[IPaddrlen]; + uchar net[IPaddrlen]; + uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */ + uchar onlink; /* =1 => onlink, =0 => offlink. */ + uchar autoflag; /* v6 autonomous flag */ + long validlt; /* v6 valid lifetime */ + long preflt; /* v6 preferred lifetime */ + long origint; /* time when addr was added */ + Iplink *link; /* addresses linked to this lifc */ + Iplifc *next; +}; + +/* binding twixt Ipself and Iplifc */ +struct Iplink +{ + Ipself *self; + Iplifc *lifc; + Iplink *selflink; /* next link for this local address */ + Iplink *lifclink; /* next link for this ifc */ + ulong expire; + Iplink *next; /* free list */ + int ref; +}; + +/* rfc 2461, pp.40—43. */ + +/* default values, one per stack */ +struct Routerparams { + int mflag; /* flag: managed address configuration */ + int oflag; /* flag: other stateful configuration */ + int maxraint; /* max. router adv interval (ms) */ + int minraint; /* min. router adv interval (ms) */ + int linkmtu; /* mtu options */ + int reachtime; /* reachable time */ + int rxmitra; /* retransmit interval */ + int ttl; /* cur hop count limit */ + int routerlt; /* router lifetime */ +}; + +struct Hostparams { + int rxmithost; +}; + +struct Ipifc +{ + RWlock; + + Conv *conv; /* link to its conversation structure */ + char dev[64]; /* device we're attached to */ + Medium *medium; /* Media pointer */ + int maxtu; /* Maximum transfer unit */ + int mintu; /* Minimum transfer unit */ + int mbps; /* megabits per second */ + void *arg; /* medium specific */ + int reassemble; /* reassemble IP packets before forwarding */ + + /* these are used so that we can unbind on the fly */ + Lock idlock; + uchar ifcid; /* incremented each 'bind/unbind/add/remove' */ + int ref; /* number of proc's using this ipifc */ + Rendez wait; /* where unbinder waits for ref == 0 */ + int unbinding; + + uchar mac[MAClen]; /* MAC address */ + + Iplifc *lifc; /* logical interfaces on this physical one */ + + ulong in, out; /* message 
statistics */ + ulong inerr, outerr; /* ... */ + + uchar sendra6; /* flag: send router advs on this ifc */ + uchar recvra6; /* flag: recv router advs on this ifc */ + Routerparams rp; /* router parameters as in RFC 2461, pp.40—43. + used only if node is router */ +}; + +/* + * one per multicast-lifc pair used by a Conv + */ +struct Ipmulti +{ + uchar ma[IPaddrlen]; + uchar ia[IPaddrlen]; + Ipmulti *next; +}; + +/* + * hash table for 2 ip addresses + 2 ports + */ +enum +{ + Nipht= 521, /* convenient prime */ + + IPmatchexact= 0, /* match on 4 tuple */ + IPmatchany, /* *!* */ + IPmatchport, /* *!port */ + IPmatchaddr, /* addr!* */ + IPmatchpa, /* addr!port */ +}; +struct Iphash +{ + Iphash *next; + Conv *c; + int match; +}; +struct Ipht +{ + Lock; + Iphash *tab[Nipht]; +}; +void iphtadd(Ipht*, Conv*); +void iphtrem(Ipht*, Conv*); +Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp); + +/* + * one per multiplexed protocol + */ +struct Proto +{ + QLock; + char* name; /* protocol name */ + int x; /* protocol index */ + int ipproto; /* ip protocol type */ + + char* (*connect)(Conv*, char**, int); + char* (*announce)(Conv*, char**, int); + char* (*bind)(Conv*, char**, int); + int (*state)(Conv*, char*, int); + void (*create)(Conv*); + void (*close)(Conv*); + void (*rcv)(Proto*, Ipifc*, Block*); + char* (*ctl)(Conv*, char**, int); + void (*advise)(Proto*, Block*, char*); + int (*stats)(Proto*, char*, int); + int (*local)(Conv*, char*, int); + int (*remote)(Conv*, char*, int); + int (*inuse)(Conv*); + int (*gc)(Proto*); /* returns true if any conversations are freed */ + + Fs *f; /* file system this proto is part of */ + Conv **conv; /* array of conversations */ + int ptclsize; /* size of per protocol ctl block */ + int nc; /* number of conversations */ + int ac; + Qid qid; /* qid for protocol directory */ + ushort nextport; + ushort nextrport; + + void *priv; +}; + + +/* + * one per IP protocol stack + */ +struct Fs +{ + RWlock; + int dev; + + int np; + 
Proto* p[Maxproto+1]; /* list of supported protocols */ + Proto* t2p[256]; /* vector of all protocols */ + Proto* ipifc; /* kludge for ipifcremroute & ipifcaddroute */ + Proto* ipmux; /* kludge for finding an ip multiplexor */ + + IP *ip; + Ipselftab *self; + Arp *arp; + v6params *v6p; + + Route *v4root[1<= 0. */ +}; + + +int Fsconnected(Conv*, char*); +Conv* Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar); +int Fspcolstats(char*, int); +int Fsproto(Fs*, Proto*); +int Fsbuiltinproto(Fs*, uchar); +Conv* Fsprotoclone(Proto*, char*); +Proto* Fsrcvpcol(Fs*, uchar); +Proto* Fsrcvpcolx(Fs*, uchar); +char* Fsstdconnect(Conv*, char**, int); +char* Fsstdannounce(Conv*, char**, int); +char* Fsstdbind(Conv*, char**, int); +ulong scalednconv(void); +void closeconv(Conv*); +/* + * logging + */ +enum +{ + Logip= 1<<1, + Logtcp= 1<<2, + Logfs= 1<<3, + Logil= 1<<4, + Logicmp= 1<<5, + Logudp= 1<<6, + Logcompress= 1<<7, + Logilmsg= 1<<8, + Loggre= 1<<9, + Logppp= 1<<10, + Logtcprxmt= 1<<11, + Logigmp= 1<<12, + Logudpmsg= 1<<13, + Logipmsg= 1<<14, + Logrudp= 1<<15, + Logrudpmsg= 1<<16, + Logesp= 1<<17, + Logtcpwin= 1<<18, +}; + +void netloginit(Fs*); +void netlogopen(Fs*); +void netlogclose(Fs*); +void netlogctl(Fs*, char*, int); +long netlogread(Fs*, void*, ulong, long); +void netlog(Fs*, int, char*, ...); +void ifcloginit(Fs*); +long ifclogread(Fs*, Chan *,void*, ulong, long); +void ifclog(Fs*, uchar *, int); +void ifclogopen(Fs*, Chan*); +void ifclogclose(Fs*, Chan*); + +/* + * iproute.c + */ +typedef struct RouteTree RouteTree; +typedef struct Routewalk Routewalk; +typedef struct V4route V4route; +typedef struct V6route V6route; + +enum +{ + + /* type bits */ + Rv4= (1<<0), /* this is a version 4 route */ + Rifc= (1<<1), /* this route is a directly connected interface */ + Rptpt= (1<<2), /* this route is a pt to pt interface */ + Runi= (1<<3), /* a unicast self address */ + Rbcast= (1<<4), /* a broadcast self address */ + Rmulti= (1<<5), /* a multicast self address */ + 
Rproxy= (1<<6), /* this route should be proxied */ +}; + +struct Routewalk +{ + int o; + int h; + char* p; + char* e; + void* state; + void (*walk)(Route*, Routewalk*); +}; + +struct RouteTree +{ + Route* right; + Route* left; + Route* mid; + uchar depth; + uchar type; + uchar ifcid; /* must match ifc->id */ + Ipifc *ifc; + char tag[4]; + int ref; +}; + +struct V4route +{ + ulong address; + ulong endaddress; + uchar gate[IPv4addrlen]; +}; + +struct V6route +{ + ulong address[IPllen]; + ulong endaddress[IPllen]; + uchar gate[IPaddrlen]; +}; + +struct Route +{ + RouteTree; + + union { + V6route v6; + V4route v4; + }; +}; +extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type); +extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type); +extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock); +extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock); +extern Route* v4lookup(Fs *f, uchar *a, Conv *c); +extern Route* v6lookup(Fs *f, uchar *a, Conv *c); +extern long routeread(Fs *f, char*, ulong, int); +extern long routewrite(Fs *f, Chan*, char*, int); +extern void routetype(int, char*); +extern void ipwalkroutes(Fs*, Routewalk*); +extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*); + +/* + * devip.c + */ + +/* + * Hanging off every ip channel's ->aux is the following structure. + * It maintains the state used by devip and iproute. 
+ */ +struct IPaux +{ + char *owner; /* the user that did the attach */ + char tag[4]; +}; + +extern IPaux* newipaux(char*, char*); + +/* + * arp.c + */ +struct Arpent +{ + uchar ip[IPaddrlen]; + uchar mac[MAClen]; + Medium *type; /* media type */ + Arpent* hash; + Block* hold; + Block* last; + uint ctime; /* time entry was created or refreshed */ + uint utime; /* time entry was last used */ + uchar state; + Arpent *nextrxt; /* re-transmit chain */ + uint rtime; /* time for next retransmission */ + uchar rxtsrem; + Ipifc *ifc; + uchar ifcid; /* must match ifc->id */ +}; + +extern void arpinit(Fs*); +extern int arpread(Arp*, char*, ulong, int); +extern int arpwrite(Fs*, char*, int); +extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h); +extern void arprelease(Arp*, Arpent *a); +extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac); +extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh); + +/* + * ipaux.c + */ + +extern int myetheraddr(uchar*, char*); +extern vlong parseip(uchar*, char*); +extern vlong parseipmask(uchar*, char*); +extern char* v4parseip(uchar*, char*); +extern void maskip(uchar *from, uchar *mask, uchar *to); +extern int parsemac(uchar *to, char *from, int len); +extern uchar* defmask(uchar*); +extern int isv4(uchar*); +extern void v4tov6(uchar *v6, uchar *v4); +extern int v6tov4(uchar *v4, uchar *v6); +extern int eipfmt(Fmt*); + +#define ipmove(x, y) memmove(x, y, IPaddrlen) +#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) + +extern uchar IPv4bcast[IPaddrlen]; +extern uchar IPv4bcastobs[IPaddrlen]; +extern uchar IPv4allsys[IPaddrlen]; +extern uchar IPv4allrouter[IPaddrlen]; +extern uchar IPnoaddr[IPaddrlen]; +extern uchar v4prefix[IPaddrlen]; +extern uchar IPallbits[IPaddrlen]; + +#define NOW TK2MS(sys->machptr[0]->ticks) + +/* + * media + */ +extern Medium ethermedium; +extern Medium nullmedium; +extern Medium pktmedium; + +/* + * 
ipifc.c + */ +extern Medium* ipfindmedium(char *name); +extern void addipmedium(Medium *med); +extern int ipforme(Fs*, uchar *addr); +extern int iptentative(Fs*, uchar *addr); +extern int ipisbm(uchar *); +extern int ipismulticast(uchar *); +extern Ipifc* findipifc(Fs*, uchar *remote, int type); +extern void findlocalip(Fs*, uchar *local, uchar *remote); +extern int ipv4local(Ipifc *ifc, uchar *addr); +extern int ipv6local(Ipifc *ifc, uchar *addr); +extern int ipv6anylocal(Ipifc *ifc, uchar *addr); +extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip); +extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip); +extern int ipismulticast(uchar *ip); +extern int ipisbooting(void); +extern int ipifccheckin(Ipifc *ifc, Medium *med); +extern void ipifccheckout(Ipifc *ifc); +extern int ipifcgrab(Ipifc *ifc); +extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int); +extern void ipifcremroute(Fs*, int, uchar*, uchar*); +extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia); +extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia); +extern char* ipifcrem(Ipifc *ifc, char **argv, int argc); +extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp); +extern long ipselftabread(Fs*, char *a, ulong offset, int n); +extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc); +/* + * ip.c + */ +extern void iprouting(Fs*, int); +extern void icmpnoconv(Fs*, Block*); +extern void icmpcantfrag(Fs*, Block*, int); +extern void icmpttlexceeded(Fs*, uchar*, Block*); +extern ushort ipcsum(uchar*); +extern void ipiput4(Fs*, Ipifc*, Block*); +extern void ipiput6(Fs*, Ipifc*, Block*); +extern int ipoput4(Fs*, Block*, int, int, int, Conv*); +extern int ipoput6(Fs*, Block*, int, int, int, Conv*); +extern int ipstats(Fs*, char*, int); +extern ushort ptclbsum(uchar*, int); +extern ushort ptclcsum(Block*, int, int); +extern void ip_init(Fs*); +/* + * bootp.c + */ +extern char* bootp(Ipifc*); +extern int bootpread(char*, ulong, int); + +/* + * resolving 
inferno/plan9 differences + */ +Chan* commonfdtochan(int, int, int, int); +char* commonuser(void); +char* commonerror(void); + +/* + * chandial.c + */ +extern Chan* chandial(char*, char*, char*, Chan**); + +/* + * global to all of the stack + */ +extern void (*igmpreportfn)(Ipifc*, uchar*); diff -Nru 0/sys/src/nix/ip/ipaux.c 4/sys/src/nix/ip/ipaux.c --- 0/sys/src/nix/ip/ipaux.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ipaux.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,368 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" +#include "ipv6.h" + +char *v6hdrtypes[Maxhdrtype] = +{ + [HBH] "HopbyHop", + [ICMP] "ICMP", + [IGMP] "IGMP", + [GGP] "GGP", + [IPINIP] "IP", + [ST] "ST", + [TCP] "TCP", + [UDP] "UDP", + [ISO_TP4] "ISO_TP4", + [RH] "Routinghdr", + [FH] "Fraghdr", + [IDRP] "IDRP", + [RSVP] "RSVP", + [AH] "Authhdr", + [ESP] "ESP", + [ICMPv6] "ICMPv6", + [NNH] "Nonexthdr", + [ISO_IP] "ISO_IP", + [IGRP] "IGRP", + [OSPF] "OSPF", +}; + +/* + * well known IPv6 addresses + */ +uchar v6Unspecified[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6loopback[IPaddrlen] = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; + +uchar v6linklocal[IPaddrlen] = { + 0xfe, 0x80, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6linklocalmask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6llpreflen = 8; /* link-local prefix length in bytes */ + +uchar v6multicast[IPaddrlen] = { + 0xff, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +uchar v6multicastmask[IPaddrlen] = { + 0xff, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6mcpreflen = 1; /* multicast prefix length */ + +uchar v6allnodesN[IPaddrlen] = { + 0xff, 0x01, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6allroutersN[IPaddrlen] = { + 0xff, 0x01, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 
0x02 +}; +uchar v6allnodesNmask[IPaddrlen] = { + 0xff, 0xff, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6aNpreflen = 2; /* all nodes (N) prefix */ + +uchar v6allnodesL[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01 +}; +uchar v6allroutersL[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x02 +}; +uchar v6allnodesLmask[IPaddrlen] = { + 0xff, 0xff, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 +}; +int v6aLpreflen = 2; /* all nodes (L) prefix */ + +uchar v6solicitednode[IPaddrlen] = { + 0xff, 0x02, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0x01, + 0xff, 0, 0, 0 +}; +uchar v6solicitednodemask[IPaddrlen] = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0x0, 0x0, 0x0 +}; +int v6snpreflen = 13; /* solicited-node prefix length in bytes (matches the 13 0xff bytes of the mask) */ + /* + * ptclcsum - checksum len bytes of the block chain bp, starting offset + * bytes into its data. + * Returns 0 if the chain ends before offset is reached. When the data + * lies in a single (last) block, len is clamped to what that block holds + * and the complemented ptclbsum of it is returned. Otherwise each block's + * piece is summed with ptclbsum; pieces that begin at an odd byte position + * of the overall data are accumulated separately in hisum and merged with + * their bytes swapped, then the carries are folded and the complement of + * the low 16 bits is returned. The walk stops early if the chain runs out + * before len bytes have been seen. + */ +ushort +ptclcsum(Block *bp, int offset, int len) +{ + uchar *addr; + ulong losum, hisum; + ushort csum; + int odd, blocklen, x; + + /* Correct to front of data area */ + while(bp != nil && offset && offset >= BLEN(bp)) { + offset -= BLEN(bp); + bp = bp->next; + } + if(bp == nil) + return 0; + + addr = bp->rp + offset; + blocklen = BLEN(bp) - offset; + + if(bp->next == nil) { + if(blocklen < len) + len = blocklen; + return ~ptclbsum(addr, len) & 0xffff; + } + + losum = 0; + hisum = 0; + + odd = 0; + while(len) { + x = blocklen; + if(len < x) + x = len; + + csum = ptclbsum(addr, x); + if(odd) + hisum += csum; + else + losum += csum; + odd = (odd+x) & 1; /* parity of the bytes consumed so far */ + len -= x; + + bp = bp->next; + if(bp == nil) + break; + blocklen = BLEN(bp); + addr = bp->rp; + } + + /* merge the odd-aligned sums with their two bytes exchanged */ + losum += hisum>>8; + losum += (hisum&0xff)<<8; + while((csum = losum>>16) != 0) + losum = csum + (losum & 0xffff); + + return ~losum & 0xffff; +} + +enum +{ + Isprefix= 16, +}; + +#define CLASS(p) ((*(uchar*)(p))>>6) + /* + * ipv62smcast - build in smcast the solicited-node multicast address for a: + * a copy of v6solicitednode with its last three bytes taken from a. + */ +void +ipv62smcast(uchar *smcast, uchar *a) +{ + assert(IPaddrlen == 16); + memmove(smcast, v6solicitednode, IPaddrlen); + smcast[13] = a[13]; + smcast[14] = a[14]; + smcast[15] = a[15]; +} + + +/* + * parse a 
hex mac address + * reads up to len bytes from the string 'from', each written as two hex + * digits and optionally followed by ':'; 'to' is zeroed first. + * parsing stops at the end of the string or at a trailing single digit. + * returns the number of bytes stored. + */ +int +parsemac(uchar *to, char *from, int len) +{ + char nip[4]; + char *p; + int i; + + p = from; + memset(to, 0, len); + for(i = 0; i < len; i++){ + if(p[0] == '\0' || p[1] == '\0') + break; + + nip[0] = p[0]; + nip[1] = p[1]; + nip[2] = '\0'; + p += 2; + + to[i] = strtoul(nip, 0, 16); + if(*p == ':') + p++; + } + return i; +} + +/* + * hashing tcp, udp, ... connections + * mixes the last byte of each address with the two ports and reduces + * the result modulo Nhash. + * every term is widened to ulong before it is shifted: shifting the + * promoted (signed) int form of an address byte >= 0x80 left by 24 can + * produce a negative value, and a negative remainder converted to ulong + * would index far outside Ipht.tab. with the casts the result is always + * in the range 0 .. Nhash-1. + */ +ulong +iphash(uchar *sa, ushort sp, uchar *da, ushort dp) +{ + return (((ulong)sa[IPaddrlen-1]<<24) ^ ((ulong)sp << 16) ^ ((ulong)da[IPaddrlen-1]<<8) ^ (ulong)dp) % Nhash; +} + +void +iphtadd(Ipht *ht, Conv *c) +{ + ulong hv; + Iphash *h; + + hv = iphash(c->raddr, c->rport, c->laddr, c->lport); + h = smalloc(sizeof(*h)); + if(ipcmp(c->raddr, IPnoaddr) != 0) + h->match = IPmatchexact; + else { + if(ipcmp(c->laddr, IPnoaddr) != 0){ + if(c->lport == 0) + h->match = IPmatchaddr; + else + h->match = IPmatchpa; + } else { + if(c->lport == 0) + h->match = IPmatchany; + else + h->match = IPmatchport; + } + } + h->c = c; + + lock(ht); + h->next = ht->tab[hv]; + ht->tab[hv] = h; + unlock(ht); +} + +void +iphtrem(Ipht *ht, Conv *c) +{ + ulong hv; + Iphash **l, *h; + + hv = iphash(c->raddr, c->rport, c->laddr, c->lport); + lock(ht); + for(l = &ht->tab[hv]; (*l) != nil; l = &(*l)->next) + if((*l)->c == c){ + h = *l; + (*l) = h->next; + free(h); + break; + } + unlock(ht); +} + +/* look for a matching conversation with the following precedence + * connected && raddr,rport,laddr,lport + * announced && laddr,lport + * announced && *,lport + * announced && laddr,* + * announced && *,* + */ +Conv* +iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp) +{ + ulong hv; + Iphash *h; + Conv *c; + + /* exact 4 pair match (connection) */ + hv = iphash(sa, sp, da, dp); + lock(ht); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchexact) + continue; + c = h->c; + if(sp == c->rport && dp == c->lport + && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + 
/* match local address and port */ + hv = iphash(IPnoaddr, 0, da, dp); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchpa) + continue; + c = h->c; + if(dp == c->lport && ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* match just port */ + hv = iphash(IPnoaddr, 0, IPnoaddr, dp); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchport) + continue; + c = h->c; + if(dp == c->lport){ + unlock(ht); + return c; + } + } + + /* match local address */ + hv = iphash(IPnoaddr, 0, da, 0); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchaddr) + continue; + c = h->c; + if(ipcmp(da, c->laddr) == 0){ + unlock(ht); + return c; + } + } + + /* look for something that matches anything */ + hv = iphash(IPnoaddr, 0, IPnoaddr, 0); + for(h = ht->tab[hv]; h != nil; h = h->next){ + if(h->match != IPmatchany) + continue; + c = h->c; + unlock(ht); + return c; + } + unlock(ht); + return nil; +} diff -Nru 0/sys/src/nix/ip/ipifc.c 4/sys/src/nix/ip/ipifc.c --- 0/sys/src/nix/ip/ipifc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ipifc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1654 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +#define DPRINT if(0)print + +enum { + Maxmedia = 32, + Nself = Maxmedia*5, + NHASH = 1<<6, + NCACHE = 256, + QMAX = 64*1024-1, +}; + +Medium *media[Maxmedia] = { 0 }; + +/* + * cache of local addresses (addresses we answer to) + */ +struct Ipself +{ + uchar a[IPaddrlen]; + Ipself *hnext; /* next address in the hash table */ + Iplink *link; /* binding twixt Ipself and Ipifc */ + ulong expire; + uchar type; /* type of address */ + int ref; + Ipself *next; /* free list */ +}; + +struct Ipselftab +{ + QLock; + int inited; + int acceptall; /* true if an interface has the null address */ + Ipself *hash[NHASH]; /* hash chains */ +}; + +/* + * Multicast addresses are chained onto a 
Chan so that + * we can remove them when the Chan is closed. + */ +typedef struct Ipmcast Ipmcast; +struct Ipmcast +{ + Ipmcast *next; + uchar ma[IPaddrlen]; /* multicast address */ + uchar ia[IPaddrlen]; /* interface address */ +}; + +/* quick hash for ip addresses: last two address bytes, modulo NHASH */ +#define hashipa(a) ( ( ((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1] )%NHASH ) + +static char tifc[] = "ifc "; + +static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type); +static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a); +static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc); +static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc); +static void ipifcregisterproxy(Fs*, Ipifc*, uchar*); +static char* ipifcremlifc(Ipifc*, Iplifc*); + +/* + * link in a new medium + * med is stored in the first empty slot of media[]. the last slot is + * never filled, so the table always ends with a nil entry; ipfindmedium + * relies on that terminator. if no slot is free the medium is dropped + * without any indication to the caller. + */ +void +addipmedium(Medium *med) +{ + int i; + + for(i = 0; i < nelem(media)-1; i++) + if(media[i] == nil){ + media[i] = med; + break; + } +} + +/* + * find the medium with this name + * walks media[] up to its nil terminator; returns the matching entry, + * or nil when no medium has that name. + */ +Medium* +ipfindmedium(char *name) +{ + Medium **mp; + + for(mp = media; *mp != nil; mp++) + if(strcmp((*mp)->name, name) == 0) + break; + return *mp; +} + +/* + * attach a device (or pkt driver) to the interface. 
+ * called with c locked + */ +static char* +ipifcbind(Conv *c, char **argv, int argc) +{ + Ipifc *ifc; + Medium *medium; + + if(argc < 2) + return Ebadarg; + + ifc = (Ipifc*)c->ptcl; + + /* bind the device to the interface */ + medium = ipfindmedium(argv[1]); + if(medium == nil) + return "unknown interface type"; + + wlock(ifc); + if(ifc->medium != nil){ + wunlock(ifc); + return "interface already bound"; + } + if(waserror()){ + wunlock(ifc); + nexterror(); + } + + /* do medium specific binding */ + (*medium->bind)(ifc, argc, argv); + + /* set the bound device name */ + if(argc > 2) + strncpy(ifc->dev, argv[2], sizeof(ifc->dev)); + else + snprint(ifc->dev, sizeof ifc->dev, "%s%d", medium->name, c->x); + ifc->dev[sizeof(ifc->dev)-1] = 0; + + /* set up parameters */ + ifc->medium = medium; + ifc->mintu = ifc->medium->mintu; + ifc->maxtu = ifc->medium->maxtu; + if(ifc->medium->unbindonclose == 0) + ifc->conv->inuse++; + ifc->rp.mflag = 0; /* default not managed */ + ifc->rp.oflag = 0; + ifc->rp.maxraint = 600000; /* millisecs */ + ifc->rp.minraint = 200000; + ifc->rp.linkmtu = 0; /* no mtu sent */ + ifc->rp.reachtime = 0; + ifc->rp.rxmitra = 0; + ifc->rp.ttl = MAXTTL; + ifc->rp.routerlt = 3 * ifc->rp.maxraint; + + /* any ancillary structures (like routes) no longer pertain */ + ifc->ifcid++; + + /* reopen all the queues closed by a previous unbind */ + qreopen(c->rq); + qreopen(c->eq); + qreopen(c->sq); + + wunlock(ifc); + poperror(); + + return nil; +} + +/* + * detach a device from an interface, close the interface + * called with ifc->conv closed + */ +static char* +ipifcunbind(Ipifc *ifc) +{ + char *err; + + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + + /* dissociate routes */ + if(ifc->medium != nil && ifc->medium->unbindonclose == 0) + ifc->conv->inuse--; + ifc->ifcid++; + + /* disassociate logical interfaces (before zeroing ifc->arg) */ + while(ifc->lifc){ + err = ipifcremlifc(ifc, ifc->lifc); + /* + * note: err non-zero means lifc not 
found, + * which can't happen in this case. + */ + if(err) + error(err); + } + + /* disassociate device */ + if(ifc->medium && ifc->medium->unbind) + (*ifc->medium->unbind)(ifc); + memset(ifc->dev, 0, sizeof(ifc->dev)); + ifc->arg = nil; + ifc->reassemble = 0; + + /* close queues to stop queuing of packets */ + qclose(ifc->conv->rq); + qclose(ifc->conv->wq); + qclose(ifc->conv->sq); + + ifc->medium = nil; + wunlock(ifc); + poperror(); + return nil; +} + +char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag" +" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt" +" %d pktin %lud pktout %lud errin %lud errout %lud\n"; + +char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n"; + +static int +ipifcstate(Conv *c, char *state, int n) +{ + Ipifc *ifc; + Iplifc *lifc; + int m; + + ifc = (Ipifc*)c->ptcl; + m = snprint(state, n, sfixedformat, + ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6, + ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint, + ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime, + ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt, + ifc->in, ifc->out, ifc->inerr, ifc->outerr); + + rlock(ifc); + for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next) + m += snprint(state+m, n - m, slineformat, lifc->local, + lifc->mask, lifc->remote, lifc->validlt, lifc->preflt); + if(ifc->lifc == nil) + m += snprint(state+m, n - m, "\n"); + runlock(ifc); + return m; +} + +static int +ipifclocal(Conv *c, char *state, int n) +{ + Ipifc *ifc; + Iplifc *lifc; + Iplink *link; + int m; + + ifc = (Ipifc*)c->ptcl; + m = 0; + + rlock(ifc); + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + m += snprint(state+m, n - m, "%-40.40I ->", lifc->local); + for(link = lifc->link; link; link = link->lifclink) + m += snprint(state+m, n - m, " %-40.40I", link->self->a); + m += snprint(state+m, n - m, "\n"); + } + runlock(ifc); + return m; +} + +static int +ipifcinuse(Conv *c) +{ + Ipifc *ifc; + + ifc = (Ipifc*)c->ptcl; + return ifc->medium 
!= nil; +} + +/* + * called when a process writes to an interface's 'data' + */ +static void +ipifckick(void *x) +{ + Conv *c = x; + Block *bp; + Ipifc *ifc; + + bp = qget(c->wq); + if(bp == nil) + return; + + ifc = (Ipifc*)c->ptcl; + if(!canrlock(ifc)){ + freeb(bp); + return; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->medium == nil || ifc->medium->pktin == nil) + freeb(bp); + else + (*ifc->medium->pktin)(c->p->f, ifc, bp); + runlock(ifc); + poperror(); +} + +/* + * called when a new ipifc structure is created + */ +static void +ipifccreate(Conv *c) +{ + Ipifc *ifc; + + c->rq = qopen(QMAX, 0, 0, 0); + c->sq = qopen(2*QMAX, 0, 0, 0); + c->wq = qopen(QMAX, Qkick, ipifckick, c); + ifc = (Ipifc*)c->ptcl; + ifc->conv = c; + ifc->unbinding = 0; + ifc->medium = nil; + ifc->reassemble = 0; +} + +/* + * called after last close of ipifc data or ctl + * called with c locked, we must unlock + */ +static void +ipifcclose(Conv *c) +{ + Ipifc *ifc; + Medium *medium; + + ifc = (Ipifc*)c->ptcl; + medium = ifc->medium; + if(medium != nil && medium->unbindonclose) + ipifcunbind(ifc); +} + +/* + * change an interface's mtu + */ +char* +ipifcsetmtu(Ipifc *ifc, char **argv, int argc) +{ + int mtu; + + if(argc < 2 || ifc->medium == nil) + return Ebadarg; + mtu = strtoul(argv[1], 0, 0); + if(mtu < ifc->medium->mintu || mtu > ifc->medium->maxtu) + return Ebadarg; + ifc->maxtu = mtu; + return nil; +} + +/* + * add an address to an interface. 
+ */ +char* +ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp) +{ + int i, type, mtu, sendnbrdisc = 0; + uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen]; + uchar bcast[IPaddrlen], net[IPaddrlen]; + Iplifc *lifc, **l; + Fs *f; + + if(ifc->medium == nil) + return "ipifc not yet bound to device"; + + f = ifc->conv->p->f; + + type = Rifc; + memset(ip, 0, IPaddrlen); + memset(mask, 0, IPaddrlen); + memset(rem, 0, IPaddrlen); + switch(argc){ + case 6: + if(strcmp(argv[5], "proxy") == 0) + type |= Rproxy; + /* fall through */ + case 5: + mtu = strtoul(argv[4], 0, 0); + if(mtu >= ifc->medium->mintu && mtu <= ifc->medium->maxtu) + ifc->maxtu = mtu; + /* fall through */ + case 4: + parseip(ip, argv[1]); + parseipmask(mask, argv[2]); + parseip(rem, argv[3]); + maskip(rem, mask, net); + break; + case 3: + parseip(ip, argv[1]); + parseipmask(mask, argv[2]); + maskip(ip, mask, rem); + maskip(rem, mask, net); + break; + case 2: + parseip(ip, argv[1]); + memmove(mask, defmask(ip), IPaddrlen); + maskip(ip, mask, rem); + maskip(rem, mask, net); + break; + default: + return Ebadarg; + } + if(isv4(ip)) + tentative = 0; + wlock(ifc); + + /* ignore if this is already a local address for this ifc */ + for(lifc = ifc->lifc; lifc; lifc = lifc->next) { + if(ipcmp(lifc->local, ip) == 0) { + if(lifc->tentative != tentative) + lifc->tentative = tentative; + if(lifcp) { + lifc->onlink = lifcp->onlink; + lifc->autoflag = lifcp->autoflag; + lifc->validlt = lifcp->validlt; + lifc->preflt = lifcp->preflt; + lifc->origint = lifcp->origint; + } + goto out; + } + } + + /* add the address to the list of logical ifc's for this ifc */ + lifc = smalloc(sizeof(Iplifc)); + ipmove(lifc->local, ip); + ipmove(lifc->mask, mask); + ipmove(lifc->remote, rem); + ipmove(lifc->net, net); + lifc->tentative = tentative; + if(lifcp) { + lifc->onlink = lifcp->onlink; + lifc->autoflag = lifcp->autoflag; + lifc->validlt = lifcp->validlt; + lifc->preflt = lifcp->preflt; + lifc->origint = 
lifcp->origint; + } else { /* default values */ + lifc->onlink = lifc->autoflag = 1; + lifc->validlt = lifc->preflt = ~0L; + lifc->origint = NOW / 1000; + } + lifc->next = nil; + + for(l = &ifc->lifc; *l; l = &(*l)->next) + ; + *l = lifc; + + /* check for point-to-point interface */ + if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */ + if(ipcmp(mask, IPallbits) == 0) + type |= Rptpt; + + /* add local routes */ + if(isv4(ip)) + v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type); + else + v6addroute(f, tifc, rem, mask, rem, type); + + addselfcache(f, ifc, lifc, ip, Runi); + + if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){ + ipifcregisterproxy(f, ifc, rem); + goto out; + } + + if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) { + /* add subnet directed broadcast address to the self cache */ + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) | ~mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + /* add subnet directed network address to the self cache */ + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) & mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + /* add network directed broadcast address to the self cache */ + memmove(mask, defmask(ip), IPaddrlen); + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) | ~mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + /* add network directed network address to the self cache */ + memmove(mask, defmask(ip), IPaddrlen); + for(i = 0; i < IPaddrlen; i++) + bcast[i] = (ip[i] & mask[i]) & mask[i]; + addselfcache(f, ifc, lifc, bcast, Rbcast); + + addselfcache(f, ifc, lifc, IPv4bcast, Rbcast); + } + else { + if(ipcmp(ip, v6loopback) == 0) { + /* add node-local mcast address */ + addselfcache(f, ifc, lifc, v6allnodesN, Rmulti); + + /* add route for all node multicast */ + v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, + v6allnodesN, Rmulti); + } + + /* add all nodes multicast address */ + addselfcache(f, ifc, lifc, v6allnodesL, Rmulti); + + /* 
add route for all nodes multicast */ + v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, + Rmulti); + + /* add solicited-node multicast address */ + ipv62smcast(bcast, ip); + addselfcache(f, ifc, lifc, bcast, Rmulti); + + sendnbrdisc = 1; + } + + /* register the address on this network for address resolution */ + if(isv4(ip) && ifc->medium->areg != nil) + (*ifc->medium->areg)(ifc, ip); + +out: + wunlock(ifc); + if(tentative && sendnbrdisc) + icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac); + return nil; +} + +/* + * remove a logical interface from an ifc + * always called with ifc wlock'd + */ +static char* +ipifcremlifc(Ipifc *ifc, Iplifc *lifc) +{ + Iplifc **l; + Fs *f; + + f = ifc->conv->p->f; + + /* + * find address on this interface and remove from chain. + * for pt to pt we actually specify the remote address as the + * addresss to remove. + */ + for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next) + ; + if(*l == nil) + return "address not on this interface"; + *l = lifc->next; + + /* disassociate any addresses */ + while(lifc->link) + remselfcache(f, ifc, lifc, lifc->link->self->a); + + /* remove the route for this logical interface */ + if(isv4(lifc->local)) + v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1); + else { + v6delroute(f, lifc->remote, lifc->mask, 1); + if(ipcmp(lifc->local, v6loopback) == 0) + /* remove route for all node multicast */ + v6delroute(f, v6allnodesN, v6allnodesNmask, 1); + else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0) + /* remove route for all link multicast */ + v6delroute(f, v6allnodesL, v6allnodesLmask, 1); + } + + free(lifc); + return nil; +} + +/* + * remove an address from an interface. 
+ * called with c->car locked + */ +char* +ipifcrem(Ipifc *ifc, char **argv, int argc) +{ + char *rv; + uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen]; + Iplifc *lifc; + + if(argc < 3) + return Ebadarg; + + parseip(ip, argv[1]); + parseipmask(mask, argv[2]); + if(argc < 4) + maskip(ip, mask, rem); + else + parseip(rem, argv[3]); + + wlock(ifc); + + /* + * find address on this interface and remove from chain. + * for pt to pt we actually specify the remote address as the + * addresss to remove. + */ + for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) { + if (memcmp(ip, lifc->local, IPaddrlen) == 0 + && memcmp(mask, lifc->mask, IPaddrlen) == 0 + && memcmp(rem, lifc->remote, IPaddrlen) == 0) + break; + } + + rv = ipifcremlifc(ifc, lifc); + wunlock(ifc); + return rv; +} + +/* + * distribute routes to active interfaces like the + * TRIP linecards + */ +void +ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type) +{ + Medium *medium; + Conv **cp, **e; + Ipifc *ifc; + + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp != nil) { + ifc = (Ipifc*)(*cp)->ptcl; + medium = ifc->medium; + if(medium != nil && medium->addroute != nil) + medium->addroute(ifc, vers, addr, mask, gate, type); + } + } +} + +void +ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask) +{ + Medium *medium; + Conv **cp, **e; + Ipifc *ifc; + + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp != nil) { + ifc = (Ipifc*)(*cp)->ptcl; + medium = ifc->medium; + if(medium != nil && medium->remroute != nil) + medium->remroute(ifc, vers, addr, mask); + } + } +} + +/* + * associate an address with the interface. This wipes out any previous + * addresses. This is a macro that means, remove all the old interfaces + * and add a new one. 
+ */ +static char* +ipifcconnect(Conv* c, char **argv, int argc) +{ + char *err; + Ipifc *ifc; + + ifc = (Ipifc*)c->ptcl; + + if(ifc->medium == nil) + return "ipifc not yet bound to device"; + + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + while(ifc->lifc){ + err = ipifcremlifc(ifc, ifc->lifc); + if(err) + error(err); + } + wunlock(ifc); + poperror(); + + err = ipifcadd(ifc, argv, argc, 0, nil); + if(err) + return err; + + Fsconnected(c, nil); + return nil; +} + +char* +ipifcra6(Ipifc *ifc, char **argv, int argc) +{ + int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint; + + argsleft = argc - 1; + i = 1; + + if(argsleft % 2 != 0) + return Ebadarg; + + while (argsleft > 1) { + if(strcmp(argv[i], "recvra") == 0) + ifc->recvra6 = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i], "sendra") == 0) + ifc->sendra6 = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i], "mflag") == 0) + ifc->rp.mflag = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i], "oflag") == 0) + ifc->rp.oflag = (atoi(argv[i+1]) != 0); + else if(strcmp(argv[i], "maxraint") == 0) + ifc->rp.maxraint = atoi(argv[i+1]); + else if(strcmp(argv[i], "minraint") == 0) + ifc->rp.minraint = atoi(argv[i+1]); + else if(strcmp(argv[i], "linkmtu") == 0) + ifc->rp.linkmtu = atoi(argv[i+1]); + else if(strcmp(argv[i], "reachtime") == 0) + ifc->rp.reachtime = atoi(argv[i+1]); + else if(strcmp(argv[i], "rxmitra") == 0) + ifc->rp.rxmitra = atoi(argv[i+1]); + else if(strcmp(argv[i], "ttl") == 0) + ifc->rp.ttl = atoi(argv[i+1]); + else if(strcmp(argv[i], "routerlt") == 0) + ifc->rp.routerlt = atoi(argv[i+1]); + else + return Ebadarg; + + argsleft -= 2; + i += 2; + } + + /* consistency check */ + if(ifc->rp.maxraint < ifc->rp.minraint) { + ifc->rp.maxraint = vmax; + ifc->rp.minraint = vmin; + return Ebadarg; + } + return nil; +} + +/* + * non-standard control messages. + * called with c->car locked. 
+ */ +static char* +ipifcctl(Conv* c, char**argv, int argc) +{ + Ipifc *ifc; + int i; + + ifc = (Ipifc*)c->ptcl; + if(strcmp(argv[0], "add") == 0) + return ipifcadd(ifc, argv, argc, 0, nil); + else if(strcmp(argv[0], "try") == 0) + return ipifcadd(ifc, argv, argc, 1, nil); + else if(strcmp(argv[0], "remove") == 0) + return ipifcrem(ifc, argv, argc); + else if(strcmp(argv[0], "unbind") == 0) + return ipifcunbind(ifc); + else if(strcmp(argv[0], "joinmulti") == 0) + return ipifcjoinmulti(ifc, argv, argc); + else if(strcmp(argv[0], "leavemulti") == 0) + return ipifcleavemulti(ifc, argv, argc); + else if(strcmp(argv[0], "mtu") == 0) + return ipifcsetmtu(ifc, argv, argc); + else if(strcmp(argv[0], "reassemble") == 0){ + ifc->reassemble = 1; + return nil; + } + else if(strcmp(argv[0], "iprouting") == 0){ + i = 1; + if(argc > 1) + i = atoi(argv[1]); + iprouting(c->p->f, i); + return nil; + } + else if(strcmp(argv[0], "add6") == 0) + return ipifcadd6(ifc, argv, argc); + else if(strcmp(argv[0], "ra6") == 0) + return ipifcra6(ifc, argv, argc); + return "unsupported ctl"; +} + +int +ipifcstats(Proto *ipifc, char *buf, int len) +{ + return ipstats(ipifc->f, buf, len); +} + +void +ipifcinit(Fs *f) +{ + Proto *ipifc; + + ipifc = smalloc(sizeof(Proto)); + ipifc->name = "ipifc"; + ipifc->connect = ipifcconnect; + ipifc->announce = nil; + ipifc->bind = ipifcbind; + ipifc->state = ipifcstate; + ipifc->create = ipifccreate; + ipifc->close = ipifcclose; + ipifc->rcv = nil; + ipifc->ctl = ipifcctl; + ipifc->advise = nil; + ipifc->stats = ipifcstats; + ipifc->inuse = ipifcinuse; + ipifc->local = ipifclocal; + ipifc->ipproto = -1; + ipifc->nc = Maxmedia; + ipifc->ptclsize = sizeof(Ipifc); + + f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... 
*/ + f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */ + + Fsproto(f, ipifc); +} + +/* + * add to self routing cache + * called with c->car locked + */ +static void +addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type) +{ + Ipself *p; + Iplink *lp; + int h; + + qlock(f->self); + + /* see if the address already exists */ + h = hashipa(a); + for(p = f->self->hash[h]; p; p = p->next) + if(memcmp(a, p->a, IPaddrlen) == 0) + break; + + /* allocate a local address and add to hash chain */ + if(p == nil){ + p = smalloc(sizeof(*p)); + ipmove(p->a, a); + p->type = type; + p->next = f->self->hash[h]; + f->self->hash[h] = p; + + /* if the null address, accept all packets */ + if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0) + f->self->acceptall = 1; + } + + /* look for a link for this lifc */ + for(lp = p->link; lp; lp = lp->selflink) + if(lp->lifc == lifc) + break; + + /* allocate a lifc-to-local link and link to both */ + if(lp == nil){ + lp = smalloc(sizeof(*lp)); + lp->ref = 1; + lp->lifc = lifc; + lp->self = p; + lp->selflink = p->link; + p->link = lp; + lp->lifclink = lifc->link; + lifc->link = lp; + + /* add to routing table */ + if(isv4(a)) + v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, + a+IPv4off, type); + else + v6addroute(f, tifc, a, IPallbits, a, type); + + if((type & Rmulti) && ifc->medium->addmulti != nil) + (*ifc->medium->addmulti)(ifc, a, lifc->local); + } else + lp->ref++; + + qunlock(f->self); +} + +/* + * These structures are unlinked from their chains while + * other threads may be using them. To avoid excessive locking, + * just put them aside for a while before freeing them. 
+ * called with f->self locked + */ +static Iplink *freeiplink; +static Ipself *freeipself; + +/* + * queue p on freeiplink for deferred freeing, first releasing any + * queued entries whose grace period has already run out. + */ +static void +iplinkfree(Iplink *p) +{ + Iplink **l, *np; + ulong now = NOW; + + l = &freeiplink; + for(np = *l; np; np = *l){ + if((long)(now - np->expire) >= 0){ /* deadline passed; signed difference tolerates NOW wrapping */ + *l = np->next; + free(np); + continue; + } + l = &np->next; + } + p->expire = now + 5000; /* give other threads 5 secs to get out */ + p->next = nil; + *l = p; +} + +/* + * queue p on freeipself for deferred freeing, first releasing any + * queued entries whose grace period has already run out. + */ +static void +ipselffree(Ipself *p) +{ + Ipself **l, *np; + ulong now = NOW; + + l = &freeipself; + for(np = *l; np; np = *l){ + if((long)(now - np->expire) >= 0){ /* deadline passed; signed difference tolerates NOW wrapping */ + *l = np->next; + free(np); + continue; + } + l = &np->next; + } + p->expire = now + 5000; /* give other threads 5 secs to get out */ + p->next = nil; + *l = p; +} + +/* + * Decrement reference for this address on this link. + * Unlink from selftab if this is the last ref. + * called with c->car locked + */ +static void +remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a) +{ + Ipself *p, **l; + Iplink *link, **l_self, **l_lifc; + + qlock(f->self); + + /* find the unique selftab entry */ + l = &f->self->hash[hashipa(a)]; + for(p = *l; p; p = *l){ + if(ipcmp(p->a, a) == 0) + break; + l = &p->next; + } + + if(p == nil) + goto out; + + /* + * walk down links from an ifc looking for one + * that matches the selftab entry + */ + l_lifc = &lifc->link; + for(link = *l_lifc; link; link = *l_lifc){ + if(link->self == p) + break; + l_lifc = &link->lifclink; + } + + if(link == nil) + goto out; + + /* + * walk down the links from the selftab looking for + * the one we just found + */ + l_self = &p->link; + for(link = *l_self; link; link = *l_self){ + if(link == *l_lifc) + break; + l_self = &link->selflink; + } + + if(link == nil) + panic("remselfcache"); + + if(--(link->ref) != 0) + goto out; + + if((p->type & Rmulti) && ifc->medium->remmulti != nil) + (*ifc->medium->remmulti)(ifc, a, lifc->local); + + /* ref == 0, remove from both chains and free the link */ + *l_lifc = link->lifclink; + *l_self = link->selflink; +
iplinkfree(link); + + if(p->link != nil) + goto out; + + /* remove from routing table */ + if(isv4(a)) + v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1); + else + v6delroute(f, a, IPallbits, 1); + + /* no more links, remove from hash and free */ + *l = p->next; + ipselffree(p); + + /* if IPnoaddr, forget */ + if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0) + f->self->acceptall = 0; + +out: + qunlock(f->self); +} + +static char *stformat = "%-44.44I %2.2d %4.4s\n"; +enum +{ + Nstformat= 41, +}; + +long +ipselftabread(Fs *f, char *cp, ulong offset, int n) +{ + int i, m, nifc, off; + Ipself *p; + Iplink *link; + char state[8]; + + m = 0; + off = offset; + qlock(f->self); + for(i = 0; i < NHASH && m < n; i++){ + for(p = f->self->hash[i]; p != nil && m < n; p = p->next){ + nifc = 0; + for(link = p->link; link; link = link->selflink) + nifc++; + routetype(p->type, state); + m += snprint(cp + m, n - m, stformat, p->a, nifc, state); + if(off > 0){ + off -= m; + m = 0; + } + } + } + qunlock(f->self); + return m; +} + +int +iptentative(Fs *f, uchar *addr) +{ + Ipself *p; + + p = f->self->hash[hashipa(addr)]; + for(; p; p = p->next){ + if(ipcmp(addr, p->a) == 0) + return p->link->lifc->tentative; + } + return 0; +} + +/* + * returns + * 0 - no match + * Runi + * Rbcast + * Rmcast + */ +int +ipforme(Fs *f, uchar *addr) +{ + Ipself *p; + + p = f->self->hash[hashipa(addr)]; + for(; p; p = p->next){ + if(ipcmp(addr, p->a) == 0) + return p->type; + } + + /* hack to say accept anything */ + if(f->self->acceptall) + return Runi; + return 0; +} + +/* + * find the ifc on same net as the remote system. If none, + * return nil. 
+ */ +Ipifc* +findipifc(Fs *f, uchar *remote, int type) +{ + Ipifc *ifc, *x; + Iplifc *lifc; + Conv **cp, **e; + uchar gnet[IPaddrlen], xmask[IPaddrlen]; + + x = nil; + memset(xmask, 0, IPaddrlen); + + /* find most specific match */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(remote, lifc->mask, gnet); + if(ipcmp(gnet, lifc->net) == 0){ + if(x == nil || ipcmp(lifc->mask, xmask) > 0){ + x = ifc; + ipmove(xmask, lifc->mask); + } + } + } + } + if(x != nil) + return x; + + /* for now for broadcast and multicast, just use first interface */ + if(type & (Rbcast|Rmulti)){ + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + if(ifc->lifc != nil) + return ifc; + } + } + return nil; +} + +enum { + unknownv6, /* UGH */ +// multicastv6, + unspecifiedv6, + linklocalv6, + globalv6, +}; + +int +v6addrtype(uchar *addr) +{ + if(islinklocal(addr) || + isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop) + return linklocalv6; + else + return globalv6; +} + +#define v6addrcurr(lifc) ((lifc)->preflt == ~0L || \ + (lifc)->origint + (lifc)->preflt >= NOW/1000) + +static void +findprimaryipv6(Fs *f, uchar *local) +{ + int atype, atypel; + Conv **cp, **e; + Ipifc *ifc; + Iplifc *lifc; + + ipmove(local, v6Unspecified); + atype = unspecifiedv6; + + /* + * find "best" (global > link local > unspecified) + * local address; address must be current. 
+ */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + atypel = v6addrtype(lifc->local); + if(atypel > atype && v6addrcurr(lifc)) { + ipmove(local, lifc->local); + atype = atypel; + if(atype == globalv6) + return; + } + } + } +} + +/* + * returns first ip address configured + */ +static void +findprimaryipv4(Fs *f, uchar *local) +{ + Conv **cp, **e; + Ipifc *ifc; + Iplifc *lifc; + + /* find first ifc local address */ + e = &f->ipifc->conv[f->ipifc->nc]; + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == 0) + continue; + ifc = (Ipifc*)(*cp)->ptcl; + if((lifc = ifc->lifc) != nil){ + ipmove(local, lifc->local); + return; + } + } +} + +/* + * find the local address 'closest' to the remote system, copy it to + * local and return the ifc for that address + */ +void +findlocalip(Fs *f, uchar *local, uchar *remote) +{ + int version, atype = unspecifiedv6, atypel = unknownv6; + int atyper, deprecated; + uchar gate[IPaddrlen], gnet[IPaddrlen]; + Ipifc *ifc; + Iplifc *lifc; + Route *r; + + USED(atype); + USED(atypel); + qlock(f->ipifc); + r = v6lookup(f, remote, nil); + version = (memcmp(remote, v4prefix, IPv4off) == 0)? 
V4: V6; + + if(r != nil){ + ifc = r->ifc; + if(r->type & Rv4) + v4tov6(gate, r->v4.gate); + else { + ipmove(gate, r->v6.gate); + ipmove(local, v6Unspecified); + } + + switch(version) { + case V4: + /* find ifc address closest to the gateway to use */ + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(gate, lifc->mask, gnet); + if(ipcmp(gnet, lifc->net) == 0){ + ipmove(local, lifc->local); + goto out; + } + } + break; + case V6: + /* find ifc address with scope matching the destination */ + atyper = v6addrtype(remote); + deprecated = 0; + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + atypel = v6addrtype(lifc->local); + /* prefer appropriate scope */ + if(atypel > atype && atype < atyper || + atypel < atype && atype > atyper){ + ipmove(local, lifc->local); + deprecated = !v6addrcurr(lifc); + atype = atypel; + } else if(atypel == atype){ + /* avoid deprecated addresses */ + if(deprecated && v6addrcurr(lifc)){ + ipmove(local, lifc->local); + atype = atypel; + deprecated = 0; + } + } + if(atype == atyper && !deprecated) + goto out; + } + if(atype >= atyper) + goto out; + break; + default: + panic("findlocalip: version %d", version); + } + } + + switch(version){ + case V4: + findprimaryipv4(f, local); + break; + case V6: + findprimaryipv6(f, local); + break; + default: + panic("findlocalip2: version %d", version); + } + +out: + qunlock(f->ipifc); +} + +/* + * return first v4 address associated with an interface + */ +int +ipv4local(Ipifc *ifc, uchar *addr) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + if(isv4(lifc->local)){ + memmove(addr, lifc->local+IPv4off, IPv4addrlen); + return 1; + } + } + return 0; +} + +/* + * return first v6 address associated with an interface + */ +int +ipv6local(Ipifc *ifc, uchar *addr) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + if(!isv4(lifc->local) && !(lifc->tentative)){ + ipmove(addr, lifc->local); + return 1; + } + } + return 0; +} + +int +ipv6anylocal(Ipifc *ifc, uchar 
*addr) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + if(!isv4(lifc->local)){ + ipmove(addr, lifc->local); + return SRC_UNI; + } + } + return SRC_UNSPEC; +} + +/* + * see if this address is bound to the interface + */ +Iplifc* +iplocalonifc(Ipifc *ifc, uchar *ip) +{ + Iplifc *lifc; + + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(ip, lifc->local) == 0) + return lifc; + return nil; +} + + +/* + * See if we're proxying for this address on this interface + */ +int +ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip) +{ + Route *r; + uchar net[IPaddrlen]; + Iplifc *lifc; + + /* see if this is a direct connected pt to pt address */ + r = v6lookup(f, ip, nil); + if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy)) + return 0; + + /* see if this is on the right interface */ + for(lifc = ifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0) + return 1; + } + return 0; +} + +/* + * return multicast version if any + */ +int +ipismulticast(uchar *ip) +{ + if(isv4(ip)){ + if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0) + return V4; + } + else if(ip[0] == 0xff) + return V6; + return 0; +} +int +ipisbm(uchar *ip) +{ + if(isv4(ip)){ + if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0) + return V4; + else if(ipcmp(ip, IPv4bcast) == 0) + return V4; + } + else if(ip[0] == 0xff) + return V6; + return 0; +} + + +/* + * add a multicast address to an interface, called with c->car locked + */ +void +ipifcaddmulti(Conv *c, uchar *ma, uchar *ia) +{ + Ipifc *ifc; + Iplifc *lifc; + Conv **p; + Ipmulti *multi, **l; + Fs *f; + + f = c->p->f; + + for(l = &c->multi; *l; l = &(*l)->next) + if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0) + return; /* it's already there */ + + multi = *l = smalloc(sizeof(*multi)); + ipmove(multi->ma, ma); + ipmove(multi->ia, ia); + multi->next = nil; + + for(p = f->ipifc->conv; *p; p++){ + if((*p)->inuse == 0) + continue; + ifc = (Ipifc*)(*p)->ptcl; + if(waserror()){ + 
wunlock(ifc); + nexterror(); + } + wlock(ifc); + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(ia, lifc->local) == 0) + addselfcache(f, ifc, lifc, ma, Rmulti); + wunlock(ifc); + poperror(); + } +} + + +/* + * remove a multicast address from an interface, called with c->car locked + */ +void +ipifcremmulti(Conv *c, uchar *ma, uchar *ia) +{ + Ipmulti *multi, **l; + Iplifc *lifc; + Conv **p; + Ipifc *ifc; + Fs *f; + + f = c->p->f; + + for(l = &c->multi; *l; l = &(*l)->next) + if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0) + break; + + multi = *l; + if(multi == nil) + return; /* we don't have it open */ + + *l = multi->next; + + for(p = f->ipifc->conv; *p; p++){ + if((*p)->inuse == 0) + continue; + + ifc = (Ipifc*)(*p)->ptcl; + if(waserror()){ + wunlock(ifc); + nexterror(); + } + wlock(ifc); + for(lifc = ifc->lifc; lifc; lifc = lifc->next) + if(ipcmp(ia, lifc->local) == 0) + remselfcache(f, ifc, lifc, ma); + wunlock(ifc); + poperror(); + } + + free(multi); +} + +/* + * make lifc's join and leave multicast groups + */ +static char* +ipifcjoinmulti(Ipifc *ifc, char **argv, int argc) +{ + USED(ifc, argv, argc); + return nil; +} + +static char* +ipifcleavemulti(Ipifc *ifc, char **argv, int argc) +{ + USED(ifc, argv, argc); + return nil; +} + +static void +ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip) +{ + Conv **cp, **e; + Ipifc *nifc; + Iplifc *lifc; + Medium *medium; + uchar net[IPaddrlen]; + + /* register the address on any network that will proxy for us */ + e = &f->ipifc->conv[f->ipifc->nc]; + + if(!isv4(ip)) { /* V6 */ + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc) + continue; + rlock(nifc); + medium = nifc->medium; + if(medium == nil || medium->addmulti == nil) { + runlock(nifc); + continue; + } + for(lifc = nifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0) { + /* add solicited-node multicast addr */ + ipv62smcast(net, ip); + 
addselfcache(f, nifc, lifc, net, Rmulti); + arpenter(f, V6, ip, nifc->mac, 6, 0); + // (*medium->addmulti)(nifc, net, ip); + break; + } + } + runlock(nifc); + } + } + else { /* V4 */ + for(cp = f->ipifc->conv; cp < e; cp++){ + if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc) + continue; + rlock(nifc); + medium = nifc->medium; + if(medium == nil || medium->areg == nil){ + runlock(nifc); + continue; + } + for(lifc = nifc->lifc; lifc; lifc = lifc->next){ + maskip(ip, lifc->mask, net); + if(ipcmp(net, lifc->remote) == 0){ + (*medium->areg)(nifc, ip); + break; + } + } + runlock(nifc); + } + } +} + + +/* added for new v6 mesg types */ +static void +adddefroute6(Fs *f, uchar *gate, int force) +{ + Route *r; + + r = v6lookup(f, v6Unspecified, nil); + /* + * route entries generated by all other means take precedence + * over router announcements. + */ + if (r && !force && strcmp(r->tag, "ra") != 0) + return; + + v6delroute(f, v6Unspecified, v6Unspecified, 1); + v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0); +} + +enum { + Ngates = 3, +}; + +char* +ipifcadd6(Ipifc *ifc, char**argv, int argc) +{ + int plen = 64; + long origint = NOW / 1000, preflt = ~0L, validlt = ~0L; + char addr[40], preflen[6]; + char *params[3]; + uchar autoflag = 1, onlink = 1; + uchar prefix[IPaddrlen]; + Iplifc *lifc; + + switch(argc) { + case 7: + preflt = atoi(argv[6]); + /* fall through */ + case 6: + validlt = atoi(argv[5]); + /* fall through */ + case 5: + autoflag = atoi(argv[4]); + /* fall through */ + case 4: + onlink = atoi(argv[3]); + /* fall through */ + case 3: + plen = atoi(argv[2]); + /* fall through */ + case 2: + break; + default: + return Ebadarg; + } + + if (parseip(prefix, argv[1]) != 6 || validlt < preflt || plen < 0 || + plen > 64 || islinklocal(prefix)) + return Ebadarg; + + lifc = smalloc(sizeof(Iplifc)); + lifc->onlink = (onlink != 0); + lifc->autoflag = (autoflag != 0); + lifc->validlt = validlt; + lifc->preflt = preflt; + lifc->origint = origint; + + /* issue 
"add" ctl msg for v6 link-local addr and prefix len */ + if(!ifc->medium->pref2addr) + return Ebadarg; + ifc->medium->pref2addr(prefix, ifc->mac); /* mac → v6 link-local addr */ + sprint(addr, "%I", prefix); + sprint(preflen, "/%d", plen); + params[0] = "add"; + params[1] = addr; + params[2] = preflen; + + return ipifcadd(ifc, params, 3, 0, lifc); +} diff -Nru 0/sys/src/nix/ip/iproute.c 4/sys/src/nix/ip/iproute.c --- 0/sys/src/nix/ip/iproute.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/iproute.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,852 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void walkadd(Fs*, Route**, Route*); +static void addnode(Fs*, Route**, Route*); +static void calcd(Route*); + +/* these are used for all instances of IP */ +Route* v4freelist; +Route* v6freelist; +RWlock routelock; +ulong v4routegeneration, v6routegeneration; + +static void +freeroute(Route *r) +{ + Route **l; + + r->left = nil; + r->right = nil; + if(r->type & Rv4) + l = &v4freelist; + else + l = &v6freelist; + r->mid = *l; + *l = r; +} + +static Route* +allocroute(int type) +{ + Route *r; + int n; + Route **l; + + if(type & Rv4){ + n = sizeof(RouteTree) + sizeof(V4route); + l = &v4freelist; + } else { + n = sizeof(RouteTree) + sizeof(V6route); + l = &v6freelist; + } + + r = *l; + if(r != nil){ + *l = r->mid; + } else { + r = malloc(n); + if(r == nil) + panic("out of routing nodes"); + } + memset(r, 0, n); + r->type = type; + r->ifc = nil; + r->ref = 1; + + return r; +} + +static void +addqueue(Route **q, Route *r) +{ + Route *l; + + if(r == nil) + return; + + l = allocroute(r->type); + l->mid = *q; + *q = l; + l->left = r; +} + +/* + * compare 2 v6 addresses + */ +static int +lcmp(ulong *a, ulong *b) +{ + int i; + + for(i = 0; i < IPllen; i++){ + if(a[i] > b[i]) + return 1; + if(a[i] < b[i]) + return -1; + } + return 0; +} + +/* + * compare 2 v4 or v6 ranges + */ +enum +{ + 
Rpreceeds, + Rfollows, + Requals, + Rcontains, + Rcontained, +}; + +static int +rangecompare(Route *a, Route *b) +{ + if(a->type & Rv4){ + if(a->v4.endaddress < b->v4.address) + return Rpreceeds; + + if(a->v4.address > b->v4.endaddress) + return Rfollows; + + if(a->v4.address <= b->v4.address + && a->v4.endaddress >= b->v4.endaddress){ + if(a->v4.address == b->v4.address + && a->v4.endaddress == b->v4.endaddress) + return Requals; + return Rcontains; + } + return Rcontained; + } + + if(lcmp(a->v6.endaddress, b->v6.address) < 0) + return Rpreceeds; + + if(lcmp(a->v6.address, b->v6.endaddress) > 0) + return Rfollows; + + if(lcmp(a->v6.address, b->v6.address) <= 0 + && lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){ + if(lcmp(a->v6.address, b->v6.address) == 0 + && lcmp(a->v6.endaddress, b->v6.endaddress) == 0) + return Requals; + return Rcontains; + } + + return Rcontained; +} + +static void +copygate(Route *old, Route *new) +{ + if(new->type & Rv4) + memmove(old->v4.gate, new->v4.gate, IPv4addrlen); + else + memmove(old->v6.gate, new->v6.gate, IPaddrlen); +} + +/* + * walk down a tree adding nodes back in + */ +static void +walkadd(Fs *f, Route **root, Route *p) +{ + Route *l, *r; + + l = p->left; + r = p->right; + p->left = 0; + p->right = 0; + addnode(f, root, p); + if(l) + walkadd(f, root, l); + if(r) + walkadd(f, root, r); +} + +/* + * calculate depth + */ +static void +calcd(Route *p) +{ + Route *q; + int d; + + if(p) { + d = 0; + q = p->left; + if(q) + d = q->depth; + q = p->right; + if(q && q->depth > d) + d = q->depth; + q = p->mid; + if(q && q->depth > d) + d = q->depth; + p->depth = d+1; + } +} + +/* + * balance the tree at the current node + */ +static void +balancetree(Route **cur) +{ + Route *p, *l, *r; + int dl, dr; + + /* + * if left and right are + * too out of balance, + * rotate tree node + */ + p = *cur; + dl = 0; if(l = p->left) dl = l->depth; + dr = 0; if(r = p->right) dr = r->depth; + + if(dl > dr+1) { + p->left = l->right; + l->right = p; + 
*cur = l; + calcd(p); + calcd(l); + } else + if(dr > dl+1) { + p->right = r->left; + r->left = p; + *cur = r; + calcd(p); + calcd(r); + } else + calcd(p); +} + +/* + * add a new node to the tree + */ +static void +addnode(Fs *f, Route **cur, Route *new) +{ + Route *p; + + p = *cur; + if(p == 0) { + *cur = new; + new->depth = 1; + return; + } + + switch(rangecompare(new, p)){ + case Rpreceeds: + addnode(f, &p->left, new); + break; + case Rfollows: + addnode(f, &p->right, new); + break; + case Rcontains: + /* + * if new node is superset + * of tree node, + * replace tree node and + * queue tree node to be + * merged into root. + */ + *cur = new; + new->depth = 1; + addqueue(&f->queue, p); + break; + case Requals: + /* + * supercede the old entry if the old one isn't + * a local interface. + */ + if((p->type & Rifc) == 0){ + p->type = new->type; + p->ifcid = -1; + copygate(p, new); + } else if(new->type & Rifc) + p->ref++; + freeroute(new); + break; + case Rcontained: + addnode(f, &p->mid, new); + break; + } + + balancetree(cur); +} + +#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5)) + +void +v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type) +{ + Route *p; + ulong sa; + ulong m; + ulong ea; + int h, eh; + + m = nhgetl(mask); + sa = nhgetl(a) & m; + ea = sa | ~m; + + eh = V4H(ea); + for(h=V4H(sa); h<=eh; h++) { + p = allocroute(Rv4 | type); + p->v4.address = sa; + p->v4.endaddress = ea; + memmove(p->v4.gate, gate, sizeof(p->v4.gate)); + memmove(p->tag, tag, sizeof(p->tag)); + + wlock(&routelock); + addnode(f, &f->v4root[h], p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v4root[h], p->left); + freeroute(p); + } + wunlock(&routelock); + } + v4routegeneration++; + + ipifcaddroute(f, Rv4, a, mask, gate, type); +} + +#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5)) +#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0)) + +void +v6addroute(Fs *f, char 
*tag, uchar *a, uchar *mask, uchar *gate, int type) +{ + Route *p; + ulong sa[IPllen], ea[IPllen]; + ulong x, y; + int h, eh; + + /* + if(ISDFLT(a, mask, tag)) + f->v6p->cdrouter = -1; + */ + + + for(h = 0; h < IPllen; h++){ + x = nhgetl(a+4*h); + y = nhgetl(mask+4*h); + sa[h] = x & y; + ea[h] = x | ~y; + } + + eh = V6H(ea); + for(h = V6H(sa); h <= eh; h++) { + p = allocroute(type); + memmove(p->v6.address, sa, IPaddrlen); + memmove(p->v6.endaddress, ea, IPaddrlen); + memmove(p->v6.gate, gate, IPaddrlen); + memmove(p->tag, tag, sizeof(p->tag)); + + wlock(&routelock); + addnode(f, &f->v6root[h], p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v6root[h], p->left); + freeroute(p); + } + wunlock(&routelock); + } + v6routegeneration++; + + ipifcaddroute(f, 0, a, mask, gate, type); +} + +Route** +looknode(Route **cur, Route *r) +{ + Route *p; + + for(;;){ + p = *cur; + if(p == 0) + return 0; + + switch(rangecompare(r, p)){ + case Rcontains: + return 0; + case Rpreceeds: + cur = &p->left; + break; + case Rfollows: + cur = &p->right; + break; + case Rcontained: + cur = &p->mid; + break; + case Requals: + return cur; + } + } +} + +void +v4delroute(Fs *f, uchar *a, uchar *mask, int dolock) +{ + Route **r, *p; + Route rt; + int h, eh; + ulong m; + + m = nhgetl(mask); + rt.v4.address = nhgetl(a) & m; + rt.v4.endaddress = rt.v4.address | ~m; + rt.type = Rv4; + + eh = V4H(rt.v4.endaddress); + for(h=V4H(rt.v4.address); h<=eh; h++) { + if(dolock) + wlock(&routelock); + r = looknode(&f->v4root[h], &rt); + if(r) { + p = *r; + if(--(p->ref) == 0){ + *r = 0; + addqueue(&f->queue, p->left); + addqueue(&f->queue, p->mid); + addqueue(&f->queue, p->right); + freeroute(p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v4root[h], p->left); + freeroute(p); + } + } + } + if(dolock) + wunlock(&routelock); + } + v4routegeneration++; + + ipifcremroute(f, Rv4, a, mask); +} + +void +v6delroute(Fs *f, uchar *a, uchar *mask, int dolock) +{ + Route **r, *p; + Route 
rt; + int h, eh; + ulong x, y; + + /* build the range of a/mask in rt, used only as a search key */ + for(h = 0; h < IPllen; h++){ + x = nhgetl(a+4*h); + y = nhgetl(mask+4*h); + rt.v6.address[h] = x & y; + rt.v6.endaddress[h] = x | ~y; + } + rt.type = 0; + + eh = V6H(rt.v6.endaddress); + for(h=V6H(rt.v6.address); h<=eh; h++) { + if(dolock) + wlock(&routelock); + r = looknode(&f->v6root[h], &rt); + if(r) { + p = *r; + if(--(p->ref) == 0){ + *r = 0; + addqueue(&f->queue, p->left); + addqueue(&f->queue, p->mid); + addqueue(&f->queue, p->right); + freeroute(p); + while(p = f->queue) { + f->queue = p->mid; + walkadd(f, &f->v6root[h], p->left); + freeroute(p); + } + } + } + if(dolock) + wunlock(&routelock); + } + v6routegeneration++; + + ipifcremroute(f, 0, a, mask); +} + +/* + * look up the route for IPv4 address a. + * the walk keeps descending into mid after each match, so q ends up + * as the innermost range containing a. if q's cached ifc is missing + * or its ifcid is out of date, the ifc is looked up again with + * findipifc (nil is returned if that fails). when c is non-nil the + * result is cached in c->r along with the current v4routegeneration. + */ +Route* +v4lookup(Fs *f, uchar *a, Conv *c) +{ + Route *p, *q; + ulong la; + uchar gate[IPaddrlen]; + Ipifc *ifc; + + if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration) + return c->r; + + la = nhgetl(a); + q = nil; + for(p=f->v4root[V4H(la)]; p;) + if(la >= p->v4.address) { + if(la <= p->v4.endaddress) { + q = p; + p = p->mid; + } else + p = p->right; + } else + p = p->left; + + if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){ + if(q->type & Rifc) { + hnputl(gate+IPv4off, q->v4.address); + memmove(gate, v4prefix, IPv4off); + } else + v4tov6(gate, q->v4.gate); + ifc = findipifc(f, gate, q->type); + if(ifc == nil) + return nil; + q->ifc = ifc; + q->ifcid = ifc->ifcid; + } + + if(c != nil){ + c->r = q; + c->rgen = v4routegeneration; + } + + return q; +} + +/* + * look up the route for IPv6 address a. addresses that start with + * v4prefix are first tried in the v4 tables; if that finds nothing + * the v6 tables are searched. + */ +Route* +v6lookup(Fs *f, uchar *a, Conv *c) +{ + Route *p, *q; + ulong la[IPllen]; + int h; + ulong x, y; + uchar gate[IPaddrlen]; + Ipifc *ifc; + + if(memcmp(a, v4prefix, IPv4off) == 0){ + q = v4lookup(f, a+IPv4off, c); + if(q != nil) + return q; + } + + if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration) + return c->r; + + for(h = 0; h < IPllen; h++) + la[h] = nhgetl(a+4*h); + + q = 0; + for(p=f->v6root[V6H(la)]; p;){ + for(h = 0; h < IPllen; h++){ + x = 
la[h]; + y = p->v6.address[h]; + if(x == y) + continue; + if(x < y){ + /* la is below this node's range */ + p = p->left; + goto next; + } + break; + } + for(h = 0; h < IPllen; h++){ + x = la[h]; + y = p->v6.endaddress[h]; + if(x == y) + continue; + if(x > y){ + /* la is above this node's range */ + p = p->right; + goto next; + } + break; + } + /* la is inside the range: remember it and look for a narrower one */ + q = p; + p = p->mid; +next: ; + } + + if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){ + if(q->type & Rifc) { + for(h = 0; h < IPllen; h++) + hnputl(gate+4*h, q->v6.address[h]); + ifc = findipifc(f, gate, q->type); + } else + ifc = findipifc(f, q->v6.gate, q->type); + if(ifc == nil) + return nil; + q->ifc = ifc; + q->ifcid = ifc->ifcid; + } + if(c != nil){ + c->r = q; + c->rgen = v6routegeneration; + } + + return q; +} + +/* + * render the route type flags into p as a blank-padded, + * NUL-terminated string of 4 characters; p needs room for 5 bytes. + */ +void +routetype(int type, char *p) +{ + memset(p, ' ', 4); + p[4] = 0; + if(type & Rv4) + *p++ = '4'; + else + *p++ = '6'; + if(type & Rifc) + *p++ = 'i'; + if(type & Runi) + *p++ = 'u'; + else if(type & Rbcast) + *p++ = 'b'; + else if(type & Rmulti) + *p++ = 'm'; + if(type & Rptpt) + *p = 'p'; +} + +/* format of one line of route output: addr mask gate type tag ifc */ +char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n"; + +/* + * unpack route r into IPaddrlen-byte addr, mask and gate, + * its type string t (see routetype), and *nifc, which is + * r->ifc->conv->x or -1 when r has no ifc. + */ +void +convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc) +{ + int i; + + if(r->type & Rv4){ + memmove(addr, v4prefix, IPv4off); + hnputl(addr+IPv4off, r->v4.address); + memset(mask, 0xff, IPv4off); + hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address)); + memmove(gate, v4prefix, IPv4off); + memmove(gate+IPv4off, r->v4.gate, IPv4addrlen); + } else { + for(i = 0; i < IPllen; i++){ + hnputl(addr + 4*i, r->v6.address[i]); + hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i])); + } + memmove(gate, r->v6.gate, IPaddrlen); + } + + routetype(r->type, t); + + if(r->ifc) + *nifc = r->ifc->conv->x; + else + *nifc = -1; +} + +/* + * this code is not in rr to reduce stack size + */ +static void +sprintroute(Route *r, Routewalk *rw) +{ + int nifc, n; + char t[5], *iname, ifbuf[5]; + uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen]; + char *p; + + convroute(r, addr, mask, gate, t, &nifc); + 
iname = "-"; + if(nifc != -1) { + iname = ifbuf; + /* ifbuf is only 5 bytes: bound the write instead of trusting nifc < 10000 */ + snprint(ifbuf, sizeof(ifbuf), "%d", nifc); + } + p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname); + /* + * rw->o < 0 means -rw->o bytes of output are still to be skipped: + * drop what was just formatted, or slide its tail down to rw->p + * when the line straddles the requested offset. + */ + if(rw->o < 0){ + n = p - rw->p; + if(n > -rw->o){ + memmove(rw->p, rw->p-rw->o, n+rw->o); + rw->p = p + rw->o; + } + rw->o += n; + } else + rw->p = p; +} + +/* + * recurse descending tree, applying the function in Routewalk + * to every node that hashes to bucket rw->h. + * returns 0 once the output buffer is full, 1 otherwise. + */ +static int +rr(Route *r, Routewalk *rw) +{ + int h; + + if(rw->e <= rw->p) + return 0; + if(r == nil) + return 1; + + if(rr(r->left, rw) == 0) + return 0; + + if(r->type & Rv4) + h = V4H(r->v4.address); + else + h = V6H(r->v6.address); + + if(h == rw->h) + rw->walk(r, rw); + + if(rr(r->mid, rw) == 0) + return 0; + + return rr(r->right, rw); +} + +/* + * walk every v4 and then every v6 bucket under the routelock + * read lock, stopping early when rw's buffer fills up. + */ +void +ipwalkroutes(Fs *f, Routewalk *rw) +{ + rlock(&routelock); + if(rw->e > rw->p) { + for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++) + if(rr(f->v4root[rw->h], rw) == 0) + break; + } + if(rw->e > rw->p) { + for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++) + if(rr(f->v6root[rw->h], rw) == 0) + break; + } + runlock(&routelock); +} + +/* + * format the routing tables into p (at most n bytes), skipping the + * first offset bytes of output. returns the number of bytes produced. + */ +long +routeread(Fs *f, char *p, ulong offset, int n) +{ + Routewalk rw; + + rw.p = p; + rw.e = p+n; + rw.o = -offset; + rw.walk = sprintroute; + + ipwalkroutes(f, &rw); + + return rw.p - p; +} + +/* + * this code is not in routeflush to reduce stack size + */ +void +delroute(Fs *f, Route *r, int dolock) +{ + uchar addr[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar gate[IPaddrlen]; + char t[5]; + int nifc; + + convroute(r, addr, mask, gate, t, &nifc); + if(r->type & Rv4) + v4delroute(f, addr+IPv4off, mask+IPv4off, dolock); + else + v6delroute(f, addr, mask, dolock); +} + +/* + * recurse until one route is deleted + * returns 0 if nothing is deleted, 1 otherwise + */ +int +routeflush(Fs *f, Route *r, char *tag) +{ + if(r == nil) + return 0; + if(routeflush(f, r->mid, tag)) + return 1; + if(routeflush(f, r->left, tag)) + return 1; + if(routeflush(f, r->right, tag)) + return 1; + if((r->type & Rifc) == 0){ + 
if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){ + delroute(f, r, 0); + return 1; + } + } + return 0; +} + +long +routewrite(Fs *f, Chan *c, char *p, int n) +{ + int h, changed; + char *tag; + Cmdbuf *cb; + uchar addr[IPaddrlen]; + uchar mask[IPaddrlen]; + uchar gate[IPaddrlen]; + IPaux *a, *na; + + cb = parsecmd(p, n); + if(waserror()){ + free(cb); + nexterror(); + } + + if(strcmp(cb->f[0], "flush") == 0){ + tag = cb->f[1]; + for(h = 0; h < nelem(f->v4root); h++) + for(changed = 1; changed;){ + wlock(&routelock); + changed = routeflush(f, f->v4root[h], tag); + wunlock(&routelock); + } + for(h = 0; h < nelem(f->v6root); h++) + for(changed = 1; changed;){ + wlock(&routelock); + changed = routeflush(f, f->v6root[h], tag); + wunlock(&routelock); + } + } else if(strcmp(cb->f[0], "remove") == 0){ + if(cb->nf < 3) + error(Ebadarg); + parseip(addr, cb->f[1]); + parseipmask(mask, cb->f[2]); + if(memcmp(addr, v4prefix, IPv4off) == 0) + v4delroute(f, addr+IPv4off, mask+IPv4off, 1); + else + v6delroute(f, addr, mask, 1); + } else if(strcmp(cb->f[0], "add") == 0){ + if(cb->nf < 4) + error(Ebadarg); + parseip(addr, cb->f[1]); + parseipmask(mask, cb->f[2]); + parseip(gate, cb->f[3]); + tag = "none"; + if(c != nil){ + a = c->aux; + tag = a->tag; + } + if(memcmp(addr, v4prefix, IPv4off) == 0) + v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0); + else + v6addroute(f, tag, addr, mask, gate, 0); + } else if(strcmp(cb->f[0], "tag") == 0) { + if(cb->nf < 2) + error(Ebadarg); + + a = c->aux; + na = newipaux(a->owner, cb->f[1]); + c->aux = na; + free(a); + } + + poperror(); + free(cb); + return n; +} diff -Nru 0/sys/src/nix/ip/ipv6.c 4/sys/src/nix/ip/ipv6.c --- 0/sys/src/nix/ip/ipv6.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ipv6.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,729 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + +enum +{ + IP4HDR = 20, 
/* sizeof(Ip4hdr) */ + IP6HDR = 40, /* sizeof(Ip6hdr) */ + IP_HLEN4 = 0x05, /* Header length in words */ + IP_DF = 0x4000, /* Don't fragment */ + IP_MF = 0x2000, /* More fragments */ + IP6FHDR = 8, /* sizeof(Fraghdr6) */ + IP_MAX = 32*1024, /* Maximum Internet packet size */ +}; + +/* + * IPV6CLASS extracts a class value from the low nibble of vcf[0] and the + * high nibble of vcf[1]. + * NOTE(review): the shifts by 2 do not mirror how ipoput6 stores tos + * (tos>>4 into vcf[0], tos<<4 into vcf[1]); the only visible use passes + * the result to ipoput6 with gating set, where tos is ignored - confirm. + */ +#define IPV6CLASS(hdr) (((hdr)->vcf[0]&0x0F)<<2 | ((hdr)->vcf[1]&0xF0)>>2) +/* version nibble of the packet at xp->rp, left in the high 4 bits */ +#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0] & 0xF0) +/* + * This sleazy macro is stolen shamelessly from ip.c, see comment there. + * It treats the start of the Block's buffer (base) as an Ipfrag. + */ +#define BKFG(xp) ((Ipfrag*)((xp)->base)) + +typedef struct IP IP; +typedef struct Fragment4 Fragment4; +typedef struct Fragment6 Fragment6; +typedef struct Ipfrag Ipfrag; + +Block* ip6reassemble(IP*, int, Block*, Ip6hdr*); +Fragment6* ipfragallo6(IP*); +void ipfragfree6(IP*, Fragment6*); +Block* procopts(Block *bp); +static Block* procxtns(IP *ip, Block *bp, int doreasm); +int unfraglen(Block *bp, uchar *nexthdr, int setfh); + +/* MIB II counters */ +enum +{ + Forwarding, + DefaultTTL, + InReceives, + InHdrErrors, + InAddrErrors, + ForwDatagrams, + InUnknownProtos, + InDiscards, + InDelivers, + OutRequests, + OutDiscards, + OutNoRoutes, + ReasmTimeout, + ReasmReqds, + ReasmOKs, + ReasmFails, + FragOKs, + FragFails, + FragCreates, + + Nstats, +}; + +/* printable names of the counters above, indexed by the same enum */ +static char *statnames[] = +{ +[Forwarding] "Forwarding", +[DefaultTTL] "DefaultTTL", +[InReceives] "InReceives", +[InHdrErrors] "InHdrErrors", +[InAddrErrors] "InAddrErrors", +[ForwDatagrams] "ForwDatagrams", +[InUnknownProtos] "InUnknownProtos", +[InDiscards] "InDiscards", +[InDelivers] "InDelivers", +[OutRequests] "OutRequests", +[OutDiscards] "OutDiscards", +[OutNoRoutes] "OutNoRoutes", +[ReasmTimeout] "ReasmTimeout", +[ReasmReqds] "ReasmReqds", +[ReasmOKs] "ReasmOKs", +[ReasmFails] "ReasmFails", +[FragOKs] "FragOKs", +[FragFails] "FragFails", +[FragCreates] "FragCreates", +}; + +struct Fragment4 +{ + Block* blist; + Fragment4* next; + ulong src; + ulong dst; + ushort id; + ulong age; +}; + +/* one IPv6 reassembly queue, keyed by src, dst and id (see ip6reassemble) */ +struct Fragment6 +{ + Block* 
blist; + Fragment6* next; + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; + uint id; + ulong age; +}; + +/* per-fragment bookkeeping reached through BKFG: data offset and length */ +struct Ipfrag +{ + ushort foff; + ushort flen; +}; + +/* an instance of IP */ +struct IP +{ + ulong stats[Nstats]; + + QLock fraglock4; + Fragment4* flisthead4; + Fragment4* fragfree4; + Ref id4; + + QLock fraglock6; + Fragment6* flisthead6; + Fragment6* fragfree6; + Ref id6; + + int iprouting; /* true if we route like a gateway */ +}; + +/* + * send the IPv6 packet in bp, fragmenting it when it is larger than + * the outgoing interface allows. gating is set for forwarded packets: + * the version/class bytes are then left alone and the length is taken + * from the packet's own ploadlen. returns 0, or -1 when v6lookup finds + * no route for the destination. + */ +int +ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c) +{ + int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff; + int morefrags, blklen, rv = 0, tentative; + uchar *gate, nexthdr; + Block *xp, *nb; + Fraghdr6 fraghdr; + IP *ip; + Ip6hdr *eh; + Ipifc *ifc; + Route *r, *sr; + + ip = f->ip; + + /* Fill out the ip header */ + eh = (Ip6hdr*)(bp->rp); + + ip->stats[OutRequests]++; + + /* Number of uchars in data and ip header to write */ + len = blocklen(bp); + + tentative = iptentative(f, eh->src); + if(tentative){ + netlog(f, Logip, "reject tx of packet with tentative src address\n"); + goto free; + } + + if(gating){ + chunk = nhgets(eh->ploadlen); + if(chunk > len){ + ip->stats[OutDiscards]++; + netlog(f, Logip, "short gated packet\n"); + goto free; + } + if(chunk + IPV6HDR_LEN < len) + len = chunk + IPV6HDR_LEN; + } + + if(len >= IP_MAX){ +// print("len > IP_MAX, free\n"); + ip->stats[OutDiscards]++; + netlog(f, Logip, "exceeded ip max size %I\n", eh->dst); + goto free; + } + + r = v6lookup(f, eh->dst, c); + if(r == nil){ +// print("no route for %I, src %I free\n", eh->dst, eh->src); + ip->stats[OutNoRoutes]++; + netlog(f, Logip, "no interface %I\n", eh->dst); + rv = -1; + goto free; + } + + ifc = r->ifc; + if(r->type & (Rifc|Runi)) + gate = eh->dst; + else if(r->type & (Rbcast|Rmulti)) { + gate = eh->dst; + sr = v6lookup(f, eh->src, nil); + if(sr && (sr->type & Runi)) + ifc = sr->ifc; + } + else + gate = r->v6.gate; + + if(!gating) + eh->vcf[0] = IP_VER6; + eh->ttl = ttl; + if(!gating) { + 
eh->vcf[0] |= tos >> 4; + eh->vcf[1] = tos << 4; + } + + if(!canrlock(ifc)) + goto free; + + if(waserror()){ + runlock(ifc); + nexterror(); + } + + if(ifc->medium == nil) + goto raise; + + /* If we don't need to fragment just send it */ + medialen = ifc->maxtu - ifc->medium->hsize; + if(len <= medialen) { + hnputs(eh->ploadlen, len-IPV6HDR_LEN); + ifc->medium->bwrite(ifc, bp, V6, gate); + runlock(ifc); + poperror(); + return 0; + } + + if(gating && ifc->reassemble <= 0) { + /* + * v6 intermediate nodes are not supposed to fragment pkts; + * we fragment if ifc->reassemble is turned on; an exception + * needed for nat. + */ + ip->stats[OutDiscards]++; + icmppkttoobig6(f, ifc, bp); + netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst); + goto raise; + } + + /* start v6 fragmentation */ + uflen = unfraglen(bp, &nexthdr, 1); + if(uflen > medialen) { + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst); + goto raise; + } + + /* flen: bytes to be fragmented; seglen: data bytes per fragment, a multiple of 8 */ + flen = len - uflen; + seglen = (medialen - (uflen + IP6FHDR)) & ~7; + if(seglen < 8) { + ip->stats[FragFails]++; + ip->stats[OutDiscards]++; + netlog(f, Logip, "%I: seglen < 8\n", eh->dst); + goto raise; + } + + lid = incref(&ip->id6); + fraghdr.nexthdr = nexthdr; + fraghdr.res = 0; + hnputl(fraghdr.id, lid); + + /* step xp past the unfragmentable part, to the first byte to be fragmented */ + xp = bp; + offset = uflen; + while (xp && offset && offset >= BLEN(xp)) { + offset -= BLEN(xp); + xp = xp->next; + } + xp->rp += offset; + + fragoff = 0; + morefrags = 1; + + for(; fragoff < flen; fragoff += seglen) { + nb = allocb(uflen + IP6FHDR + seglen); + + /* the last fragment carries whatever is left and clears the M flag */ + if(fragoff + seglen >= flen) { + seglen = flen - fragoff; + morefrags = 0; + } + + hnputs(eh->ploadlen, seglen+IP6FHDR); + memmove(nb->wp, eh, uflen); + nb->wp += uflen; + + hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */ + fraghdr.offsetRM[1] |= morefrags; + memmove(nb->wp, &fraghdr, IP6FHDR); + nb->wp += IP6FHDR; + + /* Copy data */ + chunk = seglen; + while (chunk) { + if(!xp) { + 
ip->stats[OutDiscards]++; + ip->stats[FragFails]++; + freeblist(nb); + netlog(f, Logip, "!xp: chunk in v6%d\n", chunk); + goto raise; + } + blklen = chunk; + if(BLEN(xp) < chunk) + blklen = BLEN(xp); + memmove(nb->wp, xp->rp, blklen); + + nb->wp += blklen; + xp->rp += blklen; + chunk -= blklen; + if(xp->rp == xp->wp) + xp = xp->next; + } + + ifc->medium->bwrite(ifc, nb, V6, gate); + ip->stats[FragCreates]++; + } + ip->stats[FragOKs]++; + +raise: + runlock(ifc); + poperror(); +free: + freeblist(bp); + return rv; +} + +/* + * input an IPv6 packet received on ifc. + * packets addressed to us are handed to the receive routine of the + * protocol named by the header; packets for someone else are forwarded + * through ipoput6 when iprouting is set and dropped otherwise. + * bp is consumed on every path. + */ +void +ipiput6(Fs *f, Ipifc *ifc, Block *bp) +{ + int hl, hop, tos, notforme, tentative; + uchar proto; + uchar v6dst[IPaddrlen]; + IP *ip; + Ip6hdr *h; + Proto *p; + Route *r, *sr; + + ip = f->ip; + ip->stats[InReceives]++; + + /* + * Ensure we have all the header info in the first + * block. Make life easier for other protocols by + * collecting up to the first 64 bytes in the first block. + */ + if(BLEN(bp) < 64) { + hl = blocklen(bp); + if(hl < IP6HDR) + hl = IP6HDR; + if(hl > 64) + hl = 64; + bp = pullupblock(bp, hl); + if(bp == nil) + return; + } + + h = (Ip6hdr *)bp->rp; + + memmove(&v6dst[0], &h->dst[0], IPaddrlen); + notforme = ipforme(f, v6dst) == 0; + tentative = iptentative(f, v6dst); + + if(tentative && h->proto != ICMPv6) { + print("tentative addr, drop\n"); + freeblist(bp); + return; + } + + /* Check header version */ + if(BLKIPVER(bp) != IP_VER6) { + ip->stats[InHdrErrors]++; + /* the version is the high nibble of vcf[0] */ + netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4); + freeblist(bp); + return; + } + + /* route */ + if(notforme) { + if(!ip->iprouting){ + /* bp may be a chain; free all of it like the other drop paths */ + freeblist(bp); + return; + } + + /* don't forward to link-local destinations */ + if(islinklocal(h->dst) || + (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){ + ip->stats[OutDiscards]++; + freeblist(bp); + return; + } + + /* don't forward to source's network */ + sr = v6lookup(f, h->src, nil); + r = v6lookup(f, h->dst, nil); + + if(r == nil || sr == r){ + ip->stats[OutDiscards]++; + freeblist(bp); + return; 
+ } + + /* don't forward if packet has timed out */ + hop = h->ttl; + if(hop < 1) { + ip->stats[InHdrErrors]++; + icmpttlexceeded6(f, ifc, bp); + freeblist(bp); + return; + } + + /* process headers & reassemble if the interface expects it */ + bp = procxtns(ip, bp, r->ifc->reassemble); + if(bp == nil) + return; + + ip->stats[ForwDatagrams]++; + h = (Ip6hdr *)bp->rp; + tos = IPV6CLASS(h); + hop = h->ttl; + /* forward with the hop limit decremented; gating=1 */ + ipoput6(f, bp, 1, hop-1, tos, nil); + return; + } + + /* reassemble & process headers if needed */ + bp = procxtns(ip, bp, 1); + if(bp == nil) + return; + + h = (Ip6hdr *) (bp->rp); + proto = h->proto; + p = Fsrcvpcol(f, proto); + if(p && p->rcv) { + ip->stats[InDelivers]++; + (*p->rcv)(p, ifc, bp); + return; + } + + ip->stats[InDiscards]++; + ip->stats[InUnknownProtos]++; + freeblist(bp); +} + +/* + * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6 + * frees frag's blocks, clears its key, unlinks it from + * ip->flisthead6 and pushes it onto ip->fragfree6. + */ +void +ipfragfree6(IP *ip, Fragment6 *frag) +{ + Fragment6 *fl, **l; + + if(frag->blist) + freeblist(frag->blist); + + memset(frag->src, 0, IPaddrlen); + frag->id = 0; + frag->blist = nil; + + l = &ip->flisthead6; + for(fl = *l; fl; fl = fl->next) { + if(fl == frag) { + *l = frag->next; + break; + } + l = &fl->next; + } + + frag->next = ip->fragfree6; + ip->fragfree6 = frag; +} + +/* + * ipfragallo6 - copied from ipfragalloc4 + * takes a Fragment6 off ip->fragfree6 (first reclaiming the last + * entry of ip->flisthead6 if the free list is empty), links it at + * the head of ip->flisthead6 and sets its age to NOW + 30000. + * NOTE(review): the reclaim loop dereferences ip->flisthead6 without + * a nil check, so it relies on the lists having been populated + * elsewhere - confirm. + */ +Fragment6* +ipfragallo6(IP *ip) +{ + Fragment6 *f; + + while(ip->fragfree6 == nil) { + /* free last entry on fraglist */ + for(f = ip->flisthead6; f->next; f = f->next) + ; + ipfragfree6(ip, f); + } + f = ip->fragfree6; + ip->fragfree6 = f->next; + f->next = ip->flisthead6; + ip->flisthead6 = f; + f->age = NOW + 30000; + + return f; +} + +/* + * reassemble bp if it carries a fragment header and doreasm is set, + * then hand it to procopts when extension headers are present. + * returns nil when the packet was queued for reassembly. + */ +static Block* +procxtns(IP *ip, Block *bp, int doreasm) +{ + int offset; + uchar proto; + Ip6hdr *h; + + h = (Ip6hdr *)bp->rp; + offset = unfraglen(bp, &proto, 0); + + if(proto == FH && doreasm != 0) { + bp = ip6reassemble(ip, offset, bp, h); + if(bp == nil) + return nil; + offset = unfraglen(bp, &proto, 0); + } + + if(proto == DOH || 
offset > IP6HDR) + bp = procopts(bp); + return bp; +} + +/* + * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr, + * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value + * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr + * field of the last header in the "Unfragmentable part" is set to FH. + */ +int +unfraglen(Block *bp, uchar *nexthdr, int setfh) +{ + uchar *p, *q; + int ufl, hs; + + p = bp->rp; + q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */ + *nexthdr = *q; + ufl = IP6HDR; + p += ufl; + + for(;;) { + if(*nexthdr == HBH || *nexthdr == RH) { + *nexthdr = *p; + /* extension header length byte counts 8-byte units beyond the first */ + hs = ((int)*(p+1) + 1) * 8; + ufl += hs; + q = p; + p += hs; + } + else + break; + } + + /* q points at the nexthdr byte of the last unfragmentable header */ + if(*nexthdr == FH) + *q = *p; + if(setfh) + *q = FH; + return ufl; +} + +/* placeholder: option processing is not implemented, bp is returned untouched */ +Block* +procopts(Block *bp) +{ + return bp; +} + +/* + * queue the fragment in bp (whose fragment header starts uflen bytes + * into the packet) on the reassembly queue for its src, dst and id. + * returns the reassembled packet once every fragment has arrived, + * bp itself when the fragment header has offset 0 and no M flag, + * and nil while fragments are still missing. + */ +Block* +ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih) +{ + int fend, offset, ovlap, len, fragsize, pktposn; + uint id; + uchar src[IPaddrlen], dst[IPaddrlen]; + Block *bl, **l, *last, *prev; + Fraghdr6 *fraghdr; + Fragment6 *f, *fnext; + + fraghdr = (Fraghdr6 *)(bp->rp + uflen); + memmove(src, ih->src, IPaddrlen); + memmove(dst, ih->dst, IPaddrlen); + id = nhgetl(fraghdr->id); + offset = nhgets(fraghdr->offsetRM) & ~7; + + /* + * block lists are too hard, pullupblock into a single block + */ + if(bp->next){ + bp = pullupblock(bp, blocklen(bp)); + if(bp == nil) + return nil; + /* + * pullupblock may hand back a different first block, + * so re-derive every pointer into the packet: fraghdr + * is dereferenced again further down. + */ + ih = (Ip6hdr *)bp->rp; + fraghdr = (Fraghdr6 *)(bp->rp + uflen); + } + + qlock(&ip->fraglock6); + + /* + * find a reassembly queue for this fragment + * (and expire the stale queues passed on the way) + */ + for(f = ip->flisthead6; f; f = fnext){ + fnext = f->next; + if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id) + break; + if(f->age < NOW){ + ip->stats[ReasmTimeout]++; + ipfragfree6(ip, f); + } + } + + /* + * if this isn't a fragmented packet, accept it + * and get rid of any fragments that might go + * with it. 
+ */ + if(nhgets(fraghdr->offsetRM) == 0) { /* 1st frag is also last */ + if(f) { + ipfragfree6(ip, f); + ip->stats[ReasmFails]++; + } + qunlock(&ip->fraglock6); + return bp; + } + + /* make room for an Ipfrag at the base of the block (see BKFG) */ + if(bp->base+sizeof(Ipfrag) >= bp->rp){ + bp = padblock(bp, sizeof(Ipfrag)); + bp->rp += sizeof(Ipfrag); + /* padblock may have moved the data to a new block; ih must follow it */ + ih = (Ip6hdr *)bp->rp; + } + + BKFG(bp)->foff = offset; + BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR; + + /* First fragment allocates a reassembly queue */ + if(f == nil) { + f = ipfragallo6(ip); + f->id = id; + memmove(f->src, src, IPaddrlen); + memmove(f->dst, dst, IPaddrlen); + + f->blist = bp; + + qunlock(&ip->fraglock6); + ip->stats[ReasmReqds]++; + return nil; + } + + /* + * find the new fragment's position in the queue + * (the queue is kept sorted by foff) + */ + prev = nil; + l = &f->blist; + bl = f->blist; + while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) { + prev = bl; + l = &bl->next; + bl = bl->next; + } + + /* Check overlap of a previous fragment - trim away as necessary */ + if(prev) { + ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff; + if(ovlap > 0) { + if(ovlap >= BKFG(bp)->flen) { + freeblist(bp); + qunlock(&ip->fraglock6); + return nil; + } + BKFG(prev)->flen -= ovlap; + } + } + + /* Link onto assembly queue */ + bp->next = *l; + *l = bp; + + /* Check to see if succeeding segments overlap */ + if(bp->next) { + l = &bp->next; + fend = BKFG(bp)->foff + BKFG(bp)->flen; + + /* Take completely covered segments out */ + while(*l) { + ovlap = fend - BKFG(*l)->foff; + if(ovlap <= 0) + break; + if(ovlap < BKFG(*l)->flen) { + BKFG(*l)->flen -= ovlap; + BKFG(*l)->foff += ovlap; + /* move up ih hdrs */ + memmove((*l)->rp + ovlap, (*l)->rp, uflen); + (*l)->rp += ovlap; + break; + } + last = (*l)->next; + (*l)->next = nil; + freeblist(*l); + *l = last; + } + } + + /* + * look for a complete packet. if we get to a fragment + * with the trailing bit of fraghdr->offsetRM[1] clear (no more fragments), we're done. 
+ */ + pktposn = 0; + for(bl = f->blist; bl && BKFG(bl)->foff == pktposn; bl = bl->next) { + fraghdr = (Fraghdr6 *)(bl->rp + uflen); + /* M flag clear: this is the last fragment and nothing before it is missing */ + if((fraghdr->offsetRM[1] & 1) == 0) { + bl = f->blist; + + /* get rid of frag header in first fragment */ + memmove(bl->rp + IP6FHDR, bl->rp, uflen); + bl->rp += IP6FHDR; + len = nhgets(((Ip6hdr*)bl->rp)->ploadlen) - IP6FHDR; + bl->wp = bl->rp + len + IP6HDR; + /* + * Pullup all the fragment headers and + * return a complete packet + */ + for(bl = bl->next; bl; bl = bl->next) { + fragsize = BKFG(bl)->flen; + len += fragsize; + bl->rp += uflen + IP6FHDR; + bl->wp = bl->rp + fragsize; + } + + /* detach the chain from the queue before the queue is recycled */ + bl = f->blist; + f->blist = nil; + ipfragfree6(ip, f); + ih = (Ip6hdr*)bl->rp; + hnputs(ih->ploadlen, len); + qunlock(&ip->fraglock6); + ip->stats[ReasmOKs]++; + return bl; + } + pktposn += BKFG(bl)->flen; + } + qunlock(&ip->fraglock6); + return nil; +} diff -Nru 0/sys/src/nix/ip/ipv6.h 4/sys/src/nix/ip/ipv6.h --- 0/sys/src/nix/ip/ipv6.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ipv6.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,185 @@ +/* + * Internet Protocol Version 6 + * + * rfc2460 defines the protocol. + * rfc4291 defines the address prefixes. + * + * global unicast is anything but unspecified (::), loopback (::1), + * multicast (ff00::/8), and link-local unicast (fe80::/10). + * + * site-local (fec0::/10) is now deprecated by rfc3879. + * + * Unique Local IPv6 Unicast Addresses are defined by rfc4193. + * prefix is fc00::/7, scope is global, routing is limited to roughly a site. + */ +/* address tests: ff00::/8 and fe80::/10 respectively */ +#define isv6mcast(addr) ((addr)[0] == 0xff) +#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80) + +#define optexsts(np) (nhgets((np)->ploadlen) > 24) +/* true if the first 13 bytes of addr match v6solicitednode */ +#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0) + +#ifndef MIN +#define MIN(a, b) ((a) <= (b)? 
(a): (b)) +#endif + +enum { /* Header Types */ + HBH = 0, /* hop-by-hop options header */ + ICMP = 1, + IGMP = 2, + GGP = 3, + IPINIP = 4, + ST = 5, + TCP = 6, + UDP = 17, + ISO_TP4 = 29, + RH = 43, /* routing header */ + FH = 44, /* fragment header */ + IDRP = 45, + RSVP = 46, + AH = 51, + ESP = 52, + ICMPv6 = 58, + NNH = 59, /* no next header */ + DOH = 60, /* destination options header */ + ISO_IP = 80, + IGRP = 88, + OSPF = 89, + + Maxhdrtype = 256, +}; + +enum { + /* multicast flags and scopes */ + +// Well_known_flg = 0, +// Transient_flg = 1, + +// Interface_local_scop = 1, + Link_local_scop = 2, +// Site_local_scop = 5, +// Org_local_scop = 8, + Global_scop = 14, + + /* various prefix lengths */ + SOLN_PREF_LEN = 13, + + /* icmpv6 unreach codes */ + icmp6_no_route = 0, + icmp6_ad_prohib = 1, + icmp6_unassigned = 2, + icmp6_adr_unreach = 3, + icmp6_port_unreach = 4, + icmp6_unkn_code = 5, + + /* various flags & constants */ + v6MINTU = 1280, + HOP_LIMIT = 255, + ETHERHDR_LEN = 14, + IPV6HDR_LEN = 40, + IPV4HDR_LEN = 20, + + /* option types */ + + SRC_LLADDR = 1, + TARGET_LLADDR = 2, + PREFIX_INFO = 3, + REDIR_HEADER = 4, + MTU_OPTION = 5, + + SRC_UNSPEC = 0, + SRC_UNI = 1, + TARG_UNI = 2, + TARG_MULTI = 3, + + Tunitent = 1, + Tuniproxy = 2, + Tunirany = 3, + + /* Router constants (all times in milliseconds) */ + MAX_INIT_RTR_ADVERT_INTVL = 16000, + MAX_INIT_RTR_ADVERTS = 3, + MAX_FINAL_RTR_ADVERTS = 3, + MIN_DELAY_BETWEEN_RAS = 3000, + MAX_RA_DELAY_TIME = 500, + + /* Host constants */ + MAX_RTR_SOLICIT_DELAY = 1000, + RTR_SOLICIT_INTVL = 4000, + MAX_RTR_SOLICITS = 3, + + /* Node constants */ + MAX_MULTICAST_SOLICIT = 3, + MAX_UNICAST_SOLICIT = 3, + MAX_ANYCAST_DELAY_TIME = 1000, + MAX_NEIGHBOR_ADVERT = 3, + REACHABLE_TIME = 30000, + RETRANS_TIMER = 1000, + DELAY_FIRST_PROBE_TIME = 5000, +}; + +typedef struct Ip6hdr Ip6hdr; +typedef struct Opthdr Opthdr; +typedef struct Routinghdr Routinghdr; +typedef struct Fraghdr6 Fraghdr6; + +struct Ip6hdr { + uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */ + uchar ploadlen[2]; /* payload 
length: packet length - 40 */ + uchar proto; /* next header type */ + uchar ttl; /* hop limit */ + uchar src[IPaddrlen]; + uchar dst[IPaddrlen]; +}; + +/* leading bytes of an extension header: next header type and length */ +struct Opthdr { + uchar nexthdr; + uchar len; +}; + +struct Routinghdr { + uchar nexthdr; + uchar len; + uchar rtetype; + uchar segrem; +}; + +/* fragment header; IP6FHDR (8) bytes on the wire */ +struct Fraghdr6 { + uchar nexthdr; + uchar res; + uchar offsetRM[2]; /* Offset, Res, M flag */ + uchar id[4]; /* fragment identification, big-endian */ +}; + +extern uchar v6allnodesN[IPaddrlen]; +extern uchar v6allnodesL[IPaddrlen]; +extern uchar v6allroutersN[IPaddrlen]; +extern uchar v6allroutersL[IPaddrlen]; +extern uchar v6allnodesNmask[IPaddrlen]; +extern uchar v6allnodesLmask[IPaddrlen]; +extern uchar v6allroutersS[IPaddrlen]; +extern uchar v6solicitednode[IPaddrlen]; +extern uchar v6solicitednodemask[IPaddrlen]; +extern uchar v6Unspecified[IPaddrlen]; +extern uchar v6loopback[IPaddrlen]; +extern uchar v6loopbackmask[IPaddrlen]; +extern uchar v6linklocal[IPaddrlen]; +extern uchar v6linklocalmask[IPaddrlen]; +extern uchar v6glunicast[IPaddrlen]; +extern uchar v6multicast[IPaddrlen]; +extern uchar v6multicastmask[IPaddrlen]; + +extern int v6llpreflen; +extern int v6lbpreflen; +extern int v6mcpreflen; +extern int v6snpreflen; +extern int v6aNpreflen; +extern int v6aLpreflen; + +extern int ReTransTimer; + +void ipv62smcast(uchar *, uchar *); +void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac); +void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags); +void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp); +void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp); +void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free); diff -Nru 0/sys/src/nix/ip/loopbackmedium.c 4/sys/src/nix/ip/loopbackmedium.c --- 0/sys/src/nix/ip/loopbackmedium.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/loopbackmedium.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,120 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + 
+#include "ip.h" + +enum +{ + Maxtu= 16*1024, +}; + +/* per-interface state of the loopback medium, kept in ifc->arg */ +typedef struct LB LB; +struct LB +{ + Proc *readp; /* the loopbackread kproc, 0 once it has exited */ + Queue *q; /* packets written by loopbackbwrite, read by loopbackread */ + Fs *f; +}; + +static void loopbackread(void *a); + +/* + * bind: allocate the LB state and its queue and start the reader kproc + */ +static void +loopbackbind(Ipifc *ifc, int, char**) +{ + LB *lb; + + lb = smalloc(sizeof(*lb)); + lb->f = ifc->conv->p->f; + lb->q = qopen(1024*1024, Qmsg, nil, nil); + ifc->arg = lb; + ifc->mbps = 10001; + + kproc("loopbackread", loopbackread, ifc); + +} + +/* + * unbind: post a note to the reader, wait until it has cleared + * lb->readp, then free the queue and the LB state + */ +static void +loopbackunbind(Ipifc *ifc) +{ + LB *lb = ifc->arg; + + if(lb->readp) + postnote(lb->readp, 1, "unbind", NUser); + + /* wait for reader to die */ + while(lb->readp != 0) + tsleep(&up->sleep, return0, 0, 300); + + /* clean up */ + qfree(lb->q); + free(lb); +} + +/* + * output: queue bp for the reader; a failed qpass counts as an output error + */ +static void +loopbackbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + LB *lb; + + lb = ifc->arg; + if(qpass(lb->q, bp) < 0) + ifc->outerr++; + ifc->out++; +} + +/* + * reader kproc: take blocks off the queue and feed them to ipiput4 + * with ifc read-locked; blocks are dropped when the lock cannot be + * taken or the interface has no lifc. + */ +static void +loopbackread(void *a) +{ + Ipifc *ifc; + Block *bp; + LB *lb; + + ifc = a; + lb = ifc->arg; + lb->readp = up; /* hide identity under a rock for unbind */ + if(waserror()){ + lb->readp = 0; + pexit("hangup", 1); + } + for(;;){ + bp = qbread(lb->q, Maxtu); + if(bp == nil) + continue; + ifc->in++; + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(lb->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +Medium loopbackmedium = +{ +.hsize= 0, +.mintu= 0, +.maxtu= Maxtu, +.maclen= 0, +.name= "loopback", +.bind= loopbackbind, +.unbind= loopbackunbind, +.bwrite= loopbackbwrite, +}; + +/* register the loopback medium with the IP stack */ +void +loopbackmediumlink(void) +{ + addipmedium(&loopbackmedium); +} diff -Nru 0/sys/src/nix/ip/netdevmedium.c 4/sys/src/nix/ip/netdevmedium.c --- 0/sys/src/nix/ip/netdevmedium.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/netdevmedium.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,153 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static 
void netdevbind(Ipifc *ifc, int argc, char **argv);
+static void netdevunbind(Ipifc *ifc);
+static void netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void netdevread(void *a);
+
+/*
+ * per-interface state; netdevbind stores it in ifc->arg
+ */
+typedef struct Netdevrock Netdevrock;
+struct Netdevrock
+{
+	Fs *f; /* file system we belong to */
+	Proc *readp; /* reading process */
+	Chan *mchan; /* Data channel */
+};
+
+Medium netdevmedium =
+{
+.name= "netdev",
+.hsize= 0,
+.mintu= 0,
+.maxtu= 64000,
+.maclen= 0,
+.bind= netdevbind,
+.unbind= netdevunbind,
+.bwrite= netdevbwrite,
+.unbindonclose= 0,
+};
+
+/*
+ * called to bind an IP ifc to a generic network device
+ * called with ifc qlock'd
+ *
+ * argv[2] is the name of the device file, opened ORDWR;
+ * raises Ebadarg when it is missing.  On success the
+ * channel is remembered in a Netdevrock hung off ifc->arg
+ * and a netdevread kproc is started for the interface.
+ */
+static void
+netdevbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan;
+	Netdevrock *er;
+
+	/* argv[2] is used below, so three arguments are required */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan = namec(argv[2], Aopen, ORDWR, 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->f = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("netdevread", netdevread, ifc);
+}
+
+/*
+ * called with ifc wlock'd
+ *
+ * posts an "unbind" note to the reader, waits for it to
+ * clear er->readp, then closes the channel and frees the rock
+ */
+static void
+netdevunbind(Ipifc *ifc)
+{
+	Netdevrock *er = ifc->arg;
+
+	if(er->readp != nil)
+		postnote(er->readp, 1, "unbind", NUser);
+
+	/* wait for readers to die */
+	while(er->readp != nil)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->mchan != nil)
+		cclose(er->mchan);
+
+	free(er);
+}
+
+/*
+ * called by ipoput with a single block to write
+ */
+static void
+netdevbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	Netdevrock *er = ifc->arg;
+
+	/* hand the device one contiguous block of at least mintu bytes */
+	if(bp->next)
+		bp = concatblock(bp);
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);
+
+	er->mchan->dev->bwrite(er->mchan, bp, 0);
+	ifc->out++;
+}
+
+/*
+ * process to read from the device
+ */
+static void
+netdevread(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Netdevrock *er;
+	char *argv[1];
+
+	ifc = a;
+	er = ifc->arg;
+	er->readp = up; /* hide identity under a rock for unbind */
+	if(waserror()){
+		er->readp = nil;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = er->mchan->dev->bread(er->mchan,
ifc->maxtu, 0); + if(bp == nil){ + /* + * get here if mchan is a pipe and other side hangs up + * clean up this interface & get out +ZZZ is this a good idea? + */ + poperror(); + er->readp = nil; + argv[0] = "unbind"; + if(!waserror()) + ifc->conv->p->ctl(ifc->conv, argv, 1); + pexit("hangup", 1); + } + if(!canrlock(ifc)){ + freeb(bp); + continue; + } + if(waserror()){ + runlock(ifc); + nexterror(); + } + ifc->in++; + if(ifc->lifc == nil) + freeb(bp); + else + ipiput4(er->f, ifc, bp); + runlock(ifc); + poperror(); + } +} + +void +netdevmediumlink(void) +{ + addipmedium(&netdevmedium); +} diff -Nru 0/sys/src/nix/ip/netlog.c 4/sys/src/nix/ip/netlog.c --- 0/sys/src/nix/ip/netlog.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/netlog.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,266 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "../ip/ip.h" + +enum { + Nlog = 16*1024, +}; + +/* + * action log + */ +struct Netlog { + Lock; + int opens; + char* buf; + char *end; + char *rptr; + int len; + + int logmask; /* mask of things to debug */ + uchar iponly[IPaddrlen]; /* ip address to print debugging for */ + int iponlyset; + + QLock; + Rendez; +}; + +typedef struct Netlogflag { + char* name; + int mask; +} Netlogflag; + +static Netlogflag flags[] = +{ + { "ppp", Logppp, }, + { "ip", Logip, }, + { "fs", Logfs, }, + { "tcp", Logtcp, }, + { "il", Logil, }, + { "icmp", Logicmp, }, + { "udp", Logudp, }, + { "compress", Logcompress, }, + { "ilmsg", Logil|Logilmsg, }, + { "gre", Loggre, }, + { "tcpwin", Logtcp|Logtcpwin, }, + { "tcprxmt", Logtcp|Logtcprxmt, }, + { "udpmsg", Logudp|Logudpmsg, }, + { "ipmsg", Logip|Logipmsg, }, + { "esp", Logesp, }, + { nil, 0, }, +}; + +char Ebadnetctl[] = "too few arguments for netlog control message"; + +enum +{ + CMset, + CMclear, + CMonly, +}; + +static +Cmdtab routecmd[] = { + CMset, "set", 0, + CMclear, "clear", 0, + CMonly, "only", 0, +}; + +void +netloginit(Fs *f) 
+{ + f->alog = smalloc(sizeof(Netlog)); +} + +void +netlogopen(Fs *f) +{ + lock(f->alog); + if(waserror()){ + unlock(f->alog); + nexterror(); + } + if(f->alog->opens == 0){ + if(f->alog->buf == nil) + f->alog->buf = malloc(Nlog); + if(f->alog->buf == nil) + error(Enomem); + f->alog->rptr = f->alog->buf; + f->alog->end = f->alog->buf + Nlog; + } + f->alog->opens++; + unlock(f->alog); + poperror(); +} + +void +netlogclose(Fs *f) +{ + lock(f->alog); + if(waserror()){ + unlock(f->alog); + nexterror(); + } + f->alog->opens--; + if(f->alog->opens == 0){ + free(f->alog->buf); + f->alog->buf = nil; + } + unlock(f->alog); + poperror(); +} + +static int +netlogready(void *a) +{ + Fs *f = a; + + return f->alog->len; +} + +long +netlogread(Fs *f, void *a, ulong, long n) +{ + int i, d; + char *p, *rptr; + + qlock(f->alog); + if(waserror()){ + qunlock(f->alog); + nexterror(); + } + + for(;;){ + lock(f->alog); + if(f->alog->len){ + if(n > f->alog->len) + n = f->alog->len; + d = 0; + rptr = f->alog->rptr; + f->alog->rptr += n; + if(f->alog->rptr >= f->alog->end){ + d = f->alog->rptr - f->alog->end; + f->alog->rptr = f->alog->buf + d; + } + f->alog->len -= n; + unlock(f->alog); + + i = n-d; + p = a; + memmove(p, rptr, i); + memmove(p+i, f->alog->buf, d); + break; + } + else + unlock(f->alog); + + sleep(f->alog, netlogready, f); + } + + qunlock(f->alog); + poperror(); + + return n; +} + +void +netlogctl(Fs *f, char* s, int n) +{ + int i, set; + Netlogflag *fp; + Cmdbuf *cb; + Cmdtab *ct; + + cb = parsecmd(s, n); + if(waserror()){ + free(cb); + nexterror(); + } + + if(cb->nf < 2) + error(Ebadnetctl); + + ct = lookupcmd(cb, routecmd, nelem(routecmd)); + + SET(set); + + switch(ct->index){ + case CMset: + set = 1; + break; + + case CMclear: + set = 0; + break; + + case CMonly: + parseip(f->alog->iponly, cb->f[1]); + if(ipcmp(f->alog->iponly, IPnoaddr) == 0) + f->alog->iponlyset = 0; + else + f->alog->iponlyset = 1; + free(cb); + poperror(); + return; + + default: + cmderror(cb, "unknown 
ip control message"); + } + + for(i = 1; i < cb->nf; i++){ + for(fp = flags; fp->name; fp++) + if(strcmp(fp->name, cb->f[i]) == 0) + break; + if(fp->name == nil) + continue; + if(set) + f->alog->logmask |= fp->mask; + else + f->alog->logmask &= ~fp->mask; + } + + free(cb); + poperror(); +} + +void +netlog(Fs *f, int mask, char *fmt, ...) +{ + char buf[256], *t, *fp; + int i, n; + va_list arg; + + if(!(f->alog->logmask & mask)) + return; + + if(f->alog->opens == 0) + return; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + + lock(f->alog); + i = f->alog->len + n - Nlog; + if(i > 0){ + f->alog->len -= i; + f->alog->rptr += i; + if(f->alog->rptr >= f->alog->end) + f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end); + } + t = f->alog->rptr + f->alog->len; + fp = buf; + f->alog->len += n; + while(n-- > 0){ + if(t >= f->alog->end) + t = f->alog->buf + (t - f->alog->end); + *t++ = *fp++; + } + unlock(f->alog); + + wakeup(f->alog); +} diff -Nru 0/sys/src/nix/ip/nullmedium.c 4/sys/src/nix/ip/nullmedium.c --- 0/sys/src/nix/ip/nullmedium.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/nullmedium.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,39 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +static void +nullbind(Ipifc*, int, char**) +{ + error("cannot bind null device"); +} + +static void +nullunbind(Ipifc*) +{ +} + +static void +nullbwrite(Ipifc*, Block*, int, uchar*) +{ + error("nullbwrite"); +} + +Medium nullmedium = +{ +.name= "null", +.bind= nullbind, +.unbind= nullunbind, +.bwrite= nullbwrite, +}; + +void +nullmediumlink(void) +{ + addipmedium(&nullmedium); +} diff -Nru 0/sys/src/nix/ip/pktmedium.c 4/sys/src/nix/ip/pktmedium.c --- 0/sys/src/nix/ip/pktmedium.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/pktmedium.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,79 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include 
"dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + + +static void pktbind(Ipifc*, int, char**); +static void pktunbind(Ipifc*); +static void pktbwrite(Ipifc*, Block*, int, uchar*); +static void pktin(Fs*, Ipifc*, Block*); + +Medium pktmedium = +{ +.name= "pkt", +.hsize= 14, +.mintu= 40, +.maxtu= 4*1024, +.maclen= 6, +.bind= pktbind, +.unbind= pktunbind, +.bwrite= pktbwrite, +.pktin= pktin, +.unbindonclose= 1, +}; + +/* + * called to bind an IP ifc to an ethernet device + * called with ifc wlock'd + */ +static void +pktbind(Ipifc*, int, char**) +{ +} + +/* + * called with ifc wlock'd + */ +static void +pktunbind(Ipifc*) +{ +} + +/* + * called by ipoput with a single packet to write + */ +static void +pktbwrite(Ipifc *ifc, Block *bp, int, uchar*) +{ + /* enqueue onto the conversation's rq */ + bp = concatblock(bp); + if(ifc->conv->snoopers.ref > 0) + qpass(ifc->conv->sq, copyblock(bp, BLEN(bp))); + qpass(ifc->conv->rq, bp); +} + +/* + * called with ifc rlocked when someone write's to 'data' + */ +static void +pktin(Fs *f, Ipifc *ifc, Block *bp) +{ + if(ifc->lifc == nil) + freeb(bp); + else { + if(ifc->conv->snoopers.ref > 0) + qpass(ifc->conv->sq, copyblock(bp, BLEN(bp))); + ipiput4(f, ifc, bp); + } +} + +void +pktmediumlink(void) +{ + addipmedium(&pktmedium); +} diff -Nru 0/sys/src/nix/ip/ptclbsum.c 4/sys/src/nix/ip/ptclbsum.c --- 0/sys/src/nix/ip/ptclbsum.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/ptclbsum.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,72 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "ip.h" + +static short endian = 1; +static uchar* aendian = (uchar*)&endian; +#define LITTLE *aendian + +ushort +ptclbsum(uchar *addr, int len) +{ + ulong losum, hisum, mdsum, x; + ulong t1, t2; + + losum = 0; + hisum = 0; + mdsum = 0; + + x = 0; + if(PTR2UINT(addr) & 1) { + if(len) { + hisum += addr[0]; + len--; + addr++; + } + x = 1; + } + while(len >= 16) { + 
t1 = *(ushort*)(addr+0); + t2 = *(ushort*)(addr+2); mdsum += t1; + t1 = *(ushort*)(addr+4); mdsum += t2; + t2 = *(ushort*)(addr+6); mdsum += t1; + t1 = *(ushort*)(addr+8); mdsum += t2; + t2 = *(ushort*)(addr+10); mdsum += t1; + t1 = *(ushort*)(addr+12); mdsum += t2; + t2 = *(ushort*)(addr+14); mdsum += t1; + mdsum += t2; + len -= 16; + addr += 16; + } + while(len >= 2) { + mdsum += *(ushort*)addr; + len -= 2; + addr += 2; + } + if(x) { + if(len) + losum += addr[0]; + if(LITTLE) + losum += mdsum; + else + hisum += mdsum; + } else { + if(len) + hisum += addr[0]; + if(LITTLE) + hisum += mdsum; + else + losum += mdsum; + } + + losum += hisum >> 8; + losum += (hisum & 0xff) << 8; + while(hisum = losum>>16) + losum = hisum + (losum & 0xffff); + + return losum & 0xffff; +} diff -Nru 0/sys/src/nix/ip/tcp.c 4/sys/src/nix/ip/tcp.c --- 0/sys/src/nix/ip/tcp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/tcp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,3285 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" + +enum +{ + QMAX = 64*1024-1, + IP_TCPPROTO = 6, + + TCP4_IPLEN = 8, + TCP4_PHDRSIZE = 12, + TCP4_HDRSIZE = 20, + TCP4_TCBPHDRSZ = 40, + TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE, + + TCP6_IPLEN = 0, + TCP6_PHDRSIZE = 40, + TCP6_HDRSIZE = 20, + TCP6_TCBPHDRSZ = 60, + TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE, + + TcptimerOFF = 0, + TcptimerON = 1, + TcptimerDONE = 2, + MAX_TIME = (1<<20), /* Forever */ + TCP_ACK = 50, /* Timed ack sequence in ms */ + MAXBACKMS = 9*60*1000, /* longest backoff time (ms) before hangup */ + + URG = 0x20, /* Data marked urgent */ + ACK = 0x10, /* Acknowledge is valid */ + PSH = 0x08, /* Whole data pipe is pushed */ + RST = 0x04, /* Reset connection */ + SYN = 0x02, /* Pkt. 
is synchronise */ + FIN = 0x01, /* Start close down */ + + EOLOPT = 0, + NOOPOPT = 1, + MSSOPT = 2, + MSS_LENGTH = 4, /* Maximum segment size */ + WSOPT = 3, + WS_LENGTH = 3, /* Bits to scale window size by */ + MSL2 = 10, + MSPTICK = 50, /* Milliseconds per timer tick */ + DEF_MSS = 1460, /* Default maximum segment */ + DEF_MSS6 = 1280, /* Default maximum segment (min) for v6 */ + DEF_RTT = 500, /* Default round trip */ + DEF_KAT = 120000, /* Default time (ms) between keep alives */ + TCP_LISTEN = 0, /* Listen connection */ + TCP_CONNECT = 1, /* Outgoing connection */ + SYNACK_RXTIMER = 250, /* ms between SYNACK retransmits */ + + TCPREXMTTHRESH = 3, /* dupack threshhold for rxt */ + + FORCE = 1, + CLONE = 2, + RETRAN = 4, + ACTIVE = 8, + SYNACK = 16, + + LOGAGAIN = 3, + LOGDGAIN = 2, + + Closed = 0, /* Connection states */ + Listen, + Syn_sent, + Syn_received, + Established, + Finwait1, + Finwait2, + Close_wait, + Closing, + Last_ack, + Time_wait, + + Maxlimbo = 1000, /* maximum procs waiting for response to SYN ACK */ + NLHT = 256, /* hash table size, must be a power of 2 */ + LHTMASK = NLHT-1, + HaveWS = 1<<8, +}; + +/* Must correspond to the enumeration above */ +char *tcpstates[] = +{ + "Closed", "Listen", "Syn_sent", "Syn_received", + "Established", "Finwait1", "Finwait2", "Close_wait", + "Closing", "Last_ack", "Time_wait" +}; + +typedef struct Tcptimer Tcptimer; +struct Tcptimer +{ + Tcptimer *next; + Tcptimer *prev; + Tcptimer *readynext; + int state; + int start; + int count; + void (*func)(void*); + void *arg; +}; + +/* + * v4 and v6 pseudo headers used for + * checksuming tcp + */ +typedef struct Tcp4hdr Tcp4hdr; +struct Tcp4hdr +{ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar proto; + uchar tcplen[2]; + uchar tcpsrc[4]; + uchar tcpdst[4]; + uchar tcpsport[2]; + uchar 
tcpdport[2]; + uchar tcpseq[4]; + uchar tcpack[4]; + uchar tcpflag[2]; + uchar tcpwin[2]; + uchar tcpcksum[2]; + uchar tcpurg[2]; + /* Options segment */ + uchar tcpopt[1]; +}; + +typedef struct Tcp6hdr Tcp6hdr; +struct Tcp6hdr +{ + uchar vcf[4]; + uchar ploadlen[2]; + uchar proto; + uchar ttl; + uchar tcpsrc[IPaddrlen]; + uchar tcpdst[IPaddrlen]; + uchar tcpsport[2]; + uchar tcpdport[2]; + uchar tcpseq[4]; + uchar tcpack[4]; + uchar tcpflag[2]; + uchar tcpwin[2]; + uchar tcpcksum[2]; + uchar tcpurg[2]; + /* Options segment */ + uchar tcpopt[1]; +}; + +/* + * this represents the control info + * for a single packet. It is derived from + * a packet in ntohtcp{4,6}() and stuck into + * a packet in htontcp{4,6}(). + */ +typedef struct Tcp Tcp; +struct Tcp +{ + ushort source; + ushort dest; + ulong seq; + ulong ack; + uchar flags; + ushort ws; /* window scale option */ + ulong wnd; + ushort urg; + ushort mss; /* max segment size option (if not zero) */ + ushort len; /* size of data */ +}; + +/* + * this header is malloc'd to thread together fragments + * waiting to be coalesced + */ +typedef struct Reseq Reseq; +struct Reseq +{ + Reseq *next; + Tcp seg; + Block *bp; + ushort length; +}; + +/* + * the qlock in the Conv locks this structure + */ +typedef struct Tcpctl Tcpctl; +struct Tcpctl +{ + uchar state; /* Connection state */ + uchar type; /* Listening or active connection */ + uchar code; /* Icmp code */ + struct { + ulong una; /* Unacked data pointer */ + ulong nxt; /* Next sequence expected */ + ulong ptr; /* Data pointer */ + ulong wnd; /* Tcp send window */ + ulong urg; /* Urgent data pointer */ + ulong wl2; + uint scale; /* how much to right shift window in xmitted packets */ + /* to implement tahoe and reno TCP */ + ulong dupacks; /* number of duplicate acks rcvd */ + int recovery; /* loss recovery flag */ + ulong rxt; /* right window marker for recovery */ + } snd; + struct { + ulong nxt; /* Receive pointer to next uchar slot */ + ulong wnd; /* Receive 
window incoming */ + ulong urg; /* Urgent pointer */ + int blocked; /* set by tcprcvwin when the receive window it computes is (nearly) closed */ + int una; /* unacked data segs, for delayed acks */ + uint scale; /* how much to left shift window in rcved packets */ + } rcv; + ulong iss; /* Initial sequence number */ + ulong cwind; /* Congestion window */ + ulong abcbytes; /* appropriate byte counting rfc 3465 */ + uint scale; /* desired snd.scale */ + ulong ssthresh; /* Slow start threshold */ + int resent; /* Bytes just resent */ + int irs; /* Initial received sequence */ + ushort mss; /* Maximum segment size */ + int rerecv; /* Overlap of data rereceived */ + ulong window; /* Our receive window (queue) */ + uint qscale; /* Log2 of our receive window (queue) */ + uchar backoff; /* Exponential backoff counter */ + int backedoff; /* ms we've backed off for rexmits */ + uchar flags; /* State flags */ + Reseq *reseq; /* Resequencing queue */ + int nreseq; /* presumably number of entries on reseq - TODO confirm */ + int reseqlen; /* presumably bytes held on reseq - TODO confirm */ + Tcptimer timer; /* Activity timer */ + Tcptimer acktimer; /* Acknowledge timer */ + Tcptimer rtt_timer; /* Round trip timer */ + Tcptimer katimer; /* keep alive timer */ + ulong rttseq; /* Round trip sequence */ + int srtt; /* Smoothed round trip */ + int mdev; /* Mean deviation of round trip */ + int kacounter; /* count down for keep alive */ + uint sndsyntime; /* time syn sent */ + ulong time; /* time Finwait2 or Syn_received was sent */ + int nochecksum; /* non-zero means don't send checksums */ + int flgcnt; /* number of flags in the sequence (FIN,SYN) */ + + union { + Tcp4hdr tcp4hdr; + Tcp6hdr tcp6hdr; + } protohdr; /* prototype header */ +}; + +/* + * New calls are put in limbo rather than having a conversation structure + * allocated. Thus, a SYN attack results in lots of limbo'd calls but not + * any real Conv structures mucking things up. Calls in limbo rexmit their + * SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second. + * + * In particular they aren't on a listener's queue so that they don't figure + * in the input queue limit. 
+ * + * If 1/2 of a T3 was attacking SYN packets, we'ld have a permanent queue + * of 70000 limbo'd calls. Not great for a linear list but doable. Therefore + * there is no hashing of this list. + */ +typedef struct Limbo Limbo; +struct Limbo +{ + Limbo *next; + + uchar laddr[IPaddrlen]; + uchar raddr[IPaddrlen]; + ushort lport; + ushort rport; + ulong irs; /* initial received sequence */ + ulong iss; /* initial sent sequence */ + ushort mss; /* mss from the other end */ + ushort rcvscale; /* how much to scale rcvd windows */ + ushort sndscale; /* how much to scale sent windows */ + ulong lastsend; /* last time we sent a synack */ + uchar version; /* v4 or v6 */ + uchar rexmits; /* number of retransmissions */ +}; + +int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */ +ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */ + +enum { + /* MIB stats */ + MaxConn, + Mss, + ActiveOpens, + PassiveOpens, + EstabResets, + CurrEstab, + InSegs, + OutSegs, + RetransSegs, + RetransTimeouts, + InErrs, + OutRsts, + + /* non-MIB stats */ + CsumErrs, + HlenErrs, + LenErrs, + OutOfOrder, + Resequenced, + ReseqBytelim, + ReseqPktlim, + + Nstats +}; + +static char *statnames[] = +{ +[MaxConn] "MaxConn", +[Mss] "MaxSegment", +[ActiveOpens] "ActiveOpens", +[PassiveOpens] "PassiveOpens", +[EstabResets] "EstabResets", +[CurrEstab] "CurrEstab", +[InSegs] "InSegs", +[OutSegs] "OutSegs", +[RetransSegs] "RetransSegs", +[RetransTimeouts] "RetransTimeouts", +[InErrs] "InErrs", +[OutRsts] "OutRsts", +[CsumErrs] "CsumErrs", +[HlenErrs] "HlenErrs", +[LenErrs] "LenErrs", +[OutOfOrder] "OutOfOrder", +[Resequenced] "Resequenced", +[ReseqBytelim] "ReseqBytelim", +[ReseqPktlim] "ReseqPktlim", +}; + +typedef struct Tcppriv Tcppriv; +struct Tcppriv +{ + /* List of active timers */ + QLock tl; + Tcptimer *timers; + + /* hash table for matching conversations */ + Ipht ht; + + /* calls in limbo waiting for an ACK to our SYN ACK */ + int nlimbo; + Limbo *lht[NLHT]; + + /* for keeping 
track of tcpackproc */ + QLock apl; + int ackprocstarted; + + uvlong stats[Nstats]; +}; + +/* + * Setting tcpporthogdefense to non-zero enables Dong Lin's + * solution to hijacked systems staking out port's as a form + * of DoS attack. + * + * To avoid stateless Conv hogs, we pick a sequence number at random. If + * that number gets acked by the other end, we shut down the connection. + * Look for tcpporthogdefense in the code. + */ +int tcpporthogdefense = 0; + +static int addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort); +static int dumpreseq(Tcpctl*); +static void getreseq(Tcpctl*, Tcp*, Block**, ushort*); +static void limbo(Conv*, uchar*, uchar*, Tcp*, int); +static void limborexmit(Proto*); +static void localclose(Conv*, char*); +static void procsyn(Conv*, Tcp*); +static void tcpacktimer(void*); +static void tcpiput(Proto*, Ipifc*, Block*); +static void tcpkeepalive(void*); +static void tcpoutput(Conv*); +static void tcprcvwin(Conv*); +static void tcprxmit(Conv*); +static void tcpsetkacounter(Tcpctl*); +static void tcpsetscale(Conv*, Tcpctl*, ushort, ushort); +static void tcpsettimer(Tcpctl*); +static void tcpsndsyn(Conv*, Tcpctl*); +static void tcpstart(Conv*, int); +static void tcpsynackrtt(Conv*); +static void tcptimeout(void*); +static int tcptrim(Tcpctl*, Tcp*, Block**, ushort*); + +static void +tcpsetstate(Conv *s, uchar newstate) +{ + Tcpctl *tcb; + uchar oldstate; + Tcppriv *tpriv; + + tpriv = s->p->priv; + + tcb = (Tcpctl*)s->ptcl; + + oldstate = tcb->state; + if(oldstate == newstate) + return; + + if(oldstate == Established) + tpriv->stats[CurrEstab]--; + if(newstate == Established) + tpriv->stats[CurrEstab]++; + + switch(newstate) { + case Closed: + qclose(s->rq); + qclose(s->wq); + qclose(s->eq); + break; + + case Close_wait: /* Remote closes */ + qhangup(s->rq, nil); + break; + } + + tcb->state = newstate; + + if(oldstate == Syn_sent && newstate != Closed) + Fsconnected(s, nil); +} + +static char* +tcpconnect(Conv *c, char **argv, int argc) 
+{ + char *e; + Tcpctl *tcb; + + tcb = (Tcpctl*)(c->ptcl); + if(tcb->state != Closed) + return Econinuse; + + e = Fsstdconnect(c, argv, argc); + if(e != nil) + return e; + tcpstart(c, TCP_CONNECT); + + return nil; +} + +static int +tcpstate(Conv *c, char *state, int n) +{ + Tcpctl *s; + + s = (Tcpctl*)(c->ptcl); + + return snprint(state, n, + "%s qin %d qout %d rq %d.%d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n", + tcpstates[s->state], + c->rq ? qlen(c->rq) : 0, + c->wq ? qlen(c->wq) : 0, + s->nreseq, s->reseqlen, + s->srtt, s->mdev, + s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, + s->timer.start, s->timer.count, s->rerecv, + s->katimer.start, s->katimer.count); +} + +static int +tcpinuse(Conv *c) +{ + Tcpctl *s; + + s = (Tcpctl*)(c->ptcl); + return s->state != Closed; +} + +static char* +tcpannounce(Conv *c, char **argv, int argc) +{ + char *e; + Tcpctl *tcb; + + tcb = (Tcpctl*)(c->ptcl); + if(tcb->state != Closed) + return Econinuse; + + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + tcpstart(c, TCP_LISTEN); + Fsconnected(c, nil); + + return nil; +} + +/* + * tcpclose is always called with the q locked + */ +static void +tcpclose(Conv *c) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)c->ptcl; + + qhangup(c->rq, nil); + qhangup(c->wq, nil); + qhangup(c->eq, nil); + qflush(c->rq); + + switch(tcb->state) { + case Listen: + /* + * reset any incoming calls to this listener + */ + Fsconnected(c, "Hangup"); + + localclose(c, nil); + break; + case Closed: + case Syn_sent: + localclose(c, nil); + break; + case Syn_received: + case Established: + tcb->flgcnt++; + tcb->snd.nxt++; + tcpsetstate(c, Finwait1); + tcpoutput(c); + break; + case Close_wait: + tcb->flgcnt++; + tcb->snd.nxt++; + tcpsetstate(c, Last_ack); + tcpoutput(c); + break; + } +} + +static void +tcpkick(void *x) +{ + Conv *s = x; + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + 
qunlock(s); + nexterror(); + } + qlock(s); + + switch(tcb->state) { + case Syn_sent: + case Syn_received: + case Established: + case Close_wait: + /* + * Push data + */ + tcpoutput(s); + break; + default: + localclose(s, "Hangup"); + break; + } + + qunlock(s); + poperror(); +} + +static void +tcprcvwin(Conv *s) /* Call with tcb locked */ +{ + int w; + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + w = tcb->window - qlen(s->rq); + if(w < 0) + w = 0; + if(w != tcb->rcv.wnd) + if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){ + tcb->rcv.blocked = 1; + netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n", + tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport); + } + tcb->rcv.wnd = w; +} + +static void +tcpacktimer(void *v) +{ + Tcpctl *tcb; + Conv *s; + + s = v; + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + if(tcb->state != Closed){ + tcb->flags |= FORCE; + tcpoutput(s); + } + qunlock(s); + poperror(); +} + +static void +tcpcongestion(Tcpctl *tcb) +{ + ulong inflight; + + inflight = tcb->snd.nxt - tcb->snd.una; + if(inflight > tcb->cwind) + inflight = tcb->cwind; + tcb->ssthresh = inflight / 2; + if(tcb->ssthresh < 2*tcb->mss) + tcb->ssthresh = 2*tcb->mss; +} + +static void +tcpabcincr(Tcpctl *tcb, ulong acked, ulong limit) +{ + tcb->abcbytes += acked; + if(tcb->abcbytes >= limit){ + tcb->cwind += tcb->mss; + tcb->abcbytes %= limit; + } +} + +static void +tcpcreate(Conv *c) +{ + c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); + c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c); +} + +static void +timerstate(Tcppriv *priv, Tcptimer *t, int newstate) +{ + if(newstate != TcptimerON){ + if(t->state == TcptimerON){ + /* unchain */ + if(priv->timers == t){ + priv->timers = t->next; + if(t->prev != nil) + panic("timerstate1"); + } + if(t->next) + t->next->prev = t->prev; + if(t->prev) + t->prev->next = t->next; + t->next = t->prev = nil; + } + } else { + if(t->state != TcptimerON){ + /* chain 
*/ + if(t->prev != nil || t->next != nil) + panic("timerstate2"); + t->prev = nil; + t->next = priv->timers; + if(t->next) + t->next->prev = t; + priv->timers = t; + } + } + t->state = newstate; +} + +static void +tcpackproc(void *a) +{ + Tcptimer *t, *tp, *timeo; + Proto *tcp; + Tcppriv *priv; + int loop; + + tcp = a; + priv = tcp->priv; + + for(;;) { + tsleep(&up->sleep, return0, 0, MSPTICK); + + qlock(&priv->tl); + timeo = nil; + loop = 0; + for(t = priv->timers; t != nil; t = tp) { + if(loop++ > 10000) + panic("tcpackproc1"); + tp = t->next; + if(t->state == TcptimerON) { + t->count--; + if(t->count == 0) { + timerstate(priv, t, TcptimerDONE); + t->readynext = timeo; + timeo = t; + } + } + } + qunlock(&priv->tl); + + loop = 0; + for(t = timeo; t != nil; t = t->readynext) { + if(loop++ > 10000) + panic("tcpackproc2"); + if(t->state == TcptimerDONE && t->func != nil && !waserror()){ + (*t->func)(t->arg); + poperror(); + } + } + + limborexmit(tcp); + } +} + +static void +tcpgo(Tcppriv *priv, Tcptimer *t) +{ + if(t == nil || t->start == 0) + return; + + qlock(&priv->tl); + t->count = t->start; + timerstate(priv, t, TcptimerON); + qunlock(&priv->tl); +} + +static void +tcphalt(Tcppriv *priv, Tcptimer *t) +{ + if(t == nil) + return; + + qlock(&priv->tl); + timerstate(priv, t, TcptimerOFF); + qunlock(&priv->tl); +} + +static int +backoff(int n) +{ + return 1 << n; +} + +static void +localclose(Conv *s, char *reason) /* called with tcb locked */ +{ + Tcpctl *tcb; + Tcppriv *tpriv; + + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + iphtrem(&tpriv->ht, s); + + tcphalt(tpriv, &tcb->timer); + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + + /* Flush reassembly queue; nothing more can arrive */ + dumpreseq(tcb); + + if(tcb->state == Syn_sent) + Fsconnected(s, reason); + if(s->state == Announced) + wakeup(&s->listenr); + + qhangup(s->rq, reason); + qhangup(s->wq, reason); + + tcpsetstate(s, Closed); +} + +/* mtu (- 
TCP + IP hdr len) of 1st hop */
+static int
+tcpmtu(Proto *tcp, uchar *addr, int version, uint *scale)
+{
+	Ipifc *ifc;
+	int mtu;
+
+	/* without an interface for addr, fall back to the default mss */
+	ifc = findipifc(tcp->f, addr, 0);
+	switch(version){
+	default:
+	case V4:
+		mtu = DEF_MSS;
+		if(ifc != nil)
+			mtu = ifc->maxtu - ifc->medium->hsize - (TCP4_PKT + TCP4_HDRSIZE);
+		break;
+	case V6:
+		mtu = DEF_MSS6;
+		if(ifc != nil)
+			mtu = ifc->maxtu - ifc->medium->hsize - (TCP6_PKT + TCP6_HDRSIZE);
+		break;
+	}
+	/* the window scale to offer grows with the interface's mbps */
+	if(ifc != nil){
+		if(ifc->mbps > 1000)
+			*scale = HaveWS | 4;
+		else if(ifc->mbps > 100)
+			*scale = HaveWS | 3;
+		else if(ifc->mbps > 10)
+			*scale = HaveWS | 1;
+		else
+			*scale = HaveWS | 0;
+	} else
+		*scale = HaveWS | 0;
+
+	return mtu;
+}
+
+/*
+ * zero the control block of s and fill in its defaults:
+ * round trip estimate, the four timers and, unless the
+ * conversation is listening, the prototype header used
+ * for outgoing packets.
+ */
+static void
+inittcpctl(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcp4hdr* h4;
+	Tcp6hdr* h6;
+	Tcppriv *tpriv;
+	int mss;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	memset(tcb, 0, sizeof(Tcpctl));
+
+	tcb->ssthresh = QMAX; /* reset by tcpsetscale() */
+	/* initial smoothed rtt, stored shifted left by LOGAGAIN */
+	tcb->srtt = tcp_irtt<<LOGAGAIN;
+	tcb->mdev = 0;
+
+	/* setup timers */
+	tcb->timer.start = tcp_irtt / MSPTICK;
+	tcb->timer.func = tcptimeout;
+	tcb->timer.arg = s;
+	tcb->rtt_timer.start = MAX_TIME;
+	tcb->acktimer.start = TCP_ACK / MSPTICK;
+	tcb->acktimer.func = tcpacktimer;
+	tcb->acktimer.arg = s;
+	tcb->katimer.start = DEF_KAT / MSPTICK;
+	tcb->katimer.func = tcpkeepalive;
+	tcb->katimer.arg = s;
+
+	mss = DEF_MSS;
+
+	/* create a prototype(pseudo) header */
+	if(mode != TCP_LISTEN){
+		if(ipcmp(s->laddr, IPnoaddr) == 0)
+			findlocalip(s->p->f, s->laddr, s->raddr);
+
+		switch(s->ipversion){
+		case V4:
+			h4 = &tcb->protohdr.tcp4hdr;
+			memset(h4, 0, sizeof(*h4));
+			h4->proto = IP_TCPPROTO;
+			hnputs(h4->tcpsport, s->lport);
+			hnputs(h4->tcpdport, s->rport);
+			v6tov4(h4->tcpsrc, s->laddr);
+			v6tov4(h4->tcpdst, s->raddr);
+			break;
+		case V6:
+			h6 = &tcb->protohdr.tcp6hdr;
+			memset(h6, 0, sizeof(*h6));
+			h6->proto = IP_TCPPROTO;
+			hnputs(h6->tcpsport, s->lport);
+			hnputs(h6->tcpdport, s->rport);
+			ipmove(h6->tcpsrc, s->laddr);
+			ipmove(h6->tcpdst, s->raddr);
+
mss = DEF_MSS6; + break; + default: + panic("inittcpctl: version %d", s->ipversion); + } + } + + tcb->mss = tcb->cwind = mss; + tcb->abcbytes = 0; + tpriv = s->p->priv; + tpriv->stats[Mss] = tcb->mss; + + /* default is no window scaling */ + tcpsetscale(s, tcb, 0, 0); +} + +/* + * called with s qlocked + */ +static void +tcpstart(Conv *s, int mode) +{ + Tcpctl *tcb; + Tcppriv *tpriv; + char kpname[KNAMELEN]; + + tpriv = s->p->priv; + + if(tpriv->ackprocstarted == 0){ + qlock(&tpriv->apl); + if(tpriv->ackprocstarted == 0){ + sprint(kpname, "#I%dtcpack", s->p->f->dev); + kproc(kpname, tcpackproc, s->p); + tpriv->ackprocstarted = 1; + } + qunlock(&tpriv->apl); + } + + tcb = (Tcpctl*)s->ptcl; + + inittcpctl(s, mode); + + iphtadd(&tpriv->ht, s); + switch(mode) { + case TCP_LISTEN: + tpriv->stats[PassiveOpens]++; + tcb->flags |= CLONE; + tcpsetstate(s, Listen); + break; + + case TCP_CONNECT: + tpriv->stats[ActiveOpens]++; + tcb->flags |= ACTIVE; + tcpsndsyn(s, tcb); + tcpsetstate(s, Syn_sent); + tcpoutput(s); + break; + } +} + +static char* +tcpflag(ushort flag) +{ + static char buf[128]; + + sprint(buf, "%d", flag>>10); /* Head len */ + if(flag & URG) + strcat(buf, " URG"); + if(flag & ACK) + strcat(buf, " ACK"); + if(flag & PSH) + strcat(buf, " PSH"); + if(flag & RST) + strcat(buf, " RST"); + if(flag & SYN) + strcat(buf, " SYN"); + if(flag & FIN) + strcat(buf, " FIN"); + + return buf; +} + +static Block* +htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb) +{ + int dlen; + Tcp6hdr *h; + ushort csum; + ushort hdrlen, optpad = 0; + uchar *opt; + + hdrlen = TCP6_HDRSIZE; + if(tcph->flags & SYN){ + if(tcph->mss) + hdrlen += MSS_LENGTH; + if(tcph->ws) + hdrlen += WS_LENGTH; + optpad = hdrlen & 3; + if(optpad) + optpad = 4 - optpad; + hdrlen += optpad; + } + + if(data) { + dlen = blocklen(data); + data = padblock(data, hdrlen + TCP6_PKT); + if(data == nil) + return nil; + } + else { + dlen = 0; + data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's 
*/ + if(data == nil) + return nil; + data->wp += hdrlen + TCP6_PKT; + } + + /* copy in pseudo ip header plus port numbers */ + h = (Tcp6hdr *)(data->rp); + memmove(h, ph, TCP6_TCBPHDRSZ); + + /* compose pseudo tcp header, do cksum calculation */ + hnputl(h->vcf, hdrlen + dlen); + h->ploadlen[0] = h->ploadlen[1] = h->proto = 0; + h->ttl = ph->proto; + + /* copy in variable bits */ + hnputl(h->tcpseq, tcph->seq); + hnputl(h->tcpack, tcph->ack); + hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); + hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0)); + hnputs(h->tcpurg, tcph->urg); + + if(tcph->flags & SYN){ + opt = h->tcpopt; + if(tcph->mss != 0){ + *opt++ = MSSOPT; + *opt++ = MSS_LENGTH; + hnputs(opt, tcph->mss); + opt += 2; + } + if(tcph->ws != 0){ + *opt++ = WSOPT; + *opt++ = WS_LENGTH; + *opt++ = tcph->ws; + } + while(optpad-- > 0) + *opt++ = NOOPOPT; + } + + if(tcb != nil && tcb->nochecksum){ + h->tcpcksum[0] = h->tcpcksum[1] = 0; + } else { + csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE); + hnputs(h->tcpcksum, csum); + } + + /* move from pseudo header back to normal ip header */ + memset(h->vcf, 0, 4); + h->vcf[0] = IP_VER6; + hnputs(h->ploadlen, hdrlen+dlen); + h->proto = ph->proto; + + return data; +} + +static Block* +htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb) +{ + int dlen; + Tcp4hdr *h; + ushort csum; + ushort hdrlen, optpad = 0; + uchar *opt; + + hdrlen = TCP4_HDRSIZE; + if(tcph->flags & SYN){ + if(tcph->mss) + hdrlen += MSS_LENGTH; + if(1) + hdrlen += WS_LENGTH; + optpad = hdrlen & 3; + if(optpad) + optpad = 4 - optpad; + hdrlen += optpad; + } + + if(data) { + dlen = blocklen(data); + data = padblock(data, hdrlen + TCP4_PKT); + if(data == nil) + return nil; + } + else { + dlen = 0; + data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */ + if(data == nil) + return nil; + data->wp += hdrlen + TCP4_PKT; + } + + /* copy in pseudo ip header plus port numbers */ + h = (Tcp4hdr *)(data->rp); + 
memmove(h, ph, TCP4_TCBPHDRSZ); + + /* copy in variable bits */ + hnputs(h->tcplen, hdrlen + dlen); + hnputl(h->tcpseq, tcph->seq); + hnputl(h->tcpack, tcph->ack); + hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); + hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0)); + hnputs(h->tcpurg, tcph->urg); + + if(tcph->flags & SYN){ + opt = h->tcpopt; + if(tcph->mss != 0){ + *opt++ = MSSOPT; + *opt++ = MSS_LENGTH; + hnputs(opt, tcph->mss); + opt += 2; + } + /* always offer. rfc1323 §2.2 */ + if(1){ + *opt++ = WSOPT; + *opt++ = WS_LENGTH; + *opt++ = tcph->ws; + } + while(optpad-- > 0) + *opt++ = NOOPOPT; + } + + if(tcb != nil && tcb->nochecksum){ + h->tcpcksum[0] = h->tcpcksum[1] = 0; + } else { + csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE); + hnputs(h->tcpcksum, csum); + } + + return data; +} + +static int +ntohtcp6(Tcp *tcph, Block **bpp) +{ + Tcp6hdr *h; + uchar *optr; + ushort hdrlen; + ushort optlen; + int n; + + *bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE); + if(*bpp == nil) + return -1; + + h = (Tcp6hdr *)((*bpp)->rp); + tcph->source = nhgets(h->tcpsport); + tcph->dest = nhgets(h->tcpdport); + tcph->seq = nhgetl(h->tcpseq); + tcph->ack = nhgetl(h->tcpack); + hdrlen = (h->tcpflag[0]>>2) & ~3; + if(hdrlen < TCP6_HDRSIZE) { + freeblist(*bpp); + return -1; + } + + tcph->flags = h->tcpflag[1]; + tcph->wnd = nhgets(h->tcpwin); + tcph->urg = nhgets(h->tcpurg); + tcph->mss = 0; + tcph->ws = 0; + tcph->len = nhgets(h->ploadlen) - hdrlen; + + *bpp = pullupblock(*bpp, hdrlen+TCP6_PKT); + if(*bpp == nil) + return -1; + + optr = h->tcpopt; + n = hdrlen - TCP6_HDRSIZE; + while(n > 0 && *optr != EOLOPT) { + if(*optr == NOOPOPT) { + n--; + optr++; + continue; + } + optlen = optr[1]; + if(optlen < 2 || optlen > n) + break; + switch(*optr) { + case MSSOPT: + if(optlen == MSS_LENGTH) + tcph->mss = nhgets(optr+2); + break; + case WSOPT: + if(optlen == WS_LENGTH && *(optr+2) <= 14) + tcph->ws = HaveWS | *(optr+2); + break; + } + n -= optlen; + optr += 
optlen; + } + return hdrlen; +} + +static int +ntohtcp4(Tcp *tcph, Block **bpp) +{ + Tcp4hdr *h; + uchar *optr; + ushort hdrlen; + ushort optlen; + int n; + + *bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE); + if(*bpp == nil) + return -1; + + h = (Tcp4hdr *)((*bpp)->rp); + tcph->source = nhgets(h->tcpsport); + tcph->dest = nhgets(h->tcpdport); + tcph->seq = nhgetl(h->tcpseq); + tcph->ack = nhgetl(h->tcpack); + + hdrlen = (h->tcpflag[0]>>2) & ~3; + if(hdrlen < TCP4_HDRSIZE) { + freeblist(*bpp); + return -1; + } + + tcph->flags = h->tcpflag[1]; + tcph->wnd = nhgets(h->tcpwin); + tcph->urg = nhgets(h->tcpurg); + tcph->mss = 0; + tcph->ws = 0; + tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT); + + *bpp = pullupblock(*bpp, hdrlen+TCP4_PKT); + if(*bpp == nil) + return -1; + + optr = h->tcpopt; + n = hdrlen - TCP4_HDRSIZE; + while(n > 0 && *optr != EOLOPT) { + if(*optr == NOOPOPT) { + n--; + optr++; + continue; + } + optlen = optr[1]; + if(optlen < 2 || optlen > n) + break; + switch(*optr) { + case MSSOPT: + if(optlen == MSS_LENGTH) + tcph->mss = nhgets(optr+2); + break; + case WSOPT: + if(optlen == WS_LENGTH && *(optr+2) <= 14) + tcph->ws = HaveWS | *(optr+2); + break; + } + n -= optlen; + optr += optlen; + } + return hdrlen; +} + +/* + * For outgoing calls, generate an initial sequence + * number and put a SYN on the send queue + */ +static void +tcpsndsyn(Conv *s, Tcpctl *tcb) +{ + Tcppriv *tpriv; + + tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); + tcb->rttseq = tcb->iss; + tcb->snd.wl2 = tcb->iss; + tcb->snd.una = tcb->iss; + tcb->snd.ptr = tcb->rttseq; + tcb->snd.nxt = tcb->rttseq; + tcb->flgcnt++; + tcb->flags |= FORCE; + tcb->sndsyntime = NOW; + + /* set desired mss and scale */ + tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale); + tpriv = s->p->priv; + tpriv->stats[Mss] = tcb->mss; +} + +void +sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason) +{ + Block *hbp; + uchar rflags; + Tcppriv *tpriv; + 
Tcp4hdr ph4; + Tcp6hdr ph6; + + netlog(tcp->f, Logtcp, "sndrst: %s\n", reason); + + tpriv = tcp->priv; + + if(seg->flags & RST) + return; + + /* make pseudo header */ + switch(version) { + case V4: + memset(&ph4, 0, sizeof(ph4)); + ph4.vihl = IP_VER4; + v6tov4(ph4.tcpsrc, dest); + v6tov4(ph4.tcpdst, source); + ph4.proto = IP_TCPPROTO; + hnputs(ph4.tcplen, TCP4_HDRSIZE); + hnputs(ph4.tcpsport, seg->dest); + hnputs(ph4.tcpdport, seg->source); + break; + case V6: + memset(&ph6, 0, sizeof(ph6)); + ph6.vcf[0] = IP_VER6; + ipmove(ph6.tcpsrc, dest); + ipmove(ph6.tcpdst, source); + ph6.proto = IP_TCPPROTO; + hnputs(ph6.ploadlen, TCP6_HDRSIZE); + hnputs(ph6.tcpsport, seg->dest); + hnputs(ph6.tcpdport, seg->source); + break; + default: + panic("sndrst: version %d", version); + } + + tpriv->stats[OutRsts]++; + rflags = RST; + + /* convince the other end that this reset is in band */ + if(seg->flags & ACK) { + seg->seq = seg->ack; + seg->ack = 0; + } + else { + rflags |= ACK; + seg->ack = seg->seq; + seg->seq = 0; + if(seg->flags & SYN) + seg->ack++; + seg->ack += length; + if(seg->flags & FIN) + seg->ack++; + } + seg->flags = rflags; + seg->wnd = 0; + seg->urg = 0; + seg->mss = 0; + seg->ws = 0; + switch(version) { + case V4: + hbp = htontcp4(seg, nil, &ph4, nil); + if(hbp == nil) + return; + ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + case V6: + hbp = htontcp6(seg, nil, &ph6, nil); + if(hbp == nil) + return; + ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + default: + panic("sndrst2: version %d", version); + } +} + +/* + * send a reset to the remote side and close the conversation + * called with s qlocked + */ +static char* +tcphangup(Conv *s) +{ + Tcp seg; + Tcpctl *tcb; + Block *hbp; + + tcb = (Tcpctl*)s->ptcl; + if(waserror()) + return commonerror(); + if(ipcmp(s->raddr, IPnoaddr) != 0) { + if(!waserror()){ + memset(&seg, 0, sizeof seg); + seg.flags = RST | ACK; + seg.ack = tcb->rcv.nxt; + tcb->rcv.una = 0; + seg.seq = tcb->snd.ptr; + seg.wnd = 0; 
+ seg.urg = 0; + seg.mss = 0; + seg.ws = 0; + switch(s->ipversion) { + case V4: + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb); + ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s); + break; + case V6: + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb); + ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s); + break; + default: + panic("tcphangup: version %d", s->ipversion); + } + poperror(); + } + } + localclose(s, nil); + poperror(); + return nil; +} + +/* + * (re)send a SYN ACK + */ +static int +sndsynack(Proto *tcp, Limbo *lp) +{ + Block *hbp; + Tcp4hdr ph4; + Tcp6hdr ph6; + Tcp seg; + uint scale; + + /* make pseudo header */ + switch(lp->version) { + case V4: + memset(&ph4, 0, sizeof(ph4)); + ph4.vihl = IP_VER4; + v6tov4(ph4.tcpsrc, lp->laddr); + v6tov4(ph4.tcpdst, lp->raddr); + ph4.proto = IP_TCPPROTO; + hnputs(ph4.tcplen, TCP4_HDRSIZE); + hnputs(ph4.tcpsport, lp->lport); + hnputs(ph4.tcpdport, lp->rport); + break; + case V6: + memset(&ph6, 0, sizeof(ph6)); + ph6.vcf[0] = IP_VER6; + ipmove(ph6.tcpsrc, lp->laddr); + ipmove(ph6.tcpdst, lp->raddr); + ph6.proto = IP_TCPPROTO; + hnputs(ph6.ploadlen, TCP6_HDRSIZE); + hnputs(ph6.tcpsport, lp->lport); + hnputs(ph6.tcpdport, lp->rport); + break; + default: + panic("sndrst: version %d", lp->version); + } + + memset(&seg, 0, sizeof seg); + seg.seq = lp->iss; + seg.ack = lp->irs+1; + seg.flags = SYN|ACK; + seg.urg = 0; + seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale); + seg.wnd = QMAX; + + /* if the other side set scale, we should too */ + if(lp->rcvscale){ + seg.ws = scale; + lp->sndscale = scale; + } else { + seg.ws = 0; + lp->sndscale = 0; + } + + switch(lp->version) { + case V4: + hbp = htontcp4(&seg, nil, &ph4, nil); + if(hbp == nil) + return -1; + ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil); + break; + case V6: + hbp = htontcp6(&seg, nil, &ph6, nil); + if(hbp == nil) + return -1; + ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, 
nil); + break; + default: + panic("sndsnack: version %d", lp->version); + } + lp->lastsend = NOW; + return 0; +} + +#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + p )&LHTMASK ) + +/* + * put a call into limbo and respond with a SYN ACK + * + * called with proto locked + */ +static void +limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version) +{ + Limbo *lp, **l; + Tcppriv *tpriv; + int h; + + tpriv = s->p->priv; + h = hashipa(source, seg->source); + + for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){ + lp = *l; + if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version) + continue; + if(ipcmp(lp->raddr, source) != 0) + continue; + if(ipcmp(lp->laddr, dest) != 0) + continue; + + /* each new SYN restarts the retransmits */ + lp->irs = seg->seq; + break; + } + lp = *l; + if(lp == nil){ + if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){ + lp = tpriv->lht[h]; + tpriv->lht[h] = lp->next; + lp->next = nil; + } else { + lp = malloc(sizeof(*lp)); + if(lp == nil) + return; + tpriv->nlimbo++; + } + *l = lp; + lp->version = version; + ipmove(lp->laddr, dest); + ipmove(lp->raddr, source); + lp->lport = seg->dest; + lp->rport = seg->source; + lp->mss = seg->mss; + lp->rcvscale = seg->ws; + lp->irs = seg->seq; + lp->iss = (nrand(1<<16)<<16)|nrand(1<<16); + } + + if(sndsynack(s->p, lp) < 0){ + *l = lp->next; + tpriv->nlimbo--; + free(lp); + } +} + +/* + * resend SYN ACK's once every SYNACK_RXTIMER ms. 
+ */ +static void +limborexmit(Proto *tcp) +{ + Tcppriv *tpriv; + Limbo **l, *lp; + int h; + int seen; + ulong now; + + tpriv = tcp->priv; + + if(!canqlock(tcp)) + return; + seen = 0; + now = NOW; + for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){ + for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){ + lp = *l; + seen++; + if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER) + continue; + + /* time it out after 1 second */ + if(++(lp->rexmits) > 5){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + continue; + } + + /* if we're being attacked, don't bother resending SYN ACK's */ + if(tpriv->nlimbo > 100) + continue; + + if(sndsynack(tcp, lp) < 0){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + continue; + } + + l = &lp->next; + } + } + qunlock(tcp); +} + +/* + * lookup call in limbo. if found, throw it out. + * + * called with proto locked + */ +static void +limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) +{ + Limbo *lp, **l; + int h; + Tcppriv *tpriv; + + tpriv = s->p->priv; + + /* find a call in limbo */ + h = hashipa(src, segp->source); + for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){ + lp = *l; + if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version) + continue; + if(ipcmp(lp->laddr, dst) != 0) + continue; + if(ipcmp(lp->raddr, src) != 0) + continue; + + /* RST can only follow the SYN */ + if(segp->seq == lp->irs+1){ + tpriv->nlimbo--; + *l = lp->next; + free(lp); + } + break; + } +} + +/* + * come here when we finally get an ACK to our SYN-ACK. + * lookup call in limbo. 
if found, create a new conversation + * + * called with proto locked + */ +static Conv* +tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version) +{ + Conv *new; + Tcpctl *tcb; + Tcppriv *tpriv; + Tcp4hdr *h4; + Tcp6hdr *h6; + Limbo *lp, **l; + int h; + + /* unless it's just an ack, it can't be someone coming out of limbo */ + if((segp->flags & SYN) || (segp->flags & ACK) == 0) + return nil; + + tpriv = s->p->priv; + + /* find a call in limbo */ + h = hashipa(src, segp->source); + for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){ + netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n", + src, segp->source, lp->raddr, lp->rport, + dst, segp->dest, lp->laddr, lp->lport, + version, lp->version + ); + + if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version) + continue; + if(ipcmp(lp->laddr, dst) != 0) + continue; + if(ipcmp(lp->raddr, src) != 0) + continue; + + /* we're assuming no data with the initial SYN */ + if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){ + netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n", + segp->seq, lp->irs+1, segp->ack, lp->iss+1); + lp = nil; + } else { + tpriv->nlimbo--; + *l = lp->next; + } + break; + } + if(lp == nil) + return nil; + + new = Fsnewcall(s, src, segp->source, dst, segp->dest, version); + if(new == nil) + return nil; + + memmove(new->ptcl, s->ptcl, sizeof(Tcpctl)); + tcb = (Tcpctl*)new->ptcl; + tcb->flags &= ~CLONE; + tcb->timer.arg = new; + tcb->timer.state = TcptimerOFF; + tcb->acktimer.arg = new; + tcb->acktimer.state = TcptimerOFF; + tcb->katimer.arg = new; + tcb->katimer.state = TcptimerOFF; + tcb->rtt_timer.arg = new; + tcb->rtt_timer.state = TcptimerOFF; + + tcb->irs = lp->irs; + tcb->rcv.nxt = tcb->irs+1; + tcb->rcv.urg = tcb->rcv.nxt; + + tcb->iss = lp->iss; + tcb->rttseq = tcb->iss; + tcb->snd.wl2 = tcb->iss; + tcb->snd.una = tcb->iss+1; + tcb->snd.ptr = tcb->iss+1; + tcb->snd.nxt = tcb->iss+1; + tcb->flgcnt = 0; + 
tcb->flags |= SYNACK; + + /* our sending max segment size cannot be bigger than what he asked for */ + if(lp->mss != 0 && lp->mss < tcb->mss) { + tcb->mss = lp->mss; + tpriv->stats[Mss] = tcb->mss; + } + + /* window scaling */ + tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale); + + /* the congestion window always starts out as a single segment */ + tcb->snd.wnd = segp->wnd; + tcb->cwind = tcb->mss; + + /* set initial round trip time */ + tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER; + tcpsynackrtt(new); + + free(lp); + + /* set up proto header */ + switch(version){ + case V4: + h4 = &tcb->protohdr.tcp4hdr; + memset(h4, 0, sizeof(*h4)); + h4->proto = IP_TCPPROTO; + hnputs(h4->tcpsport, new->lport); + hnputs(h4->tcpdport, new->rport); + v6tov4(h4->tcpsrc, dst); + v6tov4(h4->tcpdst, src); + break; + case V6: + h6 = &tcb->protohdr.tcp6hdr; + memset(h6, 0, sizeof(*h6)); + h6->proto = IP_TCPPROTO; + hnputs(h6->tcpsport, new->lport); + hnputs(h6->tcpdport, new->rport); + ipmove(h6->tcpsrc, dst); + ipmove(h6->tcpdst, src); + break; + default: + panic("tcpincoming: version %d", new->ipversion); + } + + tcpsetstate(new, Established); + + iphtadd(&tpriv->ht, new); + + return new; +} + +static int +seq_within(ulong x, ulong low, ulong high) +{ + if(low <= high){ + if(low <= x && x <= high) + return 1; + } + else { + if(x >= low || x <= high) + return 1; + } + return 0; +} + +static int +seq_lt(ulong x, ulong y) +{ + return (int)(x-y) < 0; +} + +static int +seq_le(ulong x, ulong y) +{ + return (int)(x-y) <= 0; +} + +static int +seq_gt(ulong x, ulong y) +{ + return (int)(x-y) > 0; +} + +static int +seq_ge(ulong x, ulong y) +{ + return (int)(x-y) >= 0; +} + +/* + * use the time between the first SYN and its ack as the + * initial round trip time + */ +static void +tcpsynackrtt(Conv *s) +{ + Tcpctl *tcb; + int delta; + Tcppriv *tpriv; + + tcb = (Tcpctl*)s->ptcl; + tpriv = s->p->priv; + + delta = NOW - tcb->sndsyntime; + tcb->srtt = delta << LOGAGAIN; + tcb->mdev = delta << LOGDGAIN; + + /* halt round trip timer */ + tcphalt(tpriv, &tcb->rtt_timer); +} 
+ +static void +update(Conv *s, Tcp *seg) +{ + int rtt, delta; + Tcpctl *tcb; + ulong acked; + Tcppriv *tpriv; + + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + /* if everything has been acked, force output(?) */ + if(seq_gt(seg->ack, tcb->snd.nxt)) { + tcb->flags |= FORCE; + return; + } + + /* catch zero-window updates, update window & recover */ + if(tcb->snd.wnd == 0 && seg->wnd > 0) + if(seq_lt(seg->ack, tcb->snd.ptr)){ + netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n", + seg->ack, tcb->snd.una, tcb->snd.ptr, seg->wnd); + tcb->snd.wnd = seg->wnd; + goto recovery; + } + + if(seg->ack == tcb->snd.una) + if(tcb->snd.una != tcb->snd.nxt) + if(seg->len == 0) + if(seg->wnd == tcb->snd.wnd) + if(++tcb->snd.dupacks == TCPREXMTTHRESH){ +recovery: + tcb->snd.recovery = 1; + tcb->snd.rxt = tcb->snd.nxt; + tcpcongestion(tcb); + tcprxmit(s); + tcb->cwind = tcb->ssthresh; + } else + tcb->cwind += tcb->mss; + + /* + * update window + */ + if(seq_gt(seg->ack, tcb->snd.wl2) + || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){ + tcb->snd.wnd = seg->wnd; + tcb->snd.wl2 = seg->ack; + } + + if(!seq_gt(seg->ack, tcb->snd.una)){ + /* + * don't let us hangup if sending into a closed window and + * we're still getting acks + */ + if((tcb->flags&RETRAN) && tcb->snd.wnd == 0) + tcb->backedoff = MAXBACKMS/4; + return; + } + + /* Compute the new send window size */ + acked = seg->ack - tcb->snd.una; + + /* avoid slow start and timers for SYN acks */ + if((tcb->flags & SYNACK) == 0) { + tcb->flags |= SYNACK; + acked--; + tcb->flgcnt--; + goto done; + } + + /* + * congestion control + */ + if(tcb->snd.recovery){ + if(seq_ge(seg->ack, tcb->snd.rxt)){ + /* recovery finished */ + tcb->snd.dupacks = 0; + tcb->snd.recovery = 0; + tcb->cwind = (tcb->snd.nxt - seg->ack) + tcb->mss; + if(tcb->ssthresh < tcb->cwind) + tcb->cwind = tcb->ssthresh; + } else { + /* partial ack */ + tcb->cwind -= acked; + tcb->cwind += tcb->mss; + } + } else { + tcb->snd.dupacks = 0; + 
if(tcb->cwind < tcb->ssthresh) + tcpabcincr(tcb, acked, 2*tcb->mss); /* slow start */ + else + tcpabcincr(tcb, acked, tcb->cwind); /* congestion dance */ + } + + /* Adjust the timers according to the round trip time */ + /* todo: fix sloppy treatment of overflow cases here. */ + if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) { + tcphalt(tpriv, &tcb->rtt_timer); + if((tcb->flags&RETRAN) == 0) { + tcb->backoff = 0; + tcb->backedoff = 0; + rtt = tcb->rtt_timer.start - tcb->rtt_timer.count; + if(rtt == 0) + rtt = 1; /* otherwise all close systems will rexmit in 0 time */ + rtt *= MSPTICK; + if(tcb->srtt == 0) { + tcb->srtt = rtt << LOGAGAIN; + tcb->mdev = rtt << LOGDGAIN; + } else { + delta = rtt - (tcb->srtt>>LOGAGAIN); + tcb->srtt += delta; + if(tcb->srtt <= 0) + tcb->srtt = 1; + + delta = abs(delta) - (tcb->mdev>>LOGDGAIN); + tcb->mdev += delta; + if(tcb->mdev <= 0) + tcb->mdev = 1; + } + tcpsettimer(tcb); + } + } + +done: + if(qdiscard(s->wq, acked) < acked) + tcb->flgcnt--; + tcb->snd.una = seg->ack; + + /* newreno fast recovery */ + if(tcb->snd.recovery) + tcprxmit(s); + + /*tcplimitmaxburst(tcb);*/ + + if(seq_gt(seg->ack, tcb->snd.urg)) + tcb->snd.urg = seg->ack; + + if(tcb->snd.una != tcb->snd.nxt) + tcpgo(tpriv, &tcb->timer); + else + tcphalt(tpriv, &tcb->timer); + + if(seq_lt(tcb->snd.ptr, tcb->snd.una)) + tcb->snd.ptr = tcb->snd.una; + + if(!tcb->snd.recovery) + tcb->flags &= ~RETRAN; + tcb->backoff = 0; + tcb->backedoff = 0; +} + +static void +tcpiput(Proto *tcp, Ipifc*, Block *bp) +{ + Tcp seg; + Tcp4hdr *h4; + Tcp6hdr *h6; + int hdrlen; + Tcpctl *tcb; + ushort length, csum; + uchar source[IPaddrlen], dest[IPaddrlen]; + Conv *s; + Fs *f; + Tcppriv *tpriv; + uchar version; + + f = tcp->f; + tpriv = tcp->priv; + + tpriv->stats[InSegs]++; + + h4 = (Tcp4hdr*)(bp->rp); + h6 = (Tcp6hdr*)(bp->rp); + + if((h4->vihl&0xF0)==IP_VER4) { + version = V4; + length = nhgets(h4->length); + v4tov6(dest, h4->tcpdst); + v4tov6(source, h4->tcpsrc); + + 
h4->Unused = 0; + hnputs(h4->tcplen, length-TCP4_PKT); + if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) && + ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) { + tpriv->stats[CsumErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp proto cksum\n"); + freeblist(bp); + return; + } + + hdrlen = ntohtcp4(&seg, &bp); + if(hdrlen < 0){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcp hdr len\n"); + return; + } + + /* trim the packet to the size claimed by the datagram */ + length -= hdrlen+TCP4_PKT; + bp = trimblock(bp, hdrlen+TCP4_PKT, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcp len < 0 after trim\n"); + return; + } + } + else { + int ttl = h6->ttl; + int proto = h6->proto; + + version = V6; + length = nhgets(h6->ploadlen); + ipmove(dest, h6->tcpdst); + ipmove(source, h6->tcpsrc); + + h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0; + h6->ttl = proto; + hnputl(h6->vcf, length); + if((h6->tcpcksum[0] || h6->tcpcksum[1]) && + (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) { + tpriv->stats[CsumErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, + "bad tcpv6 proto cksum: got %#ux, computed %#ux\n", + h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum); + freeblist(bp); + return; + } + h6->ttl = ttl; + h6->proto = proto; + hnputs(h6->ploadlen, length); + + hdrlen = ntohtcp6(&seg, &bp); + if(hdrlen < 0){ + tpriv->stats[HlenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "bad tcpv6 hdr len\n"); + return; + } + + /* trim the packet to the size claimed by the datagram */ + length -= hdrlen; + bp = trimblock(bp, hdrlen+TCP6_PKT, length); + if(bp == nil){ + tpriv->stats[LenErrs]++; + tpriv->stats[InErrs]++; + netlog(f, Logtcp, "tcpv6 len < 0 after trim\n"); + return; + } + } + + /* lock protocol while searching for a conversation */ + qlock(tcp); + + /* Look for a matching conversation */ + s = iphtlook(&tpriv->ht, source, seg.source, dest, 
seg.dest); + if(s == nil){ + netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n", + source, seg.source, dest, seg.dest); +reset: + qunlock(tcp); + sndrst(tcp, source, dest, length, &seg, version, "no conversation"); + freeblist(bp); + return; + } + + /* if it's a listener, look for the right flags and get a new conv */ + tcb = (Tcpctl*)s->ptcl; + if(tcb->state == Listen){ + if(seg.flags & RST){ + limborst(s, &seg, source, dest, version); + qunlock(tcp); + freeblist(bp); + return; + } + + /* if this is a new SYN, put the call into limbo */ + if((seg.flags & SYN) && (seg.flags & ACK) == 0){ + limbo(s, source, dest, &seg, version); + qunlock(tcp); + freeblist(bp); + return; + } + + /* + * if there's a matching call in limbo, tcpincoming will + * return it in state Syn_received + */ + s = tcpincoming(s, &seg, source, dest, version); + if(s == nil) + goto reset; + } + + /* The rest of the input state machine is run with the control block + * locked and implements the state machine directly out of the RFC. + * Out-of-band data is ignored - it was always a bad idea. 
+ */ + tcb = (Tcpctl*)s->ptcl; + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + qunlock(tcp); + + /* fix up window */ + seg.wnd <<= tcb->rcv.scale; + + /* every input packet in puts off the keep alive time out */ + tcpsetkacounter(tcb); + + switch(tcb->state) { + case Closed: + sndrst(tcp, source, dest, length, &seg, version, "sending to Closed"); + goto raise; + case Syn_sent: + if(seg.flags & ACK) { + if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) { + sndrst(tcp, source, dest, length, &seg, version, + "bad seq in Syn_sent"); + goto raise; + } + } + if(seg.flags & RST) { + if(seg.flags & ACK) + localclose(s, Econrefused); + goto raise; + } + + if(seg.flags & SYN) { + procsyn(s, &seg); + if(seg.flags & ACK){ + update(s, &seg); + tcpsynackrtt(s); + tcpsetstate(s, Established); + tcpsetscale(s, tcb, seg.ws, tcb->scale); + } + else { + tcb->time = NOW; + tcpsetstate(s, Syn_received); /* DLP - shouldn't this be a reset? */ + } + + if(length != 0 || (seg.flags & FIN)) + break; + + freeblist(bp); + goto output; + } + else + freeblist(bp); + + qunlock(s); + poperror(); + return; + case Syn_received: + /* doesn't matter if it's the correct ack, we're just trying to set timing */ + if(seg.flags & ACK) + tcpsynackrtt(s); + break; + } + + /* + * One DOS attack is to open connections to us and then forget about them, + * thereby tying up a conv at no long term cost to the attacker. + * This is an attempt to defeat these stateless DOS attacks. See + * corresponding code in tcpsendka(). 
+ */ + if(tcb->state != Syn_received && (seg.flags & RST) == 0){ + if(tcpporthogdefense + && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){ + print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n", + source, seg.source, dest, seg.dest, seg.flags, + tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29)); + localclose(s, "stateless hog"); + } + } + + /* Cut the data to fit the receive window */ + if(tcptrim(tcb, &seg, &bp, &length) == -1) { + netlog(f, Logtcp, "tcptrim, not accept, seq %lud-%lud win %lud-%lud from %I\n", + seg.seq, seg.seq + length - 1, + tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, s->raddr); + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000 / MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + if(!(seg.flags & RST)) { + tcb->flags |= FORCE; + goto output; + } + qunlock(s); + poperror(); + return; + } + + /* Cannot accept so answer with a rst */ + if(length && tcb->state == Closed) { + sndrst(tcp, source, dest, length, &seg, version, "sending to Closed"); + goto raise; + } + + /* The segment is beyond the current receive pointer so + * queue the data in the resequence queue + */ + if(seg.seq != tcb->rcv.nxt) + if(length != 0 || (seg.flags & (SYN|FIN))) { + update(s, &seg); + if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0) + print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport); + tcb->flags |= FORCE; /* force duplicate ack */ + goto output; + } + + /* + * keep looping till we've processed this packet plus any + * adjacent packets in the resequence queue + */ + for(;;) { + if(seg.flags & RST) { + if(tcb->state == Established) { + tpriv->stats[EstabResets]++; + if(tcb->rcv.nxt != seg.seq) + print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq); 
+ } + localclose(s, Econrefused); + goto raise; + } + + if((seg.flags&ACK) == 0) + goto raise; + + switch(tcb->state) { + case Syn_received: + if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){ + sndrst(tcp, source, dest, length, &seg, version, + "bad seq in Syn_received"); + goto raise; + } + update(s, &seg); + tcpsetstate(s, Established); + case Established: + case Close_wait: + update(s, &seg); + break; + case Finwait1: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0){ + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcpsetkacounter(tcb); + tcb->time = NOW; + tcpsetstate(s, Finwait2); + tcb->katimer.start = MSL2 * (1000 / MSPTICK); + tcpgo(tpriv, &tcb->katimer); + } + break; + case Finwait2: + update(s, &seg); + break; + case Closing: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000 / MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + break; + case Last_ack: + update(s, &seg); + if(qlen(s->wq)+tcb->flgcnt == 0) { + localclose(s, nil); + goto raise; + } + case Time_wait: + tcb->flags |= FORCE; + if(tcb->timer.state != TcptimerON) + tcpgo(tpriv, &tcb->timer); + } + + if((seg.flags&URG) && seg.urg) { + if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) { + tcb->rcv.urg = seg.urg + seg.seq; + pullblock(&bp, seg.urg); + } + } + else + if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg)) + tcb->rcv.urg = tcb->rcv.nxt; + + if(length == 0) { + if(bp != nil) + freeblist(bp); + } + else { + switch(tcb->state){ + default: + /* Ignore segment text */ + if(bp != nil) + freeblist(bp); + break; + + case Syn_received: + case Established: + case Finwait1: + /* If we still have some data place on + * receive queue + */ + if(bp) { + bp = packblock(bp); + if(bp == nil) + panic("tcp packblock"); + qpassnolim(s->rq, bp); + bp = nil; + + /* + * Force an ack every 2 data messages. 
This is + * a hack for rob to make his home system run + * faster. + * + * this also keeps the standard TCP congestion + * control working since it needs an ack every + * 2 max segs worth. This is not quite that, + * but under a real stream is equivalent since + * every packet has a max seg in it. + */ + if(++(tcb->rcv.una) >= 2) + tcb->flags |= FORCE; + } + tcb->rcv.nxt += length; + + /* + * turn on the acktimer if there's something + * to ack + */ + if(tcb->acktimer.state != TcptimerON) + tcpgo(tpriv, &tcb->acktimer); + + break; + case Finwait2: + /* no process to read the data, send a reset */ + if(bp != nil) + freeblist(bp); + sndrst(tcp, source, dest, length, &seg, version, + "send to Finwait2"); + qunlock(s); + poperror(); + return; + } + } + + if(seg.flags & FIN) { + tcb->flags |= FORCE; + + switch(tcb->state) { + case Syn_received: + case Established: + tcb->rcv.nxt++; + tcpsetstate(s, Close_wait); + break; + case Finwait1: + tcb->rcv.nxt++; + if(qlen(s->wq)+tcb->flgcnt == 0) { + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2*(1000/MSPTICK); + tcpgo(tpriv, &tcb->timer); + } + else + tcpsetstate(s, Closing); + break; + case Finwait2: + tcb->rcv.nxt++; + tcphalt(tpriv, &tcb->rtt_timer); + tcphalt(tpriv, &tcb->acktimer); + tcphalt(tpriv, &tcb->katimer); + tcpsetstate(s, Time_wait); + tcb->timer.start = MSL2 * (1000/MSPTICK); + tcpgo(tpriv, &tcb->timer); + break; + case Close_wait: + case Closing: + case Last_ack: + break; + case Time_wait: + tcpgo(tpriv, &tcb->timer); + break; + } + } + + /* + * get next adjacent segment from the resequence queue. 
+ * dump/trim any overlapping segments + */ + for(;;) { + if(tcb->reseq == nil) + goto output; + + if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0) + goto output; + + getreseq(tcb, &seg, &bp, &length); + + if(tcptrim(tcb, &seg, &bp, &length) == 0) + break; + } + } +output: + tcpoutput(s); + qunlock(s); + poperror(); + return; +raise: + qunlock(s); + poperror(); + freeblist(bp); + tcpkick(s); +} + +/* + * always enters and exits with the s locked. We drop + * the lock to ipoput the packet so some care has to be + * taken by callers. + */ +static void +tcpoutput(Conv *s) +{ + Tcp seg; + uint msgs; + Tcpctl *tcb; + Block *hbp, *bp; + int sndcnt; + ulong ssize, dsize, sent; + Fs *f; + Tcppriv *tpriv; + uchar version; + + f = s->p->f; + tpriv = s->p->priv; + version = s->ipversion; + + for(msgs = 0; msgs < 100; msgs++) { + tcb = (Tcpctl*)s->ptcl; + + switch(tcb->state) { + case Listen: + case Closed: + case Finwait2: + return; + } + + /* Don't send anything else until our SYN has been acked */ + if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0) + break; + + /* force an ack when a window has opened up */ + tcprcvwin(s); + if(tcb->rcv.blocked && tcb->rcv.wnd > 0){ + tcb->rcv.blocked = 0; + tcb->flags |= FORCE; + } + + sndcnt = qlen(s->wq)+tcb->flgcnt; + sent = tcb->snd.ptr - tcb->snd.una; + ssize = sndcnt; + if(tcb->snd.wnd == 0){ + /* zero window probe */ + if(sent > 0) + if(!(tcb->flags & FORCE)) + break; /* already probing, rto re-probes */ + if(ssize < sent) + ssize = 0; + else{ + ssize -= sent; + if(ssize > 0) + ssize = 1; + } + } else { + /* calculate usable segment size */ + if(ssize > tcb->cwind) + ssize = tcb->cwind; + if(ssize > tcb->snd.wnd) + ssize = tcb->snd.wnd; + + if(ssize < sent) + ssize = 0; + else { + ssize -= sent; + if(ssize > tcb->mss) + ssize = tcb->mss; + } + } + + dsize = ssize; + seg.urg = 0; + + if(!(tcb->flags & FORCE)){ + if(ssize == 0) + break; + if(ssize < tcb->mss) + if(tcb->snd.nxt == tcb->snd.ptr) + if(sent > 
TCPREXMTTHRESH*tcb->mss) + break; + } + + tcb->flags &= ~FORCE; + + /* By default we will generate an ack */ + tcphalt(tpriv, &tcb->acktimer); + tcb->rcv.una = 0; + seg.source = s->lport; + seg.dest = s->rport; + seg.flags = ACK; + seg.mss = 0; + seg.ws = 0; + switch(tcb->state){ + case Syn_sent: + seg.flags = 0; + if(tcb->snd.ptr == tcb->iss){ + seg.flags |= SYN; + dsize--; + seg.mss = tcb->mss; + seg.ws = tcb->scale; + } + break; + case Syn_received: + /* + * don't send any data with a SYN/ACK packet + * because Linux rejects the packet in its + * attempt to solve the SYN attack problem + */ + if(tcb->snd.ptr == tcb->iss){ + seg.flags |= SYN; + dsize = 0; + ssize = 1; + seg.mss = tcb->mss; + seg.ws = tcb->scale; + } + break; + } + seg.seq = tcb->snd.ptr; + seg.ack = tcb->rcv.nxt; + seg.wnd = tcb->rcv.wnd; + + /* Pull out data to send */ + bp = nil; + if(dsize != 0) { + bp = qcopy(s->wq, dsize, sent); + if(BLEN(bp) != dsize) { + seg.flags |= FIN; + dsize--; + } + } + + if(sent+dsize == sndcnt && dsize) + seg.flags |= PSH; + + tcb->snd.ptr += ssize; + + /* Pull up the send pointer so we can accept acks + * for this window + */ + if(seq_gt(tcb->snd.ptr,tcb->snd.nxt)) + tcb->snd.nxt = tcb->snd.ptr; + + /* Build header, link data and compute cksum */ + switch(version){ + case V4: + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb); + if(hbp == nil) { + freeblist(bp); + return; + } + break; + case V6: + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb); + if(hbp == nil) { + freeblist(bp); + return; + } + break; + default: + hbp = nil; /* to suppress a warning */ + panic("tcpoutput: version %d", version); + } + + /* Start the transmission timers if there is new data and we + * expect acknowledges + */ + if(ssize != 0){ + if(tcb->timer.state != TcptimerON) + tcpgo(tpriv, &tcb->timer); + + /* If round trip timer isn't running, start it. 
+ * measure the longest packet only in case the + * transmission time dominates RTT + */ + if(tcb->rtt_timer.state != TcptimerON) + if(ssize == tcb->mss) { + tcpgo(tpriv, &tcb->rtt_timer); + tcb->rttseq = tcb->snd.ptr; + } + } + + tpriv->stats[OutSegs]++; + + /* put off the next keep alive */ + tcpgo(tpriv, &tcb->katimer); + + switch(version){ + case V4: + if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){ + /* a negative return means no route */ + localclose(s, "no route"); + } + break; + case V6: + if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){ + /* a negative return means no route */ + localclose(s, "no route"); + } + break; + default: + panic("tcpoutput2: version %d", version); + } + if((msgs%4) == 1){ + qunlock(s); + // sched(); + qlock(s); + } + } +} + +/* + * the BSD convention (hack?) for keep alives. resend last uchar acked. + */ +static void +tcpsendka(Conv *s) +{ + Tcp seg; + Tcpctl *tcb; + Block *hbp,*dbp; + + tcb = (Tcpctl*)s->ptcl; + + dbp = nil; + memset(&seg, 0, sizeof seg); + seg.urg = 0; + seg.source = s->lport; + seg.dest = s->rport; + seg.flags = ACK|PSH; + seg.mss = 0; + seg.ws = 0; + if(tcpporthogdefense) + seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20); + else + seg.seq = tcb->snd.una-1; + seg.ack = tcb->rcv.nxt; + tcb->rcv.una = 0; + seg.wnd = tcb->rcv.wnd; + if(tcb->state == Finwait2){ + seg.flags |= FIN; + } else { + dbp = allocb(1); + dbp->wp++; + } + + if(isv4(s->raddr)) { + /* Build header, link data and compute cksum */ + tcb->protohdr.tcp4hdr.vihl = IP_VER4; + hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb); + if(hbp == nil) { + freeblist(dbp); + return; + } + ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s); + } + else { + /* Build header, link data and compute cksum */ + tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6; + hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb); + if(hbp == nil) { + freeblist(dbp); + return; + } + ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s); + } +} + +/* + * set connection to time out after 12 minutes + */ +static 
void +tcpsetkacounter(Tcpctl *tcb) +{ + tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK); + if(tcb->kacounter < 3) + tcb->kacounter = 3; +} + +/* + * if we've timed out, close the connection + * otherwise, send a keepalive and restart the timer + */ +static void +tcpkeepalive(void *v) +{ + Tcpctl *tcb; + Conv *s; + + s = v; + tcb = (Tcpctl*)s->ptcl; + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + if(tcb->state != Closed){ + if(--(tcb->kacounter) <= 0) { + localclose(s, Etimedout); + } else { + tcpsendka(s); + tcpgo(s->p->priv, &tcb->katimer); + } + } + qunlock(s); + poperror(); +} + +/* + * start keepalive timer + */ +static char* +tcpstartka(Conv *s, char **f, int n) +{ + Tcpctl *tcb; + int x; + + tcb = (Tcpctl*)s->ptcl; + if(tcb->state != Established) + return "connection must be in Established state"; + if(n > 1){ + x = atoi(f[1]); + if(x >= MSPTICK) + tcb->katimer.start = x/MSPTICK; + } + tcpsetkacounter(tcb); + tcpgo(s->p->priv, &tcb->katimer); + + return nil; +} + +/* + * turn checksums on/off + */ +static char* +tcpsetchecksum(Conv *s, char **f, int) +{ + Tcpctl *tcb; + + tcb = (Tcpctl*)s->ptcl; + tcb->nochecksum = !atoi(f[1]); + + return nil; +} + +/* + * retransmit (at most) one segment at snd.una. 
+ * preserve cwind & snd.ptr + */ +static void +tcprxmit(Conv *s) +{ + Tcpctl *tcb; + Tcppriv *tpriv; + ulong tcwind, tptr; + + tcb = (Tcpctl*)s->ptcl; + tcb->flags |= RETRAN|FORCE; + + tptr = tcb->snd.ptr; + tcwind = tcb->cwind; + tcb->snd.ptr = tcb->snd.una; + tcb->cwind = tcb->mss; + tcpoutput(s); + tcb->cwind = tcwind; + tcb->snd.ptr = tptr; + + tpriv = s->p->priv; + tpriv->stats[RetransSegs]++; +} + +/* + * todo: RFC 4138 F-RTO + */ +static void +tcptimeout(void *arg) +{ + Conv *s; + Tcpctl *tcb; + int maxback; + Tcppriv *tpriv; + + s = (Conv*)arg; + tpriv = s->p->priv; + tcb = (Tcpctl*)s->ptcl; + + if(waserror()){ + qunlock(s); + nexterror(); + } + qlock(s); + switch(tcb->state){ + default: + tcb->backoff++; + if(tcb->state == Syn_sent) + maxback = MAXBACKMS/2; + else + maxback = MAXBACKMS; + tcb->backedoff += tcb->timer.start * MSPTICK; + if(tcb->backedoff >= maxback) { + localclose(s, Etimedout); + break; + } + netlog(s->p->f, Logtcprxmt, "timeout rexmit %#lux %d/%lud\n", + tcb->snd.una, tcb->timer.start, NOW); + tcpsettimer(tcb); + tcpcongestion(tcb); + tcprxmit(s); + tcb->snd.ptr = tcb->snd.una; + tcb->cwind = tcb->mss; + tpriv->stats[RetransTimeouts]++; + tcb->snd.dupacks = 0; + tcb->snd.recovery = 0; + break; + case Time_wait: + localclose(s, nil); + break; + case Closed: + break; + } + qunlock(s); + poperror(); +} + +static int +inwindow(Tcpctl *tcb, int seq) +{ + return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1); +} + +/* + * set up state for a received SYN (or SYN ACK) packet + */ +static void +procsyn(Conv *s, Tcp *seg) +{ + Tcpctl *tcb; + Tcppriv *tpriv; + + tcb = (Tcpctl*)s->ptcl; + tcb->flags |= FORCE; + + tcb->rcv.nxt = seg->seq + 1; + tcb->rcv.urg = tcb->rcv.nxt; + tcb->irs = seg->seq; + + /* our sending max segment size cannot be bigger than what he asked for */ + if(seg->mss != 0 && seg->mss < tcb->mss) { + tcb->mss = seg->mss; + tpriv = s->p->priv; + tpriv->stats[Mss] = tcb->mss; + } + + tcb->snd.wnd = seg->wnd; + + /* RFC 
3390 initial window */ + if(tcb->mss < 1095) + tcb->cwind = 4*tcb->mss; + else if(tcb->mss < 2190) + tcb->cwind = 4380; + else + tcb->cwind = 2*tcb->mss; +} + +static int +dumpreseq(Tcpctl *tcb) +{ + Reseq *r, *next; + + for(r = tcb->reseq; r != nil; r = next){ + next = r->next; + freeblist(r->bp); + free(r); + } + tcb->reseq = nil; + tcb->nreseq = 0; + tcb->reseqlen = 0; + return -1; +} + +static void +logreseq(Fs *f, Reseq *r) +{ + for(; r != nil; r = r->next){ + netlog(f, Logtcp, "%#lud %ud %#lud %#ux\n", r->seg.seq, r->seg.len, + r->seg.ack, r->seg.flags); + } +} + +static int +addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length) +{ + Reseq *rp, **rr; + int qmax; + + rp = malloc(sizeof(Reseq)); + if(rp == nil){ + freeblist(bp); /* bp always consumed by addreseq */ + return 0; + } + + rp->seg = *seg; + rp->bp = bp; + rp->length = length; + + tcb->reseqlen += length; + tcb->nreseq++; + + /* Place on reassembly list sorting by starting seq number */ + for(rr = &tcb->reseq;; rr = &(*rr)->next) + if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){ + rp->next = *rr; + *rr = rp; + tpriv->stats[Resequenced]++; + if(rp->next != nil) + tpriv->stats[OutOfOrder]++; + break; + } + + qmax = QMAX<<tcb->qscale; /* byte limit for the resequence queue */ + if(tcb->reseqlen > qmax){ + netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq); + logreseq(f, tcb->reseq); + tpriv->stats[ReseqBytelim]++; + return dumpreseq(tcb); + } + qmax = 15*(tcb->qscale + 1); + if(tcb->nreseq > qmax){ + netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen); + logreseq(f, tcb->reseq); + tpriv->stats[ReseqPktlim]++; + return dumpreseq(tcb); + } + + return 0; +} + +static void +getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) +{ + Reseq *rp; + + rp = tcb->reseq; + if(rp == nil) + return; + + tcb->reseq = rp->next; + + *seg = rp->seg; + *bp = rp->bp; + *length = rp->length; + + tcb->nreseq--; + tcb->reseqlen -= 
rp->length; + + free(rp); +} + +static int +tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length) +{ + ushort len; + uchar accept; + int dupcnt, excess; + + accept = 0; + len = *length; + if(seg->flags & SYN) + len++; + if(seg->flags & FIN) + len++; + + if(tcb->rcv.wnd == 0) { + if(len == 0 && seg->seq == tcb->rcv.nxt) + return 0; + } + else { + /* Some part of the segment should be in the window */ + if(inwindow(tcb,seg->seq)) + accept++; + else + if(len != 0) { + if(inwindow(tcb, seg->seq+len-1) || + seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1)) + accept++; + } + } + if(!accept) { + freeblist(*bp); + return -1; + } + dupcnt = tcb->rcv.nxt - seg->seq; + if(dupcnt > 0){ + tcb->rerecv += dupcnt; + if(seg->flags & SYN){ + seg->flags &= ~SYN; + seg->seq++; + + if(seg->urg > 1) + seg->urg--; + else + seg->flags &= ~URG; + dupcnt--; + } + if(dupcnt > 0){ + pullblock(bp, (ushort)dupcnt); + seg->seq += dupcnt; + *length -= dupcnt; + + if(seg->urg > dupcnt) + seg->urg -= dupcnt; + else { + seg->flags &= ~URG; + seg->urg = 0; + } + } + } + excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd); + if(excess > 0) { + tcb->rerecv += excess; + *length -= excess; + *bp = trimblock(*bp, 0, *length); + if(*bp == nil) + panic("presotto is a boofhead"); + seg->flags &= ~FIN; + } + return 0; +} + +static void +tcpadvise(Proto *tcp, Block *bp, char *msg) +{ + Tcp4hdr *h4; + Tcp6hdr *h6; + Tcpctl *tcb; + uchar source[IPaddrlen]; + uchar dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + + h4 = (Tcp4hdr*)(bp->rp); + h6 = (Tcp6hdr*)(bp->rp); + + if((h4->vihl&0xF0)==IP_VER4) { + v4tov6(dest, h4->tcpdst); + v4tov6(source, h4->tcpsrc); + psource = nhgets(h4->tcpsport); + pdest = nhgets(h4->tcpdport); + } + else { + ipmove(dest, h6->tcpdst); + ipmove(source, h6->tcpsrc); + psource = nhgets(h6->tcpsport); + pdest = nhgets(h6->tcpdport); + } + + /* Look for a connection */ + qlock(tcp); + for(p = tcp->conv; *p; p++) { + s = *p; + tcb = (Tcpctl*)s->ptcl; + if(s->rport == 
pdest) + if(s->lport == psource) + if(tcb->state != Closed) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + qlock(s); + qunlock(tcp); + switch(tcb->state){ + case Syn_sent: + localclose(s, msg); + break; + } + qunlock(s); + freeblist(bp); + return; + } + } + qunlock(tcp); + freeblist(bp); +} + +static char* +tcpporthogdefensectl(char *val) +{ + if(strcmp(val, "on") == 0) + tcpporthogdefense = 1; + else if(strcmp(val, "off") == 0) + tcpporthogdefense = 0; + else + return "unknown value for tcpporthogdefense"; + return nil; +} + +/* called with c qlocked */ +static char* +tcpctl(Conv* c, char** f, int n) +{ + if(n == 1 && strcmp(f[0], "hangup") == 0) + return tcphangup(c); + if(n >= 1 && strcmp(f[0], "keepalive") == 0) + return tcpstartka(c, f, n); + if(n >= 2 && strcmp(f[0], "checksum") == 0) /* tcpsetchecksum reads f[1] */ + return tcpsetchecksum(c, f, n); + if(n >= 2 && strcmp(f[0], "tcpporthogdefense") == 0) /* f[1] is the on/off value */ + return tcpporthogdefensectl(f[1]); + return "unknown control request"; +} + +static int +tcpstats(Proto *tcp, char *buf, int len) +{ + Tcppriv *priv; + char *p, *e; + int i; + + priv = tcp->priv; + p = buf; + e = p+len; + for(i = 0; i < Nstats; i++) + p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]); + return p - buf; +} + +/* + * garbage collect any stale conversations: + * - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack) + * - Finwait2 after 5 minutes + * + * this is called whenever we run out of channels. Both checks are + * of questionable validity so we try to use them only when we're + * up against the wall. 
+ */ +static int +tcpgc(Proto *tcp) +{ + Conv *c, **pp, **ep; + int n; + Tcpctl *tcb; + + + n = 0; + ep = &tcp->conv[tcp->nc]; + for(pp = tcp->conv; pp < ep; pp++) { + c = *pp; + if(c == nil) + break; + if(!canqlock(c)) + continue; + tcb = (Tcpctl*)c->ptcl; + switch(tcb->state){ + case Syn_received: + if(NOW - tcb->time > 5000){ + localclose(c, Etimedout); + n++; + } + break; + case Finwait2: + if(NOW - tcb->time > 5*60*1000){ + localclose(c, Etimedout); + n++; + } + break; + } + qunlock(c); + } + return n; +} + +static void +tcpsettimer(Tcpctl *tcb) +{ + int x; + + /* round trip dependency */ + x = backoff(tcb->backoff) * + (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK; + + /* bounded twixt 1/2 and 64 seconds */ + if(x < 500/MSPTICK) + x = 500/MSPTICK; + else if(x > (64000/MSPTICK)) + x = 64000/MSPTICK; + tcb->timer.start = x; +} + +void +tcpinit(Fs *fs) +{ + Proto *tcp; + Tcppriv *tpriv; + + tcp = smalloc(sizeof(Proto)); + tpriv = tcp->priv = smalloc(sizeof(Tcppriv)); + tcp->name = "tcp"; + tcp->connect = tcpconnect; + tcp->announce = tcpannounce; + tcp->ctl = tcpctl; + tcp->state = tcpstate; + tcp->create = tcpcreate; + tcp->close = tcpclose; + tcp->rcv = tcpiput; + tcp->advise = tcpadvise; + tcp->stats = tcpstats; + tcp->inuse = tcpinuse; + tcp->gc = tcpgc; + tcp->ipproto = IP_TCPPROTO; + tcp->nc = scalednconv(); + tcp->ptclsize = sizeof(Tcpctl); + tpriv->stats[MaxConn] = tcp->nc; + + Fsproto(fs, tcp); +} + +enum { + Maxqscale = 3, /* ½ mb */ +}; + +static void +tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale) +{ + /* + * guess at reasonable queue sizes. there's no current way + * to know how many nic receive buffers we can safely tie up in the + * tcp stack, and we don't adjust our queues to maximize throughput + * and minimize bufferbloat. n.b. the offer (rcvscale) needs to be + * respected, but we still control our own buffer commitment by + * keeping a separate qscale. 
+ */ + tcb->rcv.scale = rcvscale & 0xff; + tcb->snd.scale = sndscale & 0xff; + tcb->qscale = rcvscale; + if(rcvscale > Maxqscale) + tcb->qscale = Maxqscale; + + if(rcvscale != tcb->rcv.scale) + netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n", + tcb->window, qlen(s->rq), QMAX<qscale, s->lport); + tcb->window = QMAX<qscale; + tcb->ssthresh = tcb->window; + qsetlimit(s->rq, QMAX<qscale); + tcprcvwin(s); +} diff -Nru 0/sys/src/nix/ip/udp.c 4/sys/src/nix/ip/udp.c --- 0/sys/src/nix/ip/udp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/ip/udp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,619 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ip.h" +#include "ipv6.h" + + +#define DPRINT if(0)print + +enum +{ + UDP_UDPHDR_SZ = 8, + + UDP4_PHDR_OFF = 8, + UDP4_PHDR_SZ = 12, + UDP4_IPHDR_SZ = 20, + UDP6_IPHDR_SZ = 40, + UDP6_PHDR_SZ = 40, + UDP6_PHDR_OFF = 0, + + IP_UDPPROTO = 17, + UDP_USEAD7 = 52, + + Udprxms = 200, + Udptickms = 100, + Udpmaxxmit = 10, +}; + +typedef struct Udp4hdr Udp4hdr; +struct Udp4hdr +{ + /* ip header */ + uchar vihl; /* Version and header length */ + uchar tos; /* Type of service */ + uchar length[2]; /* packet length */ + uchar id[2]; /* Identification */ + uchar frag[2]; /* Fragment information */ + uchar Unused; + uchar udpproto; /* Protocol */ + uchar udpplen[2]; /* Header plus data length */ + uchar udpsrc[IPv4addrlen]; /* Ip source */ + uchar udpdst[IPv4addrlen]; /* Ip destination */ + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination port */ + uchar udplen[2]; /* data length */ + uchar udpcksum[2]; /* Checksum */ +}; + +typedef struct Udp6hdr Udp6hdr; +struct Udp6hdr { + uchar viclfl[4]; + uchar len[2]; + uchar nextheader; + uchar hoplimit; + uchar udpsrc[IPaddrlen]; + uchar udpdst[IPaddrlen]; + + /* udp header */ + uchar udpsport[2]; /* Source port */ + uchar udpdport[2]; /* Destination 
port */ + uchar udplen[2]; /* data length */ + uchar udpcksum[2]; /* Checksum */ +}; + +/* MIB II counters */ +typedef struct Udpstats Udpstats; +struct Udpstats +{ + uvlong udpInDatagrams; + uvlong udpNoPorts; + uvlong udpInErrors; + uvlong udpOutDatagrams; +}; + +typedef struct Udppriv Udppriv; +struct Udppriv +{ + Ipht ht; + + /* MIB counters */ + Udpstats ustats; + + /* non-MIB stats */ + uvlong csumerr; /* checksum errors */ + uvlong lenerr; /* short packet */ +}; + +void (*etherprofiler)(char *name, int qlen); +void udpkick(void *x, Block *bp); + +/* + * protocol specific part of Conv + */ +typedef struct Udpcb Udpcb; +struct Udpcb +{ + QLock; + uchar headers; +}; + +static char* +udpconnect(Conv *c, char **argv, int argc) +{ + char *e; + Udppriv *upriv; + + upriv = c->p->priv; + e = Fsstdconnect(c, argv, argc); + Fsconnected(c, e); + if(e != nil) + return e; + + iphtadd(&upriv->ht, c); + return nil; +} + + +static int +udpstate(Conv *c, char *state, int n) +{ + return snprint(state, n, "%s qin %d qout %d\n", + c->inuse ? "Open" : "Closed", + c->rq ? qlen(c->rq) : 0, + c->wq ? 
qlen(c->wq) : 0 + ); +} + +static char* +udpannounce(Conv *c, char** argv, int argc) +{ + char *e; + Udppriv *upriv; + + upriv = c->p->priv; + e = Fsstdannounce(c, argv, argc); + if(e != nil) + return e; + Fsconnected(c, nil); + iphtadd(&upriv->ht, c); + + return nil; +} + +static void +udpcreate(Conv *c) +{ + c->rq = qopen(128*1024, Qmsg, 0, 0); + c->wq = qbypass(udpkick, c); +} + +static void +udpclose(Conv *c) +{ + Udpcb *ucb; + Udppriv *upriv; + + upriv = c->p->priv; + iphtrem(&upriv->ht, c); + + c->state = 0; + qclose(c->rq); + qclose(c->wq); + qclose(c->eq); + ipmove(c->laddr, IPnoaddr); + ipmove(c->raddr, IPnoaddr); + c->lport = 0; + c->rport = 0; + + ucb = (Udpcb*)c->ptcl; + ucb->headers = 0; +} + +void +udpkick(void *x, Block *bp) +{ + Conv *c = x; + Udp4hdr *uh4; + Udp6hdr *uh6; + ushort rport; + uchar laddr[IPaddrlen], raddr[IPaddrlen]; + Udpcb *ucb; + int dlen, ptcllen; + Udppriv *upriv; + Fs *f; + int version; + Conv *rc; + + upriv = c->p->priv; + f = c->p->f; + +// netlog(c->p->f, Logudp, "udp: kick\n"); /* frequent and uninteresting */ + if(bp == nil) + return; + + ucb = (Udpcb*)c->ptcl; + switch(ucb->headers) { + case 7: + /* get user specified addresses */ + bp = pullupblock(bp, UDP_USEAD7); + if(bp == nil) + return; + ipmove(raddr, bp->rp); + bp->rp += IPaddrlen; + ipmove(laddr, bp->rp); + bp->rp += IPaddrlen; + /* pick interface closest to dest */ + if(ipforme(f, laddr) != Runi) + findlocalip(f, laddr, raddr); + bp->rp += IPaddrlen; /* Ignore ifc address */ + rport = nhgets(bp->rp); + bp->rp += 2+2; /* Ignore local port */ + break; + default: + rport = 0; + break; + } + + if(ucb->headers) { + if(memcmp(laddr, v4prefix, IPv4off) == 0 + || ipcmp(laddr, IPnoaddr) == 0) + version = 4; + else + version = 6; + } else { + if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 && + memcmp(c->laddr, v4prefix, IPv4off) == 0) + || ipcmp(c->raddr, IPnoaddr) == 0) + version = 4; + else + version = 6; + } + + dlen = blocklen(bp); + + /* fill in pseudo header and 
compute checksum */ + switch(version){ + case V4: + bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ); + if(bp == nil) + return; + + uh4 = (Udp4hdr *)(bp->rp); + ptcllen = dlen + UDP_UDPHDR_SZ; + uh4->Unused = 0; + uh4->udpproto = IP_UDPPROTO; + uh4->frag[0] = 0; + uh4->frag[1] = 0; + hnputs(uh4->udpplen, ptcllen); + if(ucb->headers) { + v6tov4(uh4->udpdst, raddr); + hnputs(uh4->udpdport, rport); + v6tov4(uh4->udpsrc, laddr); + rc = nil; + } else { + v6tov4(uh4->udpdst, c->raddr); + hnputs(uh4->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + v6tov4(uh4->udpsrc, c->laddr); + rc = c; + } + hnputs(uh4->udpsport, c->lport); + hnputs(uh4->udplen, ptcllen); + uh4->udpcksum[0] = 0; + uh4->udpcksum[1] = 0; + hnputs(uh4->udpcksum, + ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ)); + uh4->vihl = IP_VER4; + ipoput4(f, bp, 0, c->ttl, c->tos, rc); + break; + + case V6: + bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ); + if(bp == nil) + return; + + /* + * using the v6 ip header to create pseudo header + * first then reset it to the normal ip header + */ + uh6 = (Udp6hdr *)(bp->rp); + memset(uh6, 0, 8); + ptcllen = dlen + UDP_UDPHDR_SZ; + hnputl(uh6->viclfl, ptcllen); + uh6->hoplimit = IP_UDPPROTO; + if(ucb->headers) { + ipmove(uh6->udpdst, raddr); + hnputs(uh6->udpdport, rport); + ipmove(uh6->udpsrc, laddr); + rc = nil; + } else { + ipmove(uh6->udpdst, c->raddr); + hnputs(uh6->udpdport, c->rport); + if(ipcmp(c->laddr, IPnoaddr) == 0) + findlocalip(f, c->laddr, c->raddr); + ipmove(uh6->udpsrc, c->laddr); + rc = c; + } + hnputs(uh6->udpsport, c->lport); + hnputs(uh6->udplen, ptcllen); + uh6->udpcksum[0] = 0; + uh6->udpcksum[1] = 0; + hnputs(uh6->udpcksum, + ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ)); + memset(uh6, 0, 8); + uh6->viclfl[0] = IP_VER6; + hnputs(uh6->len, ptcllen); + uh6->nextheader = IP_UDPPROTO; + ipoput6(f, bp, 0, c->ttl, c->tos, rc); + break; + + default: + panic("udpkick: version %d", 
version); + } + upriv->ustats.udpOutDatagrams++; +} + +void +udpiput(Proto *udp, Ipifc *ifc, Block *bp) +{ + int len; + Udp4hdr *uh4; + Udp6hdr *uh6; + Conv *c; + Udpcb *ucb; + uchar raddr[IPaddrlen], laddr[IPaddrlen]; + ushort rport, lport; + Udppriv *upriv; + Fs *f; + int version; + int ottl, oviclfl, olen; + uchar *p; + + upriv = udp->priv; + f = udp->f; + upriv->ustats.udpInDatagrams++; + + uh4 = (Udp4hdr*)(bp->rp); + version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4; + + /* Put back pseudo header for checksum + * (remember old values for icmpnoconv()) */ + switch(version) { + case V4: + ottl = uh4->Unused; + uh4->Unused = 0; + len = nhgets(uh4->udplen); + olen = nhgets(uh4->udpplen); + hnputs(uh4->udpplen, len); + + v4tov6(raddr, uh4->udpsrc); + v4tov6(laddr, uh4->udpdst); + lport = nhgets(uh4->udpdport); + rport = nhgets(uh4->udpsport); + + if(nhgets(uh4->udpcksum)) { + if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) { + upriv->ustats.udpInErrors++; + netlog(f, Logudp, "udp: checksum error %I\n", raddr); + DPRINT("udp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + } + uh4->Unused = ottl; + hnputs(uh4->udpplen, olen); + break; + case V6: + uh6 = (Udp6hdr*)(bp->rp); + len = nhgets(uh6->udplen); + oviclfl = nhgetl(uh6->viclfl); + olen = nhgets(uh6->len); + ottl = uh6->hoplimit; + ipmove(raddr, uh6->udpsrc); + ipmove(laddr, uh6->udpdst); + lport = nhgets(uh6->udpdport); + rport = nhgets(uh6->udpsport); + memset(uh6, 0, 8); + hnputl(uh6->viclfl, len); + uh6->hoplimit = IP_UDPPROTO; + if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) { + upriv->ustats.udpInErrors++; + netlog(f, Logudp, "udp: checksum error %I\n", raddr); + DPRINT("udp: checksum error %I\n", raddr); + freeblist(bp); + return; + } + hnputl(uh6->viclfl, oviclfl); + hnputs(uh6->len, olen); + uh6->nextheader = IP_UDPPROTO; + uh6->hoplimit = ottl; + break; + default: + panic("udpiput: version %d", version); + return; /* to avoid a warning */ + } + + qlock(udp); + + c = iphtlook(&upriv->ht, 
raddr, rport, laddr, lport); + if(c == nil){ + /* no conversation found */ + upriv->ustats.udpNoPorts++; + qunlock(udp); + netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport, + laddr, lport); + + switch(version){ + case V4: + icmpnoconv(f, bp); + break; + case V6: + icmphostunr(f, ifc, bp, icmp6_adr_unreach, 0); + break; + default: + panic("udpiput2: version %d", version); + } + + freeblist(bp); + return; + } + ucb = (Udpcb*)c->ptcl; + + if(c->state == Announced){ + if(ucb->headers == 0){ + /* create a new conversation */ + if(ipforme(f, laddr) != Runi) { + switch(version){ + case V4: + v4tov6(laddr, ifc->lifc->local); + break; + case V6: + ipmove(laddr, ifc->lifc->local); + break; + default: + panic("udpiput3: version %d", version); + } + } + c = Fsnewcall(c, raddr, rport, laddr, lport, version); + if(c == nil){ + qunlock(udp); + freeblist(bp); + return; + } + iphtadd(&upriv->ht, c); + ucb = (Udpcb*)c->ptcl; + } + } + + qlock(c); + qunlock(udp); + + /* + * Trim the packet down to data size + */ + len -= UDP_UDPHDR_SZ; + switch(version){ + case V4: + bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len); + break; + case V6: + bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len); + break; + default: + bp = nil; + panic("udpiput4: version %d", version); + } + if(bp == nil){ + qunlock(c); + netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + upriv->lenerr++; + return; + } + + netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport, + laddr, lport, len); + + switch(ucb->headers){ + case 7: + /* pass the src address */ + bp = padblock(bp, UDP_USEAD7); + p = bp->rp; + ipmove(p, raddr); p += IPaddrlen; + ipmove(p, laddr); p += IPaddrlen; + ipmove(p, ifc->lifc->local); p += IPaddrlen; + hnputs(p, rport); p += 2; + hnputs(p, lport); + break; + } + + if(bp->next) + bp = concatblock(bp); + + if(qfull(c->rq)){ + qunlock(c); + netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport, + laddr, lport); + freeblist(bp); + 
return; + } + + qpass(c->rq, bp); + qunlock(c); + +} + +char* +udpctl(Conv *c, char **f, int n) +{ + Udpcb *ucb; + + ucb = (Udpcb*)c->ptcl; + if(n == 1){ + if(strcmp(f[0], "headers") == 0){ + ucb->headers = 7; /* new headers format */ + return nil; + } + } + return "unknown control request"; +} + +void +udpadvise(Proto *udp, Block *bp, char *msg) +{ + Udp4hdr *h4; + Udp6hdr *h6; + uchar source[IPaddrlen], dest[IPaddrlen]; + ushort psource, pdest; + Conv *s, **p; + int version; + + h4 = (Udp4hdr*)(bp->rp); + version = ((h4->vihl&0xF0)==IP_VER6) ? 6 : 4; + + switch(version) { + case V4: + v4tov6(dest, h4->udpdst); + v4tov6(source, h4->udpsrc); + psource = nhgets(h4->udpsport); + pdest = nhgets(h4->udpdport); + break; + case V6: + h6 = (Udp6hdr*)(bp->rp); + ipmove(dest, h6->udpdst); + ipmove(source, h6->udpsrc); + psource = nhgets(h6->udpsport); + pdest = nhgets(h6->udpdport); + break; + default: + panic("udpadvise: version %d", version); + return; /* to avoid a warning */ + } + + /* Look for a connection */ + qlock(udp); + for(p = udp->conv; *p; p++) { + s = *p; + if(s->rport == pdest) + if(s->lport == psource) + if(ipcmp(s->raddr, dest) == 0) + if(ipcmp(s->laddr, source) == 0){ + if(s->ignoreadvice) + break; + qlock(s); + qunlock(udp); + qhangup(s->rq, msg); + qhangup(s->wq, msg); + qunlock(s); + freeblist(bp); + return; + } + } + qunlock(udp); + freeblist(bp); +} + +int +udpstats(Proto *udp, char *buf, int len) +{ + Udppriv *upriv; + + upriv = udp->priv; + return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %llud\nInErrors: %llud\nOutDatagrams: %llud\n", + upriv->ustats.udpInDatagrams, + upriv->ustats.udpNoPorts, + upriv->ustats.udpInErrors, + upriv->ustats.udpOutDatagrams); +} + +void +udpinit(Fs *fs) +{ + Proto *udp; + + udp = smalloc(sizeof(Proto)); + udp->priv = smalloc(sizeof(Udppriv)); + udp->name = "udp"; + udp->connect = udpconnect; + udp->announce = udpannounce; + udp->ctl = udpctl; + udp->state = udpstate; + udp->create = udpcreate; + udp->close = 
udpclose; + udp->rcv = udpiput; + udp->advise = udpadvise; + udp->stats = udpstats; + udp->ipproto = IP_UDPPROTO; + udp->nc = Nchans; + udp->ptclsize = sizeof(Udpcb); + + Fsproto(fs, udp); +} diff -Nru 0/sys/src/nix/k10/Linux 4/sys/src/nix/k10/Linux --- 0/sys/src/nix/k10/Linux Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/Linux Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2 @@ +Linux support was removed from this kernel. +It may be found in /n/nixdump/2011/1114/sys/src/nix diff -Nru 0/sys/src/nix/k10/acore.c 4/sys/src/nix/k10/acore.c --- 0/sys/src/nix/k10/acore.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/acore.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,341 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include +#include +#include "amd64.h" +#include "ureg.h" +#include "io.h" +#include "../port/pmc.h" + +/* + * NIX code run at the AC. + * This is the "AC kernel". + */ + +/* + * FPU: + * + * The TC handles the FPU by keeping track of the state for the + * current process. If it has been used and must be saved, it is saved, etc. + * When a process gets to the AC, we handle the FPU directly, and save its + * state before going back to the TC (or the TC state would be stale). + * + * Because of this, each time the process comes back to the AC and + * uses the FPU it will get a device not available trap and + * the state will be restored. This could be optimized because the AC + * is single-process, and we do not have to disable the FPU while + * saving, so it does not have to be restored. + */ + +extern char* acfpunm(Ureg* ureg, void*); +extern char* acfpumf(Ureg* ureg, void*); +extern char* acfpuxf(Ureg* ureg, void*); +extern void acfpusysprocsetup(Proc*); + +extern void _acsysret(void); +extern void _actrapret(void); + +ACVctl *acvctl[256]; + +/* + * Test inter core calls by calling a cores to print something, and then + * waiting for it to complete. 
+ */ +static void +testiccfn(void) +{ + print("called: %s\n", (char*)m->icc->data); +} + +void +testicc(int i) +{ + Mach *mp; + + if((mp = sys->machptr[i]) == nil || mp->nixrole != NIXAC){ /* also reject a missing core: mp is dereferenced below */ + print("testicc: core %d is not an AC\n", i); + return; + } + print("calling core %d... ", i); + mp->icc->flushtlb = 0; + snprint((char*)mp->icc->data, ICCLNSZ, "<%d>", i); + coherence(); + mp->icc->fn = testiccfn; + waitwhile(&mp->icc->fn, 0); +} + +/* + * Check if the AC kernel (mach) stack has more than 4*KiB free. + * Do not call panic, the stack is gigantic. + */ +static void +acstackok(void) +{ + char dummy; + char *sstart; + + sstart = (char *)m - PGSZ - 4*PTSZ - MACHSTKSZ; + if(&dummy < sstart + 4*KiB){ + print("ac kernel stack overflow, cpu%d stopped\n", m->machno); + DONE(); + } +} + +/* + * Main scheduling loop done by the application core. + * Some of the functions run will not return. + * The system call handler will reset the stack and + * call acsched again. + * We loop because some functions may return and we should + * wait for another call. + */ +void +acsched(void) +{ + acmmuswitch(); + for(;;){ + acstackok(); + waitwhile(&m->icc->fn, 0); + if(m->icc->flushtlb) + acmmuswitch(); + DBG("acsched: cpu%d: fn %#p\n", m->machno, m->icc->fn); + m->icc->fn(); + DBG("acsched: cpu%d: idle\n", m->machno); + coherence(); + m->icc->fn = nil; + } +} + +void +acmmuswitch(void) +{ + extern Page mach0pml4; + + DBG("acmmuswitch mpl4 %#p mach0pml4 %#p m0pml4 %#p\n", m->pml4->pa, mach0pml4.pa, sys->machptr[0]->pml4->pa); + + + cr3put(m->pml4->pa); +} + +/* + * Beware: up is not set when this function is called. 
+ */ +void +actouser(void) +{ + void xactouser(u64int); + Ureg *u; + + acfpusysprocsetup(m->proc); + + u = m->proc->dbgreg; + DBG("cpu%d: touser usp = %#p entry %#p\n", m->machno, u->sp, u->ip); + xactouser(u->sp); + panic("actouser"); +} + +void +actrapret(void) +{ + /* done by actrap() */ +} + +/* + * Entered in AP core context, upon traps (system calls go through acsyscall) + * using up->dbgreg means cores MUST be homogeneous. + * + * BUG: We should setup some trapenable() mechanism for the AC, + * so that code like fpu.c could arrange for handlers specific for + * the AC, instead of doing that by hand here. + * + * All interrupts are masked while in the "kernel" + */ +void +actrap(Ureg *u) +{ + char *n; + ACVctl *v; + + n = nil; + + _pmcupdate(m); + if(m->proc != nil){ + m->proc->nactrap++; + m->proc->actime1 = fastticks(nil); + } + if(u->type < nelem(acvctl)){ + v = acvctl[u->type]; + if(v != nil){ + DBG("actrap: cpu%d: %ulld\n", m->machno, u->type); + n = v->f(u, v->a); + if(n != nil) + goto Post; + return; + } + } + switch(u->type){ + case IdtDF: + print("AC: double fault\n"); + dumpregs(u); + ndnr(); /* NOTE(review): no break here - falls into IdtIPI if ndnr() ever returns; confirm it cannot */ + case IdtIPI: + m->intr++; + DBG("actrap: cpu%d: IPI\n", m->machno); + apiceoi(IdtIPI); + break; + case IdtTIMER: + apiceoi(IdtTIMER); + panic("timer interrupt in an AC"); + break; + case IdtPF: + /* this case is here for debug only */ + m->pfault++; + DBG("actrap: cpu%d: PF cr2 %#ullx\n", m->machno, cr2get()); + break; + default: + print("actrap: cpu%d: %ulld\n", m->machno, u->type); + } +Post: + m->icc->rc = ICCTRAP; + m->cr2 = cr2get(); + memmove(m->proc->dbgreg, u, sizeof *u); + m->icc->note = n; + fpuprocsave(m->proc); + _pmcupdate(m); + coherence(); + m->icc->fn = nil; + ready(m->proc); + + waitwhile(&m->icc->fn, 0); + + if(m->icc->flushtlb) + acmmuswitch(); + if(m->icc->fn != actrapret) + acsched(); + DBG("actrap: ret\n"); + memmove(u, m->proc->dbgreg, sizeof *u); + if(m->proc) + m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1); +} + 
+void +acsyscall(void) +{ + Proc *p; + + /* + * If we saved the Ureg into m->proc->dbgregs, + * There's nothing else we have to do. + * Otherwise, we should m->proc->dbgregs = u; + */ + DBG("acsyscall: cpu%d\n", m->machno); + + _pmcupdate(m); + p = m->proc; + p->actime1 = fastticks(nil); + m->syscall++; /* would also count it in the TS core */ + m->icc->rc = ICCSYSCALL; + m->cr2 = cr2get(); + fpuprocsave(p); + _pmcupdate(m); + coherence(); + m->icc->fn = nil; + ready(p); + /* + * The next call is probably going to make us jmp + * into user code, forgetting all our state in this + * stack, upon the next syscall. + * We don't nest calls in the current stack for too long. + */ + acsched(); +} + +/* + * Called in AP core context, to return from system call. + */ +void +acsysret(void) +{ + DBG("acsysret\n"); + if(m->proc != nil) + m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1); + _acsysret(); +} + +void +dumpreg(void *u) +{ + print("reg is %p\n", u); + ndnr(); +} + +char *rolename[] = +{ + [NIXTC] "TC", + [NIXKC] "KC", + [NIXAC] "AC", + [NIXXC] "XC", + [NIXOC] "OC", + [NIXUC] "UC", + [NIXSC] "SC", + [NIXQC] "QC", +}; + +void +acmodeset(int mode) +{ + switch(mode){ + case NIXAC: + case NIXKC: + case NIXTC: + case NIXOC: + case NIXXC: + case NIXUC: + break; + default: + panic("acmodeset: bad mode %d", mode); + } + m->nixrole = mode; +} + +void +acinit(void) +{ + Mach *mp; + Proc *pp; + + /* + * Lower the priority of the apic to 0, + * to accept interrupts. + * Raise it later if needed to disable them. + */ + apicpri(0); + + /* + * Be sure a few assembler assumptions still hold. + * Someone moved m->stack and I had fun debugging... 
+ */ + mp = 0; + pp = 0; + assert((uintptr)&mp->proc == 16); + assert((uintptr)&pp->dbgreg == 24); + assert((uintptr)&mp->stack == 24); +} + +void +acquiesce(void) +{ + coherence(); + + m->proc = nil; + m->icc->fn = nil; + m->nixrole = NIXQC; + coherence(); + for(;;) + halt(); + +} diff -Nru 0/sys/src/nix/k10/acpi.h 4/sys/src/nix/k10/acpi.h --- 0/sys/src/nix/k10/acpi.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/acpi.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,410 @@ +typedef struct Atable Atable; +typedef struct Facs Facs; +typedef struct Fadt Fadt; +typedef struct Gas Gas; +typedef struct Gpe Gpe; +typedef struct Rsdp Rsdp; +typedef struct Sdthdr Sdthdr; +typedef struct Parse Parse; +typedef struct Xsdt Xsdt; +typedef struct Regio Regio; +typedef struct Reg Reg; +typedef struct Madt Madt; +typedef struct Msct Msct; +typedef struct Mdom Mdom; +typedef struct Apicst Apicst; +typedef struct Srat Srat; +typedef struct Slit Slit; +typedef struct SlEntry SlEntry; + +enum +{ + + Sdthdrsz = 36, /* size of SDT header */ + + /* ACPI regions. 
Gas ids */ + Rsysmem = 0, + Rsysio, + Rpcicfg, + Rembed, + Rsmbus, + Rcmos, + Rpcibar, + Ripmi, + Rfixedhw = 0x7f, + + /* ACPI PM1 control */ + Pm1SciEn = 0x1, /* Generate SCI and not SMI */ + + /* ACPI tbdf as encoded in acpi region base addresses */ + Rpciregshift = 0, + Rpciregmask = 0xFFFF, + Rpcifunshift = 16, + Rpcifunmask = 0xFFFF, + Rpcidevshift = 32, + Rpcidevmask = 0xFFFF, + Rpcibusshift = 48, + Rpcibusmask = 0xFFFF, + + /* Apic structure types */ + ASlapic = 0, /* processor local apic */ + ASioapic, /* I/O apic */ + ASintovr, /* Interrupt source override */ + ASnmi, /* NMI source */ + ASlnmi, /* local apic nmi */ + ASladdr, /* local apic address override */ + ASiosapic, /* I/O sapic */ + ASlsapic, /* local sapic */ + ASintsrc, /* platform interrupt sources */ + ASlx2apic, /* local x2 apic */ + ASlx2nmi, /* local x2 apic NMI */ + + /* Apic flags */ + AFbus = 0, /* polarity/trigger like in ISA */ + AFhigh = 1, /* active high */ + AFlow = 3, /* active low */ + AFpmask = 3, /* polarity bits */ + AFedge = 1<<2, /* edge triggered */ + AFlevel = 3<<2, /* level triggered */ + AFtmask = 3<<2, /* trigger bits */ + + /* SRAT types */ + SRlapic = 0, /* Local apic/sapic affinity */ + SRmem, /* Memory affinity */ + SRlx2apic, /* x2 apic affinity */ + + /* Arg for _PIC */ + Ppic = 0, /* PIC interrupt model */ + Papic, /* APIC interrupt model */ + Psapic, /* SAPIC interrupt model */ + + + CMregion = 0, /* regio name spc base len accsz*/ + CMgpe, /* gpe name id */ + + Qdir = 0, + Qctl, + Qtbl, + Qio, +}; + +/* + * ACPI table (sw) + */ +struct Atable +{ + Atable* next; /* next table in list */ + int is64; /* uses 64bits */ + char sig[5]; /* signature */ + char oemid[7]; /* oem id str. */ + char oemtblid[9]; /* oem tbl. id str. 
*/ + uchar* tbl; /* pointer to table in memory */ + long dlen; /* size of data in table, after Stdhdr */ +}; + +struct Gpe +{ + int stsio; /* port used for status */ + int stsbit; /* bit number */ + int enio; /* port used for enable */ + int enbit; /* bit number */ + int nb; /* event number */ + char* obj; /* handler object */ + int id; /* id as supplied by user */ +}; + +struct Parse +{ + char* sig; + Atable* (*f)(uchar*, int); /* return nil to keep vmap */ +}; + +struct Regio{ + void *arg; + u8int (*get8)(uintptr, void*); + void (*set8)(uintptr, u8int, void*); + u16int (*get16)(uintptr, void*); + void (*set16)(uintptr, u16int, void*); + u32int (*get32)(uintptr, void*); + void (*set32)(uintptr, u32int, void*); + u64int (*get64)(uintptr, void*); + void (*set64)(uintptr, u64int, void*); +}; + +struct Reg +{ + char* name; + int spc; /* io space */ + u64int base; /* address, physical */ + uchar* p; /* address, kmapped */ + u64int len; + int tbdf; + int accsz; /* access size */ +}; + +/* Generic address structure. + */ +#pragma pack on +struct Gas +{ + u8int spc; /* address space id */ + u8int len; /* register size in bits */ + u8int off; /* bit offset */ + u8int accsz; /* 1: byte; 2: word; 3: dword; 4: qword */ + u64int addr; /* address (or acpi encoded tbdf + reg) */ +}; + +/* Root system description table pointer. + * Used to locate the root system description table RSDT + * (or the extended system description table from version 2) XSDT. + * The XDST contains (after the DST header) a list of pointers to tables: + * - FADT fixed acpi description table. + * It points to the DSDT, AML code making the acpi namespace. + * - SSDTs tables with AML code to add to the acpi namespace. + * - pointers to other tables for apics, etc. 
+ */ + +struct Rsdp +{ + u8int signature[8]; /* "RSD PTR " */ + u8int rchecksum; + u8int oemid[6]; + u8int revision; + u8int raddr[4]; /* RSDT */ + u8int length[4]; + u8int xaddr[8]; /* XSDT */ + u8int xchecksum; /* XSDT */ + u8int _33_[3]; /* reserved */ +}; + +/* Header for ACPI description tables + */ +struct Sdthdr +{ + u8int sig[4]; /* "FACP" or whatever */ + u8int length[4]; + u8int rev; + u8int csum; + u8int oemid[6]; + u8int oemtblid[8]; + u8int oemrev[4]; + u8int creatorid[4]; + u8int creatorrev[4]; +}; + +/* Firmware control structure + */ +struct Facs +{ + u32int hwsig; + u32int wakingv; + u32int glock; + u32int flags; + u64int xwakingv; + u8int vers; + u32int ospmflags; +}; + +#pragma pack off + +/* Maximum System Characteristics table + */ +struct Msct +{ + int ndoms; /* number of domains */ + int nclkdoms; /* number of clock domains */ + u64int maxpa; /* max physical address */ + + Mdom* dom; /* domain information list */ +}; + +struct Mdom +{ + Mdom* next; + int start; /* start dom id */ + int end; /* end dom id */ + int maxproc; /* max processor capacity */ + u64int maxmem; /* max memory capacity */ +}; + +/* Multiple APIC description table + * Interrupts are virtualized by ACPI and each APIC has + * a `virtual interrupt base' where its interrupts start. + * Addresses are processor-relative physical addresses. + * Only enabled devices are linked, others are filtered out. + */ +struct Madt +{ + uintmem lapicpa; /* local APIC addr */ + int pcat; /* the machine has PC/AT 8259s */ + Apicst* st; /* list of Apic related structures */ +}; + +struct Apicst +{ + int type; + Apicst* next; + union{ + struct{ + int pid; /* processor id */ + int id; /* apic no */ + } lapic; + struct{ + int id; /* io apic id */ + int ibase; /* interrupt base addr. */ + uintmem addr; /* base address */ + } ioapic, iosapic; + struct{ + int irq; /* bus intr. 
source (ISA only) */ + int intr; /* system interrupt */ + int flags; /* apic flags */ + } intovr; + struct{ + int intr; /* system interrupt */ + int flags; /* apic flags */ + } nmi; + struct{ + int pid; /* processor id */ + int flags; /* lapic flags */ + int lint; /* lapic LINTn for nmi */ + } lnmi; + struct{ + int pid; /* processor id */ + int id; /* apic id */ + int eid; /* apic eid */ + int puid; /* processor uid */ + char* puids; /* same thing */ + } lsapic; + struct{ + int pid; /* processor id */ + int peid; /* processor eid */ + int iosv; /* io sapic vector */ + int intr; /* global sys intr. */ + int type; /* intr type */ + int flags; /* apic flags */ + int any; /* err sts at any proc */ + } intsrc; + struct{ + int id; /* x2 apic id */ + int puid; /* processor uid */ + } lx2apic; + struct{ + int puid; + int flags; + int intr; + } lx2nmi; + }; +}; + +/* System resource affinity table + */ +struct Srat +{ + int type; + Srat* next; + union{ + struct{ + int dom; /* proximity domain */ + int apic; /* apic id */ + int sapic; /* sapic id */ + int clkdom; /* clock domain */ + } lapic; + struct{ + int dom; /* proximity domain */ + u64int addr; /* base address */ + u64int len; + int hplug; /* hot pluggable */ + int nvram; /* non volatile */ + } mem; + struct{ + int dom; /* proximity domain */ + int apic; /* x2 apic id */ + int clkdom; /* clock domain */ + } lx2apic; + }; +}; + +/* System locality information table + */ +struct Slit { + uvlong rowlen; + SlEntry **e; +}; + +struct SlEntry { + int dom; /* proximity domain */ + uint dist; /* distance to proximity domain */ +}; + +/* Fixed ACPI description table. + * Describes implementation and hardware registers. + * PM* blocks are low level functions. + * GPE* blocks refer to general purpose events. + * P_* blocks are for processor features. + * Has address for the DSDT. 
+ */ +struct Fadt +{ + u32int facs; + u32int dsdt; + /* 1 reserved */ + u8int pmprofile; + u16int sciint; + u32int smicmd; + u8int acpienable; + u8int acpidisable; + u8int s4biosreq; + u8int pstatecnt; + u32int pm1aevtblk; + u32int pm1bevtblk; + u32int pm1acntblk; + u32int pm1bcntblk; + u32int pm2cntblk; + u32int pmtmrblk; + u32int gpe0blk; + u32int gpe1blk; + u8int pm1evtlen; + u8int pm1cntlen; + u8int pm2cntlen; + u8int pmtmrlen; + u8int gpe0blklen; + u8int gpe1blklen; + u8int gp1base; + u8int cstcnt; + u16int plvl2lat; + u16int plvl3lat; + u16int flushsz; + u16int flushstride; + u8int dutyoff; + u8int dutywidth; + u8int dayalrm; + u8int monalrm; + u8int century; + u16int iapcbootarch; + /* 1 reserved */ + u32int flags; + Gas resetreg; + u8int resetval; + /* 3 reserved */ + u64int xfacs; + u64int xdsdt; + Gas xpm1aevtblk; + Gas xpm1bevtblk; + Gas xpm1acntblk; + Gas xpm1bcntblk; + Gas xpm2cntblk; + Gas xpmtmrblk; + Gas xgpe0blk; + Gas xgpe1blk; +}; + +/* XSDT/RSDT. 4/8 byte addresses starting at p. + */ +struct Xsdt +{ + int len; + int asize; + u8int* p; +}; + +extern uintmem acpimblocksize(uintmem, int*); diff -Nru 0/sys/src/nix/k10/ahci.h 4/sys/src/nix/k10/ahci.h --- 0/sys/src/nix/k10/ahci.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/ahci.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,328 @@ +/* + * advanced host controller interface (sata) + * © 2007-9 coraid, inc + */ + +/* pci configuration */ +enum { + Abar = 5, +}; + +/* + * ahci memory configuration + * + * 0000-0023 generic host control + * 0024-009f reserved + * 00a0-00ff vendor specific. + * 0100-017f port 0 + * ... + * 1080-1100 port 31 + */ + +/* cap bits: supported features */ +enum { + H64a = 1<<31, /* 64-bit addressing */ + Hncq = 1<<30, /* ncq */ + Hsntf = 1<<29, /* snotification reg. 
*/ + Hmps = 1<<28, /* mech pres switch */ + Hss = 1<<27, /* staggered spinup */ + Halp = 1<<26, /* aggressive link pm */ + Hal = 1<<25, /* activity led */ + Hclo = 1<<24, /* command-list override */ + Hiss = 1<<20, /* for interface speed */ + Ham = 1<<18, /* ahci-mode only */ + Hpm = 1<<17, /* port multiplier */ + Hfbs = 1<<16, /* fis-based switching */ + Hpmb = 1<<15, /* multiple-block pio */ + Hssc = 1<<14, /* slumber state */ + Hpsc = 1<<13, /* partial-slumber state */ + Hncs = 1<<8, /* n command slots */ + Hcccs = 1<<7, /* coal */ + Hems = 1<<6, /* enclosure mgmt. */ + Hxs = 1<<5, /* external sata */ + Hnp = 1<<0, /* n ports */ +}; + +/* ghc bits */ +enum { + Hae = 1<<31, /* enable ahci */ + Hie = 1<<1, /* " interrupts */ + Hhr = 1<<0, /* hba reset */ +}; + +/* cap2 bits */ +enum { + Apts = 1<<2, /* automatic partial to slumber */ + Nvmp = 1<<1, /* nvmhci present; nvram */ + Boh = 1<<0, /* bios/os handoff supported */ +}; + +/* emctl bits */ +enum { + Pm = 1<<27, /* port multiplier support */ + Alhd = 1<<26, /* activity led hardware driven */ + Xonly = 1<<25, /* rx messages not supported */ + Smb = 1<<24, /* single msg buffer; rx limited */ + Esgpio = 1<<19, /* sgpio messages supported */ + Eses2 = 1<<18, /* ses-2 supported */ + Esafte = 1<<17, /* saf-te supported */ + Elmt = 1<<16, /* led msg types support */ + Emrst = 1<<9, /* reset all em logic */ + Tmsg = 1<<8, /* transmit message */ + Mr = 1<<0, /* message rx'd */ + Emtype = Esgpio | Eses2 | Esafte | Elmt, +}; + +typedef struct { + u32int cap; + u32int ghc; + u32int isr; + u32int pi; /* ports implemented */ + u32int ver; + u32int ccc; /* coaleasing control */ + u32int cccports; + u32int emloc; + u32int emctl; + u32int cap2; + u32int bios; +} Ahba; + +enum { + Acpds = 1<<31, /* cold port detect status */ + Atfes = 1<<30, /* task file error status */ + Ahbfs = 1<<29, /* hba fatal */ + Ahbds = 1<<28, /* hba error (parity error) */ + Aifs = 1<<27, /* interface fatal §6.1.2 */ + Ainfs = 1<<26, /* interface 
error (recovered) */ + Aofs = 1<<24, /* too many bytes from disk */ + Aipms = 1<<23, /* incorrect prt mul status */ + Aprcs = 1<<22, /* PhyRdy change status Pxserr.diag.n */ + Adpms = 1<<7, /* mechanical presence status */ + Apcs = 1<<6, /* port connect diag.x */ + Adps = 1<<5, /* descriptor processed */ + Aufs = 1<<4, /* unknown fis diag.f */ + Asdbs = 1<<3, /* set device bits fis received w/ i bit set */ + Adss = 1<<2, /* dma setup */ + Apio = 1<<1, /* pio setup fis */ + Adhrs = 1<<0, /* device to host register fis */ + + IEM = Acpds|Atfes|Ahbds|Ahbfs|Ahbds|Aifs|Ainfs|Aprcs|Apcs|Adps| + Aufs|Asdbs|Adss|Adhrs, + Ifatal = Atfes|Ahbfs|Ahbds|Aifs, +}; + +/* serror bits */ +enum { + SerrX = 1<<26, /* exchanged */ + SerrF = 1<<25, /* unknown fis */ + SerrT = 1<<24, /* transition error */ + SerrS = 1<<23, /* link sequence */ + SerrH = 1<<22, /* handshake */ + SerrC = 1<<21, /* crc */ + SerrD = 1<<20, /* not used by ahci */ + SerrB = 1<<19, /* 10-tp-8 decode */ + SerrW = 1<<18, /* comm wake */ + SerrI = 1<<17, /* phy internal */ + SerrN = 1<<16, /* phyrdy change */ + + ErrE = 1<<11, /* internal */ + ErrP = 1<<10, /* ata protocol violation */ + ErrC = 1<<9, /* communication */ + ErrT = 1<<8, /* transient */ + ErrM = 1<<1, /* recoverd comm */ + ErrI = 1<<0, /* recovered data integrety */ + + ErrAll = ErrE|ErrP|ErrC|ErrT|ErrM|ErrI, + SerrAll = SerrX|SerrF|SerrT|SerrS|SerrH|SerrC|SerrD|SerrB|SerrW| + SerrI|SerrN|ErrAll, + SerrBad = 0x7f<<19, +}; + +/* cmd register bits */ +enum { + Aicc = 1<<28, /* interface communcations control. 4 bits */ + Aasp = 1<<27, /* aggressive slumber & partial sleep */ + Aalpe = 1<<26, /* aggressive link pm enable */ + Adlae = 1<<25, /* drive led on atapi */ + Aatapi = 1<<24, /* device is atapi */ + Apste = 1<<23, /* automatic slumber to partial cap */ + Afbsc = 1<<22, /* fis-based switching capable */ + Aesp = 1<<21, /* external sata port */ + Acpd = 1<<20, /* cold presence detect */ + Ampsp = 1<<19, /* mechanical pres. 
*/ + Ahpcp = 1<<18, /* hot plug capable */ + Apma = 1<<17, /* pm attached */ + Acps = 1<<16, /* cold presence state */ + Acr = 1<<15, /* cmdlist running */ + Afr = 1<<14, /* fis running */ + Ampss = 1<<13, /* mechanical presence switch state */ + Accs = 1<<8, /* current command slot 12:08 */ + Afre = 1<<4, /* fis enable receive */ + Aclo = 1<<3, /* command list override */ + Apod = 1<<2, /* power on dev (requires cold-pres. detect) */ + Asud = 1<<1, /* spin-up device; requires ss capability */ + Ast = 1<<0, /* start */ + + Arun = Ast|Acr|Afre|Afr, + Apwr = Apod|Asud, +}; + +/* ctl register bits */ +enum { + Aipm = 1<<8, /* interface power mgmt. 3=off */ + Aspd = 1<<4, + Adet = 1<<0, /* device detection */ +}; + +/* sstatus register bits */ +enum{ + /* sstatus det */ + Smissing = 0<<0, + Spresent = 1<<0, + Sphylink = 3<<0, + Sbist = 4<<0, + Smask = 7<<0, + + /* sstatus speed */ + Gmissing = 0<<4, + Gi = 1<<4, + Gii = 2<<4, + Giii = 3<<4, + Gmask = 7<<4, + + /* sstatus ipm */ + Imissing = 0<<8, + Iactive = 1<<8, + Isleepy = 2<<8, + Islumber = 6<<8, + Imask = 7<<8, + + SImask = Smask | Imask, + SSmask = Smask | Isleepy, +}; + +#define sstatus scr0 +#define sctl scr2 +#define serror scr1 +#define sactive scr3 +#define ntf scr4 + +typedef struct { + u32int list; /* PxCLB must be 1kb aligned */ + u32int listhi; + u32int fis; /* 256-byte aligned */ + u32int fishi; + u32int isr; + u32int ie; /* interrupt enable */ + u32int cmd; + u32int res1; + u32int task; + u32int sig; + u32int scr0; + u32int scr2; + u32int scr1; + u32int scr3; + u32int ci; /* command issue */ + u32int scr4; + u32int fbs; + u32int res2[11]; + u32int vendor[4]; +} Aport; + +/* in host's memory; not memory mapped */ +typedef struct { + uchar *base; + uchar *d; + uchar *p; + uchar *r; + uchar *u; + u32int *devicebits; +} Afis; + +enum { + Lprdtl = 1<<16, /* physical region descriptor table len */ + Lpmp = 1<<12, /* port multiplier port */ + Lclear = 1<<10, /* clear busy on R_OK */ + Lbist = 1<<9, + Lreset = 
1<<8, + Lpref = 1<<7, /* prefetchable */ + Lwrite = 1<<6, + Latapi = 1<<5, + Lcfl = 1<<0, /* command fis length in double words */ +}; + +/* in hosts memory; memory mapped */ +typedef struct { + u32int flags; + u32int len; + u32int ctab; + u32int ctabhi; + uchar reserved[16]; +} Alist; + +typedef struct { + u32int dba; + u32int dbahi; + u32int pad; + u32int count; +} Aprdt; + +typedef struct { + uchar cfis[0x40]; + uchar atapi[0x10]; + uchar pad[0x30]; + Aprdt prdt; +} Actab; + +/* enclosure message header */ +enum { + Mled = 0, + Msafte = 1, + Mses2 = 2, + Msgpio = 3, +}; + +typedef struct { + uchar dummy; + uchar msize; + uchar dsize; + uchar type; + uchar hba; /* bits 0:4 are the port */ + uchar pm; + uchar led[2]; +} Aledmsg; + +enum { + Aled = 1<<0, + Locled = 1<<3, + Errled = 1<<6, + + Ledoff = 0, + Ledon = 1, +}; + +typedef struct { + uint encsz; + u32int *enctx; + u32int *encrx; +} Aenc; + +enum { + Ferror = 1, + Fdone = 2, +}; + +typedef struct { + QLock; + Rendez; + uchar flag; + Sfisx; + Afis fis; + Alist *list; + Actab *ctab; +} Aportm; + +typedef struct { + Aport *p; + Aportm *m; +} Aportc; diff -Nru 0/sys/src/nix/k10/amd64.h 4/sys/src/nix/k10/amd64.h --- 0/sys/src/nix/k10/amd64.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/amd64.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,196 @@ +enum { /* Cr0 */ + Pe = 0x00000001, /* Protected Mode Enable */ + Mp = 0x00000002, /* Monitor Coprocessor */ + Em = 0x00000004, /* Emulate Coprocessor */ + Ts = 0x00000008, /* Task Switched */ + Et = 0x00000010, /* Extension Type */ + Ne = 0x00000020, /* Numeric Error */ + Wp = 0x00010000, /* Write Protect */ + Am = 0x00040000, /* Alignment Mask */ + Nw = 0x20000000, /* Not Writethrough */ + Cd = 0x40000000, /* Cache Disable */ + Pg = 0x80000000, /* Paging Enable */ +}; + +enum { /* Cr3 */ + Pwt = 0x00000008, /* Page-Level Writethrough */ + Pcd = 0x00000010, /* Page-Level Cache Disable */ +}; + +enum { /* Cr4 */ + Vme = 0x00000001, /* Virtual-8086 Mode Extensions */ + Pvi = 
0x00000002, /* Protected Mode Virtual Interrupts */ + Tsd = 0x00000004, /* Time-Stamp Disable */ + De = 0x00000008, /* Debugging Extensions */ + Pse = 0x00000010, /* Page-Size Extensions */ + Pae = 0x00000020, /* Physical Address Extension */ + Mce = 0x00000040, /* Machine Check Enable */ + Pge = 0x00000080, /* Page-Global Enable */ + Pce = 0x00000100, /* Performance Monitoring Counter Enable */ + Osfxsr = 0x00000200, /* FXSAVE/FXRSTOR Support */ + Osxmmexcpt = 0x00000400, /* Unmasked Exception Support */ +}; + +enum { /* Rflags */ + Cf = 0x00000001, /* Carry Flag */ + Pf = 0x00000004, /* Parity Flag */ + Af = 0x00000010, /* Auxiliary Flag */ + Zf = 0x00000040, /* Zero Flag */ + Sf = 0x00000080, /* Sign Flag */ + Tf = 0x00000100, /* Trap Flag */ + If = 0x00000200, /* Interrupt Flag */ + Df = 0x00000400, /* Direction Flag */ + Of = 0x00000800, /* Overflow Flag */ + Iopl0 = 0x00000000, /* I/O Privilege Level */ + Iopl1 = 0x00001000, + Iopl2 = 0x00002000, + Iopl3 = 0x00003000, + Nt = 0x00004000, /* Nested Task */ + Rf = 0x00010000, /* Resume Flag */ + Vm = 0x00020000, /* Virtual-8086 Mode */ + Ac = 0x00040000, /* Alignment Check */ + Vif = 0x00080000, /* Virtual Interrupt Flag */ + Vip = 0x00100000, /* Virtual Interrupt Pending */ + Id = 0x00200000, /* ID Flag */ +}; + +enum { /* MSRs */ + PerfEvtbase = 0xc0010000, /* Performance Event Select */ + PerfCtrbase = 0xc0010004, /* Performance Counters */ + + Efer = 0xc0000080, /* Extended Feature Enable */ + Star = 0xc0000081, /* Legacy Target IP and [CS]S */ + Lstar = 0xc0000082, /* Long Mode Target IP */ + Cstar = 0xc0000083, /* Compatibility Target IP */ + Sfmask = 0xc0000084, /* SYSCALL Flags Mask */ + FSbase = 0xc0000100, /* 64-bit FS Base Address */ + GSbase = 0xc0000101, /* 64-bit GS Base Address */ + KernelGSbase = 0xc0000102, /* SWAPGS instruction */ +}; + +enum { /* Efer */ + Sce = 0x00000001, /* System Call Extension */ + Lme = 0x00000100, /* Long Mode Enable */ + Lma = 0x00000400, /* Long Mode Active */ + Nxe = 
0x00000800, /* No-Execute Enable */ + Svme = 0x00001000, /* SVM Extension Enable */ + Ffxsr = 0x00004000, /* Fast FXSAVE/FXRSTOR */ +}; + +enum { /* PML4E/PDPE/PDE/PTE */ + PteP = 0x0000000000000001ull,/* Present */ + PteRW = 0x0000000000000002ull,/* Read/Write */ + PteU = 0x0000000000000004ull,/* User/Supervisor */ + PtePWT = 0x0000000000000008ull,/* Page-Level Write Through */ + PtePCD = 0x0000000000000010ull,/* Page Level Cache Disable */ + PteA = 0x0000000000000020ull,/* Accessed */ + PteD = 0x0000000000000040ull,/* Dirty */ + PtePS = 0x0000000000000080ull,/* Page Size */ + Pte4KPAT = PtePS, /* PTE PAT */ + PteG = 0x0000000000000100ull,/* Global */ + Pte2MPAT = 0x0000000000001000ull,/* PDE PAT */ + Pte1GPAT = Pte2MPAT, /* PDPE PAT */ + PteNX = 0x8000000000000000ull,/* No Execute */ +}; + +enum { /* Exceptions */ + IdtDE = 0, /* Divide-by-Zero Error */ + IdtDB = 1, /* Debug */ + IdtNMI = 2, /* Non-Maskable-Interrupt */ + IdtBP = 3, /* Breakpoint */ + IdtOF = 4, /* Overflow */ + IdtBR = 5, /* Bound-Range */ + IdtUD = 6, /* Invalid-Opcode */ + IdtNM = 7, /* Device-Not-Available */ + IdtDF = 8, /* Double-Fault */ + Idt09 = 9, /* unsupported */ + IdtTS = 10, /* Invalid-TSS */ + IdtNP = 11, /* Segment-Not-Present */ + IdtSS = 12, /* Stack */ + IdtGP = 13, /* General-Protection */ + IdtPF = 14, /* Page-Fault */ + Idt0F = 15, /* reserved */ + IdtMF = 16, /* x87 FPE-Pending */ + IdtAC = 17, /* Alignment-Check */ + IdtMC = 18, /* Machine-Check */ + IdtXF = 19, /* SIMD Floating-Point */ +}; + +/* + * Vestigial Segmented Virtual Memory. 
+ */ +enum { /* Segment Descriptor */ + SdISTM = 0x0000000700000000ull,/* Interrupt Stack Table Mask */ + SdA = 0x0000010000000000ull,/* Accessed */ + SdR = 0x0000020000000000ull,/* Readable (Code) */ + SdW = 0x0000020000000000ull,/* Writeable (Data) */ + SdE = 0x0000040000000000ull,/* Expand Down */ + SdaTSS = 0x0000090000000000ull,/* Available TSS */ + SdbTSS = 0x00000b0000000000ull,/* Busy TSS */ + SdCG = 0x00000c0000000000ull,/* Call Gate */ + SdIG = 0x00000e0000000000ull,/* Interrupt Gate */ + SdTG = 0x00000f0000000000ull,/* Trap Gate */ + SdCODE = 0x0000080000000000ull,/* Code/Data */ + SdS = 0x0000100000000000ull,/* System/User */ + SdDPL0 = 0x0000000000000000ull,/* Descriptor Privilege Level */ + SdDPL1 = 0x0000200000000000ull, + SdDPL2 = 0x0000400000000000ull, + SdDPL3 = 0x0000600000000000ull, + SdP = 0x0000800000000000ull,/* Present */ + Sd4G = 0x000f00000000ffffull,/* 4G Limit */ + SdL = 0x0020000000000000ull,/* Long Attribute */ + SdD = 0x0040000000000000ull,/* Default Operand Size */ + SdG = 0x0080000000000000ull,/* Granularity */ +}; + +/* + * Performance Counter Configuration + */ +enum { /* Performance Event Selector */ + + PeHo = 0x0000020000000000ull,/* Host only */ + PeGo = 0x0000010000000000ull,/* Guest only */ + PeEvMskH = 0x0000000f00000000ull,/* Event mask H */ + PeCtMsk = 0x00000000ff000000ull,/* Counter mask */ + PeInMsk = 0x0000000000800000ull,/* Invert mask */ + PeCtEna = 0x0000000000400000ull,/* Counter enable */ + PeInEna = 0x0000000000100000ull,/* Interrupt enable */ + PePnCtl = 0x0000000000080000ull,/* Pin control */ + PeEdg = 0x0000000000040000ull,/* Edge detect */ + PeOS = 0x0000000000020000ull,/* OS mode */ + PeUsr = 0x0000000000010000ull,/* User mode */ + PeUnMsk = 0x000000000000ff00ull,/* Unit Mask */ + PeEvMskL = 0x00000000000000ffull,/* Event Mask L */ + + PeEvMsksh = 32ull, /* Event mask shift */ +}; + +enum { /* Segment Selector */ + SsRPL0 = 0x0000, /* Requestor Privilege Level */ + SsRPL1 = 0x0001, + SsRPL2 = 0x0002, + 
SsRPL3 = 0x0003, + SsTIGDT = 0x0000, /* GDT Table Indicator */ + SsTILDT = 0x0004, /* LDT Table Indicator */ + SsSIM = 0xfff8, /* Selector Index Mask */ +}; + +#define SSEL(si, tirpl) (((si)<<3)|(tirpl)) /* Segment Selector */ + +enum { + SiNULL = 0, /* NULL selector index */ + SiCS = 1, /* CS selector index */ + SiDS = 2, /* DS selector index */ + SiU32CS = 3, /* User CS selector index */ + SiUDS = 4, /* User DS selector index */ + SiUCS = 5, /* User CS selector index */ + SiFS = 6, /* FS selector index */ + SiGS = 7, /* GS selector index */ + SiTSS = 8, /* TSS selector index */ +}; + +/* + * Extern registers. + */ +#define RMACH R15 /* m-> */ +#define RUSER R14 /* up-> */ diff -Nru 0/sys/src/nix/k10/apic.c 4/sys/src/nix/k10/apic.c --- 0/sys/src/nix/k10/apic.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/apic.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,417 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" +#include "io.h" + +enum { /* Local APIC registers */ + Id = 0x0020, /* Identification */ + Ver = 0x0030, /* Version */ + Tp = 0x0080, /* Task Priority */ + Ap = 0x0090, /* Arbitration Priority */ + Pp = 0x00a0, /* Processor Priority */ + Eoi = 0x00b0, /* EOI */ + Ld = 0x00d0, /* Logical Destination */ + Df = 0x00e0, /* Destination Format */ + Siv = 0x00f0, /* Spurious Interrupt Vector */ + Is = 0x0100, /* Interrupt Status (8) */ + Tm = 0x0180, /* Trigger Mode (8) */ + Ir = 0x0200, /* Interrupt Request (8) */ + Es = 0x0280, /* Error Status */ + Iclo = 0x0300, /* Interrupt Command */ + Ichi = 0x0310, /* Interrupt Command [63:32] */ + Lvt0 = 0x0320, /* Local Vector Table 0 */ + Lvt5 = 0x0330, /* Local Vector Table 5 */ + Lvt4 = 0x0340, /* Local Vector Table 4 */ + Lvt1 = 0x0350, /* Local Vector Table 1 */ + Lvt2 = 0x0360, /* Local Vector Table 2 */ + Lvt3 = 0x0370, /* Local Vector Table 3 */ + Tic = 0x0380, /* Timer Initial Count */ + Tcc = 0x0390, /* Timer Current Count */ + Tdc = 0x03e0, /* Timer 
Divide Configuration */ + + Tlvt = Lvt0, /* Timer */ + Lint0 = Lvt1, /* Local Interrupt 0 */ + Lint1 = Lvt2, /* Local Interrupt 1 */ + Elvt = Lvt3, /* Error */ + Pclvt = Lvt4, /* Performance Counter */ + Tslvt = Lvt5, /* Thermal Sensor */ +}; + +enum { /* Siv */ + Swen = 0x00000100, /* Software Enable */ + Fdis = 0x00000200, /* Focus Disable */ +}; + +enum { /* Iclo */ + Lassert = 0x00004000, /* Assert level */ + + DSnone = 0x00000000, /* Use Destination Field */ + DSself = 0x00040000, /* Self is only destination */ + DSallinc = 0x00080000, /* All including self */ + DSallexc = 0x000c0000, /* All Excluding self */ +}; + +enum { /* Tlvt */ + Periodic = 0x00020000, /* Periodic Timer Mode */ +}; + +enum { /* Tdc */ + DivX2 = 0x00000000, /* Divide by 2 */ + DivX4 = 0x00000001, /* Divide by 4 */ + DivX8 = 0x00000002, /* Divide by 8 */ + DivX16 = 0x00000003, /* Divide by 16 */ + DivX32 = 0x00000008, /* Divide by 32 */ + DivX64 = 0x00000009, /* Divide by 64 */ + DivX128 = 0x0000000a, /* Divide by 128 */ + DivX1 = 0x0000000b, /* Divide by 1 */ +}; + +static u8int* apicbase; +static int apmachno = 1; + +Apic xlapic[Napic]; +Mach *xlapicmachptr[Napic]; /* maintained, but unused */ + +static u32int +apicrget(int r) +{ + return *((u32int*)(apicbase+r)); +} + +static void +apicrput(int r, u32int data) +{ + *((u32int*)(apicbase+r)) = data; +} + +int +apiceoi(int vecno) +{ + apicrput(Eoi, 0); + + return vecno; +} + +int +apicisr(int vecno) +{ + int isr; + + isr = apicrget(Is + (vecno/32)*16); + + return isr & (1<<(vecno%32)); +} + +void +apicinit(int apicno, uintmem pa, int isbp) +{ + Apic *apic; + + /* + * Mark the APIC useable if it has a good ID + * and the registers can be mapped. + * The APIC Extended Broadcast and ID bits in the HyperTransport + * Transaction Control register determine whether 4 or 8 bits + * are used for the APIC ID. There is also xAPIC and x2APIC + * to be dealt with sometime. 
+ */ + DBG("apicinit: apicno %d pa %#p isbp %d\n", apicno, pa, isbp); + if(apicno >= Napic){ + print("apicinit%d: out of range\n", apicno); + return; + } + if((apic = &xlapic[apicno])->useable){ + print("apicinit%d: already initialised\n", apicno); + return; + } + if(apicbase == nil){ + if((apicbase = vmap(pa, 1024)) == nil){ + print("apicinit%d: can't map apicbase\n", apicno); + return; + } + DBG("apicinit%d: apicbase %#p -> %#p\n", apicno, pa, apicbase); + } + apic->useable = 1; + + /* + * Assign a machno to the processor associated with this + * APIC, it may not be an identity map. + * Machno 0 is always the bootstrap processor. + */ + if(isbp){ + apic->machno = 0; + m->apicno = apicno; + } + else + apic->machno = apmachno++; +} + +static void +apicdump0(Apic *apic, int i) +{ + if(!apic->useable || apic->addr != 0) + return; + DBG("apic%d: machno %d lint0 %#8.8ux lint1 %#8.8ux\n", + i, apic->machno, apic->lvt[0], apic->lvt[1]); + DBG(" tslvt %#8.8ux pclvt %#8.8ux elvt %#8.8ux\n", + apicrget(Tslvt), apicrget(Pclvt), apicrget(Elvt)); + DBG(" tlvt %#8.8ux lint0 %#8.8ux lint1 %#8.8ux siv %#8.8ux\n", + apicrget(Tlvt), apicrget(Lint0), + apicrget(Lint1), apicrget(Siv)); +} + +void +apicdump(void) +{ + int i; + + if(!DBGFLG) + return; + + DBG("apicbase %#p apmachno %d\n", apicbase, apmachno); + for(i = 0; i < Napic; i++) + apicdump0(xlapic + i, i); + for(i = 0; i < Napic; i++) + apicdump0(xioapic + i, i); +} + +static void +apictimer(Ureg* ureg, void*) +{ + timerintr(ureg, 0); +} + +int +apiconline(void) +{ + Apic *apic; + u64int tsc; + u32int dfr, ver; + int apicno, nlvt; + + if(apicbase == nil) + return 0; + if((apicno = ((apicrget(Id)>>24) & 0xff)) >= Napic) + return 0; + apic = &xlapic[apicno]; + if(!apic->useable || apic->addr != nil) + return 0; + + /* + * Things that can only be done when on the processor + * owning the APIC, apicinit above runs on the bootstrap + * processor. 
+ */ + ver = apicrget(Ver); + nlvt = ((ver>>16) & 0xff) + 1; + if(nlvt > nelem(apic->lvt)){ + print("apicinit%d: nlvt %d > max (%d)\n", + apicno, nlvt, nelem(apic->lvt)); + nlvt = nelem(apic->lvt); + } + apic->nlvt = nlvt; + apic->ver = ver & 0xff; + + /* + * These don't really matter in Physical mode; + * set the defaults anyway. + */ + if(memcmp(m->cpuinfo, "AuthenticAMD", 12) == 0) + dfr = 0xf0000000; + else + dfr = 0xffffffff; + apicrput(Df, dfr); + apicrput(Ld, 0x00000000); + + /* + * Disable interrupts until ready by setting the Task Priority + * register to 0xff. + */ + apicrput(Tp, 0xff); + + /* + * Software-enable the APIC in the Spurious Interrupt Vector + * register and set the vector number. The vector number must have + * bits 3-0 0x0f unless the Extended Spurious Vector Enable bit + * is set in the HyperTransport Transaction Control register. + */ + apicrput(Siv, Swen|IdtSPURIOUS); + + /* + * Acknowledge any outstanding interrupts. + */ + apicrput(Eoi, 0); + + /* + * Use the TSC to determine the APIC timer frequency. + * It might be possible to snarf this from a chipset + * register instead. + */ + apicrput(Tdc, DivX1); + apicrput(Tlvt, Im); + tsc = rdtsc() + m->cpuhz/10; + apicrput(Tic, 0xffffffff); + + while(rdtsc() < tsc) + ; + + apic->hz = (0xffffffff-apicrget(Tcc))*10; + apic->max = apic->hz/HZ; + apic->min = apic->hz/(100*HZ); + apic->div = ((m->cpuhz/apic->max)+HZ/2)/HZ; + + if(m->machno == 0 || DBGFLG){ + print("apic%d: hz %lld max %lld min %lld div %lld\n", apicno, + apic->hz, apic->max, apic->min, apic->div); + } + + /* + * Mask interrupts on Performance Counter overflow and + * Thermal Sensor if implemented, and on Lintr0 (Legacy INTR), + * and Lintr1 (Legacy NMI). + * Clear any Error Status (write followed by read) and enable + * the Error interrupt. 
+ */ + switch(apic->nlvt){ + case 6: + apicrput(Tslvt, Im); + /*FALLTHROUGH*/ + case 5: + apicrput(Pclvt, Im); + /*FALLTHROUGH*/ + default: + break; + } + apicrput(Lint1, apic->lvt[1]|Im|IdtLINT1); + apicrput(Lint0, apic->lvt[0]|Im|IdtLINT0); + + apicrput(Es, 0); + apicrget(Es); + apicrput(Elvt, IdtERROR); + + /* + * Issue an INIT Level De-Assert to synchronise arbitration ID's. + * (Necessary in this implementation? - not if Pentium 4 or Xeon + * (APIC Version >= 0x14), or AMD). + apicrput(Ichi, 0); + apicrput(Iclo, DSallinc|Lassert|MTir); + while(apicrget(Iclo) & Ds) + ; + */ + + /* + * Reload the timer to de-synchronise the processors, + * then lower the task priority to allow interrupts to be + * accepted by the APIC. + */ + microdelay((TK2MS(1)*1000/apmachno) * m->machno); + + if(apic->machno == 0){ + apicrput(Tic, apic->max); + intrenable(IdtTIMER, apictimer, 0, -1, "APIC timer"); + apicrput(Tlvt, Periodic|IrqTIMER); + } + + if(m->machno == 0) + apicrput(Tp, 0); + + xlapicmachptr[apicno] = m; + + return 1; +} + +/* To start timers on TCs as part of the boot process. 
*/
+void
+apictimerenab(void)
+{
+	Apic *apic;
+
+	apic = &xlapic[(apicrget(Id)>>24) & 0xff];	/* this processor's entry, by its APIC id */
+
+	apiceoi(IdtTIMER);
+	apicrput(Tic, apic->max);
+	apicrput(Tlvt, Periodic|IrqTIMER);
+
+}
+
+void
+apictimerset(uvlong next)
+{
+	Mpl pl;
+	Apic *apic;
+	vlong period;
+
+	apic = &xlapic[(apicrget(Id)>>24) & 0xff];	/* this processor's entry, by its APIC id */
+
+	pl = splhi();
+	lock(&m->apictimerlock);
+
+	period = apic->max;	/* used as is when next == 0 */
+	if(next != 0){
+		period = next - fastticks(nil);	/* fastticks is just rdtsc() */
+		period /= apic->div;	/* scale by the div computed in apiconline */
+
+		if(period < apic->min)	/* also catches a deadline already in the past */
+			period = apic->min;
+		else if(period > apic->max - apic->min)
+			period = apic->max;
+	}
+	apicrput(Tic, period);
+
+	unlock(&m->apictimerlock);
+	splx(pl);
+}
+
+/* NIPI - INIT IPI, freeze processor */
+void
+apicnipi(int apicno)
+{
+	u32int crhi;
+
+	crhi = apicno<<24;	/* target id goes in the top byte of Ichi */
+	apicrput(Ichi, crhi);
+	apicrput(Iclo, DSnone|TMlevel|Lassert|MTir);	/* assert INIT */
+	microdelay(200);
+	apicrput(Iclo, DSnone|TMlevel|MTir);	/* same command without Lassert: de-assert */
+}
+
+void
+apicsipi(int apicno, uintmem pa)
+{
+	int i;
+	u32int crhi, crlo;
+
+	/*
+	 * SIPI - Start-up IPI.
+	 * To do: checks on apic validity.
+	 */
+	apicnipi(apicno);
+	millidelay(10);
+
+	crhi = apicno<<24;
+	crlo = DSnone|TMedge|MTsipi|((u32int)pa/(4*KiB));	/* low bits carry pa's 4KiB page number */
+	for(i = 0; i < 2; i++){	/* the start-up IPI is sent twice */
+		apicrput(Ichi, crhi);
+		apicrput(Iclo, crlo);
+		microdelay(200);
+	}
+}
+
+void
+apicipi(int apicno)
+{
+	apicrput(Ichi, apicno<<24);
+	apicrput(Iclo, DSnone|TMedge|Lassert|MTf|IdtIPI);
+	while(apicrget(Iclo) & Ds)	/* spin while the Delivery Status bit is set */
+		;
+}
+
+void
+apicpri(int pri)
+{
+	apicrput(Tp, pri);	/* Tp is the Task Priority register */
+}
diff -Nru 0/sys/src/nix/k10/apic.h 4/sys/src/nix/k10/apic.h
--- 0/sys/src/nix/k10/apic.h Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/k10/apic.h Wed Feb 6 00:00:00 2013
@@ -0,0 +1,93 @@
+/*
+ * There are 2 flavours of APIC, Local APIC and IOAPIC,
+ * Each I/O APIC has a unique physical address,
+ * Local APICs are all at the same physical address as they can only be
+ * accessed by the local CPU. APIC ids are unique to the
+ * APIC type, so an IOAPIC and APIC both with id 0 is ok.
+ */ +typedef struct Ioapic Ioapic; +typedef struct Lapic Lapic; +typedef struct Apic Apic; + +struct Ioapic { + Lock; /* register access */ + u32int* addr; /* register base */ + uintmem paddr; /* register base */ + int nrdt; /* size of RDT */ + int ibase; /* global interrupt base */ +}; + +struct Lapic { + int machno; /* APIC */ + + u32int lvt[7]; + int nlvt; + int ver; + + vlong hz; /* APIC Timer frequency */ + vlong max; + vlong min; + vlong div; +}; + +struct Apic { + int useable; /* en */ + Ioapic; + Lapic; +}; + +enum { + Nbus = 256, /* must be 256 */ + Napic = 254, /* xAPIC architectural limit */ + Nrdt = 64, +}; + +/* + * Common bits for + * IOAPIC Redirection Table Entry (RDT); + * APIC Local Vector Table Entry (LVT); + * APIC Interrupt Command Register (ICR). + * [10:8] Message Type + * [11] Destination Mode (RW) + * [12] Delivery Status (RO) + * [13] Interrupt Input Pin Polarity (RW) + * [14] Remote IRR (RO) + * [15] Trigger Mode (RW) + * [16] Interrupt Mask + */ +enum { + MTf = 0x00000000, /* Fixed */ + MTlp = 0x00000100, /* Lowest Priority */ + MTsmi = 0x00000200, /* SMI */ + MTrr = 0x00000300, /* Remote Read */ + MTnmi = 0x00000400, /* NMI */ + MTir = 0x00000500, /* INIT/RESET */ + MTsipi = 0x00000600, /* Startup IPI */ + MTei = 0x00000700, /* ExtINT */ + + Pm = 0x00000000, /* Physical Mode */ + Lm = 0x00000800, /* Logical Mode */ + + Ds = 0x00001000, /* Delivery Status */ + IPhigh = 0x00000000, /* IIPP High */ + IPlow = 0x00002000, /* IIPP Low */ + Rirr = 0x00004000, /* Remote IRR */ + TMedge = 0x00000000, /* Trigger Mode Edge */ + TMlevel = 0x00008000, /* Trigger Mode Level */ + Im = 0x00010000, /* Interrupt Mask */ +}; + +extern Apic xlapic[Napic]; +extern Apic xioapic[Napic]; +extern Mach *xlapicmachptr[Napic]; /* maintained, but unused */ + +#define l16get(p) (((p)[1]<<8)|(p)[0]) +#define l32get(p) (((u32int)l16get(p+2)<<16)|l16get(p)) +#define l64get(p) (((u64int)l32get(p+4)<<32)|l32get(p)) + +extern void apicdump(void); +extern void 
apictimerenab(void); +extern void ioapicdump(void); + +extern int pcimsienable(Pcidev*, uvlong); +extern int pcimsimask(Pcidev*, int); diff -Nru 0/sys/src/nix/k10/arch.c 4/sys/src/nix/k10/arch.c --- 0/sys/src/nix/k10/arch.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/arch.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,104 @@ +/* + * EPISODE 12B + * How to recognise different types of trees from quite a long way away. + * NO. 1 + * THE LARCH + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +int +incref(Ref *r) +{ + int x; + + lock(r); + x = ++r->ref; + unlock(r); + return x; +} + +int +decref(Ref *r) +{ + int x; + + lock(r); + x = --r->ref; + unlock(r); + if(x < 0) + panic("decref pc=%#p", getcallerpc(&r)); + + return x; +} + +void +procrestore(Proc *p) +{ + uvlong t; + + if(p->kp) + return; + cycles(&t); + p->pcycles -= t; + + fpuprocrestore(p); +} + +/* + * Save the mach dependent part of the process state. + * NB: the caller should mmuflushtlb after procsave(). + * procsave/procrestore don't touch the mmu, they + * care about fpu, mostly. + */ +void +procsave(Proc *p) +{ + uvlong t; + + cycles(&t); + p->pcycles += t; + + fpuprocsave(p); +} + +static void +linkproc(void) +{ + spllo(); + up->kpfun(up->kparg); + pexit("kproc dying", 0); +} + +void +kprocchild(Proc* p, void (*func)(void*), void* arg) +{ + /* + * gotolabel() needs a word on the stack in + * which to place the return PC used to jump + * to linkproc(). + */ + p->sched.pc = PTR2UINT(linkproc); + p->sched.sp = PTR2UINT(p->kstack+KSTACK-BY2SE); + p->sched.sp = STACKALIGN(p->sched.sp); + + p->kpfun = func; + p->kparg = arg; +} + +/* + * put the processor in the halt state if we've no processes to run. + * an interrupt will get us going again. + * The boot TC in nix can't halt, because it must stay alert in + * case an AC makes a handler process ready. + * We should probably use waitwhile in that case. 
+ */ +void +idlehands(void) +{ + halt(); +} diff -Nru 0/sys/src/nix/k10/archk10.c 4/sys/src/nix/k10/archk10.c --- 0/sys/src/nix/k10/archk10.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/archk10.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,308 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static int +cpuidinit(void) +{ + u32int eax, info[4]; + + /* + * Standard CPUID functions. + * Functions 0 and 1 will be needed multiple times + * so cache the info now. + */ + if((m->ncpuinfos = cpuid(0, 0, m->cpuinfo[0])) == 0) + return 0; + m->ncpuinfos++; + + if(memcmp(&m->cpuinfo[0][1], "GenuntelineI", 12) == 0) + m->isintelcpu = 1; + cpuid(1, 0, m->cpuinfo[1]); + + /* + * Extended CPUID functions. + */ + if((eax = cpuid(0x80000000, 0, info)) >= 0x80000000) + m->ncpuinfoe = (eax & ~0x80000000) + 1; + + /* is mnonitor supported? */ + if (m->cpuinfo[1][2] & 8) { + cpuid(5, 0, m->cpuinfo[2]); + waitwhile = k10waitwhile; + } + + return 1; +} + +static int +cpuidinfo(u32int eax, u32int ecx, u32int info[4]) +{ + if(m->ncpuinfos == 0 && cpuidinit() == 0) + return 0; + + if(!(eax & 0x80000000)){ + if(eax >= m->ncpuinfos) + return 0; + } + else if(eax >= (0x80000000|m->ncpuinfoe)) + return 0; + + cpuid(eax, ecx, info); + + return 1; +} + +/* use intel brand string to discover hz */ +static vlong +intelbshz(void) +{ + char s[4*4*3+1], *p, *h; + int i, j; + u32int r[4]; + uvlong scale; + + p = s; + for(i = 0; i < 3; i++){ + if(!cpuidinfo(0x80000002+i, 0, r)) + return 0; + for(j = 0; j < 4; j++){ + memmove(p, r+j, 4); + p += 4; + } + } + *p = 0; + DBG("brandstring: %s\n", s); + + h = strstr(s, "Hz"); /* 3.07THz */ + if(h == nil || h-s < 5) + return 0; + h[2] = 0; + + scale = 1000; + switch(h[-1]){ + default: + return 0; + case 'T': + scale *= 1000; + case 'G': + scale *= 1000; + case 'M': + scale *= 1000; + } + + /* get rid of the fractional part */ + if(h[-4] == '.'){ + h[-4] = h[-5]; + h[-5] = ' '; + scale /= 100; + } + return 
atoi(h-5)*scale; +} + +static vlong +cpuidhz(u32int info[2][4]) +{ + int r; + vlong hz; + u64int msr; + + if(memcmp(&info[0][1], "GenuntelineI", 12) == 0){ + switch(info[1][0] & 0x0fff3ff0){ + default: + hz = intelbshz(); + break; + } + DBG("cpuidhz: %#llux hz\n", hz); + } + else if(memcmp(&info[0][1], "AuthcAMDenti", 12) == 0){ + switch(info[1][0] & 0x0fff0ff0){ + default: + return 0; + case 0x00000f50: /* K8 */ + msr = rdmsr(0xc0010042); + if(msr == 0) + return 0; + hz = (800 + 200*((msr>>1) & 0x1f)) * 1000000ll; + break; + case 0x00100f90: /* K10 */ + case 0x00000620: /* QEMU64 */ + msr = rdmsr(0xc0010064); + r = (msr>>6) & 0x07; + hz = (((msr & 0x3f)+0x10)*100000000ll)/(1<machno != 0) + return sys->machptr[0]->cpuhz; + + return i8254hz(info); +} + +int +archmmu(void) +{ + u32int info[4]; + + /* + * Should the check for m->machno != 0 be here + * or in the caller (mmuinit)? + * + * To do here: + * check and enable Pse; + * Pge; Nxe. + */ + + /* + * How many page sizes are there? + * Always have 4*KiB, but need to check + * configured correctly. + */ + assert(PGSZ == 4*KiB); + + m->pgszlg2[0] = 12; + m->pgszmask[0] = (1<<12)-1; + m->pgsz[0] = 1<<12; + m->npgsz = 1; + if(m->ncpuinfos == 0 && cpuidinit() == 0) + return 1; + + /* + * Check the Pse bit in function 1 DX for 2*MiB support; + * if false, only 4*KiB is available. + */ + if(!(m->cpuinfo[1][3] & 0x00000008)) + return 1; + m->pgszlg2[1] = 21; + m->pgszmask[1] = (1<<21)-1; + m->pgsz[1] = 1<<21; + m->npgsz = 2; + + /* + * Check the Page1GB bit in function 0x80000001 DX for 1*GiB support. 
+ */ + if(cpuidinfo(0x80000001, 0, info) && (info[3] & 0x04000000)){ + m->pgszlg2[2] = 30; + m->pgszmask[2] = (1<<30)-1; + m->pgsz[2] = 1<<30; + m->npgsz = 3; + } + + return m->npgsz; +} + +static int +fmtP(Fmt* f) +{ + uintmem pa; + + pa = va_arg(f->args, uintmem); + + if(f->flags & FmtSharp) + return fmtprint(f, "%#16.16llux", pa); + + return fmtprint(f, "%llud", pa); +} + +static int +fmtL(Fmt* f) +{ + Mpl pl; + + pl = va_arg(f->args, Mpl); + + return fmtprint(f, "%#16.16llux", pl); +} + +static int +fmtR(Fmt* f) +{ + u64int r; + + r = va_arg(f->args, u64int); + + return fmtprint(f, "%#16.16llux", r); +} + +/* virtual address fmt */ +static int +fmtW(Fmt *f) +{ + u64int va; + + va = va_arg(f->args, u64int); + return fmtprint(f, "%#ullx=0x[%ullx][%ullx][%ullx][%ullx][%ullx]", va, + PTLX(va, 3), PTLX(va, 2), PTLX(va, 1), PTLX(va, 0), + va & ((1<cpumhz*microsecs; r < t; r = rdtsc()) + ; +} + +void +millidelay(int millisecs) +{ + u64int r, t; + + r = rdtsc(); + for(t = r + m->cpumhz*1000ull*millisecs; r < t; r = rdtsc()) + ; +} diff -Nru 0/sys/src/nix/k10/archk8.c 4/sys/src/nix/k10/archk8.c --- 0/sys/src/nix/k10/archk8.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/archk8.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,247 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static int +cpuidinit(void) +{ + int i, n; + u32int eax; + + if((m->ncpuinfos = cpuid(0, m->cpuinfo[0])) == 0) + return 0; + + n = ++m->ncpuinfos; + if(n > nelem(m->cpuinfo)) + n = nelem(m->cpuinfo); + eax = cpuid(0x80000000, m->cpuinfo[m->ncpuinfos-1]); + if(eax >= 0x80000000){ + eax &= ~0x80000000; + n += ++eax; + if(n > nelem(m->cpuinfo)) + n = nelem(m->cpuinfo); + m->ncpuinfoe = n - m->ncpuinfos; + } + + for(i = 1; i < n; i++){ + eax = i; + if(i >= m->ncpuinfos) + eax = 0x80000000|(i - m->ncpuinfos); + cpuid(eax, m->cpuinfo[i]); + } + + return 1; +} + +static u32int* +cpuidinfo(u32int eax) +{ + if(m->ncpuinfos == 0 && cpuidinit() == 0) + return nil; + + 
if(!(eax & 0x80000000)){ + if(eax >= m->ncpuinfos) + return nil; + } + else{ + eax &= ~0x80000000; + if(eax >= m->ncpuinfoe) + return nil; + eax += m->ncpuinfos; + } + + return m->cpuinfo[eax]; +} + +static vlong +cpuidhz(u32int* info[2]) +{ + int f, r; + vlong hz; + u64int msr; + + if(memcmp(&info[0][1], "GenuntelineI", 12) == 0){ + switch(info[1][0] & 0x0fff3ff0){ + default: + return 0; + case 0x00000f30: /* Xeon (MP), Pentium [4D] */ + case 0x00000f40: /* Xeon (MP), Pentium [4D] */ + case 0x00000f60: /* Xeon 7100, 5000 or above */ + msr = rdmsr(0x2c); + r = (msr>>16) & 0x07; + switch(r){ + default: + return 0; + case 0: + hz = 266666666666ll; + break; + case 1: + hz = 133333333333ll; + break; + case 2: + hz = 200000000000ll; + break; + case 3: + hz = 166666666666ll; + break; + case 4: + hz = 333333333333ll; + break; + } + /* + * Hz is *1000 at this point. + * Do the scaling then round it. + * The manual is conflicting about + * the size of the msr field. + */ + hz = (((hz*(msr>>24))/100)+5)/10; + break; + case 0x00000690: /* Pentium M, Celeron M */ + case 0x000006d0: /* Pentium M, Celeron M */ + hz = ((rdmsr(0x2a)>>22) & 0x1f)*100 * 1000000ll; + break; + case 0x000006e0: /* Core Duo */ + case 0x000006f0: /* Core 2 Duo/Quad/Extreme */ + case 0x00010670: /* Core 2 Extreme */ + case 0x000006a0: /* i7 paurea... */ + /* + * Get the FSB frequemcy. + * If processor has Enhanced Intel Speedstep Technology + * then non-integer bus frequency ratios are possible. 
+ */ + if(info[1][2] & 0x00000080){ + msr = rdmsr(0x198); + r = (msr>>40) & 0x1f; + } + else{ + msr = 0; + r = rdmsr(0x2a) & 0x1f; + } + f = rdmsr(0xcd) & 0x07; + switch(f){ + default: + return 0; + case 5: + hz = 100000000000ll; + break; + case 1: + hz = 133333333333ll; + break; + case 3: + hz = 166666666666ll; + break; + case 2: + hz = 200000000000ll; + break; + case 0: + hz = 266666666666ll; + break; + case 4: + hz = 333333333333ll; + break; + case 6: + hz = 400000000000ll; + break; + } + + /* + * Hz is *1000 at this point. + * Do the scaling then round it. + */ + if(msr & 0x0000400000000000ll) + hz = hz*r + hz/2; + else + hz = hz*r; + hz = ((hz/100)+5)/10; + break; + } + DBG("cpuidhz: 0x2a: %#llux hz %lld\n", rdmsr(0x2a), hz); + } + else if(memcmp(&info[0][1], "AuthcAMDenti", 12) == 0){ + switch(info[1][0] & 0x0fff0ff0){ + default: + return 0; + case 0x00000f50: /* K8 */ + msr = rdmsr(0xc0010042); + if(msr == 0) + return 0; + hz = (800 + 200*((msr>>1) & 0x1f)) * 1000000ll; + break; + case 0x00100f90: /* K10 */ + case 0x00000620: /* QEMU64 */ + msr = rdmsr(0xc0010064); + r = (msr>>6) & 0x07; + hz = (((msr & 0x3f)+0x10)*100000000ll)/(1<ncpuinfos == 0 && cpuidinit() == 0) + return; + + n = m->ncpuinfos+m->ncpuinfoe; + for(i = 0; i < n; i++){ + DBG("eax = %#8.8ux: %8.8ux %8.8ux %8.8ux %8.8ux\n", + (i >= m->ncpuinfos ? 
0x80000000|(i - m->ncpuinfos): i), + m->cpuinfo[i][0], m->cpuinfo[i][1], + m->cpuinfo[i][2], m->cpuinfo[i][3]); + } +} + +vlong +archhz(void) +{ + vlong hz; + u32int *info[2]; + + if((info[0] = cpuidinfo(0)) == 0 || (info[1] = cpuidinfo(1)) == 0) + return 0; + + hz = cpuidhz(info); + if(hz != 0) + return hz; + + return i8254hz(info); +} + +void +archidle(void) +{ + halt(); +} + +void +microdelay(int microsecs) +{ + u64int r, t; + + r = rdtsc(); + for(t = r + m->cpumhz*microsecs; r < t; r = rdtsc()) + ; +} + +void +millidelay(int millisecs) +{ + u64int r, t; + + r = rdtsc(); + for(t = r + m->cpumhz*1000ull*millisecs; r < t; r = rdtsc()) + ; +} diff -Nru 0/sys/src/nix/k10/asm.c 4/sys/src/nix/k10/asm.c --- 0/sys/src/nix/k10/asm.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/asm.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,429 @@ +/* + * To do: + * find a purpose for this... + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" + +/* + * Address Space Map. + * Low duty cycle. 
+ */
+typedef struct Asm Asm;
+struct Asm {
+	uintmem	addr;
+	uintmem	size;
+	int	type;
+	int	location;
+	Asm*	next;
+};
+
+enum {
+	AsmNONE		= 0,
+	AsmMEMORY	= 1,
+	AsmRESERVED	= 2,
+	AsmACPIRECLAIM	= 3,
+	AsmACPINVS	= 4,
+
+	AsmDEV		= 5,
+};
+
+/*
+ * The map is a singly-linked list, sorted by address, of entries
+ * taken from asmarray.  It starts as one AsmNONE entry covering the
+ * whole address space.  Entries removed from the list are chained on
+ * asmfreelist for reuse.
+ */
+static Lock asmlock;
+static Asm asmarray[64] = {
+	{ 0, ~0, AsmNONE, 0, nil, },
+};
+static int asmindex = 1;
+static Asm* asmlist = &asmarray[0];
+static Asm* asmfreelist;
+
+/* Print every map entry: start, end, type and size. */
+/*static*/ void
+asmdump(void)
+{
+	Asm* asm;
+
+	print("asm: index %d:\n", asmindex);
+	for(asm = asmlist; asm != nil; asm = asm->next){
+		print(" %#P %#P %d (%P)\n",
+			asm->addr, asm->addr+asm->size,
+			asm->type, asm->size);
+	}
+}
+
+/*
+ * Get an unlinked entry describing [addr, addr+size) of the given
+ * type, from the free list if possible, otherwise from asmarray.
+ * Returns nil when asmarray is exhausted.  The caller sets next.
+ */
+static Asm*
+asmnew(uintmem addr, uintmem size, int type)
+{
+	Asm * asm;
+
+	if(asmfreelist != nil){
+		asm = asmfreelist;
+		asmfreelist = asm->next;
+		asm->next = nil;
+	}
+	else{
+		if(asmindex >= nelem(asmarray))
+			return nil;
+		asm = &asmarray[asmindex++];
+	}
+	asm->addr = addr;
+	asm->size = size;
+	asm->type = type;
+
+	return asm;
+}
+
+/*
+ * Return [addr, addr+size) to the map as the given type, merging
+ * with an adjacent entry of the same type on either side.
+ * Returns 0 on success (or when size is 0), -1 if the range overlaps
+ * an existing entry or no map entry is available.
+ */
+int
+asmfree(uintmem addr, uintmem size, int type)
+{
+	Asm *np, *pp, **ppp;
+
+	DBG("asmfree: %#P@%#P, type %d\n", size, addr, type);
+	if(size == 0)
+		return 0;
+
+	lock(&asmlock);
+
+	/*
+	 * Find either a map entry with an address greater
+	 * than that being returned, or the end of the map.
+	 */
+	pp = nil;
+	ppp = &asmlist;
+	for(np = *ppp; np != nil && np->addr <= addr; np = np->next){
+		pp = np;
+		ppp = &np->next;
+	}
+
+	if((pp != nil && pp->addr+pp->size > addr)
+	|| (np != nil && addr+size > np->addr)){
+		unlock(&asmlock);
+		DBG("asmfree: overlap %#P@%#P, type %d\n", size, addr, type);
+		return -1;
+	}
+
+	/* extend the previous entry, absorbing the next one if now contiguous */
+	if(pp != nil && pp->type == type && pp->addr+pp->size == addr){
+		pp->size += size;
+		if(np != nil && np->type == type && addr+size == np->addr){
+			pp->size += np->size;
+			pp->next = np->next;
+
+			np->next = asmfreelist;
+			asmfreelist = np;
+		}
+
+		unlock(&asmlock);
+		return 0;
+	}
+
+	/* extend the next entry downwards */
+	if(np != nil && np->type == type && addr+size == np->addr){
+		np->addr -= size;
+		np->size += size;
+
+		unlock(&asmlock);
+		return 0;
+	}
+
+	/* no neighbour to merge with: link in a new entry */
+	if((pp = asmnew(addr, size, type)) == nil){
+		unlock(&asmlock);
+		DBG("asmfree: losing %#P@%#P, type %d\n", size, addr, type);
+		return -1;
+	}
+	*ppp = pp;
+	pp->next = np;
+
+	unlock(&asmlock);
+
+	return 0;
+}
+
+/*
+ * Take size bytes out of a map entry of the given type and return
+ * the address of the range, or 0 if nothing suitable was found.
+ * addr != 0 requests that exact address; align > 0 rounds the start
+ * up to a multiple of align.  Any gap left in front of the returned
+ * range by alignment is given back with asmfree().
+ */
+uintmem
+asmalloc(uintmem addr, uintmem size, int type, int align)
+{
+	uintmem a, o;
+	Asm *asm, *pp;
+
+	DBG("asmalloc: %#P@%#P, type %d\n", size, addr, type);
+	lock(&asmlock);
+	for(pp = nil, asm = asmlist; asm != nil; pp = asm, asm = asm->next){
+		if(asm->type != type)
+			continue;
+		a = asm->addr;
+
+		if(addr != 0){
+			/*
+			 * A specific address range has been given:
+			 *   if the current map entry is greater than
+			 *   the address is not in the map;
+			 *   if the current map entry does not overlap
+			 *   the beginning of the requested range then
+			 *   continue on to the next map entry;
+			 *   if the current map entry does not entirely
+			 *   contain the requested range then the range
+			 *   is not in the map.
+			 * The comparisons are strange to prevent
+			 * overflow.
+			 */
+			if(a > addr)
+				break;
+			if(asm->size < addr - a)
+				continue;
+			if(addr - a > asm->size - size)
+				break;
+			a = addr;
+		}
+
+		if(align > 0)
+			a = ((a+align-1)/align)*align;
+		if(asm->addr+asm->size-a < size)
+			continue;
+
+		o = asm->addr;
+		asm->addr = a+size;
+		asm->size -= a-o+size;
+		if(asm->size == 0){
+			/*
+			 * Entry fully consumed: unlink it.  When it is
+			 * the first entry the list head must move too,
+			 * otherwise asmlist would still point at an
+			 * entry that is now on the free list.
+			 */
+			if(pp != nil)
+				pp->next = asm->next;
+			else
+				asmlist = asm->next;
+			asm->next = asmfreelist;
+			asmfreelist = asm;
+		}
+
+		unlock(&asmlock);
+		if(o != a)
+			asmfree(o, a-o, type);
+		return a;
+	}
+	unlock(&asmlock);
+
+	return 0;
+}
+
+/*
+ * Retype [addr, addr+size) from AsmNONE to type.  If the range cannot
+ * be carved out of AsmNONE nothing is done; if re-inserting it with
+ * the new type fails it is put back as AsmNONE.
+ */
+static void
+asminsert(uintmem addr, uintmem size, int type)
+{
+	if(type == AsmNONE || asmalloc(addr, size, AsmNONE, 0) == 0)
+		return;
+	if(asmfree(addr, size, type) == 0)
+		return;
+	asmfree(addr, size, AsmNONE);
+}
+
+/*
+ * Set sys->pmstart and sys->pmend to the page-rounded physical
+ * address of end, and remove everything below that from the map.
+ */
+void
+asminit(void)
+{
+	sys->pmstart = ROUNDUP(PADDR(end), PGSZ);
+	sys->pmend = sys->pmstart;
+	asmalloc(0, sys->pmstart, AsmNONE, 0);
+}
+
+/*
+ * Notes:
+ * asmmapinit and asmmodinit called from multiboot;
+ * subject to change; the numerology here is probably suspect.
+ * Multiboot defines the alignment of modules as 4096.
+ */
+void
+asmmapinit(uintmem addr, uintmem size, int type)
+{
+	switch(type){
+	default:
+		asminsert(addr, size, type);
+		break;
+	case AsmMEMORY:
+		/*
+		 * Adjust things for the peculiarities of this
+		 * architecture.
+		 * Sys->pmend is the largest physical memory address found,
+		 * there may be gaps between it and sys->pmstart, the range
+		 * and how much of it is occupied, might need to be known
+		 * for setting up allocators later.
+		 */
+		if(addr < 1*MiB || addr+size < sys->pmstart)
+			break;
+		if(addr < sys->pmstart){
+			size -= sys->pmstart - addr;
+			addr = sys->pmstart;
+		}
+		asminsert(addr, size, type);
+		sys->pmoccupied += size;
+		if(addr+size > sys->pmend)
+			sys->pmend = addr+size;
+		break;
+	}
+}
+
+/*
+ * Account for a boot module occupying [start, end): if it lies at or
+ * above sys->pmstart, remove the space up to its 4096-rounded end
+ * from the map and advance sys->pmstart past it.
+ */
+void
+asmmodinit(u32int start, u32int end, char* s)
+{
+	DBG("asmmodinit: %#ux -> %#ux: <%s> %#ux\n",
+		start, end, s, ROUNDUP(end, 4096));
+
+	if(start < sys->pmstart)
+		return;
+	end = ROUNDUP(end, 4096);
+	if(end > sys->pmstart){
+		asmalloc(sys->pmstart, end-sys->pmstart, AsmNONE, 0);
+		sys->pmstart = end;
+	}
+}
+
+static int npg[4];
+
+/*
+ * Hand out size zeroed bytes from the already-mapped kernel virtual
+ * range by advancing sys->vmunused; asserts rather than fails when
+ * the mapped range is exhausted.
+ */
+void*
+asmbootalloc(usize size)
+{
+	uintptr va;
+
+	assert(sys->vmunused+size <= sys->vmunmapped);
+	va = sys->vmunused;
+	sys->vmunused += size;
+	memset(UINT2PTR(va), 0, size);
+	return UINT2PTR(va);
+}
+
+/*
+ * Allocator passed to mmuwalk: take one PTSZ-aligned, PTSZ-sized
+ * page-table page from sys->vmunused and return its physical
+ * address, or ~0 (without advancing) if it has no mapping.
+ */
+static uintmem
+asmwalkalloc(usize size)
+{
+	uintmem pa;
+
+	assert(size == PTSZ && sys->vmunused+size <= sys->vmunmapped);
+
+	if(!ALIGNED(sys->vmunused, PTSZ)){
+		DBG("asmwalkalloc: %ulld wasted\n",
+			ROUNDUP(sys->vmunused, PTSZ) - sys->vmunused);
+		sys->vmunused = ROUNDUP(sys->vmunused, PTSZ);
+	}
+	if((pa = mmuphysaddr(sys->vmunused)) != ~0)
+		sys->vmunused += size;
+
+	return pa;
+}
+
+// still needed so iallocb gets initialised correctly. needs to go.
+#define ConfCrap
+
+/*
+ * Finish setting up kernel memory mappings from the address map:
+ *  1. take the physical range backing [vmunmapped, vmend) out of the
+ *     map as type 1 (AsmMEMORY) and map it with 2MiB pages;
+ *  2. map every AsmMEMORY entry at KSEG2+addr, using for each piece
+ *     the largest supported page size that is aligned and fits;
+ *  3. under ConfCrap, record each range in conf.mem[] and derive
+ *     conf.npage, conf.upages and conf.ialloc.
+ * Page-table pages come from asmwalkalloc().
+ */
+void
+asmmeminit(void)
+{
+	int i, l;
+	Asm* asm;
+	PTE *pte, *pml4;
+	uintptr va;
+	uintmem hi, lo, mem, nextmem, pa;
+#ifdef ConfCrap
+	int cx;
+#endif /* ConfCrap */
+
+	assert(!((sys->vmunmapped|sys->vmend) & m->pgszmask[1]));
+
+	if((pa = mmuphysaddr(sys->vmunused)) == ~0)
+		panic("asmmeminit 1");
+	/* physical address corresponding to vmunmapped */
+	pa += sys->vmunmapped - sys->vmunused;
+	mem = asmalloc(pa, sys->vmend - sys->vmunmapped, 1, 0);
+	if(mem != pa)
+		panic("asmmeminit 2");
+	DBG("pa %#llux mem %#llux\n", pa, mem);
+
+	/* assume already 2MiB aligned */
+	assert(ALIGNED(sys->vmunmapped, 2*MiB));
+	pml4 = UINT2PTR(m->pml4->va);
+	while(sys->vmunmapped < sys->vmend){
+		l = mmuwalk(pml4, sys->vmunmapped, 1, &pte, asmwalkalloc);
+		DBG("%#p l %d\n", sys->vmunmapped, l);
+		*pte = pa|PtePS|PteRW|PteP;
+		sys->vmunmapped += 2*MiB;
+		pa += 2*MiB;
+	}
+
+#ifdef ConfCrap
+	cx = 0;
+#endif /* ConfCrap */
+	for(asm = asmlist; asm != nil; asm = asm->next){
+		if(asm->type != AsmMEMORY)
+			continue;
+		va = KSEG2+asm->addr;
+		print("asm: addr %#P end %#P type %d size %P\n",
+			asm->addr, asm->addr+asm->size,
+			asm->type, asm->size);
+
+		lo = asm->addr;
+		hi = asm->addr+asm->size;
+		/* Convert a range into pages */
+		for(mem = lo; mem < hi; mem = nextmem){
+			/* default step when no page size fits: next smallest-page boundary */
+			nextmem = (mem + PGLSZ(0)) & ~m->pgszmask[0];
+
+			/* Try large pages first */
+			for(i = m->npgsz - 1; i >= 0; i--){
+				if((mem & m->pgszmask[i]) != 0)
+					continue;
+				if(mem + PGLSZ(i) > hi)
+					continue;
+				/* This page fits entirely within the range. */
+				/* Mark it as usable */
+				if((l = mmuwalk(pml4, va, i, &pte, asmwalkalloc)) < 0)
+					panic("asmmeminit 3");
+
+				*pte = mem|PteRW|PteP;
+				if(l > 0)
+					*pte |= PtePS;
+
+				nextmem = mem + PGLSZ(i);
+				va += PGLSZ(i);
+				npg[i]++;
+
+				break;
+			}
+		}
+
+#ifdef ConfCrap
+		/*
+		 * Fill in conf crap.
+		 * Only the first nelem(conf.mem) ranges are recorded.
+		 */
+		if(cx >= nelem(conf.mem))
+			continue;
+		lo = ROUNDUP(asm->addr, PGSZ);
+//if(lo >= 600ull*MiB)
+//	continue;
+		conf.mem[cx].base = lo;
+		hi = ROUNDDN(hi, PGSZ);
+//if(hi > 600ull*MiB)
+//	hi = 600*MiB;
+		conf.mem[cx].npage = (hi - lo)/PGSZ;
+		conf.npage += conf.mem[cx].npage;
+		print("cm %d: addr %#llux npage %lud\n",
+			cx, conf.mem[cx].base, conf.mem[cx].npage);
+		cx++;
+#endif /* ConfCrap */
+	}
+	print("%d %d %d\n", npg[0], npg[1], npg[2]);
+
+#ifdef ConfCrap
+	/*
+	 * Fill in more conf crap.
+	 * This is why I hate Plan 9.
+	 */
+	conf.upages = conf.npage;
+	i = (sys->vmend - sys->vmstart)/PGSZ;		/* close enough */
+	conf.ialloc = (i/2)*PGSZ;
+	print("npage %llud upage %lud kpage %d\n",
+		conf.npage, conf.upages, i);
+
+#endif /* ConfCrap */
+}
+
+/*
+ * Pass every AsmMEMORY range in the map to physinit(),
+ * then dump the resulting physical allocator state.
+ */
+void
+asmumeminit(void)
+{
+	Asm *asm;
+	extern void physallocdump(void);
+
+	for(asm = asmlist; asm != nil; asm = asm->next){
+		if(asm->type != AsmMEMORY)
+			continue;
+		physinit(asm->addr, asm->size);
+	}
+	physallocdump();
+}
diff -Nru 0/sys/src/nix/k10/boot.fs 4/sys/src/nix/k10/boot.fs
--- 0/sys/src/nix/k10/boot.fs Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/k10/boot.fs Wed Feb 6 00:00:00 2013
@@ -0,0 +1,5 @@
+#!/boot/rc -m /boot/rcmain
+/boot/echo Morning
+# boot script for file servers, including standalone ones
+path=(/boot /$cputype/bin /rc/bin .)
+exec /boot/rc -m/boot/rcmain -i
diff -Nru 0/sys/src/nix/k10/cga.c 4/sys/src/nix/k10/cga.c
--- 0/sys/src/nix/k10/cga.c Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/k10/cga.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,163 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+enum {
+	Black		= 0x00,
+	Blue		= 0x01,
+	Green		= 0x02,
+	Cyan		= 0x03,
+	Red		= 0x04,
+	Magenta		= 0x05,
+	Brown		= 0x06,
+	Grey		= 0x07,
+
+	Bright 		= 0x08,
+	Blinking	= 0x80,
+
+	Attr		= (Black<<4)|Grey,	/* (background<<4)|foreground */
+};
+
+enum {
+	Index		= 0x3d4,	/* register-select port */
+	Data		= Index+1,	/* data port for the selected register */
+
+	Width		= 80*2,		/* bytes per row: character + attribute per cell */
+	Height		= 25,
+
+	/* cells at the very end of the screen kept for cgapost() */
+	Poststrlen	= 0,
+	Postcodelen	= 2,
+	Postlen		= Poststrlen+Postcodelen,
+};
+
+#define CGA		(BIOSSEG(0xb800))
+
+static Lock cgalock;
+static int cgapos;		/* byte offset of the cursor within CGA */
+static int cgainitdone;
+
+/* Select register index through the Index port and read its low 8 bits. */
+static int
+cgaregr(int index)
+{
+	outb(Index, index);
+	return inb(Data) & 0xff;
+}
+
+/* Select register index through the Index port and write data to it. */
+static void
+cgaregw(int index, int data)
+{
+	outb(Index, index);
+	outb(Data, data);
+}
+
+/*
+ * Move the hardware cursor to cgapos: registers 0x0e/0x0f take the
+ * high/low byte of the cell number (cgapos/2).  Also sets the
+ * attribute byte of the cell under the cursor.
+ */
+static void
+cgacursor(void)
+{
+	uchar *cga;
+
+	cgaregw(0x0e, (cgapos/2>>8) & 0xff);
+	cgaregw(0x0f, cgapos/2 & 0xff);
+
+	cga = CGA;
+	cga[cgapos+1] = Attr;
+}
+
+/*
+ * extern, so we could use it to debug things like
+ * lock() if necessary.
+ */ +void +cgaputc(int c) +{ + int i; + uchar *cga, *p; + + cga = CGA; + + if(c == '\n'){ + cgapos = cgapos/Width; + cgapos = (cgapos+1)*Width; + } + else if(c == '\t'){ + i = 8 - ((cgapos/2)&7); + while(i-- > 0) + cgaputc(' '); + } + else if(c == '\b'){ + if(cgapos >= 2) + cgapos -= 2; + cgaputc(' '); + cgapos -= 2; + } + else{ + cga[cgapos++] = c; + cga[cgapos++] = Attr; + } + if(cgapos >= (Width*Height)-Postlen*2){ + memmove(cga, &cga[Width], Width*(Height-1)); + p = &cga[Width*(Height-1)-Postlen*2]; + for(i = 0; i < Width/2; i++){ + *p++ = ' '; + *p++ = Attr; + } + cgapos -= Width; + } + cgacursor(); +} + +/* + * debug + */ +void +cgaprinthex(uintptr x) +{ + char str[30]; + char *s; + static char dig[] = "0123456789abcdef"; + + str[29] = 0; + s = &str[29]; + while(x != 0){ + *--s = dig[x&0xF]; + x >>= 4; + } + while(*s != 0) + cgaputc(*s++); + cgaputc('\n'); +} + +void +cgaconsputs(char* s, int n) +{ + ilock(&cgalock); + while(n-- > 0) + cgaputc(*s++); + iunlock(&cgalock); +} + +void +cgapost(int code) +{ + uchar *cga; + + static char hex[] = "0123456789ABCDEF"; + + cga = CGA; + cga[Width*Height-Postcodelen*2] = hex[(code>>4) & 0x0f]; + cga[Width*Height-Postcodelen*2+1] = Attr; + cga[Width*Height-Postcodelen*2+2] = hex[code & 0x0f]; + cga[Width*Height-Postcodelen*2+3] = Attr; +} + +void +cgainit(void) +{ + ilock(&cgalock); + cgapos = cgaregr(0x0e)<<8; + cgapos |= cgaregr(0x0f); + cgapos *= 2; + cgainitdone = 1; + iunlock(&cgalock); +} diff -Nru 0/sys/src/nix/k10/crap.c 4/sys/src/nix/k10/crap.c --- 0/sys/src/nix/k10/crap.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/crap.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,129 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* +Conf conf; +char *confname[1] = { + "console", +}; +char *confval[1] = { + "0 b115200", +}; +int nconf = nelem(confname); + */ + +/* + * Where configuration info is left for the loaded programme. 
+ * This will turn into a structure as more is done by the boot loader
+ * (e.g. why parse the .ini file twice?).
+ * There are 3584 bytes available at CONFADDR.
+ */
+#define CONFADDR	PTR2UINT(KADDR(0x0001200))
+
+#define BOOTLINE	((char*)CONFADDR)
+#define BOOTLINELEN	64
+#define BOOTARGS	((char*)(CONFADDR+BOOTLINELEN))
+#define	BOOTARGSLEN	(4096-0x200-BOOTLINELEN)
+#define	MAXCONF		64
+
+char *confname[MAXCONF];
+char *confval[MAXCONF];
+int nconf;
+
+/*
+ * Parse the name=value lines left at BOOTARGS into confname[] and
+ * confval[].  The buffer is edited in place: '\r' is dropped, '\t'
+ * becomes a space, and each '=' is overwritten with a NUL so the
+ * stored pointers refer directly into the buffer.  Lines starting
+ * with '#' and lines without '=' are ignored.
+ */
+void
+crapoptions(void)
+{
+	long i, n;
+	char *cp, *line[MAXCONF], *p, *q;
+
+	/*
+	 * parse configuration args from dos file plan9.ini
+	 */
+	cp = BOOTARGS;	/* where b.com leaves its config */
+	cp[BOOTARGSLEN-1] = 0;
+
+	/*
+	 * Strip out '\r', change '\t' -> ' '.
+	 */
+	p = cp;
+	for(q = cp; *q; q++){
+		if(*q == '\r')
+			continue;
+		if(*q == '\t')
+			*q = ' ';
+		*p++ = *q;
+	}
+	*p = 0;
+
+	n = getfields(cp, line, MAXCONF, 1, "\n");
+	for(i = 0; i < n; i++){
+		if(*line[i] == '#')
+			continue;
+		cp = strchr(line[i], '=');
+		if(cp == nil)
+			continue;
+		*cp++ = '\0';
+		confname[nconf] = line[i];
+		confval[nconf] = cp;
+		nconf++;
+	}
+}
+
+/*
+ * Return the value recorded for name (compared with cistrcmp),
+ * or 0 if there is no such entry.
+ */
+char*
+getconf(char *name)
+{
+	int i;
+
+	for(i = 0; i < nconf; i++)
+		if(cistrcmp(confname[i], name) == 0)
+			return confval[i];
+	return 0;
+}
+
+/*
+ * Pass every configuration entry to ksetenv with last argument 1;
+ * entries whose name does not start with '*' are additionally
+ * passed with last argument 0.
+ */
+void
+confsetenv(void)
+{
+	int i;
+
+	for(i = 0; i < nconf; i++){
+		if(confname[i][0] != '*')
+			ksetenv(confname[i], confval[i], 0);
+		ksetenv(confname[i], confval[i], 1);
+	}
+}
+
+/*
+ * Look up the configuration entry named class followed by ctlrno
+ * (e.g. "ether0") and parse its options into isa.  Returns 0 if there
+ * is no such entry, 1 otherwise.  Recognised options (type=, port=,
+ * irq=, dma=, mem=, size=, freq=) set the matching field; every
+ * token, recognised or not, is left in isa->opt[].  Fields for
+ * options not present are not touched, apart from type which
+ * defaults to "".
+ */
+int
+isaconfig(char *class, int ctlrno, ISAConf *isa)
+{
+	char cc[32], *p;
+	int i;
+
+	snprint(cc, sizeof cc, "%s%d", class, ctlrno);
+	p = getconf(cc);
+	if(p == nil)
+		return 0;
+
+	isa->type = "";
+	isa->nopt = tokenize(p, isa->opt, NISAOPT);
+	for(i = 0; i < isa->nopt; i++){
+		p = isa->opt[i];
+		if(cistrncmp(p, "type=", 5) == 0)
+			isa->type = p + 5;
+		else if(cistrncmp(p, "port=", 5) == 0)
+			isa->port = strtoul(p+5, &p, 0);
+		else if(cistrncmp(p, "irq=", 4) == 0)
+			isa->irq = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "dma=", 4) == 0)
+			isa->dma = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "mem=", 4) == 0)
+			isa->mem = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "size=", 5) == 0)
+			isa->size = strtoul(p+5, &p, 0);
+		else if(cistrncmp(p, "freq=", 5) == 0)
+			isa->freq = strtoul(p+5, &p, 0);
+	}
+	return 1;
+}
diff -Nru 0/sys/src/nix/k10/dat.h 4/sys/src/nix/k10/dat.h
--- 0/sys/src/nix/k10/dat.h Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/k10/dat.h Wed Feb 6 00:00:00 2013
@@ -0,0 +1,424 @@
+typedef struct ACVctl ACVctl;
+typedef struct Conf Conf;
+typedef struct Confmem Confmem;
+typedef struct Fxsave Fxsave;
+typedef struct ICC ICC;
+typedef struct ICCparms ICCparms;
+typedef struct ISAConf ISAConf;
+typedef struct Label Label;
+typedef struct Lock Lock;
+typedef struct MCPU MCPU;
+typedef struct MFPU MFPU;
+typedef struct MMMU MMMU;
+typedef struct NIX NIX;
+typedef struct Mach Mach;
+typedef u64int Mpl;
+typedef struct Page Page;
+typedef struct Pcidev Pcidev;
+typedef struct PFPU PFPU;
+typedef struct PmcCtr PmcCtr;
+typedef struct PmcCtl PmcCtl;
+typedef struct PmcWait PmcWait;
+typedef struct PMMU PMMU;
+typedef struct PNOTIFY PNOTIFY;
+typedef u64int PTE;
+typedef struct Proc Proc;
+typedef struct Sys Sys;
+typedef u64int uintmem;				/* Physical address (hideous) */
+typedef struct Ureg Ureg;
+typedef struct Vctl Vctl;
+
+#pragma incomplete Ureg
+
+#define MAXSYSARG	5	/* for mount(fd, afd, mpt, flag, arg) */
+
+/*
+ *  parameters for sysproc.c
+ */
+#define AOUT_MAGIC	(S_MAGIC)
+
+/*
+ *  machine dependent definitions used by ../port/portdat.h
+ */
+struct Lock
+{
+	u32int	key;
+	int	isilock;
+	Mpl	pl;
+	uintptr	pc;
+	Proc*	p;
+	Mach*	m;
+	uvlong	lockcycles;
+};
+
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+struct Fxsave {
+	u16int	fcw;			/* x87 control word */
+	u16int	fsw;			/* x87 status word */
+	u8int	ftw;			/* x87 tag word */
+	u8int	zero;			/* 0 */
+	u16int	fop;			/* last x87 opcode */
+	u64int	rip;			/* last x87 instruction pointer */
+	u64int	rdp;			/* last x87 data pointer */
+	u32int	mxcsr;			/* MMX control and status */
+	u32int	mxcsrmask;		/* supported MMX feature bits */
+	uchar	st[128];		/* shared 64-bit media and x87 regs */
+	uchar	xmm[256];		/* 128-bit media regs */
+	uchar	ign[96];		/* reserved, ignored */
+};
+
+/*
+ *  FPU stuff in Proc
+ */
+struct PFPU {
+	int	fpustate;
+	uchar	fxsave[sizeof(Fxsave)+15];	/* 15 bytes larger than an Fxsave */
+	void*	fpusave;
+};
+
+/*
+ *  MMU stuff in Proc
+ */
+#define NCOLOR 1
+struct PMMU
+{
+	Page*	mmuptp[4];		/* page table pages for each level */
+};
+
+/*
+ *  things saved in the Proc structure during a notify
+ */
+struct PNOTIFY
+{
+	int	emptiness;
+};
+
+struct Confmem
+{
+	uintptr	base;
+	usize	npage;
+	uintptr	kbase;
+	uintptr	klimit;
+};
+
+struct Conf
+{
+	ulong	nproc;		/* processes */
+	Confmem	mem[4];		/* physical memory */
+	uvlong	npage;		/* total physical pages of memory */
+	usize	upages;		/* user page pool */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* max interrupt time allocation in bytes */
+	ulong	nimage;		/* number of page cache image headers */
+};
+
+enum
+{
+	NPGSZ = 4	/* # of supported pages sizes in Mach */
+};
+
+#include "../port/portdat.h"
+
+/*
+ *  CPU stuff in Mach.
+ */
+struct MCPU
+{
+	u32int	cpuinfo[3][4];		/* CPUID Functions 0, 1, and 5 (n.b.: 2-4 are invalid) */
+	int	ncpuinfos;		/* number of standard entries */
+	int	ncpuinfoe;		/* number of extended entries */
+	int	isintelcpu;		/*  */
+};
+
+/*
+ *  FPU stuff in Mach.
+ */
+struct MFPU
+{
+	u16int	fcw;			/* x87 control word */
+	u32int	mxcsr;			/* MMX control and status */
+	u32int	mxcsrmask;		/* supported MMX feature bits */
+};
+
+struct NIX
+{
+	ICC*	icc;			/* inter-core call */
+	int	nixrole;
+	int	nnixrole;
+};
+
+/*
+ *  MMU stuff in Mach.
+ */
+struct MMMU
+{
+	uintptr	cr2;
+	Page*	pml4;			/* pml4 for this processor */
+	PTE*	pmap;			/* unused as of yet */
+
+	uint	pgszlg2[NPGSZ];		/* per Mach or per Sys? */
+	uint	pgszmask[NPGSZ];
+	uint	pgsz[NPGSZ];
+	int	npgsz;
+
+	Page	pml4kludge;		/* NIX KLUDGE: we need a page */
+};
+
+/*
+ * Inter core calls
+ */
+enum
+{
+	ICCLNSZ =	128,	/* Cache line size for inter core calls */
+
+
+	ICCOK = 0,		/* Return codes: Ok; trap; syscall */
+	ICCTRAP,
+	ICCSYSCALL
+};
+
+struct ICC
+{
+	/* fn is kept in its own cache line */
+	union{
+		void	(*fn)(void);
+		uchar	_ln1_[ICCLNSZ];
+	};
+	int	flushtlb;	/* on the AC, before running fn */
+	int	rc;		/* return code from AC to TC */
+	char*	note;		/* to be posted in the TC after returning */
+	uchar	data[ICCLNSZ];	/* sent to the AC */
+};
+
+/*
+ * hw perf counters
+ */
+struct PmcCtl {
+	Ref;
+	u32int coreno;
+	int enab;
+	int user;
+	int os;
+	int nodesc;
+	char descstr[KNAMELEN];
+	int reset;
+};
+
+struct PmcWait{
+	Ref;
+	Rendez	r;
+	PmcWait*	next;
+};
+
+struct PmcCtr{
+	int stale;
+	PmcWait *wq;
+	u64int ctr;
+	int ctrset;
+	PmcCtl;
+	int ctlset;
+};
+
+enum {
+	PmcMaxCtrs = 4,		/* size of the pmc[] array in Mach */
+	PmcIgn = 0,
+	PmcGet = 1,
+	PmcSet = 2,
+};
+
+/*
+ * Per processor information.
+ * + * The offsets of the first few elements may be known + * to low-level assembly code, so do not re-order: + * machno - no dependency, convention + * splpc - splhi, spllo, splx + * proc - syscallentry + * stack - acsyscall + * + * MMMU, MFPU, MCPU and NIX are embedded as unnamed members, so their + * fields are accessed directly through the Mach (e.g. m->pml4). + */ +struct Mach +{ + int machno; /* physical id of processor */ + uintptr splpc; /* pc of last caller to splhi */ + + Proc* proc; /* current process on this processor */ + uintptr stack; /* offset known to acsyscall, see above */ + + int apicno; /* compared against SRAT lapic ids by corecolor() in devacpi.c */ + + MMMU; + + uchar* vsvm; + void* gdt; + void* tss; + + ulong ticks; /* of the clock since boot time */ + Label sched; /* scheduler wakeup */ + Lock alarmlock; /* access to alarm list */ + void* alarm; /* alarms bound to this clock */ + int inclockintr; + + ulong qstart; /* time when up started running */ + int qexpired; /* quantum expired */ + + int tlbfault; /* NOTE(review): tlbfault..intr look like event counters; confirm against users */ + int tlbpurge; + int pfault; + int cs; + int syscall; + int intr; + int mmuflush; /* make current proc flush its mmu state */ + int ilockdepth; + Perf perf; /* performance counters */ + int inidle; /* profiling */ + int lastintr; + + Lock apictimerlock; + uvlong cyclefreq; /* Frequency of user readable cycle counter */ + vlong cpuhz; + int cpumhz; + u64int rdtsc; + + Lock pmclock; + PmcCtr pmc[PmcMaxCtrs]; /* hw perf counters, one slot per counter */ + + Lock sipilock; + Rendez sipir; + + MFPU; + MCPU; + + NIX; +}; + +/* + * This is the low memory map, between 0x100000 and 0x110000. + * It is located there to allow fundamental data structures to be + * created and used before knowing where free memory begins + * (e.g. there may be modules located after the kernel BSS end). + * The layout is known in the bootstrap code in l32p.s. + * It is logically two parts: the per processor data structures + * for the bootstrap processor (stack, Mach, vsvm, and page tables), + * and the global information about the system (syspage, ptrpage). + * Some of the elements must be aligned on page boundaries, hence + * the unions.
+ * Each union below pairs the real data with a byte array, which + * pads that element out to the size of the array. + */ +struct Sys { + uchar machstk[MACHSTKSZ]; + + PTE pml4[PTSZ/sizeof(PTE)]; /* page tables, each PTSZ bytes */ + PTE pdp[PTSZ/sizeof(PTE)]; + PTE pd[PTSZ/sizeof(PTE)]; + PTE pt[PTSZ/sizeof(PTE)]; + + uchar vsvmpage[4*KiB]; + + union { + Mach mach; + uchar machpage[MACHSZ]; /* pads mach to MACHSZ bytes */ + }; + + union { + struct { + u64int pmstart; /* physical memory */ + u64int pmoccupied; /* how much is occupied */ + u64int pmend; /* total span */ + + uintptr vmstart; /* base address for malloc */ + uintptr vmunused; /* 1st unused va */ + uintptr vmunmapped; /* 1st unmapped va */ + uintptr vmend; /* 1st unusable va */ + u64int epoch; /* crude time synchronisation */ + + int nc[NIXROLES]; /* number of online processors */ + int nmach; + int load; + ulong ticks; /* of the clock since boot time */ + }; + uchar syspage[4*KiB]; /* pads the fields above to one 4KiB page */ + }; + + union { + Mach* machptr[MACHMAX]; /* indexed by core number, nil if absent (see corecolor() in devacpi.c) */ + uchar ptrpage[4*KiB]; + }; + + uchar _57344_[2][4*KiB]; /* unused */ +}; + +extern Sys* sys; + +/* + * KMap + * kunmap() expands to nothing and VA() just converts the pointer + * with PTR2UINT. + */ +typedef void KMap; +extern KMap* kmap(Page*); + +#define kunmap(k) +#define VA(k) PTR2UINT(k) + +struct +{ + Lock; + int nonline; /* # of active CPUs */ + int nbooting; /* # of CPUs waiting for the bTC to go */ + int exiting; /* shutdown */ + int ispanic; /* shutdown in response to a panic */ + int thunderbirdsarego; /* lets the added processors continue */ +}active; + +/* + * a parsed plan9.ini line + */ +#define NISAOPT 8 + +struct ISAConf { + char *type; + uintmem port; + int irq; + ulong dma; + uintmem mem; + usize size; + int freq; + + int nopt; /* NOTE(review): presumably the number of entries used in opt[]; confirm */ + char *opt[NISAOPT]; +}; + +/* + * The Mach structures must be available via the per-processor + * MMU information array machptr, mainly for disambiguation and access to + * the clock which is only maintained by the bootstrap processor (0). + */ +extern register Mach* m; /* R15 */ +extern register Proc* up; /* R14 */ + +extern uintptr kseg0; + +extern char*rolename[]; + + +#pragma varargck type "P" uintmem +#pragma varargck type "R" u64int +#pragma varargck type "W" uintptr + +/* + * Horrid.
+ * A file defines _DBGC_ to select its entry in dbgflg[]; + * without it DBGFLG is the constant 0 and DBG() prints nothing. + */ +#ifdef _DBGC_ +#define DBGFLG (dbgflg[_DBGC_]) +#else +#define DBGFLG (0) +#endif /* _DBGC_ */ + +#define DBG(...) if(!DBGFLG){}else dbgprint(__VA_ARGS__) /* if/else form: DBG(...); stays one statement */ + +extern char dbgflg[256]; + +#define dbgprint print /* for now */ + diff -Nru 0/sys/src/nix/k10/devacpi.c 4/sys/src/nix/k10/devacpi.c --- 0/sys/src/nix/k10/devacpi.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/devacpi.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1717 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "mp.h" +#include "acpi.h" + +/* + * ACPI 4.0 Support. + * Still WIP. + * + * This driver locates the tables through the RSDT/XSDT and parses + * the ones listed in ptables below (FADT, MADT, SRAT, SLIT, MSCT). + * All other tables are mapped and kept there + * for the user-level interpreter. + */ + +#define l16get(p) (((p)[1]<<8)|(p)[0]) /* little-endian 16-bit load from a byte pointer */ +#define l32get(p) (((u32int)l16get(p+2)<<16)|l16get(p)) /* little-endian 32-bit load; evaluates p more than once */ +static Atable* acpifadt(uchar*, int); +static Atable* acpitable(uchar*, int); +static Atable* acpimadt(uchar*, int); +static Atable* acpimsct(uchar*, int); +static Atable* acpisrat(uchar*, int); +static Atable* acpislit(uchar*, int); + +#pragma varargck type "G" Gas* + +static Cmdtab ctls[] = +{ + {CMregion, "region", 6}, /* NOTE(review): third field is presumably the Cmdtab argument count; confirm */ + {CMgpe, "gpe", 3}, +}; + +static Dirtab acpidir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "acpictl", {Qctl}, 0, 0666, + "acpitbl", {Qtbl}, 0, 0444, + "acpiregio", {Qio}, 0, 0666, +}; + +/* + * The DSDT is always given to the user interpreter. + * Tables listed here are also loaded from the XSDT: + * MSCT, MADT, and FADT are processed by us, because they are + * required to do early initialization before we have user processes. + * Other tables are given to the user level interpreter for + * execution.
+ */ +static Parse ptables[] = +{ + "FACP", acpifadt, + "APIC", acpimadt, + "SRAT", acpisrat, + "SLIT", acpislit, + "MSCT", acpimsct, + "SSDT", acpitable, +}; + +static Facs* facs; /* Firmware ACPI control structure */ +static Fadt fadt; /* Fixed ACPI description. To reach ACPI registers */ +static Xsdt* xsdt; /* XSDT table */ +static Atable* tfirst; /* loaded DSDT/SSDT/... tables */ +static Atable* tlast; /* pointer to last table */ + Madt* apics; /* APIC info */ +static Srat* srat; /* System resource affinity, used by physalloc */ +static Slit* slit; /* System locality information table used by the scheduler */ +static Msct* msct; /* Maximum system characteristics table */ +static Reg* reg; /* region used for I/O */ +static Gpe* gpes; /* General purpose events */ +static int ngpes; + +static char* regnames[] = { + "mem", "io", "pcicfg", "embed", + "smb", "cmos", "pcibar", +}; + +static char* +acpiregstr(int id) +{ + static char buf[20]; /* BUG */ + + if(id >= 0 && id < nelem(regnames)) + return regnames[id]; + seprint(buf, buf+sizeof(buf), "spc:%#x", id); + return buf; +} + +static int +acpiregid(char *s) +{ + int i; + + for(i = 0; i < nelem(regnames); i++) + if(strcmp(regnames[i], s) == 0) + return i; + return -1; +} + +static u64int +l64get(u8int* p) +{ + /* + * Doing this as a define + * #define l64get(p) (((u64int)l32get(p+4)<<32)|l32get(p)) + * causes 8c to abort with "out of fixed registers" in + * rsdlink() below. 
+ */ + return (((u64int)l32get(p+4)<<32)|l32get(p)); +} + +static u8int +mget8(uintptr p, void*) +{ + u8int *cp = (u8int*)p; + return *cp; +} + +static void +mset8(uintptr p, u8int v, void*) +{ + u8int *cp = (u8int*)p; + *cp = v; +} + +static u16int +mget16(uintptr p, void*) +{ + u16int *cp = (u16int*)p; + return *cp; +} + +static void +mset16(uintptr p, u16int v, void*) +{ + u16int *cp = (u16int*)p; + *cp = v; +} + +static u32int +mget32(uintptr p, void*) +{ + u32int *cp = (u32int*)p; + return *cp; +} + +static void +mset32(uintptr p, u32int v, void*) +{ + u32int *cp = (u32int*)p; + *cp = v; +} + +static u64int +mget64(uintptr p, void*) +{ + u64int *cp = (u64int*)p; + return *cp; +} + +static void +mset64(uintptr p, u64int v, void*) +{ + u64int *cp = (u64int*)p; + *cp = v; +} + +static u8int +ioget8(uintptr p, void*) +{ + return inb(p); +} + +static void +ioset8(uintptr p, u8int v, void*) +{ + outb(p, v); +} + +static u16int +ioget16(uintptr p, void*) +{ + return ins(p); +} + +static void +ioset16(uintptr p, u16int v, void*) +{ + outs(p, v); +} + +static u32int +ioget32(uintptr p, void*) +{ + return inl(p); +} + +static void +ioset32(uintptr p, u32int v, void*) +{ + outl(p, v); +} + +static u8int +cfgget8(uintptr p, void* r) +{ + Reg *ro = r; + Pcidev d; + + d.tbdf = ro->tbdf; + return pcicfgr8(&d, p); +} + +static void +cfgset8(uintptr p, u8int v, void* r) +{ + Reg *ro = r; + Pcidev d; + + d.tbdf = ro->tbdf; + pcicfgw8(&d, p, v); +} + +static u16int +cfgget16(uintptr p, void* r) +{ + Reg *ro = r; + Pcidev d; + + d.tbdf = ro->tbdf; + return pcicfgr16(&d, p); +} + +static void +cfgset16(uintptr p, u16int v, void* r) +{ + Reg *ro = r; + Pcidev d; + + d.tbdf = ro->tbdf; + pcicfgw16(&d, p, v); +} + +static u32int +cfgget32(uintptr p, void* r) +{ + Reg *ro = r; + Pcidev d; + + d.tbdf = ro->tbdf; + return pcicfgr32(&d, p); +} + +static void +cfgset32(uintptr p, u32int v, void* r) +{ + Reg *ro = r; + Pcidev d; + + d.tbdf = ro->tbdf; + pcicfgw32(&d, p, v); +} + +static 
Regio memio = +{ + nil, + mget8, mset8, mget16, mset16, + mget32, mset32, mget64, mset64 +}; + +static Regio ioio = +{ + nil, + ioget8, ioset8, ioget16, ioset16, + ioget32, ioset32, nil, nil +}; + +static Regio cfgio = +{ + nil, + cfgget8, cfgset8, cfgget16, cfgset16, + cfgget32, cfgset32, nil, nil +}; + +/* + * Copy memory, 1/2/4/8-bytes at a time, to/from a region. + */ +static long +regcpy(Regio *dio, uintptr da, Regio *sio, uintptr sa, long len, int align) +{ + int n, i; + + DBG("regcpy %#ullx %#ullx %#ulx %#ux\n", da, sa, len, align); + if((len%align) != 0) + print("regcpy: bug: copy not aligned. truncated\n"); + n = len/align; + for(i = 0; i < n; i++){ + switch(align){ + case 1: + DBG("cpy8 %#p %#p\n", da, sa); + dio->set8(da, sio->get8(sa, sio->arg), dio->arg); + break; + case 2: + DBG("cpy16 %#p %#p\n", da, sa); + dio->set16(da, sio->get16(sa, sio->arg), dio->arg); + break; + case 4: + DBG("cpy32 %#p %#p\n", da, sa); + dio->set32(da, sio->get32(sa, sio->arg), dio->arg); + break; + case 8: + DBG("cpy64 %#p %#p\n", da, sa); + // dio->set64(da, sio->get64(sa, sio->arg), dio->arg); + break; + default: + panic("regcpy: align bug"); + } + da += align; + sa += align; + } + return n*align; +} + +/* + * Perform I/O within region in access units of accsz bytes. + * All units in bytes. + */ +static long +regio(Reg *r, void *p, ulong len, uintptr off, int iswr) +{ + Regio rio; + uintptr rp; + + DBG("reg%s %s %#p %#ullx %#lx sz=%d\n", + iswr ? "out" : "in", r->name, p, off, len, r->accsz); + rp = 0; + if(off + len > r->len){ + print("regio: access outside limits"); + len = r->len - off; + } + if(len <= 0){ + print("regio: zero len\n"); + return 0; + } + switch(r->spc){ + case Rsysmem: + // XXX should map only what we are going to use + // A region might be too large. 
+ if(r->p == nil) + r->p = vmap(r->base, len); + if(r->p == nil) + error("regio: vmap failed"); + rp = (uintptr)r->p + off; + rio = memio; + break; + case Rsysio: + rp = r->base + off; + rio = ioio; + break; + case Rpcicfg: + rp = r->base + off; + rio = cfgio; + rio.arg = r; + break; + case Rpcibar: + case Rembed: + case Rsmbus: + case Rcmos: + case Ripmi: + case Rfixedhw: + print("regio: reg %s not supported\n", acpiregstr(r->spc)); + error("region not supported"); + } + if(iswr) + regcpy(&rio, rp, &memio, (uintptr)p, len, r->accsz); + else + regcpy(&memio, (uintptr)p, &rio, rp, len, r->accsz); + return len; +} + +static Atable* +newtable(uchar *p) +{ + Atable *t; + Sdthdr *h; + + t = malloc(sizeof(Atable)); + if(t == nil) + panic("no memory for more aml tables"); + t->tbl = p; + h = (Sdthdr*)t->tbl; + t->is64 = h->rev >= 2; + t->dlen = l32get(h->length) - Sdthdrsz; + memmove(t->sig, h->sig, sizeof(h->sig)); + t->sig[sizeof(t->sig)-1] = 0; + memmove(t->oemid, h->oemid, sizeof(h->oemid)); + t->oemtblid[sizeof(t->oemtblid)-1] = 0; + memmove(t->oemtblid, h->oemtblid, sizeof(h->oemtblid)); + t->oemtblid[sizeof(t->oemtblid)-1] = 0; + t->next = nil; + if(tfirst == nil) + tfirst = tlast = t; + else{ + tlast->next = t; + tlast = t; + } + return t; +} + +static void* +sdtchecksum(void* addr, int len) +{ + u8int *p, sum; + + sum = 0; + for(p = addr; len-- > 0; p++) + sum += *p; + if(sum == 0) + return addr; + + return nil; +} + +static void * +sdtmap(uintmem pa, int *n, int cksum) +{ + Sdthdr* sdt; + + sdt = vmap(pa, sizeof(Sdthdr)); + if(sdt == nil){ + DBG("acpi: vmap1: nil\n"); + return nil; + } + *n = l32get(sdt->length); + vunmap(sdt, sizeof(Sdthdr)); + if((sdt = vmap(pa, *n)) == nil){ + DBG("acpi: nil vmap\n"); + return nil; + } + if(cksum != 0 && sdtchecksum(sdt, *n) == nil){ + DBG("acpi: SDT: bad checksum\n"); + vunmap(sdt, sizeof(Sdthdr)); + return nil; + } + return sdt; +} + +static int +loadfacs(uintmem pa) +{ + int n; + + facs = sdtmap(pa, &n, 0); + if(facs == 
nil) + return -1; + if(memcmp(facs, "FACS", 4) != 0){ + vunmap(facs, n); + facs = nil; + return -1; + } + /* no unmap */ + + DBG("acpi: facs: hwsig: %#ux\n", facs->hwsig); + DBG("acpi: facs: wakingv: %#ux\n", facs->wakingv); + DBG("acpi: facs: flags: %#ux\n", facs->flags); + DBG("acpi: facs: glock: %#ux\n", facs->glock); + DBG("acpi: facs: xwakingv: %#llux\n", facs->xwakingv); + DBG("acpi: facs: vers: %#ux\n", facs->vers); + DBG("acpi: facs: ospmflags: %#ux\n", facs->ospmflags); + return 0; +} + +static void +loaddsdt(uintmem pa) +{ + int n; + uchar *dsdtp; + + dsdtp = sdtmap(pa, &n, 1); + if(dsdtp == nil) + return; + if(acpitable(dsdtp, n) == nil) + vunmap(dsdtp, n); +} + +static void +gasget(Gas *gas, uchar *p) +{ + gas->spc = p[0]; + gas->len = p[1]; + gas->off = p[2]; + gas->accsz = p[3]; + gas->addr = l64get(p+4); +} + +static void +dumpfadt(Fadt *fp) +{ + if(DBGFLG == 0) + return; + + DBG("acpi: fadt: facs: %#ux\n", fp->facs); + DBG("acpi: fadt: dsdt: %#ux\n", fp->dsdt); + DBG("acpi: fadt: pmprofile: %#ux\n", fp->pmprofile); + DBG("acpi: fadt: sciint: %#ux\n", fp->sciint); + DBG("acpi: fadt: smicmd: %#ux\n", fp->smicmd); + DBG("acpi: fadt: acpienable: %#ux\n", fp->acpienable); + DBG("acpi: fadt: acpidisable: %#ux\n", fp->acpidisable); + DBG("acpi: fadt: s4biosreq: %#ux\n", fp->s4biosreq); + DBG("acpi: fadt: pstatecnt: %#ux\n", fp->pstatecnt); + DBG("acpi: fadt: pm1aevtblk: %#ux\n", fp->pm1aevtblk); + DBG("acpi: fadt: pm1bevtblk: %#ux\n", fp->pm1bevtblk); + DBG("acpi: fadt: pm1acntblk: %#ux\n", fp->pm1acntblk); + DBG("acpi: fadt: pm1bcntblk: %#ux\n", fp->pm1bcntblk); + DBG("acpi: fadt: pm2cntblk: %#ux\n", fp->pm2cntblk); + DBG("acpi: fadt: pmtmrblk: %#ux\n", fp->pmtmrblk); + DBG("acpi: fadt: gpe0blk: %#ux\n", fp->gpe0blk); + DBG("acpi: fadt: gpe1blk: %#ux\n", fp->gpe1blk); + DBG("acpi: fadt: pm1evtlen: %#ux\n", fp->pm1evtlen); + DBG("acpi: fadt: pm1cntlen: %#ux\n", fp->pm1cntlen); + DBG("acpi: fadt: pm2cntlen: %#ux\n", fp->pm2cntlen); + DBG("acpi: fadt: 
pmtmrlen: %#ux\n", fp->pmtmrlen); + DBG("acpi: fadt: gpe0blklen: %#ux\n", fp->gpe0blklen); + DBG("acpi: fadt: gpe1blklen: %#ux\n", fp->gpe1blklen); + DBG("acpi: fadt: gp1base: %#ux\n", fp->gp1base); + DBG("acpi: fadt: cstcnt: %#ux\n", fp->cstcnt); + DBG("acpi: fadt: plvl2lat: %#ux\n", fp->plvl2lat); + DBG("acpi: fadt: plvl3lat: %#ux\n", fp->plvl3lat); + DBG("acpi: fadt: flushsz: %#ux\n", fp->flushsz); + DBG("acpi: fadt: flushstride: %#ux\n", fp->flushstride); + DBG("acpi: fadt: dutyoff: %#ux\n", fp->dutyoff); + DBG("acpi: fadt: dutywidth: %#ux\n", fp->dutywidth); + DBG("acpi: fadt: dayalrm: %#ux\n", fp->dayalrm); + DBG("acpi: fadt: monalrm: %#ux\n", fp->monalrm); + DBG("acpi: fadt: century: %#ux\n", fp->century); + DBG("acpi: fadt: iapcbootarch: %#ux\n", fp->iapcbootarch); + DBG("acpi: fadt: flags: %#ux\n", fp->flags); + DBG("acpi: fadt: resetreg: %G\n", &fp->resetreg); + DBG("acpi: fadt: resetval: %#ux\n", fp->resetval); + DBG("acpi: fadt: xfacs: %#llux\n", fp->xfacs); + DBG("acpi: fadt: xdsdt: %#llux\n", fp->xdsdt); + DBG("acpi: fadt: xpm1aevtblk: %G\n", &fp->xpm1aevtblk); + DBG("acpi: fadt: xpm1bevtblk: %G\n", &fp->xpm1bevtblk); + DBG("acpi: fadt: xpm1acntblk: %G\n", &fp->xpm1acntblk); + DBG("acpi: fadt: xpm1bcntblk: %G\n", &fp->xpm1bcntblk); + DBG("acpi: fadt: xpm2cntblk: %G\n", &fp->xpm2cntblk); + DBG("acpi: fadt: xpmtmrblk: %G\n", &fp->xpmtmrblk); + DBG("acpi: fadt: xgpe0blk: %G\n", &fp->xgpe0blk); + DBG("acpi: fadt: xgpe1blk: %G\n", &fp->xgpe1blk); +} + +static Atable* +acpifadt(uchar *p, int) +{ + Fadt *fp; + + fp = &fadt; + DBG("acpifadt %p\n", p); + fp->facs = l32get(p + 36); + fp->dsdt = l32get(p + 40); + fp->pmprofile = p[45]; + fp->sciint = l16get(p+46); + fp->smicmd = l32get(p+48); + fp->acpienable = p[52]; + fp->acpidisable = p[53]; + fp->s4biosreq = p[54]; + fp->pstatecnt = p[55]; + fp->pm1aevtblk = l32get(p+56); + fp->pm1bevtblk = l32get(p+60); + fp->pm1acntblk = l32get(p+64); + fp->pm1bcntblk = l32get(p+68); + fp->pm2cntblk = l32get(p+72); + 
fp->pmtmrblk = l32get(p+76); + fp->gpe0blk = l32get(p+80); + fp->gpe1blk = l32get(p+84); + fp->pm1evtlen = p[88]; + fp->pm1cntlen = p[89]; + fp->pm2cntlen = p[90]; + fp->pmtmrlen = p[91]; + fp->gpe0blklen = p[92]; + fp->gpe1blklen = p[93]; + fp->gp1base = p[94]; + fp->cstcnt = p[95]; + fp->plvl2lat = l16get(p+96); + fp->plvl3lat = l16get(p+98); + fp->flushsz = l16get(p+100); + fp->flushstride = l16get(p+102); + fp->dutyoff = p[104]; + fp->dutywidth = p[105]; + fp->dayalrm = p[106]; + fp->monalrm = p[107]; + fp->century = p[108]; + fp->iapcbootarch = l16get(p+109); + fp->flags = l32get(p+112); + gasget(&fp->resetreg, p+116); + + fp->resetval = p[128]; + fp->xfacs = l64get(p+132); + fp->xdsdt = l64get(p+140); + gasget(&fp->xpm1aevtblk, p+148); + gasget(&fp->xpm1bevtblk, p+160); + gasget(&fp->xpm1acntblk, p+172); + gasget(&fp->xpm1bcntblk, p+184); + gasget(&fp->xpm2cntblk, p+196); + gasget(&fp->xpmtmrblk, p+208); + gasget(&fp->xgpe0blk, p+220); + gasget(&fp->xgpe1blk, p+232); + + dumpfadt(fp); + + /* If xfacs or xdsdt are either nil + * or different from their 32-bit + * counter-parts, then go with + * the 32-bit addresses (as the + * ACPICA does), since those are + * tested by BIOS manufacturers. + */ + + if(fp->xfacs != 0 && fp->xfacs == fp->facs) + loadfacs(fp->xfacs); + else + loadfacs(fp->facs); + + if(fp->xdsdt != 0 && fp->xdsdt == fp->dsdt) + loadfacs(fp->xdsdt); + else + loadfacs(fp->dsdt); + + return nil; /* can be unmapped once parsed */ +} + +static void +dumpmsct(Msct *msct) +{ + Mdom *st; + + DBG("acpi: msct: %d doms %d clkdoms %#ullx maxpa\n", + msct->ndoms, msct->nclkdoms, msct->maxpa); + for(st = msct->dom; st != nil; st = st->next) + DBG("\t[%d:%d] %d maxproc %#ullx maxmmem\n", + st->start, st->end, st->maxproc, st->maxmem); + DBG("\n"); +} + +/* + * XXX: should perhaps update our idea of available memory. + * Else we should remove this code. 
+ */ +static Atable* +acpimsct(uchar *p, int len) +{ + uchar *pe; + Mdom **stl, *st; + int off; + + msct = mallocz(sizeof(Msct), 1); + msct->ndoms = l32get(p+40) + 1; + msct->nclkdoms = l32get(p+44) + 1; + msct->maxpa = l64get(p+48); + msct->dom = nil; + stl = &msct->dom; + pe = p + len; + off = l32get(p+36); + for(p += off; p < pe; p += 22){ + st = mallocz(sizeof(Mdom), 1); + st->next = nil; + st->start = l32get(p+2); + st->end = l32get(p+6); + st->maxproc = l32get(p+10); + st->maxmem = l64get(p+14); + *stl = st; + stl = &st->next; + } + + dumpmsct(msct); + return nil; /* can be unmapped once parsed */ +} + +static void +dumpsrat(Srat *st) +{ + DBG("acpi: srat:\n"); + for(; st != nil; st = st->next) + switch(st->type){ + case SRlapic: + DBG("\tlapic: dom %d apic %d sapic %d clk %d\n", + st->lapic.dom, st->lapic.apic, + st->lapic.sapic, st->lapic.clkdom); + break; + case SRmem: + DBG("\tmem: dom %d %#ullx %#ullx %c%c\n", + st->mem.dom, st->mem.addr, st->mem.len, + st->mem.hplug?'h':'-', + st->mem.nvram?'n':'-'); + break; + case SRlx2apic: + DBG("\tlx2apic: dom %d apic %d clk %d\n", + st->lx2apic.dom, st->lx2apic.apic, + st->lx2apic.clkdom); + break; + default: + DBG("\t\n"); + } + DBG("\n"); +} + +static Atable* +acpisrat(uchar *p, int len) +{ + Srat **stl, *st; + uchar *pe; + int stlen, flags; + + if(srat != nil){ + print("acpi: two SRATs?\n"); + return nil; + } + + stl = &srat; + pe = p + len; + for(p += 48; p < pe; p += stlen){ + st = mallocz(sizeof(Srat), 1); + st->type = p[0]; + st->next = nil; + stlen = p[1]; + switch(st->type){ + case SRlapic: + st->lapic.dom = p[2] | p[9]<<24| p[10]<<16 | p[11]<<8; + st->lapic.apic = p[3]; + st->lapic.sapic = p[8]; + st->lapic.clkdom = l32get(p+12); + if(l32get(p+4) == 0){ + free(st); + st = nil; + } + break; + case SRmem: + st->mem.dom = l32get(p+2); + st->mem.addr = l64get(p+8); + st->mem.len = l64get(p+16); + flags = l32get(p+28); + if((flags&1) == 0){ /* not enabled */ + free(st); + st = nil; + }else{ + st->mem.hplug = 
flags & 2; + st->mem.nvram = flags & 4; + } + break; + case SRlx2apic: + st->lx2apic.dom = l32get(p+4); + st->lx2apic.apic = l32get(p+8); + st->lx2apic.clkdom = l32get(p+16); + if(l32get(p+12) == 0){ + free(st); + st = nil; + } + break; + default: + print("unknown SRAT structure\n"); + free(st); + st = nil; + } + if(st != nil){ + *stl = st; + stl = &st->next; + } + } + + dumpsrat(srat); + return nil; /* can be unmapped once parsed */ +} + +static void +dumpslit(Slit *sl) +{ + int i; + + DBG("acpi slit:\n"); + for(i = 0; i < sl->rowlen*sl->rowlen; i++){ + DBG("slit: %ux\n", sl->e[i/sl->rowlen][i%sl->rowlen].dist); + } +} + +static int +cmpslitent(void* v1, void* v2) +{ + SlEntry *se1, *se2; + + se1 = v1; + se2 = v2; + return se1->dist - se2->dist; +} + +static Atable* +acpislit(uchar *p, int len) +{ + uchar *pe; + int i, j, k; + SlEntry *se; + + pe = p + len; + slit = malloc(sizeof(*slit)); + slit->rowlen = l64get(p+36); + slit->e = malloc(slit->rowlen*sizeof(SlEntry*)); + for(i = 0; i < slit->rowlen; i++) + slit->e[i] = malloc(sizeof(SlEntry)*slit->rowlen); + + i = 0; + for(p += 44; p < pe; p++, i++){ + j = i/slit->rowlen; + k = i%slit->rowlen; + se = &slit->e[j][k]; + se->dom = k; + se->dist = *p; + } + dumpslit(slit); + for(i = 0; i < slit->rowlen; i++) + qsort(slit->e[i], slit->rowlen, sizeof(slit->e[0][0]), cmpslitent); + + dumpslit(slit); + return nil; /* can be unmapped once parsed */ +} + +uintmem +acpimblocksize(uintmem addr, int *dom) +{ + Srat *sl; + + for(sl = srat; sl != nil; sl = sl->next) + if(sl->type == SRmem) + if(sl->mem.addr <= addr && sl->mem.addr + sl->mem.len > addr){ + *dom = sl->mem.dom; + return sl->mem.len - (addr - sl->mem.addr); + } + return 0; +} + + +/* + * we use mp->machno (or index in Mach array) as the identifier, + * but ACPI relies on the apic identifier. 
+ */ +int +corecolor(int core) +{ + Srat *sl; + Mach *m; + static int colors[32]; + + if(core < 0 || core >= MACHMAX) + return -1; + m = sys->machptr[core]; + if(m == nil) + return -1; + + if(core >= 0 && core < nelem(colors) && colors[core] != 0) + return colors[core] - 1; + + for(sl = srat; sl != nil; sl = sl->next) + if(sl->type == SRlapic && sl->lapic.apic == m->apicno){ + if(core >= 0 && core < nelem(colors)) + colors[core] = 1 + sl->lapic.dom; + return sl->lapic.dom; + } + return -1; +} + + +int +pickcore(int mycolor, int index) +{ + int color; + int ncorepercol; + + if(slit == nil) + return index; + ncorepercol = MACHMAX/slit->rowlen; + color = slit->e[mycolor][index/ncorepercol].dom; + return color * ncorepercol + index % ncorepercol; +} + + +static void +dumpmadt(Madt *apics) +{ + Apicst *st; + + DBG("acpi: madt lapic paddr %llux pcat %d:\n", apics->lapicpa, apics->pcat); + for(st = apics->st; st != nil; st = st->next) + switch(st->type){ + case ASlapic: + DBG("\tlapic pid %d id %d\n", st->lapic.pid, st->lapic.id); + break; + case ASioapic: + case ASiosapic: + DBG("\tioapic id %d addr %#llux ibase %d\n", + st->ioapic.id, st->ioapic.addr, st->ioapic.ibase); + break; + case ASintovr: + DBG("\tintovr irq %d intr %d flags %#ux\n", + st->intovr.irq, st->intovr.intr,st->intovr.flags); + break; + case ASnmi: + DBG("\tnmi intr %d flags %#ux\n", + st->nmi.intr, st->nmi.flags); + break; + case ASlnmi: + DBG("\tlnmi pid %d lint %d flags %#ux\n", + st->lnmi.pid, st->lnmi.lint, st->lnmi.flags); + break; + case ASlsapic: + DBG("\tlsapic pid %d id %d eid %d puid %d puids %s\n", + st->lsapic.pid, st->lsapic.id, + st->lsapic.eid, st->lsapic.puid, + st->lsapic.puids); + break; + case ASintsrc: + DBG("\tintr type %d pid %d peid %d iosv %d intr %d %#x\n", + st->type, st->intsrc.pid, + st->intsrc.peid, st->intsrc.iosv, + st->intsrc.intr, st->intsrc.flags); + break; + case ASlx2apic: + DBG("\tlx2apic puid %d id %d\n", st->lx2apic.puid, st->lx2apic.id); + break; + case ASlx2nmi: 
+ DBG("\tlx2nmi puid %d intr %d flags %#ux\n", + st->lx2nmi.puid, st->lx2nmi.intr, st->lx2nmi.flags); + break; + default: + DBG("\t\n"); + } + DBG("\n"); +} + +static Atable* +acpimadt(uchar *p, int len) +{ + uchar *pe; + Apicst *st, *l, **stl; + int stlen, id; + + apics = mallocz(sizeof(Madt), 1); + apics->lapicpa = l32get(p+36); + apics->pcat = l32get(p+40); + apics->st = nil; + stl = &apics->st; + pe = p + len; + for(p += 44; p < pe; p += stlen){ + st = mallocz(sizeof(Apicst), 1); + st->type = p[0]; + st->next = nil; + stlen = p[1]; + switch(st->type){ + case ASlapic: + st->lapic.pid = p[2]; + st->lapic.id = p[3]; + if(l32get(p+4) == 0){ + free(st); + st = nil; + } + break; + case ASioapic: + st->ioapic.id = id = p[2]; + st->ioapic.addr = l32get(p+4); + st->ioapic.ibase = l32get(p+8); + /* iosapic overrides any ioapic entry for the same id */ + for(l = apics->st; l != nil; l = l->next) + if(l->type == ASiosapic && l->iosapic.id == id){ + st->ioapic = l->iosapic; + /* we leave it linked; could be removed */ + break; + } + break; + case ASintovr: + st->intovr.irq = p[3]; + st->intovr.intr = l32get(p+4); + st->intovr.flags = l16get(p+8); + break; + case ASnmi: + st->nmi.flags = l16get(p+2); + st->nmi.intr = l32get(p+4); + break; + case ASlnmi: + st->lnmi.pid = p[2]; + st->lnmi.flags = l16get(p+3); + st->lnmi.lint = p[5]; + break; + case ASladdr: + if(sizeof(apics->lapicpa) >= 8) + apics->lapicpa = l64get(p+8); + break; + case ASiosapic: + id = st->iosapic.id = p[2]; + st->iosapic.ibase = l32get(p+4); + st->iosapic.addr = l64get(p+8); + /* iosapic overrides any ioapic entry for the same id */ + for(l = apics->st; l != nil; l = l->next) + if(l->type == ASioapic && l->ioapic.id == id){ + l->ioapic = st->iosapic; + free(st); + st = nil; + break; + } + break; + case ASlsapic: + st->lsapic.pid = p[2]; + st->lsapic.id = p[3]; + st->lsapic.eid = p[4]; + st->lsapic.puid = l32get(p+12); + if(l32get(p+8) == 0){ + free(st); + st = nil; + }else + kstrdup(&st->lsapic.puids, 
(char*)p+16); + break; + case ASintsrc: + st->intsrc.flags = l16get(p+2); + st->type = p[4]; + st->intsrc.pid = p[5]; + st->intsrc.peid = p[6]; + st->intsrc.iosv = p[7]; + st->intsrc.intr = l32get(p+8); + st->intsrc.any = l32get(p+12); + break; + case ASlx2apic: + st->lx2apic.id = l32get(p+4); + st->lx2apic.puid = l32get(p+12); + if(l32get(p+8) == 0){ + free(st); + st = nil; + } + break; + case ASlx2nmi: + st->lx2nmi.flags = l16get(p+2); + st->lx2nmi.puid = l32get(p+4); + st->lx2nmi.intr = p[8]; + break; + default: + print("unknown APIC structure\n"); + free(st); + st = nil; + } + if(st != nil){ + *stl = st; + stl = &st->next; + } + } + + dumpmadt(apics); + return nil; /* can be unmapped once parsed */ +} + +/* + * Map the table and keep it there. + */ +static Atable* +acpitable(uchar *p, int len) +{ + if(len < Sdthdrsz) + return nil; + return newtable(p); +} + +static void +dumptable(char *sig, uchar *p, int l) +{ + int n, i; + + if(DBGFLG > 1){ + DBG("%s @ %#p\n", sig, p); + if(DBGFLG > 2) + n = l; + else + n = 256; + for(i = 0; i < n; i++){ + if((i % 16) == 0) + DBG("%x: ", i); + DBG(" %2.2ux", p[i]); + if((i % 16) == 15) + DBG("\n"); + } + DBG("\n"); + DBG("\n"); + } +} + +static char* +seprinttable(char *s, char *e, Atable *t) +{ + uchar *p; + int i, n; + + p = (uchar*)t->tbl; /* include header */ + n = Sdthdrsz + t->dlen; + s = seprint(s, e, "%s @ %#p\n", t->sig, p); + for(i = 0; i < n; i++){ + if((i % 16) == 0) + s = seprint(s, e, "%x: ", i); + s = seprint(s, e, " %2.2ux", p[i]); + if((i % 16) == 15) + s = seprint(s, e, "\n"); + } + return seprint(s, e, "\n\n"); +} + +/* + * process xsdt table and load tables with sig, or all if nil. 
+ * (XXX: should be able to search for sig, oemid, oemtblid) + */ +static int +acpixsdtload(char *sig) +{ + int i, l, t, unmap, found; + uintmem dhpa; + uchar *sdt; + char tsig[5]; + + found = 0; + for(i = 0; i < xsdt->len; i += xsdt->asize){ + if(xsdt->asize == 8) + dhpa = l64get(xsdt->p+i); + else + dhpa = l32get(xsdt->p+i); + if((sdt = sdtmap(dhpa, &l, 1)) == nil) + continue; + unmap = 1; + memmove(tsig, sdt, 4); + tsig[4] = 0; + if(sig == nil || strcmp(sig, tsig) == 0){ + DBG("acpi: %s addr %#p\n", tsig, sdt); + for(t = 0; t < nelem(ptables); t++) + if(strcmp(tsig, ptables[t].sig) == 0){ + dumptable(tsig, sdt, l); + unmap = ptables[t].f(sdt, l) == nil; + found = 1; + break; + } + } + if(unmap) + vunmap(sdt, l); + } + return found; +} + +static void* +rsdscan(u8int* addr, int len, char* signature) +{ + int sl; + u8int *e, *p; + + e = addr+len; + sl = strlen(signature); + for(p = addr; p+sl < e; p += 16){ + if(memcmp(p, signature, sl)) + continue; + return p; + } + + return nil; +} + +static void* +rsdsearch(char* signature) +{ + uintmem p; + u8int *bda; + void *rsd; + + /* + * Search for the data structure signature: + * 1) in the first KB of the EBDA; + * 2) in the BIOS ROM between 0xE0000 and 0xFFFFF. 
+ */ + if(strncmp((char*)KADDR(0xFFFD9), "EISA", 4) == 0){ + bda = BIOSSEG(0x40); + if((p = (bda[0x0F]<<8)|bda[0x0E])){ + if(rsd = rsdscan(KADDR(p), 1024, signature)) + return rsd; + } + } + return rsdscan(BIOSSEG(0xE000), 0x20000, signature); +} + +static void +acpirsdptr(void) +{ + Rsdp *rsd; + int asize; + uintmem sdtpa; + + if((rsd = rsdsearch("RSD PTR ")) == nil) + return; + + assert(sizeof(Sdthdr) == 36); + + DBG("acpi: RSD PTR@ %#p, physaddr %#ux length %ud %#llux rev %d\n", + rsd, l32get(rsd->raddr), l32get(rsd->length), + l64get(rsd->xaddr), rsd->revision); + + if(rsd->revision >= 2){ + if(sdtchecksum(rsd, 36) == nil){ + DBG("acpi: RSD: bad checksum\n"); + return; + } + sdtpa = l64get(rsd->xaddr); + asize = 8; + } + else{ + if(sdtchecksum(rsd, 20) == nil){ + DBG("acpi: RSD: bad checksum\n"); + return; + } + sdtpa = l32get(rsd->raddr); + asize = 4; + } + + /* + * process the RSDT or XSDT table. + */ + xsdt = malloc(sizeof(Xsdt)); + if(xsdt == nil){ + DBG("acpi: malloc failed\n"); + return; + } + if((xsdt->p = sdtmap(sdtpa, &xsdt->len, 1)) == nil){ + DBG("acpi: sdtmap failed\n"); + return; + } + if((xsdt->p[0] != 'R' && xsdt->p[0] != 'X') || memcmp(xsdt->p+1, "SDT", 3) != 0){ + DBG("acpi: xsdt sig: %c%c%c%c\n", + xsdt->p[0], xsdt->p[1], xsdt->p[2], xsdt->p[3]); + free(xsdt); + xsdt = nil; + vunmap(xsdt, xsdt->len); + return; + } + xsdt->p += sizeof(Sdthdr); + xsdt->len -= sizeof(Sdthdr); + xsdt->asize = asize; + DBG("acpi: XSDT %#p\n", xsdt); + acpixsdtload(nil); + /* xsdt is kept and not unmapped */ + +} + +static int +acpigen(Chan *c, char*, Dirtab *tab, int ntab, int i, Dir *dp) +{ + Qid qid; + + if(i == DEVDOTDOT){ + mkqid(&qid, Qdir, 0, QTDIR); + devdir(c, qid, ".", 0, eve, 0555, dp); + return 1; + } + i++; /* skip first element for . 
itself */ + if(tab==0 || i>=ntab) + return -1; + tab += i; + qid = tab->qid; + qid.path &= ~Qdir; + qid.vers = 0; + devdir(c, qid, tab->name, tab->length, eve, tab->perm, dp); + return 1; +} + +static int +Gfmt(Fmt* f) +{ + Gas *g; + + g = va_arg(f->args, Gas*); + switch(g->spc){ + case Rsysmem: + case Rsysio: + case Rembed: + case Rsmbus: + case Rcmos: + case Rpcibar: + case Ripmi: + fmtprint(f, "[%s ", acpiregstr(g->spc)); + break; + case Rpcicfg: + fmtprint(f, "[pci "); + fmtprint(f, "dev %#ulx ", (ulong)(g->addr >> 32) & 0xFFFF); + fmtprint(f, "fn %#ulx ", (ulong)(g->addr & 0xFFFF0000) >> 16); + fmtprint(f, "adr %#ulx ", (ulong)(g->addr &0xFFFF)); + break; + case Rfixedhw: + fmtprint(f, "[hw "); + break; + default: + fmtprint(f, "[spc=%#ux ", g->spc); + } + return fmtprint(f, "off %d len %d addr %#ullx sz%d]", + g->off, g->len, g->addr, g->accsz); +} + +static uint +getbanked(int ra, int rb, int sz) +{ + uint r; + + r = 0; + switch(sz){ + case 1: + if(ra != 0) + r |= inb(ra); + if(rb != 0) + r |= inb(rb); + break; + case 2: + if(ra != 0) + r |= ins(ra); + if(rb != 0) + r |= ins(rb); + break; + case 4: + if(ra != 0) + r |= inl(ra); + if(rb != 0) + r |= inl(rb); + break; + default: + print("getbanked: wrong size\n"); + } + return r; +} + +static uint +setbanked(int ra, int rb, int sz, int v) +{ + uint r; + + r = -1; + switch(sz){ + case 1: + if(ra != 0) + outb(ra, v); + if(rb != 0) + outb(rb, v); + break; + case 2: + if(ra != 0) + outs(ra, v); + if(rb != 0) + outs(rb, v); + break; + case 4: + if(ra != 0) + outl(ra, v); + if(rb != 0) + outl(rb, v); + break; + default: + print("setbanked: wrong size\n"); + } + return r; +} + +static uint +getpm1ctl(void) +{ + return getbanked(fadt.pm1acntblk, fadt.pm1bcntblk, fadt.pm1cntlen); +} + +static void +setpm1sts(uint v) +{ + DBG("acpi: setpm1sts %#ux\n", v); + setbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, fadt.pm1evtlen/2, v); +} + +static uint +getpm1sts(void) +{ + return getbanked(fadt.pm1aevtblk, fadt.pm1bevtblk, 
fadt.pm1evtlen/2); +} + +static uint +getpm1en(void) +{ + int sz; + + sz = fadt.pm1evtlen/2; + return getbanked(fadt.pm1aevtblk+sz, fadt.pm1bevtblk+sz, sz); +} + +static int +getgpeen(int n) +{ + return inb(gpes[n].enio) & 1<ho in the + // aml process. + // enable it again when it returns. + } + sts = getpm1sts(); + en = getpm1en(); + print("acpiitr: pm1sts %#ux pm1en %#ux\n", sts, en); + if(sts&en) + print("have enabled events\n"); + if(sts&1) + print("power button\n"); + // XXX serve other interrupts here. + setpm1sts(sts); +} + +static void +initgpes(void) +{ + int i, n0, n1; + + n0 = fadt.gpe0blklen/2; + n1 = fadt.gpe1blklen/2; + ngpes = n0 + n1; + gpes = mallocz(sizeof(Gpe) * ngpes, 1); + for(i = 0; i < n0; i++){ + gpes[i].nb = i; + gpes[i].stsbit = i&7; + gpes[i].stsio = fadt.gpe0blk + (i>>3); + gpes[i].enbit = (n0 + i)&7; + gpes[i].enio = fadt.gpe0blk + ((n0 + i)>>3); + } + for(i = 0; i + n0 < ngpes; i++){ + gpes[i + n0].nb = fadt.gp1base + i; + gpes[i + n0].stsbit = i&7; + gpes[i + n0].stsio = fadt.gpe1blk + (i>>3); + gpes[i + n0].enbit = (n1 + i)&7; + gpes[i + n0].enio = fadt.gpe1blk + ((n1 + i)>>3); + } + for(i = 0; i < ngpes; i++){ + setgpeen(i, 0); + clrgpests(i); + } +} + +static void +acpiioalloc(uint addr, int len) +{ + if(addr != 0) + ioalloc(addr, len, 0, "acpi"); +} + +int +acpiinit(void) +{ + if(fadt.smicmd == 0){ + fmtinstall('G', Gfmt); + acpirsdptr(); + if(fadt.smicmd == 0) + return -1; + } + return 0; +} + +static Chan* +acpiattach(char *spec) +{ + int i; + + /* + * This was written for the stock kernel. + * This code must use 64 registers to be acpi ready in nix. + */ + if(1 || acpiinit() < 0) + error("no acpi"); + + /* + * should use fadt->xpm* and fadt->xgpe* registers for 64 bits. + * We are not ready in this kernel for that. 
+ */ + DBG("acpi io alloc\n"); + acpiioalloc(fadt.smicmd, 1); + acpiioalloc(fadt.pm1aevtblk, fadt.pm1evtlen); + acpiioalloc(fadt.pm1bevtblk, fadt.pm1evtlen ); + acpiioalloc(fadt.pm1acntblk, fadt.pm1cntlen); + acpiioalloc(fadt.pm1bcntblk, fadt.pm1cntlen); + acpiioalloc(fadt.pm2cntblk, fadt.pm2cntlen); + acpiioalloc(fadt.pmtmrblk, fadt.pmtmrlen); + acpiioalloc(fadt.gpe0blk, fadt.gpe0blklen); + acpiioalloc(fadt.gpe1blk, fadt.gpe1blklen); + + DBG("acpi init gpes\n"); + initgpes(); + + /* + * This starts ACPI, which may require we handle + * power mgmt events ourselves. Use with care. + */ + DBG("acpi starting\n"); + outb(fadt.smicmd, fadt.acpienable); + for(i = 0; i < 10; i++) + if(getpm1ctl() & Pm1SciEn) + break; + if(i == 10) + error("acpi: failed to enable\n"); + if(fadt.sciint != 0) + intrenable(fadt.sciint, acpiintr, 0, BUSUNKNOWN, "acpi"); + return devattach(L'α', spec); +} + +static Walkqid* +acpiwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, acpidir, nelem(acpidir), acpigen); +} + +static long +acpistat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, acpidir, nelem(acpidir), acpigen); +} + +static Chan* +acpiopen(Chan *c, int omode) +{ + return devopen(c, omode, acpidir, nelem(acpidir), acpigen); +} + +static void +acpiclose(Chan *) +{ +} + +static char*ttext; +static int tlen; + +static long +acpiread(Chan *c, void *a, long n, vlong off) +{ + long q; + Atable *t; + char *ns, *s, *e, *ntext; + + q = c->qid.path; + switch(q){ + case Qdir: + return devdirread(c, a, n, acpidir, nelem(acpidir), acpigen); + case Qtbl: + if(ttext == nil){ + tlen = 1024; + ttext = malloc(tlen); + if(ttext == nil){ + print("acpi: no memory\n"); + return 0; + } + s = ttext; + e = ttext + tlen; + strcpy(s, "no tables\n"); + for(t = tfirst; t != nil; t = t->next){ + ns = seprinttable(s, e, t); + while(ns == e - 1){ + DBG("acpiread: allocated %d\n", tlen*2); + ntext = realloc(ttext, tlen*2); + if(ntext == nil) + panic("acpi: no memory\n"); 
+ s = ntext + (ttext - s); + ttext = ntext; + tlen *= 2; + e = ttext + tlen; + ns = seprinttable(s, e, t); + } + s = ns; + } + + } + return readstr(off, a, n, ttext); + case Qio: + if(reg == nil) + error("region not configured"); + return regio(reg, a, n, off, 0); + } + error(Eperm); + return -1; +} + +static long +acpiwrite(Chan *c, void *a, long n, vlong off) +{ + Cmdtab *ct; + Cmdbuf *cb; + Reg *r; + uint rno, fun, dev, bus, i; + + if(c->qid.path == Qio){ + if(reg == nil) + error("region not configured"); + return regio(reg, a, n, off, 1); + } + if(c->qid.path != Qctl) + error(Eperm); + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, ctls, nelem(ctls)); + DBG("acpi ctl %s\n", cb->f[0]); + switch(ct->index){ + case CMregion: + r = reg; + if(r == nil){ + r = smalloc(sizeof(Reg)); + r->name = nil; + } + kstrdup(&r->name, cb->f[1]); + r->spc = acpiregid(cb->f[2]); + if(r->spc < 0){ + free(r); + reg = nil; + error("bad region type"); + } + if(r->spc == Rpcicfg || r->spc == Rpcibar){ + rno = r->base>>Rpciregshift & Rpciregmask; + fun = r->base>>Rpcifunshift & Rpcifunmask; + dev = r->base>>Rpcidevshift & Rpcidevmask; + bus = r->base>>Rpcibusshift & Rpcibusmask; + r->tbdf = MKBUS(BusPCI, bus, dev, fun); + r->base = rno; /* register ~ our base addr */ + } + r->base = strtoull(cb->f[3], nil, 0); + r->len = strtoull(cb->f[4], nil, 0); + r->accsz = strtoul(cb->f[5], nil, 0); + if(r->accsz < 1 || r->accsz > 4){ + free(r); + reg = nil; + error("bad region access size"); + } + reg = r; + DBG("region %s %s %llux %llux sz%d", + r->name, acpiregstr(r->spc), r->base, r->len, r->accsz); + break; + case CMgpe: + i = strtoul(cb->f[1], nil, 0); + if(i >= ngpes) + error("gpe out of range"); + kstrdup(&gpes[i].obj, cb->f[2]); + DBG("gpe %d %s\n", i, gpes[i].obj); + setgpeen(i, 1); + break; + default: + panic("acpi: unknown ctl"); + } + poperror(); + free(cb); + return n; +} + + +Dev acpidevtab = { + L'α', + "acpi", + + devreset, + devinit, + 
devshutdown, + acpiattach, + acpiwalk, + acpistat, + acpiopen, + devcreate, + acpiclose, + acpiread, + devbread, + acpiwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/k10/devarch.c 4/sys/src/nix/k10/devarch.c --- 0/sys/src/nix/k10/devarch.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/devarch.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,658 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "ureg.h" + +typedef struct IOMap IOMap; +struct IOMap +{ + IOMap *next; + int reserved; + char tag[13]; + ulong start; + ulong end; +}; + +static struct +{ + Lock; + IOMap *map; + IOMap *free; + IOMap maps[32]; // some initial free maps + + QLock ql; // lock for reading map +} iomap; + +enum { + Qdir = 0, + Qioalloc = 1, + Qiob, + Qiow, + Qiol, + Qbase, + Qmapram, + Qrealmem, + + Qmax = 16, +}; + +typedef long Rdwrfn(Chan*, void*, long, vlong); + +static Rdwrfn *readfn[Qmax]; +static Rdwrfn *writefn[Qmax]; + +static Dirtab archdir[Qmax] = { + ".", { Qdir, 0, QTDIR }, 0, 0555, + "ioalloc", { Qioalloc, 0 }, 0, 0444, + "iob", { Qiob, 0 }, 0, 0660, + "iow", { Qiow, 0 }, 0, 0660, + "iol", { Qiol, 0 }, 0, 0660, + "mapram", { Qmapram, 0 }, 0, 0444, + "realmodemem", { Qrealmem, 0 }, 0, 0660, +}; +Lock archwlock; /* the lock is only for changing archdir */ +int narchdir = Qbase; + +/* + * Add a file to the #P listing. Once added, you can't delete it. + * You can't add a file with the same name as one already there, + * and you get a pointer to the Dirtab entry so you can do things + * like change the Qid version. Changing the Qid path is disallowed. 
+ */ +Dirtab* +addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn) +{ + int i; + Dirtab d; + Dirtab *dp; + + memset(&d, 0, sizeof d); + strcpy(d.name, name); + d.perm = perm; + + lock(&archwlock); + if(narchdir >= Qmax){ + unlock(&archwlock); + return nil; + } + + for(i=0; inext){ + map = *l; + if (map->start < 0x400) + continue; + i = map->start - port; + if(i > size) + break; + if(align > 0) + port = ((port+align-1)/align)*align; + else + port = map->end; + } + if(*l == nil){ + unlock(&iomap); + return -1; + } + map = iomap.free; + if(map == nil){ + print("ioalloc: out of maps"); + unlock(&iomap); + return port; + } + iomap.free = map->next; + map->next = *l; + map->start = port; + map->end = port + size; + map->reserved = 1; + strncpy(map->tag, tag, sizeof(map->tag)); + map->tag[sizeof(map->tag)-1] = 0; + *l = map; + + archdir[0].qid.vers++; + + unlock(&iomap); + return map->start; +} + +// +// alloc some io port space and remember who it was +// alloced to. if port < 0, find a free region. +// +int +ioalloc(int port, int size, int align, char *tag) +{ + IOMap *map, **l; + int i; + + lock(&iomap); + if(port < 0){ + // find a free port above 0x400 and below 0x1000 + port = 0x400; + for(l = &iomap.map; *l; l = &(*l)->next){ + map = *l; + if (map->start < 0x400) + continue; + i = map->start - port; + if(i > size) + break; + if(align > 0) + port = ((port+align-1)/align)*align; + else + port = map->end; + } + if(*l == nil){ + unlock(&iomap); + return -1; + } + } else { + // Only 64KB I/O space on the x86. 
+ if((port+size) > 0x10000){ + unlock(&iomap); + return -1; + } + // see if the space clashes with previously allocated ports + for(l = &iomap.map; *l; l = &(*l)->next){ + map = *l; + if(map->end <= port) + continue; + if(map->reserved && map->start == port && map->end == port + size) { + map->reserved = 0; + unlock(&iomap); + return map->start; + } + if(map->start >= port+size) + break; + unlock(&iomap); + return -1; + } + } + map = iomap.free; + if(map == nil){ + print("ioalloc: out of maps"); + unlock(&iomap); + return port; + } + iomap.free = map->next; + map->next = *l; + map->start = port; + map->end = port + size; + strncpy(map->tag, tag, sizeof(map->tag)); + map->tag[sizeof(map->tag)-1] = 0; + *l = map; + + archdir[0].qid.vers++; + + unlock(&iomap); + return map->start; +} + +void +iofree(int port) +{ + IOMap *map, **l; + + lock(&iomap); + for(l = &iomap.map; *l; l = &(*l)->next){ + if((*l)->start == port){ + map = *l; + *l = map->next; + map->next = iomap.free; + iomap.free = map; + break; + } + if((*l)->start > port) + break; + } + archdir[0].qid.vers++; + unlock(&iomap); +} + +int +iounused(int start, int end) +{ + IOMap *map; + + for(map = iomap.map; map; map = map->next){ + if(start >= map->start && start < map->end + || start <= map->start && end > map->start) + return 0; + } + return 1; +} + +static void +checkport(int start, int end) +{ + /* standard vga regs are OK */ + if(start >= 0x2b0 && end <= 0x2df+1) + return; + if(start >= 0x3c0 && end <= 0x3da+1) + return; + + if(iounused(start, end)) + return; + error(Eperm); +} + +static Chan* +archattach(char* spec) +{ + return devattach('P', spec); +} + +Walkqid* +archwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, archdir, narchdir, devgen); +} + +static long +archstat(Chan* c, uchar* dp, long n) +{ + return devstat(c, dp, n, archdir, narchdir, devgen); +} + +static Chan* +archopen(Chan* c, int omode) +{ + return devopen(c, omode, archdir, narchdir, devgen); +} + 
+static void +archclose(Chan*) +{ +} + +enum +{ + Linelen= 31, +}; + +static long +archread(Chan *c, void *a, long n, vlong offset) +{ + char *buf, *p; + int port; + ushort *sp; + ulong *lp; + IOMap *map; + Rdwrfn *fn; + + switch((ulong)c->qid.path){ + + case Qdir: + return devdirread(c, a, n, archdir, narchdir, devgen); + + case Qiob: + port = offset; + checkport(offset, offset+n); + for(p = a; port < offset+n; port++) + *p++ = inb(port); + return n; + + case Qiow: + if(n & 1) + error(Ebadarg); + checkport(offset, offset+n); + sp = a; + for(port = offset; port < offset+n; port += 2) + *sp++ = ins(port); + return n; + + case Qiol: + if(n & 3) + error(Ebadarg); + checkport(offset, offset+n); + lp = a; + for(port = offset; port < offset+n; port += 4) + *lp++ = inl(port); + return n; + + case Qioalloc: + break; + default: + if(c->qid.path < narchdir && (fn = readfn[c->qid.path])) + return fn(c, a, n, offset); + error(Eperm); + break; + } + + if((buf = malloc(n)) == nil) + error(Enomem); + p = buf; + n = n/Linelen; + offset = offset/Linelen; + + switch((ulong)c->qid.path){ + case Qioalloc: + lock(&iomap); + for(map = iomap.map; n > 0 && map != nil; map = map->next){ + if(offset-- > 0) + continue; + sprint(p, "%#8lux %#8lux %-12.12s\n", map->start, map->end-1, map->tag); + p += Linelen; + n--; + } + unlock(&iomap); + break; + case Qmapram: +/* shit */ +#ifdef NOTYET + for(mp = rmapram.map; mp->size; mp++){ + /* + * Up to MemMinMiB is already set up. 
+ */ + if(mp->addr < MemMinMiB*MiB){ + if(mp->addr+mp->size <= MemMinMiB*MiB) + continue; + pa = MemMinMiB*MiB; + size = mp->size - MemMinMiB*MiB-mp->addr; + } + else{ + pa = mp->addr; + size = mp->size; + } +#endif + error("Not yet"); + + break; + } + + n = p - buf; + memmove(a, buf, n); + free(buf); + + return n; +} + +static long +archwrite(Chan *c, void *a, long n, vlong offset) +{ + char *p; + int port; + ushort *sp; + ulong *lp; + Rdwrfn *fn; + + switch((ulong)c->qid.path){ + + case Qiob: + p = a; + checkport(offset, offset+n); + for(port = offset; port < offset+n; port++) + outb(port, *p++); + return n; + + case Qiow: + if(n & 1) + error(Ebadarg); + checkport(offset, offset+n); + sp = a; + for(port = offset; port < offset+n; port += 2) + outs(port, *sp++); + return n; + + case Qiol: + if(n & 3) + error(Ebadarg); + checkport(offset, offset+n); + lp = a; + for(port = offset; port < offset+n; port += 4) + outl(port, *lp++); + return n; + default: + if(c->qid.path < narchdir && (fn = writefn[c->qid.path])) + return fn(c, a, n, offset); + error(Eperm); + break; + } + return 0; +} + +Dev archdevtab = { + 'P', + "arch", + + devreset, + devinit, + devshutdown, + archattach, + archwalk, + archstat, + archopen, + devcreate, + archclose, + archread, + devbread, + archwrite, + devbwrite, + devremove, + devwstat, +}; + +/* + */ +void +nop(void) +{ +} + +void (*coherence)(void) = mfence; + +static long +cputyperead(Chan*, void *a, long n, vlong off) +{ + char buf[512], *s, *e; + int i, k; + + e = buf+sizeof buf; + s = seprint(buf, e, "%s %ud\n", "AMD64", m->cpumhz); + k = m->ncpuinfoe - m->ncpuinfos; + if(k > 4) + k = 4; + for(i = 0; i < k; i++) + s = seprint(s, e, "%#8.8ux %#8.8ux %#8.8ux %#8.8ux\n", + m->cpuinfo[i][0], m->cpuinfo[i][1], + m->cpuinfo[i][2], m->cpuinfo[i][3]); + return readstr(off, a, n, buf); +} + + +static long +rmemrw(int isr, void *a, long n, vlong off) +{ + if(off < 0 || n < 0) + error("bad offset/count"); + if(isr){ + if(off >= MB) + return 0; + 
if(off+n >= MB) + n = MB - off; + memmove(a, KADDR((ulong)off), n); + }else{ + /* realmode buf page ok, allow vga framebuf's access */ + if(off >= MB || off+n > MB && + (off < 0xA0000 || off+n > 0xB0000+0x10000)) + error("bad offset/count in write"); + memmove(KADDR((ulong)off), a, n); + } + return n; +} + +static long +rmemread(Chan*, void *a, long n, vlong off) +{ + return rmemrw(1, a, n, off); +} + +static long +rmemwrite(Chan*, void *a, long n, vlong off) +{ + return rmemrw(0, a, n, off); +} + +void +archinit(void) +{ + addarchfile("cputype", 0444, cputyperead, nil); + addarchfile("realmodemem", 0660, rmemread, rmemwrite); +} + +void +archreset(void) +{ + int i; + + /* + * And sometimes there is no keyboard... + * + * The reset register (0xcf9) is usually in one of the bridge + * chips. The actual location and sequence could be extracted from + * ACPI but why bother, this is the end of the line anyway. + print("Takes a licking and keeps on ticking...\n"); + */ + i = inb(0xcf9); /* ICHx reset control */ + i &= 0x06; + outb(0xcf9, i|0x02); /* SYS_RST */ + millidelay(1); + outb(0xcf9, i|0x06); /* RST_CPU transition */ + + for(;;) + ; +} + +/* + * return value and speed of timer + */ +uvlong +fastticks(uvlong* hz) +{ + if(hz != nil) + *hz = m->cpuhz; + return rdtsc(); +} + +ulong +µs(void) +{ + return fastticks2us(rdtsc()); +} + +/* + * set next timer interrupt + */ +void +timerset(uvlong x) +{ + extern void apictimerset(uvlong); + + apictimerset(x); +} + +void +cycles(uvlong* t) +{ + *t = rdtsc(); +} + +void +delay(int millisecs) +{ + u64int r, t; + + if(millisecs <= 0) + millisecs = 1; + r = rdtsc(); + for(t = r + m->cpumhz*1000ull*millisecs; r < t; r = rdtsc()) + ; +} + +/* + * performance measurement ticks. must be low overhead. + * doesn't have to count over a second. 
/*
 * Controller register offsets (bytes from the start of the
 * register window; csr32r/csr32w divide by 4 to index it).
 * The Gorcl..Nstatistics values are indices of 32-bit words
 * inside the Statistics area, not byte offsets.
 */
enum {
	/* General */

	Ctrl		= 0x0000,	/* Device Control */
	Status		= 0x0008,	/* Device Status */
	Eec		= 0x0010,	/* EEPROM/Flash Control/Data */
	Eerd		= 0x0014,	/* EEPROM Read */
	Ctrlext		= 0x0018,	/* Extended Device Control */
	Fla		= 0x001C,	/* Flash Access */
	Mdic		= 0x0020,	/* MDI Control */
	Seresctl	= 0x0024,	/* Serdes ana */
	Fcal		= 0x0028,	/* Flow Control Address Low */
	Fcah		= 0x002C,	/* Flow Control Address High */
	Fct		= 0x0030,	/* Flow Control Type */
	Kumctrlsta	= 0x0034,	/* Kumeran Control and Status Register */
	Vet		= 0x0038,	/* VLAN EtherType */
	Fcttv		= 0x0170,	/* Flow Control Transmit Timer Value */
	Txcw		= 0x0178,	/* Transmit Configuration Word */
	Rxcw		= 0x0180,	/* Receive Configuration Word */
	Ledctl		= 0x0E00,	/* LED control */
	Pba		= 0x1000,	/* Packet Buffer Allocation */
	Pbs		= 0x1008,	/* Packet Buffer Size */

	/* Interrupt */

	Icr		= 0x00C0,	/* Interrupt Cause Read */
	Itr		= 0x00C4,	/* Interrupt Throttling Rate */
	Ics		= 0x00C8,	/* Interrupt Cause Set */
	Ims		= 0x00D0,	/* Interrupt Mask Set/Read */
	Imc		= 0x00D8,	/* Interrupt mask Clear */
	Iam		= 0x00E0,	/* Interrupt acknowledge Auto Mask */
	Eitr		= 0x1680,	/* Extended itr; 82575/6 80 only */

	/* Receive */

	Rctl		= 0x0100,	/* Control */
	Ert		= 0x2008,	/* Early Receive Threshold (573[EVL], 82578 only) */
	Fcrtl		= 0x2160,	/* Flow Control RX Threshold Low */
	Fcrth		= 0x2168,	/* Flow Control Rx Threshold High */
	Psrctl		= 0x2170,	/* Packet Split Receive Control */
	Drxmxod		= 0x2540,	/* dma max outstanding bytes (82575) */
	Rdbal		= 0x2800,	/* Rdesc Base Address Low Queue 0 */
	Rdbah		= 0x2804,	/* Rdesc Base Address High Queue 0 */
	Rdlen		= 0x2808,	/* Descriptor Length Queue 0 */
	Srrctl		= 0x280C,	/* split and replication rx control (82575) */
	Rdh		= 0x2810,	/* Descriptor Head Queue 0 */
	Rdt		= 0x2818,	/* Descriptor Tail Queue 0 */
	Rdtr		= 0x2820,	/* Descriptor Timer Ring */
	Rxdctl		= 0x2828,	/* Descriptor Control */
	Radv		= 0x282C,	/* Interrupt Absolute Delay Timer */
	Rdbal1		= 0x2900,	/* Rdesc Base Address Low Queue 1 */
	/* was 0x2804, which aliased Rdbah; queue 1 is queue 0 + 0x100 */
	Rdbah1		= 0x2904,	/* Rdesc Base Address High Queue 1 */
	Rdlen1		= 0x2908,	/* Descriptor Length Queue 1 */
	Rdh1		= 0x2910,	/* Descriptor Head Queue 1 */
	Rdt1		= 0x2918,	/* Descriptor Tail Queue 1 */
	Rxdctl1		= 0x2928,	/* Descriptor Control Queue 1 */
	Rsrpd		= 0x2C00,	/* Small Packet Detect */
	Raid		= 0x2C08,	/* ACK interrupt delay */
	Cpuvec		= 0x2C10,	/* CPU Vector */
	Rxcsum		= 0x5000,	/* Checksum Control */
	Rmpl		= 0x5004,	/* rx maximum packet length (82575) */
	Rfctl		= 0x5008,	/* Filter Control */
	Mta		= 0x5200,	/* Multicast Table Array */
	Ral		= 0x5400,	/* Receive Address Low */
	Rah		= 0x5404,	/* Receive Address High */
	Vfta		= 0x5600,	/* VLAN Filter Table Array */
	Mrqc		= 0x5818,	/* Multiple Receive Queues Command */
	Rssim		= 0x5864,	/* RSS Interrupt Mask */
	Rssir		= 0x5868,	/* RSS Interrupt Request */
	Reta		= 0x5c00,	/* Redirection Table */
	Rssrk		= 0x5c80,	/* RSS Random Key */

	/* Transmit */

	Tctl		= 0x0400,	/* Transmit Control */
	Tipg		= 0x0410,	/* Transmit IPG */
	Tkabgtxd	= 0x3004,	/* glci afe band gap transmit ref data, or something */
	Tdbal		= 0x3800,	/* Tdesc Base Address Low */
	Tdbah		= 0x3804,	/* Tdesc Base Address High */
	Tdlen		= 0x3808,	/* Descriptor Length */
	Tdh		= 0x3810,	/* Descriptor Head */
	Tdt		= 0x3818,	/* Descriptor Tail */
	Tidv		= 0x3820,	/* Interrupt Delay Value */
	Txdctl		= 0x3828,	/* Descriptor Control */
	Tadv		= 0x382C,	/* Interrupt Absolute Delay Timer */
	Tarc0		= 0x3840,	/* Arbitration Counter Queue 0 */
	Tdbal1		= 0x3900,	/* Descriptor Base Low Queue 1 */
	Tdbah1		= 0x3904,	/* Descriptor Base High Queue 1 */
	Tdlen1		= 0x3908,	/* Descriptor Length Queue 1 */
	Tdh1		= 0x3910,	/* Descriptor Head Queue 1 */
	Tdt1		= 0x3918,	/* Descriptor Tail Queue 1 */
	Txdctl1		= 0x3928,	/* Descriptor Control 1 */
	Tarc1		= 0x3940,	/* Arbitration Counter Queue 1 */

	/* Statistics */

	Statistics	= 0x4000,	/* Start of Statistics Area */
	Gorcl		= 0x88/4,	/* Good Octets Received Count */
	Gotcl		= 0x90/4,	/* Good Octets Transmitted Count */
	Torl		= 0xC0/4,	/* Total Octets Received */
	Totl		= 0xC8/4,	/* Total Octets Transmitted */
	Nstatistics	= 0x124/4,
};
/* mask for Lan ID. */ + Txoff = 1<<4, /* Transmission Paused */ + Tbimode = 1<<5, /* TBI Mode Indication */ + Phyra = 1<<10, /* PHY Reset Asserted */ + GIOme = 1<<19, /* GIO Master Enable Status */ +}; + +enum { /* Eerd */ + EEstart = 1<<0, /* Start Read */ + EEdone = 1<<1, /* Read done */ +}; + +enum { /* Ctrlext */ + Asdchk = 1<<12, /* ASD Check */ + Eerst = 1<<13, /* EEPROM Reset */ + Spdbyps = 1<<15, /* Speed Select Bypass */ + Linkmode = 3<<23, /* linkmode */ + Serdes = 3<<23, /* " serdes */ +}; + +enum { /* EEPROM content offsets */ + Ea = 0x00, /* Ethernet Address */ + Cf = 0x03, /* Compatibility Field */ + Icw1 = 0x0A, /* Initialization Control Word 1 */ + Sid = 0x0B, /* Subsystem ID */ + Svid = 0x0C, /* Subsystem Vendor ID */ + Did = 0x0D, /* Device ID */ + Vid = 0x0E, /* Vendor ID */ + Icw2 = 0x0F, /* Initialization Control Word 2 */ +}; + +enum { /* Mdic */ + MDIdMASK = 0x0000FFFF, /* Data */ + MDIdSHIFT = 0, + MDIrMASK = 0x001F0000, /* PHY Register Address */ + MDIrSHIFT = 16, + MDIpMASK = 0x03E00000, /* PHY Address */ + MDIpSHIFT = 21, + MDIwop = 0x04000000, /* Write Operation */ + MDIrop = 0x08000000, /* Read Operation */ + MDIready = 0x10000000, /* End of Transaction */ + MDIie = 0x20000000, /* Interrupt Enable */ + MDIe = 0x40000000, /* Error */ +}; + +enum { /* phy interface registers */ + Phyctl = 0, /* phy ctl */ + Physsr = 17, /* phy secondary status */ + Phyier = 18, /* 82573 phy interrupt enable */ + Phyisr = 19, /* 82563 phy interrupt status */ + Phylhr = 19, /* 8257[12] link health */ + + Phyprst = 193<<8 | 17, /* 8256[34] phy port reset */ + Phypage = 22, /* 8256[34] page register */ + Phystat = 26, /* 82580 phy status */ + Phyapage = 29, + Rtlink = 1<<10, /* realtime link status */ + Phyan = 1<<11, /* phy has auto-negotiated */ + + /* Phyctl bits */ + Ran = 1<<9, /* restart auto-negotiation */ + Ean = 1<<12, /* enable auto-negotiation */ + + /* Phyprst bits */ + Prst = 1<<0, /* reset the port */ + + /* 82573 Phyier bits */ + Lscie = 
1<<10, /* link status changed ie */ + Ancie = 1<<11, /* auto-negotiation complete ie */ + Spdie = 1<<14, /* speed changed ie */ + Panie = 1<<15, /* phy auto-negotiation error ie */ + + /* Phylhr/Phyisr bits */ + Anf = 1<<6, /* lhr: auto-negotiation fault */ + Ane = 1<<15, /* isr: auto-negotiation error */ + + /* 82580 Phystat bits */ + Ans = 1<<14 | 1<<15, /* 82580 auto-negotiation status */ + Link = 1<<6, /* 82580 Link */ + + /* Rxcw builtin serdes */ + Anc = 1<<31, + Rxsynch = 1<<30, + Rxcfg = 1<<29, + Rxcfgch = 1<<28, + Rxcfgbad = 1<<27, + Rxnc = 1<<26, + + /* Txcw */ + Txane = 1<<31, + Txcfg = 1<<30, +}; + +enum { /* fiber (pcs) interface */ + Pcsctl = 0x4208, /* pcs control */ + Pcsstat = 0x420c, /* pcs status */ + + /* Pcsctl bits */ + Pan = 1<<16, /* auto-negotiate */ + Prestart = 1<<17, /* restart an (self clearing) */ + + /* Pcsstat bits */ + Linkok = 1<<0, /* link is okay */ + Andone = 1<<16, /* an phase is done see below for success */ + Anbad = 1<<19 | 1<<20, /* Anerror | Anremfault */ +}; + +enum { /* Icr, Ics, Ims, Imc */ + Txdw = 0x00000001, /* Transmit Descriptor Written Back */ + Txqe = 0x00000002, /* Transmit Queue Empty */ + Lsc = 0x00000004, /* Link Status Change */ + Rxseq = 0x00000008, /* Receive Sequence Error */ + Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */ + Rxo = 0x00000040, /* Receiver Overrun */ + Rxt0 = 0x00000080, /* Receiver Timer Interrupt; !82575/6/80 only */ + Rxdw = 0x00000080, /* Rdesc write back; 82575/6/80 only */ + Mdac = 0x00000200, /* MDIO Access Completed */ + Rxcfgsets = 0x00000400, /* Receiving /C/ ordered sets */ + Gpi0 = 0x00000800, /* General Purpose Interrupts */ + Gpi1 = 0x00001000, + Gpi2 = 0x00002000, + Gpi3 = 0x00004000, + Ack = 0x00020000, /* Receive ACK frame */ +}; + +enum { /* Txcw */ + TxcwFd = 0x00000020, /* Full Duplex */ + TxcwHd = 0x00000040, /* Half Duplex */ + TxcwPauseMASK = 0x00000180, /* Pause */ + TxcwPauseSHIFT = 7, + TxcwPs = 1<nic+((r)/4))) +#define csr32w(c, r, v) 
(*((c)->nic+((r)/4)) = (v)) + +static Ctlr* i82563ctlrhead; +static Ctlr* i82563ctlrtail; + +static Lock i82563rblock; /* free receive Blocks */ +static Block* i82563rbpool; + + +static char *statistics[Nstatistics] = { + "CRC Error", + "Alignment Error", + "Symbol Error", + "RX Error", + "Missed Packets", + "Single Collision", + "Excessive Collisions", + "Multiple Collision", + "Late Collisions", + nil, + "Collision", + "Transmit Underrun", + "Defer", + "Transmit - No CRS", + "Sequence Error", + "Carrier Extension Error", + "Receive Error Length", + nil, + "XON Received", + "XON Transmitted", + "XOFF Received", + "XOFF Transmitted", + "FC Received Unsupported", + "Packets Received (64 Bytes)", + "Packets Received (65-127 Bytes)", + "Packets Received (128-255 Bytes)", + "Packets Received (256-511 Bytes)", + "Packets Received (512-1023 Bytes)", + "Packets Received (1024-mtu Bytes)", + "Good Packets Received", + "Broadcast Packets Received", + "Multicast Packets Received", + "Good Packets Transmitted", + nil, + "Good Octets Received", + nil, + "Good Octets Transmitted", + nil, + nil, + nil, + "Receive No Buffers", + "Receive Undersize", + "Receive Fragment", + "Receive Oversize", + "Receive Jabber", + "Management Packets Rx", + "Management Packets Drop", + "Management Packets Tx", + "Total Octets Received", + nil, + "Total Octets Transmitted", + nil, + "Total Packets Received", + "Total Packets Transmitted", + "Packets Transmitted (64 Bytes)", + "Packets Transmitted (65-127 Bytes)", + "Packets Transmitted (128-255 Bytes)", + "Packets Transmitted (256-511 Bytes)", + "Packets Transmitted (512-1023 Bytes)", + "Packets Transmitted (1024-mtu Bytes)", + "Multicast Packets Transmitted", + "Broadcast Packets Transmitted", + "TCP Segmentation Context Transmitted", + "TCP Segmentation Context Fail", + "Interrupt Assertion", + "Interrupt Rx Pkt Timer", + "Interrupt Rx Abs Timer", + "Interrupt Tx Pkt Timer", + "Interrupt Tx Abs Timer", + "Interrupt Tx Queue Empty", + "Interrupt 
Tx Desc Low", + "Interrupt Rx Min", + "Interrupt Rx Overrun", +}; + +static char* +cname(Ctlr* c) +{ + if (c->type == Iany) + return "any"; + return cttab[c->type].name; +} + +static long +i82563ifstat(Ether *edev, void *a, long n, ulong offset) +{ + Ctlr *ctlr; + char *s, *p, *e, *stat; + int i, r; + uvlong tuvl, ruvl; + + ctlr = edev->ctlr; + qlock(&ctlr->slock); + p = s = malloc(READSTR); + if(p == nil) { + qunlock(&ctlr->slock); + error(Enomem); + } + e = p + READSTR; + + for(i = 0; i < Nstatistics; i++){ + r = csr32r(ctlr, Statistics + i*4); + if((stat = statistics[i]) == nil) + continue; + switch(i){ + case Gorcl: + case Gotcl: + case Torl: + case Totl: + ruvl = r; + ruvl += (uvlong)csr32r(ctlr, Statistics+(i+1)*4) << 32; + tuvl = ruvl; + tuvl += ctlr->statistics[i]; + tuvl += (uvlong)ctlr->statistics[i+1] << 32; + if(tuvl == 0) + continue; + ctlr->statistics[i] = tuvl; + ctlr->statistics[i+1] = tuvl >> 32; + p = seprint(p, e, "%s: %llud %llud\n", stat, tuvl, ruvl); + i++; + break; + + default: + ctlr->statistics[i] += r; + if(ctlr->statistics[i] == 0) + continue; + p = seprint(p, e, "%s: %ud %ud\n", stat, + ctlr->statistics[i], r); + break; + } + } + + p = seprint(p, e, "lintr: %ud %ud\n", ctlr->lintr, ctlr->lsleep); + p = seprint(p, e, "rintr: %ud %ud\n", ctlr->rintr, ctlr->rsleep); + p = seprint(p, e, "tintr: %ud %ud\n", ctlr->tintr, ctlr->txdw); + p = seprint(p, e, "ixcs: %ud %ud %ud\n", ctlr->ixsm, ctlr->ipcs, ctlr->tcpcs); + p = seprint(p, e, "rdtr: %ud\n", ctlr->rdtr); + p = seprint(p, e, "radv: %ud\n", ctlr->radv); + p = seprint(p, e, "ctrl: %.8ux\n", csr32r(ctlr, Ctrl)); + p = seprint(p, e, "ctrlext: %.8ux\n", csr32r(ctlr, Ctrlext)); + p = seprint(p, e, "status: %.8ux\n", csr32r(ctlr, Status)); + p = seprint(p, e, "txcw: %.8ux\n", csr32r(ctlr, Txcw)); + p = seprint(p, e, "txdctl: %.8ux\n", csr32r(ctlr, Txdctl)); + p = seprint(p, e, "pba: %.8ux\n", ctlr->pba); + + p = seprint(p, e, "speeds: 10:%ud 100:%ud 1000:%ud ?:%ud\n", + ctlr->speeds[0], 
ctlr->speeds[1], ctlr->speeds[2], ctlr->speeds[3]); + p = seprint(p, e, "type: %s\n", cname(ctlr)); + +// p = seprint(p, e, "eeprom:"); +// for(i = 0; i < 0x40; i++){ +// if(i && ((i & 7) == 0)) +// p = seprint(p, e, "\n "); +// p = seprint(p, e, " %4.4ux", ctlr->eeprom[i]); +// } +// p = seprint(p, e, "\n"); + + USED(p); + n = readstr(offset, a, n, s); + free(s); + qunlock(&ctlr->slock); + + return n; +} + +static void +i82563promiscuous(void* arg, int on) +{ + int rctl; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + rctl = csr32r(ctlr, Rctl); + rctl &= ~MoMASK; + if(on) + rctl |= Upe|Mpe; + else + rctl &= ~(Upe|Mpe); + csr32w(ctlr, Rctl, rctl); +} + +static void +i82563multicast(void* arg, uchar* addr, int on) +{ + int bit, x; + Ctlr *ctlr; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + x = addr[5]>>1; + if(ctlr->type == i82566 || ctlr->type == i82567) + x &= 31; + bit = ((addr[5] & 1)<<4)|(addr[4]>>4); + /* + * multiple ether addresses can hash to the same filter bit, + * so it's never safe to clear a filter bit. + * if we want to clear filter bits, we need to keep track of + * all the multicast addresses in use, clear all the filter bits, + * then set the ones corresponding to in-use addresses. 
+ */ + if(on) + ctlr->mta[x] |= 1<mta[x] &= ~(1<mta[x]); +} + +static Block* +i82563rballoc(void) +{ + Block *bp; + + ilock(&i82563rblock); + if((bp = i82563rbpool) != nil){ + i82563rbpool = bp->next; + bp->next = nil; + /*ainc(&bp->ref); prevent bp from being freed */ + } + iunlock(&i82563rblock); + + return bp; +} + +static void +i82563rbfree(Block* b) +{ + b->rp = b->wp = (uchar*)ROUNDUP((uintptr)b->base, PGSZ); + b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck); + ilock(&i82563rblock); + b->next = i82563rbpool; + i82563rbpool = b; + iunlock(&i82563rblock); +} + +static void +i82563im(Ctlr* ctlr, int im) +{ + ilock(&ctlr->imlock); + ctlr->im |= im; + csr32w(ctlr, Ims, ctlr->im); + iunlock(&ctlr->imlock); +} + +static void +i82563txinit(Ctlr* ctlr) +{ + int i, r; + Block *bp; + + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Tctl, 0x0F<tdba)); + csr32w(ctlr, Tdbah, 0); + csr32w(ctlr, Tdlen, ctlr->ntd * sizeof(Td)); + ctlr->tdh = PREV(0, ctlr->ntd); + csr32w(ctlr, Tdh, 0); + ctlr->tdt = 0; + csr32w(ctlr, Tdt, 0); + for(i = 0; i < ctlr->ntd; i++){ + if((bp = ctlr->tb[i]) != nil){ + ctlr->tb[i] = nil; + freeb(bp); + } + memset(&ctlr->tdba[i], 0, sizeof(Td)); + } + csr32w(ctlr, Tidv, 128); + csr32w(ctlr, Tadv, 64); + r = csr32r(ctlr, Tctl); + r |= Ten; + csr32w(ctlr, Tctl, r); + r = csr32r(ctlr, Txdctl); + r &= ~(WthreshMASK|PthreshMASK); + r |= 4<type].flag & F75) + r |= Qenable; + csr32w(ctlr, Txdctl, r); +} + +#define Next(x, m) (((x)+1) & (m)) + +static int +i82563cleanup(Ctlr *c) +{ + Block *bp; + int tdh, m, n; + + tdh = c->tdh; + m = c->ntd-1; + while(c->tdba[n = Next(tdh, m)].status & Tdd){ + tdh = n; + if((bp = c->tb[tdh]) != nil){ + c->tb[tdh] = nil; + freeb(bp); + }else + iprint("82563 tx underrun!\n"); + c->tdba[tdh].status = 0; + } + + return c->tdh = tdh; +} + +static int +notrim(void *v) +{ + Ctlr *c; + + c = v; + return (c->im & Txdw) == 0; +} + +static void +i82563transmit(Ether* edev) +{ + Td *td; + Block *bp; + Ctlr *ctlr; + int tdh, tdt, m; + + 
ctlr = edev->ctlr; + + qlock(&ctlr->tlock); + + /* + * Free any completed packets + */ + tdh = i82563cleanup(ctlr); + + /* + * Try to fill the ring back up. + */ + tdt = ctlr->tdt; + m = ctlr->ntd-1; + for(;;){ + if(Next(tdt, m) == tdh){ + ctlr->txdw++; + i82563im(ctlr, Txdw); + break; + } + if((bp = qget(edev->oq)) == nil) + break; + td = &ctlr->tdba[tdt]; + td->addr[0] = PCIWADDR(bp->rp); + td->control = Ide|Rs|Ifcs|Teop|BLEN(bp); + ctlr->tb[tdt] = bp; + tdt = Next(tdt, m); + } + if(ctlr->tdt != tdt){ + ctlr->tdt = tdt; + csr32w(ctlr, Tdt, tdt); + } + qunlock(&ctlr->tlock); +} + +static void +i82563replenish(Ctlr* ctlr) +{ + Rd *rd; + int rdt, m; + Block *bp; + + rdt = ctlr->rdt; + m = ctlr->nrd-1; + while(Next(rdt, m) != ctlr->rdh){ + rd = &ctlr->rdba[rdt]; + if(ctlr->rb[rdt] != nil){ + iprint("82563: tx overrun\n"); + break; + } + bp = i82563rballoc(); + if(bp == nil){ + vlong now; + static vlong lasttime; + + /* don't flood the console */ + now = tk2ms(sys->ticks); + if (now - lasttime > 2000) + iprint("#l%d: 82563: all %d rx buffers in use\n", + ctlr->edev->ctlrno, ctlr->nrb); + lasttime = now; + break; + } + ctlr->rb[rdt] = bp; + rd->addr[0] = PCIWADDR(bp->rp); +// rd->addr[1] = 0; + rd->status = 0; + ctlr->rdfree++; + rdt = Next(rdt, m); + } + ctlr->rdt = rdt; + csr32w(ctlr, Rdt, rdt); +} + +static void +i82563rxinit(Ctlr* ctlr) +{ + Block *bp; + int i, r, rctl; + + i = ctlr->rbsz / 1024; + if(ctlr->rbsz % 1024) + i++; + + if(ctlr->rbsz <= 2048 || (cttab[ctlr->type].flag & F75)){ + if(ctlr->rbsz > 2048){ + if(ctlr->type != i82575) + i |= (ctlr->nrd/2>>4)<<20; /* RdmsHalf */ + csr32w(ctlr, Srrctl, i | Dropen); + csr32w(ctlr, Rmpl, ctlr->rbsz); +// csr32w(ctlr, Drxmxod, 0x7ff); + } + rctl = Dpf|Bsize2048|Bam|RdtmsHALF; + }else if(ctlr->rbsz <= 8192){ + rctl = Lpe|Dpf|Bsize8192|Bsex|Bam|RdtmsHALF|Secrc; + }else{ + rctl = Lpe|Dpf|BsizeFlex*i|Bam|RdtmsHALF|Secrc; + } + + if(ctlr->type == i82575 || ctlr->type == i82576){ + /* + * Setting Qenable in Rxdctl does 
not + * appear to stick unless Ren is on. + */ + csr32w(ctlr, Rctl, Ren|rctl); + r = csr32r(ctlr, Rxdctl); + r |= Qenable; + csr32w(ctlr, Rxdctl, r); + } + csr32w(ctlr, Rctl, rctl); + + if(cttab[ctlr->type].flag & Fert) + csr32w(ctlr, Ert, 1024/8); + + if(ctlr->type == i82566 || ctlr->type == i82567) + csr32w(ctlr, Pbs, 16); + + csr32w(ctlr, Rdbal, PCIWADDR(ctlr->rdba)); + csr32w(ctlr, Rdbah, 0); + csr32w(ctlr, Rdlen, ctlr->nrd * sizeof(Rd)); + ctlr->rdh = 0; + csr32w(ctlr, Rdh, 0); + ctlr->rdt = 0; + csr32w(ctlr, Rdt, 0); + /* keep interrupt moderation, our network is just crazy */ + ctlr->rdtr = 25; /* µs */ + ctlr->radv = 500; /* µs */ + csr32w(ctlr, Rdtr, ctlr->rdtr); + csr32w(ctlr, Radv, ctlr->radv); + + for(i = 0; i < ctlr->nrd; i++) + if((bp = ctlr->rb[i]) != nil){ + ctlr->rb[i] = nil; + freeb(bp); + } + + i82563replenish(ctlr); + + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Rxdctl, 1<rim != 0; +} + +static void +i82563rproc(void* arg) +{ + Rd *rd; + Block *bp; + Ctlr *ctlr; + int r, m, rdh, rim, im; + Ether *edev; + + edev = arg; + ctlr = edev->ctlr; + + i82563rxinit(ctlr); + r = csr32r(ctlr, Rctl); + r |= Ren; + csr32w(ctlr, Rctl, r); + if(cttab[ctlr->type].flag & F75){ + r = csr32r(ctlr, Rxdctl); + r |= Qenable; + csr32w(ctlr, Rxdctl, r); + } + m = ctlr->nrd-1; + + im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack; + for(;;){ + i82563im(ctlr, im); + ctlr->rsleep++; +// coherence(); + sleep(&ctlr->rrendez, i82563rim, ctlr); + + rdh = ctlr->rdh; + for(;;){ + rd = &ctlr->rdba[rdh]; + rim = ctlr->rim; + ctlr->rim = 0; + if(!(rd->status & Rdd)) + break; + + /* + * Accept eop packets with no errors. + * With no errors and the Ixsm bit set, + * the descriptor status Tpcs and Ipcs bits give + * an indication of whether the checksums were + * calculated and valid. + */ + bp = ctlr->rb[rdh]; + if((rd->status & Reop) && rd->errors == 0){ + bp->wp += rd->length; + /* bp->lim = bp->wp; lie like a dog. avoid packblock. 
*/ + if(!(rd->status & Ixsm)){ + ctlr->ixsm++; + if(rd->status & Ipcs){ + /* + * IP checksum calculated + * (and valid as errors == 0). + */ + ctlr->ipcs++; + bp->flag |= Bipck; + } + if(rd->status & Tcpcs){ + /* + * TCP/UDP checksum calculated + * (and valid as errors == 0). + */ + ctlr->tcpcs++; + bp->flag |= Btcpck|Budpck; + } + bp->checksum = rd->checksum; + bp->flag |= Bpktck; + } + etheriq(edev, bp, 1); + } else { + if (rd->status & Reop && rd->errors) + print("%s: input packet error %#ux\n", + cname(ctlr), rd->errors); + freeb(bp); + } + ctlr->rb[rdh] = nil; + rd->status = 0; + ctlr->rdfree--; + ctlr->rdh = rdh = Next(rdh, m); + if(ctlr->nrd-ctlr->rdfree >= 32 || (rim & Rxdmt0)) + i82563replenish(ctlr); + } + } +} + +static int +i82563lim(void* ctrl) +{ + return ((Ctlr*)ctrl)->lim != 0; +} +static int speedtab[] = { + 10, 100, 1000, 0 +}; + +static uint +phyread(Ctlr *c, int phyno, int reg) +{ + uint phy, i; + + csr32w(c, Mdic, MDIrop | phyno<type == i82563){ + if(r >= 16 && r <= 28 && r != 22) + pr = Phypage; + else if(r == 30 || r == 31) + pr = Phyapage; + else + return 0; + return phywrite0(c, phyno, pr, p); + }else if(p == 0) + return 0; + return ~0; +} + +static uint +phywrite(Ctlr *c, uint phyno, uint reg, u16int v) +{ + if(setpage(c, phyno, reg>>8, reg & 0xff) == ~0) + panic("%s: bad phy reg %.4ux", cname(c), reg); + return phywrite0(c, phyno, reg & 0xff, v); +} + +static void +phyerrata(Ether *e, Ctlr *c) +{ + if(e->mbps == 0){ + if(c->phyerrata == 0){ + c->phyerrata++; + phywrite(c, 1, Phyprst, Prst); /* try a port reset */ + print("%s: phy port reset\n", cname(c)); + } + }else + c->phyerrata = 0; +} + +/* + * watch for changes of link state + */ +static void +phylproc(void *v) +{ + uint a, i, phy, r, phyno, phystat, link; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + link = Rtlink; + + if(c->type == i82573 && (phy = phyread(c, 1, Phyier)) != ~0) + phywrite(c, 1, Phyier, phy | Lscie | Ancie | Spdie | Panie); + + phyno = 1; + if(c->type == 
i82579) + phyno = 2; + + phystat = Physsr; + if(c->type == i82579 || c->type == i82580){ + phystat = Phystat; + link = Link; + } + + for(;;){ + phy = phyread(c, phyno, phystat); + if(phy == ~0) + goto next; + if(c->type == i82579 || c->type == i82580) + i = (phy>>8) & 3; + else + i = (phy>>14) & 3; + switch(c->type){ + default: + a = 0; + break; + case i82579: + case i82580: + a = phy & Ans; + break; + case i82563: + case i82578: + case i82578m: + case i82583: + a = phyread(c, phyno, Phyisr) & Ane; + break; + case i82571: + case i82572: + case i82575: + case i82576: + a = phyread(c, phyno, Phylhr) & Anf; + i = (i-1) & 3; + break; + } + if(a){ + r = phyread(c, phyno, Phyctl); + phywrite(c, phyno, Phyctl, r | Ran | Ean); + } + e->link = (phy & link) != 0; + if(e->link == 0) + i = 3; + c->speeds[i]++; + e->mbps = speedtab[i]; + if(c->type == i82563) + phyerrata(e, c); +next: + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +/* + * watch for changes of link state, pcs version + */ +static void +pcslproc(void *v) +{ + uint i, phy; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + + for(;;){ + phy = csr32r(c, Pcsstat); + e->link = phy & Linkok; + i = 3; + if(e->link) + i = (phy & 6) >> 1; + else if(phy & Anbad) + csr32w(c, Pcsctl, csr32r(c, Pcsctl) | Pan | Prestart); + c->speeds[i]++; + e->mbps = speedtab[i]; + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +/* + * watch for changes of link state, serdes version + */ +static void +serdeslproc(void *v) +{ + uint i, tx, rx; + Ctlr *c; + Ether *e; + + e = v; + c = e->ctlr; + + for(;;){ + rx = csr32r(c, Rxcw); + tx = csr32r(c, Txcw); + USED(tx); + e->link = (rx & 1<<31) != 0; +// e->link = (csr32r(c, Status) & Lu) != 0; + i = 3; + if(e->link) + i = 2; + c->speeds[i]++; + e->mbps = speedtab[i]; + c->lim = 0; + i82563im(c, Lsc); + c->lsleep++; + sleep(&c->lrendez, i82563lim, c); + } +} + +static void +i82563tproc(void *v) +{ + Ether *e; 
+ Ctlr *c; + + e = v; + c = e->ctlr; + for(;;){ + sleep(&c->trendez, return0, 0); + i82563transmit(e); + } +} + +static void +i82563attach(Ether* edev) +{ + char name[KNAMELEN]; + Block *bp; + Ctlr *ctlr; + + ctlr = edev->ctlr; + qlock(&ctlr->alock); + if(ctlr->attached){ + qunlock(&ctlr->alock); + return; + } + + ctlr->nrd = Nrd; + ctlr->ntd = Ntd; + + if(waserror()){ + while(ctlr->nrb > 0){ + bp = i82563rballoc(); + bp->free = nil; + freeb(bp); + ctlr->nrb--; + } + free(ctlr->tb); + ctlr->tb = nil; + free(ctlr->rb); + ctlr->rb = nil; + free(ctlr->tdba); + ctlr->tdba = nil; + free(ctlr->rdba); + ctlr->rdba = nil; + qunlock(&ctlr->alock); + nexterror(); + } + + if((ctlr->rdba = mallocalign(ctlr->nrd*sizeof(Rd), 128, 0, 0)) == nil || + (ctlr->tdba = mallocalign(ctlr->ntd*sizeof(Td), 128, 0, 0)) == nil || + (ctlr->rb = malloc(ctlr->nrd*sizeof(Block*))) == nil || + (ctlr->tb = malloc(ctlr->ntd*sizeof(Block*))) == nil) + error(Enomem); + + for(ctlr->nrb = 0; ctlr->nrb < Nrb; ctlr->nrb++){ + if((bp = allocb(ctlr->rbsz + PGSZ)) == nil) + break; + bp->free = i82563rbfree; + freeb(bp); + } + + ctlr->attached = 1; + + snprint(name, sizeof name, "#l%dl", edev->ctlrno); + if((csr32r(ctlr, Ctrlext) & Linkmode) == Serdes) + kproc(name, pcslproc, edev); /* phy based serdes */ + else if(csr32r(ctlr, Status) & Tbimode) + kproc(name, serdeslproc, edev); /* mac based serdes */ + else if(ctlr->type == i82579 || ctlr->type == i82580) + kproc(name, phylproc, edev); + + snprint(name, sizeof name, "#l%dr", edev->ctlrno); + kproc(name, i82563rproc, edev); + + snprint(name, sizeof name, "#l%dt", edev->ctlrno); + kproc(name, i82563tproc, edev); + + i82563txinit(ctlr); + + qunlock(&ctlr->alock); + poperror(); +} + +static void +i82563interrupt(Ureg*, void* arg) +{ + Ctlr *ctlr; + Ether *edev; + int icr, im; + + edev = arg; + ctlr = edev->ctlr; + + ilock(&ctlr->imlock); + csr32w(ctlr, Imc, ~0); + im = ctlr->im; + + while(icr = csr32r(ctlr, Icr) & ctlr->im){ + if(icr & Lsc){ + im &= ~Lsc; + 
ctlr->lim = icr & Lsc; + wakeup(&ctlr->lrendez); + ctlr->lintr++; + } + if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack)){ + ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq|Ack); + wakeup(&ctlr->rrendez); + ctlr->rintr++; + } + if(icr & Txdw){ + im &= ~Txdw; + ctlr->tintr++; + wakeup(&ctlr->trendez); + } + } + + ctlr->im = im; + csr32w(ctlr, Ims, im); + iunlock(&ctlr->imlock); +} + +/* assume misrouted interrupts and check all controllers */ +static void +i82575interrupt(Ureg*, void *) +{ + Ctlr *ctlr; + + for (ctlr = i82563ctlrhead; ctlr != nil; ctlr = ctlr->next) + i82563interrupt(nil, ctlr->edev); +} + +static int +i82563detach(Ctlr* ctlr) +{ + int r, timeo; + + /* + * Perform a device reset to get the chip back to the + * power-on state, followed by an EEPROM reset to read + * the defaults for some internal registers. + */ + csr32w(ctlr, Imc, ~0); + csr32w(ctlr, Rctl, 0); + csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) & ~Ten); + + delay(10); + + r = csr32r(ctlr, Ctrl); + if(ctlr->type == i82566 || ctlr->type == i82567 || ctlr->type == i82579) + r |= Phyrst; + csr32w(ctlr, Ctrl, Devrst | r); + delay(1); + for(timeo = 0;; timeo++){ + if((csr32r(ctlr, Ctrl) & (Devrst|Phyrst)) == 0) + break; + if(timeo >= 1000) + break; + delay(1); + } + if(csr32r(ctlr, Ctrl) & (Devrst|Phyrst)) + return -1; + + r = csr32r(ctlr, Ctrlext); + csr32w(ctlr, Ctrlext, r|Eerst); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if(!(csr32r(ctlr, Ctrlext) & Eerst)) + break; + delay(1); + } + if(csr32r(ctlr, Ctrlext) & Eerst) + return -1; + + csr32w(ctlr, Imc, ~0); + delay(1); + for(timeo = 0; timeo < 1000; timeo++){ + if((csr32r(ctlr, Icr) & ~Rxcfg) == 0) + break; + delay(1); + } + if(csr32r(ctlr, Icr) & ~Rxcfg) + return -1; + /* balance rx/tx packet buffer; survives reset */ + if(ctlr->rbsz > 8192 && cttab[ctlr->type].flag & Fpba){ + ctlr->pba = csr32r(ctlr, Pba); + r = ctlr->pba >> 16; + r += ctlr->pba & 0xffff; + r >>= 1; + csr32w(ctlr, Pba, r); + }else if(ctlr->type == 
i82573 && ctlr->rbsz > 1514) + csr32w(ctlr, Pba, 14); + ctlr->pba = csr32r(ctlr, Pba); + + r = csr32r(ctlr, Ctrl); + csr32w(ctlr, Ctrl, Slu|r); + return 0; +} + +static void +i82563shutdown(Ether* ether) +{ + i82563detach(ether->ctlr); +} + +static u16int +eeread(Ctlr *ctlr, int adr) +{ + csr32w(ctlr, Eerd, EEstart | adr << 2); + while ((csr32r(ctlr, Eerd) & EEdone) == 0) + ; + return csr32r(ctlr, Eerd) >> 16; +} + +static int +eeload(Ctlr *ctlr) +{ + u16int sum; + int data, adr; + + sum = 0; + for (adr = 0; adr < 0x40; adr++) { + data = eeread(ctlr, adr); + ctlr->eeprom[adr] = data; + sum += data; + } + return sum; +} + +static int +fcycle(Ctlr *, Flash *f) +{ + u16int s, i; + + s = f->reg[Fsts]; + if((s&Fvalid) == 0) + return -1; + f->reg[Fsts] |= Fcerr | Ael; + for(i = 0; i < 10; i++){ + if((s&Scip) == 0) + return 0; + delay(1); + s = f->reg[Fsts]; + } + return -1; +} + +static int +fread(Ctlr *c, Flash *f, int ladr) +{ + u16int s; + + delay(1); + if(fcycle(c, f) == -1) + return -1; + f->reg[Fsts] |= Fdone; + f->reg32[Faddr] = ladr; + + /* setup flash control register */ + s = f->reg[Fctl]; + s &= ~(0x1f << 8); + s |= (2-1) << 8; /* 2 bytes */ + s &= ~(2*Flcycle); /* read */ + f->reg[Fctl] = s | Fgo; + + while((f->reg[Fsts] & Fdone) == 0) + ; + if(f->reg[Fsts] & (Fcerr|Ael)) + return -1; + return f->reg32[Fdata] & 0xffff; +} + +static int +fload(Ctlr *c) +{ + u32int data, io, r, adr; + u16int sum; + Flash f; + + io = c->pcidev->mem[1].bar & ~0x0f; + f.reg = vmap(io, c->pcidev->mem[1].size); + if(f.reg == nil) + return -1; + f.reg32 = (void*)f.reg; + f.sz = f.reg32[Bfpr]; + r = f.sz & 0x1fff; + if(csr32r(c, Eec) & 1<<22){ + if(c->type == i82579) + r += 16; /* sector size: 64k */ + else + r += 1; /* sector size: 4k */ + } + r <<= 12; + sum = 0; + for (adr = 0; adr < 0x40; adr++) { + data = fread(c, &f, r + adr*2); + if(data == -1) + return -1; + c->eeprom[adr] = data; + sum += data; + } + vunmap(f.reg, c->pcidev->mem[1].size); + return sum; +} + +static void 
+defaultea(Ctlr *ctlr, uchar *ra) +{ + uint i, r; + uvlong u; + static uchar nilea[Eaddrlen]; + + if(memcmp(ra, nilea, Eaddrlen) != 0) + return; + if(cttab[ctlr->type].flag & Fflashea){ + /* intel mb bug */ + u = (uvlong)csr32r(ctlr, Rah)<<32u | (u32int)csr32r(ctlr, Ral); + for(i = 0; i < Eaddrlen; i++) + ra[i] = u >> 8*i; + } + if(memcmp(ra, nilea, Eaddrlen) != 0) + return; + for(i = 0; i < Eaddrlen/2; i++){ + ra[2*i] = ctlr->eeprom[Ea+i]; + ra[2*i+1] = ctlr->eeprom[Ea+i] >> 8; + } + r = (csr32r(ctlr, Status) & Lanid) >> 2; + ra[5] += r; /* ea ctlr[n] = ea ctlr[0]+n */ +} + +static int +i82563reset(Ctlr *ctlr) +{ + uchar *ra; + int i, r; + + if(i82563detach(ctlr)) + return -1; + if(cttab[ctlr->type].flag & Fload) + r = fload(ctlr); + else + r = eeload(ctlr); + if(r != 0 && r != 0xBABA){ + print("%s: bad EEPROM checksum - %#.4ux\n", + cname(ctlr), r); + return -1; + } + + ra = ctlr->ra; + defaultea(ctlr, ra); + r = ctlr->ra[3]<<24 | ctlr->ra[2]<<16 | ctlr->ra[1]<<8 | ctlr->ra[0]; + csr32w(ctlr, Ral, r); + r = 0x80000000 | ctlr->ra[5]<<8 | ctlr->ra[4]; + csr32w(ctlr, Rah, r); + for(i = 1; i < 16; i++){ + csr32w(ctlr, Ral+i*8, 0); + csr32w(ctlr, Rah+i*8, 0); + } + memset(ctlr->mta, 0, sizeof(ctlr->mta)); + for(i = 0; i < 128; i++) + csr32w(ctlr, Mta + i*4, 0); + /* + * Does autonegotiation affect this manual setting? + * The correct values here should depend on the PBA value + * and maximum frame length, no? + * ctlr->fcrt[lh] are never set, so default to 0. 
+ */ + csr32w(ctlr, Fcal, 0x00C28001); + csr32w(ctlr, Fcah, 0x0100); + if(ctlr->type != i82579) + csr32w(ctlr, Fct, 0x8808); + csr32w(ctlr, Fcttv, 0x0100); + + ctlr->fcrtl = ctlr->fcrth = 0; + // ctlr->fcrtl = 0x00002000; + // ctlr->fcrth = 0x00004000; + csr32w(ctlr, Fcrtl, ctlr->fcrtl); + csr32w(ctlr, Fcrth, ctlr->fcrth); + if(cttab[ctlr->type].flag & F75) + csr32w(ctlr, Eitr, 128<<2); /* 128 ¼ microsecond intervals */ + return 0; +} + +enum { + CMrdtr, + CMradv, + CMpause, + CMan, +}; + +static Cmdtab i82563ctlmsg[] = { + CMrdtr, "rdtr", 2, + CMradv, "radv", 2, + CMpause, "pause", 1, + CMan, "an", 1, +}; + +static long +i82563ctl(Ether *edev, void *buf, long n) +{ + char *p; + u32int v; + Ctlr *ctlr; + Cmdbuf *cb; + Cmdtab *ct; + + if((ctlr = edev->ctlr) == nil) + error(Enonexist); + + cb = parsecmd(buf, n); + if(waserror()){ + free(cb); + nexterror(); + } + + ct = lookupcmd(cb, i82563ctlmsg, nelem(i82563ctlmsg)); + switch(ct->index){ + case CMrdtr: + v = strtoul(cb->f[1], &p, 0); + if(*p || v > 0xffff) + error(Ebadarg); + ctlr->rdtr = v; + csr32w(ctlr, Rdtr, v); + break; + case CMradv: + v = strtoul(cb->f[1], &p, 0); + if(*p || v > 0xffff) + error(Ebadarg); + ctlr->radv = v; + csr32w(ctlr, Radv, v); + break; + case CMpause: + csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) ^ (1<<27 | 1<<28)); + break; + case CMan: + csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) | Lrst | Phyrst); + break; + } + free(cb); + poperror(); + + return n; +} + +static int +didtype(int d) +{ + switch(d){ + case 0x1096: + case 0x10ba: /* “gilgal” */ + // case 0x1098: /* serdes; not seen */ + // case 0x10bb: /* serdes */ + return i82563; + case 0x1049: /* mm */ + case 0x104a: /* dm */ + case 0x104b: /* dc */ + case 0x104d: /* v “ninevah” */ + case 0x10bd: /* dm-2 */ + case 0x294c: /* ich 9 */ + return i82566; + case 0x10de: /* lm ich10d */ + case 0x10df: /* lf ich10 */ + case 0x10e5: /* lm ich9 */ + case 0x10f5: /* lm ich9m; “boazman” */ + return i82567; + case 0x10bf: /* lf ich9m */ + case 0x10cb: /* v 
ich9m */ + case 0x10cd: /* lf ich10 */ + case 0x10ce: /* v ich10 */ + case 0x10cc: /* lm ich10 */ + return i82567m; + case 0x105e: /* eb */ + case 0x105f: /* eb */ + case 0x1060: /* eb */ + case 0x10a4: /* eb */ + case 0x10a5: /* eb fiber */ + case 0x10bc: /* eb */ + case 0x10d9: /* eb serdes */ + case 0x10da: /* eb serdes “ophir” */ + return i82571; + case 0x107d: /* eb copper */ + case 0x107e: /* ei fiber */ + case 0x107f: /* ei */ + case 0x10b9: /* ei “rimon” */ + return i82572; + case 0x108b: /* e “vidalia” */ + case 0x108c: /* e (iamt) */ + case 0x109a: /* l “tekoa” */ + return i82573; + case 0x10d3: /* l or it; “hartwell” */ + return i82574; + case 0x10a7: + case 0x10a9: /* fiber/serdes */ + return i82575; + case 0x10c9: /* copper */ + case 0x10e6: /* fiber */ + case 0x10e7: /* serdes; “kawela” */ + return i82576; + case 0x10ea: /* lc “calpella”; aka pch lan */ + return i82577; + case 0x10eb: /* lm “calpella” */ + return i82577m; + case 0x10ef: /* dc “piketon” */ + return i82578; + case 0x1502: /* lm */ + case 0x1503: /* v */ + return i82579; + case 0x10f0: /* dm “king's creek” */ + return i82578m; + case 0x150e: /* “barton hills” */ + case 0x150f: /* fiber */ + case 0x1510: /* backplane */ + case 0x1511: /* sfp */ + case 0x1516: + return i82580; + case 0x1506: /* v */ + return i82583; + } + return -1; +} + +static void +hbafixup(Pcidev *p) +{ + uint i; + + i = pcicfgr32(p, PciSVID); + if((i & 0xffff) == 0x1b52 && p->did == 1) + p->did = i>>16; +} + +static int +setup(Ctlr *ctlr) +{ + Pcidev *p; + + p = ctlr->pcidev; + ctlr->nic = vmap(ctlr->port, p->mem[0].size); + if(ctlr->nic == nil){ + print("%s: can't map %#llud\n", cname(ctlr), ctlr->port); + return -1; + } + if(i82563reset(ctlr)){ + vunmap(ctlr->nic, p->mem[0].size); + return -1; + } + pcisetbme(ctlr->pcidev); + return 0; +} + +static void +i82563pci(void) +{ + int type; + u32int io; + Ctlr *ctlr; + Pcidev *p; + + p = nil; + while(p = pcimatch(p, 0x8086, 0)){ + hbafixup(p); + if((type = 
didtype(p->did)) == -1) + continue; + ctlr = malloc(sizeof(Ctlr)); + if(ctlr == nil) + error(Enomem); + ctlr->type = type; + ctlr->pcidev = p; + ctlr->rbsz = cttab[type].mtu; + io = p->mem[0].bar & ~0x0F; + ctlr->port = io; + if(i82563ctlrhead != nil) + i82563ctlrtail->next = ctlr; + else + i82563ctlrhead = ctlr; + i82563ctlrtail = ctlr; + } +} + + +static int +pnp(Ether* edev, int type) +{ + Ctlr *ctlr; + static int done; + + if(!done) { + i82563pci(); + done = 1; + } + + /* + * Any adapter matches if no edev->port is supplied, + * otherwise the ports must match. + */ + for(ctlr = i82563ctlrhead; ; ctlr = ctlr->next){ + if(ctlr == nil) + return -1; + if(ctlr->active) + continue; + if(type != Iany && ctlr->type != type) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + memmove(ctlr->ra, edev->ea, Eaddrlen); + if(setup(ctlr) == 0) + break; + } + } + + edev->ctlr = ctlr; + ctlr->edev = edev; /* point back to Ether* */ + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->maxmtu = ctlr->rbsz; + memmove(edev->ea, ctlr->ra, Eaddrlen); + + /* + * Linkage to the generic ethernet driver. + */ + edev->attach = i82563attach; + edev->transmit = i82563transmit; + edev->interrupt = (ctlr->type == i82575? 
+ i82575interrupt: i82563interrupt); + edev->ifstat = i82563ifstat; + edev->ctl = i82563ctl; + + edev->arg = edev; + edev->promiscuous = i82563promiscuous; + edev->shutdown = i82563shutdown; + edev->multicast = i82563multicast; + + return 0; +} + +static int +anypnp(Ether *e) +{ + return pnp(e, Iany); +} + +static int +i82563pnp(Ether *e) +{ + return pnp(e, i82563); +} + +static int +i82566pnp(Ether *e) +{ + return pnp(e, i82566); +} + +static int +i82567pnp(Ether *e) +{ + return pnp(e, i82567m) & pnp(e, i82567); +} + +static int +i82571pnp(Ether *e) +{ + return pnp(e, i82571); +} + +static int +i82572pnp(Ether *e) +{ + return pnp(e, i82572); +} + +static int +i82573pnp(Ether *e) +{ + return pnp(e, i82573); +} + +static int +i82574pnp(Ether *e) +{ + return pnp(e, i82574); +} + +static int +i82575pnp(Ether *e) +{ + return pnp(e, i82575); +} + +static int +i82576pnp(Ether *e) +{ + return pnp(e, i82576); +} + +static int +i82577pnp(Ether *e) +{ + return pnp(e, i82577m) & pnp(e, i82577); +} + +static int +i82578pnp(Ether *e) +{ + return pnp(e, i82578m) & pnp(e, i82578); +} + +static int +i82579pnp(Ether *e) +{ + return pnp(e, i82579); +} + +static int +i82580pnp(Ether *e) +{ + return pnp(e, i82580); +} + +static int +i82583pnp(Ether *e) +{ + return pnp(e, i82583); +} + +void +ether82563link(void) +{ + /* + * recognise lots of model numbers for debugging + * also good for forcing onboard nic(s) as ether0 + * try to make that unnecessary by listing lom first. 
+ */ + addethercard("i82563", i82563pnp); + addethercard("i82566", i82566pnp); + addethercard("i82574", i82574pnp); + addethercard("i82576", i82576pnp); + addethercard("i82567", i82567pnp); + addethercard("i82573", i82573pnp); + + addethercard("i82571", i82571pnp); + addethercard("i82572", i82572pnp); + addethercard("i82575", i82575pnp); + addethercard("i82577", i82577pnp); + addethercard("i82578", i82578pnp); + addethercard("i82579", i82579pnp); + addethercard("i82580", i82580pnp); + addethercard("i82583", i82583pnp); + addethercard("igbepcie", anypnp); +} diff -Nru 0/sys/src/nix/k10/etherbcm.c 4/sys/src/nix/k10/etherbcm.c --- 0/sys/src/nix/k10/etherbcm.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/etherbcm.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,886 @@ +/* + * Broadcom BCM57xx + * Not implemented: + * proper fatal error handling + * multiple rings + * checksum offloading + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" + +#include "etherif.h" +#include "../port/ethermii.h" + +#define dprint(...) 
do{ if(debug)print(__VA_ARGS__); }while(0) +#define Rbsz ROUNDUP(sizeof(Etherpkt)+4, 4) + +typedef struct Ctlr Ctlr; +struct Ctlr { + Lock txlock, imlock; + Ether *ether; + Ctlr *next; + Pcidev *pdev; + u32int *nic, *status; + + u32int *recvret, *recvprod, *sendr; + ulong port; + uint recvreti, recvprodi, sendri, sendcleani; + Block **sends; + Block **rxs; + int active, duplex; + int type; + + uint nobuf; + uint partial; + uint rxerr; + uint qfull; + uint dmaerr; +}; + +enum { + /* configurable constants */ + RxRetRingLen = 0x200, + RxProdRingLen = 0x200, + SendRingLen = 0x200, + + Reset = 1<<0, + Enable = 1<<1, + Attn = 1<<2, + + Pwrctlstat = 0x4C, + + MiscHostCtl = 0x68, + TaggedStatus = 1<<9, + IndirAccessEn = 1<<7, + EnableClockCtl = 1<<5, + PCIStateRegEn = 1<<4, + WordSwap = 1<<3, + ByteSwap = 1<<2, + MaskPCIInt = 1<<1, + ClearIntA = 1<<0, + + Fwmbox = 0x0b50, /* magic value exchange */ + Fwmagic = 0x4b657654, + + Dmarwctl = 0x6C, + DMAWaterMask = ~(7<<19), + DMAWaterValue = 3<<19, + + Memwind = 0x7C, + MemwindData = 0x84, + + SendRCB = 0x100, + RxRetRCB = 0x200, + + InterruptMailbox = 0x204, + + RxProdBDRingIdx = 0x26c, + RxBDRetRingIdx = 0x284, + SendBDRingHostIdx = 0x304, + + MACMode = 0x400, + MACPortMask = ~(1<<3 | 1<<2), + MACPortGMII = 1<<3, + MACPortMII = 1<<2, + MACEnable = 1<<23 | 1<<22 | 1<<21 | 1 << 15 | 1 << 14 | 1<<12 | 1<<11, + MACHalfDuplex = 1<<1, + + MACEventStatus = 0x404, + MACEventEnable = 0x408, + MACAddress = 0x410, + RandomBackoff = 0x438, + RxMTU = 0x43C, + MIComm = 0x44C, + MIStatus = 0x450, + MIMode = 0x454, + RxMACMode = 0x468, + TxMACMode = 0x45C, + TxMACLengths = 0x464, + MACHash = 0x470, + RxRules = 0x480, + + RxRulesConf = 0x500, + LowWaterMax = 0x504, + LowWaterMaxMask = ~0xFFFF, + LowWaterMaxValue = 2, + + SendDataInitiatorMode = 0xC00, + SendInitiatorConf = 0x0C08, + SendStats = 1<<0, + SendInitiatorMask = 0x0C0C, + + SendDataCompletionMode = 0x1000, + SendBDSelectorMode = 0x1400, + SendBDInitiatorMode = 0x1800, + 
SendBDCompletionMode = 0x1C00, + + RxListPlacementMode = 0x2000, + RxListPlacement = 0x2010, + RxListPlacementConf = 0x2014, + RxStats = 1<<0, + RxListPlacementMask = 0x2018, + + RxDataBDInitiatorMode = 0x2400, + RxBDHostAddr = 0x2450, + RxBDFlags = 0x2458, + RxBDNIC = 0x245C, + RxDataCompletionMode = 0x2800, + RxBDInitiatorMode = 0x2C00, + RxBDRepl = 0x2C18, + + RxBDCompletionMode = 0x3000, + HostCoalMode = 0x3C00, + HostCoalRxTicks = 0x3C08, + HostCoalSendTicks = 0x3C0C, + RxMaxCoalFrames = 0x3C10, + SendMaxCoalFrames = 0x3C14, + RxMaxCoalFramesInt = 0x3C20, + SendMaxCoalFramesInt = 0x3C24, + StatusBlockHostAddr = 0x3C38, + FlowAttention = 0x3C48, + + MemArbiterMode = 0x4000, + + BufferManMode = 0x4400, + + MBUFLowWater = 0x4414, + MBUFHighWater = 0x4418, + + ReadDMAMode = 0x4800, + ReadDMAStatus = 0x4804, + WriteDMAMode = 0x4C00, + WriteDMAStatus = 0x4C04, + + RISCState = 0x5004, + FTQReset = 0x5C00, + MSIMode = 0x6000, + + ModeControl = 0x6800, + ByteWordSwap = 1<<4 | 1<<5 | 1<<2, // | 1<<1, + HostStackUp = 1<<16, + HostSendBDs = 1<<17, + InterruptOnMAC = 1<<26, + + MiscConf = 0x6804, + CoreClockBlocksReset = 1<<0, + GPHYPwrdnOverride = 1<<26, + DisableGRCRstOnPpcie = 1<<29, + TimerMask = ~0xFF, + TimerValue = 65<<1, + MiscLocalControl = 0x6808, + InterruptOnAttn = 1<<3, + AutoSEEPROM = 1<<24, + + SwArbitration = 0x7020, + SwArbitSet1 = 1<<1, + SwArbitWon1 = 1<<9, + Pcitlplpl = 0x7C00, /* "lower 1k of the pcie pl regs" ?? 
*/ + + PhyAuxControl = 0x18, + PhyIntStatus = 0x1A, + PhyIntMask = 0x1B, + + Updated = 1<<0, + LinkStateChange = 1<<1, + Error = 1<<2, + + PacketEnd = 1<<2, + FrameError = 1<<10, +}; + +enum { + b5722, + b5751, + b5754, + b5755, + b5756, + b5782, + b5787, + b5906, + Nctlrtype, +}; + +typedef struct Ctlrtype Ctlrtype; +struct Ctlrtype { + int mtu; + int flag; + char *name; +}; + +static Ctlrtype cttab[Nctlrtype] = { +[b5722] 1514, 0, "b5722", +[b5751] 1514, 0, "b5751", +[b5754] 1514, 0, "b5754", +[b5755] 1514, 0, "b5755", +[b5756] 1514, 0, "b5756", +[b5782] 1514, 0, "b5782", +[b5787] 1514, 0, "b5787", +[b5906] 1514, 0, "b5906", +}; + +#define csr32(c, r) ((c)->nic[(r)/4]) + +static Ctlr *bcmhead; +static int debug=1; + +static char* +cname(Ctlr *c) +{ + return cttab[c->type].name; +} + +static long +bcmifstat(Ether *edev, void *a, long n, ulong offset) +{ + char *s, *p, *e; + Ctlr *c; + + c = edev->ctlr; + p = s = malloc(READSTR); + e = p + READSTR; + + p = seprint(p, e, "nobuf %ud\n", c->nobuf); + p = seprint(p, e, "partial %ud\n", c->partial); + p = seprint(p, e, "rxerr %ud\n", c->rxerr); + p = seprint(p, e, "qfull %ud\n", c->qfull); + p = seprint(p, e, "dmaerr %ud\n", c->dmaerr); + p = seprint(p, e, "type: %s\n", cname(c)); + + USED(p); + n = readstr(offset, a, n, s); + free(s); + + return n; +} + +enum { + Phybusy = 1<<29, + Phyrdfail = 1<<28, + Phyrd = 1<<27, + Phywr = 1<<26, +}; +Lock miilock; + +static uint +miiwait(Ctlr *ctlr) +{ + uint i, v; + + for(i = 0; i < 100; i += 5){ + microdelay(10); + v = csr32(ctlr, MIComm); + if((v & Phybusy) == 0){ + microdelay(5); + return csr32(ctlr, MIComm); + } + microdelay(5); + } + print("#l%d: bcm: miiwait: timeout\n", ctlr->ether->ctlrno); + return ~0; +} + +static int +miir(Ctlr *ctlr, int r) +{ + uint v, phyno; + + phyno = 1; + lock(&miilock); + csr32(ctlr, MIComm) = r<<16 | phyno<<21 | Phyrd | Phybusy; + v = miiwait(ctlr); + unlock(&miilock); + if(v == ~0) + return -1; + if(v & Phyrdfail){ + print("#l%d: bcm: miir: 
fail\n", ctlr->ether->ctlrno); + return -1; + } + return v & 0xffff; +} + +static int +miiw(Ctlr *ctlr, int r, int v) +{ + uint phyno, w; + + phyno = 1; + lock(&miilock); + csr32(ctlr, MIComm) = r<<16 | v&0xffff | phyno<<21 | Phywr | Phybusy; + w = miiwait(ctlr); + unlock(&miilock); + if(w == ~0) + return -1; + return 0; +} + +static void +checklink(Ether *edev) +{ + uint i; + Ctlr *ctlr; + + ctlr = edev->ctlr; + miir(ctlr, Bmsr); /* read twice for current status as per 802.3 */ + if(!(miir(ctlr, Bmsr) & BmsrLs)) { + edev->link = 0; + edev->mbps = 1000; + ctlr->duplex = 1; + dprint("bcm: no link\n"); + goto out; + } + edev->link = 1; + while((miir(ctlr, Bmsr) & BmsrAnc) == 0) + ; + i = miir(ctlr, Mssr); + if(i & (Mssr1000THD | Mssr1000TFD)) { + edev->mbps = 1000; + ctlr->duplex = (i & Mssr1000TFD) != 0; + } else if(i = miir(ctlr, Anlpar), i & (AnaTXHD | AnaTXFD)) { + edev->mbps = 100; + ctlr->duplex = (i & AnaTXFD) != 0; + } else if(i & (Ana10HD | Ana10FD)) { + edev->mbps = 10; + ctlr->duplex = (i & Ana10FD) != 0; + } else { + edev->link = 0; + edev->mbps = 1000; + ctlr->duplex = 1; + dprint("bcm: link partner supports neither 10/100/1000 Mbps\n"); + goto out; + } + dprint("bcm: %d Mbps link, %s duplex\n", edev->mbps, ctlr->duplex ? 
"full" : "half"); +out: + if(ctlr->duplex) + csr32(ctlr, MACMode) &= ~MACHalfDuplex; + else + csr32(ctlr, MACMode) |= MACHalfDuplex; + if(edev->mbps >= 1000) + csr32(ctlr, MACMode) = (csr32(ctlr, MACMode) & MACPortMask) | MACPortGMII; + else + csr32(ctlr, MACMode) = (csr32(ctlr, MACMode) & MACPortMask) | MACPortMII; + csr32(ctlr, MACEventStatus) |= (1<<4) | (1<<3); /* undocumented bits (sync and config changed) */ +} + +static uint* +currentrecvret(Ctlr *ctlr) +{ + if(ctlr->recvreti == (ctlr->status[4] & 0xFFFF)) + return 0; + return ctlr->recvret + ctlr->recvreti * 8; +} + +static void +consumerecvret(Ctlr *ctlr) +{ + ctlr->recvreti = ctlr->recvreti+1 & RxRetRingLen-1; + csr32(ctlr, RxBDRetRingIdx) = ctlr->recvreti; +} + +static int +replenish(Ctlr *ctlr) +{ + uint incr; + u32int *next; + Block *bp; + + incr = (ctlr->recvprodi + 1) & (RxProdRingLen - 1); + if(incr == (ctlr->status[2] >> 16)) + return -1; + bp = iallocb(Rbsz); + if(bp == nil) { + /* iallocb never fails. this code is unnecessary */ + dprint("bcm: out of memory for receive buffers\n"); + ctlr->nobuf++; + return -1; + } + next = ctlr->recvprod + ctlr->recvprodi * 8; + memset(next, 0, 32); + next[0] = Pciwaddrh(bp->rp); + next[1] = Pciwaddrl(bp->rp); + next[2] = Rbsz; + next[7] = ctlr->recvprodi; + ctlr->rxs[ctlr->recvprodi] = bp; + coherence(); + csr32(ctlr, RxProdBDRingIdx) = ctlr->recvprodi = incr; + return 0; +} + +static void +bcmreceive(Ether *edev) +{ + uint len; + u32int *pkt; + Ctlr *ctlr; + Block *bp; + + ctlr = edev->ctlr; + for(; pkt = currentrecvret(ctlr); replenish(ctlr), consumerecvret(ctlr)) { + bp = ctlr->rxs[pkt[7]]; + len = pkt[2] & 0xFFFF; + bp->wp = bp->rp + len; + if((pkt[3] & PacketEnd) == 0){ + dprint("bcm: partial frame received -- shouldn't happen\n"); + ctlr->partial++; + freeb(bp); + continue; + } + if(pkt[3] & FrameError){ + ctlr->rxerr++; + freeb(bp); + continue; + } + etheriq(edev, bp, 1); + } +} + +static void +bcmtransclean(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = 
edev->ctlr; + ilock(&ctlr->txlock); + while(ctlr->sendcleani != (ctlr->status[4] >> 16)) { + freeb(ctlr->sends[ctlr->sendcleani]); + ctlr->sends[ctlr->sendcleani] = nil; + ctlr->sendcleani = (ctlr->sendcleani + 1) & (SendRingLen - 1); + } + iunlock(&ctlr->txlock); +} + +static void +bcmtransmit(Ether *edev) +{ + uint incr; + u32int *next; + Ctlr *ctlr; + Block *bp; + + ctlr = edev->ctlr; + ilock(&ctlr->txlock); + for(;;){ + incr = (ctlr->sendri + 1) & (SendRingLen - 1); + if(incr == ctlr->sendcleani) { + dprint("bcm: send queue full\n"); + ctlr->qfull++; + break; + } + bp = qget(edev->oq); + if(bp == nil) + break; + next = ctlr->sendr + ctlr->sendri * 4; + next[0] = Pciwaddrh(bp->rp); + next[1] = Pciwaddrl(bp->rp); + next[2] = (BLEN(bp) << 16) | PacketEnd; + next[3] = 0; + ctlr->sends[ctlr->sendri] = bp; + coherence(); + csr32(ctlr, SendBDRingHostIdx) = ctlr->sendri = incr; + } + iunlock(&ctlr->txlock); +} + +static void +bcmerror(Ether *edev) +{ + Ctlr *ctlr; + + ctlr = edev->ctlr; + if(csr32(ctlr, FlowAttention)) { + if(csr32(ctlr, FlowAttention) & 0xf8ff8080) + print("bcm: fatal error %#.8ux", csr32(ctlr, FlowAttention)); + csr32(ctlr, FlowAttention) = 0; + } + csr32(ctlr, MACEventStatus) = 0; /* worth ignoring */ + if(csr32(ctlr, ReadDMAStatus) || csr32(ctlr, WriteDMAStatus)) { + dprint("bcm: DMA error\n"); + ctlr->dmaerr++; + csr32(ctlr, ReadDMAStatus) = 0; + csr32(ctlr, WriteDMAStatus) = 0; + } + if(csr32(ctlr, RISCState)) { + if(csr32(ctlr, RISCState) & 0x78000403) + print("bcm: RISC halted %#.8ux", csr32(ctlr, RISCState)); + csr32(ctlr, RISCState) = 0; + } +} + +static void +bcminterrupt(Ureg*, void *arg) +{ + u32int status, tag, dummy; + Ether *edev; + Ctlr *ctlr; + + edev = arg; + ctlr = edev->ctlr; + ilock(&ctlr->imlock); + dummy = csr32(ctlr, InterruptMailbox); + USED(dummy); + csr32(ctlr, InterruptMailbox) = 1; + status = ctlr->status[0]; + tag = ctlr->status[1]; + ctlr->status[0] = 0; + if(status & Error) + bcmerror(edev); + if(status & 
LinkStateChange) + checklink(edev); + if(0) + iprint("bcm: interrupt %.8ux %.8ux\n", ctlr->status[2], ctlr->status[4]); + bcmreceive(edev); + bcmtransclean(edev); + bcmtransmit(edev); + csr32(ctlr, InterruptMailbox) = tag << 24; + iunlock(&ctlr->imlock); +} + +static void +mem32w(Ctlr *c, uint r, uint v) +{ + pcicfgw32(c->pdev, Memwind, r); + pcicfgw32(c->pdev, MemwindData, v); +} + +static u32int +mem32r(Ctlr *c, uint r) +{ + u32int v; + + pcicfgw32(c->pdev, Memwind, r); + v = pcicfgr32(c->pdev, MemwindData); + pcicfgw32(c->pdev, Memwind, 0); + return v; +} + +static int +bcmµwait(Ctlr *ctlr, uint to, uint r, uint m, uint v) +{ + int i; + + for(i = 0;; i += 100){ + if((csr32(ctlr, r) & m) == v) + return 0; + if(i == to /* µs */) + return -1; + microdelay(100); + } +} + +static int +bcminit(Ether *edev) +{ + uint i; + u32int j; + Ctlr *ctlr; + + ctlr = edev->ctlr; + dprint("bcm: reset\n"); + /* initialization procedure according to the datasheet */ + csr32(ctlr, MiscHostCtl) |= MaskPCIInt | ClearIntA | WordSwap | IndirAccessEn; + csr32(ctlr, SwArbitration) |= SwArbitSet1; + if(bcmµwait(ctlr, 2000, SwArbitration, SwArbitWon1, SwArbitWon1) == -1){ + print("bcm: arbiter failed to respond\n"); + return -1; + } + csr32(ctlr, MemArbiterMode) |= Enable; + csr32(ctlr, MiscHostCtl) = WordSwap | IndirAccessEn | PCIStateRegEn | EnableClockCtl + | MaskPCIInt | ClearIntA; + csr32(ctlr, Memwind) = 0; + mem32w(ctlr, Fwmbox, Fwmagic); + csr32(ctlr, MiscConf) |= GPHYPwrdnOverride | DisableGRCRstOnPpcie | CoreClockBlocksReset; + delay(100); + pcicfgw32(ctlr->pdev, PciPCR, ctlr->pdev->pcr); /* restore pci bits lost */ + csr32(ctlr, MiscHostCtl) |= MaskPCIInt | ClearIntA; + csr32(ctlr, MemArbiterMode) |= Enable; + csr32(ctlr, MiscHostCtl) |= WordSwap | IndirAccessEn | PCIStateRegEn | EnableClockCtl | TaggedStatus; + csr32(ctlr, ModeControl) |= ByteWordSwap; + csr32(ctlr, MACMode) = (csr32(ctlr, MACMode) & MACPortMask) | MACPortGMII; + delay(40); + for(i = 0;; i += 100){ + 
if(mem32r(ctlr, Fwmbox) == ~Fwmagic) + break; + if(i == 20*10000 /* µs */){ + print("bcm: fw failed to respond %#.8ux\n", mem32r(ctlr, Fwmbox)); + break; //return -1; + } + microdelay(100); + } + /* + * there appears to be no justification for setting these bits in any driver + * i can find. nor to i have a datasheet that recommends this. - quanstro + * csr32(ctlr, Pcitlplpl) |= 1<<25 | 1<<29; + */ + memset(ctlr->status, 0, 20); + csr32(ctlr, Dmarwctl) = (csr32(ctlr, Dmarwctl) & DMAWaterMask) | DMAWaterValue; + csr32(ctlr, ModeControl) |= HostSendBDs | HostStackUp | InterruptOnMAC; + csr32(ctlr, MiscConf) = (csr32(ctlr, MiscConf) & TimerMask) | TimerValue; + csr32(ctlr, MBUFLowWater) = 0x20; + csr32(ctlr, MBUFHighWater) = 0x60; + csr32(ctlr, LowWaterMax) = (csr32(ctlr, LowWaterMax) & LowWaterMaxMask) | LowWaterMaxValue; + csr32(ctlr, BufferManMode) |= Enable | Attn; + if(bcmµwait(ctlr, 2000, BufferManMode, Enable, Enable) == -1){ + print("bcm: failed to enable buffers\n"); + return -1; + } + csr32(ctlr, FTQReset) = ~0; + csr32(ctlr, FTQReset) = 0; + if(bcmµwait(ctlr, 2000, FTQReset, ~0, 0) == -1){ + print("bcm: failed to bring ftq out of reset\n"); + return -1; + } + csr32(ctlr, RxBDHostAddr) = Pciwaddrh(ctlr->recvprod); + csr32(ctlr, RxBDHostAddr + 4) = Pciwaddrl(ctlr->recvprod); + csr32(ctlr, RxBDFlags) = RxProdRingLen << 16; + csr32(ctlr, RxBDNIC) = 0x6000; + csr32(ctlr, RxBDRepl) = 25; + csr32(ctlr, SendBDRingHostIdx) = 0; + csr32(ctlr, SendBDRingHostIdx+4) = 0; + mem32w(ctlr, SendRCB, Pciwaddrh(ctlr->sendr)); + mem32w(ctlr, SendRCB + 4, Pciwaddrl(ctlr->sendr)); + mem32w(ctlr, SendRCB + 8, SendRingLen << 16); + mem32w(ctlr, SendRCB + 12, 0x4000); + for(i=1; i<4; i++) + mem32w(ctlr, RxRetRCB + i * 0x10 + 8, 2); + mem32w(ctlr, RxRetRCB, Pciwaddrh(ctlr->recvret)); + mem32w(ctlr, RxRetRCB + 4, Pciwaddrl(ctlr->recvret)); + mem32w(ctlr, RxRetRCB + 8, RxRetRingLen << 16); + csr32(ctlr, RxProdBDRingIdx) = 0; + csr32(ctlr, RxProdBDRingIdx+4) = 0; + /* this delay is not 
in the datasheet, but necessary */ + delay(1); + i = csr32(ctlr, MACAddress); + j = edev->ea[0] = i >> 8; + j += edev->ea[1] = i; + i = csr32(ctlr, MACAddress + 4); + j += edev->ea[2] = i >> 24; + j += edev->ea[3] = i >> 16; + j += edev->ea[4] = i >> 8; + j += edev->ea[5] = i; + csr32(ctlr, RandomBackoff) = j & 0x3FF; + csr32(ctlr, RxMTU) = Rbsz; + csr32(ctlr, TxMACLengths) = 0x2620; + csr32(ctlr, RxListPlacement) = 1<<3; /* one list */ + csr32(ctlr, RxListPlacementMask) = 0xFFFFFF; + csr32(ctlr, RxListPlacementConf) |= RxStats; + csr32(ctlr, SendInitiatorMask) = 0xFFFFFF; + csr32(ctlr, SendInitiatorConf) |= SendStats; + csr32(ctlr, HostCoalMode) = 0; + if(bcmµwait(ctlr, 2000, HostCoalMode, ~0, 0) == -1){ + print("bcm: failed to unset coalescing\n"); + return -1; + } + csr32(ctlr, HostCoalRxTicks) = 150; + csr32(ctlr, HostCoalSendTicks) = 150; + csr32(ctlr, RxMaxCoalFrames) = 10; + csr32(ctlr, SendMaxCoalFrames) = 10; + csr32(ctlr, RxMaxCoalFramesInt) = 0; + csr32(ctlr, SendMaxCoalFramesInt) = 0; + csr32(ctlr, StatusBlockHostAddr) = Pciwaddrh(ctlr->status); + csr32(ctlr, StatusBlockHostAddr + 4) = Pciwaddrl(ctlr->status); + csr32(ctlr, HostCoalMode) |= Enable; + csr32(ctlr, RxBDCompletionMode) |= Enable | Attn; + csr32(ctlr, RxListPlacementMode) |= Enable; + csr32(ctlr, MACMode) |= MACEnable; + csr32(ctlr, MiscLocalControl) |= InterruptOnAttn | AutoSEEPROM; + csr32(ctlr, InterruptMailbox) = 0; + csr32(ctlr, WriteDMAMode) |= 0x200003fe; /* pulled out of my nose */ + csr32(ctlr, ReadDMAMode) |= 0x3fe; + csr32(ctlr, RxDataCompletionMode) |= Enable | Attn; + csr32(ctlr, SendDataCompletionMode) |= Enable; + csr32(ctlr, SendBDCompletionMode) |= Enable | Attn; + csr32(ctlr, RxBDInitiatorMode) |= Enable | Attn; + csr32(ctlr, RxDataBDInitiatorMode) |= Enable | (1<<4); + csr32(ctlr, SendDataInitiatorMode) |= Enable; + csr32(ctlr, SendBDInitiatorMode) |= Enable | Attn; + csr32(ctlr, SendBDSelectorMode) |= Enable | Attn; + ctlr->recvprodi = 0; + while(replenish(ctlr) >= 0) + ; 
+ csr32(ctlr, TxMACMode) |= Enable; + csr32(ctlr, RxMACMode) |= Enable; + csr32(ctlr, Pwrctlstat) &= ~3; + csr32(ctlr, MIStatus) |= 1<<0; + csr32(ctlr, MACEventEnable) = 0; + csr32(ctlr, MACEventStatus) |= (1<<12); + csr32(ctlr, MIMode) = 0xC0000; /* set base mii clock */ + microdelay(40); + + if(0){ + /* bug (ours): can't reset phy without dropping into 100mbit mode */ + miiw(ctlr, Bmcr, BmcrR); + for(i = 0;; i += 100){ + if((miir(ctlr, Bmcr) & BmcrR) == 0) + break; + if(i == 10000 /* µs */){ + print("bcm: phy reset failure\n"); + return -1; + } + microdelay(100); + } + } + miiw(ctlr, Bmcr, BmcrAne | BmcrRan); + + miiw(ctlr, PhyAuxControl, 2); + miir(ctlr, PhyIntStatus); + miir(ctlr, PhyIntStatus); + miiw(ctlr, PhyIntMask, ~(1<<1)); + csr32(ctlr, MACEventEnable) |= 1<<12; + for(i = 0; i < 4; i++) + csr32(ctlr, MACHash + 4*i) = ~0; + for(i = 0; i < 8; i++) + csr32(ctlr, RxRules + 8 * i) = 0; + csr32(ctlr, RxRulesConf) = 1 << 3; + csr32(ctlr, MSIMode) |= Enable; + csr32(ctlr, MiscHostCtl) &= ~(MaskPCIInt | ClearIntA); + dprint("bcm: reset: fin\n"); + return 0; +} + +static int +didtype(Pcidev *p) +{ + if(p->vid != 0x14e4) + return -1; + + switch(p->did){ + default: + return -1; + case 0x165a: /* 5722 gbe */ + return b5722; + case 0x1670: /* ?? 
*/ + return b5751; + case 0x1672: /* 5754m */ + return b5754; + case 0x1673: /* 5755m gbe */ + return b5755; + case 0x1674: /* 5756me gbe */ + return b5756; + case 0x1677: /* 5751 gbe */ + return b5751; + case 0x167a: /* 5754 gbe */ + return b5754; + case 0x167b: /* 5755 gbe */ + return b5755; + case 0x1693: /* 5787m gbe */ + return b5787; + case 0x1696: /* 5782 gbe; steve */ + return b5782; + case 0x169b: /* 5787 gbe */ + return b5787; + case 0x1712: /* 5906 fast */ + case 0x1713: /* 5906m fast */ + return b5906; + case 0x167d: /* 5751m gbe */ + case 0x167e: /* 5751f fast */ + return b5751; + } +} + +static void +bcmpci(void) +{ + int type; + void *mem; + Ctlr *ctlr, **xx; + Pcidev *p; + + xx = &bcmhead; + for(p = nil; p = pcimatch(p, 0, 0); ) { + if(p->ccrb != 2 || p->ccru != 0 || (type = didtype(p)) == -1) + continue; + pcisetbme(p); + pcisetpms(p, 0); + ctlr = malloc(sizeof(Ctlr)); + if(ctlr == nil) + continue; + ctlr->type = type; + ctlr->port = p->mem[0].bar & ~0x0F; + mem = vmap(ctlr->port, p->mem[0].size); + if(mem == nil) { + print("bcm: can't map %#p\n", (uvlong)ctlr->port); + free(ctlr); + continue; + } + ctlr->pdev = p; + ctlr->nic = mem; + ctlr->status = mallocalign(20, 16, 0, 0); + ctlr->recvprod = mallocalign(32 * RxProdRingLen, 16, 0, 0); + ctlr->recvret = mallocalign(32 * RxRetRingLen, 16, 0, 0); + ctlr->sendr = mallocalign(16 * SendRingLen, 16, 0, 0); + ctlr->sends = malloc(sizeof *ctlr->sends * SendRingLen); /* one Block* per send descriptor */ + ctlr->rxs = malloc(sizeof *ctlr->rxs * RxProdRingLen); /* one Block* per rx producer slot; indexed by recvprodi */ + *xx = ctlr; + xx = &ctlr->next; + } +} + +static void +bcmpromiscuous(void* arg, int on) +{ + Ctlr *ctlr; + + ctlr = ((Ether*)arg)->ctlr; + if(on) + csr32(ctlr, RxMACMode) |= 1<<8; + else + csr32(ctlr, RxMACMode) &= ~(1<<8); +} + +static void +bcmmulticast(void*, uchar*, int) +{ +} + +static int +bcmpnp(Ether* edev) +{ + Ctlr *ctlr; + static int done; + + if(done == 0){ + bcmpci(); + done = 1; + } + +redux: + for(ctlr = bcmhead; ; ctlr = ctlr->next) { + if(ctlr == nil) + return -1; + 
if(ctlr->active) + continue; + if(edev->port == 0 || edev->port == ctlr->port) { + ctlr->active = 1; + break; + } + } + + ctlr->ether = edev; + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pdev->intl; + edev->tbdf = ctlr->pdev->tbdf; + edev->interrupt = bcminterrupt; + edev->ifstat = bcmifstat; + edev->transmit = bcmtransmit; + edev->multicast = bcmmulticast; + edev->promiscuous = bcmpromiscuous; + edev->arg = edev; + edev->mbps = 1000; + + if(bcminit(edev) == -1) + goto redux; + return 0; +} + +void +etherbcmlink(void) +{ + addethercard("bcm57xx", bcmpnp); +} diff -Nru 0/sys/src/nix/k10/etherif.h 4/sys/src/nix/k10/etherif.h --- 0/sys/src/nix/k10/etherif.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/etherif.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,51 @@ +enum +{ + Eaddrlen = 6, + ETHERMINTU = 60, /* minimum transmit size */ + ETHERMAXTU = 1514, /* maximum transmit size */ + ETHERHDRSIZE = 14, /* size of an ethernet header */ + + MaxEther = 48, + Ntypes = 8, +}; + +typedef struct Ether Ether; +struct Ether { + ISAConf; /* hardware info */ + + int ctlrno; + int tbdf; /* type+busno+devno+funcno */ + uchar ea[Eaddrlen]; + + void (*attach)(Ether*); /* filled in by reset routine */ + void (*detach)(Ether*); + void (*transmit)(Ether*); + void (*interrupt)(Ureg*, void*); + long (*ifstat)(Ether*, void*, long, ulong); + long (*ctl)(Ether*, void*, long); /* custom ctl messages */ + void (*power)(Ether*, int); /* power on/off */ + void (*shutdown)(Ether*); /* shutdown hardware before reboot */ + void *ctlr; + + int scan[Ntypes]; /* base station scanning interval */ + int nscan; /* number of base station scanners */ + + Netif; +}; + +typedef struct Etherpkt Etherpkt; +struct Etherpkt +{ + uchar d[Eaddrlen]; + uchar s[Eaddrlen]; + uchar type[2]; + uchar data[1500]; +}; + +extern Block* etheriq(Ether*, Block*, int); +extern void addethercard(char*, int(*)(Ether*)); +extern ulong ethercrc(uchar*, int); +extern int parseether(uchar*, char*); + +#define NEXT(x, l) 
(((x)+1)%(l)) +#define PREV(x, l) (((x) == 0) ? (l)-1: (x)-1) diff -Nru 0/sys/src/nix/k10/fns.h 4/sys/src/nix/k10/fns.h --- 0/sys/src/nix/k10/fns.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/fns.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,243 @@ +#include "../port/portfns.h" +void aamloop(int); +void acidthandlers(void); +void acinit(void); +void acmmuswitch(void); +void acmodeset(int); +int acpiinit(void); +void acquiesce(void); +void acsysret(void); +void actouser(void); +void actrapenable(int, char* (*)(Ureg*, void*), void*, char*); +Dirtab* addarchfile(char*, int, long(*)(Chan*,void*,long,vlong), long(*)(Chan*,void*,long,vlong)); +void archfmtinstall(void); +vlong archhz(void); +void archidle(void); +void archinit(void); +int archmmu(void); +void archreset(void); +uvlong asmalloc(uintmem, uintmem, int, int); +int asmfree(uintmem, uintmem, int); +void asminit(void); +void asmmapinit(uintmem, uintmem, int); +void asmmodinit(u32int, u32int, char*); +void cgaconsputs(char*, int); +void cgainit(void); +void cgapost(int); +int changerole(int, int); +void checkpa(char*, uintmem); +#define clearmmucache() /* x86 doesn't have one */ +void (*coherence)(void); +int corecolor(int); +u32int cpuid(u32int, u32int, u32int[4]); +u64int cr0get(void); +void cr0put(u64int); +u64int cr2get(void); +u64int cr3get(void); +void cr3put(u64int); +u64int cr4get(void); +void cr4put(u64int); +int dbgprint(char*, ...); +int decref(Ref*); +void delay(int); +void dumpmmu(Proc*); +void dumpmmuwalk(uintmem pa); +void dumpptepg(int lvl,uintmem pa); +#define evenaddr(x) /* x86 doesn't care */ +int fpudevprocio(Proc*, void*, long, vlong, int); +void fpuinit(void); +void fpunoted(void); +void fpunotify(Ureg*); +void fpuprocrestore(Proc*); +void fpuprocsave(Proc*); +void fpusysprocsetup(Proc*); +void fpusysrfork(Ureg*); +void fpusysrforkchild(Proc*, Proc*); +void gdtget(void*); +void gdtput(int, u64int, u16int); +Mach* getac(Proc*, int); +char* getconf(char*); +void halt(void); +void hardhalt(void); +int 
i8042auxcmd(int); +int i8042auxcmds(uchar*, int); +void i8042auxenable(void (*)(int, int)); +void i8042reset(void); +void* i8250alloc(int, int, int); +Uart* i8250console(char*); +vlong i8254hz(u32int[2][4]); +void idlehands(void); +void idthandlers(void); +void idtput(int, u64int); +int inb(int); +int incref(Ref*); +ulong inl(int); +void insb(int, void*, int); +ushort ins(int); +void insl(int, void*, int); +void inss(int, void*, int); +void intrac(Proc*); +int intrdisable(void*); +void* intrenable(int, void (*)(Ureg*, void*), void*, int, char*); +void invlpg(uintptr); +int ioalloc(int, int, int, char*); +void iofree(int); +void ioinit(void); +int ioreserve(int, int, int, char*); +int iounused(int, int); +int iprint(char*, ...); +int isaconfig(char*, int, ISAConf*); +int isbooting(Mach *mp); +int islo(void); +void kbdenable(void); +void kbdinit(void); +void kexit(Ureg*); +#define kmapinval() +void lfence(void); +void links(void); +void mach0init(void); +void machinit(void); +void meminit(void); +void mfence(void); +void mmuflushtlb(uintmem); +void mmuinit(void); +int mmukmapsync(uintmem); +uintptr mmukmap(uintptr, uintmem, usize); +uintmem mmuphysaddr(uintptr); +int mmuwalk(PTE*, uintptr, int, PTE**, uintmem (*)(usize)); +int multiboot(u32int, u32int, int); +void ndnr(void); +void noerrorsleft(void); +uchar nvramread(int); +void nvramwrite(int, uchar); +void optionsinit(char*); +void outb(int, int); +void outl(int, ulong); +void outsb(int, void*, int); +void outs(int, ushort); +void outsl(int, void*, int); +void outss(int, void*, int); +int pcicap(Pcidev*, int); +int pcicfgr16(Pcidev*, int); +int pcicfgr32(Pcidev*, int); +int pcicfgr8(Pcidev*, int); +void pcicfgw16(Pcidev*, int, int); +void pcicfgw32(Pcidev*, int, int); +void pcicfgw8(Pcidev*, int, int); +void pciclrbme(Pcidev*); +void pciclrmwi(Pcidev*); +int pcigetpms(Pcidev*); +void pcihinv(Pcidev*); +Pcidev* pcimatch(Pcidev*, int, int); +Pcidev* pcimatchtbdf(int); +void pcireset(void); +void pcisetbme(Pcidev*); 
+void pcisetmwi(Pcidev*); +int pcisetpms(Pcidev*, int); +int pickcore(int, int); +void printcpufreq(void); +void putac(Mach*); +u64int rdmsr(u32int); +u64int rdtsc(void); +void rolestable(Mach *mp); +void runacore(void); +void runapcore(int); +int screenprint(char*, ...); /* debugging */ +void sfence(void); +void spldone(void); +Mpl splhi(void); +Mpl spllo(void); +void splx(Mpl); +void splxpc(Mpl); +void stopac(void); +void syncclock(void); +void syscall(int scallnr, Ureg* ureg); +void* sysexecregs(uintptr, usize, uint); +uintptr sysexecstack(uintptr, int); +void sysprocsetup(Proc*); +void tcquiesce(void); +void trap(Ureg*); +void trapenable(int, void (*)(Ureg*, void*), void*, char*); +void trapinit(void); +void trput(u64int); +void tssrsp0(u64int); +void umeminit(void); +int userureg(Ureg*); +void* vmap(uintmem, usize); +void vsvminit(int, int); +void vunmap(void*, usize); +void wrmsr(u32int, u64int); + +int cas32(void*, u32int, u32int); +int cas64(void*, u64int, u64int); +int tas32(void*); +u64int fas64(u64int*, u64int); + +#define CASU(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) +#define CASV(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) +#define CASP(p, e, n) cas64((p), (u64int)(e), (u64int)(n)) +#define CASW(p, e, n) cas32((p), (e), (n)) +#define TAS(addr) tas32((addr)) +#define FASP(p, v) ((void*)fas64((u64int*)(p), (u64int)(v))) + +void touser(uintptr); +void syscallentry(void); +void acsyscallentry(void); +void syscallreturn(void); +void sysrforkret(void); + +#define waserror() (up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1])) + +#define PTR2UINT(p) ((uintptr)(p)) +#define UINT2PTR(i) ((void*)(i)) + +void* KADDR(uintmem); +uintmem PADDR(void*); + +#define BIOSSEG(a) KADDR(((uint)(a))<<4) + +/* + * apic.c + */ +int apiceoi(int); +void apicipi(int); +void apicinit(int, uintmem, int); +int apicisr(int); +int apiconline(void); +void apicpri(int); +void apicsipi(int, uintmem); +void apicnipi(int); + +void ioapicinit(int, int, uintmem); +void 
ioapicintrinit(int, int, int, int, u32int); +void ioapiconline(void); + +/* + * archk10.c + */ +void millidelay(int); +void k10waitwhile(void*, uintptr); + +/* + * i8259.c + */ +int i8259init(int); +int i8259isr(int); + +/* + * mp.c + */ +void mpsinit(int); +int mpacpi(int); + +/* + * sipi.c + */ +void sipiall(void); +int sipicore(int); + +/* + * debug + */ +void HERE(void); +void DONE(void); diff -Nru 0/sys/src/nix/k10/fpu.c 4/sys/src/nix/k10/fpu.c --- 0/sys/src/nix/k10/fpu.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/fpu.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,540 @@ +/* + * SIMD Floating Point. + * Assembler support to get at the individual instructions + * is in l64fpu.s. + * There are opportunities to be lazier about saving and + * restoring the state and allocating the storage needed. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" +#include "ureg.h" + +enum { /* FCW, FSW and MXCSR */ + I = 0x00000001, /* Invalid-Operation */ + D = 0x00000002, /* Denormalized-Operand */ + Z = 0x00000004, /* Zero-Divide */ + O = 0x00000008, /* Overflow */ + U = 0x00000010, /* Underflow */ + P = 0x00000020, /* Precision */ +}; + +enum { /* FCW */ + PCs = 0x00000000, /* Precision Control -Single */ + PCd = 0x00000200, /* -Double */ + PCde = 0x00000300, /* -Double Extended */ + RCn = 0x00000000, /* Rounding Control -Nearest */ + RCd = 0x00000400, /* -Down */ + RCu = 0x00000800, /* -Up */ + RCz = 0x00000C00, /* -Toward Zero */ +}; + +enum { /* FSW */ + Sff = 0x00000040, /* Stack Fault Flag */ + Es = 0x00000080, /* Error Summary Status */ + C0 = 0x00000100, /* ZF - Condition Code Bits */ + C1 = 0x00000200, /* O/U# */ + C2 = 0x00000400, /* PF */ + C3 = 0x00004000, /* ZF */ + B = 0x00008000, /* Busy */ +}; + +enum { /* MXCSR */ + Daz = 0x00000040, /* Denormals are Zeros */ + Im = 0x00000080, /* I Mask */ + Dm = 0x00000100, /* D Mask */ + Zm = 0x00000200, /* Z Mask */ + Om = 0x00000400, /* O Mask */ + Um = 
0x00000800, /* U Mask */ + Pm = 0x00001000, /* P Mask */ + Rn = 0x00000000, /* Round to Nearest */ + Rd = 0x00002000, /* Round Down */ + Ru = 0x00004000, /* Round Up */ + Rz = 0x00006000, /* Round toward Zero */ + Fz = 0x00008000, /* Flush to Zero for Um */ +}; + +enum { /* PFPU.state */ + Init = 0, /* The FPU has not been used */ + Busy = 1, /* The FPU is being used */ + Idle = 2, /* The FPU has been used */ + + Hold = 4, /* Handling an FPU note */ +}; + +extern void _clts(void); +extern void _fldcw(u16int); +extern void _fnclex(void); +extern void _fninit(void); +extern void _fxrstor(Fxsave*); +extern void _fxsave(Fxsave*); +extern void _fwait(void); +extern void _ldmxcsr(u32int); +extern void _stts(void); + +int +fpudevprocio(Proc* proc, void* a, long n, vlong offset, int write) +{ + uchar *p; + + /* + * Called from procdevtab.read and procdevtab.write + * allow user process access to the FPU registers. + * This is the only FPU routine which is called directly + * from the port code; it would be nice to have dynamic + * creation of entries in the device file trees... + */ + if(offset >= sizeof(Fxsave)) + return 0; + if((p = proc->fpusave) == nil) + return 0; + switch(write){ + default: + if(offset+n > sizeof(Fxsave)) + n = sizeof(Fxsave) - offset; + memmove(p+offset, a, n); + break; + case 0: + if(offset+n > sizeof(Fxsave)) + n = sizeof(Fxsave) - offset; + memmove(a, p+offset, n); + break; + } + + return n; +} + +void +fpunotify(Ureg*) +{ + /* + * Called when a note is about to be delivered to a + * user process, usually at the end of a system call. + * Note handlers are not allowed to use the FPU so + * the state is marked (after saving if necessary) and + * checked in the Device Not Available handler. + */ + if(up->fpustate == Busy){ + _fxsave(up->fpusave); + _stts(); + up->fpustate = Idle; + } + up->fpustate |= Hold; +} + +void +fpunoted(void) +{ + /* + * Called from sysnoted() via the machine-dependent + * noted() routine. 
+ * Clear the flag set above in fpunotify(). + */ + up->fpustate &= ~Hold; +} + +void +fpusysrfork(Ureg*) +{ + /* + * Called early in the non-interruptible path of + * sysrfork() via the machine-dependent syscall() routine. + * Save the state so that it can be easily copied + * to the child process later. + */ + if(up->fpustate != Busy) + return; + + _fxsave(up->fpusave); + _stts(); + up->fpustate = Idle; +} + +void +fpusysrforkchild(Proc* child, Proc* parent) +{ + /* + * Called later in sysrfork() via the machine-dependent + * sysrforkchild() routine. + * Copy the parent FPU state into the child's own save area. + */ + child->fpustate = parent->fpustate; + child->fpusave = (void*)((PTR2UINT(child->fxsave) + 15) & ~15); /* 16-byte align inside the child's buffer, not up's */ + if(child->fpustate == Init) + return; + + memmove(child->fpusave, parent->fpusave, sizeof(Fxsave)); +} + +void +fpuprocsave(Proc* p) +{ + /* + * Called from sched() and sleep() via the machine-dependent + * procsave() routine. + * About to go in to the scheduler. + * If the process wasn't using the FPU + * there's nothing to do. + */ + if(p->fpustate != Busy) + return; + + /* + * The process is dead so clear and disable the FPU + * and set the state for whoever gets this proc struct + * next. + */ + if(p->state == Moribund){ + _clts(); + _fnclex(); + _stts(); + p->fpustate = Init; + return; + } + + /* + * Save the FPU state without handling pending + * unmasked exceptions and disable. Postnote() can't + * be called here as sleep() already has up->rlock, + * so the handling of pending exceptions is delayed + * until the process runs again and generates a + * Device Not Available exception fault to activate + * the FPU. + */ + _fxsave(p->fpusave); + _stts(); + p->fpustate = Idle; +} + +void +fpuprocrestore(Proc* p) +{ + /* + * The process has been rescheduled and is about to run. + * Nothing to do here right now. If the process tries to use + * the FPU again it will cause a Device Not Available + * exception and the state will then be restored. 
+ */ + USED(p); +} + +void +fpusysprocsetup(Proc* p) +{ + /* + * Disable the FPU. + * Called from sysexec() via sysprocsetup() to + * set the FPU for the new process. + */ + if(p->fpustate != Init){ + _clts(); + _fnclex(); + _stts(); + p->fpustate = Init; + } +} + +void +acfpusysprocsetup(Proc *p) +{ + if(p->fpustate == Init){ + /* The FPU is initialized in the TC but we must initialize + * it in the AC. + */ + p->fpustate = Idle; + fpusysprocsetup(p); + } +} + +static char* +fpunote(void) +{ + ushort fsw; + Fxsave *fpusave; + char *m; + + /* + * The Sff bit is sticky, meaning it should be explicitly + * cleared or there's no way to tell if the exception was an + * invalid operation or a stack fault. + */ + fpusave = up->fpusave; + fsw = (fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I); + if(fsw & I){ + if(fsw & Sff){ + if(fsw & C1) + m = "Stack Overflow"; + else + m = "Stack Underflow"; + } + else + m = "Invalid Operation"; + } + else if(fsw & D) + m = "Denormal Operand"; + else if(fsw & Z) + m = "Divide-By-Zero"; + else if(fsw & O) + m = "Numeric Overflow"; + else if(fsw & U) + m = "Numeric Underflow"; + else if(fsw & P) + m = "Precision"; + else + m = "Unknown"; + + snprint(up->genbuf, sizeof(up->genbuf), + "sys: fp: %s Exception ipo=%#llux fsw=%#ux", + m, fpusave->rip, fsw); + return up->genbuf; +} + +char* +xfpuxf(Ureg* ureg, void*) +{ + u32int mxcsr; + Fxsave *fpusave; + char *m; + + /* + * #XF - SIMD Floating Point Exception (Vector 18). + */ + + /* + * Save FPU state to check out the error. + */ + fpusave = up->fpusave; + _fxsave(fpusave); + _stts(); + up->fpustate = Idle; + + if(ureg->ip & KZERO) + panic("#MF: ip=%#p", ureg->ip); + + /* + * Notify the user process. + * The path here is similar to the x87 path described + * in fpupostnote above but without the fpupostnote() + * call. 
+ */ + mxcsr = fpusave->mxcsr; + if((mxcsr & (Im|I)) == I) + m = "Invalid Operation"; + else if((mxcsr & (Dm|D)) == D) + m = "Denormal Operand"; + else if((mxcsr & (Zm|Z)) == Z) + m = "Divide-By-Zero"; + else if((mxcsr & (Om|O)) == O) + m = "Numeric Overflow"; + else if((mxcsr & (Um|U)) == U) + m = "Numeric Underflow"; + else if((mxcsr & (Pm|P)) == P) + m = "Precision"; + else + m = "Unknown"; + + snprint(up->genbuf, sizeof(up->genbuf), + "sys: fp: %s Exception mxcsr=%#ux", m, mxcsr); + return up->genbuf; +} + +void +fpuxf(Ureg *ureg, void *p) +{ + char *n; + + n = xfpuxf(ureg, p); + if(n != nil) + postnote(up, 1, n, NDebug); +} + +char* +acfpuxf(Ureg *ureg, void *p) +{ + return xfpuxf(ureg, p); +} + +static char* +xfpumf(Ureg* ureg, void*) +{ + Fxsave *fpusave; + + /* + * #MF - x87 Floating Point Exception Pending (Vector 16). + */ + + /* + * Save FPU state to check out the error. + */ + fpusave = up->fpusave; + _fxsave(fpusave); + _stts(); + up->fpustate = Idle; + + if(ureg->ip & KZERO) + panic("#MF: ip=%#p rip=%#p", ureg->ip, fpusave->rip); + + /* + * Notify the user process. + * The path here is + * call trap->fpumf->fpupostnote->postnote + * return ->fpupostnote->fpumf->trap + * call notify->fpunotify + * return ->notify + * then either + * call pexit + * or + * return ->trap + * return ->user note handler + */ + return fpunote(); +} + +void +fpumf(Ureg *ureg, void *p) +{ + char *n; + + n = xfpumf(ureg, p); + if(n != nil) + postnote(up, 1, n, NDebug); +} + +char* +acfpumf(Ureg *ureg, void *p) +{ + return xfpumf(ureg, p); +} + +static char* +xfpunm(Ureg* ureg, void*) +{ + Fxsave *fpusave; + + /* + * #NM - Device Not Available (Vector 7). + */ + if(up == nil) + panic("#NM: fpu in kernel: ip %#p\n", ureg->ip); + + /* + * Someone tried to use the FPU in a note handler. + * That's a no-no. 
+ */ + if(up->fpustate & Hold) + return "sys: floating point in note handler"; + + if(ureg->ip & KZERO) + panic("#NM: proc %d %s state %d ip %#p\n", + up->pid, up->text, up->fpustate, ureg->ip); + + switch(up->fpustate){ + case Busy: + default: + panic("#NM: state %d ip %#p\n", up->fpustate, ureg->ip); + break; + case Init: + /* + * A process tries to use the FPU for the + * first time and generates a 'device not available' + * exception. + * Turn the FPU on and initialise it for use. + * Set the precision and mask the exceptions + * we don't care about from the generic Mach value. + */ + _clts(); + _fninit(); + _fwait(); + _fldcw(m->fcw); + _ldmxcsr(m->mxcsr); + up->fpusave = (void*)((PTR2UINT(up->fxsave) + 15) & ~15); + up->fpustate = Busy; + break; + case Idle: + /* + * Before restoring the state, check for any pending + * exceptions, there's no way to restore the state without + * generating an unmasked exception. + */ + fpusave = up->fpusave; + if((fpusave->fsw & ~fpusave->fcw) & (Sff|P|U|O|Z|D|I)) + return fpunote(); + + /* + * Sff is sticky. 
+ */ + fpusave->fcw &= ~Sff; + _clts(); + _fxrstor(fpusave); + up->fpustate = Busy; + break; + } + return nil; +} + +void +fpunm(Ureg *ureg, void *p) +{ + char *n; + + n = xfpunm(ureg, p); + if(n != nil) + postnote(up, 1, n, NDebug); +} + +char* +acfpunm(Ureg *ureg, void *p) +{ + return xfpunm(ureg, p); +} + +void +fpuinit(void) +{ + u64int r; + Fxsave *fxsave; + uchar buf[sizeof(Fxsave)+15]; + + /* + * It's assumed there is an integrated FPU, so Em is cleared; + */ + r = cr0get(); + r &= ~(Ts|Em); + r |= Ne|Mp; + cr0put(r); + + r = cr4get(); + r |= Osxmmexcpt|Osfxsr; + cr4put(r); + + _fninit(); + fxsave = (Fxsave*)((PTR2UINT(buf) + 15) & ~15); + memset(fxsave, 0, sizeof(Fxsave)); + _fxsave(fxsave); + m->fcw = RCn|PCd|P|U|D; + if(fxsave->mxcsrmask == 0) + m->mxcsrmask = 0x0000FFBF; + else + m->mxcsrmask = fxsave->mxcsrmask; + m->mxcsr = (Rn|Pm|Um|Dm) & m->mxcsrmask; + _stts(); + + if(m->machno != 0) + return; + + /* + * Set up the exception handlers. + */ + trapenable(IdtNM, fpunm, 0, "#NM"); + trapenable(IdtMF, fpumf, 0, "#MF"); + trapenable(IdtXF, fpuxf, 0, "#XF"); + + /* Same thing, for the AC */ + actrapenable(IdtNM, acfpunm, 0, "#NM"); + actrapenable(IdtMF, acfpumf, 0, "#MF"); + actrapenable(IdtXF, acfpuxf, 0, "#XF"); +} diff -Nru 0/sys/src/nix/k10/i8254.c 4/sys/src/nix/k10/i8254.c --- 0/sys/src/nix/k10/i8254.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/i8254.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,167 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * 8254 Programmable Interval Timer and compatibles. 
+ */ +enum { /* I/O ports */ + Timer1 = 0x40, + Timer2 = 0x48, /* Counter0 is watchdog (EISA) */ + + Counter0 = 0, /* Counter 0 Access Port */ + Counter1 = 1, /* Counter 1 Access Port */ + Counter2 = 2, /* Counter 2 Access Port */ + Control = 3, /* Timer Control Word */ +}; + +enum { /* Control */ + Bcd = 0x01, /* Binary/BCD countdown select */ + + Mode0 = 0x00, /* [3:1] interrupt on terminal count */ + Mode1 = 0x02, /* hardware re-triggerable one-shot */ + Mode2 = 0x04, /* rate generator */ + Mode3 = 0x06, /* square-wave generator */ + Mode4 = 0x08, /* software triggered strobe */ + Mode5 = 0x0A, /* hardware triggered strobe */ + + Clc = 0x00, /* [5:4] Counter Latch Command */ + RWlsb = 0x10, /* R/W LSB */ + RWmsb = 0x20, /* R/W MSB */ + RW16 = 0x30, /* R/W LSB then MSB */ + Cs0 = 0x00, /* [7:6] Counter 0 Select */ + Cs1 = 0x40, /* Counter 1 Select */ + Cs2 = 0x80, /* Counter 2 Select */ + + Rbc = 0xC0, /* Read-Back Command */ + RbCnt0 = 0x02, /* Select Counter 0 */ + RbCnt1 = 0x04, /* Select Counter 1 */ + RbCnt2 = 0x08, /* Select Counter 2 */ + RbS = 0x20, /* Read-Back Status */ + RbC = 0x10, /* Read-Back Count */ + RbCS = 0x00, /* Read-Back Count and Status */ + + RbNULL = 0x40, /* NULL-Count Flag */ + RbOUT = 0x80, /* OUT-pin */ +}; + +enum { + Osc = 1193182, /* 14.318180MHz/12 */ + Hz = 82, /* 2*41*14551 = 1193182 */ +}; + +static void +i8254set(int port, int hz) +{ + int counter, timeo; + + /* + * Initialise Counter0 to be the system clock if necessary, + * it's normally connected to IRQ0 on an interrupt controller. + * Use a periodic square wave (Mode3). + */ + counter = Osc/hz; + outb(port+Control, Cs0|RW16|Mode3); + outb(port+Counter0, counter); + outb(port+Counter0, counter>>8); + + /* + * Wait until the counting register has been loaded + * into the counting element. 
+ */ + for(timeo = 0; timeo < 100000; timeo++){ + outb(port+Control, Rbc|RbS|RbCnt0); + if(!(inb(port+Counter0) & RbNULL)) + break; + } +} + +vlong +i8254hz(u32int info[2][4]) +{ + u32int ax; + u64int a, b; + int aamcycles, incr, loops, x, y; + + /* + * Use the cpuid family info to get the + * cycles for the AAM instruction. + * Beware: this can be called VERY early before + * some of the other device state is set. + */ + ax = info[1][0] & 0x00000f00; + if(memcmp(&info[0][1], "GenuntelineI", 12) == 0){ + switch(ax){ + default: + return 0; + case 0x00000600: + case 0x00000f00: + aamcycles = 16; + break; + } + } + else if(memcmp(&info[0][1], "AuthcAMDenti", 12) == 0){ + switch(ax){ + default: + return 0; + case 0x00000600: + case 0x00000f00: + aamcycles = 11; + break; + } + } + else + return 0; + + i8254set(Timer1, Hz); + + /* + * Find biggest loop that doesn't wrap. + */ + SET(a, b); + incr = 16000000/(aamcycles*Hz*2); + x = 2000; + for(loops = incr; loops < 64*1024; loops += incr) { + /* + * Measure time for the loop + * + * MOVL loops,CX + * aaml1: + * AAM + * LOOP aaml1 + * + * The time for the loop should be independent of external + * cache and memory system since it fits in the execution + * prefetch buffer. + * The AAM instruction is not available in 64-bit mode. + */ + outb(Timer1+Control, Cs0|Clc); + + a = rdtsc(); + x = inb(Timer1+Counter0); + x |= inb(Timer1+Counter0)<<8; + aamloop(loops); + outb(Timer1+Control, Cs0|Clc); + b = rdtsc(); + + y = inb(Timer1+Counter0); + y |= inb(Timer1+Counter0)<<8; + x -= y; + + if(x < 0) + x += Osc/Hz; + + if(x > Osc/(3*Hz)) + break; + } + + /* + * Figure out clock frequency. 
+ */ + b = (b-a)<<1; + b *= Osc; + + return b/x; +} diff -Nru 0/sys/src/nix/k10/i8259.c 4/sys/src/nix/k10/i8259.c --- 0/sys/src/nix/k10/i8259.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/i8259.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,229 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "io.h" + +/* + * 8259 Interrupt Controller and compatibles. + */ +enum { /* I/O ports */ + Cntrl1 = 0x20, + Cntrl2 = 0xa0, + + Icw1 = 0, /* Initialisation Command Word 1 */ + Icw2 = 1, + Icw3 = 1, + Icw4 = 1, + + Ocw1 = 1, /* Operational Control Word 1 */ + Ocw2 = 0, + Ocw3 = 0, + + Imr = Ocw1, /* Interrupt Mask Register */ + Isr = Ocw3, /* In-Service Register */ + Irr = Ocw3, /* Interrupt Request Register */ + + Elcr1 = 0x4d0, /* Edge/Level Control Register */ + Elcr2 = 0x4d1, +}; + +enum { /* Icw1 */ + Ic4 = 0x01, /* there will be an Icw4 */ + Icw1sel = 0x10, /* Icw/Ocw select */ +}; + +enum { /* Icw3 */ + Cascaded = 0x04, /* Cntrl1 - Cascaded Mode Enable */ + SlaveIRQ2 = 0x02, /* Cntrl2 - Slave Identification Code */ +}; + +enum { /* Icw4 */ + Microprocessor = 0x01, /* 80x86-based system */ +}; + +enum { /* Ocw2 */ + Ocw2sel = 0x00, /* Ocw2 select */ + Eoi = 0x20, /* Non-spcific EOI command */ +}; + +enum { /* Ocw3 */ + Irrread = 0x02, /* Read IRQ register */ + Isrread = 0x03, /* Read IS register */ + Ocw3sel = 0x08, /* Ocw3 select */ +}; + +static Lock i8259lock; +static int i8259mask = ~0; /* mask of disabled interrupts */ +static int i8259elcr; /* mask of level interrupts */ + +int +i8259init(int vectorbase) +{ + int elcr; + + vectorbase &= ~0x07; + + ilock(&i8259lock); + + /* + * Boilerplate to initialise the pair of 8259 controllers, + * see one of the Intel bridge datasheets for details, + * e.g. 82371AB (PIIX4). The default settings are 80x86 mode, + * edge-sensitive detection, normal EOI, non-buffered and + * cascade mode. 
Cntrl1 is connected as the master and Cntrl2 + * as the slave; IRQ2 is used to cascade the two controllers. + */ + outb(Cntrl1+Icw1, Icw1sel|Ic4); + outb(Cntrl1+Icw2, vectorbase); + outb(Cntrl1+Icw3, Cascaded); + outb(Cntrl1+Icw4, Microprocessor); + + outb(Cntrl2+Icw1, Icw1sel|Ic4); + outb(Cntrl2+Icw2, vectorbase+8); + outb(Cntrl2+Icw3, SlaveIRQ2); + outb(Cntrl2+Icw4, Microprocessor); + + /* + * Set the interrupt masks, allowing interrupts + * to pass from Cntrl2 to Cntrl1 on IRQ2. + */ + i8259mask &= ~(1<<2); + outb(Cntrl2+Imr, (i8259mask>>8) & 0xff); + outb(Cntrl1+Imr, i8259mask & 0xff); + + outb(Cntrl1+Ocw2, Ocw2sel|Eoi); + outb(Cntrl2+Ocw2, Ocw2sel|Eoi); + + /* + * Set Ocw3 to return the ISR when read for i8259isr() + * (after initialisation status read is set to return the IRR). + * Read IRR first to possibly deassert an outstanding + * interrupt. + */ + inb(Cntrl1+Irr); + outb(Cntrl1+Ocw3, Ocw3sel|Isrread); + inb(Cntrl2+Irr); + outb(Cntrl2+Ocw3, Ocw3sel|Isrread); + + /* + * Check for Edge/Level Control register. + * This check may not work for all chipsets. + * First try a non-intrusive test - the bits for + * IRQs 13, 8, 2, 1 and 0 must be edge (0). If + * that's OK try a R/W test. + */ + elcr = (inb(Elcr2)<<8)|inb(Elcr1); + if(!(elcr & 0x2107)){ + outb(Elcr1, 0); + if(inb(Elcr1) == 0){ + outb(Elcr1, 0x20); + if(inb(Elcr1) == 0x20) + i8259elcr = elcr; + outb(Elcr1, elcr & 0xff); + } + } + iunlock(&i8259lock); + + return vectorbase; +} + +int +i8259isr(int vno) +{ + int irq, isr; + + if(vno < IdtPIC || vno > IdtPIC+15) + return 0; + irq = vno-IdtPIC; + + /* + * Collect the interrupt status, + * acknowledge the interrupt and return whether + * the acknowledged interrupt was the correct + * one (this could be better but it's not really + * used). 
+ */ + ilock(&i8259lock); + isr = inb(Cntrl1+Isr); + outb(Cntrl1+Ocw2, Ocw2sel|Eoi); + if(irq >= 8){ + isr |= inb(Cntrl2+Isr)<<8; + outb(Cntrl2+Ocw2, Ocw2sel|Eoi); + } + iunlock(&i8259lock); + + return isr & (1<irq; + if(irq < 0 || irq > 15){ + print("i8259enable: irq %d out of range\n", irq); + return -1; + } + irqbit = 1<>8) & 0xff); + + if(i8259elcr & irqbit) + v->eoi = i8259isr; + else + v->isr = i8259isr; + iunlock(&i8259lock); + + v->type = "8259"; + return IdtPIC+irq; +} + +int +i8259irqdisable(int irq) +{ + int irqbit; + + /* + * Given an IRQ, disable the corresponding interrupt + * in the 8259. + */ + if(irq < 0 || irq > 15){ + print("i8259disable: irq %d out of range\n", irq); + return -1; + } + irqbit = 1<>8) & 0xff); + } + iunlock(&i8259lock); + + return 0; +} +#endif /* notdef */ diff -Nru 0/sys/src/nix/k10/init9.c 4/sys/src/nix/k10/init9.c --- 0/sys/src/nix/k10/init9.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/init9.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,7 @@ +extern void startboot(char*, char**); + +void +main(char* argv0) +{ + startboot(argv0, &argv0); +} diff -Nru 0/sys/src/nix/k10/io.h 4/sys/src/nix/k10/io.h --- 0/sys/src/nix/k10/io.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/io.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,266 @@ +enum { + VectorNMI = 2, /* non-maskable interrupt */ + VectorBPT = 3, /* breakpoint */ + VectorUD = 6, /* invalid opcode exception */ + VectorCNA = 7, /* coprocessor not available */ + Vector2F = 8, /* double fault */ + VectorCSO = 9, /* coprocessor segment overrun */ + VectorPF = 14, /* page fault */ + Vector15 = 15, /* reserved */ + VectorCERR = 16, /* coprocessor error */ + + VectorPIC = 32, /* external i8259 interrupts */ + IrqCLOCK = 0, + IrqKBD = 1, + IrqUART1 = 3, + IrqUART0 = 4, + IrqPCMCIA = 5, + IrqFLOPPY = 6, + IrqLPT = 7, + IrqIRQ7 = 7, + IrqAUX = 12, /* PS/2 port */ + IrqIRQ13 = 13, /* coprocessor on 386 */ + IrqATA0 = 14, + IrqATA1 = 15, + MaxIrqPIC = 15, + + VectorLAPIC = VectorPIC+16, /* local APIC 
/*
 * Key naming the source of an interrupt: a PCI tbdf or a legacy irq.
 */
typedef struct Vkey {
	int	tbdf;			/* pci: ioapic or msi sources */
	int	irq;			/* 8259-emulating sources */
} Vkey;

/*
 * One registered handler on an interrupt/trap vector.
 * Handlers sharing a vector are chained through next.
 * The enable routines in this kernel (8259, lapic, msi, ioapic)
 * fill in isr, eoi, mask, vno and type.
 */
typedef struct Vctl {
	Vctl*	next;			/* handlers on this vector */

	int	isintr;			/* interrupt or fault/trap */

	Vkey;				/* source-specific key; tbdf for pci */
	void	(*f)(Ureg*, void*);	/* handler to call */
	void*	a;			/* argument to call it with */
	char	name[KNAMELEN];		/* of driver */
	char	*type;			/* controller tag, e.g. "8259", "lapic", "msi", "ioapic" */

	int	(*isr)(int);		/* get isr bit for this irq */
	int	(*eoi)(int);		/* eoi */
	int	(*mask)(Vkey*, int);	/* interrupt enable returns masked vector */
	int	vno;			/* vector number assigned by the enable routine */
} Vctl;

/*
 * Handler record for the AC trap path: unlike Vctl.f, the handler
 * returns a char* (the handlers passed to actrapenable have this
 * signature).
 */
typedef struct ACVctl {
	char*	(*f)(Ureg*,void*);
	void*	a;
	int	vno;
	char	name[KNAMELEN];		/* of driver */
} ACVctl;
/*
 * Configuration-space offsets that exist only in the type 0
 * (ordinary device) header.
 */
enum {					/* type 0 pre-defined header */
	PciCIS		= 0x28,		/* cardbus CIS pointer */
	PciSVID		= 0x2C,		/* subsystem vendor ID */
	PciSID		= 0x2E,		/* subsystem ID */
	PciEBAR0	= 0x30,		/* expansion ROM base address */
	PciMGNT		= 0x3E,		/* burst period length */
	PciMLT		= 0x3F,		/* maximum latency between bursts */
};
/*
 * Size record for one base address register of a device.
 * NOTE(review): presumably used while sizing/allocating BARs; the
 * code that fills it in is not visible here.
 */
typedef struct Pcisiz Pcisiz;
struct Pcisiz
{
	Pcidev*	dev;
	int	siz;
	int	bar;
};

/*
 * Per-device PCI state.
 * The short field names mirror the Pci* configuration offsets
 * (PciPCR, PciRID, PciCCRp, PciCCRu, PciCCRb, PciCLS, PciLTR);
 * presumably each holds a copy of that register - confirm against
 * the bus scan code.
 */
typedef struct Pcidev Pcidev;
struct Pcidev
{
	int	tbdf;			/* type+bus+device+function */
	ushort	vid;			/* vendor ID */
	ushort	did;			/* device ID */

	ushort	pcr;

	uchar	rid;
	uchar	ccrp;
	uchar	ccru;
	uchar	ccrb;
	uchar	cls;
	uchar	ltr;

	struct {
		ulong	bar;		/* base address */
		int	size;
	} mem[6];

	struct {
		ulong	bar;
		int	size;
	} rom;
	uchar	intl;			/* interrupt line */

	Pcidev*	list;
	Pcidev*	link;			/* next device on this bno */

	Pcidev*	bridge;			/* down a bus */
	struct {
		ulong	bar;
		int	size;
	} ioa, mema;
};
PCIWADDR(va) (PADDR(va)+PCIWINDOW) +#define ISAWINDOW 0 +#define ISAWADDR(va) (PADDR(va)+ISAWINDOW) + +#pragma varargck type "T" int diff -Nru 0/sys/src/nix/k10/ioapic.c 4/sys/src/nix/k10/ioapic.c --- 0/sys/src/nix/k10/ioapic.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/ioapic.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,485 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" +#include "io.h" + +typedef struct Rbus Rbus; +typedef struct Rdt Rdt; + +struct Rbus { + Rbus *next; + int devno; + Rdt *rdt; +}; + +struct Rdt { + Apic *apic; + int intin; + u32int lo; + + int ref; /* could map to multiple busses */ + int enabled; /* times enabled */ +}; + +enum { /* IOAPIC registers */ + Ioregsel = 0x00, /* indirect register address */ + Iowin = 0x04, /* indirect register data */ + Ioipa = 0x08, /* IRQ Pin Assertion */ + Ioeoi = 0x10, /* EOI */ + + Ioapicid = 0x00, /* Identification */ + Ioapicver = 0x01, /* Version */ + Ioapicarb = 0x02, /* Arbitration */ + Ioabcfg = 0x03, /* Boot Coniguration */ + Ioredtbl = 0x10, /* Redirection Table */ +}; + +static Rdt rdtarray[Nrdt]; +static int nrdtarray; +static Rbus* rdtbus[Nbus]; +static Rdt* rdtvecno[IdtMAX+1]; + +static Lock idtnolock; +static int idtno = IdtIOAPIC; + +Apic xioapic[Napic]; + +static void +rtblget(Apic* apic, int sel, u32int* hi, u32int* lo) +{ + sel = Ioredtbl + 2*sel; + + *(apic->addr+Ioregsel) = sel+1; + *hi = *(apic->addr+Iowin); + *(apic->addr+Ioregsel) = sel; + *lo = *(apic->addr+Iowin); +} + +static void +rtblput(Apic* apic, int sel, u32int hi, u32int lo) +{ + sel = Ioredtbl + 2*sel; + + *(apic->addr+Ioregsel) = sel+1; + *(apic->addr+Iowin) = hi; + *(apic->addr+Ioregsel) = sel; + *(apic->addr+Iowin) = lo; +} + +Rdt* +rdtlookup(Apic *apic, int intin) +{ + int i; + Rdt *r; + + for(i = 0; i < nrdtarray; i++){ + r = rdtarray + i; + if(apic == r->apic && intin == r->intin) + return r; + } + return nil; +} + +void +ioapicintrinit(int busno, int 
apicno, int intin, int devno, u32int lo) +{ + Rbus *rbus; + Rdt *rdt; + Apic *apic; + + if(busno >= Nbus || apicno >= Napic || nrdtarray >= Nrdt) + return; + apic = &xioapic[apicno]; + if(!apic->useable || intin >= apic->nrdt) + return; + + rdt = rdtlookup(apic, intin); + if(rdt == nil){ + rdt = &rdtarray[nrdtarray++]; + rdt->apic = apic; + rdt->intin = intin; + rdt->lo = lo; + }else{ + if(lo != rdt->lo){ + print("mutiple irq botch bus %d %d/%d/%d lo %d vs %d\n", + busno, apicno, intin, devno, lo, rdt->lo); + return; + } + DBG("dup rdt %d %d %d %d %.8ux\n", busno, apicno, intin, devno, lo); + } + rdt->ref++; + rbus = malloc(sizeof *rbus); + rbus->rdt = rdt; + rbus->devno = devno; + rbus->next = rdtbus[busno]; + rdtbus[busno] = rbus; +} + +void +ioapicinit(int id, int ibase, uintmem pa) +{ + Apic *apic; + static int base; + + /* + * Mark the IOAPIC useable if it has a good ID + * and the registers can be mapped. + */ + if(id >= Napic) + return; + + apic = &xioapic[id]; + if(apic->useable || (apic->addr = vmap(pa, 1024)) == nil) + return; + apic->useable = 1; + apic->paddr = pa; + + /* + * Initialise the I/O APIC. + * The MultiProcessor Specification says it is the + * responsibility of the O/S to set the APIC ID. 
+ */ + lock(apic); + *(apic->addr+Ioregsel) = Ioapicver; + apic->nrdt = ((*(apic->addr+Iowin)>>16) & 0xff) + 1; + if(ibase != -1) + apic->ibase = ibase; + else{ + apic->ibase = base; + base += apic->nrdt; + } + *(apic->addr+Ioregsel) = Ioapicid; + *(apic->addr+Iowin) = id<<24; + unlock(apic); +} + +void +ioapicdump(void) +{ + int i, n; + Rbus *rbus; + Rdt *rdt; + Apic *apic; + u32int hi, lo; + + if(!DBGFLG) + return; + for(i = 0; i < Napic; i++){ + apic = &xioapic[i]; + if(!apic->useable || apic->addr == 0) + continue; + print("ioapic %d addr %#p nrdt %d ibase %d\n", + i, apic->addr, apic->nrdt, apic->ibase); + for(n = 0; n < apic->nrdt; n++){ + lock(apic); + rtblget(apic, n, &hi, &lo); + unlock(apic); + print(" rdt %2.2d %#8.8ux %#8.8ux\n", n, hi, lo); + } + } + for(i = 0; i < Nbus; i++){ + if((rbus = rdtbus[i]) == nil) + continue; + print("iointr bus %d:\n", i); + for(; rbus != nil; rbus = rbus->next){ + rdt = rbus->rdt; + print(" apic %ld devno %#ux (%d %d) intin %d lo %#ux ref %d\n", + rdt->apic-xioapic, rbus->devno, rbus->devno>>2, + rbus->devno & 0x03, rdt->intin, rdt->lo, rdt->ref); + } + } +} + +void +ioapiconline(void) +{ + int i; + Apic *apic; + + for(apic = xioapic; apic < &xioapic[Napic]; apic++){ + if(!apic->useable || apic->addr == nil) + continue; + for(i = 0; i < apic->nrdt; i++){ + lock(apic); + rtblput(apic, i, 0, Im); + unlock(apic); + } + } + ioapicdump(); +} + +static int dfpolicy = 0; + +static void +ioapicintrdd(u32int* hi, u32int* lo) +{ + int i; + static int df; + static Lock dflock; + + /* + * Set delivery mode (lo) and destination field (hi), + * according to interrupt routing policy. + */ + /* + * The bulk of this code was written ~1995, when there was + * one architecture and one generation of hardware, the number + * of CPUs was up to 4(8) and the choices for interrupt routing + * were physical, or flat logical (optionally with lowest + * priority interrupt). 
/*
 * Enable the interrupt described by v and return its vector number,
 * or -1 when no route can be found.
 *
 * Three kinds of source are handled:
 *   - tbdf == BUSUNKNOWN with irq in IrqLINT0..MaxIrqLAPIC: a local
 *     APIC interrupt, the irq itself is returned as the vector;
 *   - tbdf == BUSUNKNOWN otherwise: legacy ISA, looked up on the
 *     ISA bus (mpisabusno) with devno = irq<<2;
 *   - a PCI tbdf: MSI is tried first; failing that the device's
 *     interrupt pin is used to form the devno for the lookup.
 * Any other tbdf panics.
 *
 * For the I/O APIC routes the redirection entry is programmed (under
 * the owning APIC's lock) and v's isr/eoi/vno/type are filled in.
 */
int
ioapicintrenable(Vctl* v)
{
	Rbus *rbus;
	Rdt *rdt;
	u32int hi, lo;
	int busno, devno, vecno;

	/*
	 * Bridge between old and unspecified new scheme,
	 * the work in progress...
	 */
	if(v->tbdf == BUSUNKNOWN){
		if(v->irq >= IrqLINT0 && v->irq <= MaxIrqLAPIC){
			/*
			 * NOTE(review): isr is set to apiceoi here, while the
			 * msi and ioapic paths use apicisr for isr and apiceoi
			 * for eoi - confirm this is intended.
			 */
			if(v->irq != IrqSPURIOUS)
				v->isr = apiceoi;
			v->type = "lapic";
			return v->irq;
		}
		else{
			/*
			 * Legacy ISA.
			 * Make a busno and devno using the
			 * ISA bus number and the irq.
			 */
			extern int mpisabusno;

			if(mpisabusno == -1)
				panic("no ISA bus allocated");
			busno = mpisabusno;
			devno = v->irq<<2;
		}
	}
	else if(BUSTYPE(v->tbdf) == BusPCI){
		/*
		 * PCI.
		 * Make a devno from BUSDNO(tbdf) and pcidev->intp.
		 */
		Pcidev *pcidev;

		busno = BUSBNO(v->tbdf);
		if((pcidev = pcimatchtbdf(v->tbdf)) == nil)
			panic("no PCI dev for tbdf %#8.8ux", v->tbdf);
		/* prefer MSI; on success everything is already set up in v */
		if((vecno = intrenablemsi(v, pcidev)) != -1)
			return vecno;
		disablemsi(v, pcidev);
		/* the pin register is 1-based; 0 means no pin */
		if((devno = pcicfgr8(pcidev, PciINTP)) == 0)
			panic("no INTP for tbdf %#8.8ux", v->tbdf);
		devno = BUSDNO(v->tbdf)<<2|(devno-1);
		DBG("ioapicintrenable: tbdf %#8.8ux busno %d devno %d\n",
			v->tbdf, busno, devno);
	}
	else{
		SET(busno, devno);
		panic("unknown tbdf %#8.8ux", v->tbdf);
	}

	/* find the redirection entry recorded for this bus/device */
	rdt = nil;
	for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next)
		if(rbus->devno == devno){
			rdt = rbus->rdt;
			break;
		}
	if(rdt == nil){
		extern int mpisabusno;

		/*
		 * First crack in the smooth exterior of the new code:
		 * some BIOS make an MPS table where the PCI devices are
		 * just defaulted to ISA.
		 * Rewrite this to be cleaner.
		 */
		if((busno = mpisabusno) == -1)
			return -1;
		devno = v->irq<<2;
		for(rbus = rdtbus[busno]; rbus != nil; rbus = rbus->next)
			if(rbus->devno == devno){
				rdt = rbus->rdt;
				break;
			}
		DBG("isa: tbdf %#8.8ux busno %d devno %d %#p\n",
			v->tbdf, busno, devno, rdt);
	}
	if(rdt == nil)
		return -1;

	/*
	 * Second crack:
	 * what to do about devices that intrenable/intrdisable frequently?
	 * 1) there is no ioapicdisable yet;
	 * 2) it would be good to reuse freed vectors.
	 * Oh bugger.
	 */
	/*
	 * This is a low-frequency event so just lock
	 * the whole IOAPIC to initialise the RDT entry
	 * rather than putting a Lock in each entry.
	 */
	lock(rdt->apic);
	DBG("%T: %ld/%d/%d (%d)\n", v->tbdf, rdt->apic - xioapic, rbus->devno, rdt->intin, devno);
	/* the low byte of lo holds the vector; zero means none assigned yet */
	if((rdt->lo & 0xff) == 0){
		vecno = nextvec();
		rdt->lo |= vecno;
		rdtvecno[vecno] = rdt;
	}else
		DBG("%T: mutiple irq bus %d dev %d\n", v->tbdf, busno, devno);

	rdt->enabled++;
	/* write the entry with the Im bit cleared and the destination chosen by policy */
	lo = (rdt->lo & ~Im);
	ioapicintrdd(&hi, &lo);
	rtblput(rdt->apic, rdt->intin, hi, lo);
	vecno = lo & 0xff;
	unlock(rdt->apic);

	DBG("busno %d devno %d hi %#8.8ux lo %#8.8ux vecno %d\n",
		busno, devno, hi, lo, vecno);
	v->isr = apicisr;
	v->eoi = apiceoi;
	v->vno = vecno;
	v->type = "ioapic";

	return vecno;
}
+ */ + if(vecno < 0 || vecno > MaxVectorAPIC){ + panic("ioapicintrdisable: vecno %d out of range", vecno); + return -1; + } + if((rdt = rdtvecno[vecno]) == nil){ + panic("ioapicintrdisable: vecno %d has no rdt", vecno); + return -1; + } + + lock(rdt->apic); + rdt->enabled--; + if(rdt->enabled == 0) + rtblput(rdt->apic, rdt->intin, 0, rdt->lo); + unlock(rdt->apic); + + return 0; +} diff -Nru 0/sys/src/nix/k10/k8cpu 4/sys/src/nix/k10/k8cpu --- 0/sys/src/nix/k10/k8cpu Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/k8cpu Wed Feb 6 00:00:00 2013 @@ -0,0 +1,203 @@ +dev +dev + root + cons + arch + env + pipe + proc + mnt + srv + dup + rtc + sd + ssl + cap + kprof + pmc pmcio + segment + acpi + zp + ws + +# add to get cec in the kernel +# cec + + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno + + pci + + uart + +uart +dev + uarti8250 + uartpci pci + +ip +dev + tcp + udp + ipifc + icmp + icmp6 + +link +dev + ether8169 pci ethermii + ether82557 pci + ether82563 pci + etherigbe pci ethermii +# etherbcm pci ethermii + ethermedium + loopbackmedium + netdevmedium + +# ht + +sd +dev + sdahci sdscsifis sdatafis led + +misc +dev +# cache + mp mpacpi apic ioapic msi pci sipi + +# +#boot cpu +# int cpuflag = 1; +#boot cpu boot $3 +# int cpuflag = 1; +# char* bootdisk = "$3"; +#boot rootdir $3 +# char* rootdir = "$3"; +#boot (bboot|romboot|dosboot) +# int cpuflag = 1; +# char* bootprog = $2; +#boot boot $3 +# char* bootdisk = "$3"; +# +boot cpu + tcp + +rootdir + bootk8cpu.out boot + /amd64/bin/auth/factotum factotum + /amd64/bin/ip/ipconfig ipconfig + ../root/nvram nvram + +conf + int cpuserver = 1; + +# +#dbgflg +# chan 'c' +# apic 'A' +# hpet 'H' +# ht 'H' +# ioapic 'I' +# mp 'M' +# pci 'P' +# arch 'V' +# +dbgflg + acore 'c' + apic 'A' + arch 'V' + asm 'm' + devacpi 'C' + devsegment 'z' + devzp 'z' + hpet 'H' + ht 'H' + image 'p' + ioapic 'I' + main 'x' + memory 'm' + mp 'M' + page 'p' + pager 'p' + physalloc 'm' + sysproc 'E' + 
sysseg 'p' + syssem 'S' + syszio 'z' + tcore 'c' + mmu 'v' + +amd64 +dev + l32p + l64v + l64idt + l64acidt + l64cpuid + l64syscall + l64acsyscall + l64fpu + acore + arch + archk10 + asm + cga + crap + fpu + i8254 + i8259 + kbd + main + map + memory + mmu + multiboot + qmalloc + random + syscall + tcore + trap + vsvm + physalloc + +port + alarm + allocb + chan + dev + devtab + edf + fault + image + latin1 + page + pager + parse + pgrp + portclock + print + proc + ps + qio + qlock + rebootcmd + segment + sysauth + sysfile + sysproc + sysseg + syssem + systab + taslock +# tcklock + tod + syszio + syscallfmt + +# +#dir +# pc -.I. +# +dir + 386 + ip + port + +lib + libc + libip + libsec + libfis diff -Nru 0/sys/src/nix/k10/k8cpufs 4/sys/src/nix/k10/k8cpufs --- 0/sys/src/nix/k10/k8cpufs Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/k8cpufs Wed Feb 6 00:00:00 2013 @@ -0,0 +1,193 @@ +dev +dev + root + cons + arch + env + pipe + proc + mnt + srv + dup + rtc + ssl + cap + kprof + pmc + segment + +# add to get cec in the kernel +# cec + + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno + + uart + +uart +dev + uarti8250 + uartpci pci +pmc +dev + pmcio + +ip +dev + tcp + udp + ipifc + icmp + icmp6 + +link +dev + ether8169 pci ethermii + ether82557 pci + ether82563 pci + etherigbe pci ethermii + ethermedium + loopbackmedium + netdevmedium + +# acpi hpet +# ht + +misc +dev + cache + mp apic ioapic pci sipi + +# +#boot cpu +# int cpuflag = 1; +#boot cpu boot $3 +# int cpuflag = 1; +# char* bootdisk = "$3"; +#boot rootdir $3 +# char* rootdir = "$3"; +#boot (bboot|romboot|dosboot) +# int cpuflag = 1; +# char* bootprog = $2; +#boot boot $3 +# char* bootdisk = "$3"; +# +boot cpu + tcp + +rootdir + boot.fs boot + /amd64/bin/rc rc + /rc/lib/rcmain + /amd64/bin/echo echo + /amd64/bin/date date + /amd64/bin/ls ls + /amd64/bin/ps ps + /amd64/bin/bind bind + /amd64/bin/cat cat + /amd64/bin/auth/factotum factotum + /amd64/bin/ip/ipconfig 
ipconfig + ../root/big big + ../root/nvram nvram + +conf + int cpuserver = 1; + +# +#dbgflg +# chan 'c' +# apic 'A' +# acpi 'C' +# hpet 'H' +# ht 'H' +# ioapic 'I' +# mp 'M' +# pci 'P' +# arch 'V' +# +dbgflg + apic 'A' + acpi 'C' + hpet 'H' + ht 'H' + ioapic 'I' + mp 'M' + arch 'V' + sysproc 'E' + main 'x' + acore 'c' + tcore 'c' + syssem 'S' + page 'p' + pager 'p' + memory 'm' + +amd64 +dev + l32p + l64v + l64idt + l64acidt + l64syscall + l64acsyscall + l64fpu + cpuidamd64 + acore + arch + archk10 + cga + crap + fpu + i8254 + i8259 + kbd + main + map + memory + mmu + multiboot + random + syscall + tcore + trap + vsvm + +port + alarm + alloc xalloc + allocb + chan + dev + devtab + edf + fault + image + latin1 + page + parse + pgrp + portclock + print + proc + ps + qio + qlock + rebootcmd + segment + pager + sysauth + sysfile + sysproc + sysseg + systab + taslock + tod + syssem + syszio + +# +#dir +# pc -.I. +# +dir + 386 + ip + port + +lib + libc + libip + libsec diff -Nru 0/sys/src/nix/k10/k8cpukexec 4/sys/src/nix/k10/k8cpukexec --- 0/sys/src/nix/k10/k8cpukexec Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/k8cpukexec Wed Feb 6 00:00:00 2013 @@ -0,0 +1,202 @@ +dev +dev + root + cons + arch + env + pipe + proc + kexec + cmd + mnt + srv + dup + rtc + ssl + cap + kprof +# pmc pmcio + segment + acpi + tube + zp + +# add to get cec in the kernel +# cec + + ether netif + ip arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium ptclbsum inferno + + uart + +uart +dev + uarti8250 + uartpci pci + +ip +dev + tcp + udp + ipifc + icmp + icmp6 + +link +dev + ether8169 pci ethermii + ether82557 pci + ether82563 pci + etherigbe pci ethermii + ethermedium + loopbackmedium + netdevmedium + +# ht + +misc +dev +# cache + mp apic ioapic msi pci sipi +# rdb + +# +#boot cpu +# int cpuflag = 1; +#boot cpu boot $3 +# int cpuflag = 1; +# char* bootdisk = "$3"; +#boot rootdir $3 +# char* rootdir = "$3"; +#boot (bboot|romboot|dosboot) +# int cpuflag = 1; +# char* bootprog = $2; 
+#boot boot $3 +# char* bootdisk = "$3"; +# +boot cpu + tcp + +rootdir + bootk8cpu.out boot + /amd64/bin/auth/factotum factotum + /amd64/bin/ip/ipconfig ipconfig + ../root/nvram nvram + +conf + int cpuserver = 1; + +# +#dbgflg +# chan 'c' +# apic 'A' +# hpet 'H' +# ht 'H' +# ioapic 'I' +# mp 'M' +# pci 'P' +# arch 'V' +# +dbgflg + acore 'c' + apic 'A' + arch 'V' + asm 'm' + devacpi 'C' + devsegment 'z' + devtube 'T' + devzp 'z' + hpet 'H' + ht 'H' + image 'p' + ioapic 'I' + kexec 'k' + main 'x' + memory 'm' + mp 'M' + nixcall 'n' + page 'p' + pager 'p' + physalloc 'm' + sysproc 'E' + sysseg 'p' + syssem 'S' + syszio 'z' + tcore 'c' + mmu 'v' + +amd64 +dev + l32p + l64v + l64idt + l64acidt + l64cpuid + l64syscall + l64acsyscall + l64fpu + acore + arch + archk10 + asm + cga + crap + fpu + i8254 + i8259 + kbd + main + map + memory + mmu + multiboot + qmalloc + random + syscall + tcore + trap + vsvm + physalloc + +port + alarm + allocb + chan + dev + devtab + edf + fault + image + kexec + latin1 + nixcall + page + pager + parse + pgrp + portclock + print + proc + ps + qio + qlock + rebootcmd + segment + sysauth + sysfile + sysproc + sysseg + syssem + systab + taslock + tod + syszio + syscallfmt + +# +#dir +# pc -.I. 
+# +dir + 386 + ip + port + +lib + libc + libip + libsec diff -Nru 0/sys/src/nix/k10/kbd.c 4/sys/src/nix/k10/kbd.c --- 0/sys/src/nix/k10/kbd.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/kbd.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,698 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +enum { + Data= 0x60, /* data port */ + + Status= 0x64, /* status port */ + Inready= 0x01, /* input character ready */ + Outbusy= 0x02, /* output busy */ + Sysflag= 0x04, /* system flag */ + Cmddata= 0x08, /* cmd==0, data==1 */ + Inhibit= 0x10, /* keyboard/mouse inhibited */ + Minready= 0x20, /* mouse character ready */ + Rtimeout= 0x40, /* general timeout */ + Parity= 0x80, + + Cmd= 0x64, /* command port (write only) */ + + Spec= 0xF800, /* Unicode private space */ + PF= Spec|0x20, /* num pad function key */ + View= Spec|0x00, /* view (shift window up) */ + KF= 0xF000, /* function key (begin Unicode private space) */ + Shift= Spec|0x60, + Break= Spec|0x61, + Ctrl= Spec|0x62, + Latin= Spec|0x63, + Caps= Spec|0x64, + Num= Spec|0x65, + Middle= Spec|0x66, + Altgr= Spec|0x67, + Kmouse= Spec|0x100, + No= 0x00, /* peter */ + + Home= KF|13, + Up= KF|14, + Pgup= KF|15, + Print= KF|16, + Left= KF|17, + Right= KF|18, + End= KF|24, + Down= View, + Pgdown= KF|19, + Ins= KF|20, + Del= 0x7F, + Scroll= KF|21, + + Nscan= 128, + + Int= 0, /* kbscans indices */ + Ext, + Nscans, +}; + +/* + * The codes at 0x79 and 0x7b are produced by the PFU Happy Hacking keyboard. + * A 'standard' keyboard doesn't produce anything above 0x58. 
+ */ +Rune kbtab[Nscan] = +{ +[0x00] No, 0x1b, '1', '2', '3', '4', '5', '6', +[0x08] '7', '8', '9', '0', '-', '=', '\b', '\t', +[0x10] 'q', 'w', 'e', 'r', 't', 'y', 'u', 'i', +[0x18] 'o', 'p', '[', ']', '\n', Ctrl, 'a', 's', +[0x20] 'd', 'f', 'g', 'h', 'j', 'k', 'l', ';', +[0x28] '\'', '`', Shift, '\\', 'z', 'x', 'c', 'v', +[0x30] 'b', 'n', 'm', ',', '.', '/', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, Scroll, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, View, No, Up, No, No, No, No, +}; + +Rune kbtabshift[Nscan] = +{ +[0x00] No, 0x1b, '!', '@', '#', '$', '%', '^', +[0x08] '&', '*', '(', ')', '_', '+', '\b', '\t', +[0x10] 'Q', 'W', 'E', 'R', 'T', 'Y', 'U', 'I', +[0x18] 'O', 'P', '{', '}', '\n', Ctrl, 'A', 'S', +[0x20] 'D', 'F', 'G', 'H', 'J', 'K', 'L', ':', +[0x28] '"', '~', Shift, '|', 'Z', 'X', 'C', 'V', +[0x30] 'B', 'N', 'M', '<', '>', '?', Shift, '*', +[0x38] Latin, ' ', Ctrl, KF|1, KF|2, KF|3, KF|4, KF|5, +[0x40] KF|6, KF|7, KF|8, KF|9, KF|10, Num, Scroll, '7', +[0x48] '8', '9', '-', '4', '5', '6', '+', '1', +[0x50] '2', '3', '0', '.', No, No, No, KF|11, +[0x58] KF|12, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, Up, No, No, No, No, +}; + +Rune kbtabesc1[Nscan] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Altgr, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] 
Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, No, No, No, No, No, +}; + +Rune kbtabaltgr[Nscan] = +{ +[0x00] No, No, No, No, No, No, No, No, +[0x08] No, No, No, No, No, No, No, No, +[0x10] No, No, No, No, No, No, No, No, +[0x18] No, No, No, No, '\n', Ctrl, No, No, +[0x20] No, No, No, No, No, No, No, No, +[0x28] No, No, Shift, No, No, No, No, No, +[0x30] No, No, No, No, No, '/', No, Print, +[0x38] Altgr, No, No, No, No, No, No, No, +[0x40] No, No, No, No, No, No, Break, Home, +[0x48] Up, Pgup, No, Left, No, Right, No, End, +[0x50] Down, Pgdown, Ins, Del, No, No, No, No, +[0x58] No, No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, Up, No, No, No, No, No, No, +}; + +Rune kbtabctrl[Nscan] = +{ +[0x00] No, '', '', '', '', '', '', '', +[0x08] '', '', '', '', ' ', '', '\b', '\t', +[0x10] '', '', '', '', '', '', '', '\t', +[0x18] '', '', '', '', '\n', Ctrl, '', '', +[0x20] '', '', '', '\b', '\n', ' ', ' ', '', +[0x28] '', No, Shift, '', '', '', '', '', +[0x30] '', '', ' ', ' ', '', '', Shift, '\n', +[0x38] Latin, No, Ctrl, '', '', '', '', '', +[0x40] '', '', ' ', ' ', '', '', '', '', +[0x48] '', '', ' ', '', '', '', ' ', '', +[0x50] '', '', '', '', No, No, No, '', +[0x58] ' ', No, No, No, No, No, No, No, +[0x60] No, No, No, No, No, No, No, No, +[0x68] No, No, No, No, No, No, No, No, +[0x70] No, No, No, No, No, No, No, No, +[0x78] No, '', No, '\b', No, No, No, No, +}; + +enum +{ + /* controller command byte */ + Cscs1= (1<<6), /* scan code set 1 */ + Cauxdis= (1<<5), /* mouse disable */ + Ckbddis= (1<<4), /* kbd disable */ + Csf= (1<<2), /* system flag */ + Cauxint= (1<<1), /* mouse interrupt enable */ + Ckbdint= (1<<0), /* kbd interrupt enable */ 
+}; + +static Queue *kbdq; + +int mouseshifted; +void (*kbdmouse)(int); +static int nokbd = 1; + +static Lock i8042lock; +static uchar ccc; +static void (*auxputc)(int, int); + +/* + * wait for output no longer busy + */ +static int +outready(void) +{ + int tries; + + for(tries = 0; (inb(Status) & Outbusy); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * wait for input + */ +static int +inready(void) +{ + int tries; + + for(tries = 0; !(inb(Status) & Inready); tries++){ + if(tries > 500) + return -1; + delay(2); + } + return 0; +} + +/* + * ask 8042 to reset the machine + */ +void +i8042reset(void) +{ + ushort *s; + int i, x; + + if(nokbd) + return; + + s = KADDR(0x472); + *s = 0x1234; /* BIOS warm-boot flag */ + + /* + * newer reset the machine command + */ + outready(); + outb(Cmd, 0xFE); + outready(); + + /* + * Pulse it by hand (old somewhat reliable) + */ + x = 0xDF; + for(i = 0; i < 5; i++){ + x ^= 1; + outready(); + outb(Cmd, 0xD1); + outready(); + outb(Data, x); /* toggle reset */ + delay(100); + } +} + +int +i8042auxcmd(int cmd) +{ + uint c; + int tries; + + c = 0; + tries = 0; + + ilock(&i8042lock); + do{ + if(tries++ > 2) + break; + if(outready() < 0) + break; + outb(Cmd, 0xD4); + if(outready() < 0) + break; + outb(Data, cmd); + if(outready() < 0) + break; + if(inready() < 0) + break; + c = inb(Data); + } while(c == 0xFE || c == 0); + iunlock(&i8042lock); + + if(c != 0xFA){ + print("i8042: %2.2ux returned to the %2.2ux command\n", c, cmd); + return -1; + } + return 0; +} + +int +i8042auxcmds(uchar *cmd, int ncmd) +{ + int i; + + ilock(&i8042lock); + for(i=0; inum) + leds |= 1<<1; + if(0 && kbscan->caps) /* we don't implement caps lock */ + leds |= 1<<2; + ilock(&i8042lock); + outready(); + outb(Data, 0xed); /* talk directly to kbd, not ctlr */ + if(inready() == 0) + inb(Data); + + outready(); + outb(Data, leds); + if(inready() == 0) + inb(Data); + + outready(); + iunlock(&i8042lock); +} + +/* + * Scan code processing + 
*/ +void +kbdputsc(int c, int scanno) +{ + int i, lastc, keyup; + Kbscan *kbscan; + + kbscan = kbscans + scanno; + + /* + * e0's is the first of a 2 character sequence, e1 the first + * of a 3 character sequence (on the safari) + */ + if(c == 0xe0){ + kbscan->esc1 = 1; + return; + } else if(c == 0xe1){ + kbscan->esc2 = 2; + return; + } + + keyup = c&0x80; + c &= 0x7f; + + if(kbscan->esc1){ + c = kbtabesc1[c]; + kbscan->esc1 = 0; + } else if(kbscan->esc2){ + kbscan->esc2--; + return; + } else if(kbscan->shift) + c = kbtabshift[c]; + else if(kbscan->altgr) + c = kbtabaltgr[c]; + else if(kbscan->ctl) + c = kbtabctrl[c]; + else + c = kbtab[c]; + + if(kbscan->caps && c<='z' && c>='a') + c += 'A' - 'a'; + + /* + * keyup only important for shifts + */ + if(keyup){ + switch(c){ + case Latin: + kbscan->alt = 0; + break; + case Shift: + kbscan->shift = 0; + mouseshifted = 0; + break; + case Ctrl: + kbscan->ctl = 0; + break; + case Altgr: + kbscan->altgr = 0; + break; + case Kmouse|1: + case Kmouse|2: + case Kmouse|3: + case Kmouse|4: + case Kmouse|5: + kbscan->buttons &= ~(1<<(c-Kmouse-1)); + if(kbdmouse) + kbdmouse(kbscan->buttons); + break; + } + return; + } + + /* + * normal character + */ + lastc = kbscan->lastc; + kbscan->lastc = c; + if(!(c & (Spec|KF))){ + if(kbscan->ctl) + if(kbscan->alt && c == Del) + exit(0); + if(!kbscan->collecting){ + kbdputc(kbdq, c); + return; + } + kbscan->kc[kbscan->nk++] = c; + c = latin1(kbscan->kc, kbscan->nk); + if(c < -1) /* need more keystrokes */ + return; + if(c != -1) /* valid sequence */ + kbdputc(kbdq, c); + else /* dump characters */ + for(i=0; ink; i++) + kbdputc(kbdq, kbscan->kc[i]); + kbscan->nk = 0; + kbscan->collecting = 0; + return; + } else { + switch(c){ + case Caps: + kbscan->caps ^= 1; + return; + case Num: + kbscan->num ^= 1; + if(scanno == Int) + setleds(kbscan); + return; + case Shift: + kbscan->shift = 1; + mouseshifted = 1; + return; + case Latin: + kbscan->alt = 1; + /* + * VMware and Qemu use Ctl-Alt as the key 
combination + * to make the VM give up keyboard and mouse focus. + * Iogear kvm use Ctl followed by Alt as their special key. + * This has the unfortunate side effect that when you + * come back into focus, Plan 9 thinks you want to type + * a compose sequence (you just typed alt). + * + * As a clumsy hack around this, we look for ctl-alt or + * ctl followed by alt and don't treat it as the start of a + * compose sequence. + */ + if(lastc != Ctrl && lastc != Shift && !kbscan->ctl){ + kbscan->collecting = 1; + kbscan->nk = 0; + } + return; + case Ctrl: + kbscan->ctl = 1; + return; + case Altgr: + kbscan->altgr = 1; + return; + case Kmouse|1: + case Kmouse|2: + case Kmouse|3: + case Kmouse|4: + case Kmouse|5: + kbscan->buttons |= 1<<(c-Kmouse-1); + if(kbdmouse) + kbdmouse(kbscan->buttons); + return; + } + } + kbdputc(kbdq, c); +} + +/* + * keyboard interrupt + */ +static void +i8042intr(Ureg*, void*) +{ + int s, c; + + /* + * get status + */ + ilock(&i8042lock); + s = inb(Status); + if(!(s&Inready)){ + iunlock(&i8042lock); + return; + } + + /* + * get the character + */ + c = inb(Data); + iunlock(&i8042lock); + + /* + * if it's the aux port... 
+ */ + if(s & Minready){ + if(auxputc != nil) + auxputc(c, kbscans[Int].shift); + return; + } + + kbdputsc(c, Int); +} + +void +i8042auxenable(void (*putc)(int, int)) +{ + char *err = "i8042: aux init failed\n"; + + /* enable kbd/aux xfers and interrupts */ + ccc &= ~Cauxdis; + ccc |= Cauxint; + + ilock(&i8042lock); + if(outready() < 0) + print(err); + outb(Cmd, 0x60); /* write control register */ + if(outready() < 0) + print(err); + outb(Data, ccc); + if(outready() < 0) + print(err); + outb(Cmd, 0xA8); /* auxiliary device enable */ + if(outready() < 0){ + iunlock(&i8042lock); + return; + } + auxputc = putc; + intrenable(IrqAUX, i8042intr, 0, BUSUNKNOWN, "kbdaux"); + iunlock(&i8042lock); +} + +static char *initfailed = "i8042: kbdinit failed\n"; + +static int +outbyte(int port, int c) +{ + outb(port, c); + if(outready() < 0) { + print(initfailed); + return -1; + } + return 0; +} + +void +kbdinit(void) +{ + int c, try; + + /* wait for a quiescent controller */ + try = 1000; + while(try-- > 0 && (c = inb(Status)) & (Outbusy | Inready)) { + if(c & Inready) + inb(Data); + delay(1); + } + if (try <= 0) { + print(initfailed); + return; + } + + /* get current controller command byte */ + outb(Cmd, 0x20); + if(inready() < 0){ + print("i8042: kbdinit can't read ccc\n"); + ccc = 0; + } else + ccc = inb(Data); + + /* enable kbd xfers and interrupts */ + ccc &= ~Ckbddis; + ccc |= Csf | Ckbdint | Cscs1; + if(outready() < 0) { + print(initfailed); + return; + } + + nokbd = 0; + + /* disable mouse */ + if (outbyte(Cmd, 0x60) < 0 || outbyte(Data, ccc) < 0) + print("i8042: kbdinit mouse disable failed\n"); + + /* set typematic rate/delay (0 -> delay=250ms & rate=30cps) */ + if(outbyte(Data, 0xf3) < 0 || outbyte(Data, 0) < 0) + print("i8042: kbdinit set typematic rate failed\n"); +} + +void +kbdenable(void) +{ + kbdq = qopen(4*1024, 0, 0, 0); + if(kbdq == nil) + panic("kbdinit"); + qnoblock(kbdq, 1); + addkbdq(kbdq, -1); + + ioalloc(Data, 1, 0, "kbd"); + ioalloc(Cmd, 1, 0, "kbd"); + 
+ intrenable(IrqKBD, i8042intr, 0, BUSUNKNOWN, "kbd"); + + kbscans[Int].num = 0; + setleds(kbscans + Int); +} + +void +kbdputmap(ushort m, ushort scanc, Rune r) +{ + if(scanc >= Nscan) + error(Ebadarg); + switch(m) { + default: + error(Ebadarg); + case 0: + kbtab[scanc] = r; + break; + case 1: + kbtabshift[scanc] = r; + break; + case 2: + kbtabesc1[scanc] = r; + break; + case 3: + kbtabaltgr[scanc] = r; + break; + case 4: + kbtabctrl[scanc] = r; + break; + } +} + +int +kbdgetmap(uint offset, int *t, int *sc, Rune *r) +{ + *t = offset/Nscan; + *sc = offset%Nscan; + if(*t < 0 || *sc < 0) + error(Ebadarg); + switch(*t) { + default: + return 0; + case 0: + *r = kbtab[*sc]; + return 1; + case 1: + *r = kbtabshift[*sc]; + return 1; + case 2: + *r = kbtabesc1[*sc]; + return 1; + case 3: + *r = kbtabaltgr[*sc]; + return 1; + case 4: + *r = kbtabctrl[*sc]; + return 1; + } +} diff -Nru 0/sys/src/nix/k10/l32p.s 4/sys/src/nix/k10/l32p.s --- 0/sys/src/nix/k10/l32p.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l32p.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,235 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $32 + +#define pFARJMP32(s, o) BYTE $0xea; /* far jump to ptr32:16 */\ + LONG $o; WORD $s + +/* + * Enter here in 32-bit protected mode. Welcome to 1982. + * Make sure the GDT is set as it should be: + * disable interrupts; + * load the GDT with the table in _gdt32p; + * load all the data segments + * load the code segment via a far jump. 
+ */ +TEXT _protected<>(SB), 1, $-4 + CLI + BYTE $0xe9; LONG $0x00000058; /* JMP _endofheader; the rel32 displacement is hand-computed over the tables below and must change if they change size */ + +_startofheader: + BYTE $0x90 /* NOP */ + BYTE $0x90 /* NOP */ + +TEXT _multibootheader<>(SB), 1, $-4 /* must be 4-byte aligned */ + LONG $0x1badb002 /* magic */ + LONG $0x00000003 /* flags */ + LONG $-(0x1badb002 + 0x00000003) /* checksum */ + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + +TEXT _gdtptr64p<>(SB), 1, $-4 + WORD $(2*8-1) + QUAD $_gdt64<>-KZERO(SB) + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) /* NOTE(review): limit covers 3 descriptors but _gdt64 defines 2 - confirm this is intended */ + QUAD $_gdt64<>(SB) + +_endofheader: + MOVL AX, BP /* possible passed-in magic */ + + MOVL $_gdtptr32p<>-KZERO(SB), AX + MOVL (AX), GDTR + + MOVL $SSEL(SiDS, SsTIGDT|SsRPL0), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + pFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _warp64<>-KZERO(SB)) + +/* + * Make the basic page tables for CPU0 to map 0-4MiB physical + * to KZERO, and include an identity map for the switch from protected + * to paging mode. There's an assumption here that the creation and later + * removal of the identity map will not interfere with the KZERO mappings; + * the conditions for clearing the identity map are + * clear PML4 entry when (KZERO & 0x0000ff8000000000) != 0; + * clear PDP entry when (KZERO & 0x0000007fc0000000) != 0; + * don't clear PD entry when (KZERO & 0x000000003fe00000) == 0; + * the code below assumes these conditions are met. + * + * Assume a recent processor with Page Size Extensions + * and use two 2MiB entries.
+ */ +/* + * The layout is described in dat.h: + * - MACHSTKSZ stack + * - PTSZ PT for PMAPADDR unused - assumes in KZERO PD + * - PTSZ PD + * - PTSZ PDP + * - PTSZ PML4 + * - 4*KiB vsvmpage for gdt, tss + * - MACHSZ m + * - 4*KiB syspage + * - 4*KiB ptrpage + * - 4*KiB unused + * - 4*KiB unused + * _protected: start of kernel text + */ + +/* + * Macros for accessing page table entries; change the + * C-style array-index macros into a page table byte offset + */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _warp64<>(SB), 1, $-4 + MOVL $_protected<>-(MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ+KZERO)(SB), SI + + MOVL SI, DI + XORL AX, AX + MOVL $((MACHSTKSZ+4*PTSZ+5*(4*KiB)+MACHSZ)>>2), CX /* count of 32-bit words to zero */ + + CLD + REP; STOSL /* stack, P*, vsvm, m, sys */ + + MOVL SI, AX /* sys-KZERO */ + ADDL $(MACHSTKSZ), AX /* PML4 sits right after the stack, then PDP, PD, PT at PTSZ steps; NOTE(review): the layout list above names PT first - confirm against dat.h */ + MOVL AX, CR3 /* load the mmu */ + MOVL AX, DX + ADDL $(PTSZ|PteRW|PteP), DX /* PDP at PML4 + PTSZ */ + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + MOVL DX, PML4O(KZERO)(AX) /* PML4E for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PDP at PML4 + PTSZ */ + ADDL $PTSZ, DX /* PD at PML4 + 2*PTSZ */ + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL DX, PDPO(KZERO)(AX) /* PDPE for KZERO, PMAPADDR */ + + ADDL $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + MOVL $(PtePS|PteRW|PteP), DX + MOVL DX, PDO(0)(AX) /* PDE for identity 0-[24]MiB */ + MOVL DX, PDO(KZERO)(AX) /* PDE for KZERO 0-[24]MiB */ + ADDL $PGLSZ(1), DX + MOVL DX, PDO(KZERO+PGLSZ(1))(AX) /* PDE for KZERO [24]-[48]MiB */ + + MOVL AX, DX /* PD at PML4 + 2*PTSZ */ + ADDL $(PTSZ|PteRW|PteP), DX /* PT at PML4 + 3*PTSZ */ + MOVL DX, PDO(PMAPADDR)(AX) /* PDE for PMAPADDR */ + +/* + * Enable and activate Long Mode.
From the manual: + * make sure Page Size Extensions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) /* selector index 3: the Long mode CS entry of _gdt32p */ + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* sys-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX + MOVQ AX, sys(SB) /* sys */ + + ADDQ $(MACHSTKSZ), AX /* PML4 and top of stack */ + MOVQ AX, SP /* set stack */ + +_zap0pml4: + CMPQ DX, $PML4O(KZERO) /* KZERO & 0x0000ff8000000000 (DX is 0: skip the zap if KZERO shares entry 0) */ + JEQ _zap0pdp + MOVQ DX, PML4O(0)(AX) /* zap identity map PML4E */ +_zap0pdp: + ADDQ $PTSZ, AX /* PDP at PML4 + PTSZ */ + CMPQ DX, $PDPO(KZERO) /* KZERO & 0x0000007fc0000000 */ + JEQ _zap0pd + MOVQ DX, PDPO(0)(AX) /* zap identity map PDPE */ +_zap0pd: + ADDQ $PTSZ, AX /* PD at PML4 + 2*PTSZ */ + CMPQ DX, $PDO(KZERO) /* KZERO & 0x000000003fe00000 */ + JEQ _zap0done + MOVQ DX, PDO(0)(AX) /* zap identity map PDE */ +_zap0done: + + ADDQ $(MACHSTKSZ), SI /* PML4-KZERO */ + MOVQ SI, CR3 /* flush TLB */ + + ADDQ $(2*PTSZ+4*KiB), AX /* PD+PT+vsvm */ + MOVQ
AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + MOVLQZX BX, BX /* push multiboot args */ + PUSHQ BX /* multiboot info* */ + MOVLQZX RARG, RARG + PUSHQ RARG /* multiboot magic */ + + CALL main(SB) + +TEXT ndnr(SB), 1, $-4 /* no deposit, no return */ +_dnr: + STI + HLT + JMP _dnr /* do not resuscitate */ + diff -Nru 0/sys/src/nix/k10/l64acidt.s 4/sys/src/nix/k10/l64acidt.s --- 0/sys/src/nix/k10/l64acidt.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64acidt.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,341 @@ +/* + * Interrupt/exception handling. + */ +#include "amd64l.h" + +MODE $64 + +TEXT _acintrp<>(SB), 1, $-4 /* no error code pushed */ + PUSHQ AX /* save AX */ + MOVQ 8(SP), AX /* idthandlers(SB) PC */ + JMP _acintrcommon + +TEXT _acintre<>(SB), 1, $-4 /* error code pushed */ + XCHGQ AX, (SP) +_acintrcommon: + MOVBQZX (AX), AX + XCHGQ AX, (SP) + + SUBQ $24, SP /* R1[45], [DEFG]S */ + CMPW 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) /* old CS */ + JEQ _acintrnested + + MOVQ RUSER, 0(SP) + MOVQ RMACH, 8(SP) + MOVW DS, 16(SP) + MOVW ES, 18(SP) + MOVW FS, 20(SP) + MOVW GS, 22(SP) + + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* up */ + +_acintrnested: + PUSHQ R13 + PUSHQ R12 + PUSHQ R11 + PUSHQ R10 + PUSHQ R9 + PUSHQ R8 + PUSHQ BP + PUSHQ DI + PUSHQ SI + PUSHQ DX + PUSHQ CX + PUSHQ BX + PUSHQ AX + + MOVQ SP, RARG + PUSHQ SP + CALL actrap(SB) + +TEXT _acintrr<>(SB), 1, $-4 /* so ktrace can pop frame */ + POPQ AX + + POPQ AX + POPQ BX + POPQ CX + POPQ DX + POPQ SI + POPQ DI + POPQ BP + POPQ R8 + POPQ R9 + POPQ R10 + POPQ R11 + POPQ R12 + POPQ R13 + + CMPQ 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) + JEQ _aciretnested + + SWAPGS + MOVW 22(SP), GS + MOVW 20(SP), FS + MOVW 18(SP), ES + MOVW 16(SP), DS + MOVQ 8(SP), RMACH + MOVQ 0(SP), RUSER + +_aciretnested: + ADDQ $40, SP + IRETQ + +TEXT acidthandlers(SB), 1, $-4 + CALL _acintrp<>(SB); BYTE $IdtDE /* #DE Divide-by-Zero Error */ + CALL _acintrp<>(SB); BYTE $IdtDB /* 
#DB Debug */ + CALL _acintrp<>(SB); BYTE $IdtNMI /* #NMI Borked */ + CALL _acintrp<>(SB); BYTE $IdtBP /* #BP Breakpoint */ + CALL _acintrp<>(SB); BYTE $IdtOF /* #OF Overflow */ + CALL _acintrp<>(SB); BYTE $IdtBR /* #BR Bound-Range */ + CALL _acintrp<>(SB); BYTE $IdtUD /* #UD Invalid-Opcode */ + CALL _acintrp<>(SB); BYTE $IdtNM /* #NM Device-Not-Available */ + CALL _acintre<>(SB); BYTE $IdtDF /* #DF Double-Fault */ + CALL _acintrp<>(SB); BYTE $Idt09 /* reserved */ + CALL _acintre<>(SB); BYTE $IdtTS /* #TS Invalid-TSS */ + CALL _acintre<>(SB); BYTE $IdtNP /* #NP Segment-Not-Present */ + CALL _acintre<>(SB); BYTE $IdtSS /* #SS Stack */ + CALL _acintre<>(SB); BYTE $IdtGP /* #GP General-Protection */ + CALL _acintre<>(SB); BYTE $IdtPF /* #PF Page-Fault */ + CALL _acintrp<>(SB); BYTE $Idt0F /* reserved */ + CALL _acintrp<>(SB); BYTE $IdtMF /* #MF x87 FPE-Pending */ + CALL _acintre<>(SB); BYTE $IdtAC /* #AC Alignment-Check */ + CALL _acintrp<>(SB); BYTE $IdtMC /* #MC Machine-Check */ + CALL _acintrp<>(SB); BYTE $IdtXF /* #XF SIMD Floating-Point */ + CALL _acintrp<>(SB); BYTE $0x14 /* reserved */ + CALL _acintrp<>(SB); BYTE $0x15 /* reserved */ + CALL _acintrp<>(SB); BYTE $0x16 /* reserved */ + CALL _acintrp<>(SB); BYTE $0x17 /* reserved */ + CALL _acintrp<>(SB); BYTE $0x18 /* reserved */ + CALL _acintrp<>(SB); BYTE $0x19 /* reserved */ + CALL _acintrp<>(SB); BYTE $0x1a /* reserved */ + CALL _acintrp<>(SB); BYTE $0x1b /* reserved */ + CALL _acintrp<>(SB); BYTE $0x1c /* reserved */ + CALL _acintrp<>(SB); BYTE $0x1d /* reserved */ + CALL _acintrp<>(SB); BYTE $0x1e /* reserved */ + CALL _acintrp<>(SB); BYTE $0x1f /* reserved */ + CALL _acintrp<>(SB); BYTE $0x20 + CALL _acintrp<>(SB); BYTE $0x21 + CALL _acintrp<>(SB); BYTE $0x22 + CALL _acintrp<>(SB); BYTE $0x23 + CALL _acintrp<>(SB); BYTE $0x24 + CALL _acintrp<>(SB); BYTE $0x25 + CALL _acintrp<>(SB); BYTE $0x26 + CALL _acintrp<>(SB); BYTE $0x27 + CALL _acintrp<>(SB); BYTE $0x28 + CALL _acintrp<>(SB); BYTE $0x29 + CALL 
_acintrp<>(SB); BYTE $0x2a + CALL _acintrp<>(SB); BYTE $0x2b + CALL _acintrp<>(SB); BYTE $0x2c + CALL _acintrp<>(SB); BYTE $0x2d + CALL _acintrp<>(SB); BYTE $0x2e + CALL _acintrp<>(SB); BYTE $0x2f + CALL _acintrp<>(SB); BYTE $0x30 + CALL _acintrp<>(SB); BYTE $0x31 + CALL _acintrp<>(SB); BYTE $0x32 + CALL _acintrp<>(SB); BYTE $0x33 + CALL _acintrp<>(SB); BYTE $0x34 + CALL _acintrp<>(SB); BYTE $0x35 + CALL _acintrp<>(SB); BYTE $0x36 + CALL _acintrp<>(SB); BYTE $0x37 + CALL _acintrp<>(SB); BYTE $0x38 + CALL _acintrp<>(SB); BYTE $0x39 + CALL _acintrp<>(SB); BYTE $0x3a + CALL _acintrp<>(SB); BYTE $0x3b + CALL _acintrp<>(SB); BYTE $0x3c + CALL _acintrp<>(SB); BYTE $0x3d + CALL _acintrp<>(SB); BYTE $0x3e + CALL _acintrp<>(SB); BYTE $0x3f + CALL _acintrp<>(SB); BYTE $0x40 + CALL _acintrp<>(SB); BYTE $0x41 + CALL _acintrp<>(SB); BYTE $0x42 + CALL _acintrp<>(SB); BYTE $0x43 + CALL _acintrp<>(SB); BYTE $0x44 + CALL _acintrp<>(SB); BYTE $0x45 + CALL _acintrp<>(SB); BYTE $0x46 + CALL _acintrp<>(SB); BYTE $0x47 + CALL _acintrp<>(SB); BYTE $0x48 + CALL _acintrp<>(SB); BYTE $0x49 + CALL _acintrp<>(SB); BYTE $0x4a + CALL _acintrp<>(SB); BYTE $0x4b + CALL _acintrp<>(SB); BYTE $0x4c + CALL _acintrp<>(SB); BYTE $0x4d + CALL _acintrp<>(SB); BYTE $0x4e + CALL _acintrp<>(SB); BYTE $0x4f + CALL _acintrp<>(SB); BYTE $0x50 + CALL _acintrp<>(SB); BYTE $0x51 + CALL _acintrp<>(SB); BYTE $0x52 + CALL _acintrp<>(SB); BYTE $0x53 + CALL _acintrp<>(SB); BYTE $0x54 + CALL _acintrp<>(SB); BYTE $0x55 + CALL _acintrp<>(SB); BYTE $0x56 + CALL _acintrp<>(SB); BYTE $0x57 + CALL _acintrp<>(SB); BYTE $0x58 + CALL _acintrp<>(SB); BYTE $0x59 + CALL _acintrp<>(SB); BYTE $0x5a + CALL _acintrp<>(SB); BYTE $0x5b + CALL _acintrp<>(SB); BYTE $0x5c + CALL _acintrp<>(SB); BYTE $0x5d + CALL _acintrp<>(SB); BYTE $0x5e + CALL _acintrp<>(SB); BYTE $0x5f + CALL _acintrp<>(SB); BYTE $0x60 + CALL _acintrp<>(SB); BYTE $0x61 + CALL _acintrp<>(SB); BYTE $0x62 + CALL _acintrp<>(SB); BYTE $0x63 + CALL _acintrp<>(SB); BYTE $0x64 
+ CALL _acintrp<>(SB); BYTE $0x65 + CALL _acintrp<>(SB); BYTE $0x66 + CALL _acintrp<>(SB); BYTE $0x67 + CALL _acintrp<>(SB); BYTE $0x68 + CALL _acintrp<>(SB); BYTE $0x69 + CALL _acintrp<>(SB); BYTE $0x6a + CALL _acintrp<>(SB); BYTE $0x6b + CALL _acintrp<>(SB); BYTE $0x6c + CALL _acintrp<>(SB); BYTE $0x6d + CALL _acintrp<>(SB); BYTE $0x6e + CALL _acintrp<>(SB); BYTE $0x6f + CALL _acintrp<>(SB); BYTE $0x70 + CALL _acintrp<>(SB); BYTE $0x71 + CALL _acintrp<>(SB); BYTE $0x72 + CALL _acintrp<>(SB); BYTE $0x73 + CALL _acintrp<>(SB); BYTE $0x74 + CALL _acintrp<>(SB); BYTE $0x75 + CALL _acintrp<>(SB); BYTE $0x76 + CALL _acintrp<>(SB); BYTE $0x77 + CALL _acintrp<>(SB); BYTE $0x78 + CALL _acintrp<>(SB); BYTE $0x79 + CALL _acintrp<>(SB); BYTE $0x7a + CALL _acintrp<>(SB); BYTE $0x7b + CALL _acintrp<>(SB); BYTE $0x7c + CALL _acintrp<>(SB); BYTE $0x7d + CALL _acintrp<>(SB); BYTE $0x7e + CALL _acintrp<>(SB); BYTE $0x7f + CALL _acintrp<>(SB); BYTE $0x80 + CALL _acintrp<>(SB); BYTE $0x81 + CALL _acintrp<>(SB); BYTE $0x82 + CALL _acintrp<>(SB); BYTE $0x83 + CALL _acintrp<>(SB); BYTE $0x84 + CALL _acintrp<>(SB); BYTE $0x85 + CALL _acintrp<>(SB); BYTE $0x86 + CALL _acintrp<>(SB); BYTE $0x87 + CALL _acintrp<>(SB); BYTE $0x88 + CALL _acintrp<>(SB); BYTE $0x89 + CALL _acintrp<>(SB); BYTE $0x8a + CALL _acintrp<>(SB); BYTE $0x8b + CALL _acintrp<>(SB); BYTE $0x8c + CALL _acintrp<>(SB); BYTE $0x8d + CALL _acintrp<>(SB); BYTE $0x8e + CALL _acintrp<>(SB); BYTE $0x8f + CALL _acintrp<>(SB); BYTE $0x90 + CALL _acintrp<>(SB); BYTE $0x91 + CALL _acintrp<>(SB); BYTE $0x92 + CALL _acintrp<>(SB); BYTE $0x93 + CALL _acintrp<>(SB); BYTE $0x94 + CALL _acintrp<>(SB); BYTE $0x95 + CALL _acintrp<>(SB); BYTE $0x96 + CALL _acintrp<>(SB); BYTE $0x97 + CALL _acintrp<>(SB); BYTE $0x98 + CALL _acintrp<>(SB); BYTE $0x99 + CALL _acintrp<>(SB); BYTE $0x9a + CALL _acintrp<>(SB); BYTE $0x9b + CALL _acintrp<>(SB); BYTE $0x9c + CALL _acintrp<>(SB); BYTE $0x9d + CALL _acintrp<>(SB); BYTE $0x9e + CALL _acintrp<>(SB); BYTE 
$0x9f + CALL _acintrp<>(SB); BYTE $0xa0 + CALL _acintrp<>(SB); BYTE $0xa1 + CALL _acintrp<>(SB); BYTE $0xa2 + CALL _acintrp<>(SB); BYTE $0xa3 + CALL _acintrp<>(SB); BYTE $0xa4 + CALL _acintrp<>(SB); BYTE $0xa5 + CALL _acintrp<>(SB); BYTE $0xa6 + CALL _acintrp<>(SB); BYTE $0xa7 + CALL _acintrp<>(SB); BYTE $0xa8 + CALL _acintrp<>(SB); BYTE $0xa9 + CALL _acintrp<>(SB); BYTE $0xaa + CALL _acintrp<>(SB); BYTE $0xab + CALL _acintrp<>(SB); BYTE $0xac + CALL _acintrp<>(SB); BYTE $0xad + CALL _acintrp<>(SB); BYTE $0xae + CALL _acintrp<>(SB); BYTE $0xaf + CALL _acintrp<>(SB); BYTE $0xb0 + CALL _acintrp<>(SB); BYTE $0xb1 + CALL _acintrp<>(SB); BYTE $0xb2 + CALL _acintrp<>(SB); BYTE $0xb3 + CALL _acintrp<>(SB); BYTE $0xb4 + CALL _acintrp<>(SB); BYTE $0xb5 + CALL _acintrp<>(SB); BYTE $0xb6 + CALL _acintrp<>(SB); BYTE $0xb7 + CALL _acintrp<>(SB); BYTE $0xb8 + CALL _acintrp<>(SB); BYTE $0xb9 + CALL _acintrp<>(SB); BYTE $0xba + CALL _acintrp<>(SB); BYTE $0xbb + CALL _acintrp<>(SB); BYTE $0xbc + CALL _acintrp<>(SB); BYTE $0xbd + CALL _acintrp<>(SB); BYTE $0xbe + CALL _acintrp<>(SB); BYTE $0xbf + CALL _acintrp<>(SB); BYTE $0xc0 + CALL _acintrp<>(SB); BYTE $0xc1 + CALL _acintrp<>(SB); BYTE $0xc2 + CALL _acintrp<>(SB); BYTE $0xc3 + CALL _acintrp<>(SB); BYTE $0xc4 + CALL _acintrp<>(SB); BYTE $0xc5 + CALL _acintrp<>(SB); BYTE $0xc6 + CALL _acintrp<>(SB); BYTE $0xc7 + CALL _acintrp<>(SB); BYTE $0xc8 + CALL _acintrp<>(SB); BYTE $0xc9 + CALL _acintrp<>(SB); BYTE $0xca + CALL _acintrp<>(SB); BYTE $0xcb + CALL _acintrp<>(SB); BYTE $0xcc + CALL _acintrp<>(SB); BYTE $0xcd /* each stub's byte is its own table index; _acintrcommon loads it to identify the stub */ + CALL _acintrp<>(SB); BYTE $0xce + CALL _acintrp<>(SB); BYTE $0xcf + CALL _acintrp<>(SB); BYTE $0xd0 + CALL _acintrp<>(SB); BYTE $0xd1 + CALL _acintrp<>(SB); BYTE $0xd2 + CALL _acintrp<>(SB); BYTE $0xd3 + CALL _acintrp<>(SB); BYTE $0xd4 + CALL _acintrp<>(SB); BYTE $0xd5 + CALL _acintrp<>(SB); BYTE $0xd6 + CALL _acintrp<>(SB); BYTE $0xd7 + CALL _acintrp<>(SB); BYTE $0xd8 + CALL _acintrp<>(SB); BYTE $0xd9 + CALL
_acintrp<>(SB); BYTE $0xda + CALL _acintrp<>(SB); BYTE $0xdb + CALL _acintrp<>(SB); BYTE $0xdc + CALL _acintrp<>(SB); BYTE $0xdd + CALL _acintrp<>(SB); BYTE $0xde + CALL _acintrp<>(SB); BYTE $0xdf + CALL _acintrp<>(SB); BYTE $0xe0 + CALL _acintrp<>(SB); BYTE $0xe1 + CALL _acintrp<>(SB); BYTE $0xe2 + CALL _acintrp<>(SB); BYTE $0xe3 + CALL _acintrp<>(SB); BYTE $0xe4 + CALL _acintrp<>(SB); BYTE $0xe5 + CALL _acintrp<>(SB); BYTE $0xe6 + CALL _acintrp<>(SB); BYTE $0xe7 + CALL _acintrp<>(SB); BYTE $0xe8 + CALL _acintrp<>(SB); BYTE $0xe9 + CALL _acintrp<>(SB); BYTE $0xea + CALL _acintrp<>(SB); BYTE $0xeb + CALL _acintrp<>(SB); BYTE $0xec + CALL _acintrp<>(SB); BYTE $0xed + CALL _acintrp<>(SB); BYTE $0xee + CALL _acintrp<>(SB); BYTE $0xef + CALL _acintrp<>(SB); BYTE $0xf0 + CALL _acintrp<>(SB); BYTE $0xf1 + CALL _acintrp<>(SB); BYTE $0xf2 + CALL _acintrp<>(SB); BYTE $0xf3 + CALL _acintrp<>(SB); BYTE $0xf4 + CALL _acintrp<>(SB); BYTE $0xf5 + CALL _acintrp<>(SB); BYTE $0xf6 + CALL _acintrp<>(SB); BYTE $0xf7 + CALL _acintrp<>(SB); BYTE $0xf8 + CALL _acintrp<>(SB); BYTE $0xf9 + CALL _acintrp<>(SB); BYTE $0xfa + CALL _acintrp<>(SB); BYTE $0xfb + CALL _acintrp<>(SB); BYTE $0xfc + CALL _acintrp<>(SB); BYTE $0xfd + CALL _acintrp<>(SB); BYTE $0xfe + CALL _acintrp<>(SB); BYTE $0xff diff -Nru 0/sys/src/nix/k10/l64acsyscall.s 4/sys/src/nix/k10/l64acsyscall.s --- 0/sys/src/nix/k10/l64acsyscall.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64acsyscall.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,80 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $64 + +/* + */ +TEXT acsyscallentry(SB), 1, $-4 + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* m->proc */ + MOVQ 24(RUSER), R12 /* m->proc->dbgregs */ + + /* save sp to r13; set up kstack so we can call acsyscall */ + MOVQ SP, R13 + MOVQ 24(RMACH), SP /* m->stack */ + ADDQ $MACHSTKSZ, SP + + MOVQ $SSEL(SiUDS, SsRPL3), BX /* old stack segment */ + MOVQ BX, 176(R12) /* save ss */ + MOVQ R13, 168(R12) /* 
old sp */ + MOVQ R11, 160(R12) /* old flags */ + MOVQ $SSEL(SiUCS, SsRPL3), BX /* old code segment */ + MOVQ BX, 152(R12) /* save cs */ + MOVQ CX, 144(R12) /* old ip */ + + MOVW $SSEL(SiUDS, SsRPL3), 120(R12) + MOVW ES, 122(R12) + MOVW FS, 124(R12) + MOVW GS, 126(R12) + + MOVQ RARG, 0(R12) /* system call number: up->dbgregs->ax */ + CALL acsyscall(SB) +NDNR: JMP NDNR + +TEXT _acsysret(SB), 1, $-4 + CLI + SWAPGS + + MOVQ 24(RUSER), R12 /* m->proc->dbgregs */ + MOVQ 0(R12), AX /* m->proc->dbgregs->ax */ + MOVQ (6*8)(R12), BP /* m->proc->dbgregs->bp */ + ADDQ $(15*8), R12 /* after ax--r15, 8 bytes each */ + + MOVW 0(R12), DS + MOVW 2(R12), ES + MOVW 4(R12), FS + MOVW 6(R12), GS + + MOVQ 24(R12), CX /* ip */ + MOVQ 40(R12), R11 /* flags */ + + MOVQ 48(R12), SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +/* + * Return from an exec() system call that we never did, + * DX is ar0->p by the time we call it. See syscall() + */ +TEXT xactouser(SB), 1, $-4 + CLI + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* m->proc */ + MOVQ 24(RUSER), R12 /* m->proc->dbgregs */ + MOVQ 144(R12), CX /* old ip */ + MOVQ 0(R12), BX /* save AX */ + SWAPGS + MOVQ $SSEL(SiUDS, SsRPL3), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + + MOVQ BX, AX /* restore AX */ + MOVQ $If, R11 /* flags */ + + MOVQ RARG, SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ diff -Nru 0/sys/src/nix/k10/l64cpuid.s 4/sys/src/nix/k10/l64cpuid.s --- 0/sys/src/nix/k10/l64cpuid.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64cpuid.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,26 @@ +/* + * The CPUID instruction is always supported on the amd64. + */ +TEXT cpuid(SB), $-4 + MOVL RARG, AX /* function in AX */ + MOVLQZX cx+8(FP), CX /* iterator/index/etc. */ + + CPUID + + MOVQ info+16(FP), BP + MOVL AX, 0(BP) + MOVL BX, 4(BP) + MOVL CX, 8(BP) + MOVL DX, 12(BP) + RET + +/* + * Basic timing loop to determine CPU frequency. + * The AAM instruction is not available in 64-bit mode. 
+ */ +TEXT aamloop(SB), 1, $-4 + MOVLQZX RARG, CX +aaml1: + XORQ AX, AX /* close enough */ + LOOP aaml1 + RET diff -Nru 0/sys/src/nix/k10/l64fpu.s 4/sys/src/nix/k10/l64fpu.s --- 0/sys/src/nix/k10/l64fpu.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64fpu.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,46 @@ +/* + * SIMD Floating Point. + * Note: for x87 instructions which have both a 'wait' and 'nowait' version, + * 8a only knows the 'wait' mnemonic but does NOT insert the WAIT prefix byte + * (i.e. they act like their FNxxx variations) so WAIT instructions must be + * explicitly placed in the code if necessary. + */ +TEXT _clts(SB), 1, $-4 + CLTS + RET + +TEXT _fldcw(SB), 1, $-4 /* Load x87 FPU Control Word */ + MOVQ RARG, cw+0(FP) + FLDCW cw+0(FP) + RET + +TEXT _fnclex(SB), 1, $-4 + FCLEX + RET + +TEXT _fninit(SB), 1, $-4 + FINIT /* no WAIT */ + RET + +TEXT _fxrstor(SB), 1, $-4 + FXRSTOR64 (RARG) + RET + +TEXT _fxsave(SB), 1, $-4 + FXSAVE64 (RARG) + RET + +TEXT _fwait(SB), 1, $-4 + WAIT + RET + +TEXT _ldmxcsr(SB), 1, $-4 /* Load MXCSR */ + MOVQ RARG, mxcsr+0(FP) + LDMXCSR mxcsr+0(FP) + RET + +TEXT _stts(SB), 1, $-4 + MOVQ CR0, AX + ORQ $8, AX /* Ts */ + MOVQ AX, CR0 + RET diff -Nru 0/sys/src/nix/k10/l64idt.s 4/sys/src/nix/k10/l64idt.s --- 0/sys/src/nix/k10/l64idt.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64idt.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,341 @@ +/* + * Interrupt/exception handling. 
+ */ +#include "amd64l.h" + +MODE $64 + +TEXT _intrp<>(SB), 1, $-4 /* no error code pushed */ + PUSHQ AX /* save AX */ + MOVQ 8(SP), AX /* idthandlers(SB) PC */ + JMP _intrcommon + +TEXT _intre<>(SB), 1, $-4 /* error code pushed */ + XCHGQ AX, (SP) +_intrcommon: + MOVBQZX (AX), AX + XCHGQ AX, (SP) + + SUBQ $24, SP /* R1[45], [DEFG]S */ + CMPW 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) /* old CS */ + JEQ _intrnested + + MOVQ RUSER, 0(SP) + MOVQ RMACH, 8(SP) + MOVW DS, 16(SP) + MOVW ES, 18(SP) + MOVW FS, 20(SP) + MOVW GS, 22(SP) + + SWAPGS + BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* up */ + +_intrnested: + PUSHQ R13 + PUSHQ R12 + PUSHQ R11 + PUSHQ R10 + PUSHQ R9 + PUSHQ R8 + PUSHQ BP + PUSHQ DI + PUSHQ SI + PUSHQ DX + PUSHQ CX + PUSHQ BX + PUSHQ AX + + MOVQ SP, RARG + PUSHQ SP + CALL _trap(SB) + +TEXT _intrr<>(SB), 1, $-4 /* so ktrace can pop frame */ + POPQ AX + + POPQ AX + POPQ BX + POPQ CX + POPQ DX + POPQ SI + POPQ DI + POPQ BP + POPQ R8 + POPQ R9 + POPQ R10 + POPQ R11 + POPQ R12 + POPQ R13 + + CMPQ 48(SP), $SSEL(SiCS, SsTIGDT|SsRPL0) + JEQ _iretnested + + SWAPGS + MOVW 22(SP), GS + MOVW 20(SP), FS + MOVW 18(SP), ES + MOVW 16(SP), DS + MOVQ 8(SP), RMACH + MOVQ 0(SP), RUSER + +_iretnested: + ADDQ $40, SP + IRETQ + +TEXT idthandlers(SB), 1, $-4 + CALL _intrp<>(SB); BYTE $IdtDE /* #DE Divide-by-Zero Error */ + CALL _intrp<>(SB); BYTE $IdtDB /* #DB Debug */ + CALL _intrp<>(SB); BYTE $IdtNMI /* #NMI Borked */ + CALL _intrp<>(SB); BYTE $IdtBP /* #BP Breakpoint */ + CALL _intrp<>(SB); BYTE $IdtOF /* #OF Overflow */ + CALL _intrp<>(SB); BYTE $IdtBR /* #BR Bound-Range */ + CALL _intrp<>(SB); BYTE $IdtUD /* #UD Invalid-Opcode */ + CALL _intrp<>(SB); BYTE $IdtNM /* #NM Device-Not-Available */ + CALL _intre<>(SB); BYTE $IdtDF /* #DF Double-Fault */ + CALL _intrp<>(SB); BYTE $Idt09 /* reserved */ + CALL _intre<>(SB); BYTE $IdtTS /* #TS Invalid-TSS */ + CALL _intre<>(SB); BYTE $IdtNP /* #NP Segment-Not-Present */ + CALL _intre<>(SB); BYTE 
$IdtSS /* #SS Stack */ + CALL _intre<>(SB); BYTE $IdtGP /* #GP General-Protection */ + CALL _intre<>(SB); BYTE $IdtPF /* #PF Page-Fault */ + CALL _intrp<>(SB); BYTE $Idt0F /* reserved */ + CALL _intrp<>(SB); BYTE $IdtMF /* #MF x87 FPE-Pending */ + CALL _intre<>(SB); BYTE $IdtAC /* #AC Alignment-Check */ + CALL _intrp<>(SB); BYTE $IdtMC /* #MC Machine-Check */ + CALL _intrp<>(SB); BYTE $IdtXF /* #XF SIMD Floating-Point */ + CALL _intrp<>(SB); BYTE $0x14 /* reserved */ + CALL _intrp<>(SB); BYTE $0x15 /* reserved */ + CALL _intrp<>(SB); BYTE $0x16 /* reserved */ + CALL _intrp<>(SB); BYTE $0x17 /* reserved */ + CALL _intrp<>(SB); BYTE $0x18 /* reserved */ + CALL _intrp<>(SB); BYTE $0x19 /* reserved */ + CALL _intrp<>(SB); BYTE $0x1a /* reserved */ + CALL _intrp<>(SB); BYTE $0x1b /* reserved */ + CALL _intrp<>(SB); BYTE $0x1c /* reserved */ + CALL _intrp<>(SB); BYTE $0x1d /* reserved */ + CALL _intrp<>(SB); BYTE $0x1e /* reserved */ + CALL _intrp<>(SB); BYTE $0x1f /* reserved */ + CALL _intrp<>(SB); BYTE $0x20 + CALL _intrp<>(SB); BYTE $0x21 + CALL _intrp<>(SB); BYTE $0x22 + CALL _intrp<>(SB); BYTE $0x23 + CALL _intrp<>(SB); BYTE $0x24 + CALL _intrp<>(SB); BYTE $0x25 + CALL _intrp<>(SB); BYTE $0x26 + CALL _intrp<>(SB); BYTE $0x27 + CALL _intrp<>(SB); BYTE $0x28 + CALL _intrp<>(SB); BYTE $0x29 + CALL _intrp<>(SB); BYTE $0x2a + CALL _intrp<>(SB); BYTE $0x2b + CALL _intrp<>(SB); BYTE $0x2c + CALL _intrp<>(SB); BYTE $0x2d + CALL _intrp<>(SB); BYTE $0x2e + CALL _intrp<>(SB); BYTE $0x2f + CALL _intrp<>(SB); BYTE $0x30 + CALL _intrp<>(SB); BYTE $0x31 + CALL _intrp<>(SB); BYTE $0x32 + CALL _intrp<>(SB); BYTE $0x33 + CALL _intrp<>(SB); BYTE $0x34 + CALL _intrp<>(SB); BYTE $0x35 + CALL _intrp<>(SB); BYTE $0x36 + CALL _intrp<>(SB); BYTE $0x37 + CALL _intrp<>(SB); BYTE $0x38 + CALL _intrp<>(SB); BYTE $0x39 + CALL _intrp<>(SB); BYTE $0x3a + CALL _intrp<>(SB); BYTE $0x3b + CALL _intrp<>(SB); BYTE $0x3c + CALL _intrp<>(SB); BYTE $0x3d + CALL _intrp<>(SB); BYTE $0x3e + CALL 
_intrp<>(SB); BYTE $0x3f + CALL _intrp<>(SB); BYTE $0x40 + CALL _intrp<>(SB); BYTE $0x41 + CALL _intrp<>(SB); BYTE $0x42 + CALL _intrp<>(SB); BYTE $0x43 + CALL _intrp<>(SB); BYTE $0x44 + CALL _intrp<>(SB); BYTE $0x45 + CALL _intrp<>(SB); BYTE $0x46 + CALL _intrp<>(SB); BYTE $0x47 + CALL _intrp<>(SB); BYTE $0x48 + CALL _intrp<>(SB); BYTE $0x49 + CALL _intrp<>(SB); BYTE $0x4a + CALL _intrp<>(SB); BYTE $0x4b + CALL _intrp<>(SB); BYTE $0x4c + CALL _intrp<>(SB); BYTE $0x4d + CALL _intrp<>(SB); BYTE $0x4e + CALL _intrp<>(SB); BYTE $0x4f + CALL _intrp<>(SB); BYTE $0x50 + CALL _intrp<>(SB); BYTE $0x51 + CALL _intrp<>(SB); BYTE $0x52 + CALL _intrp<>(SB); BYTE $0x53 + CALL _intrp<>(SB); BYTE $0x54 + CALL _intrp<>(SB); BYTE $0x55 + CALL _intrp<>(SB); BYTE $0x56 + CALL _intrp<>(SB); BYTE $0x57 + CALL _intrp<>(SB); BYTE $0x58 + CALL _intrp<>(SB); BYTE $0x59 + CALL _intrp<>(SB); BYTE $0x5a + CALL _intrp<>(SB); BYTE $0x5b + CALL _intrp<>(SB); BYTE $0x5c + CALL _intrp<>(SB); BYTE $0x5d + CALL _intrp<>(SB); BYTE $0x5e + CALL _intrp<>(SB); BYTE $0x5f + CALL _intrp<>(SB); BYTE $0x60 + CALL _intrp<>(SB); BYTE $0x61 + CALL _intrp<>(SB); BYTE $0x62 + CALL _intrp<>(SB); BYTE $0x63 + CALL _intrp<>(SB); BYTE $0x64 + CALL _intrp<>(SB); BYTE $0x65 + CALL _intrp<>(SB); BYTE $0x66 + CALL _intrp<>(SB); BYTE $0x67 + CALL _intrp<>(SB); BYTE $0x68 + CALL _intrp<>(SB); BYTE $0x69 + CALL _intrp<>(SB); BYTE $0x6a + CALL _intrp<>(SB); BYTE $0x6b + CALL _intrp<>(SB); BYTE $0x6c + CALL _intrp<>(SB); BYTE $0x6d + CALL _intrp<>(SB); BYTE $0x6e + CALL _intrp<>(SB); BYTE $0x6f + CALL _intrp<>(SB); BYTE $0x70 + CALL _intrp<>(SB); BYTE $0x71 + CALL _intrp<>(SB); BYTE $0x72 + CALL _intrp<>(SB); BYTE $0x73 + CALL _intrp<>(SB); BYTE $0x74 + CALL _intrp<>(SB); BYTE $0x75 + CALL _intrp<>(SB); BYTE $0x76 + CALL _intrp<>(SB); BYTE $0x77 + CALL _intrp<>(SB); BYTE $0x78 + CALL _intrp<>(SB); BYTE $0x79 + CALL _intrp<>(SB); BYTE $0x7a + CALL _intrp<>(SB); BYTE $0x7b + CALL _intrp<>(SB); BYTE $0x7c + CALL _intrp<>(SB); 
BYTE $0x7d + CALL _intrp<>(SB); BYTE $0x7e + CALL _intrp<>(SB); BYTE $0x7f + CALL _intrp<>(SB); BYTE $0x80 + CALL _intrp<>(SB); BYTE $0x81 + CALL _intrp<>(SB); BYTE $0x82 + CALL _intrp<>(SB); BYTE $0x83 + CALL _intrp<>(SB); BYTE $0x84 + CALL _intrp<>(SB); BYTE $0x85 + CALL _intrp<>(SB); BYTE $0x86 + CALL _intrp<>(SB); BYTE $0x87 + CALL _intrp<>(SB); BYTE $0x88 + CALL _intrp<>(SB); BYTE $0x89 + CALL _intrp<>(SB); BYTE $0x8a + CALL _intrp<>(SB); BYTE $0x8b + CALL _intrp<>(SB); BYTE $0x8c + CALL _intrp<>(SB); BYTE $0x8d + CALL _intrp<>(SB); BYTE $0x8e + CALL _intrp<>(SB); BYTE $0x8f + CALL _intrp<>(SB); BYTE $0x90 + CALL _intrp<>(SB); BYTE $0x91 + CALL _intrp<>(SB); BYTE $0x92 + CALL _intrp<>(SB); BYTE $0x93 + CALL _intrp<>(SB); BYTE $0x94 + CALL _intrp<>(SB); BYTE $0x95 + CALL _intrp<>(SB); BYTE $0x96 + CALL _intrp<>(SB); BYTE $0x97 + CALL _intrp<>(SB); BYTE $0x98 + CALL _intrp<>(SB); BYTE $0x99 + CALL _intrp<>(SB); BYTE $0x9a + CALL _intrp<>(SB); BYTE $0x9b + CALL _intrp<>(SB); BYTE $0x9c + CALL _intrp<>(SB); BYTE $0x9d + CALL _intrp<>(SB); BYTE $0x9e + CALL _intrp<>(SB); BYTE $0x9f + CALL _intrp<>(SB); BYTE $0xa0 + CALL _intrp<>(SB); BYTE $0xa1 + CALL _intrp<>(SB); BYTE $0xa2 + CALL _intrp<>(SB); BYTE $0xa3 + CALL _intrp<>(SB); BYTE $0xa4 + CALL _intrp<>(SB); BYTE $0xa5 + CALL _intrp<>(SB); BYTE $0xa6 + CALL _intrp<>(SB); BYTE $0xa7 + CALL _intrp<>(SB); BYTE $0xa8 + CALL _intrp<>(SB); BYTE $0xa9 + CALL _intrp<>(SB); BYTE $0xaa + CALL _intrp<>(SB); BYTE $0xab + CALL _intrp<>(SB); BYTE $0xac + CALL _intrp<>(SB); BYTE $0xad + CALL _intrp<>(SB); BYTE $0xae + CALL _intrp<>(SB); BYTE $0xaf + CALL _intrp<>(SB); BYTE $0xb0 + CALL _intrp<>(SB); BYTE $0xb1 + CALL _intrp<>(SB); BYTE $0xb2 + CALL _intrp<>(SB); BYTE $0xb3 + CALL _intrp<>(SB); BYTE $0xb4 + CALL _intrp<>(SB); BYTE $0xb5 + CALL _intrp<>(SB); BYTE $0xb6 + CALL _intrp<>(SB); BYTE $0xb7 + CALL _intrp<>(SB); BYTE $0xb8 + CALL _intrp<>(SB); BYTE $0xb9 + CALL _intrp<>(SB); BYTE $0xba + CALL _intrp<>(SB); BYTE $0xbb + 
CALL _intrp<>(SB); BYTE $0xbc + CALL _intrp<>(SB); BYTE $0xbd + CALL _intrp<>(SB); BYTE $0xbe + CALL _intrp<>(SB); BYTE $0xbf + CALL _intrp<>(SB); BYTE $0xc0 + CALL _intrp<>(SB); BYTE $0xc1 + CALL _intrp<>(SB); BYTE $0xc2 + CALL _intrp<>(SB); BYTE $0xc3 + CALL _intrp<>(SB); BYTE $0xc4 + CALL _intrp<>(SB); BYTE $0xc5 + CALL _intrp<>(SB); BYTE $0xc6 + CALL _intrp<>(SB); BYTE $0xc7 + CALL _intrp<>(SB); BYTE $0xc8 + CALL _intrp<>(SB); BYTE $0xc9 + CALL _intrp<>(SB); BYTE $0xca + CALL _intrp<>(SB); BYTE $0xcb + CALL _intrp<>(SB); BYTE $0xcc + CALL _intrp<>(SB); BYTE $0xcd + CALL _intrp<>(SB); BYTE $0xce + CALL _intrp<>(SB); BYTE $0xcf + CALL _intrp<>(SB); BYTE $0xd0 + CALL _intrp<>(SB); BYTE $0xd1 + CALL _intrp<>(SB); BYTE $0xd2 + CALL _intrp<>(SB); BYTE $0xd3 + CALL _intrp<>(SB); BYTE $0xd4 + CALL _intrp<>(SB); BYTE $0xd5 + CALL _intrp<>(SB); BYTE $0xd6 + CALL _intrp<>(SB); BYTE $0xd7 + CALL _intrp<>(SB); BYTE $0xd8 + CALL _intrp<>(SB); BYTE $0xd9 + CALL _intrp<>(SB); BYTE $0xda + CALL _intrp<>(SB); BYTE $0xdb + CALL _intrp<>(SB); BYTE $0xdc + CALL _intrp<>(SB); BYTE $0xdd + CALL _intrp<>(SB); BYTE $0xde + CALL _intrp<>(SB); BYTE $0xdf + CALL _intrp<>(SB); BYTE $0xe0 + CALL _intrp<>(SB); BYTE $0xe1 + CALL _intrp<>(SB); BYTE $0xe2 + CALL _intrp<>(SB); BYTE $0xe3 + CALL _intrp<>(SB); BYTE $0xe4 + CALL _intrp<>(SB); BYTE $0xe5 + CALL _intrp<>(SB); BYTE $0xe6 + CALL _intrp<>(SB); BYTE $0xe7 + CALL _intrp<>(SB); BYTE $0xe8 + CALL _intrp<>(SB); BYTE $0xe9 + CALL _intrp<>(SB); BYTE $0xea + CALL _intrp<>(SB); BYTE $0xeb + CALL _intrp<>(SB); BYTE $0xec + CALL _intrp<>(SB); BYTE $0xed + CALL _intrp<>(SB); BYTE $0xee + CALL _intrp<>(SB); BYTE $0xef + CALL _intrp<>(SB); BYTE $0xf0 + CALL _intrp<>(SB); BYTE $0xf1 + CALL _intrp<>(SB); BYTE $0xf2 + CALL _intrp<>(SB); BYTE $0xf3 + CALL _intrp<>(SB); BYTE $0xf4 + CALL _intrp<>(SB); BYTE $0xf5 + CALL _intrp<>(SB); BYTE $0xf6 + CALL _intrp<>(SB); BYTE $0xf7 + CALL _intrp<>(SB); BYTE $0xf8 + CALL _intrp<>(SB); BYTE $0xf9 + CALL 
_intrp<>(SB); BYTE $0xfa + CALL _intrp<>(SB); BYTE $0xfb + CALL _intrp<>(SB); BYTE $0xfc + CALL _intrp<>(SB); BYTE $0xfd + CALL _intrp<>(SB); BYTE $0xfe + CALL _intrp<>(SB); BYTE $0xff diff -Nru 0/sys/src/nix/k10/l64sipi.s 4/sys/src/nix/k10/l64sipi.s --- 0/sys/src/nix/k10/l64sipi.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64sipi.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,255 @@ +/* + * Start-up request IPI handler. + * + * This code is executed on an application processor in response to receiving + * a Start-up IPI (SIPI) from another processor. + * This must be placed on a 4KiB boundary + * somewhere in the 1st MiB of conventional memory. However, + * due to some shortcuts below it's restricted further to within the 1st 64KiB. + * The AP starts in real-mode, with + * CS selector set to the startup memory address/16; + * CS base set to startup memory address; + * CS limit set to 64KiB; + * CPL and IP set to 0. + */ +#include "mem.h" +#include "amd64l.h" + +/* + * Some machine instructions not handled well by [68][al]. + * This is a messy piece of code, requiring instructions in real mode, + * protected mode (+long mode on amd64). The MODE pseudo-op of 6[al] handles + * the latter two OK, but 'MODE $16' is incomplete, e.g. it does + * not truncate operands appropriately, hence the ugly 'rMOVAX' macro. + * Fortunately, the only other instruction executed in real mode that + * could cause a problem (ORL) is encoded such that it will work OK. + */ +#define DELAY BYTE $0xeb; /* JMP .+2 */ \ + BYTE $0x00 +#define NOP BYTE $0x90 /* NOP */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \ + LONG $o; WORD $s + +#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \ + WORD $o; WORD $s; +#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \ + pFARJMP32(s, o) +#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \ + BYTE $0x01; BYTE $0x16; \ + WORD $gdtptr +#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \ + WORD $i; + +/* + * Real mode. Welcome to 1978. 
+ * Load a basic GDT, turn on protected mode and make + * inter-segment jump to the protected mode code. + */ +MODE $16 + +TEXT _real<>(SB), 1, $-4 + rFARJMP16(0, _endofheader<>-KZERO(SB)) /* */ + +_startofheader: + NOP; NOP; NOP + QUAD $0xa5a5a5a5a5a5a5a5 + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) /* includes long mode */ + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + QUAD $0x0000800000000000 /* DS */ + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +TEXT _endofheader<>(SB), 1, $-4 + MOVW CS, AX + MOVW AX, DS /* initialise DS */ + + rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */ + + MOVL CR0, AX + ORL $Pe, AX + MOVL AX, CR0 /* turn on protected mode */ + DELAY /* JMP .+2 */ + + rMOVAX (SSEL(SiDS, SsTIGDT|SsRPL0)) /* */ + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + rFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _protected<>-KZERO(SB)) + +/* + * Protected mode. Welcome to 1982. + * Get the local APIC ID from the memory mapped APIC; + * load the PDB with the page table address, which is located + * in the Sipi, after the handler, see sipi.c + * make an identity map for the inter-segment jump below, + * using the stack space to hold a temporary PDP and PD; + * enable and activate long mode; + * make an inter-segment jump to the long mode code. + */ +MODE $32 + +/* + * Macros for accessing page table entries; must turn + * the C-style array-index macros into a page table byte + * offset. 
+ */ +#define PML4O(v) ((PTLX((v), 3))<<3) +#define PDPO(v) ((PTLX((v), 2))<<3) +#define PDO(v) ((PTLX((v), 1))<<3) +#define PTO(v) ((PTLX((v), 0))<<3) + +TEXT _protected<>(SB), 1, $-4 + MOVL $0xfee00000, BP /* apicbase */ + MOVL 0x20(BP), BP /* Id */ + SHRL $24, BP /* becomes RARG later */ + + MOVL BP, AX /* apicno */ + IMULL $32, AX /* [apicno] */ + MOVL $_real<>-KZERO(SB), BX + ADDL $4096, BX /* sipi */ + ADDL AX, BX /* sipi[apicno] */ +//MOVL $_real<>-KZERO(SB), CX +//MOVL BX, -4(CX) +//_spin: JMP _spin + + + MOVL 0(BX), SI /* sipi[apicno].pml4 */ + +//MOVL $_real<>-KZERO(SB), CX +//MOVL AX, -4(CX) +//MOVL BX, -8(CX) +//MOVL DX, -12(CX) +//MOVL SI, -16(CX) +//JMP 0(PC) + + + MOVL SI, AX + MOVL AX, CR3 /* load the mmu */ + + MOVL AX, DX + SUBL $MACHSTKSZ, DX /* PDP for identity map */ + ADDL $(PteRW|PteP), DX + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + + SUBL $MACHSTKSZ, AX /* PDP for identity map */ + ADDL $PTSZ, DX + MOVL DX, PDPO(0)(AX) /* PDPE for identity map */ + MOVL $(PtePS|PteRW|PteP), DX + ADDL $PTSZ, AX /* PD for identity map */ + MOVL DX, PDO(0)(AX) /* PDE for identity 0-[24]MiB */ + + +/* + * Enable and activate Long Mode. From the manual: + * make sure Page Size Extensions are off, and Page Global + * Extensions and Physical Address Extensions are on in CR4; + * set Long Mode Enable in the Extended Feature Enable MSR; + * set Paging Enable in CR0; + * make an inter-segment jump to the Long Mode code. + * It's all in 32-bit mode until the jump is made. + */ +TEXT _lme<>(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. 
+ * Jump out of the identity map space; + * load a proper long mode GDT; + * zap the identity map; + * initialise the stack and call the + * C startup code in m->splpc. + */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* PML4-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + +//MOVQ $_real<>-KZERO(SB), CX +//MOVQ AX, -8(CX) +//JMP 0(PC) + + + MOVQ DX, PML4O(0)(AX) /* zap identity map */ + + MOVQ SI, CR3 /* flush TLB */ + /* + * SI still points to the base of the bootstrap + * processor page tables. + * Want to use that for clearing the identity map, + * but want to use the passed-in address for + * setting up the stack and Mach. + */ + + ADDQ $KZERO, BX + MOVL 0(BX), SI /* sipi[apicno].pml4 */ + MOVLQZX SI, SI /* PML4-KZERO */ + + MOVQ SI, AX + MOVQ 8(BX), SP /* sipi[apicno].stack */ + + MOVQ 16(BX), RMACH /* sipi[apicno].mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + MOVLQZX RARG, RARG /* APIC ID */ + PUSHQ RARG /* apicno */ + + MOVQ 24(BX), AX /* sipi[apicno].pc */ + CALL* AX /* CALL squidboy(SB) */ + +_ndnr: + JMP _ndnr diff -Nru 0/sys/src/nix/k10/l64syscall.s 4/sys/src/nix/k10/l64syscall.s --- 0/sys/src/nix/k10/l64syscall.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64syscall.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,72 @@ +#include "mem.h" +#include "amd64l.h" + +MODE $64 + +/* + */ +TEXT touser(SB), 1, $-4 + CLI + SWAPGS + MOVQ $SSEL(SiUDS, SsRPL3), AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + + MOVQ $(UTZERO+0x28), CX /* ip */ + MOVQ $If, R11 /* flags */ + + MOVQ RARG, SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +/* + */ +TEXT syscallentry(SB), 1, $-4 + SWAPGS + 
BYTE $0x65; MOVQ 0, RMACH /* m-> (MOVQ GS:0x0, R15) */ + MOVQ 16(RMACH), RUSER /* m->proc */ + MOVQ SP, R13 + MOVQ 16(RUSER), SP /* m->proc->kstack */ + ADDQ $KSTACK, SP + PUSHQ $SSEL(SiUDS, SsRPL3) /* old stack segment */ + PUSHQ R13 /* old sp */ + PUSHQ R11 /* old flags */ + PUSHQ $SSEL(SiUCS, SsRPL3) /* old code segment */ + PUSHQ CX /* old ip */ + + SUBQ $(18*8), SP /* unsaved registers */ + + MOVW $SSEL(SiUDS, SsRPL3), (15*8+0)(SP) + MOVW ES, (15*8+2)(SP) + MOVW FS, (15*8+4)(SP) + MOVW GS, (15*8+6)(SP) + + PUSHQ SP /* Ureg* */ + PUSHQ RARG /* system call number */ + CALL syscall(SB) + +TEXT syscallreturn(SB), 1, $-4 + MOVQ 16(SP), AX /* Ureg.ax */ + MOVQ (16+6*8)(SP), BP /* Ureg.bp */ +_syscallreturn: + ADDQ $(17*8), SP /* registers + arguments */ + + CLI + SWAPGS + MOVW 0(SP), DS + MOVW 2(SP), ES + MOVW 4(SP), FS + MOVW 6(SP), GS + + MOVQ 24(SP), CX /* ip */ + MOVQ 40(SP), R11 /* flags */ + + MOVQ 48(SP), SP /* sp */ + + BYTE $0x48; SYSRET /* SYSRETQ */ + +TEXT sysrforkret(SB), 1, $-4 + MOVQ $0, AX + JMP _syscallreturn diff -Nru 0/sys/src/nix/k10/l64v.s 4/sys/src/nix/k10/l64v.s --- 0/sys/src/nix/k10/l64v.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/l64v.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,410 @@ +#include "amd64l.h" + +MODE $64 + +/* + * Port I/O. 
+ */ +TEXT inb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INB + RET + +TEXT insb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSB + RET + +TEXT ins(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + XORL AX, AX + INW + RET + +TEXT inss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSW + RET + +TEXT inl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + INL + RET + +TEXT insl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), DI + MOVL count+16(FP), CX + CLD + REP; INSL + RET + +TEXT outb(SB), 1, $-1 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL byte+8(FP), AX + OUTB + RET + +TEXT outsb(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSB + RET + +TEXT outs(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL short+8(FP), AX + OUTW + RET + +TEXT outss(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSW + RET + +TEXT outl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVL long+8(FP), AX + OUTL + RET + +TEXT outsl(SB), 1, $-4 + MOVL RARG, DX /* MOVL port+0(FP), DX */ + MOVQ address+8(FP), SI + MOVL count+16(FP), CX + CLD + REP; OUTSL + RET + +/* + * Load/store segment descriptor tables: + * GDT - global descriptor table + * IDT - interrupt descriptor table + * TR - task register + * GDTR and LDTR take an m16:m64 argument, + * so shuffle the stack arguments to + * get it in the right format. 
+ */ +TEXT gdtget(SB), 1, $-4 + MOVL GDTR, (RARG) /* Note: 10 bytes returned */ + RET + +TEXT gdtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + + MOVL (RARG), GDTR + + XORQ AX, AX + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + POPQ AX + MOVWQZX cs+16(FP), BX + PUSHQ BX + PUSHQ AX + RETFQ + +TEXT idtput(SB), 1, $-4 + SHLQ $48, RARG + MOVQ RARG, m16+0(FP) + LEAQ m16+6(FP), RARG + MOVL (RARG), IDTR + RET + +TEXT trput(SB), 1, $-4 + MOVW RARG, TASK + RET + +/* + * Read/write various system registers. + */ +TEXT cr0get(SB), 1, $-4 /* Processor Control */ + MOVQ CR0, AX + RET + +TEXT cr0put(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR0 + RET + +TEXT cr2get(SB), 1, $-4 /* #PF Linear Address */ + MOVQ CR2, AX + RET + +TEXT cr3get(SB), 1, $-4 /* PML4 Base */ + MOVQ CR3, AX + RET + +TEXT cr3put(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR3 + RET + +TEXT cr4get(SB), 1, $-4 /* Extensions */ + MOVQ CR4, AX + RET + +TEXT cr4put(SB), 1, $-4 + MOVQ RARG, AX + MOVQ AX, CR4 + RET + +TEXT rdtsc(SB), 1, $-4 /* Time Stamp Counter */ + RDTSC + /* u64int rdtsc(void); */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT rdmsr(SB), 1, $-4 /* Model-Specific Register */ + MOVL RARG, CX + + RDMSR + /* u64int rdmsr(u32int); */ + XCHGL DX, AX /* swap lo/hi, zero-extend */ + SHLQ $32, AX /* hi<<32 */ + ORQ DX, AX /* (hi<<32)|lo */ + RET + +TEXT wrmsr(SB), 1, $-4 + MOVL RARG, CX + MOVL lo+8(FP), AX + MOVL hi+12(FP), DX + + WRMSR + + RET + +TEXT invlpg(SB), 1, $-4 /* INVLPG va+0(FP) */ + MOVQ RARG, va+0(FP) + + INVLPG va+0(FP) + + RET + +TEXT wbinvd(SB), 1, $-4 + WBINVD + RET + +/* + * Serialisation. + */ +TEXT lfence(SB), 1, $-4 + LFENCE + RET + +TEXT mfence(SB), 1, $-4 + MFENCE + RET + +TEXT sfence(SB), 1, $-4 + SFENCE + RET + +/* + * Note: CLI and STI are not serialising instructions. + * Is that assumed anywhere? 
+ */ +TEXT splhi(SB), 1, $-4 +_splhi: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JZ _alreadyhi /* use CMOVLEQ etc. here? */ + + MOVQ (SP), BX + MOVQ BX, 8(RMACH) /* save PC in m->splpc */ + +_alreadyhi: + CLI + RET + +TEXT spllo(SB), 1, $-4 +_spllo: + PUSHFQ + POPQ AX + TESTQ $If, AX /* If - Interrupt Flag */ + JNZ _alreadylo /* use CMOVLEQ etc. here? */ + + MOVQ $0, 8(RMACH) /* clear m->splpc */ + +_alreadylo: + STI + RET + +TEXT splx(SB), 1, $-4 + TESTQ $If, RARG /* If - Interrupt Flag */ + JNZ _spllo + JMP _splhi + +TEXT spldone(SB), 1, $-4 + RET + +TEXT islo(SB), 1, $-4 + PUSHFQ + POPQ AX + ANDQ $If, AX /* If - Interrupt Flag */ + RET + +/* + * Synchronisation + */ +TEXT ainc(SB), 1, $-4 /* int ainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX /* overflow if -ve or 0 */ + JGT _return +_trap: + XORQ BX, BX + MOVQ (BX), BX /* over under sideways down */ +_return: + RET + +TEXT adec(SB), 1, $-4 /* int adec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX /* underflow if -ve */ + JLT _trap + + RET + +/* + * Semaphores rely on negative values for the counter, + * and don't have the same overflow/underflow conditions + * as ainc/adec. + */ +TEXT semainc(SB), 1, $-4 /* int semainc(int*); */ + MOVL $1, AX + LOCK; XADDL AX, (RARG) + ADDL $1, AX + RET + +TEXT semadec(SB), 1, $-4 /* int semadec(int*); */ + MOVL $-1, AX + LOCK; XADDL AX, (RARG) + SUBL $1, AX + RET + +TEXT tas32(SB), 1, $-4 + MOVL $0xdeaddead, AX + XCHGL AX, (RARG) /* */ + RET + +TEXT fas64(SB), 1, $-4 + MOVQ p+8(FP), AX + LOCK; XCHGQ AX, (RARG) /* */ + RET + +TEXT cas32(SB), 1, $-4 + MOVL exp+8(FP), AX + MOVL new+16(FP), BX + LOCK; CMPXCHGL BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? */ + JNZ _cas32r0 +_cas32r1: + RET +_cas32r0: + DECL AX + RET + +TEXT cas64(SB), 1, $-4 + MOVQ exp+8(FP), AX + MOVQ new+16(FP), BX + LOCK; CMPXCHGQ BX, (RARG) + MOVL $1, AX /* use CMOVLEQ etc. here? 
*/ + JNZ _cas64r0 +_cas64r1: + RET +_cas64r0: + DECL AX + RET + +/* + * Label consists of a stack pointer and a programme counter + */ +TEXT gotolabel(SB), 1, $-4 + MOVQ 0(RARG), SP /* restore SP */ + MOVQ 8(RARG), AX /* put return PC on the stack */ + MOVQ AX, 0(SP) + MOVL $1, AX /* return 1 */ + RET + +TEXT setlabel(SB), 1, $-4 + MOVQ SP, 0(RARG) /* store SP */ + MOVQ 0(SP), BX /* store return PC */ + MOVQ BX, 8(RARG) + MOVL $0, AX /* return 0 */ + RET + +TEXT hardhalt(SB), 1, $-4 + STI + HLT + RET + +TEXT _monitor(SB), 1, $-4 /* void monitor(void*); */ + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* no optional extensions yet */ + XORQ DX, DX /* no optional hints yet */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + RET + +TEXT _waitwhile(SB), 1, $-4 /* void waitwhile(u32int); */ + MOVLQZX RARG, CX /* optional extensions */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ + RET + +TEXT k10waitwhile+0(SB),0,$16 +k10mwloop: + MOVQ RARG, CX + MOVQ val+8(FP), DX + MOVQ (CX), AX + CMPQ AX, DX + JNE k10mwdone + MOVQ RARG, AX /* linear address to monitor */ + XORQ CX, CX /* no optional extensions yet */ + XORQ DX, DX /* no optional hints yet */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc8 /* MONITOR */ + MOVQ RARG, CX + MOVQ (CX),AX + MOVQ val+8(FP), DX + CMPQ AX, DX + JNE k10mwdone + XORQ DX, DX + XORQ CX, CX /* optional extensions */ + BYTE $0x0f; BYTE $0x01; BYTE $0xc9 /* MWAIT */ + JMP k10mwloop +k10mwdone: + RET , + +TEXT mul64fract(SB), 1, $-4 + MOVQ a+8(FP), AX + MULQ b+16(FP) /* a*b */ + SHRQ $32, AX:DX + MOVQ AX, (RARG) + RET + +///* +// * Testing. +// */ +//TEXT ud2(SB), $-4 +// BYTE $0x0f; BYTE $0x0b +// RET +// diff -Nru 0/sys/src/nix/k10/lsipi.s 4/sys/src/nix/k10/lsipi.s --- 0/sys/src/nix/k10/lsipi.s Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/lsipi.s Wed Feb 6 00:00:00 2013 @@ -0,0 +1,242 @@ +/* + * Start-up request IPI handler. 
+ * + * This code is executed on an application processor in response to receiving + * a Start-up IPI (SIPI) from another processor. + * This must be placed on a 4KiB boundary + * somewhere in the 1st MiB of conventional memory. However, + * due to some shortcuts below it's restricted further to within the 1st 64KiB. + * The AP starts in real-mode, with + * CS selector set to the startup memory address/16; + * CS base set to startup memory address; + * CS limit set to 64KiB; + * CPL and IP set to 0. + */ +#include "mem.h" +#include "amd64l.h" + +/* + * Some machine instructions not handled well by [68][al]. + * This is a messy piece of code, requiring instructions in real mode, + * protected mode (+long mode on amd64). The MODE pseudo-op of 6[al] handles + * the latter two OK, but 'MODE $16' is incomplete, e.g. it does + * not truncate operands appropriately, hence the ugly 'rMOVAX' macro. + * Fortunately, the only other instruction executed in real mode that + * could cause a problem (ORL) is encoded such that it will work OK. + */ +#define DELAY BYTE $0xeb; /* JMP .+2 */ \ + BYTE $0x00 +#define NOP BYTE $0x90 /* NOP */ + +#define pFARJMP32(s, o) BYTE $0xea; /* far jmp ptr32:16 */ \ + LONG $o; WORD $s + +#define rFARJMP16(s, o) BYTE $0xea; /* far jump ptr16:16 */ \ + WORD $o; WORD $s; +#define rFARJMP32(s, o) BYTE $0x66; /* far jump ptr32:16 */ \ + pFARJMP32(s, o) +#define rLGDT(gdtptr) BYTE $0x0f; /* LGDT */ \ + BYTE $0x01; BYTE $0x16; \ + WORD $gdtptr +#define rMOVAX(i) BYTE $0xb8; /* i -> AX */ \ + WORD $i; + +/* + * Real mode. Welcome to 1978. + * Load a basic GDT, turn on protected mode and make + * inter-segment jump to the protected mode code. 
+ */ +MODE $16 + +TEXT _real<>(SB), 1, $-4 + rFARJMP16(0, _endofheader<>-KZERO(SB)) /* */ + +_startofheader: + NOP; NOP; NOP + QUAD $0xa5a5a5a5a5a5a5a5 + +TEXT _gdt32p<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x00cf9a000000ffff /* CS */ + QUAD $0x00cf92000000ffff /* DS */ + QUAD $0x0020980000000000 /* Long mode CS */ + +TEXT _gdtptr32p<>(SB), 1, $-4 + WORD $(4*8-1) /* includes long mode */ + LONG $_gdt32p<>-KZERO(SB) + +TEXT _gdt64<>(SB), 1, $-4 + QUAD $0x0000000000000000 /* NULL descriptor */ + QUAD $0x0020980000000000 /* CS */ + QUAD $0x0000800000000000 /* DS */ + +TEXT _gdtptr64v<>(SB), 1, $-4 + WORD $(3*8-1) + QUAD $_gdt64<>(SB) + +TEXT _endofheader<>(SB), 1, $-4 + MOVW CS, AX + MOVW AX, DS /* initialise DS */ + + rLGDT(_gdtptr32p<>-KZERO(SB)) /* load a basic gdt */ + + MOVL CR0, AX + ORL $Pe, AX + MOVL AX, CR0 /* turn on protected mode */ + DELAY /* JMP .+2 */ + + rMOVAX (SSEL(SiDS, SsTIGDT|SsRPL0)) /* */ + MOVW AX, DS + MOVW AX, ES + MOVW AX, FS + MOVW AX, GS + MOVW AX, SS + + rFARJMP32(SSEL(SiCS, SsTIGDT|SsRPL0), _protected<>-KZERO(SB)) + +/* + * Protected mode. Welcome to 1982. + * Get the local APIC ID from the memory mapped APIC + * load the PML4 with the shared page table address; + * make an identity map for the inter-segment jump below; + * enable and activate long mode; + * make an inter-segment jump to the long mode code. + */ +MODE $32 + +/* + * Macros for accessing page table entries; must turn + * the C-style array-index macros into a page table byte + * offset. 
+ */ +#define PMX(v, l) (((v)>>((((l)-1)*PTPGSHFT)+PGSHFT)) & ((1<(SB), 1, $-4 + MOVL $0xfee00000, BP /* apicbase */ + MOVL 0x20(BP), BP /* Id */ + SHRL $24, BP /* becomes RARG later */ + + MOVL $(0x00100000+MACHSTKSZ), SI /* page table PML4 */ + + MOVL SI, AX + MOVL AX, CR3 /* load the mmu */ + + MOVL PML4O(KZERO)(AX), DX /* PML4E for KZERO, PMAPADDR */ + MOVL DX, PML4O(0)(AX) /* PML4E for identity map */ + + ANDL $~((1<(SB), 1, $-4 + MOVL CR4, AX + ANDL $~Pse, AX /* Page Size */ + ORL $(Pge|Pae), AX /* Page Global, Phys. Address */ + MOVL AX, CR4 + + MOVL $Efer, CX /* Extended Feature Enable */ + RDMSR + ORL $Lme, AX /* Long Mode Enable */ + WRMSR + + MOVL CR0, DX + ANDL $~(Cd|Nw|Ts|Mp), DX + ORL $(Pg|Wp), DX /* Paging Enable */ + MOVL DX, CR0 + + pFARJMP32(SSEL(3, SsTIGDT|SsRPL0), _identity<>-KZERO(SB)) + +/* + * Long mode. Welcome to 2003. + * Jump out of the identity map space; + * load a proper long mode GDT; + * zap the identity map; + * initialise the stack and call the + * C startup code in m->splpc. 
+ */ +MODE $64 + +TEXT _identity<>(SB), 1, $-4 + MOVQ $_start64v<>(SB), AX + JMP* AX + +TEXT _start64v<>(SB), 1, $-4 + MOVQ $_gdtptr64v<>(SB), AX + MOVL (AX), GDTR + + XORQ DX, DX + MOVW DX, DS /* not used in long mode */ + MOVW DX, ES /* not used in long mode */ + MOVW DX, FS + MOVW DX, GS + MOVW DX, SS /* not used in long mode */ + + MOVLQZX SI, SI /* PML4-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + MOVQ PML4O(0)(AX), BX /* PDPE identity map physical */ + ANDQ $~((1<(SB), AX + MOVL -4(AX), SI /* PML4 */ + MOVLQZX SI, SI /* PML4-KZERO */ + MOVQ SI, AX + ADDQ $KZERO, AX /* PML4 and top of stack */ + + MOVQ AX, SP /* set stack */ + + ADDQ $(4*PTPGSZ+PGSZ), AX /* PML4+PDP+PD+PT+vsvm */ + MOVQ AX, RMACH /* Mach */ + MOVQ DX, RUSER + + PUSHQ DX /* clear flags */ + POPFQ + + MOVLQZX RARG, RARG /* APIC ID */ + PUSHQ RARG /* apicno */ + + MOVQ 8(RMACH), AX /* m->splpc */ + CALL* AX /* CALL squidboy(SB) */ + +_ndnr: + JMP _ndnr diff -Nru 0/sys/src/nix/k10/main.c 4/sys/src/nix/k10/main.c --- 0/sys/src/nix/k10/main.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/main.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,576 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "init.h" +#include "apic.h" +#include "io.h" + + +Conf conf; /* XXX - must go - gag */ + +extern void crapoptions(void); /* XXX - must go */ +extern void confsetenv(void); /* XXX - must go */ + +static uintptr sp; /* XXX - must go - user stack of init proc */ + +uintptr kseg0 = KZERO; +Sys* sys = nil; +usize sizeofSys = sizeof(Sys); + +/* + * Option arguments from the command line. + * oargv[0] is the boot file. + * Optionsinit() is called from multiboot() to + * set it all up. 
+ */ +static int oargc; +static char* oargv[20]; +static char oargb[128]; +static int oargblen; + +static int maxcores = 1024; /* max # of cores given as an argument */ +static int numtcs = 32; /* initial # of TCs */ + +char dbgflg[256]; +static int vflag = 0; + +void +optionsinit(char* s) +{ + oargblen = strecpy(oargb, oargb+sizeof(oargb), s) - oargb; + oargc = tokenize(oargb, oargv, nelem(oargv)-1); + oargv[oargc] = nil; +} + +static void +options(int argc, char* argv[]) +{ + char *p; + int n, o; + + /* + * Process flags. + * Flags [A-Za-z] may be optionally followed by + * an integer level between 1 and 127 inclusive + * (no space between flag and level). + * '--' ends flag processing. + */ + while(--argc > 0 && (*++argv)[0] == '-' && (*argv)[1] != '-'){ + while(o = *++argv[0]){ + if(!(o >= 'A' && o <= 'Z') && !(o >= 'a' && o <= 'z')) + continue; + n = strtol(argv[0]+1, &p, 0); + if(p == argv[0]+1 || n < 1 || n > 127) + n = 1; + argv[0] = p-1; + dbgflg[o] = n; + } + } + vflag = dbgflg['v']; + if(argc > 0){ + maxcores = strtol(argv[0], 0, 0); + argc--; + argv++; + } + if(argc > 0){ + numtcs = strtol(argv[0], 0, 0); + //argc--; + //argv++; + } +} + +static int istentacle; + +void +squidboy(int apicno) +{ + vlong hz; + + sys->machptr[m->machno] = m; + /* + * Need something for initial delays + * until a timebase is worked out. + */ + m->cpuhz = 2000000000ll; + m->cpumhz = 2000; + m->perf.period = 1; + + if(istentacle) + print("TENTACLE %d stack %#p m:%#p\n", m->machno, m->stack, m); + + DBG("Hello Squidboy %d %d\n", apicno, m->machno); + + vsvminit(MACHSTKSZ, NIXAC); + + /* + * Beware the Curse of The Non-Interruptable Were-Temporary. + */ + hz = archhz(); + if(hz == 0) + ndnr(); + m->cpuhz = hz; + m->cyclefreq = hz; + m->cpumhz = hz/1000000ll; + + mmuinit(); + if(!apiconline()){ + acmodeset(NIXUC); + ndnr(); + } + fpuinit(); + + m->splpc = 0; + + /* + * CAUTION: no time sync done, etc. 
+ */ + DBG("Wait for the thunderbirds!\n"); + while(!active.thunderbirdsarego) + ; + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + + print("cpu%d color %d role %s tsc %lld\n", + m->machno, corecolor(m->machno), rolename[m->nnixrole], m->rdtsc); + + switch(m->nnixrole){ + case NIXAC: + acmmuswitch(); + acinit(); + adec(&active.nbooting); + ainc(&active.nonline); /* this was commented out */ + rolestable(m); + acsched(); + panic("squidboy"); + break; + case NIXTC: + /* + * We only need the idt and syscall entry point actually. + * At boot time the boot processor might set our role after + * we have decided to become an AC. + */ + vsvminit(MACHSTKSZ, NIXTC); + + /* + * Enable the timer interrupt. + */ + apictimerenab(); + apicpri(0); + + timersinit(); + adec(&active.nbooting); + ainc(&active.nonline); + + rolestable(m); + schedinit(); + break; + } + panic("squidboy returns (type %d)", m->nixrole); +} + +void +tentacle(int apicno) +{ + //m->nixrole = m->nnixrole; + ainc(&istentacle); + squidboy(apicno); +} + +static void +testiccs(void) +{ + int i; + Mach *mp; + extern void testicc(int); + + /* setup arguments for all */ + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->nixrole == NIXAC) + testicc(i); + print("bootcore: all cores done\n"); +} + +/* + * Rendezvous with other cores. Set roles for those that came + * up online, and wait until they are initialized. + * Sync TSC with them. + * We assume other processors that could boot had time to + * set online to 1 by now. + */ +static void +nixsquids(void) +{ + Mach *mp; + int i; + uvlong now, start; + + for(i = 1; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->nixrole != NIXUC){ + /* + * Inter-core calls. A ensure *mp->iccall and mp->icargs + * go into different cache lines. 
+ */ + mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); + mp->icc->fn = nil; + if(i < numtcs){ + sys->nmach++; + mp->nnixrole = NIXTC; + sys->nc[NIXTC]++; + }else{ + mp->nnixrole = NIXAC; + sys->nc[NIXAC]++; + } + ainc(&active.nbooting); + } + sys->epoch = rdtsc(); + mfence(); + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + active.thunderbirdsarego = 1; + start = fastticks2us(fastticks(nil)); + do{ + now = fastticks2us(fastticks(nil)); + }while(active.nbooting > 0 && now - start < 1000000) + ; + if(active.nbooting > 0) + print("cpu0: %d cores couldn't start\n", active.nbooting); + active.nbooting = 0; +} + +void +DONE(void) +{ + print("DONE\n"); + prflush(); + delay(10000); + ndnr(); +} + +void +HERE(void) +{ + print("here\n"); + prflush(); + delay(5000); +} + +void +main(u32int ax, u32int bx) +{ + vlong hz; + + memset(edata, 0, end - edata); + + /* + * ilock via i8250enable via i8250console + * needs m->machno, sys->machptr[] set, and + * also 'up' set to nil. + */ + cgapost(sizeof(uintptr)*8); + memset(m, 0, sizeof(Mach)); + m->machno = 0; + + m->nnixrole = NIXTC; + + sys->machptr[m->machno] = &sys->mach; + m->stack = PTR2UINT(sys->machstk); + m->vsvm = sys->vsvmpage; + up = nil; + active.nonline = 1; + active.exiting = 0; + active.nbooting = 0; + + rolestable(m); + asminit(); + multiboot(ax, bx, 0); + options(oargc, oargv); + crapoptions(); + + /* + * Need something for initial delays + * until a timebase is worked out. + */ + m->cpuhz = 2000000000ll; + m->cpumhz = 2000; + + cgainit(); + i8250console("0"); + consputs = cgaconsputs; + + vsvminit(MACHSTKSZ, NIXTC); + + sys->nmach = 1; + + fmtinit(); + print("\nNIX\n"); + if(vflag){ + print("&ax = %#p, ax = %#ux, bx = %#ux\n", &ax, ax, bx); + multiboot(ax, bx, vflag); + } + + m->perf.period = 1; + if((hz = archhz()) != 0ll){ + m->cpuhz = hz; + m->cyclefreq = hz; + m->cpumhz = hz/1000000ll; + } + + /* + * Mmuinit before meminit because it + * flushes the TLB via m->pml4->pa. 
+ */ + mmuinit(); + + ioinit(); + kbdinit(); + meminit(); + confinit(); + archinit(); + mallocinit(); + + /* + * Acpiinit will cause the first malloc + * call to happen. + * If the system dies here it's probably due + * to malloc not being initialised + * correctly, or the data segment is misaligned + * (it's amazing how far you can get with + * things like that completely broken). + */ + acpiinit(); + + umeminit(); + trapinit(); + printinit(); + + /* + * This is necessary with GRUB and QEMU. + * Without it an interrupt can occur at a weird vector, + * because the vector base is likely different, causing + * havoc. Do it before any APIC initialisation. + */ + i8259init(32); + + + procinit0(); + mpsinit(maxcores); + apiconline(); + sipiall(); + + timersinit(); + kbdenable(); + fpuinit(); + psinit(conf.nproc); + initimage(); + links(); + devtabreset(); + pageinit(); + swapinit(); + userinit(); + nixsquids(); +testiccs(); +print("schedinit...\n"); + schedinit(); +} + +void +init0(void) +{ + char buf[2*KNAMELEN]; + + up->nerrlab = 0; + +// if(consuart == nil) +// i8250console("0"); + spllo(); + + /* + * These are o.k. because rootinit is null. + * Then early kproc's will have a root and dot. + */ + up->slash = namec("#/", Atodir, 0, 0); + pathclose(up->slash->path); + up->slash->path = newpath("/"); + up->dot = cclone(up->slash); + + devtabinit(); + + if(!waserror()){ + snprint(buf, sizeof(buf), "%s %s", "AMD64", conffile); + ksetenv("terminal", buf, 0); + ksetenv("cputype", "amd64", 0); + if(cpuserver) + ksetenv("service", "cpu", 0); + else + ksetenv("service", "terminal", 0); + confsetenv(); + poperror(); + } + kproc("alarm", alarmkproc, 0); + touser(sp); +} + +void +bootargs(uintptr base) +{ + int i; + ulong ssize; + char **av, *p; + + /* + * Push the boot args onto the stack. + * Make sure the validaddr check in syscall won't fail + * because there are fewer than the maximum number of + * args by subtracting sizeof(up->arg). 
+ */ + i = oargblen+1; + p = UINT2PTR(STACKALIGN(base + BIGPGSZ - sizeof(up->arg) - i)); + memmove(p, oargb, i); + + /* + * Now push argc and the argv pointers. + * This isn't strictly correct as the code jumped to by + * touser in init9.[cs] calls startboot (port/initcode.c) which + * expects arguments + * startboot(char* argv0, char* argv[]) + * not the usual (int argc, char* argv[]), but argv0 is + * unused so it doesn't matter (at the moment...). + */ + av = (char**)(p - (oargc+2)*sizeof(char*)); + ssize = base + BIGPGSZ - PTR2UINT(av); + *av++ = (char*)oargc; + for(i = 0; i < oargc; i++) + *av++ = (oargv[i] - oargb) + (p - base) + (USTKTOP - BIGPGSZ); + *av = nil; + + sp = USTKTOP - ssize; +} + +void +userinit(void) +{ + Proc *p; + Segment *s; + KMap *k; + Page *pg; + + p = newproc(); + p->pgrp = newpgrp(); + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + p->fgrp = dupfgrp(nil); + p->rgrp = newrgrp(); + p->procmode = 0640; + + kstrdup(&eve, ""); + kstrdup(&p->text, "*init*"); + kstrdup(&p->user, eve); + + /* + * Kernel Stack + * + * N.B. make sure there's enough space for syscall to check + * for valid args and + * space for gotolabel's return PC + * AMD64 stack must be quad-aligned. + */ + p->sched.pc = PTR2UINT(init0); + p->sched.sp = PTR2UINT(p->kstack+KSTACK-sizeof(up->arg)-sizeof(uintptr)); + p->sched.sp = STACKALIGN(p->sched.sp); + + /* + * User Stack + * + * Technically, newpage can't be called here because it + * should only be called when in a user context as it may + * try to sleep if there are no pages available, but that + * shouldn't be the case here. 
+ */ + s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ); + p->seg[SSEG] = s; + + pg = newpage(1, 0, USTKTOP-BIGPGSZ, BIGPGSZ, -1); + segpage(s, pg); + k = kmap(pg); + bootargs(VA(k)); + kunmap(k); + + /* + * Text + */ + s = newseg(SG_TEXT, UTZERO, 1); + s->flushme++; + p->seg[TSEG] = s; + pg = newpage(1, 0, UTZERO, BIGPGSZ, -1); + memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); + segpage(s, pg); + k = kmap(s->map[0]->pages[0]); + memmove(UINT2PTR(VA(k)), initcode, sizeof initcode); + kunmap(k); + + ready(p); +} + +void +confinit(void) +{ + int i; + + conf.npage = 0; + for(i=0; imachno == 0 && m->nixrole == NIXUC) + active.ispanic = 0; + once = m->nixrole != NIXUC; + acmodeset(NIXUC); + active.exiting = 1; + unlock(&active); + + if(once) + iprint("cpu%d: exiting\n", m->machno); + + spllo(); + for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){ + delay(TK2MS(2)); + if(active.nonline == 0 && consactive() == 0) + break; + } + + if(active.ispanic && m->machno == 0){ + if(cpuserver) + delay(30000); + else + for(;;) + halt(); + } + else + delay(1000); +} + +void +reboot(void*, void*, long) +{ + panic("reboot\n"); +} + +void +exit(int ispanic) +{ + shutdown(ispanic); + archreset(); +} diff -Nru 0/sys/src/nix/k10/map.c 4/sys/src/nix/k10/map.c --- 0/sys/src/nix/k10/map.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/map.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,48 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#define _KADDR(pa) UINT2PTR(kseg0+((uintptr)(pa))) +#define _PADDR(va) PTR2UINT(((uintptr)(va)) - kseg0) + +#define TMFM (64*MiB) + +void* +KADDR(uintmem pa) +{ + u8int* va; + + va = UINT2PTR(pa); + if(pa < TMFM) + return KSEG0+va; + + assert(pa < KSEG2); + return KSEG2+va; +} + +uintmem +PADDR(void* va) +{ + uintmem pa; + + pa = PTR2UINT(va); + if(pa >= KSEG0 && pa < KSEG0+TMFM) + return pa-KSEG0; + if(pa > KSEG2) + return pa-KSEG2; + + panic("PADDR: va %#p pa #%p @ %#p\n", va, _PADDR(va), getcallerpc(&va)); + 
return 0; +} + +KMap* +kmap(Page* page) +{ + DBG("kmap(%#llux) @ %#p: %#p %#p\n", + page->pa, getcallerpc(&page), + page->pa, KADDR(page->pa)); + + return KADDR(page->pa); +} diff -Nru 0/sys/src/nix/k10/mem.h 4/sys/src/nix/k10/mem.h --- 0/sys/src/nix/k10/mem.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/mem.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,99 @@ +/* + * Memory and machine-specific definitions. Used in C and assembler. + */ +#define KiB 1024u /* Kibi 0x0000000000000400 */ +#define MiB 1048576u /* Mebi 0x0000000000100000 */ +#define GiB 1073741824u /* Gibi 000000000040000000 */ +#define TiB 1099511627776ull /* Tebi 0x0000010000000000 */ +#define PiB 1125899906842624ull /* Pebi 0x0004000000000000 */ +#define EiB 1152921504606846976ull /* Exbi 0x1000000000000000 */ + +#define HOWMANY(x, y) (((x)+((y)-1))/(y)) +#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) +#define ROUNDDN(x, y) (((x)/(y))*(y)) +#define MIN(a, b) ((a) < (b)? (a): (b)) +#define MAX(a, b) ((a) > (b)? (a): (b)) + +#define ALIGNED(va, a) (!(((uintptr)(va)) & ((a)-1))) + +/* + * Sizes + */ +#define BI2BY 8 /* bits per byte */ +#define BY2V 8 /* bytes per double word */ +#define BY2SE 8 /* bytes per stack element */ +#define BLOCKALIGN 8 + +/* + * 4K pages + * these defines could go. + */ +#define PGSZ (4*KiB) /* page size */ +#define PGSHFT 12 /* log(PGSZ) */ +#define PTSZ (4*KiB) /* page table page size */ +#define PTSHFT 9 /* */ + +#define MACHSZ (4*KiB) /* Mach+stack size */ +#define MACHMAX 32 /* max. number of cpus */ +#define MACHSTKSZ (6*(4*KiB)) /* Mach stack size */ + +#define KSTACK (16*1024) /* Size of Proc kernel stack */ +#define STACKALIGN(sp) ((sp) & ~(BY2SE-1)) /* bug: assure with alloc */ + +/* + * 2M pages + * these defines must go. 
+ */ +#define BIGPGSHFT 21 +#define BIGPGSZ (1ull<>(((l)*PTSHFT)+PGSHFT)) & ((1< $target + +apic.$O: apic.h io.h +devarch.$O: ../port/error.h /$objtype/include/ureg.h +fpu.$O: amd64.h +fpu.$O: /$objtype/include/ureg.h +ioapic.$O: apic.h io.h +main.$O: /sys/include/pool.h init.h +memory.$O: amd64.h +mmu.$O: amd64.h +mp.$O: apic.h +sipi.$O: apic.h sipi.h +svm.$O: amd64.h +svm.$O: /$objtype/include/ureg.h +syscall.$O: ../port/error.h /sys/src/libc/9syscall/sys.h +syscall.$O: /sys/include/tos.h /$objtype/include/ureg.h +syscall.$O: amd64.h +trap.$O: ../port/error.h io.h +trap.$O: /sys/include/tos.h /$objtype/include/ureg.h + +devaoe.$O: ../port/error.h ../port/netif.h ../ip/ip.h +devaoe.$O: ../386/aoe.h etherif.h +devether.$O: ../port/error.h ../port/netif.h etherif.h +devrtc.$O: ../port/error.h +ether8169.$O: ../port/error.h ../port/ethermii.h ../port/netif.h +ether8169.$O: etherif.h +ether82557.$O: ../port/netif.h +ether82557.$O: etherif.h io.h +etherigbe.$O: ../port/error.h ../port/ethermii.h ../port/netif.h +etherigbe.$O: etherif.h io.h +etherm10g.$O: ../port/error.h ../port/netif.h +etherm10g.$O: etherif.h io.h +etherm10g.$O: ../386/etherm10g2k.i ../386/etherm10g4k.i +i8259.$O: io.h +kbd.$O: ../port/error.h io.h +pci.$O: io.h +sdaoe.$O: ../port/error.h ../port/netif.h ../port/sd.h +sdaoe.$O: ../386/aoe.h etherif.h io.h +sdscsi.$O: ../port/error.h + +random.$O: ../port/error.h +devacpi.$O: acpi.h +physalloc.$O: acpi.h + +sipi.h: l64sipi.$O + $LD -o l64sipi.out -T0xfffffffff0003000 -R4 -l -s $prereq + {echo 'uchar sipihandler[]={' + xd -1x l64sipi.out | + sed -e 's/^[0-9a-f]+ //' \ + -e '1,2d' -e '3s/^ .. .. .. .. .. .. .. 
..//' \ + -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > $target + +../root/nvram: + dd -if /dev/zero -of ../root/nvram -bs 512 -count 1 + nvram=../root/nvram auth/wrkey diff -Nru 0/sys/src/nix/k10/mmu.c 4/sys/src/nix/k10/mmu.c --- 0/sys/src/nix/k10/mmu.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/mmu.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,862 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" + +/* + * To do: + * PteNX; + * mmukmapsync grot for >1 processor; + * replace vmap with newer version (no PDMAP); + * mmuptcopy (PteSHARED trick?); + * calculate and map up to TMFM (conf crap); + */ + +#define TMFM (64*MiB) /* kernel memory */ + +#define PPN(x) ((x)&~(PGSZ-1)) + +void +mmuflushtlb(uintmem) +{ + PTE *pte; + int i; + + m->tlbpurge++; + if(m->pml4->daddr){ + pte = UINT2PTR(m->pml4->va); + for(i = 0; i < m->pml4->daddr; i++) + if(pte[i] & PteP) + pte[i] = 0; + m->pml4->daddr = 0; + } + cr3put(m->pml4->pa); +} + +void +mmuflush(void) +{ + Mpl pl; + + pl = splhi(); + up->newtlb = 1; + mmuswitch(up); + splx(pl); +} + +static void +mmuptpfree(Proc* proc, int clear) +{ + int l; + PTE *pte; + Page **last, *page; + + for(l = 1; l < 4; l++){ + last = &proc->mmuptp[l]; + if(*last == nil) + continue; + for(page = *last; page != nil; page = page->next){ +//what is right here? 2 or 1? 
+ if(l <= 2 && clear) + memset(UINT2PTR(page->va), 0, PTSZ); + pte = UINT2PTR(page->prev->va); + pte[page->daddr] = 0; + last = &page->next; + } + *last = proc->mmuptp[0]; + proc->mmuptp[0] = proc->mmuptp[l]; + proc->mmuptp[l] = nil; + } + + m->pml4->daddr = 0; +} + +static void +tabs(int n) +{ + int i; + + for(i = 0; i < n; i++) + print(" "); +} + +void +dumpptepg(int lvl, uintptr pa) +{ + PTE *pte; + int tab, i; + + tab = 4 - lvl; + pte = UINT2PTR(KADDR(pa)); + for(i = 0; i < PTSZ/sizeof(PTE); i++) + if(pte[i] & PteP){ + tabs(tab); + print("l%d %#p[%#05x]: %#ullx\n", lvl, pa, i, pte[i]); + + /* skip kernel mappings */ + if((pte[i]&PteU) == 0){ + tabs(tab+1); + print("...kern...\n"); + continue; + } + if(lvl > 2) + dumpptepg(lvl-1, PPN(pte[i])); + } +} + +void +dumpmmu(Proc *p) +{ + int i; + Page *pg; + + print("proc %#p\n", p); + for(i = 3; i > 0; i--){ + print("mmuptp[%d]:\n", i); + for(pg = p->mmuptp[i]; pg != nil; pg = pg->next) + print("\tpg %#p = va %#ullx pa %#ullx" + " daddr %#ulx next %#p prev %#p\n", + pg, pg->va, pg->pa, pg->daddr, pg->next, pg->prev); + } + print("pml4 %#ullx\n", m->pml4->pa); + if(0)dumpptepg(4, m->pml4->pa); +} + +void +dumpmmuwalk(uintmem addr) +{ + int l; + PTE *pte, *pml4; + + pml4 = UINT2PTR(m->pml4->va); + if((l = mmuwalk(pml4, addr, 3, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(pml4, addr, 2, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(pml4, addr, 1, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); + if((l = mmuwalk(pml4, addr, 0, &pte, nil)) >= 0) + print("cpu%d: mmu l%d pte %#p = %llux\n", m->machno, l, pte, *pte); +} + +static Page mmuptpfreelist; + +static Page* +mmuptpalloc(void) +{ + void* va; + Page *page; + + /* + * Do not really need a whole Page structure, + * but it makes testing this out a lot easier. + * Could keep a cache and free excess. 
+ * Have to maintain any fiction for pexit? + */ + lock(&mmuptpfreelist); + if((page = mmuptpfreelist.next) != nil){ + mmuptpfreelist.next = page->next; + mmuptpfreelist.ref--; + unlock(&mmuptpfreelist); + + if(page->ref++ != 0) + panic("mmuptpalloc ref\n"); + page->prev = page->next = nil; + memset(UINT2PTR(page->va), 0, PTSZ); + + if(page->pa == 0) + panic("mmuptpalloc: free page with pa == 0"); + return page; + } + unlock(&mmuptpfreelist); + + if((page = malloc(sizeof(Page))) == nil){ + print("mmuptpalloc Page\n"); + + return nil; + } + if((va = mallocalign(PTSZ, PTSZ, 0, 0)) == nil){ + print("mmuptpalloc va\n"); + free(page); + + return nil; + } + + page->va = PTR2UINT(va); + page->pa = PADDR(va); + page->ref = 1; + + if(page->pa == 0) + panic("mmuptpalloc: no pa"); + return page; +} + +void +mmuswitch(Proc* proc) +{ + PTE *pte; + Page *page; + Mpl pl; + + pl = splhi(); + if(proc->newtlb){ + /* + * NIX: We cannot clear our page tables if they are going to + * be used in the AC + */ + if(proc->ac == nil) + mmuptpfree(proc, 1); + proc->newtlb = 0; + } + + if(m->pml4->daddr){ + memset(UINT2PTR(m->pml4->va), 0, m->pml4->daddr*sizeof(PTE)); + m->pml4->daddr = 0; + } + + pte = UINT2PTR(m->pml4->va); + for(page = proc->mmuptp[3]; page != nil; page = page->next){ + pte[page->daddr] = PPN(page->pa)|PteU|PteRW|PteP; + if(page->daddr >= m->pml4->daddr) + m->pml4->daddr = page->daddr+1; + page->prev = m->pml4; + } + + tssrsp0(STACKALIGN(PTR2UINT(proc->kstack+KSTACK))); + cr3put(m->pml4->pa); + splx(pl); +} + +void +mmurelease(Proc* proc) +{ + Page *page, *next; + + mmuptpfree(proc, 0); + + for(page = proc->mmuptp[0]; page != nil; page = next){ + next = page->next; + if(--page->ref) + panic("mmurelease: page->ref %d\n", page->ref); + lock(&mmuptpfreelist); + page->next = mmuptpfreelist.next; + mmuptpfreelist.next = page; + mmuptpfreelist.ref++; + page->prev = nil; + unlock(&mmuptpfreelist); + } + if(proc->mmuptp[0] && pga.r.p) + wakeup(&pga.r); + proc->mmuptp[0] = nil; + + 
tssrsp0(STACKALIGN(m->stack+MACHSTKSZ)); + cr3put(m->pml4->pa); +} + +static void +checkpte(uintmem ppn, void *a) +{ + int l; + PTE *pte, *pml4; + u64int addr; + char buf[240], *s; + + addr = PTR2UINT(a); + pml4 = UINT2PTR(m->pml4->va); + pte = 0; + s = buf; + *s = 0; + if((l = mmuwalk(pml4, addr, 3, &pte, nil)) < 0 || (*pte&PteP) == 0) + goto Panic; + s = seprint(buf, buf+sizeof buf, + "check3: l%d pte %#p = %llux\n", + l, pte, pte?*pte:~0); + if((l = mmuwalk(pml4, addr, 2, &pte, nil)) < 0 || (*pte&PteP) == 0) + goto Panic; + s = seprint(s, buf+sizeof buf, + "check2: l%d pte %#p = %llux\n", + l, pte, pte?*pte:~0); + if(*pte&PtePS) + return; + if((l = mmuwalk(pml4, addr, 1, &pte, nil)) < 0 || (*pte&PteP) == 0) + goto Panic; + seprint(s, buf+sizeof buf, + "check1: l%d pte %#p = %llux\n", + l, pte, pte?*pte:~0); + return; +Panic: + + seprint(s, buf+sizeof buf, + "checkpte: l%d addr %#p ppn %#ullx kaddr %#p pte %#p = %llux", + l, a, ppn, KADDR(ppn), pte, pte?*pte:~0); + print("%s\n", buf); + seprint(buf, buf+sizeof buf, "start %#ullx unused %#ullx" + " unmap %#ullx end %#ullx\n", + sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend); + panic("%s", buf); +} + + +static void +mmuptpcheck(Proc *proc) +{ + int lvl, npgs, i; + Page *lp, *p, *pgs[16], *fp; + uint idx[16]; + + if(proc == nil) + return; + lp = m->pml4; + for(lvl = 3; lvl >= 2; lvl--){ + npgs = 0; + for(p = proc->mmuptp[lvl]; p != nil; p = p->next){ + for(fp = proc->mmuptp[0]; fp != nil; fp = fp->next) + if(fp == p){ + dumpmmu(proc); + panic("ptpcheck: using free page"); + } + for(i = 0; i < npgs; i++){ + if(pgs[i] == p){ + dumpmmu(proc); + panic("ptpcheck: dup page"); + } + if(idx[i] == p->daddr){ + dumpmmu(proc); + panic("ptcheck: dup daddr"); + } + } + if(npgs >= nelem(pgs)) + panic("ptpcheck: pgs is too small"); + idx[npgs] = p->daddr; + pgs[npgs++] = p; + if(lvl == 3 && p->prev != lp){ + dumpmmu(proc); + panic("ptpcheck: wrong prev"); + } + } + + } + npgs = 0; + for(fp = proc->mmuptp[0]; fp != nil; 
fp = fp->next){ + for(i = 0; i < npgs; i++) + if(pgs[i] == fp) + panic("ptpcheck: dup free page"); + pgs[npgs++] = fp; + } +} + +static uint +pteflags(uint attr) +{ + uint flags; + + flags = 0; + if(attr & ~(PTEVALID|PTEWRITE|PTERONLY|PTEUNCACHED)) + panic("mmuput: wrong attr bits: %#ux\n", attr); + if(attr&PTEVALID) + flags |= PteP; + if(attr&PTEWRITE) + flags |= PteRW; + if(attr&PTEUNCACHED) + flags |= PtePCD; + return flags; +} + +/* + * pg->pgszi indicates the page size in m->pgsz[] used for the mapping. + * For the user, it can be either 2*MiB or 1*GiB pages. + * For 2*MiB pages, we use three levels, not four. + * For 1*GiB pages, we use two levels. + */ +void +mmuput(uintptr va, Page *pg, uint attr) +{ + int lvl, user, x, pgsz; + PTE *pte; + Page *page, *prev; + Mpl pl; + uintmem pa, ppn; + char buf[80]; + + ppn = 0; + pa = pg->pa; + if(pa == 0) + panic("mmuput: zero pa"); + + if(DBGFLG){ + snprint(buf, sizeof buf, "cpu%d: up %#p mmuput %#p %#P %#ux\n", + m->machno, up, va, pa, attr); + print("%s", buf); + } + assert(pg->pgszi >= 0); + pgsz = m->pgsz[pg->pgszi]; + if(pa & (pgsz-1)) + panic("mmuput: pa offset non zero: %#ullx\n", pa); + pa |= pteflags(attr); + + pl = splhi(); + if(DBGFLG) + mmuptpcheck(up); + user = (va < KZERO); + x = PTLX(va, 3); + + pte = UINT2PTR(m->pml4->va); + pte += x; + prev = m->pml4; + + for(lvl = 3; lvl >= 0; lvl--){ + if(user){ + if(pgsz == 2*MiB && lvl == 1) /* use 2M */ + break; + if(pgsz == 1ull*GiB && lvl == 2) /* use 1G */ + break; + } + for(page = up->mmuptp[lvl]; page != nil; page = page->next) + if(page->prev == prev && page->daddr == x){ + if(*pte == 0){ + print("mmu: jmk and nemo had fun\n"); + *pte = PPN(page->pa)|PteU|PteRW|PteP; + } + break; + } + + if(page == nil){ + if(up->mmuptp[0] == nil) + page = mmuptpalloc(); + else { + page = up->mmuptp[0]; + up->mmuptp[0] = page->next; + } + page->daddr = x; + page->next = up->mmuptp[lvl]; + up->mmuptp[lvl] = page; + page->prev = prev; + *pte = PPN(page->pa)|PteU|PteRW|PteP; + 
if(lvl == 3 && x >= m->pml4->daddr) + m->pml4->daddr = x+1; + } + x = PTLX(va, lvl-1); + + ppn = PPN(*pte); + if(ppn == 0) + panic("mmuput: ppn=0 l%d pte %#p = %#P\n", lvl, pte, *pte); + + pte = UINT2PTR(KADDR(ppn)); + pte += x; + prev = page; + } + + if(DBGFLG) + checkpte(ppn, pte); + *pte = pa|PteU; + + if(user) + switch(pgsz){ + case 2*MiB: + case 1*GiB: + *pte |= PtePS; + break; + default: + panic("mmuput: user pages must be 2M or 1G"); + } + splx(pl); + + if(DBGFLG){ + snprint(buf, sizeof buf, "cpu%d: up %#p new pte %#p = %#llux\n", + m->machno, up, pte, pte?*pte:~0); + print("%s", buf); + } + + invlpg(va); /* only if old entry valid? */ +} + +static Lock mmukmaplock; +static Lock vmaplock; + +#define PML4X(v) PTLX((v), 3) +#define PDPX(v) PTLX((v), 2) +#define PDX(v) PTLX((v), 1) +#define PTX(v) PTLX((v), 0) + +int +mmukmapsync(uvlong va) +{ + USED(va); + + return 0; +} + +static PTE +pdeget(uintptr va) +{ + PTE *pdp; + + if(va < 0xffffffffc0000000ull) + panic("pdeget(%#p)", va); + + pdp = (PTE*)(PDMAP+PDX(PDMAP)*4096); + + return pdp[PDX(va)]; +} + +/* + * Add kernel mappings for pa -> va for a section of size bytes. + * Called only after the va range is known to be unoccupied. + */ +static int +pdmap(uintptr pa, int attr, uintptr va, usize size) +{ + uintptr pae; + PTE *pd, *pde, *pt, *pte; + int pdx, pgsz; + Page *pg; + + pd = (PTE*)(PDMAP+PDX(PDMAP)*4096); + + for(pae = pa + size; pa < pae; pa += pgsz){ + pdx = PDX(va); + pde = &pd[pdx]; + + /* + * Check if it can be mapped using a big page, + * i.e. is big enough and starts on a suitable boundary. + * Assume processor can do it. 
+ */ + if(ALIGNED(pa, PGLSZ(1)) && ALIGNED(va, PGLSZ(1)) && (pae-pa) >= PGLSZ(1)){ + assert(*pde == 0); + *pde = pa|attr|PtePS|PteP; + pgsz = PGLSZ(1); + } + else{ + if(*pde == 0){ + pg = mmuptpalloc(); + assert(pg != nil && pg->pa != 0); + *pde = pg->pa|PteRW|PteP; + memset((PTE*)(PDMAP+pdx*4096), 0, 4096); + } + assert(*pde != 0); + + pt = (PTE*)(PDMAP+pdx*4096); + pte = &pt[PTX(va)]; + assert(!(*pte & PteP)); + *pte = pa|attr|PteP; + pgsz = PGLSZ(0); + } + va += pgsz; + } + + return 0; +} + +static int +findhole(PTE* a, int n, int count) +{ + int have, i; + + have = 0; + for(i = 0; i < n; i++){ + if(a[i] == 0) + have++; + else + have = 0; + if(have >= count) + return i+1 - have; + } + + return -1; +} + +/* + * Look for free space in the vmap. + */ +static uintptr +vmapalloc(usize size) +{ + int i, n, o; + PTE *pd, *pt; + int pdsz, ptsz; + + pd = (PTE*)(PDMAP+PDX(PDMAP)*4096); + pd += PDX(VMAP); + pdsz = VMAPSZ/PGLSZ(1); + + /* + * Look directly in the PD entries if the size is + * larger than the range mapped by a single entry. + */ + if(size >= PGLSZ(1)){ + n = HOWMANY(size, PGLSZ(1)); + if((o = findhole(pd, pdsz, n)) != -1) + return VMAP + o*PGLSZ(1); + return 0; + } + + /* + * Size is smaller than that mapped by a single PD entry. + * Look for an already mapped PT page that has room. + */ + n = HOWMANY(size, PGLSZ(0)); + ptsz = PGLSZ(0)/sizeof(PTE); + for(i = 0; i < pdsz; i++){ + if(!(pd[i] & PteP) || (pd[i] & PtePS)) + continue; + + pt = (PTE*)(PDMAP+(PDX(VMAP)+i)*4096); + if((o = findhole(pt, ptsz, n)) != -1) + return VMAP + i*PGLSZ(1) + o*PGLSZ(0); + } + + /* + * Nothing suitable, start using a new PD entry. + */ + if((o = findhole(pd, pdsz, 1)) != -1) + return VMAP + o*PGLSZ(1); + + return 0; +} + +/* + * KSEG0 maps low memory. + * KSEG2 maps almost all memory, but starting at an address determined + * by the address space map (see asm.c). 
+ * Thus, almost everything in physical memory is already mapped, but + * there are things that fall in the gap + * (acpi tables, device memory-mapped registers, etc.) + * for those things, we also want to disable caching. + * vmap() is required to access them. + */ +void* +vmap(uintmem pa, usize size) +{ + uintptr va; + usize o, sz; + + DBG("vmap(%#p, %lud) pc=%#p\n", pa, size, getcallerpc(&pa)); + + if(m->machno != 0) + panic("vmap"); + + /* + * This is incomplete; the checks are not comprehensive + * enough. + * Sometimes the request is for an already-mapped piece + * of low memory, in which case just return a good value + * and hope that a corresponding vunmap of the address + * will have the same address. + * To do this properly will require keeping track of the + * mappings; perhaps something like kmap, but kmap probably + * can't be used early enough for some of the uses. + */ + if(pa+size < 1ull*MiB) + return KADDR(pa); + if(pa < 1ull*MiB) + return nil; + + /* + * Might be asking for less than a page. + * This should have a smaller granularity if + * the page size is large. + */ + o = pa & ((1< %#p\n", pa+o, size, va+o); + + return UINT2PTR(va + o); +} + +void +vunmap(void* v, usize size) +{ + uintptr va; + + DBG("vunmap(%#p, %lud)\n", v, size); + + if(m->machno != 0) + panic("vunmap"); + + /* + * See the comments above in vmap. + */ + va = PTR2UINT(v); + if(va >= KZERO && va+size < KZERO+1ull*MiB) + return; + + /* + * Here will have to deal with releasing any + * resources used for the allocation (e.g. page table + * pages). 
+ */ + DBG("vunmap(%#p, %lud)\n", v, size); +} + +/* + * Walk the page tables rooted at pml4 towards the entry for va, + * descending from level 3 until `level' is reached. + * The walk stops early at an entry that is not present when + * alloc is nil, or at an entry that has PtePS set. + * When alloc is supplied, a missing table is obtained with + * alloc(PTSZ), zeroed, and installed with PteRW|PteP. + * On return *ret points at the entry reached and the result is + * the level of that entry; if alloc returns ~0 the result is -1 + * and *ret is not written. + * The walk runs at splhi; the caller's priority level is + * restored on every return path. + */ +int +mmuwalk(PTE* pml4, uintptr va, int level, PTE** ret, uintmem (*alloc)(usize)) +{ + int l; + uintmem pa; + PTE *pte; + + Mpl pl; + + pl = splhi(); + if(DBGFLG > 1) + DBG("mmuwalk%d: va %#p level %d\n", m->machno, va, level); + pte = &pml4[PTLX(va, 3)]; + for(l = 3; l >= 0; l--){ + if(l == level) + break; + if(!(*pte & PteP)){ + if(alloc == nil) + break; + pa = alloc(PTSZ); + if(pa == ~0){ + /* allocation failed: undo the splhi above before the early return */ + splx(pl); + return -1; + } + memset(UINT2PTR(KADDR(pa)), 0, PTSZ); + *pte = pa|PteRW|PteP; + } + else if(*pte & PtePS) + break; + pte = UINT2PTR(KADDR(PPN(*pte))); + pte += PTLX(va, l-1); + } + *ret = pte; + splx(pl); + return l; +} + +uintmem +mmuphysaddr(uintptr va) +{ + int l; + PTE *pte; + uintmem mask, pa; + + /* + * Given a VA, find the PA. + * This is probably not the right interface, + * but will do as an experiment. Usual + * question, should va be void* or uintptr? + */ + l = mmuwalk(UINT2PTR(m->pml4->va), va, 0, &pte, nil); + DBG("physaddr: va %#p l %d\n", va, l); + if(l < 0) + return ~0; + + mask = PGLSZ(l)-1; + pa = (*pte & ~mask) + (va & mask); + + DBG("physaddr: l %d va %#p pa %#llux\n", l, va, pa); + + return pa; +} + +Page mach0pml4; + +static void +nxeon(void) +{ + uint idres[4]; + + /* on intel64, cpuid 0x8::1 DX bit 20 means "Nxe bit in Efer allowed" */ + cpuid(0x80000001, 0, idres); + if (idres[3] & (1<<20)) + wrmsr(Efer, rdmsr(Efer) | Nxe); +} + +void +mmuinit(void) +{ + uchar *p; + Page *page; + u64int o, pa, sz; + + archmmu(); + DBG("mach%d: %#p pml4 %#p npgsz %d\n", m->machno, m, m->pml4, m->npgsz); + + if(m->machno != 0){ + /* NIX: KLUDGE: Has to go when each mach is using + * its own page table + */ + p = UINT2PTR(m->stack); + p += MACHSTKSZ; + + memmove(p, UINT2PTR(mach0pml4.va), PTSZ); + m->pml4 = &m->pml4kludge; + m->pml4->va = PTR2UINT(p); + m->pml4->pa = PADDR(p); + m->pml4->daddr = mach0pml4.daddr; /* # of user mappings in pml4 */ + + nxeon(); + cr3put(m->pml4->pa); + DBG("m %#p pml4 %#p\n", m, m->pml4); +
return; + } + + page = &mach0pml4; + page->pa = cr3get(); + page->va = PTR2UINT(KADDR(page->pa)); + + m->pml4 = page; + + nxeon(); + + /* + * Set up the various kernel memory allocator limits: + * pmstart/pmend bound the unused physical memory; + * vmstart/vmend bound the total possible virtual memory + * used by the kernel; + * vmunused is the highest virtual address currently mapped + * and used by the kernel; + * vmunmapped is the highest virtual address currently + * mapped by the kernel. + * Vmunused can be bumped up to vmunmapped before more + * physical memory needs to be allocated and mapped. + * + * This is set up here so meminit can map appropriately. + */ + o = sys->pmstart; + sz = ROUNDUP(o, 4*MiB) - o; + pa = asmalloc(0, sz, 1, 0); + if(pa != o) + panic("mmuinit: pa %#llux memstart %#llux\n", pa, o); + sys->pmstart += sz; + + sys->vmstart = KSEG0; + sys->vmunused = sys->vmstart + ROUNDUP(o, 4*KiB); + sys->vmunmapped = sys->vmstart + o + sz; + sys->vmend = sys->vmstart + TMFM; + + print("mmuinit: vmstart %#p vmunused %#p vmunmapped %#p vmend %#p\n", + sys->vmstart, sys->vmunused, sys->vmunmapped, sys->vmend); + + /* + * Set up the map for PD entry access by inserting + * the relevant PDP entry into the PD. It's equivalent + * to PADDR(sys->pd)|PteRW|PteP. + * + */ + sys->pd[PDX(PDMAP)] = sys->pdp[PDPX(PDMAP)] & ~(PteD|PteA); + print("sys->pd %#p %#p\n", sys->pd[PDX(PDMAP)], sys->pdp[PDPX(PDMAP)]); + assert((pdeget(PDMAP) & ~(PteD|PteA)) == (PADDR(sys->pd)|PteRW|PteP)); + + + dumpmmuwalk(KZERO); + + mmuphysaddr(PTR2UINT(end)); +} diff -Nru 0/sys/src/nix/k10/mp.c 4/sys/src/nix/k10/mp.c --- 0/sys/src/nix/k10/mp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/mp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,487 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" + +/* + * MultiProcessor Specification Version 1.[14]. 
+ */ +typedef struct { /* MP Floating Pointer */ + u8int signature[4]; /* "_MP_" */ + u8int addr[4]; /* PCMP */ + u8int length; /* 1 */ + u8int revision; /* [14] */ + u8int checksum; + u8int feature[5]; +} _MP_; + +typedef struct { /* MP Configuration Table */ + u8int signature[4]; /* "PCMP" */ + u8int length[2]; + u8int revision; /* [14] */ + u8int checksum; + u8int string[20]; /* OEM + Product ID */ + u8int oaddr[4]; /* OEM table pointer */ + u8int olength[2]; /* OEM table length */ + u8int entry[2]; /* entry count */ + u8int apicpa[4]; /* local APIC address */ + u8int xlength[2]; /* extended table length */ + u8int xchecksum; /* extended table checksum */ + u8int reserved; + + u8int entries[]; +} PCMP; + +typedef struct { + char type[6]; + int polarity; /* default for this bus */ + int trigger; /* default for this bus */ +} Mpbus; + +static Mpbus mpbusdef[] = { + { "PCI ", IPlow, TMlevel, }, + { "ISA ", IPhigh, TMedge, }, +}; +static Mpbus* mpbus[Nbus]; +int mpisabusno = -1; + +/* + * Format the interrupt entry at p (type, flags, bus, IRQ, APIC, INTIN) + * into a local buffer, prefixed by s when it is not nil, and print it. + */ +static void +mpintrprint(char* s, u8int* p) +{ + char buf[128], *b, *e; + char format[] = " type %d flags %#ux bus %d IRQ %d APIC %d INTIN %d\n"; + + b = buf; + e = b + sizeof(buf); + b = seprint(b, e, "mpparse: intr:"); + if(s != nil) + b = seprint(b, e, " %s:", s); + seprint(b, e, format, p[1], l16get(p+2), p[4], p[5], p[6], p[7]); + print("%s", buf); /* buf is already-formatted data, never a format string */ +} + +static u32int +mpmkintr(u8int* p) +{ + u32int v; + Apic *apic; + int n, polarity, trigger; + + /* + * Check valid bus, interrupt input pin polarity + * and trigger mode. If the APIC ID is 0xff it means + * all APICs of this type so those checks for useable + * APIC and valid INTIN must also be done later in + * the appropriate init routine in that case. It's hard + * to imagine routing a signal to all IOAPICs, the + * usual case is routing NMI and ExtINT to all LAPICs. 
+ */ + if(mpbus[p[4]] == nil){ + mpintrprint("no source bus", p); + return 0; + } + if(p[6] != 0xff){ + if(Napic < 256 && p[6] >= Napic){ + mpintrprint("APIC ID out of range", p); + return 0; + } + switch(p[0]){ + default: + mpintrprint("INTIN botch", p); + return 0; + case 3: /* IOINTR */ + apic = &xioapic[p[6]]; + if(!apic->useable){ + mpintrprint("unuseable ioapic", p); + return 0; + } + if(p[7] >= apic->nrdt){ + mpintrprint("IO INTIN out of range", p); + return 0; + } + break; + case 4: /* LINTR */ + apic = &xlapic[p[6]]; + if(!apic->useable){ + mpintrprint("unuseable lapic", p); + return 0; + } + if(p[7] >= nelem(apic->lvt)){ + mpintrprint("LOCAL INTIN out of range", p); + return 0; + } + break; + } + } + n = l16get(p+2); + if((polarity = (n & 0x03)) == 2 || (trigger = ((n>>2) & 0x03)) == 2){ + mpintrprint("invalid polarity/trigger", p); + return 0; + } + + /* + * Create the low half of the vector table entry (LVT or RDT). + * For the NMI, SMI and ExtINT cases, the polarity and trigger + * are fixed (but are not always consistent over IA-32 generations). + * For the INT case, either the polarity/trigger are given or + * it defaults to that of the source bus; + * whether INT is Fixed or Lowest Priority is left until later. 
+ */ + v = Im; + switch(p[1]){ + default: + mpintrprint("invalid type", p); + return 0; + case 0: /* INT */ + switch(polarity){ + case 0: + v |= mpbus[p[4]]->polarity; + break; + case 1: + v |= IPhigh; + break; + case 3: + v |= IPlow; + break; + } + switch(trigger){ + case 0: + v |= mpbus[p[4]]->trigger; + break; + case 1: + v |= TMedge; + break; + case 3: + v |= TMlevel; + break; + } + break; + case 1: /* NMI */ + v |= TMedge|IPhigh|MTnmi; + break; + case 2: /* SMI */ + v |= TMedge|IPhigh|MTsmi; + break; + case 3: /* ExtINT */ + v |= TMedge|IPhigh|MTei; + break; + } + + return v; +} + +static int +mpparse(PCMP* pcmp, int maxcores) +{ + u32int lo; + u8int *e, *p; + int devno, i, n; + + p = pcmp->entries; + e = ((uchar*)pcmp)+l16get(pcmp->length); + while(p < e) switch(*p){ + default: + print("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e-p); + for(i = 0; p < e; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 0: /* processor */ + /* + * Initialise the APIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, the memory mapped address comes + * from the PCMP structure as the addess is local to the + * CPU and identical for all. Indicate whether this is + * the bootstrap processor (p[3] & 0x02). 
+ */ + DBG("mpparse: cpu %d pa %#ux bp %d\n", + p[1], l32get(pcmp->apicpa), p[3] & 0x02); + if((p[3] & 0x01) != 0 && maxcores > 0){ + maxcores--; + apicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02); + } + p += 20; + break; + case 1: /* bus */ + DBG("mpparse: bus: %d type %6.6s\n", p[1], (char*)p+2); + if(p[1] >= Nbus){ + print("mpparse: bus %d out of range\n", p[1]); + p += 8; + break; + } + if(mpbus[p[1]] != nil){ + print("mpparse: bus %d already allocated\n", p[1]); + p += 8; + break; + } + for(i = 0; i < nelem(mpbusdef); i++){ + if(memcmp(p+2, mpbusdef[i].type, 6) != 0) + continue; + if(memcmp(p+2, "ISA ", 6) == 0){ + if(mpisabusno != -1){ + print("mpparse: bus %d already have ISA bus %d\n", + p[1], mpisabusno); + continue; + } + mpisabusno = p[1]; + } + mpbus[p[1]] = &mpbusdef[i]; + break; + } + if(mpbus[p[1]] == nil) + print("mpparse: bus %d type %6.6s unknown\n", + p[1], (char*)p+2); + + p += 8; + break; + case 2: /* IOAPIC */ + /* + * Initialise the IOAPIC if it is enabled (p[3] & 0x01). + * p[1] is the APIC ID, p[4-7] is the memory mapped address. + */ + if(p[3] & 0x01) + ioapicinit(p[1], -1, l32get(p+4)); + + p += 8; + break; + case 3: /* IOINTR */ + /* + * p[1] is the interrupt type; + * p[2-3] contains the polarity and trigger mode; + * p[4] is the source bus; + * p[5] is the IRQ on the source bus; + * p[6] is the destination APIC; + * p[7] is the INITIN pin on the destination APIC. + */ + if(p[6] == 0xff){ + mpintrprint("routed to all IOAPICs", p); + p += 8; + break; + } + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + /* + * Always present the device number in the style + * of a PCI Interrupt Assignment Entry. For the ISA + * bus the IRQ is the device number but unencoded. + * May need to handle other buses here in the future + * (but unlikely). 
+ */ + devno = p[5]; + if(memcmp(mpbus[p[4]]->type, "PCI ", 6) != 0) + devno <<= 2; + ioapicintrinit(p[4], p[6], p[7], devno, lo); + + p += 8; + break; + case 4: /* LINTR */ + /* + * Format is the same as IOINTR above. + */ + if((lo = mpmkintr(p)) == 0){ + p += 8; + break; + } + if(DBGFLG) + mpintrprint(nil, p); + + /* + * Everything was checked in mpmkintr above. + */ + if(p[6] == 0xff){ + for(i = 0; i < Napic; i++){ + if(!xlapic[i].useable || xlapic[i].addr != nil) + continue; + xlapic[i].lvt[p[7]] = lo; + } + } + else + xlapic[p[6]].lvt[p[7]] = lo; + p += 8; + break; + } + + /* + * There's nothing of interest in the extended table, + * but check it for consistency. + * Every extended entry carries its own length in p[1]; + * an entry whose length is shorter than its two-byte header + * or longer than what is left of the table would stall the + * loop or run off the end, so stop parsing at the first one. + */ + p = e; + e = p + l16get(pcmp->xlength); + while(p < e){ + if(e-p < 2 || p[1] < 2 || p[1] > e-p){ + print("mpparse: extended entry %d: bad length\n", *p); + break; + } + switch(*p){ + default: + n = p[1]; + print("mpparse: unknown extended entry %d length %d\n", *p, n); + for(i = 0; i < n; i++){ + if(i && ((i & 0x0f) == 0)) + print("\n"); + print(" %#2.2ux", *p); + p++; + } + print("\n"); + break; + case 128: + DBG("address space mapping\n"); + DBG(" bus %d type %d base %#llux length %#llux\n", + p[2], p[3], l64get(p+4), l64get(p+12)); + p += p[1]; + break; + case 129: + DBG("bus hierarchy descriptor\n"); + DBG(" bus %d sd %d parent bus %d\n", + p[2], p[3], p[4]); + p += p[1]; + break; + case 130: + DBG("compatibility bus address space modifier\n"); + DBG(" bus %d pr %d range list %d\n", + p[2], p[3], l32get(p+4)); + p += p[1]; + break; + } + } + return maxcores; +} + +static int +sigchecksum(void* address, int length) +{ + u8int *p, sum; + + sum = 0; + for(p = address; length-- > 0; p++) + sum += *p; + + return sum; +} + +static void* +sigscan(u8int* address, int length, char* signature) +{ + u8int *e, *p; + int siglength; + + e = address+length; + siglength = strlen(signature); + for(p = address; p+siglength < e; p += 16){ + if(memcmp(p, signature, siglength)) + continue; + return p; + } + + return nil; +} +static void* +sigsearch(char* signature) +{ + uintptr p; + u8int *bda; + void *r; + + /* + * Search for 
the data structure: + * 1) in the first KB of the EBDA; + * 2) in the last KB of system base memory; + * 3) in the BIOS ROM between 0xe0000 and 0xfffff. + */ + bda = BIOSSEG(0x40); + if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){ + if((p = (bda[0x0f]<<8)|bda[0x0e])){ + if((r = sigscan(BIOSSEG(p), 1024, signature)) != nil) + return r; + } + } + + p = ((bda[0x14]<<8)|bda[0x13])*1024; + if((r = sigscan(KADDR(p-1024), 1024, signature)) != nil) + return r; + + r = sigscan(BIOSSEG(0xe000), 0x20000, signature); + if(r != nil) + return r; + /* and virtualbox hidden mp tables... */ + return sigscan(KADDR(0xa0000 - 1024), 1024, signature); +} + +void +mpsinit(int maxcores) +{ + u8int *p; + int i, n, ncleft; + _MP_ *mp; + PCMP *pcmp; + + if((mp = sigsearch("_MP_")) == nil){ + print("no mp tables\n"); + return; + } + + if(DBGFLG){ + DBG("_MP_ @ %#p, addr %#ux length %ud rev %d", + mp, l32get(mp->addr), mp->length, mp->revision); + for(i = 0; i < sizeof(mp->feature); i++) + DBG(" %2.2#ux", mp->feature[i]); + DBG("\n"); + } + if(mp->revision != 1 && mp->revision != 4) + return; + if(sigchecksum(mp, mp->length*16) != 0) + return; + + if((pcmp = vmap(l32get(mp->addr), sizeof(PCMP))) == nil) + return; + if(pcmp->revision != 1 && pcmp->revision != 4){ + vunmap(pcmp, sizeof(PCMP)); + return; + } + n = l16get(pcmp->length) + l16get(pcmp->xlength); + vunmap(pcmp, sizeof(PCMP)); + if((pcmp = vmap(l32get(mp->addr), n)) == nil) + return; + if(sigchecksum(pcmp, l16get(pcmp->length)) != 0){ + vunmap(pcmp, n); + return; + } + if(DBGFLG){ + DBG("PCMP @ %#p length %#ux revision %d\n", + pcmp, l16get(pcmp->length), pcmp->revision); + DBG(" %20.20s oaddr %#ux olength %#ux\n", + (char*)pcmp->string, l32get(pcmp->oaddr), + l16get(pcmp->olength)); + DBG(" entry %d apicpa %#ux\n", + l16get(pcmp->entry), l32get(pcmp->apicpa)); + + DBG(" xlength %#ux xchecksum %#ux\n", + l16get(pcmp->xlength), pcmp->xchecksum); + } + if(pcmp->xchecksum != 0){ + p = ((u8int*)pcmp) + l16get(pcmp->length); + i = 
sigchecksum(p, l16get(pcmp->xlength)); + if(((i+pcmp->xchecksum) & 0xff) != 0){ + print("extended table checksums to %#ux\n", i); + vunmap(pcmp, n); + return; + } + } + + /* + * Parse the PCMP table and set up the datastructures + * for later interrupt enabling and application processor + * startup. + */ + ncleft = mpparse(pcmp, maxcores); + mpacpi(ncleft); + + apicdump(); + ioapicdump(); +} diff -Nru 0/sys/src/nix/k10/mpacpi.c 4/sys/src/nix/k10/mpacpi.c --- 0/sys/src/nix/k10/mpacpi.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/mpacpi.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,61 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "mp.h" +#include "apic.h" +#include "acpi.h" + +extern Madt *apics; + +int +mpacpi(int ncleft) /* walk the ACPI MADT entries, initialising APICs the MP table did not; returns the cores still allowed */ +{ + char *already; + int np, bp; + Apic *apic; + Apicst *st; + + print("APIC lapic paddr %#.8llux, flags %#.8ux\n", + apics->lapicpa, apics->pcat); + np = 0; + for(st = apics->st; st != nil; st = st->next){ + already = ""; + switch(st->type){ + case ASlapic: + /* this table is supposed to have all of them if it exists */ + if(st->lapic.id >= Napic) /* ids index xlapic; Napic itself is out of range */ + break; + apic = xlapic + st->lapic.id; + bp = (np++ == 0); + if(apic->useable){ + already = "(mp)"; + } + else if(ncleft != 0){ + ncleft--; + apicinit(st->lapic.id, apics->lapicpa, bp); + } else + already = "(off)"; + + print("apic proc %d/%d apicid %d %s\n", np-1, apic->machno, st->lapic.id, already); + break; + case ASioapic: + if(st->ioapic.id >= Napic) /* ids index xioapic; Napic itself is out of range */ + break; + apic = xioapic + st->ioapic.id; + if(apic->useable){ + apic->ibase = st->ioapic.ibase; /* gnarly */ + already = "(mp)"; + goto pr1; + } + ioapicinit(st->ioapic.id, st->ioapic.ibase, st->ioapic.addr); + pr1: + print("ioapic %d ", st->ioapic.id); + print("addr %p base %d %s\n", apic->paddr, apic->ibase, already); + break; + } + } + return ncleft; +} diff -Nru 0/sys/src/nix/k10/msi.c 4/sys/src/nix/k10/msi.c --- 0/sys/src/nix/k10/msi.c Thu Jan 1 00:00:00 1970 +++ 
4/sys/src/nix/k10/msi.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,117 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "apic.h" + +enum { + Dpcicap = 1<<0, + Dmsicap = 1<<1, + Dvec = 1<<2, + Debug = 0, +}; + +enum { + /* address */ + Msiabase = 0xfee00000u, + Msiadest = 1<<12, /* same as 63:56 of apic vector */ + Msiaedest = 1<<4, /* same as 55:48 of apic vector */ + Msialowpri = 1<<3, /* redirection hint */ + Msialogical = 1<<2, + + /* data */ + Msidlevel = 1<<15, + Msidassert = 1<<14, + Msidlogical = 1<<11, + Msidmode = 1<<8, /* 3 bits; delivery mode */ + Msidvector = 0xff<<0, +}; + +enum{ + /* msi capabilities */ + Vmask = 1<<8, + Cap64 = 1<<7, + Mmesgmsk = 7<<4, + Mmcap = 7<<1, + Msienable = 1<<0, +}; + +static int +msicap(Pcidev *p) +{ + int c; + + c = pcicap(p, PciCapMSI); + if(c == -1) + return 0; + return c; +} + +static int +blacklist(Pcidev *p) +{ + switch(p->vid<<16 | p->did){ + case 0x11ab<<16 | 0x6485: + return -1; + } + return 0; +} + +int +pcimsienable(Pcidev *p, uvlong vec) +{ + char *s; + uint c, f, d, datao, lopri, dmode, logical; + + c = msicap(p); + if(c == 0) + return -1; + + f = pcicfgr16(p, c + 2) & ~Mmesgmsk; + + if(blacklist(p) != 0) + return -1; + datao = 8; + d = vec>>48; + lopri = (vec & 0x700) == MTlp; + logical = (vec & Lm) != 0; + pcicfgw32(p, c + 4, Msiabase | Msiaedest * d + | Msialowpri * lopri | Msialogical * logical); + if(f & Cap64){ + datao += 4; + pcicfgw32(p, c + 8, 0); + } + dmode = (vec >> 8) & 7; + pcicfgw16(p, c + datao, Msidassert | Msidlogical * logical + | Msidmode * dmode | (uint)vec & 0xff); + if(f & Vmask) + pcicfgw32(p, c + datao + 4, 0); + + /* leave vectors configured but disabled for debugging */ + if((s = getconf("*nomsi")) != nil && atoi(s) != 0) + return -1; + + pcicfgw16(p, c + 2, f); + return 0; +} + +int +pcimsimask(Pcidev *p, int mask) +{ + uint c, f; + + c = msicap(p); + if(c == 0) + return -1; + f = pcicfgr16(p, c + 2) & ~Msienable; + 
if(mask){ + pcicfgw16(p, c + 2, f & ~Msienable); +// pciclrbme(p); cheeze + }else{ + pcisetbme(p); + pcicfgw16(p, c + 2, f | Msienable); + } + return 0; +} diff -Nru 0/sys/src/nix/k10/multiboot.c 4/sys/src/nix/k10/multiboot.c --- 0/sys/src/nix/k10/multiboot.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/multiboot.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,147 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +typedef struct Mbi Mbi; +struct Mbi { + u32int flags; + u32int memlower; + u32int memupper; + u32int bootdevice; + u32int cmdline; + u32int modscount; + u32int modsaddr; + u32int syms[4]; + u32int mmaplength; + u32int mmapaddr; + u32int driveslength; + u32int drivesaddr; + u32int configtable; + u32int bootloadername; + u32int apmtable; + u32int vbe[6]; +}; + +enum { /* flags */ + Fmem = 0x00000001, /* mem* valid */ + Fbootdevice = 0x00000002, /* bootdevice valid */ + Fcmdline = 0x00000004, /* cmdline valid */ + Fmods = 0x00000008, /* mod* valid */ + Fsyms = 0x00000010, /* syms[] has a.out info */ + Felf = 0x00000020, /* syms[] has ELF info */ + Fmmap = 0x00000040, /* mmap* valid */ + Fdrives = 0x00000080, /* drives* valid */ + Fconfigtable = 0x00000100, /* configtable* valid */ + Fbootloadername = 0x00000200, /* bootloadername* valid */ + Fapmtable = 0x00000400, /* apmtable* valid */ + Fvbe = 0x00000800, /* vbe[] valid */ +}; + +typedef struct Mod Mod; +struct Mod { + u32int modstart; + u32int modend; + u32int string; + u32int reserved; +}; + +typedef struct MMap MMap; +struct MMap { + u32int size; + u32int base[2]; + u32int length[2]; + u32int type; +}; + +int +multiboot(u32int magic, u32int pmbi, int vflag) +{ + char *p; + int i, n; + Mbi *mbi; + Mod *mod; + MMap *mmap; + u64int addr, len; + + if(vflag) + print("magic %#ux pmbi %#ux\n", magic, pmbi); + if(magic != 0x2badb002) + return -1; + + mbi = KADDR(pmbi); + if(vflag) + print("flags %#ux\n", mbi->flags); + if(mbi->flags & Fcmdline){ + p = 
KADDR(mbi->cmdline); + if(vflag) + print("cmdline <%s>\n", p); + else + optionsinit(p); + } + if(mbi->flags & Fmods){ + for(i = 0; i < mbi->modscount; i++){ + mod = KADDR(mbi->modsaddr + i*16); + if(mod->string != 0) + p = KADDR(mod->string); + else + p = ""; + if(vflag) + print("mod %#ux %#ux <%s>\n", + mod->modstart, mod->modend, p); + else + asmmodinit(mod->modstart, mod->modend, p); + } + } + if(mbi->flags & Fmmap){ + mmap = KADDR(mbi->mmapaddr); + n = 0; + while(n < mbi->mmaplength){ + addr = (((u64int)mmap->base[1])<<32)|mmap->base[0]; + len = (((u64int)mmap->length[1])<<32)|mmap->length[0]; + switch(mmap->type){ + default: + if(vflag) + print("type %ud", mmap->type); + break; + case 1: + if(vflag) + print("Memory"); + else + asmmapinit(addr, len, mmap->type); + break; + case 2: + if(vflag) + print("reserved"); + else + asmmapinit(addr, len, mmap->type); + break; + case 3: + if(vflag) + print("ACPI Reclaim Memory"); + else + asmmapinit(addr, len, mmap->type); + break; + case 4: + if(vflag) + print("ACPI NVS Memory"); + else + asmmapinit(addr, len, mmap->type); + break; + } + if(vflag) + print("\n\t%#16.16llux %#16.16llux (%llud)\n", + addr, addr+len, len); + + n += mmap->size+sizeof(mmap->size); + mmap = KADDR(mbi->mmapaddr+n); + } + } + if(vflag && (mbi->flags & Fbootloadername)){ + p = KADDR(mbi->bootloadername); + print("bootloadername <%s>\n", p); + } + + return 0; +} diff -Nru 0/sys/src/nix/k10/physalloc.c 4/sys/src/nix/k10/physalloc.c --- 0/sys/src/nix/k10/physalloc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/physalloc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,532 @@ +/* + * Buddy allocator for physical memory allocation. + * One per ACPI affinity domain, to color pages depending on their + * NUMA location. 
+ * + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "acpi.h" + +#define ISPOWEROF2(x) (((x) != 0) && !((x) & ((x)-1))) +#define UNO ((uintmem)1) + +enum { + BKmin = 21, /* Minimum lg2 */ + BKmax = 30, /* Maximum lg2 */ + + Ndoms = 16, /* Max # of domains */ + + Used = 0, + Avail = 1, +}; + + +#define INDEX(b, v) ((uint)(((v))/(b)->bminsz)) +#define BLOCK(b, i) ((i)-INDEX((b),(b)->memory)) + +typedef struct Buddy Buddy; +struct Buddy { + short tag; /* Used or Avail */ + short kval; + uint next; + uint prev; + void *p; +}; + +/* + * Bals should allocate using its base address as 0. + * For now, all of them refer to the entire memory and we record + * the base and size for each one. + */ +typedef struct Bal Bal; +struct Bal { + uintmem base; + u64int size; + usize nfree; + usize nblocks; + int kmin; /* Minimum lg2 */ + int kmax; /* Maximum lg2 */ + uintmem bminsz; /* minimum block sz */ + uintmem memory; + uint kspan; + + Buddy* blocks; + Buddy* avail; +}; + +static Bal bal[Ndoms]; +static int ndoms; +static Lock budlock; + +char* +seprintphysstats(char *s, char *e) +{ + Bal *b; + int i; + + lock(&budlock); + for(i = 0; i < Ndoms; i++){ + b = &bal[i]; + if(b->size > 0) + s = seprint(s, e, "%uld/%uld %ulldK color %d blocks avail\n", + b->nfree, b->nblocks, b->bminsz/KiB, i); + } + unlock(&budlock); + return s; +} + +static void +xphysfree(Bal *b, uintmem data, u64int size) +{ + uint i; + Buddy *l, *p; + Buddy *blocks, *avail; + + DBG("physfree\n"); + + /* + * Knuth's Algorithm S (Buddy System Liberation). + */ + blocks = b->blocks; + avail = b->avail; + + if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/) + return; + i = INDEX(b,data); + + lock(&budlock); +S1: + /* + * Find buddy. + */ + l = &blocks[BLOCK(b,i)]; + l->p = nil; + DBG("\tbsl: BLOCK(b,i) %d index %ulld kval %d\n", + BLOCK(b,i), BLOCK(b,i)/((1<kval)/b->bminsz), l->kval); + if((BLOCK(b,i)/((1<kval)/b->bminsz)) & 1) /* simpler test? 
*/ + p = l - (1<kval)/b->bminsz; + else + p = l + (1<kval)/(b->bminsz); + DBG("\tbsl: l @ %ld buddy @ %ld\n", l - blocks, p - blocks); + + /* + * Is buddy available? + * Can't merge if: + * this is the largest block; + * buddy isn't free; + * buddy has been subsequently split again. + */ + if(l->kval == b->kmax || p->tag == Used || (p->tag == Avail && p->kval != l->kval)){ + /* + * Put on list. + */ + l->tag = Avail; + l->next = avail[l->kval].next; + l->prev = 0; + if(l->next != 0) + blocks[BLOCK(b,l->next)].prev = i; + avail[l->kval].next = i; + + b->nfree += size/b->bminsz; + + unlock(&budlock); + DBG("bsl: free @ i %d BLOCK(b,i) %d kval %d next %d %s\n", + i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used"); + return; + } + + /* + * Combine with buddy. + * This removes block P from the avail list. + */ + if(p->prev != 0){ + blocks[BLOCK(b,p->prev)].next = p->next; + p->prev = 0; + } + else + avail[p->kval].next = 0; + if(p->next != 0){ + blocks[BLOCK(b,p->next)].prev = p->prev; + p->next = 0; + } + p->tag = Used; + + /* + * Now can try to merge this larger block. 
+ k++; + */ + DBG("\tbsl: l @ %ld p @ %ld\n", l - blocks, p - blocks); + if(p < l) + l = p; + i = l - blocks + INDEX(b,b->memory); + l->kval++; + DBG("bsl: merge @ i %d BLOCK(b,i) %d kval %d next %d tag %s\n", + i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used"); + goto S1; +} + +void +physfree(uintmem data, u64int size) +{ + Bal *b; + int i; + + for(i = 0; i < Ndoms; i++){ + b = &bal[i]; + if(b->base <= data && data < b->base + b->size){ + xphysfree(b, data, size); + return; + } + } + panic("physfree: no bal"); +} + +static void* +xphystag(Bal *b, uintmem data) +{ + uint i; + Buddy *blocks; + + DBG("phystag\n"); + + blocks = b->blocks; + + if(data == 0 /*|| !ALIGNED(data, b->bminsz)*/) + return nil; + i = INDEX(b,data); + return blocks[BLOCK(b,i)].p; +} + +void* +phystag(uintmem data) +{ + Bal *b; + int i; + + for(i = 0; i < Ndoms; i++){ + b = &bal[i]; + if(b->base <= data && data < b->base + b->size) + return xphystag(b, data); + } + return nil; +} + +static uchar lg2table[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +}; + +static int +lg2floor(u64int w) +{ + u64int hi, lo; + + if((lo = (w>>48)) != 0){ + if((hi = (lo>>8)) != 0) + return 56+lg2table[hi]; + return 48+lg2table[lo]; + } + if((lo = (w>>32)) != 0){ 
+ if((hi = (lo>>8)) != 0) + return 40+lg2table[hi]; + return 32+lg2table[lo]; + } + if((lo = (w>>16)) != 0){ + if((hi = (lo>>8)) != 0) + return 24+lg2table[hi]; + return 16+lg2table[lo]; + } + if((hi = (w>>8)) != 0) + return 8+lg2table[hi]; + return lg2table[w]; +} + +/* + * Reserve a block from allocator b. Sizes below b->bminsz are + * raised to b->bminsz; a size that is not a power of two, or for + * which no block of sufficient order is free, yields 0. + */ +static uintmem +xphysalloc(Bal *b, u64int size, void *tag) +{ + uint i, j, k; + Buddy *l, *p; + Buddy *avail, *blocks; + uintmem m; + + DBG("physalloc\n"); + assert(b->size > 0); + + avail = b->avail; + blocks = b->blocks; + + /* + * Knuth's Algorithm R (Buddy System Reservation). + */ + if(size < b->bminsz) + size = b->bminsz; + + /* + * Find block. + */ + if(!ISPOWEROF2(size)) + return 0; + k = lg2floor(size); + + lock(&budlock); + for(j = k; j <= b->kmax; j++){ + if(avail[j].next != 0) + break; + } + DBG("bsr: size %#llud k %d j %d\n", size, k, j); + if(j > b->kmax){ + unlock(&budlock); + return 0; + } + + /* + * Remove from list. + * The next/prev fields hold absolute block indices, so the + * new head must be translated with BLOCK() before indexing + * blocks[], and there is nothing to patch when the list + * has become empty. + */ + i = avail[j].next; + l = &blocks[BLOCK(b,i)]; + DBG("bsr: block @ i %d BLOCK(b,i) %d kval %d next %d %s\n", + i, BLOCK(b,i), l->kval, l->next, l->tag?"avail":"used"); + avail[j].next = l->next; + if(avail[j].next != 0) + blocks[BLOCK(b,avail[j].next)].prev = 0; + l->prev = l->next = 0; + l->tag = Used; + l->kval = k; + + /* + * Split required? + */ + while(j > k){ + /* + * Split. 
+ */ + j--; + p = &blocks[BLOCK(b,i) + (UNO<bminsz)]; + p->tag = Avail; + p->kval = j; + p->next = avail[j].next; + p->prev = 0; + if(p->next != 0) + blocks[BLOCK(b,p->next)].prev = i + (UNO<bminsz); + avail[j].next = i + (UNO<bminsz); + DBG("bsr: split @ i %d BLOCK(b,i) %ld j %d next %d (%d) %s\n", + i, p - blocks, j, p->next, BLOCK(b,p->next), + p->tag?"avail":"used"); + } + b->nfree -= size/b->bminsz; + unlock(&budlock); + + m = b->memory + b->bminsz*BLOCK(b,i); + assert(m >= b->base && m < b->base + b->size); + blocks[BLOCK(b,i)].p = tag; + + return m; +} + +uintmem +physalloc(u64int size, int *colorp, void *tag) +{ + int i, color; + uintmem m; + + m = 0; + + color = *colorp; + if(color >= 0){ + color %= ndoms; + if(bal[color].kmin > 0){ + *colorp = color; + m = xphysalloc(&bal[color], size, tag); + } + } + if(m == 0) + for(i = 0; i < ndoms; i++) + if(bal[i].kmin > 0) + if((m = xphysalloc(&bal[i], size, tag)) != 0){ + *colorp = i; + return m; + } + return m; +} + +static void +dump(Bal *b) +{ + uint bi, i, k; + Buddy *blocks; + + blocks = b->blocks; + for(i = 0; i < (UNO<<(b->kmax-b->kmin+1)); i++){ + if(blocks[i].tag == Used) + continue; + print("blocks[%d]: size %d prev %d next %d\n", + i, 1<blocks[i].kval, blocks[i].prev, blocks[i].next); + //i += (1<bminsz-1; + } + + for(k = 0; k <= b->kmax; k++){ + print("a[%d]:", k); + for(bi = b->avail[k].next; bi != 0; bi = blocks[BLOCK(b,bi)].next){ + print(" %d", bi); + } + print("\n"); + } +} + +void +physallocdump(void) +{ + int n; + + for(n = 0; n < Ndoms; n++) + if(bal[n].size > 0) + print("physalloc color=%d base=%#ullx size=%#ullx\n", + n, bal[n].base, bal[n].size); +} + +static int +plop(Bal *b, uintmem a, int k, int type) +{ + uint i; + Buddy *l; + + + DBG("plop(a %#p k %d type %d)\n", a, k, type); + + i = INDEX(b,a); + l = &b->blocks[BLOCK(b,i)]; + + l->kval = k; + xphysfree(b, a, 1<bminsz); + e = ROUNDDN(e, b->bminsz); + DBG("iimbchunk: start a %#P e %#P\n", a, e); + + b->nblocks += (e-a)/b->bminsz; + + 
for(k = b->kmin, s = b->bminsz; a+s < e && k < b->kmax; s <<= 1, k += 1){ + if(a & s){ + plop(b, a, k, type); + a += s; + } + } + DBG("done1 a %#P e %#P s %#ux %d\n", a, e, s, k); + + while(a+s <= e){ + plop(b, a, k, type); + a += s; + } + DBG("done2 a %#P e %#P s %#ux %d\n", a, e, s, k); + + for(k -= 1, s >>= 1; a < e; s >>= 1, k -= 1){ + if(a+s <= e){ + plop(b, a, k, type); + a += s; + } + } + DBG("done3 a %#P e %#P s %#ux %d\n", a, e, s, k); + + return 0; +} + +/* + * Called from umeminit to initialize user memory allocators. + */ +void +physinit(uintmem a, u64int size) +{ + uintmem dtsz; + Bal *b; + int i, dom; + uintmem addr, len; + + DBG("physinit %#ullx %#ullx\n", a, size); + + for(addr = a; addr < a+size; addr += len){ + dom = 0; + len = acpimblocksize(addr, &dom); + /* len can be zero if there's no acpi information about addr */ + if(len == 0 || addr + len > a + size) + len = a + size - addr; + /* + * Each block belongs to a different domain (ie. cpu/mem socket) + * We must create a buddy allocator for each block, so we could + * allocate memory from different domains. + * + * This code assumes that a domain may be extended later and + * that there is no interleaving of domains. Ok by now. 
+ */ + DBG("physmem block dom %d addr %#ullx size %#ullx\n", + dom, addr, len); + if(dom < 0 || dom >= Ndoms){ + print("physinit: invalid dom %d\n", dom); + dom = 0; + } + b = &bal[dom]; + if(dom >= ndoms) + ndoms = dom+1; + if(b->kmin == 0){ + b->base = addr; + b->size = len; + b->kmin = BKmin; + b->kmax = BKmax; + b->bminsz = (UNO<kmin); + b->memory = sys->pmstart; + b->kspan = lg2floor(sys->pmend); + if(!ISPOWEROF2(sys->pmend)) + b->kspan++; + dtsz = sizeof(Buddy)*(UNO<<(b->kspan-b->kmin+1)); + DBG("kspan %ud (arrysz = %llud)\n", b->kspan, dtsz); + b->blocks = malloc(dtsz); + if(b->blocks == nil) + panic("physinit: no blocks"); + memset(b->blocks, 0, dtsz); + b->avail = malloc(sizeof(Buddy)*(b->kmax+1)); + if(b->avail == nil) + panic("physinit: no avail"); + memset(b->avail, 0, sizeof(Buddy)*(b->kmax+1)); + }else{ + if(addr < b->base) + panic("physinit: decreasing base"); + if(b->base+b->size < addr + len) + b->size = (addr-b->base) + len; + for(i = 0; i < Ndoms; i++) + if(bal[i].kmin && &bal[i] != b) + if(bal[i].base < b->base + b->size && + bal[i].base + bal[i].size > b->base + b->size) + panic("physinit: doms overlap"); + } + assert(addr >= b->base && addr+len <= b->base + b->size); + + iimbchunk(b, addr, addr+len, 0); + } + + +} diff -Nru 0/sys/src/nix/k10/pmcio.c 4/sys/src/nix/k10/pmcio.c --- 0/sys/src/nix/k10/pmcio.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/pmcio.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,468 @@ +/* + * Performance counters non port part + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "amd64.h" +#include "../port/pmc.h" + + +/* non portable, for intel will be CPUID.0AH.EDX + */ + +enum { + PeNreg = 4, /* Number of Pe/Pct regs */ +}; + +int +pmcnregs(void) +{ + /* could run CPUID to see if there are registers, + * PmcMaxCtrs + */ + return PeNreg; +} + +//PeHo|PeGo +#define PeAll (PeOS|PeUsr) +#define SetEvMsk(v, e) 
((v)|(((e)&PeEvMskL)|(((e)<<(PeEvMsksh-8))&PeEvMskH))) +#define SetUMsk(v, u) ((v)|(((u)<<8ull)&PeUnMsk)) + +#define GetEvMsk(e) (((e)&PeEvMskL)|(((e)&PeEvMskH)>>(PeEvMsksh-8))) +#define GetUMsk(u) (((u)&PeUnMsk)>>8ull) + +static int +pmcuserenab(int enable) +{ + u64int cr4; + + cr4 = cr4get(); + if (enable){ + cr4 |= Pce; + } else + cr4 &= ~Pce; + cr4put(cr4); + return cr4&Pce; +} + +PmcCtlCtrId pmcids[] = { + {"locked instr", "0x024 0x1"}, + {"locked cycles nonspec", "0x024 0x4"}, // cycles + {"SMI intr", "0x02b 0x0"}, + {"DC access", "0x040 0x0"}, + {"DC miss", "0x041 0x0"}, + {"DC refills", "0x042 0x1f"}, + {"DC evicted", "0x042 0x3f"}, + {"L1 DTLB miss", "0x045 0x7"}, //DTLB L2 hit + {"L2 DTLB miss", "0x046 0x7"}, + {"L1 DTLB hit", "0x04d 0x3"}, + {"global TLB flush", "0x054 0x0"}, + {"L2 hit", "0x07d 0x3f"}, + {"L2 miss", "0x07e 0xf"}, + {"IC miss", "0x081 0x0"}, + {"IC refill from L2", "0x082 0x0"}, + {"IC refill from system", "0x083 0x0"}, + {"L1 ITLB miss", "0x084 0x0"}, //L2 ITLB hit + {"L2 ITLB miss", "0x085 0x3"}, + {"DRAM access", "0x0e0 0x3f"}, + {"L3 miss core 0", "0x4e1 0x13"}, //core 0 only + {"L3 miss core 1", "0x4e1 0x23"}, + {"L3 miss core 2", "0x4e1 0x43"}, + {"L3 miss core 3", "0x4e1 0x83"}, + {"L3 miss socket", "0x4e1 0xf3"}, //all cores in the socket + {"", ""}, +}; + +int +pmctrans(PmcCtl *p) +{ + PmcCtlCtrId *pi; + + for (pi = &pmcids[0]; pi->portdesc[0] != '\0'; pi++){ + if ( strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){ + strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1); + return 0; + } + } + return 1; +} + +static int +getctl(PmcCtl *p, u32int regno) +{ + u64int r, e, u; + + r = rdmsr(regno + PerfEvtbase); + p->enab = (r&PeCtEna) != 0; + p->user = (r&PeUsr) != 0; + p->os = (r&PeOS) != 0; + e = GetEvMsk(r); + u = GetUMsk(r); + //TODO inverse translation + snprint(p->descstr, KNAMELEN, "%#ullx %#ullx", e, u); + p->nodesc = 0; + return 0; +} + +int +pmcanyenab(void) +{ + int i; + PmcCtl p; + + for (i = 0; i < 
pmcnregs(); i++) { + if (getctl(&p, i) < 0) + return -1; + if (p.enab) + return 1; + } + + return 0; +} + +extern int pmcdebug; + +static int +setctl(PmcCtl *p, int regno) +{ + u64int v, e, u; + char *toks[2]; + char str[KNAMELEN]; + + if (regno >= pmcnregs()) + error("invalid reg"); + + v = rdmsr(regno + PerfEvtbase); + v &= PeEvMskH|PeEvMskL|PeCtEna|PeOS|PeUsr|PeUnMsk; + if (p->enab != PmcCtlNullval) + if (p->enab) + v |= PeCtEna; + else + v &= ~PeCtEna; + + if (p->user != PmcCtlNullval) + if (p->user) + v |= PeUsr; + else + v &= ~PeUsr; + + if (p->os != PmcCtlNullval) + if (p->os) + v |= PeOS; + else + v &= ~PeOS; + + if (pmctrans(p) < 0) + return -1; + + if (p->nodesc == 0) { + memmove(str, p->descstr, KNAMELEN); + if (tokenize(str, toks, 2) != 2) + return -1; + e = atoi(toks[0]); + u = atoi(toks[1]); + v &= ~(PeEvMskL|PeEvMskH|PeUnMsk); + v |= SetEvMsk(v, e); + v |= SetUMsk(v, u); + } + if (p->reset != PmcCtlNullval && p->reset) { + v = 0; + wrmsr(regno+ PerfCtrbase, 0); + p->reset = PmcCtlNullval; /* only reset once */ + } + wrmsr(regno+ PerfEvtbase, v); + pmcuserenab(pmcanyenab()); + if (pmcdebug) { + v = rdmsr(regno+ PerfEvtbase); + print("conf pmc[%#ux]: %#llux\n", regno, v); + } + return 0; +} + +int +pmcctlstr(char *str, int nstr, PmcCtl *p) +{ + int ns; + + ns = 0; + if (p->enab && p->enab != PmcCtlNullval) + ns += snprint(str + ns, nstr - ns, "enable\n"); + else + ns += snprint(str + ns, nstr - ns, "disable\n"); + + if (p->user && p->user != PmcCtlNullval) + ns += snprint(str + ns, nstr - ns, "user\n"); + if (p->os && p->user != PmcCtlNullval) + ns += snprint(str + ns, nstr - ns, "os\n"); + + //TODO, inverse pmctrans? 
+ if(!p->nodesc) + ns += snprint(str + ns, nstr - ns, "%s\n", p->descstr); + else + ns += snprint(str + ns, nstr - ns, "no desc\n"); + return ns; +} + +int +pmcdescstr(char *str, int nstr) +{ + PmcCtlCtrId *pi; + int ns; + + ns = 0; + + for (pi = &pmcids[0]; pi->portdesc[0] != '\0'; pi++) + ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc); + return ns; +} + +static u64int +getctr(u32int regno) +{ + return rdmsr(regno + PerfCtrbase); +} + +static int +setctr(u64int v, u32int regno) +{ + wrmsr(regno + PerfCtrbase, v); + return 0; +} + +static int +notstale(void *x) +{ + PmcCtr *p; + p = (PmcCtr *)x; + return !p->stale; +} + +static PmcWait* +newpmcw(void) +{ + PmcWait *w; + + w = malloc(sizeof (PmcWait)); + w->ref = 1; + return w; +} + +static void +pmcwclose(PmcWait *w) +{ + if(decref(w)) + return; + + free(w); +} + +/* + * As it is now, it sends an IPI if the processor is otherwise + * ocuppied for it to update the counter. Probably not needed + * for TC/XC as it will be updated every time we cross the kernel + * boundary, but we are doing it now just in case it is idle or + * not being updated NB: this function releases the ilock + */ + +static void +waitnotstale(Mach *mp, PmcCtr *p) +{ + PmcWait *w; + + p->stale = 1; + w = newpmcw(); + w->next = p->wq; + p->wq = w; + incref(w); + iunlock(&mp->pmclock); + apicipi(mp->apicno); + if(waserror()){ + pmcwclose(w); + nexterror(); + } + sleep(&w->r, notstale, p); + poperror(); + pmcwclose(w); +} + +/* + * The reason this is not racy is subtle. + * + * If the processor suddenly changes state to busy once I have + * decided not to IPI it, I don't wait for it. + * + * In the other case, I have decided to IPI it and hence, wait. + * The problem then is that it switches to idle (not + * interruptible) and I wait forever but this switch crosses + * kernel boundaries and gets the pmclock. One of us gets there + * first and either I never sleep (p->stale iscleared) or I sleep + * and get waken after. 
pmclock + rendez locks make sure this is + * the case. + */ +static int +shouldipi(Mach *mp) +{ + if(mp->nixrole == NIXUC) + return 0; + + if(mp->proc == nil && mp->nixrole == NIXAC) + return 0; + + return 1; +} + +u64int +pmcgetctr(u32int coreno, u32int regno) +{ + PmcCtr *p; + Mach *mp; + u64int v; + + if(coreno == m->machno){ + v = getctr(regno); + if (pmcdebug) { + print("int getctr[%#ux, %#ux] = %#llux\n", regno, coreno, v); + } + return v; + } + + mp = sys->machptr[coreno]; + p = &mp->pmc[regno]; + ilock(&mp->pmclock); + p->ctrset |= PmcGet; + if(shouldipi(mp)){ + waitnotstale(mp, p); + ilock(&mp->pmclock); + } + v = p->ctr; + iunlock(&mp->pmclock); + if (pmcdebug) { + print("ext getctr[%#ux, %#ux] = %#llux\n", regno, coreno, v); + } + return v; +} + +int +pmcsetctr(u32int coreno, u64int v, u32int regno) +{ + PmcCtr *p; + Mach *mp; + + if(coreno == m->machno){ + if (pmcdebug) { + print("int getctr[%#ux, %#ux] = %#llux\n", regno, coreno, v); + } + return setctr(v, regno); + } + + mp = sys->machptr[coreno]; + p = &mp->pmc[regno]; + if (pmcdebug) { + print("ext setctr[%#ux, %#ux] = %#llux\n", regno, coreno, v); + } + ilock(&mp->pmclock); + p->ctr = v; + p->ctrset |= PmcSet; + if(shouldipi(mp)) + waitnotstale(mp, p); + else + iunlock(&mp->pmclock); + return 0; +} + +static void +ctl2ctl(PmcCtl *dctl, PmcCtl *sctl) +{ + if(sctl->enab != PmcCtlNullval) + dctl->enab = sctl->enab; + if(sctl->user != PmcCtlNullval) + dctl->user = sctl->user; + if(sctl->os != PmcCtlNullval) + dctl->os = sctl->os; + if(sctl->nodesc == 0) { + memmove(dctl->descstr, sctl->descstr, KNAMELEN); + dctl->nodesc = 0; + } +} + +int +pmcsetctl(u32int coreno, PmcCtl *pctl, u32int regno) +{ + PmcCtr *p; + Mach *mp; + + if(coreno == m->machno) + return setctl(pctl, regno); + + mp = sys->machptr[coreno]; + p = &mp->pmc[regno]; + ilock(&mp->pmclock); + ctl2ctl(&p->PmcCtl, pctl); + p->ctlset |= PmcSet; + if(shouldipi(mp)) + waitnotstale(mp, p); + else + iunlock(&mp->pmclock); + return 0; +} + +int 
+pmcgetctl(u32int coreno, PmcCtl *pctl, u32int regno) +{ + PmcCtr *p; + Mach *mp; + + if(coreno == m->machno) + return getctl(pctl, regno); + + mp = sys->machptr[coreno]; + p = &mp->pmc[regno]; + + ilock(&mp->pmclock); + p->ctlset |= PmcGet; + if(shouldipi(mp)){ + waitnotstale(mp, p); + ilock(&mp->pmclock); + } + memmove(pctl, &p->PmcCtl, sizeof(PmcCtl)); + iunlock(&mp->pmclock); + return 0; +} + +void +pmcupdate(Mach *m) +{ + PmcCtr *p; + int i, maxct, wk; + PmcWait *w; + + maxct = pmcnregs(); + for (i = 0; i < maxct; i++) { + p = &m->pmc[i]; + ilock(&m->pmclock); + if(p->ctrset & PmcSet) + setctr(p->ctr, i); + if(p->ctlset & PmcSet) + setctl(p, i); + p->ctr = getctr(i); + getctl(p, i); + p->ctrset = PmcIgn; + p->ctlset = PmcIgn; + wk = p->stale; + p->stale = 0; + if(wk){ + for(w = p->wq; w != nil; w = w->next){ + p->wq = w->next; + wakeup(&w->r); + pmcwclose(w); + } + } + iunlock(&m->pmclock); + } +} + diff -Nru 0/sys/src/nix/k10/sdahci.c 4/sys/src/nix/k10/sdahci.c --- 0/sys/src/nix/k10/sdahci.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/sdahci.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2133 @@ +/* + * intel/amd ahci sata controller + * copyright © 2007-12 coraid, inc. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/sd.h" +#include +#include "../port/sdfis.h" +#include "ahci.h" +#include "../port/led.h" + +#pragma varargck type "T" int +#define dprint(...) if(debug) iprint(__VA_ARGS__); else USED(debug) +#define idprint(...) if(prid) print(__VA_ARGS__); else USED(prid) +#define aprint(...) if(datapi) print(__VA_ARGS__); else USED(datapi) +#define ledprint(...) 
if(dled) print(__VA_ARGS__); else USED(dled) +#define Ticks sys->ticks +#define Pciwaddrh(va) ((u32int)(PCIWADDR(va)>>32)) + +enum { + NCtlr = 4, + NCtlrdrv = 32, + NDrive = NCtlr*NCtlrdrv, + + Fahdrs = 4, + + Read = 0, + Write, + + Eesb = 1<<0, /* must have (Eesb & Emtype) == 0 */ + + /* pci space configuration */ + Pmap = 0x90, + Ppcs = 0x91, + + Nms = 256, + Mphywait = 2*1024/Nms - 1, + Midwait = 16*1024/Nms - 1, + Mcomrwait = 64*1024/Nms - 1, +}; + +enum { + Tesb, + Tsb600, + Tjmicron, + Tahci, + Tlast, +}; + +typedef struct Ctlrtype Ctlrtype; +typedef struct Ctlr Ctlr; +typedef struct Drive Drive; + +struct Ctlrtype { + uint type; + uint maxdmaxfr; + uint flags; + char *name; +}; + +Ctlrtype cttab[Tlast] = { +[Tesb] Tesb, 8192, 0, "63xxesb", +[Tsb600] Tsb600, 256, 0, "sb600", +[Tjmicron] Tjmicron, 8192, 0, "jmicron", +[Tahci] Tahci, 8192, 0, "ahci", +}; + +enum { + Dnull = 0, + Dmissing = 1<<0, + Dnew = 1<<1, + Dready = 1<<2, + Derror = 1<<3, + Dreset = 1<<4, + Doffline = 1<<5, + Dportreset = 1<<6, + Dlast = 8, +}; + +static char *diskstates[Dlast] = { + "null", + "missing", + "new", + "ready", + "error", + "reset", + "offline", + "portreset", +}; + +extern SDifc sdahciifc; + +enum { + DMautoneg, + DMsatai, + DMsataii, + DMsataiii, + DMlast, +}; + +static char *modes[DMlast] = { + "auto", + "satai", + "sataii", + "sataiii", +}; + +typedef struct Htab Htab; +struct Htab { + uint bit; + char *name; +}; + +struct Drive { + Lock; + + Ctlr *ctlr; + SDunit *unit; + char name[10]; + Aport *port; + Aportm portm; + Aportc portc; /* redundant ptr to port and portm. */ + Ledport; + + ulong totick; + ulong lastseen; + uint wait; + uchar mode; + uchar state; + + /* + * ahci allows non-sequential ports. 
+ * to avoid this hassle, we let + * driveno ctlr*NCtlrdrv + unit + * portno nth available port + */ + uint driveno; + uint portno; +}; + +struct Ctlr { + Lock; + + Ctlrtype *type; + int enabled; + SDev *sdev; + Pcidev *pci; + + uchar *mmio; + u32int *lmmio; + Ahba *hba; + Aenc; + uint enctype; + + Drive rawdrive[NCtlrdrv]; + Drive* drive[NCtlrdrv]; + int ndrive; + uint pi; +}; + +static Ctlr iactlr[NCtlr]; +static SDev sdevs[NCtlr]; +static int niactlr; +static ushort olds[NCtlr*NCtlrdrv]; + +static Drive *iadrive[NDrive]; +static int niadrive; + +static int debug; +static int prid = 1; +static int datapi; +static int dled; + +static char stab[] = { +[0] 'i', 'm', +[8] 't', 'c', 'p', 'e', +[16] 'N', 'I', 'W', 'B', 'D', 'C', 'H', 'S', 'T', 'F', 'X' +}; + +static void +serrstr(u32int r, char *s, char *e) +{ + int i; + + e -= 3; + for(i = 0; i < nelem(stab) && s < e; i++) + if(r & (1<task, p->cmd, p->ci, p->isr); +} + +static void +esleep(int ms) +{ + if(waserror()) + return; + tsleep(&up->sleep, return0, 0, ms); + poperror(); +} + +typedef struct { + Aport *p; + int i; +} Asleep; + +static int +ahciclear(void *v) +{ + Asleep *s; + + s = v; + return (s->p->ci & s->i) == 0; +} + +static void +aesleep(Aportm *m, Asleep *a, int ms) +{ + if(waserror()) + return; + tsleep(m, ahciclear, a, ms); + poperror(); +} + +static int +ahciwait(Aportc *c, int ms) +{ + Aport *p; + Asleep as; + + p = c->p; + p->ci = 1; + as.p = p; + as.i = 1; + aesleep(c->m, &as, ms); + if((p->task & 1) == 0 && p->ci == 0) + return 0; + dreg("ahciwait fail/timeout ", c->p); + return -1; +} + +static void +mkalist(Aportm *m, uint flags, uchar *data, int len) +{ + Actab *t; + Alist *l; + Aprdt *p; + + t = m->ctab; + l = m->list; + l->flags = flags | 0x5; + l->len = 0; + l->ctab = PCIWADDR(t); + l->ctabhi = Pciwaddrh(t); + if(data){ + l->flags |= 1<<16; + p = &t->prdt; + p->dba = PCIWADDR(data); + p->dbahi = Pciwaddrh(data); + p->count = 1<<31 | len - 2 | 1; + } +} + +static int +settxmode(Aportc *pc, 
uchar f) +{ + uchar *c; + + c = pc->m->ctab->cfis; + if(txmodefis(pc->m, c, f) == -1) + return 0; + mkalist(pc->m, Lwrite, 0, 0); + return ahciwait(pc, 3*1000); +} + +static void +asleep(int ms) +{ + if(up == nil) + delay(ms); + else + esleep(ms); +} + +static int +ahciportreset(Aportc *c, uint mode) +{ + int i; + u32int *cmd; + Aport *p; + + p = c->p; + cmd = &p->cmd; + *cmd &= ~(Afre|Ast); + for(i = 0; i < 500; i += 25){ + if((*cmd & Acr) == 0) + break; + asleep(25); + } + p->sctl = 3*Aipm | 0*Aspd | Adet; + delay(1); + p->sctl = 3*Aipm | mode*Aspd; + return 0; +} + +static int +ahciquiet(Aport *a) +{ + int i; + u32int *p; + + p = &a->cmd; + *p &= ~Ast; + for(i = 0; i < 500; i += 50){ + if((*p & Acr) == 0) + goto stop; + asleep(50); + } + return -1; +stop: + if((a->task & (ASdrq|ASbsy)) == 0){ + *p |= Ast; + return 0; + } + + *p |= Aclo; + for(i = 0; i < 500; i += 50){ + if((*p & Aclo) == 0) + goto stop1; + asleep(50); + } + return -1; +stop1: + /* extra check */ + dprint("ahci: clo clear %ux\n", a->task); + if(a->task & ASbsy) + return -1; + *p |= Afre | Ast; + return 0; +} + +static int +ahcicomreset(Aportc *pc) +{ + uchar *c; + + dreg("comreset ", pc->p); + if(ahciquiet(pc->p) == -1){ + dprint("ahci: ahciquiet failed\n"); + return -1; + } + dreg("comreset ", pc->p); + + c = pc->m->ctab->cfis; + nopfis(pc->m, c, 1); + mkalist(pc->m, Lclear | Lreset, 0, 0); + if(ahciwait(pc, 500) == -1){ + dprint("ahci: comreset1 failed\n"); + return -1; + } + microdelay(250); + dreg("comreset ", pc->p); + + nopfis(pc->m, c, 0); + mkalist(pc->m, Lwrite, 0, 0); + if(ahciwait(pc, 150) == -1){ + dprint("ahci: comreset2 failed\n"); + return -1; + } + dreg("comreset ", pc->p); + return 0; +} + +static int +ahciidle(Aport *port) +{ + int i, r; + u32int *p; + + p = &port->cmd; + if((*p & Arun) == 0) + return 0; + *p &= ~Ast; + r = 0; + for(i = 0; i < 500; i += 25){ + if((*p & Acr) == 0) + goto stop; + asleep(25); + } + r = -1; +stop: + if((*p & Afre) == 0) + return r; + *p &= ~Afre; + 
for(i = 0; i < 500; i += 25){ + if((*p & Afre) == 0) + return 0; + asleep(25); + } + return -1; +} + +/* + * §6.2.2.1 first part; comreset handled by reset disk. + * - remainder is handled by configdisk. + * - ahcirecover is a quick recovery from a failed command. + */ +static int +ahciswreset(Aportc *pc) +{ + int i; + + i = ahciidle(pc->p); + pc->p->cmd |= Afre; + if(i == -1) + return -1; + if(pc->p->task & (ASdrq|ASbsy)) + return -1; + return 0; +} + +static int +ahcirecover(Aportc *pc) +{ + ahciswreset(pc); + pc->p->cmd |= Ast; + if(settxmode(pc, pc->m->udma) == -1) + return -1; + return 0; +} + +static void +setupfis(Afis *f) +{ + f->base = mallocalign(0x100, 0x100, 0, 0); + f->d = f->base + 0; + f->p = f->base + 0x20; + f->r = f->base + 0x40; + f->u = f->base + 0x60; + f->devicebits = (u32int*)(f->base + 0x58); +} + +static void +ahciwakeup(Aportc *c, uint mode) +{ + ushort s; + + s = c->p->sstatus; + if((s & Isleepy) == 0) + return; + if((s & Smask) != Spresent){ + print("ahci: slumbering drive missing %.3ux\n", s); + return; + } + ahciportreset(c, mode); +// iprint("ahci: wake %.3ux -> %.3lux\n", s, c->p->sstatus); +} + +static int +ahciconfigdrive(Ahba *h, Aportc *c, int mode) +{ + Aportm *m; + Aport *p; + + p = c->p; + m = c->m; + + if(m->list == 0){ + setupfis(&m->fis); + m->list = mallocalign(sizeof *m->list, 1024, 0, 0); + m->ctab = mallocalign(sizeof *m->ctab, 128, 0, 0); + } + + p->list = PCIWADDR(m->list); + p->listhi = Pciwaddrh(m->list); + p->fis = PCIWADDR(m->fis.base); + p->fishi = Pciwaddrh(m->fis.base); + + p->cmd |= Afre; + + if((p->sstatus & Sbist) == 0 && (p->cmd & Apwr) != Apwr) + if((p->sstatus & Sphylink) == 0 && h->cap & Hss){ + dprint("ahci: spin up ... 
[%.3ux]\n", p->sstatus); + p->cmd |= Apwr; + for(int i = 0; i < 1400; i += 50){ + if(p->sstatus & (Sphylink | Sbist)) + break; + asleep(50); + } + } + + p->serror = SerrAll; + + if((p->sstatus & SSmask) == (Isleepy | Spresent)) + ahciwakeup(c, mode); + /* disable power managment sequence from book. */ + p->sctl = 3*Aipm | mode*Aspd | 0*Adet; + p->cmd &= ~Aalpe; + + p->cmd |= Ast; + p->ie = IEM; + + return 0; +} + +static void +setstate(Drive *d, int state) +{ + ilock(d); + d->state = state; + iunlock(d); +} + +static void +ahcienable(Ahba *h) +{ + h->ghc |= Hie; +} + +static void +ahcidisable(Ahba *h) +{ + h->ghc &= ~Hie; +} + +static int +countbits(u32int u) +{ + int i, n; + + n = 0; + for(i = 0; i < 32; i++) + if(u & (1<hba = (Ahba*)c->mmio; + u = h->cap; + if((u & Ham) == 0) + h->ghc |= Hae; + return countbits(h->pi); +} + +static int +ahcihbareset(Ahba *h) +{ + int wait; + + h->ghc |= Hhr; + for(wait = 0; wait < 1000; wait += 100){ + if(h->ghc == 0) + return 0; + delay(100); + } + return -1; +} + +static char* +dstate(uint s) +{ + int i; + + for(i = 0; s; i++) + s >>= 1; + return diskstates[i]; +} + +static char* +tnam(Ctlr *c) +{ + return c->type->name; +} + +static char* +dnam(Drive *d) +{ + char *s; + + s = d->name; + if(d->unit && d->unit->name) + s = d->unit->name; + return s; +} + +static void +clearci(Aport *p) +{ + if(p->cmd & Ast){ + p->cmd &= ~Ast; + p->cmd |= Ast; + } +} + +static int +intel(Ctlr *c) +{ + return c->pci->vid == 0x8086; +} + +static int +ignoreahdrs(Drive *d) +{ + return d->portm.feat & Datapi && d->ctlr->type->type == Tsb600; +} + +static void +updatedrive(Drive *d) +{ + u32int f, cause, serr, s0, pr, ewake; + Aport *p; + static u32int last; + + pr = 1; + ewake = 0; + f = 0; + p = d->port; + cause = p->isr; + if(d->ctlr->type->type == Tjmicron) + cause &= ~Aifs; + serr = p->serror; + p->isr = cause; + + if(p->ci == 0){ + f |= Fdone; + pr = 0; + }else if(cause & Adps) + pr = 0; + if(cause & Ifatal){ + ewake = 1; + dprint("%s: fatal\n", 
dnam(d)); + } + if(cause & Adhrs){ + if(p->task & 33){ + if(ignoreahdrs(d) && serr & ErrE) + f |= Fahdrs; + dprint("%s: Adhrs cause %ux serr %ux task %ux\n", + dnam(d), cause, serr, p->task); + f |= Ferror; + ewake = 1; + } + pr = 0; + } + if(p->task & 1 && last != cause) + dprint("%s: err ca %ux serr %ux task %ux sstat %.3ux\n", + dnam(d), cause, serr, p->task, p->sstatus); + if(pr) + dprint("%s: upd %ux ta %ux\n", dnam(d), cause, p->task); + + if(cause & (Aprcs|Aifs)){ + s0 = d->state; + switch(p->sstatus & Smask){ + case Smissing: + d->state = Dmissing; + break; + case Spresent: + if((p->sstatus & Imask) == Islumber) + d->state = Dnew; + else + d->state = Derror; + break; + case Sphylink: + /* power mgnt crap for suprise removal */ + p->ie |= Aprcs|Apcs; /* is this required? */ + d->state = Dreset; + break; + case Sbist: + d->state = Doffline; + break; + } + dprint("%s: %s → %s [Apcrs] %.3ux\n", dnam(d), dstate(s0), + dstate(d->state), p->sstatus); + if(s0 == Dready && d->state != Dready) + idprint("%s: pulled\n", dnam(d)); + if(d->state != Dready) + f |= Ferror; + if(d->state != Dready || p->ci) + ewake = 1; + } + p->serror = serr; + if(ewake) + clearci(p); + if(f){ + d->portm.flag = f; + wakeup(&d->portm); + } + last = cause; +} + +static void +pstatus(Drive *d, u32int s) +{ + /* + * bogus code because the first interrupt is currently dropped. + * likely my fault. serror is maybe cleared at the wrong time. 
+ */ + if(s) + d->lastseen = Ticks; + switch(s){ + default: + print("%s: pstatus: bad status %.3ux\n", dnam(d), s); + case Smissing: + d->state = Dmissing; + break; + case Spresent: + break; + case Sphylink: + d->wait = 0; + d->state = Dnew; + break; + case Sbist: + d->state = Doffline; + break; + } +} + +static int +configdrive(Drive *d) +{ + if(ahciconfigdrive(d->ctlr->hba, &d->portc, d->mode) == -1) + return -1; + ilock(d); + pstatus(d, d->port->sstatus & Smask); + iunlock(d); + return 0; +} + +static void +resetdisk(Drive *d) +{ + uint state, det, stat; + Aport *p; + + p = d->port; + det = p->sctl & 7; + stat = p->sstatus & Smask; + state = (p->cmd>>28) & 0xf; + dprint("%s: resetdisk: icc %ux det %.3ux sdet %.3ux\n", dnam(d), state, det, stat); + + ilock(d); + state = d->state; + if(d->state != Dready || d->state != Dnew) + d->portm.flag |= Ferror; + clearci(p); /* satisfy sleep condition. */ + wakeup(&d->portm); + d->state = Derror; + iunlock(d); + + if(stat != Sphylink){ + setstate(d, Dportreset); + return; + } + + qlock(&d->portm); + if(p->cmd&Ast && ahciswreset(&d->portc) == -1) + setstate(d, Dportreset); /* get a bigger stick. 
*/ + else{ + setstate(d, Dmissing); + configdrive(d); + } + dprint("%s: resetdisk: %s → %s\n", dnam(d), dstate(state), dstate(d->state)); + qunlock(&d->portm); +} + +static int +newdrive(Drive *d) +{ + Aportc *c; + Aportm *m; + + c = &d->portc; + m = &d->portm; + + qlock(c->m); + setfissig(m, c->p->sig); + qunlock(c->m); + + if(ataonline(d->unit, m) != 0) + goto lose; + m->atamaxxfr = 128; + if(d->portm.feat & Dllba) + m->atamaxxfr = d->ctlr->type->maxdmaxfr; + + setstate(d, Dready); + pronline(d->unit, m); + return 0; + +lose: + qlock(c->m); + idprint("%s: can't be initialized\n", dnam(d)); + setstate(d, Dnull); + qunlock(c->m); + return -1; +} + +static int +doportreset(Drive *d) +{ + int i; + + i = -1; + qlock(&d->portm); + if(ahciportreset(&d->portc, d->mode) == -1) + dprint("ahci: ahciportreset fails\n"); + else + i = 0; + qunlock(&d->portm); + dprint("ahci: portreset → %s [task %.4ux ss %.3ux]\n", + dstate(d->state), d->port->task, d->port->sstatus); + return i; +} + +static void +statechange(Drive *d) +{ + Aportm *m; + + m = &d->portm; + switch(d->state){ + case Dnull: + case Doffline: + if(d->unit) + if(d->unit->sectors != 0){ + m->sectors = 0; + m->drivechange = 1; + } + case Dready: + d->wait = 0; + } +} + +static uint +maxmode(Ctlr *c) +{ + return (c->hba->cap & 0xf*Hiss)/Hiss; +} + +static void +checkdrive(Drive *d, int i) +{ + ushort s, sig; + + ilock(d); + s = d->port->sstatus; + if(s) + d->lastseen = Ticks; + if(s != olds[i]){ + dprint("%s: status: %.3ux -> %.3ux: %s\n", + dnam(d), olds[i], s, dstate(d->state)); + olds[i] = s; + d->wait = 0; + } + switch(d->state){ + case Dnull: + case Dready: + break; + case Dmissing: + case Dnew: + switch(s & (Iactive|Smask)){ + case Spresent: + ahciwakeup(&d->portc, d->mode); + case Smissing: + break; + default: + dprint("%s: unknown status %.3ux\n", dnam(d), s); + /* fall through */ + case Iactive: /* active, no device */ + if(++d->wait&Mphywait) + break; +reset: + if(d->mode == 0) + d->mode = maxmode(d->ctlr); + 
else + d->mode--; + if(d->mode == DMautoneg){ + d->state = Dportreset; + goto portreset; + } + dprint("%s: reset; new mode %s\n", dnam(d), + modes[d->mode]); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Iactive | Sphylink: + if(d->unit == nil) + break; + if((++d->wait&Midwait) == 0){ + dprint("%s: slow reset %.3ux task=%ux; %d\n", + dnam(d), s, d->port->task, d->wait); + goto reset; + } + s = (uchar)d->port->task; + sig = d->port->sig >> 16; + if(s == 0x7f || s&ASbsy || + (sig != 0xeb14 && (s & ASdrdy) == 0)) + break; + iunlock(d); + newdrive(d); + ilock(d); + break; + } + break; + case Doffline: + if(d->wait++ & Mcomrwait) + break; + /* fallthrough */ + case Derror: + case Dreset: + dprint("%s: reset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + iunlock(d); + resetdisk(d); + ilock(d); + break; + case Dportreset: +portreset: + if(d->wait++ & 0xff && (s & Iactive) == 0) + break; + dprint("%s: portreset [%s]: mode %d; status %.3ux\n", + dnam(d), dstate(d->state), d->mode, s); + d->portm.flag |= Ferror; + clearci(d->port); + wakeup(&d->portm); + if((s & Smask) == 0){ + d->state = Dmissing; + break; + } + iunlock(d); + doportreset(d); + ilock(d); + break; + } + statechange(d); + iunlock(d); +} + +static void +satakproc(void*) +{ + int i; + + for(;;){ + tsleep(&up->sleep, return0, 0, Nms); + for(i = 0; i < niadrive; i++) + checkdrive(iadrive[i], i); + } +} + +static void +iainterrupt(Ureg*, void *a) +{ + int i; + u32int cause, m; + Ctlr *c; + Drive *d; + + c = a; + ilock(c); + cause = c->hba->isr; + for(i = 0; cause; i++){ + m = 1 << i; + if((cause & m) == 0) + continue; + cause &= ~m; + d = c->rawdrive + i; + ilock(d); + if(d->port->isr && c->pi & m) + updatedrive(d); + c->hba->isr = m; + iunlock(d); + } + iunlock(c); +} + +static int +ahciencreset(Ctlr *c) +{ + Ahba *h; + + if(c->enctype == Eesb) + return 0; + h = c->hba; + h->emctl |= Emrst; + while(h->emctl & Emrst) + delay(1); + return 0; +} + +/* + * from the standard: 
(http://en.wikipedia.org/wiki/IBPI) + * rebuild is preferred as locate+fail; alternate 1hz fail + * we're going to assume no locate led. + */ +enum { + Ledsleep = 125, /* 8hz */ + + N0 = Ledon*Aled, + L0 = Ledon*Aled | Ledon*Locled, + L1 = Ledon*Aled | Ledoff*Locled, + R0 = Ledon*Aled | Ledon*Locled | Ledon*Errled, + R1 = Ledon*Aled | Ledoff*Errled, + S0 = Ledon*Aled | Ledon*Locled /*| Ledon*Errled*/, /* botch */ + S1 = Ledon*Aled | Ledoff*Errled, + P0 = Ledon*Aled | Ledon*Errled, + P1 = Ledon*Aled | Ledoff*Errled, + F0 = Ledon*Aled | Ledon*Errled, + C0 = Ledon*Aled | Ledon*Locled, + C1 = Ledon*Aled | Ledoff*Locled, + +}; + +//static ushort led3[Ibpilast*8] = { +//[Ibpinone*8] 0, 0, 0, 0, 0, 0, 0, 0, +//[Ibpinormal*8] N0, N0, N0, N0, N0, N0, N0, N0, +//[Ibpirebuild*8] R0, R0, R0, R0, R1, R1, R1, R1, +//[Ibpilocate*8] L0, L1, L0, L1, L0, L1, L0, L1, +//[Ibpispare*8] S0, S1, S0, S1, S1, S1, S1, S1, +//[Ibpipfa*8] P0, P1, P0, P1, P1, P1, P1, P1, /* first 1 sec */ +//[Ibpifail*8] F0, F0, F0, F0, F0, F0, F0, F0, +//[Ibpicritarray*8] C0, C0, C0, C0, C1, C1, C1, C1, +//[Ibpifailarray*8] C0, C1, C0, C1, C0, C1, C0, C1, +//}; + +static ushort led2[Ibpilast*8] = { +[Ibpinone*8] 0, 0, 0, 0, 0, 0, 0, 0, +[Ibpinormal*8] N0, N0, N0, N0, N0, N0, N0, N0, +[Ibpirebuild*8] R0, R0, R0, R0, R1, R1, R1, R1, +[Ibpilocate*8] L0, L0, L0, L0, L0, L0, L0, L0, +[Ibpispare*8] S0, S0, S0, S0, S1, S1, S1, S1, +[Ibpipfa*8] P0, P1, P0, P1, P1, P1, P1, P1, /* first 1 sec */ +[Ibpifail*8] F0, F0, F0, F0, F0, F0, F0, F0, +[Ibpicritarray*8] C0, C0, C0, C0, C1, C1, C1, C1, +[Ibpifailarray*8] C0, C1, C0, C1, C0, C1, C0, C1, +}; + +static int +ledstate(Ledport *p, uint seq) +{ + ushort i; + + if(p->led == Ibpipfa && seq%32 >= 8) + i = P1; + else + i = led2[8*p->led + seq%8]; + if(i != p->ledbits){ + p->ledbits = i; + ledprint("ledstate %,.011ub %ud\n", p->ledbits, seq); + return 1; + } + return 0; +} + +static int +blink(Drive *d, uint t) +{ + Ahba *h; + Ctlr *c; + Aledmsg msg; + + if(ledstate(d, t) == 
0) + return 0; + c = d->ctlr; + h = c->hba; + /* ensure last message has been transmitted */ + while(h->emctl & Tmsg) + microdelay(1); + switch(c->enctype){ + default: + panic("%s: bad led type %d", dnam(d), c->enctype); + case Elmt: + memset(&msg, 0, sizeof msg); + msg.type = Mled; + msg.dsize = 0; + msg.msize = sizeof msg - 4; + msg.led[0] = d->ledbits; + msg.led[1] = d->ledbits>>8; + msg.pm = 0; + msg.hba = d->driveno; + memmove(c->enctx, &msg, sizeof msg); + break; + } + h->emctl |= Tmsg; + return 1; +} + +enum { + Esbdrv0 = 4, /* start pos in bits */ + Esbiota = 3, /* shift in bits */ + Esbact = 1, + Esbloc = 2, + Esberr = 4, +}; + +uint +esbbits(uint s) +{ + uint i, e; /* except after c */ + + e = 0; + for(i = 0; i < 3; i++) + e |= ((s>>3*i & 7) != 0)<ndrive; i++){ + d = c->drive[i]; + s |= ledstate(d, t); /* no port mapping */ + } + if(s == 0) + return 0; + memset(u, 0, sizeof u); + for(i = 0; i < c->ndrive; i++){ + d = c->drive[i]; + s = Esbdrv0 + Esbiota*i; + v = esbbits(d->ledbits) * (1ull << s%32); + u[s/32 + 0] |= v; + u[s/32 + 1] |= v>>32; + } + for(i = 0; i < c->encsz; i++) + c->enctx[i] = u[i]; + return 1; +} + +static long +ahciledr(SDunit *u, Chan *ch, void *a, long n, vlong off) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + return ledr(d, ch, a, n, off); +} + +static long +ahciledw(SDunit *u, Chan *ch, void *a, long n, vlong off) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + return ledw(d, ch, a, n, off); +} + +static void +ledkproc(void*) +{ + uchar map[NCtlr]; + uint i, j, t0, t1; + Ctlr *c; + Drive *d; + + j = 0; + memset(map, 0, sizeof map); + for(i = 0; i < niactlr; i++) + if(iactlr[i].enctype != 0){ + ahciencreset(iactlr + i); + map[i] = 1; + j++; + } + if(j == 0) + pexit("no work", 1); + for(i = 0; i < niadrive; i++){ + iadrive[i]->nled = 3; /* hardcoded */ + if(iadrive[i]->ctlr->enctype == Eesb) + iadrive[i]->nled = 3; + iadrive[i]->ledbits = -1; + } + for(i = 0; ; i++){ + t0 = 
Ticks; + for(j = 0; j < niadrive; ){ + c = iadrive[j]->ctlr; + if(map[j] == 0) + j += c->enctype; + else if(c->enctype == Eesb){ + blinkesb(c, i); + j += c->ndrive; + }else{ + d = iadrive[j++]; + blink(d, i); + } + } + t1 = Ticks; + esleep(Ledsleep - TK2MS(t1 - t0)); + } +} + +static int +iaverify(SDunit *u) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + ilock(c); + ilock(d); + if(d->unit == nil){ + d->unit = u; + if(c->enctype != 0) + sdaddfile(u, "led", 0644, eve, ahciledr, ahciledw); + } + iunlock(d); + iunlock(c); + checkdrive(d, d->driveno); /* c->d0 + d->driveno */ + return 1; +} + +static int +iaenable(SDev *s) +{ + char name[32]; + Ctlr *c; + static int once; + + c = s->ctlr; + ilock(c); + if(!c->enabled){ + if(once == 0) + kproc("iasata", satakproc, 0); + if(c->ndrive == 0) + panic("iaenable: zero s->ctlr->ndrive"); + pcisetbme(c->pci); + snprint(name, sizeof name, "%s (%s)", s->name, s->ifc->name); + intrenable(c->pci->intl, iainterrupt, c, c->pci->tbdf, name); + /* supposed to squelch leftover interrupts here. */ + ahcienable(c->hba); + c->enabled = 1; + if(++once == niactlr) + kproc("ialed", ledkproc, 0); + } + iunlock(c); + return 1; +} + +static int +iadisable(SDev *s) +{ + char name[32]; + Ctlr *c; + + c = s->ctlr; + ilock(c); + ahcidisable(c->hba); + snprint(name, sizeof name, "%s (%s)", s->name, s->ifc->name); + print("missing the intrdisable because intrdisable is wierd\n"); +// intrdisable(c->pci->intl, iainterrupt, c, c->pci->tbdf, name); + c->enabled = 0; + iunlock(c); + return 1; +} + +static int +iaonline(SDunit *u) +{ + int r; + Ctlr *c; + Drive *d; + Aportm *m; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + m = &d->portm; + r = 0; + + if(m->feat & Datapi && m->drivechange){ + r = scsionlinex(u, m) == SDok; + if(r > 0) + m->drivechange = 0; + return r; + } + + ilock(d); + if(m->drivechange){ + r = 2; + m->drivechange = 0; + /* devsd resets this after online is called; why? 
*/ + u->sectors = m->sectors; + u->secsize = m->secsize; + }else if(d->state == Dready) + r = 1; + iunlock(d); + return r; +} + +static Alist* +ahcibuildpkt(Aportm *m, SDreq *r, void *data, int n) +{ + uint flags; + uchar *c; + Actab *t; + Alist *l; + + l = m->list; + t = m->ctab; + c = t->cfis; + atapirwfis(m, c, r->cmd, r->clen, n); + flags = 1<<16 | Lpref | Latapi; + if(r->write != 0 && data) + flags |= Lwrite; + mkalist(m, flags, data, n); + return l; +} + +static Alist* +ahcibuildfis(Aportm *m, SDreq *r, void *data, uint n) +{ + uchar *c; + uint flags, dir; + Alist *l; + + l = m->list; + c = m->ctab->cfis; + if((r->ataproto & Pprotom) != Ppkt){ + memmove(c, r->cmd, r->clen); + flags = Lpref; + if(r->ataproto&Pout && n > 0) + flags |= Lwrite; + dir = r->ataproto&Pdatam; + if(dir == Pnd && n == 0) + flags |= Lwrite; + mkalist(m, flags, data, n); + }else{ + atapirwfis(m, c, r->cmd, r->clen, n); + flags = 1<<16 | Lpref | Latapi; + if(r->write && data) + flags |= Lwrite; + mkalist(m, flags, data, n); + } + return l; +} + +static int +isready(Drive *d) +{ + u32int s; + ulong δ; + + if(d->state & (Dreset | Dportreset /*| Dnew*/)) + return 1; + δ = TK2MS(Ticks - d->lastseen); + if(d->state == Dnull || δ > 10*1000){ + dprint("%s: last seen too long ago: %ld\n", dnam(d), δ); + return -1; + } + ilock(d); + s = d->port->sstatus; + iunlock(d); + if((s & Imask) == 0 && δ > 1500){ + dprint("%s: phy off %ldms\n", dnam(d), δ); + return -1; + } + if(d->state & (Dready | Dnew) && (s & Smask) == Sphylink) + return 0; + return 1; +} + +static int +waitready(Drive *d, int tk) +{ + int r; + + for(;;){ + r = isready(d); + if(r <= 0) + return r; + if(tk - Ticks - 10 < 1ul<<31) + return -1; + esleep(10); + } +} + +static int +io(Drive *d, uint proto, int totk, int interrupt) +{ + uint task, flag, rv; + Aport *p; + Asleep as; + + switch(waitready(d, totk)){ + case -1: + return SDeio; + case 1: + return SDretry; + } + + ilock(d); + d->portm.flag = 0; + iunlock(d); + p = d->port; + p->ci 
= 1; + + as.p = p; + as.i = 1; + d->totick = 0; + if(totk > 0) + d->totick = totk | 1; /* fix fencepost */ + + while(waserror()) + if(interrupt){ + d->port->ci = 0; + if(ahcicomreset(&d->portc) == -1) + setstate(d, Dreset); + return SDtimeout; + } + sleep(&d->portm, ahciclear, &as); + poperror(); + + ilock(d); + flag = d->portm.flag; + task = p->task; + iunlock(d); + + rv = SDok; + if(proto & Ppkt){ + rv = task >> 8 + 4 & 0xf; + flag &= ~Fahdrs; + flag |= Fdone; + }else if(task & (Efatal<<8) || task & (ASbsy|ASdrq) && d->state == Dready){ + p->ci = 0; + ahcirecover(&d->portc); + task = p->task; + flag &= ~Fdone; /* either an error or do-over */ + } + if(flag == 0){ + print("%s: retry\n", dnam(d)); + return SDretry; + } + if(flag & (Fahdrs | Ferror)){ + if((task & Eidnf) == 0) + print("%s: i/o error %ux\n", dnam(d), task); + return SDcheck; + } + return rv; +} + +static int +iariopkt(SDreq *r, Drive *d) +{ + int n, count, t, max, δ; + uchar *cmd; + + cmd = r->cmd; + aprint("%s: %.2ux %.2ux %c %d %p\n", dnam(d), cmd[0], cmd[2], + "rw"[r->write], r->dlen, r->data); + r->rlen = 0; + count = r->dlen; + max = 65536; + δ = r->timeout - Ticks; + + for(t = r->timeout; setreqto(r, t) != -1;){ + n = count; + if(n > max) + n = max; + qlock(&d->portm); + ahcibuildpkt(&d->portm, r, r->data, n); + r->status = io(d, Ppkt, r->timeout, 0); + qunlock(&d->portm); + switch(r->status){ + case SDeio: + return r->status = SDcheck; + case SDretry: + continue; + } +// aprint("%s: OK %.2ux :: %d :: %.4lux\n", dnam(d), r->cmd[0], r->status, d->port->task); + r->rlen = d->portm.list->len; + return SDok; + } + print("%s: atapi timeout %dms\n", dnam(d), TK2MS(δ)); + return r->status = SDcheck; +} + +static long +ahcibio(SDunit *u, int lun, int write, void *a, long count0, uvlong lba) +{ + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + if(d->portm.feat & Datapi) + return scsibiox(u, &d->portm, lun, write, a, count0, lba); + return atabio(u, &d->portm, lun, write, a, 
count0, lba); +} + +static int +iario(SDreq *r) /* request entry: atapi drives go to iariopkt, all others to atariosata */ +{ + Ctlr *c; + Drive *d; + SDunit *u; + + u = r->unit; + c = u->dev->ctlr; + d = c->drive[u->subno]; + if((d->state & (Dnew | Dready)) == 0) + return sdsetsense(r, SDcheck, 3, 0x04, 0x24); + if(r->timeout == 0) /* no timeout given: supply a default */ + r->timeout = totk(Ms2tk(600*1000)); + if(d->portm.feat & Datapi) + return iariopkt(r, d); + return atariosata(u, &d->portm, r); +} + +static uchar bogusrfis[16] = { +[Ftype] 0x34, +[Fioport] 0x40, +[Fstatus] 0x50, +[Fdev] 0xa0, +}; + +static void +sdr0(Drive *d) /* preload the drive's received-fis area with bogusrfis */ +{ + uchar *c; + + c = d->portm.fis.r; + memmove(c, bogusrfis, sizeof bogusrfis); + coherence(); +} + +static int +sdr(SDreq *r, Drive *d, int st) /* copy the 16-byte received fis into r->cmd and record st in r->status; SDcheck is reported to the caller as SDok */ +{ + uchar *c; + uint t; + + if((r->ataproto & Pprotom) == Ppkt){ + t = d->port->task; + if(t & ASerr) + st = t >> 8 + 4 & 0xf; /* addition binds tighter than the shift: this is (t >> 12) & 0xf */ + } + c = d->portm.fis.r; + memmove(r->cmd, c, 16); + r->status = st; + if(st == SDcheck) + st = SDok; + return st; +} + +static int +fisreqchk(Sfis *f, SDreq *r) /* SDnostatus means the caller should go on and issue the request */ +{ + if((r->ataproto & Pprotom) == Ppkt) + return SDnostatus; + /* + * handle oob requests; + * restrict & sanitize commands + */ + if(r->clen != 16) + error(Eio); + if(r->cmd[0] == 0xf0){ + sigtofis(f, r->cmd); + r->status = SDok; + return SDok; + } + r->cmd[0] = 0x27; + r->cmd[1] = 0x80; + r->cmd[7] |= 0xa0; + return SDnostatus; +} + +static int +iaataio(SDreq *r) /* issue r->cmd through ahcibuildfis and hand the received fis back through sdr */ +{ + Ctlr *c; + Drive *d; + SDunit *u; + + u = r->unit; + c = u->dev->ctlr; + d = c->drive[u->subno]; + + if(r->timeout == 0) + r->timeout = totk(Ms2tk(600*1000)); + if((r->status = fisreqchk(&d->portm, r)) != SDnostatus) + return r->status; + r->rlen = 0; + sdr0(d); + + qlock(&d->portm); + ahcibuildfis(&d->portm, r, r->data, r->dlen); + r->status = io(d, r->ataproto & Pprotom, -1, 1); /* totk of -1, interruptible */ + qunlock(&d->portm); + if(r->status != SDok) + return r->status; + r->rlen = r->dlen; + if((r->ataproto & Pprotom) == Ppkt) + r->rlen = d->portm.list->len; + return sdr(r, d, r->status); +} + +/* configure drives 0-5 as ahci sata (c.f. 
errata) */ +static int +iaahcimode(Pcidev *p) +{ + uint u; + + u = pcicfgr16(p, 0x92); + dprint("ahci: %T: iaahcimode %.2ux %.4ux\n", p->tbdf, pcicfgr8(p, 0x91), u); + pcicfgw16(p, 0x92, u | 0xf); /* ports 0-15 (sic) */ + return 0; +} + +enum{ + Ghc = 0x04/4, /* global host control */ + Pi = 0x0c/4, /* ports implemented */ + Cmddec = 1<<15, /* enable command block decode */ + + /* Ghc bits */ + Ahcien = 1<<31, /* ahci enable */ +}; + +static void +iasetupahci(Ctlr *c) /* clear Cmddec in pci config words 0x40 and 0x42, then switch the hba to ahci mode */ +{ + pcicfgw16(c->pci, 0x40, pcicfgr16(c->pci, 0x40) & ~Cmddec); + pcicfgw16(c->pci, 0x42, pcicfgr16(c->pci, 0x42) & ~Cmddec); + + c->lmmio[Ghc] |= Ahcien; + c->lmmio[Pi] = (1 << 6) - 1; /* 0x3f: ports 0-5 (supposedly ro pi reg) */ + + /* enable ahci mode; from ich9 datasheet */ + pcicfgw16(c->pci, 0x90, 1<<6 | 1<<5); +} + +static void +sbsetupahci(Pcidev *p) /* rewrite the class code registers to ccru 6, ccrp 1 and mirror that in p */ +{ + print("sbsetupahci: tweaking %.4ux ccru %.2ux ccrp %.2ux\n", + p->did, p->ccru, p->ccrp); + pcicfgw8(p, 0x40, pcicfgr8(p, 0x40) | 1); + pcicfgw8(p, PciCCRu, 6); + pcicfgw8(p, PciCCRp, 1); + p->ccru = 6; + p->ccrp = 1; +} + +static int +esbenc(Ctlr *c) /* enclosure setup for Tesb: one word of transmit buffer at mmio offset 0xa0 */ +{ + c->encsz = 1; + c->enctx = (u32int*)(c->mmio + 0xa0); + c->enctype = Eesb; + c->enctx[0] = 0; + return 0; +} + +static int +ahciencinit(Ctlr *c) /* record the enclosure management buffers in c; only the Elmt message type (or Tesb) is accepted, anything else returns -1 */ +{ + uint type, sz, o; + u32int *bar; + Ahba *h; + + h = c->hba; + if(c->type == Tesb) + return esbenc(c); + if((h->cap & Hems) == 0) + return -1; + type = h->emctl & Emtype; + switch(type){ + case Esgpio: + case Eses2: + case Esafte: + return -1; + case Elmt: + break; + default: + return -1; + } + + sz = h->emloc & 0xffff; + o = h->emloc>>16; /* offset into bar, counted in u32int words */ + if(sz == 0 || o == 0) + return -1; + bar = c->lmmio; + ledprint("size = %#.4ux; loc = %#.4ux*4\n", sz, o); + + c->encsz = sz; + c->enctx = bar + o; + if((h->emctl & Xonly) == 0){ + if(h->emctl & Smb) + c->encrx = bar + o; + else + c->encrx = bar + o*2; + } + c->enctype = type; + return 0; +} + +static ushort itab[] = { + 0xfffc, 0x2680, Tesb, + 0xfffb, 0x27c1, Tahci, /* 82801g[bh]m */ + 0xffff, 0x2821, Tahci, /* 
82801h[roh] */ + 0xfffe, 0x2824, Tahci, /* 82801h[b] */ + 0xfeff, 0x2829, Tahci, /* ich8 */ + 0xfffe, 0x2922, Tahci, /* ich9 */ + 0xffff, 0x3a02, Tahci, /* 82801jd/do */ + 0xfefe, 0x3a22, Tahci, /* ich10, pch */ + 0xfff7, 0x3b28, Tahci, /* pchm */ + 0xfffe, 0x3b22, Tahci, /* pch */ +}; + +static int +didtype(Pcidev *p) /* index into cttab for a controller this driver accepts, else -1 */ +{ + int type, i; + + type = Tahci; + switch(p->vid){ + default: + return -1; + case 0x8086: + for(i = 0; i < nelem(itab); i += 3) /* itab rows are: did mask, did value, type */ + if((p->did & itab[i]) == itab[i+1]) + return itab[i+2]; + break; + case 0x1002: + if(p->ccru == 1 || p->ccrp != 1) + if(p->did == 0x4380 || p->did == 0x4390) + sbsetupahci(p); + type = Tsb600; + break; + case 0x1106: + /* + * unconfirmed report that the programming + * interface is set incorrectly. + */ + if(p->did == 0x3349) + return Tahci; + break; + case 0x10de: + case 0x1039: + case 0x1b4b: + case 0x11ab: + break; + case 0x197b: + case 0x10b9: + type = Tjmicron; + break; + } + if(p->ccrb == 1 && p->ccru == 6 && p->ccrp == 1) + return type; + return -1; +} + +static SDev* +iapnp(void) /* scan pci once for ahci controllers and register each one with sdadddevs; always returns nil */ +{ + int i, n, nunit, type; + uintptr io; + Ctlr *c; + Drive *d; + Pcidev *p; + SDev *s; + static int done; + + if(done) + return nil; + done = 1; + memset(olds, 0xff, sizeof olds); + p = nil; +loop: + while((p = pcimatch(p, 0, 0)) != nil){ + if((type = didtype(p)) == -1) + continue; + if(p->mem[Abar].bar == 0) + continue; + if(niactlr == NCtlr){ + print("iapnp: %s: too many controllers\n", cttab[type].name); + break; + } + c = iactlr + niactlr; + s = sdevs + niactlr; + memset(c, 0, sizeof *c); + memset(s, 0, sizeof *s); + io = p->mem[Abar].bar & ~0xf; + c->mmio = vmap(io, p->mem[Abar].size); + if(c->mmio == 0){ + print("%s: address %#p in use did %.4ux\n", + cttab[type].name, io, p->did); /* c was just zeroed and c->type is only set below, so take the name from cttab */ + continue; + } + c->lmmio = (u32int*)c->mmio; + c->pci = p; + c->type = cttab + type; + + s->ifc = &sdahciifc; + s->idno = 'E'; + s->ctlr = c; + c->sdev = s; + + if(intel(c) && p->did != 0x2681) + iasetupahci(c); +// ahcihbareset((Ahba*)c->mmio); + nunit = 
ahciconf(c); + c->pi = c->hba->pi; + if(0 && p->vid == 0x1002 && p->did == 0x4391){ + c->pi = 0x3f; /* noah's opteron */ + nunit = 6; + } + if(intel(c) && iaahcimode(p) == -1 || nunit < 1){ + vunmap(c->mmio, p->mem[Abar].size); + continue; + } + c->ndrive = s->nunit = nunit; + + /* map the drives -- they don't all need to be enabled. */ + memset(c->rawdrive, 0, sizeof c->rawdrive); + n = 0; + for(i = 0; i < NCtlrdrv; i++){ + d = c->rawdrive + i; + d->portno = i; + d->driveno = -1; + d->portm.tler = 5000; + d->portm.sectors = 0; + d->portm.serial[0] = ' '; + d->led = Ibpinormal; + d->ctlr = c; + if((c->pi & 1<<i) == 0) /* port not implemented: driveno stays -1 */ + continue; + snprint(d->name, sizeof d->name, "iahci%d.%d", niactlr, i); + d->port = (Aport*)(c->mmio + 0x80*i + 0x100); + d->portc.p = d->port; + d->portc.m = &d->portm; + d->driveno = n++; /* implemented ports are numbered densely */ + c->drive[d->driveno] = d; + iadrive[niadrive + d->driveno] = d; + } + for(i = 0; i < n; i++) + if(ahciidle(c->drive[i]->port) == -1){ + print("%s: port %d wedged; abort\n", + tnam(c), i); + goto loop; + } + for(i = 0; i < n; i++){ + c->drive[i]->mode = DMautoneg; + configdrive(c->drive[i]); + } + ahciencinit(c); + + niadrive += n; + niactlr++; + sdadddevs(s); + i = (c->hba->cap >> 21) & 1; + print("#S/%s: %s: sata-%s with %d ports\n", s->name, + tnam(c), "I\0II" + i*2, nunit); /* i selects "I" or "II" inside the one literal */ + } + return nil; +} + +static Htab ctab[] = { + Aasp, "asp", + Aalpe , "alpe ", + Adlae, "dlae", + Aatapi, "atapi", + Apste, "pste", + Afbsc, "fbsc", + Aesp, "esp", + Acpd, "cpd", + Ampsp, "mpsp", + Ahpcp, "hpcp", + Apma, "pma", + Acps, "cps", + Acr, "cr", + Afr, "fr", + Ampss, "mpss", + Apod, "pod", + Asud, "sud", + Ast, "st", +}; + +static char* +capfmt(char *p, char *e, Htab *t, int n, u32int cap) /* append the name of every table bit that is set in cap; returns the new end of the text */ +{ + uint i; + + *p = 0; + for(i = 0; i < n; i++) + if(cap & t[i].bit) + p = seprint(p, e, "%s ", t[i].name); + return p; +} + +static int +iarctl(SDunit *u, char *p, int l) +{ + char buf[32], *e, *op; + Aport *o; + Ctlr *c; + Drive *d; + + if((c = u->dev->ctlr) == nil) + return 0; + d = c->drive[u->subno]; + o = d->port; + + 
e = p+l; + op = p; + if(d->state == Dready) + p = sfisxrdctl(&d->portm, p, e); + else + p = seprint(p, e, "no disk present [%s]\n", dstate(d->state)); + serrstr(o->serror, buf, buf + sizeof buf - 1); + p = seprint(p, e, "reg\ttask %ux cmd %ux serr %ux %s ci %ux is %ux " + "sig %ux sstatus %.3ux\n", o->task, o->cmd, o->serror, buf, + o->ci, o->isr, o->sig, o->sstatus); + p = seprint(p, e, "cmd\t"); + p = capfmt(p, e, ctab, nelem(ctab), o->cmd); + p = seprint(p, e, "\n"); + p = seprint(p, e, "mode\t%s %s\n", modes[d->mode], modes[maxmode(c)]); + p = seprint(p, e, "geometry %llud %lud\n", u->sectors, u->secsize); + return p - op; /* number of bytes of text produced */ +} + +static void +forcemode(Drive *d, char *mode) /* set d->mode from a name in modes[]; unknown names select modes[0] */ +{ + int i; + + for(i = 0; i < nelem(modes); i++) + if(strcmp(mode, modes[i]) == 0) + break; + if(i == nelem(modes)) + i = 0; + ilock(d); + d->mode = i; + iunlock(d); +} + +static void +forcestate(Drive *d, char *state) /* set the drive state from a name in diskstates[1..]; unknown names raise Ebadctl */ +{ + int i; + + for(i = 1; i < nelem(diskstates); i++) + if(strcmp(state, diskstates[i]) == 0) + break; + if(i == nelem(diskstates)) + error(Ebadctl); + setstate(d, 1 << i-1); /* parsed as 1 << (i-1): name i maps to state bit i-1 */ +} + +static int +iawctl(SDunit *u, Cmdbuf *cmd) /* ctl messages: "mode [name]" and "state [name]" */ +{ + char **f; + Ctlr *c; + Drive *d; + + c = u->dev->ctlr; + d = c->drive[u->subno]; + f = cmd->f; + + if(strcmp(f[0], "mode") == 0) + forcemode(d, f[1]? f[1]: "satai"); + else if(strcmp(f[0], "state") == 0) + forcestate(d, f[1]? 
f[1]: "null"); + else + cmderror(cmd, Ebadctl); + return 0; +} + +static char * +portr(char *p, char *e, uint x) /* format the set bits of x as ranges, e.g. "0-3, 5"; returns the new end of the text */ +{ + int i, a; + + p[0] = 0; + a = -1; /* first bit of the run being printed, or -1 between runs */ + for(i = 0; i < 32; i++){ + if((x & (1<<i)) == 0){ + if(a != -1 && i - 1 != a) /* close a run longer than one bit */ + p = seprint(p, e, "-%d", i - 1); + a = -1; + continue; + } + if(a == -1){ + if(i > 0) + p = seprint(p, e, ", "); + p = seprint(p, e, "%d", a = i); + } + } + if(a != -1 && i - 1 != a) + p = seprint(p, e, "-%d", i - 1); + return p; +} + +static Htab htab[] = { + H64a, "64a", + Hncq, "ncq", + Hsntf, "ntf", + Hmps, "mps", + Hss, "ss", + Halp, "alp", + Hal, "led", + Hclo, "clo", + Ham, "am", + Hpm, "pm", + Hfbs, "fbs", + Hpmb, "pmb", + Hssc, "slum", + Hpsc, "pslum", + Hcccs, "coal", + Hems, "ems", + Hxs, "xs", +}; + +static Htab htab2[] = { + Apts, "apts", + Nvmp, "nvmp", + Boh, "boh", +}; + +static Htab emtab[] = { + Pm, "pm", + Alhd, "alhd", + Xonly, "xonly", + Smb, "smb", + Esgpio, "esgpio", + Eses2, "eses2", + Esafte, "esafte", + Elmt, "elmt", +}; + +static char* +iartopctl(SDev *s, char *p, char *e) /* one line of controller status: capability names followed by the raw hba registers */ +{ + char pr[25]; + u32int cap; + Ahba *h; + Ctlr *c; + + c = s->ctlr; + h = c->hba; + cap = h->cap; + p = seprint(p, e, "sd%c ahci %s port %#p: ", s->idno, tnam(c), h); + p = capfmt(p, e, htab, nelem(htab), cap); + p = capfmt(p, e, htab2, nelem(htab2), h->cap2); + p = capfmt(p, e, emtab, nelem(emtab), h->emctl); + portr(pr, pr + sizeof pr, h->pi); + return seprint(p, e, + "iss %d ncs %d np %d ghc %ux isr %ux pi %ux %s ver %ux\n", + (cap>>20) & 0xf, (cap>>8) & 0x1f, 1 + (cap & 0x1f), + h->ghc, h->isr, h->pi, pr, h->ver); +} + +static int +iawtopctl(SDev *, Cmdbuf *cmd) /* "debug", "idprint", "aprint", "ledprint": toggle the flag, or set it with "on"/anything else */ +{ + int *v; + char **f; + + f = cmd->f; + v = 0; + + if(strcmp(f[0], "debug") == 0) + v = &debug; + else if(strcmp(f[0], "idprint") == 0) + v = &prid; + else if(strcmp(f[0], "aprint") == 0) + v = &datapi; + else if(strcmp(f[0], "ledprint") == 0) + v = &dled; + else + cmderror(cmd, Ebadctl); + + switch(cmd->nf){ + default: + cmderror(cmd, Ebadarg); + case 1: + *v ^= 1; + return 0; + case 2: + *v = strcmp(f[1], "on") == 0; + return 0; + } +} + +SDifc sdahciifc = { + "ahci", + + iapnp, + nil, /* 
legacy */ + iaenable, + iadisable, + + iaverify, + iaonline, + iario, + iarctl, + iawctl, + + ahcibio, + nil, /* probe */ + nil, /* clear */ + iartopctl, + iawtopctl, + iaataio, +}; diff -Nru 0/sys/src/nix/k10/sipi.c 4/sys/src/nix/k10/sipi.c --- 0/sys/src/nix/k10/sipi.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/sipi.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,180 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "apic.h" +#include "sipi.h" + +#define SIPIHANDLER (KZERO+0x3000) + +/* + * Parameters are passed to the bootstrap code via a vector + * in low memory indexed by the APIC number of the processor. + * The layout, size, and location have to be kept in sync + * with the handler code in l64sipi.s. + */ +typedef struct Sipi Sipi; +struct Sipi { + u32int pml4; + u32int _4_; + uintptr stack; + Mach* mach; + uintptr pc; +}; + +enum { + SipiHdlrMaxSz = 4*KiB, /* the Sipi vector starts this far past SIPIHANDLER */ +}; + +enum { + INVPCMAGIC = 0x0eeebabe /* parked in mp->splpc until the restarted core overwrites it */ +}; + +/* + * the caller must make sure it has exclusive access to + * the core and it should be quiescent before sipiing it + */ +int +sipicore(int core) /* send a startup ipi to core with tentacle as its entry pc; 0 once mp->splpc has changed, -1 otherwise */ +{ + Mach *mp; + uchar nvstate; + int i, issuicide; + extern void tentacle(int); + uintmem sipipa; + Sipi *sipist, *sipi; + u8int *sipiptr; + + issuicide = 0; + mp = sys->machptr[core]; + if(mp == m) + issuicide = 1; + + sipiptr = UINT2PTR(SIPIHANDLER); + sipist = (Sipi*)(sipiptr+SipiHdlrMaxSz); + sipi = &sipist[mp->apicno]; + sipi->pc = PTR2UINT(tentacle); + mp->splpc = INVPCMAGIC; + + ainc(&active.nbooting); /* NOTE(review): neither counter is put back on the -1 returns below -- confirm callers expect that */ + adec(&active.nonline); + + nvstate = nvramread(0x0f); + if(nvstate != 0){ + print("nvram %d should be: %ux, sipied too early?\n", core, nvstate); + return -1; + } + + sipipa = mmuphysaddr(SIPIHANDLER); + apicsipi(mp->apicno, sipipa); + for(i = 0; i < 1000; i++){ /* up to 1000 polls, millidelay(5) apart */ + if(!issuicide && mp->splpc != INVPCMAGIC) + break; + millidelay(5); + } + if (issuicide || i == 1000){ /* when mp == m the loop above never breaks, so this branch is always taken */ + print("timed out waiting for reboot\n"); + return -1; + } + return 0; + +} + + +void 
+sipiall(void) /* copy the startup handler into place and send a startup ipi to each processor selected by the loop below */ +{ + Apic *apic; + Mach *mach; + int apicno, i; + u8int *sipiptr; + uintmem sipipa; + u8int *alloc, *p; + Sipi *sipi, *sipist; + extern void squidboy(int); + + /* + * Move the startup code into place, + * must be aligned properly. + */ + sipipa = mmuphysaddr(SIPIHANDLER); + if((sipipa & (4*KiB - 1)) || sipipa > (1*MiB - 2*4*KiB)) /* must be 4KiB aligned and low enough that handler plus vector end below 1MiB */ + return; + sipiptr = UINT2PTR(SIPIHANDLER); + memmove(sipiptr, sipihandler, sizeof(sipihandler)); + memset(sipiptr+SipiHdlrMaxSz, 0, sizeof(Sipi)*Napic); + DBG("sipiptr %#p sipipa %#llux\n", sipiptr, sipipa); + + sipist = (Sipi*)(sipiptr+SipiHdlrMaxSz); + + /* + * Notes: + * The Universal Startup Algorithm described in the MP Spec. 1.4. + * The data needed per-processor is the sum of the stack, page + * table pages, vsvm page and the Mach page. The layout is similar + * to that described in data.h for the bootstrap processor, but + * with any unused space elided. + */ + for(apicno = 0; apicno < Napic; apicno++){ + apic = &xlapic[apicno]; + if(!apic->useable || apic->addr || apic->machno == 0) /* skip unusable entries, entries with addr set, and machno 0 */ + continue; + sipi = &sipist[apicno]; + /* + * NOTE: for now, share the page tables with the + * bootstrap processor, until the lsipi code is worked out, + * so only the Mach and stack portions are used below. + */ + alloc = mallocalign(MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ, 4096, 0, 0); + if(alloc == nil) + continue; + memset(alloc, 0, MACHSTKSZ+4*PTSZ+4*KiB+MACHSZ); + p = alloc+MACHSTKSZ; + + sipi->pml4 = cr3get(); /* NOTE(review): Sipi.pml4 is u32int; assumes cr3 fits in 32 bits -- confirm */ + + + DBG("sipi %#p pml4 %#ux\n", sipi, sipi->pml4); + sipi->stack = PTR2UINT(p); + + p += 4*PTSZ+4*KiB; + + /* + * Committed. If the AP startup fails, can't safely + * release the resources, who knows what mischief + * the AP is up to. Perhaps should try to put it + * back into the INIT state? + */ + mach = (Mach*)p; + sipi->mach = mach; + mach->machno = apic->machno; /* NOT one-to-one... 
*/ + mach->splpc = PTR2UINT(squidboy); + sipi->pc = mach->splpc; + mach->apicno = apicno; + mach->stack = PTR2UINT(alloc); + mach->vsvm = alloc+MACHSTKSZ+4*PTSZ; + mach->pml4 = m->pml4; /* not really needed any more? */ + + p = KADDR(0x467); + *p++ = sipipa; + *p++ = sipipa>>8; + *p++ = 0; + *p = 0; + + nvramwrite(0x0f, 0x0a); + apicsipi(apicno, sipipa); + + for(i = 0; i < 1000; i++){ + if(mach->splpc == 0) + break; + millidelay(5); + } + nvramwrite(0x0f, 0x00); + + DBG("mach %#p (%#p) apicid %d machno %2d %dMHz\n", + mach, sys->machptr[mach->machno], + apicno, mach->machno, mach->cpumhz); + } +} diff -Nru 0/sys/src/nix/k10/sipi.h 4/sys/src/nix/k10/sipi.h --- 0/sys/src/nix/k10/sipi.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/sipi.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,26 @@ +uchar sipihandler[]={ +0xea,0x58,0x30,0x00,0x00,0x90,0x90,0x90, +0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0xff,0xff,0x00,0x00,0x00,0x9a,0xcf,0x00,0xff,0xff,0x00,0x00,0x00,0x92,0xcf,0x00, +0x00,0x00,0x00,0x00,0x00,0x98,0x20,0x00,0x1f,0x00,0x10,0x30,0x00,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x98,0x20,0x00,0x00,0x00, +0x00,0x00,0x00,0x80,0x00,0x00,0x17,0x00,0x36,0x30,0x00,0xf0,0xff,0xff,0xff,0xff, +0x8c,0xc8,0x8e,0xd8,0x0f,0x01,0x16,0x30,0x30,0x0f,0x20,0xc0,0x83,0xc8,0x01,0x0f, +0x22,0xc0,0xeb,0x00,0xb8,0x10,0x00,0x8e,0xd8,0x8e,0xc0,0x8e,0xe0,0x8e,0xe8,0x8e, +0xd0,0x66,0xea,0x81,0x30,0x00,0x00,0x08,0x00,0xbd,0x00,0x00,0xe0,0xfe,0x8b,0x6d, +0x20,0xc1,0xed,0x18,0x89,0xe8,0x6b,0xc0,0x20,0xbb,0x00,0x30,0x00,0x00,0x81,0xc3, +0x00,0x10,0x00,0x00,0x01,0xc3,0x8b,0x33,0x89,0xf0,0x0f,0x22,0xd8,0x89,0xc2,0x81, +0xea,0x00,0x60,0x00,0x00,0x83,0xc2,0x03,0x89,0x10,0x2d,0x00,0x60,0x00,0x00,0x81, +0xc2,0x00,0x10,0x00,0x00,0x89,0x10,0xba,0x83,0x00,0x00,0x00,0x05,0x00,0x10,0x00, +0x00,0x89,0x10,0x0f,0x20,0xe0,0x83,0xe0,0xef,0x0d,0xa0,0x00,0x00,0x00,0x0f,0x22, +0xe0,0xb9,0x80,0x00,0x00,0xc0,0x0f,0x32,0x0d,0x00,0x01,0x00,0x00,0x0f,0x30,0x0f, 
+0x20,0xc2,0x81,0xe2,0xf5,0xff,0xff,0x9f,0x81,0xca,0x00,0x00,0x01,0x80,0x0f,0x22, +0xc2,0xea,0x00,0x31,0x00,0x00,0x18,0x00,0x48,0xc7,0xc0,0x09,0x31,0x00,0xf0,0xff, +0xe0,0x48,0xc7,0xc0,0x4e,0x30,0x00,0xf0,0x0f,0x01,0x10,0x48,0x31,0xd2,0x8e,0xda, +0x8e,0xc2,0x8e,0xe2,0x8e,0xea,0x8e,0xd2,0x63,0xf6,0x48,0x89,0xf0,0x48,0x05,0x00, +0x00,0x00,0xf0,0x48,0x89,0xc4,0x48,0x89,0x10,0x0f,0x22,0xde,0x48,0x81,0xc3,0x00, +0x00,0x00,0xf0,0x8b,0x33,0x63,0xf6,0x48,0x89,0xf0,0x48,0x8b,0x63,0x08,0x4c,0x8b, +0x7b,0x10,0x49,0x89,0xd6,0x52,0x9d,0x63,0xed,0x55,0x48,0x8b,0x43,0x18,0xff,0xd0, +0xeb,0xfe, + +}; diff -Nru 0/sys/src/nix/k10/syscall.c 4/sys/src/nix/k10/syscall.c --- 0/sys/src/nix/k10/syscall.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/syscall.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,429 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/error.h" + +#include "/sys/src/libc/9syscall/sys.h" + +#include + +#include "amd64.h" +#include "ureg.h" + +typedef struct { + uintptr ip; + Ureg* arg0; + char* arg1; + char msg[ERRMAX]; + Ureg* old; + Ureg ureg; +} NFrame; + +/* + * Return user to state before notify() + */ +void +noted(Ureg* cur, uintptr arg0) +{ + NFrame *nf; + Note note; + Ureg *nur; + + qlock(&up->debug); + if(arg0 != NRSTR && !up->notified){ + qunlock(&up->debug); + pprint("suicide: call to noted when not notified\n"); + pexit("Suicide", 0); + } + up->notified = 0; + fpunoted(); + + nf = up->ureg; + + /* sanity clause */ + if(!okaddr(PTR2UINT(nf), sizeof(NFrame), 0)){ + qunlock(&up->debug); + pprint("suicide: bad ureg %#p in noted\n", nf); + pexit("Suicide", 0); + } + + /* + * Check the segment selectors are all valid. 
+ */ + nur = &nf->ureg; + if(nur->cs != SSEL(SiUCS, SsRPL3) || nur->ss != SSEL(SiUDS, SsRPL3) + || nur->ds != SSEL(SiUDS, SsRPL3) || nur->es != SSEL(SiUDS, SsRPL3) + || nur->fs != SSEL(SiUDS, SsRPL3) || nur->gs != SSEL(SiUDS, SsRPL3)){ + qunlock(&up->debug); + pprint("suicide: bad segment selector in noted\n"); + pexit("Suicide", 0); + } + + /* don't let user change system flags */ + nur->flags &= (Of|Df|Sf|Zf|Af|Pf|Cf); + nur->flags |= cur->flags & ~(Of|Df|Sf|Zf|Af|Pf|Cf); + + memmove(cur, nur, sizeof(Ureg)); + + switch((int)arg0){ + case NCONT: + case NRSTR: + if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted pc=%#p sp=%#p\n", + nur->ip, nur->sp); + pexit("Suicide", 0); + } + up->ureg = nf->old; + qunlock(&up->debug); + break; + case NSAVE: + if(!okaddr(nur->ip, BY2SE, 0) || !okaddr(nur->sp, BY2SE, 0)){ + qunlock(&up->debug); + pprint("suicide: trap in noted pc=%#p sp=%#p\n", + nur->ip, nur->sp); + pexit("Suicide", 0); + } + qunlock(&up->debug); + + splhi(); + nf->arg1 = nf->msg; + nf->arg0 = &nf->ureg; + cur->bp = PTR2UINT(nf->arg0); + nf->ip = 0; + cur->sp = PTR2UINT(nf); + break; + default: + memmove(&note, &up->lastnote, sizeof(Note)); /* copy the last note while up->debug is still held */ + qunlock(&up->debug); + pprint("suicide: bad arg %#p in noted: %s\n", arg0, note.msg); + pexit(note.msg, 0); + break; + case NDFLT: + memmove(&note, &up->lastnote, sizeof(Note)); /* copy the last note while up->debug is still held */ + qunlock(&up->debug); + if(note.flag == NDebug) + pprint("suicide: %s\n", note.msg); + pexit(note.msg, note.flag != NDebug); + break; + } +} + +/* + * Call user, if necessary, with note. + * Pass user the Ureg struct and the note on his stack. + */ +int +notify(Ureg* ureg) +{ + int l; + Mpl pl; + Note note; + uintptr sp; + NFrame *nf; + + /* + * Calls procctl splhi, see comment in procctl for the reasoning. 
+ */ + if(up->procctl) + procctl(up); + if(up->nnote == 0) + return 0; + + fpunotify(ureg); + + pl = spllo(); + qlock(&up->debug); + + up->notepending = 0; + memmove(¬e, &up->note[0], sizeof(Note)); + if(strncmp(note.msg, "sys:", 4) == 0){ + l = strlen(note.msg); + if(l > ERRMAX-sizeof(" pc=0x0123456789abcdef")) + l = ERRMAX-sizeof(" pc=0x0123456789abcdef"); + sprint(note.msg+l, " pc=%#p", ureg->ip); + } + + if(note.flag != NUser && (up->notified || up->notify == nil)){ + qunlock(&up->debug); + if(note.flag == NDebug) + pprint("suicide: %s\n", note.msg); + pexit(note.msg, note.flag != NDebug); + } + + if(up->notified){ + qunlock(&up->debug); + splhi(); + return 0; + } + + if(up->notify == nil){ + qunlock(&up->debug); + pexit(note.msg, note.flag != NDebug); + } + if(!okaddr(PTR2UINT(up->notify), sizeof(ureg->ip), 0)){ + qunlock(&up->debug); + pprint("suicide: bad function address %#p in notify\n", + up->notify); + pexit("Suicide", 0); + } + + sp = ureg->sp - sizeof(NFrame); + if(!okaddr(sp, sizeof(NFrame), 1)){ + qunlock(&up->debug); + pprint("suicide: bad stack address %#p in notify\n", sp); + pexit("Suicide", 0); + } + + nf = UINT2PTR(sp); + memmove(&nf->ureg, ureg, sizeof(Ureg)); + nf->old = up->ureg; + up->ureg = nf; /* actually the NFrame, for noted */ + memmove(nf->msg, note.msg, ERRMAX); + nf->arg1 = nf->msg; + nf->arg0 = &nf->ureg; + ureg->bp = PTR2UINT(nf->arg0); + nf->ip = 0; + + ureg->sp = sp; + ureg->ip = PTR2UINT(up->notify); + up->notified = 1; + up->nnote--; + memmove(&up->lastnote, ¬e, sizeof(Note)); + memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note)); + + qunlock(&up->debug); + splx(pl); + + return 1; +} + +void +noerrorsleft(void) +{ + int i; + + if(up->nerrlab){ + /* NIX processes will have a waserror in their handler */ + if(up->ac != nil && up->nerrlab == 1) + return; + + print("bad errstack: %d extra\n", up->nerrlab); + for(i = 0; i < NERR; i++) + print("sp=%#p pc=%#p\n", + up->errlab[i].sp, up->errlab[i].pc); + panic("error stack"); 
+ } +} + +/* it should be unsigned. FIXME */ +void +syscall(int badscallnr, Ureg* ureg) +{ + unsigned int scallnr = (unsigned int) badscallnr; + char *e; + uintptr sp; + int s; + vlong startns, stopns; + Ar0 ar0; + static Ar0 zar0; + + if(!userureg(ureg)) + panic("syscall: cs %#llux\n", ureg->cs); + + cycles(&up->kentry); + + m->syscall++; + up->nsyscall++; + up->nqsyscall++; + up->insyscall = 1; + up->pc = ureg->ip; + up->dbgreg = ureg; + sp = ureg->sp; + startns = 0; + + if(up->procctl == Proc_tracesyscall){ + /* + * Redundant validaddr. Do we care? + * Tracing syscalls is not exactly a fast path... + * Beware, validaddr currently does a pexit rather + * than an error if there's a problem; that might + * change in the future. + */ + if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE)) + validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0); + + syscallfmt(scallnr, (va_list)(sp+BY2SE)); + up->procctl = Proc_stopme; + procctl(up); + if(up->syscalltrace) + free(up->syscalltrace); + up->syscalltrace = nil; + startns = todget(nil); + } + + up->scallnr = scallnr; + if(scallnr == RFORK) + fpusysrfork(ureg); + spllo(); + + sp = ureg->sp; + up->nerrlab = 0; + ar0 = zar0; + if(!waserror()){ + if(scallnr >= nsyscall || systab[scallnr].f == nil){ + pprint("bad sys call number %d pc %#llux\n", + scallnr, ureg->ip); + postnote(up, 1, "sys: bad sys call", NDebug); + error(Ebadarg); + } + + if(sp < (USTKTOP-BIGPGSZ) || sp > (USTKTOP-sizeof(up->arg)-BY2SE)) + validaddr(UINT2PTR(sp), sizeof(up->arg)+BY2SE, 0); + + memmove(up->arg, UINT2PTR(sp+BY2SE), sizeof(up->arg)); + up->psstate = systab[scallnr].n; + + systab[scallnr].f(&ar0, (va_list)up->arg); + if(scallnr == SYSR1){ + /* + * BUG: must go when ron binaries go. + * NIX: Returning from execac(). + * This means that the process is back to the + * time sharing core. However, the process did + * already return from the system call, when dispatching + * the user code to the AC. The only thing left is to + * return. 
The user registers should be ok, because + * up->dbgreg has been the user context for the process. + */ + return; + } + poperror(); + } + else{ + /* failure: save the error buffer for errstr */ + e = up->syserrstr; + up->syserrstr = up->errstr; + up->errstr = e; + if(DBGFLG && up->pid == 1) + iprint("%s: syscall %s error %s\n", + up->text, systab[scallnr].n, up->syserrstr); + ar0 = systab[scallnr].r; + } + + /* + * NIX: for the execac() syscall, what follows is done within + * the system call, because it never returns. + */ + + noerrorsleft(); + + /* + * Put return value in frame. + */ + ureg->ax = ar0.p; + + if(up->procctl == Proc_tracesyscall){ + stopns = todget(nil); + up->procctl = Proc_stopme; + sysretfmt(scallnr, (va_list)(sp+BY2SE), &ar0, startns, stopns); + s = splhi(); + procctl(up); + splx(s); + if(up->syscalltrace) + free(up->syscalltrace); + up->syscalltrace = nil; + }else if(up->procctl == Proc_totc || up->procctl == Proc_toac) + procctl(up); + + + up->insyscall = 0; + up->psstate = 0; + + if(scallnr == NOTED) + noted(ureg, *(uintptr*)(sp+BY2SE)); + + splhi(); + if(scallnr != RFORK && (up->procctl || up->nnote)) + notify(ureg); + + /* if we delayed sched because we held a lock, sched now */ + if(up->delaysched){ + sched(); + splhi(); + } + kexit(ureg); +} + +uintptr +sysexecstack(uintptr stack, int argc) +{ + /* + * Given a current bottom-of-stack and a count + * of pointer arguments to be pushed onto it followed + * by an integer argument count, return a suitably + * aligned new bottom-of-stack which will satisfy any + * hardware stack-alignment contraints. + * Rounding the stack down to be aligned with the + * natural size of a pointer variable usually suffices, + * but some architectures impose further restrictions, + * e.g. 32-bit SPARC, where the stack must be 8-byte + * aligned although pointers and integers are 32-bits. 
+ */ + USED(argc); + + return STACKALIGN(stack); +} + +void* +sysexecregs(uintptr entry, usize ssize, uint nargs) /* push nargs just below the ssize bytes at the top of the user stack and aim the saved user registers at entry */ +{ + uintptr *sp; + Ureg *ureg; + + sp = (uintptr*)(USTKTOP - ssize); + *--sp = nargs; + + ureg = up->dbgreg; + ureg->sp = PTR2UINT(sp); + ureg->ip = entry; + ureg->type = 64; /* fiction for acid */ + + /* + * return the address of kernel/user shared data + * (e.g. clock stuff) + */ + return UINT2PTR(USTKTOP-sizeof(Tos)); +} + +void +sysprocsetup(Proc* p) +{ + fpusysprocsetup(p); +} + +void +sysrforkchild(Proc* child, Proc* parent) /* give child a kernel stack holding a copy of parent's user registers, to be resumed at sysrforkret */ +{ + Ureg *cureg; + + /* + * Add 3*BY2SE to the stack to account for + * - the return PC + * - trap's arguments (syscallnr, ureg) + */ + child->sched.sp = PTR2UINT(child->kstack+KSTACK-(sizeof(Ureg)+3*BY2SE)); + child->sched.pc = PTR2UINT(sysrforkret); + + cureg = (Ureg*)(child->sched.sp+3*BY2SE); + memmove(cureg, parent->dbgreg, sizeof(Ureg)); + + /* Things from bottom of syscall which were never executed */ + child->psstate = 0; + child->insyscall = 0; + + fpusysrforkchild(child, parent); +} diff -Nru 0/sys/src/nix/k10/tcore.c 4/sys/src/nix/k10/tcore.c --- 0/sys/src/nix/k10/tcore.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/tcore.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,469 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include /* NOTE(review): this and the next #include have no operand; a <...> name appears to have been lost -- restore from the original source */ +#include +#include "amd64.h" +#include "ureg.h" +#include "io.h" + +Lock nixaclock; /* NIX AC lock; held while assigning procs to cores */ + +/* + * NIX support for the time sharing core. 
+ */ + +extern void actrapret(void); +extern void acsysret(void); + +Mach* +getac(Proc *p, int core) /* claim an AC for p under nixaclock: core > 0 asks for that core, core < 0 takes the first idle AC in pickcore order; raises an error when none can be had */ +{ + int i, j; + Mach *mp; + + mp = nil; + if(core == 0) + panic("can't getac for a %s", rolename[NIXTC]); + lock(&nixaclock); + if(waserror()){ + unlock(&nixaclock); + nexterror(); + } + if(core > 0){ + if(core >= MACHMAX) + error("no such core"); + mp = sys->machptr[core]; + if(mp == nil || mp->nixrole == NIXUC || mp->proc != nil) + error("core not online or busy"); + if(mp->nixrole != NIXAC) + error("core is not an AC"); + Found: + mp->proc = p; /* also reached by goto from the search loop below */ + }else{ + for(i = 0; i < MACHMAX; i++){ + j = pickcore(p->color, i); + if((mp = sys->machptr[j]) != nil && mp->nixrole == NIXAC){ + if(isbooting(mp)) + continue; + if(mp->proc == nil) + goto Found; + } + } + error("not enough cores"); + } + unlock(&nixaclock); + poperror(); + return mp; +} + +/* + * BUG: + * The AC must not accept interrupts while in the kernel, + * or we must be prepared for nesting them, which we are not. + * This is important for note handling, because postnote() + * assumes that it's ok to send an IPI to an AC, no matter its + * state. The /proc interface also assumes that. + * + */ +void +intrac(Proc *p) /* send an ipi to the AC that p is assigned to, if any */ +{ + Mach *ac; + + ac = p->ac; + if(ac == nil){ + DBG("intrac: Proc.ac is nil. no ipi sent.\n"); + return; + } + /* + * It's ok if the AC gets idle in the mean time. + */ + DBG("intrac: ipi to cpu%d\n", ac->machno); + apicipi(ac->apicno); +} + +void +putac(Mach *m) /* mark the core as having no process; note nixaclock is not taken here */ +{ + coherence(); + m->proc = nil; +} + +void +stopac(void) /* detach the current process from its AC, if it has one */ +{ + Mach *mp; + + mp = up->ac; + if(mp == nil) + return; + if(mp->proc != up) + panic("stopac"); + + lock(&nixaclock); + up->ac = nil; + mp->proc = nil; + unlock(&nixaclock); + + /* TODO: + * send sipi to up->ac, it would rerun squidboy(), and + * wait for us to give it a function to run. + */ +} + + + +static void +roleac(Mach *mp, int role) +{ + if(mp == nil) + return; + /* wake it up... 
*/ + mp->nixrole = NIXSC; + mp->icc->fn = acquiesce; + coherence(); + while(mp->icc->fn == acquiesce) + waitwhile(&mp->nixrole, NIXSC); + if(role == NIXOC){ + mp->nixrole = role; + apicnipi(mp->apicno); + }else + sipicore(mp->machno); +} + + +void +tcquiesce(void) +{ + /* sync, suicide is not possible */ + m->proc = nil; + m->icc->fn = nil; + + /* signal we are done */ + m->nixrole= NIXQC; + coherence(); + wakeup(&m->sipir); + for(;;) + halt(); +} + +void +rolestable(Mach *mp) +{ + mp->nixrole = mp->nnixrole; + coherence(); +} + +int +isbooting(Mach *mp) +{ + return mp->nixrole == NIXQC || mp->nixrole == NIXSC; +} + +static int +donequiesce(void *x) +{ + Mach *mp; + mp = (Mach *)x; + return mp->nixrole == NIXQC; +} + +/* + * what should happen if you are wired and the core disappears? + * for now, this is for testing and it executes in the context of + * a process (it shouldn't) + * + * changerole asks core to take on role. it returns -1 if the + * request is refused (bad core number, core not present, core 1 + * not a TC, or core already changing role) and 0 otherwise. + */ +int +changerole(int role, int core) +{ + int apicno; + Mach *mp, *mpc, *w; + + /* + * 1 *has* to be a TC. 
+ */ + mpc = sys->machptr[1]; + if(core < 0 || core == 1 || core >= MACHMAX || sys->machptr[core] == nil) + return -1; + if(mpc == nil || mpc->nixrole != NIXTC) + return -1; + w = up->wired; + procwired(up, 1); + if(m != mpc) + sched(); + mp = sys->machptr[core]; + + lock(&mp->sipilock); + apicno = mp->apicno; + if(isbooting(mp) && mp->nixrole != NIXOC){ + print("core is already rebooting, nnixrole %#ux\n", mp->nnixrole); + unlock(&mp->sipilock); + up->wired = w; /* undo the wiring done above, as the normal exit does */ + return -1; + } + mp->nnixrole = role; + unlock(&mp->sipilock); + switch(mp->nixrole){ + case NIXAC: + lock(&nixaclock); /* so no one reassigns the core */ + if(mp->proc != nil){ + mp->proc->procctl = Proc_totc; + unlock(&nixaclock); + apicipi(apicno); + }else{ + unlock(&nixaclock); + roleac(mp, role); + } + break; + case NIXTC: + mp->nixrole = NIXSC; + mp->qexpired = 1; + coherence(); + /* in case the core is sleeping */ + apicipi(mp->apicno); + sleep(&mp->sipir, donequiesce, mp); + + /* fall */ + case NIXOC: + mp->nnixrole = role; + coherence(); + if(role == NIXOC){ + mp->proc = nil; + mp->nixrole = NIXOC; + apicnipi(mp->apicno); + }else + sipicore(mp->machno); + break; + default: + print("don't know how to change my role\n"); + } + up->wired = w; + return 0; +} + +/* + * Functions starting with ac... are run in the application core. + * All other functions are run by the time-sharing cores. + */ + +typedef void (*APfunc)(void); +extern int notify(Ureg*); + +/* + * run an arbitrary function with arbitrary args on an ap core + * first argument is always pml4 for process + * make a field and a struct for the args cache line. + * + * Returns the return-code for the ICC or -1 if the process was + * interrupted while issuing the ICC. 
+ */ +int +runac(Mach *mp, APfunc func, int flushtlb, void *a, long n) +{ + uchar *dpg, *spg; + + if (n > sizeof(mp->icc->data)) + panic("runac: args too long"); + + if(mp->nixrole == NIXUC) + panic("Bad core"); + if(mp->proc != nil && mp->proc != up) + panic("runapfunc: mach is busy with another proc?"); + + memmove(mp->icc->data, a, n); + if(flushtlb){ + DBG("runac flushtlb: cppml4 %#p %#p\n", mp->pml4->pa, m->pml4->pa); + dpg = UINT2PTR(mp->pml4->va); + spg = UINT2PTR(m->pml4->va); + /* We should copy less: + * memmove(dgp, spg, m->pml4->daddr * sizeof(PTE)); + */ + memmove(dpg, spg, PTSZ); + if(0){ + print("runac: upac pml4 %#p\n", up->ac->pml4->pa); + dumpptepg(4, up->ac->pml4->pa); + } + } + mp->icc->flushtlb = flushtlb; + mp->icc->rc = ICCOK; + + DBG("runac: exotic proc on cpu%d\n", mp->machno); + qlock(&up->debug); + up->nicc++; + up->state = Exotic; + up->psstate = 0; + qunlock(&up->debug); + coherence(); + mp->icc->fn = func; + sched(); + return mp->icc->rc; +} + +/* + * Cleanup done by runacore to pretend we are going back to user space. + * We won't return and won't do what syscall() would normally do. + * Do it here instead. + */ +static void +fakeretfromsyscall(Ureg *ureg) +{ + int s; + + poperror(); /* as syscall() would do if we would return */ + if(up->procctl == Proc_tracesyscall){ /* Would this work? */ + up->procctl = Proc_stopme; + s = splhi(); + procctl(up); + splx(s); + } + + up->insyscall = 0; + /* if we delayed sched because we held a lock, sched now */ + if(up->delaysched){ + sched(); + splhi(); + } + kexit(ureg); +} + +/* + * Move the current process to an application core. + * This is performed at the end of execac(), and + * we pretend to be returning to user-space, but instead we + * dispatch the process to another core. + * 1. We do the final bookkeeping that syscall() would do after + * a return from sysexec(), because we are not returning. + * 2. We dispatch the process to an AC using an ICC. 
+ * + * This function won't return unless the process is reclaimed back + * to the time-sharing core, and is the handler for the process + * to deal with traps and system calls until the process dies. + * + * Remember that this function is the "line" between user and kernel + * space, it's not expected to raise|handle any error. + * + * We install a safety error label, just in case we raise errors, + * which we shouldn't. (noerrorsleft knows that for exotic processes + * there is an error label pushed by us). + */ +void +runacore(void) +{ + Ureg *ureg; + void (*fn)(void); + int rc, flush, s; + char *n; + uvlong t1; + Mach *ac; + + if(waserror()) + panic("runacore: error: %s\n", up->errstr); + ureg = up->dbgreg; + fakeretfromsyscall(ureg); + fpusysrfork(ureg); + + procpriority(up, PriKproc, 1); + ac = up->ac; + rc = runac(up->ac, actouser, 1, nil, 0); + procpriority(up, PriNormal, 0); + for(;;){ + t1 = fastticks(nil); + flush = 0; + fn = nil; + switch(rc){ + case ICCTRAP: + s = splhi(); + m->cr2 = up->ac->cr2; + DBG("runacore: trap %ulld cr2 %#ullx ureg %#p\n", + ureg->type, m->cr2, ureg); + switch(ureg->type){ + case IdtIPI: + if(up->procctl || up->nnote) + notify(up->dbgreg); + if(up->ac == nil) + goto ToTC; + kexit(up->dbgreg); + break; + case IdtNM: + case IdtMF: + case IdtXF: + /* these are handled in the AC; + * If we get here, they left in m->icc->data + * a note to be posted to the process. + * Post it, and make the vector a NOP. 
+ */ + n = up->ac->icc->note; + if(n != nil) + postnote(up, 1, n, NDebug); + ureg->type = IdtIPI; /* NOP */ + break; + default: + cr3put(m->pml4->pa); + if(0 && ureg->type == IdtPF){ + print("before PF:\n"); + print("AC:\n"); + dumpptepg(4, up->ac->pml4->pa); + print("\n%s:\n", rolename[NIXTC]); + dumpptepg(4, m->pml4->pa); + } + trap(ureg); + } + splx(s); + flush = 1; + fn = actrapret; + break; + case ICCSYSCALL: + DBG("runacore: syscall ax %#ullx ureg %#p\n", + ureg->ax, ureg); + cr3put(m->pml4->pa); + syscall(ureg->ax, ureg); + flush = 1; + fn = acsysret; + if(0) + if(up->nqtrap > 2 || up->nsyscall > 1) + goto ToTC; + if(up->ac == nil) + goto ToTC; + break; + default: + panic("runacore: unexpected rc = %d", rc); + } + up->tctime += fastticks2us(fastticks(nil) - t1); + procpriority(up, PriExtra, 1); + rc = runac(up->ac, fn, flush, nil, 0); + procpriority(up, PriNormal, 0); + } +ToTC: + /* + * to procctl, then syscall, to + * be back in the TC + */ + DBG("runacore: up %#p: return\n", up); + if(isbooting(ac)){ + roleac(ac, ac->nnixrole); + } +} + +extern ACVctl *acvctl[]; + +void +actrapenable(int vno, char* (*f)(Ureg*, void*), void* a, char *name) +{ + ACVctl *v; + + if(vno < 0 || vno >= 256) + panic("actrapenable: vno %d\n", vno); + v = malloc(sizeof(Vctl)); + v->f = f; + v->a = a; + v->vno = vno; + strncpy(v->name, name, KNAMELEN); + v->name[KNAMELEN-1] = 0; + + if(acvctl[vno]) + panic("AC traps can't be shared"); + acvctl[vno] = v; +} + + diff -Nru 0/sys/src/nix/k10/trap.c 4/sys/src/nix/k10/trap.c --- 0/sys/src/nix/k10/trap.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/trap.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,713 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include +#include "ureg.h" +#include "../port/pmc.h" + +#include "io.h" +#include "amd64.h" + +extern int notify(Ureg*); + +static void debugbpt(Ureg*, void*); +static void faultamd64(Ureg*, void*); +static void 
/*
 * Per-vector interrupt accounting.
 * intrtime() adds to these counters and irqallocread() reports them.
 */
typedef struct Intrtime Intrtime;
struct Intrtime {
	uvlong	count;		/* number of times intrtime() ran for the vector */
	uvlong	cycles;		/* sum of the perfticks() deltas charged to it */
};
static Intrtime intrtimes[256];	/* indexed by vector number */
+ */ + return v; +} + +int +intrdisable(void* vector) +{ + Vctl *v, *x, **ll; + extern int ioapicintrdisable(int); + + ilock(&vctllock); + v = vector; + if(v == nil || vctl[v->vno] != v) + panic("intrdisable: v %#p", v); + for(ll = vctl+v->vno; x = *ll; ll = &x->next) + if(v == x) + break; + if(x != v) + panic("intrdisable: v %#p", v); + if(v->mask) + v->mask(v, 1); + v->f(nil, v->a); + *ll = v->next; + ioapicintrdisable(v->vno); + iunlock(&vctllock); + + free(v); + return 0; +} + +static long +irqallocread(Chan*, void *vbuf, long n, vlong offset) +{ + char *buf, *p, str[2*(11+1)+2*(20+1)+(KNAMELEN+1)+(8+1)+1]; + int m, vno; + long oldn; + Intrtime *t; + Vctl *v; + + if(n < 0 || offset < 0) + error(Ebadarg); + + oldn = n; + buf = vbuf; + for(vno=0; vnonext){ + t = intrtimes + vno; + m = snprint(str, sizeof str, "%11d %11d %20llud %20llud %-*.*s %.*s\n", + vno, v->irq, t->count, t->cycles, 8, 8, v->type, KNAMELEN, v->name); + if(m <= offset) /* if do not want this, skip entry */ + offset -= m; + else{ + /* skip offset bytes */ + m -= offset; + p = str+offset; + offset = 0; + + /* write at most max(n,m) bytes */ + if(m > n) + m = n; + memmove(buf, p, m); + n -= m; + buf += m; + + if(n == 0) + return oldn; + } + } + } + return oldn - n; +} + +void +trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name) +{ + Vctl *v; + + if(vno < 0 || vno >= 256) + panic("trapenable: vno %d\n", vno); + v = malloc(sizeof(Vctl)); + v->type = "trap"; + v->tbdf = BUSUNKNOWN; + v->f = f; + v->a = a; + strncpy(v->name, name, KNAMELEN); + v->name[KNAMELEN-1] = 0; + + ilock(&vctllock); + v->next = vctl[vno]; + vctl[vno] = v; + iunlock(&vctllock); +} + +static void +nmienable(void) +{ + int x; + + /* + * Hack: should be locked with NVRAM access. + */ + outb(0x70, 0x80); /* NMI latch clear */ + outb(0x70, 0); + + x = inb(0x61) & 0x07; /* Enable NMI */ + outb(0x61, 0x08|x); + outb(0x61, x); +} + +void +trapinit(void) +{ + /* + * Need to set BPT interrupt gate - here or in vsvminit? 
+ */ + /* + * Special traps. + * Syscall() is called directly without going through trap(). + */ + trapenable(VectorBPT, debugbpt, 0, "#BP"); + trapenable(VectorPF, faultamd64, 0, "#PF"); + trapenable(Vector2F, doublefault, 0, "#DF"); + intrenable(IdtIPI, ipihdler, 0, BUSUNKNOWN, "#IPI"); + trapenable(Vector15, unexpected, 0, "#15"); + nmienable(); + + addarchfile("irqalloc", 0444, irqallocread, nil); +} + +static char* excname[32] = { + "#DE", /* Divide-by-Zero Error */ + "#DB", /* Debug */ + "#NMI", /* Non-Maskable-Interrupt */ + "#BP", /* Breakpoint */ + "#OF", /* Overflow */ + "#BR", /* Bound-Range */ + "#UD", /* Invalid-Opcode */ + "#NM", /* Device-Not-Available */ + "#DF", /* Double-Fault */ + "#9 (reserved)", + "#TS", /* Invalid-TSS */ + "#NP", /* Segment-Not-Present */ + "#SS", /* Stack */ + "#GP", /* General-Protection */ + "#PF", /* Page-Fault */ + "#15 (reserved)", + "#MF", /* x87 FPE-Pending */ + "#AC", /* Alignment-Check */ + "#MC", /* Machine-Check */ + "#XF", /* SIMD Floating-Point */ + "#20 (reserved)", + "#21 (reserved)", + "#22 (reserved)", + "#23 (reserved)", + "#24 (reserved)", + "#25 (reserved)", + "#26 (reserved)", + "#27 (reserved)", + "#28 (reserved)", + "#29 (reserved)", + "#30 (reserved)", + "#31 (reserved)", +}; + +/* + * keep interrupt service times and counts + */ +void +intrtime(int vno) +{ + ulong diff, x; /* should be uvlong */ + + x = perfticks(); + diff = x - m->perf.intrts; + m->perf.intrts = x; + + m->perf.inintr += diff; + if(up == nil && m->perf.inidle > diff) + m->perf.inidle -= diff; + + intrtimes[vno].cycles += diff; + intrtimes[vno].count++; +} + +static void +pmcnop(Mach *) +{ +} + +void (*_pmcupdate)(Mach *m) = pmcnop; + +/* go to user space */ +void +kexit(Ureg*) +{ + uvlong t; + Tos *tos; + Mach *mp; + + /* + * precise time accounting, kernel exit + * initialized in exec, sysproc.c + */ + tos = (Tos*)(USTKTOP-sizeof(Tos)); + cycles(&t); + tos->kcycles += t - up->kentry; + tos->pcycles = up->pcycles; + tos->pid = 
up->pid; + if (up->ac != nil) + mp = up->ac; + else + mp = m; + tos->core = mp->machno; + tos->nixtype = mp->nixrole; + _pmcupdate(m); + /* + * The process may change its core. + * Be sure it has the right cyclefreq. + */ + tos->cyclefreq = mp->cyclefreq; +} + +void +_trap(Ureg *ureg) +{ + /* + * If it's a real trap in this core, then we want to + * use the hardware cr2 register. + * We cannot do this in trap() because application cores + * would update m->cr2 with their cr2 values upon page faults, + * and then call trap(). + * If we do this in trap(), we would overwrite that with our own cr2. + */ + if(ureg->type == VectorPF) + m->cr2 = cr2get(); + trap(ureg); +} + +/* + * All traps come here. It is slower to have all traps call trap() + * rather than directly vectoring the handler. However, this avoids a + * lot of code duplication and possible bugs. The only exception is + * VectorSYSCALL. + * Trap is called with interrupts disabled via interrupt-gates. + */ +void +trap(Ureg* ureg) +{ + int clockintr, vno, user; + char buf[ERRMAX]; + Vctl *ctl, *v; + + vno = ureg->type; + + m->perf.intrts = perfticks(); + user = userureg(ureg); + if(user && (m->nixrole == NIXTC)){ + up->dbgreg = ureg; + cycles(&up->kentry); + } + + clockintr = 0; + + _pmcupdate(m); + + if(ctl = vctl[vno]){ + if(ctl->isintr){ + m->intr++; + if(vno >= VectorPIC && vno != VectorSYSCALL) + m->lastintr = ctl->irq; + }else + if(up) + up->nqtrap++; + + if(ctl->isr) + ctl->isr(vno); + for(v = ctl; v != nil; v = v->next){ + if(v->f) + v->f(ureg, v->a); + } + if(ctl->eoi) + ctl->eoi(vno); + intrtime(vno); + if(ctl->isintr){ + if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER) + clockintr = 1; + + if(up && !clockintr) + preempted(); + } + } + else if(vno < nelem(excname) && user){ + spllo(); + snprint(buf, sizeof buf, "sys: trap: %s", excname[vno]); + postnote(up, 1, buf, NDebug); + } + else if(vno >= VectorPIC && vno != VectorSYSCALL){ + /* + * An unknown interrupt. + * Check for a default IRQ7. 
This can happen when + * the IRQ input goes away before the acknowledge. + * In this case, a 'default IRQ7' is generated, but + * the corresponding bit in the ISR isn't set. + * In fact, just ignore all such interrupts. + */ + + /* clear the interrupt */ + i8259isr(vno); + + iprint("cpu%d: spurious interrupt %d, last %d\n", + m->machno, vno, m->lastintr); + intrtime(vno); + if(user) + kexit(ureg); + return; + } + else{ + if(vno == VectorNMI){ + nmienable(); + if(m->machno != 0){ + iprint("cpu%d: PC %#llux\n", + m->machno, ureg->ip); + for(;;); + } + } + dumpregs(ureg); + if(!user){ + ureg->sp = PTR2UINT(&ureg->sp); + dumpstackwithureg(ureg); + } + if(vno < nelem(excname)) + panic("%s", excname[vno]); + panic("unknown trap/intr: %d\n", vno); + } + splhi(); + + /* delaysched set because we held a lock or because our quantum ended */ + if(up && up->delaysched && clockintr){ + if(0) + if(user && up->ac == nil && up->nqtrap == 0 && up->nqsyscall == 0){ + if(!waserror()){ + up->ac = getac(up, -1); + poperror(); + runacore(); + return; + } + } + sched(); + splhi(); + } + + + if(user){ + if(up && up->procctl || up->nnote) + notify(ureg); + kexit(ureg); + } +} + +/* + * Dump general registers. 
/*
 * Dump general registers.
 *
 * Prints, with iprint, a heading naming the current process (or the
 * kernel when up is nil), then the saved registers in ureg, the
 * FSbase and GSbase MSRs, and the m and up pointers.
 */
static void
dumpgpr(Ureg* ureg)
{
	if(up != nil)
		iprint("cpu%d: registers for %s %d\n",
			m->machno, up->text, up->pid);
	else
		iprint("cpu%d: registers for kernel\n", m->machno);

	iprint("ax\t%#16.16llux\n", ureg->ax);
	iprint("bx\t%#16.16llux\n", ureg->bx);
	iprint("cx\t%#16.16llux\n", ureg->cx);
	iprint("dx\t%#16.16llux\n", ureg->dx);
	iprint("di\t%#16.16llux\n", ureg->di);
	iprint("si\t%#16.16llux\n", ureg->si);
	iprint("bp\t%#16.16llux\n", ureg->bp);
	iprint("r8\t%#16.16llux\n", ureg->r8);
	iprint("r9\t%#16.16llux\n", ureg->r9);
	iprint("r10\t%#16.16llux\n", ureg->r10);
	iprint("r11\t%#16.16llux\n", ureg->r11);
	iprint("r12\t%#16.16llux\n", ureg->r12);
	iprint("r13\t%#16.16llux\n", ureg->r13);
	iprint("r14\t%#16.16llux\n", ureg->r14);
	iprint("r15\t%#16.16llux\n", ureg->r15);
	iprint("ds %#4.4ux es %#4.4ux fs %#4.4ux gs %#4.4ux\n",
		ureg->ds, ureg->es, ureg->fs, ureg->gs);
	/*
	 * NOTE(review): labelled "ureg fs" but reads an unsigned int
	 * starting at &ureg->ds -- confirm which field was intended.
	 */
	iprint("ureg fs\t%#ux\n", *(unsigned int *)&ureg->ds);
	iprint("type\t%#llux\n", ureg->type);
	iprint("error\t%#llux\n", ureg->error);
	iprint("pc\t%#llux\n", ureg->ip);
	iprint("cs\t%#llux\n", ureg->cs);
	iprint("flags\t%#llux\n", ureg->flags);
	iprint("sp\t%#llux\n", ureg->sp);
	iprint("ss\t%#llux\n", ureg->ss);
	/* NOTE(review): type is printed a second time here */
	iprint("type\t%#llux\n", ureg->type);
	iprint("FS\t%#llux\n", rdmsr(FSbase));
	iprint("GS\t%#llux\n", rdmsr(GSbase));

	iprint("m\t%#16.16p\nup\t%#16.16p\n", m, up);
}
+ */ + iprint("cr0\t%#16.16llux\n", cr0get()); + iprint("cr2\t%#16.16llux\n", m->cr2); + iprint("cr3\t%#16.16llux\n", cr3get()); + +// archdumpregs(); +} + +/* + * Fill in enough of Ureg to get a stack trace, and call a function. + * Used by debugging interface rdb. + */ +void +callwithureg(void (*fn)(Ureg*)) +{ + Ureg ureg; + ureg.ip = getcallerpc(&fn); + ureg.sp = PTR2UINT(&fn); + fn(&ureg); +} + +static void +dumpstackwithureg(Ureg* ureg) +{ + char *s; + uintptr l, v, i, estack; + extern ulong etext; + int x; + + if((s = getconf("*nodumpstack")) != nil && atoi(s) != 0){ + iprint("dumpstack disabled\n"); + return; + } + iprint("dumpstack\n"); + + x = 0; + x += iprint("ktrace 9%s %#p %#p\n", strrchr(conffile, '/')+1, ureg->ip, ureg->sp); + i = 0; + if(up != nil +// && (uintptr)&l >= (uintptr)up->kstack + && (uintptr)&l <= (uintptr)up->kstack+KSTACK) + estack = (uintptr)up->kstack+KSTACK; + else if((uintptr)&l >= m->stack && (uintptr)&l <= m->stack+MACHSTKSZ) + estack = m->stack+MACHSTKSZ; + else{ + if(up != nil) + iprint("&up->kstack %#p &l %#p\n", up->kstack, &l); + else + iprint("&m %#p &l %#p\n", m, &l); + return; + } + x += iprint("estackx %#p\n", estack); + + for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){ + v = *(uintptr*)l; + if((KTZERO < v && v < (uintptr)&etext) + || ((uintptr)&l < v && v < estack) || estack-l < 256){ + x += iprint("%#16.16p=%#16.16p ", l, v); + i++; + } + if(i == 2){ + i = 0; + x += iprint("\n"); + } + } + if(i) + iprint("\n"); +} + +void +dumpstack(void) +{ + callwithureg(dumpstackwithureg); +} + +static void +debugbpt(Ureg* ureg, void*) +{ + char buf[ERRMAX]; + + if(up == 0) + panic("kernel bpt"); + /* restore pc to instruction that caused the trap */ + ureg->ip--; + sprint(buf, "sys: breakpoint"); + postnote(up, 1, buf, NDebug); +} + +static void +doublefault(Ureg*, void*) +{ + panic("double fault"); +} + +static void +unexpected(Ureg* ureg, void*) +{ + iprint("unexpected trap %llud; ignoring\n", ureg->type); +} + +static void 
+ipihdler(Ureg*, void*) +{ + coherence(); + if(isbooting(m) && m->proc == nil){ + tcquiesce(); + } +} + +static void +faultamd64(Ureg* ureg, void*) +{ + u64int addr; + int read, user, insyscall; + char buf[ERRMAX]; + + addr = m->cr2; + user = userureg(ureg); +// if(!user && mmukmapsync(addr)) +// return; + + /* + * There must be a user context. + * If not, the usual problem is causing a fault during + * initialisation before the system is fully up. + */ + if(up == nil){ + panic("fault with up == nil; pc %#llux addr %#llux\n", + ureg->ip, addr); + } + read = !(ureg->error & 2); + + insyscall = up->insyscall; + up->insyscall = 1; + if(fault(addr, read) < 0){ + + /* + * It is possible to get here with !user if, for example, + * a process was in a system call accessing a shared + * segment but was preempted by another process which shrunk + * or deallocated the shared segment; when the original + * process resumes it may fault while in kernel mode. + * No need to panic this case, post a note to the process + * and unwind the error stack. There must be an error stack + * (up->nerrlab != 0) if this is a system call, if not then + * the game's a bogey. + */ + if(!user && (!insyscall || up->nerrlab == 0)) + panic("fault: %#llux\n", addr); + sprint(buf, "sys: trap: fault %s addr=%#llux", + read? "read": "write", addr); + postnote(up, 1, buf, NDebug); + if(insyscall) + error(buf); + } + up->insyscall = insyscall; +} + +/* + * return the userpc the last exception happened at + */ +uintptr +userpc(Ureg* ureg) +{ + if(ureg == nil) + ureg = up->dbgreg; + return ureg->ip; +} + +/* This routine must save the values of registers the user is not permitted + * to write from devproc and then restore the saved values before returning. + * TODO: fix this because the segment registers are wrong for 64-bit mode. 
+ */ +void +setregisters(Ureg* ureg, char* pureg, char* uva, int n) +{ + u64int cs, flags, ss; + u16int ds, es, fs, gs; + + ss = ureg->ss; + flags = ureg->flags; + cs = ureg->cs; + gs = ureg->cs; + fs = ureg->cs; + es = ureg->cs; + ds = ureg->cs; + memmove(pureg, uva, n); + ureg->ds = ds; + ureg->es = es; + ureg->fs = fs; + ureg->gs = gs; + ureg->cs = cs; + ureg->flags = (ureg->flags & 0x00ff) | (flags & 0xff00); + ureg->ss = ss; +} + +/* Give enough context in the ureg to produce a kernel stack for + * a sleeping process + */ +void +setkernur(Ureg* ureg, Proc* p) +{ + ureg->ip = p->sched.pc; + ureg->sp = p->sched.sp+BY2SE; +} + +uintptr +dbgpc(Proc *p) +{ + Ureg *ureg; + + ureg = p->dbgreg; + if(ureg == 0) + return 0; + + return ureg->ip; +} diff -Nru 0/sys/src/nix/k10/usbehci.h 4/sys/src/nix/k10/usbehci.h --- 0/sys/src/nix/k10/usbehci.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/usbehci.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,83 @@ +/* override default macros from ../port/usb.h */ +#undef dprint +#undef ddprint +#undef deprint +#undef ddeprint +#define dprint(...) do if(ehcidebug)print(__VA_ARGS__); while(0) +#define ddprint(...) do if(ehcidebug>1)print(__VA_ARGS__); while(0) +#define deprint(...) do if(ehcidebug || ep->debug)print(__VA_ARGS__); while(0) +#define ddeprint(...) do if(ehcidebug>1 || ep->debug>1)print(__VA_ARGS__); while(0) + +typedef struct Ctlr Ctlr; +typedef struct Eopio Eopio; +typedef struct Isoio Isoio; +typedef struct Poll Poll; +typedef struct Qh Qh; +typedef struct Qtree Qtree; + +#pragma incomplete Ctlr; +#pragma incomplete Eopio; +#pragma incomplete Isoio; +#pragma incomplete Poll; +#pragma incomplete Qh; +#pragma incomplete Qtree; + +struct Poll +{ + Lock; + Rendez; + int must; + int does; +}; + +struct Ctlr +{ + Rendez; /* for waiting to async advance doorbell */ + Lock; /* for ilock. qh lists and basic ctlr I/O */ + QLock portlck; /* for port resets/enable... 
/*
 * Operational registers (hw)
 *
 * The hex number opening each field comment is that register's byte
 * offset from the start of the block; d2c pads from 0x1c up to 0x40
 * so that config lands at 0x40.
 */
struct Eopio
{
	ulong	cmd;		/* 00 command */
	ulong	sts;		/* 04 status */
	ulong	intr;		/* 08 interrupt enable */
	ulong	frno;		/* 0c frame index */
	ulong	seg;		/* 10 bits 63:32 of EHCI datastructs (unused) */
	ulong	frbase;		/* 14 frame list base addr, 4096-byte boundary */
	ulong	link;		/* 18 link for async list */
	uchar	d2c[0x40-0x1c];	/* 1c dummy */
	ulong	config;		/* 40 1: all ports default-routed to this HC */
	ulong	portsc[1];	/* 44 Port status and control, one per port */
};
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" +#include "../port/portusbehci.h" +#include "usbehci.h" + +static Ctlr* ctlrs[Nhcis]; +static int maxehci = Nhcis; + +/* Isn't this cap list search in a helper function? */ +static void +getehci(Ctlr* ctlr) +{ + int i, ptr, cap, sem; + + ptr = (ctlr->capio->capparms >> Ceecpshift) & Ceecpmask; + for(; ptr != 0; ptr = pcicfgr8(ctlr->pcidev, ptr+1)){ + if(ptr < 0x40 || (ptr & ~0xFC)) + break; + cap = pcicfgr8(ctlr->pcidev, ptr); + if(cap != Clegacy) + continue; + sem = pcicfgr8(ctlr->pcidev, ptr+CLbiossem); + if(sem == 0) + continue; + pcicfgw8(ctlr->pcidev, ptr+CLossem, 1); + for(i = 0; i < 100; i++){ + if(pcicfgr8(ctlr->pcidev, ptr+CLbiossem) == 0) + break; + delay(10); + } + if(i == 100) + dprint("ehci %#p: bios timed out\n", ctlr->capio); + pcicfgw32(ctlr->pcidev, ptr+CLcontrol, 0); /* no SMIs */ + ctlr->opio->config = 0; + coherence(); + return; + } +} + +static void +ehcireset(Ctlr *ctlr) +{ + Eopio *opio; + int i; + + ilock(ctlr); + dprint("ehci %#p reset\n", ctlr->capio); + opio = ctlr->opio; + + /* + * Turn off legacy mode. Some controllers won't + * interrupt us as expected otherwise. + */ + ehcirun(ctlr, 0); + pcicfgw16(ctlr->pcidev, 0xc0, 0x2000); + + /* + * reclaim from bios + */ + getehci(ctlr); + + /* clear high 32 bits of address signals if it's 64 bits capable. + * This is probably not needed but it does not hurt and others do it. 
+ */ + if((ctlr->capio->capparms & C64) != 0){ + dprint("ehci: 64 bits\n"); + opio->seg = 0; + coherence(); + } + + if(ehcidebugcapio != ctlr->capio){ + opio->cmd |= Chcreset; /* controller reset */ + coherence(); + for(i = 0; i < 100; i++){ + if((opio->cmd & Chcreset) == 0) + break; + delay(1); + } + if(i == 100) + print("ehci %#p controller reset timed out\n", ctlr->capio); + } + + /* requesting more interrupts per µframe may miss interrupts */ + opio->cmd &= ~Citcmask; + opio->cmd |= 1 << Citcshift; /* max of 1 intr. per 125 µs */ + coherence(); + switch(opio->cmd & Cflsmask){ + case Cfls1024: + ctlr->nframes = 1024; + break; + case Cfls512: + ctlr->nframes = 512; + break; + case Cfls256: + ctlr->nframes = 256; + break; + default: + panic("ehci: unknown fls %ld", opio->cmd & Cflsmask); + } + dprint("ehci: %d frames\n", ctlr->nframes); + iunlock(ctlr); +} + +static void +setdebug(Hci*, int d) +{ + ehcidebug = d; +} + +static void +shutdown(Hci *hp) +{ + int i; + Ctlr *ctlr; + Eopio *opio; + + ctlr = hp->aux; + ilock(ctlr); + opio = ctlr->opio; + opio->cmd |= Chcreset; /* controller reset */ + coherence(); + for(i = 0; i < 100; i++){ + if((opio->cmd & Chcreset) == 0) + break; + delay(1); + } + if(i >= 100) + print("ehci %#p controller reset timed out\n", ctlr->capio); + delay(100); + ehcirun(ctlr, 0); + opio->frbase = 0; + iunlock(ctlr); +} + +static int +checkdev(Pcidev *p) +{ + char *conf, *s, dev[32]; + + conf = getconf("*badehci"); + if(conf == nil) + return 0; + snprint(dev, sizeof dev, "%.4ux/%.4ux", p->vid, p->did); + + s = strstr(conf, dev); + if(s != nil && (s[9] == 0 || s[9] == ' ')) + return -1; + return 0; +} + +static void +scanpci(void) +{ + int i; + uintmem io; + Ctlr *ctlr; + Pcidev *p; + Ecapio *capio; + static int already; + + if(already) + return; + already = 1; + i = 0; + for(p = nil; (p = pcimatch(p, 0, 0)) != nil; ) { + /* + * Find EHCI controllers (Programming Interface = 0x20). 
+ */ + if(p->ccrb != 0xc || p->ccru != 3 || p->ccrp != 0x20) + continue; + if(i == Nhcis){ + print("ehci: bug: more than %d controllers\n", Nhcis); + continue; + } + if(checkdev(p) == -1){ + print("usbehci: ignore %.4ux/%.4ux\n", p->vid, p->did); + continue; + } + io = p->mem[0].bar & ~0x0f; + if(io == 0){ + print("usbehci: %x %x: failed to map registers\n", + p->vid, p->did); + continue; + } + if(p->intl == 0xff || p->intl == 0) { + print("usbehci: no irq assigned for port %#P\n", io); + continue; + } + dprint("usbehci: %#x %#x: port %#P size %#x irq %d\n", + p->vid, p->did, io, p->mem[0].size, p->intl); + capio = vmap(io, p->mem[0].size); + if(capio == nil){ + print("usbehci: can't vmap %#P\n", io); + continue; + } + + ctlr = malloc(sizeof(Ctlr)); + if (ctlr == nil) + panic("usbehci: out of memory"); + ctlr->pcidev = p; + ctlr->capio = capio; + ctlr->opio = (Eopio*)((uintptr)capio + (capio->cap & 0xff)); + pcisetbme(p); + pcisetpms(p, 0); + + /* + * currently, if we enable a second ehci controller on zt + * systems w x58m motherboard, we'll wedge solid after iunlock + * in init for the second one. + */ + if (i >= maxehci) { + print("usbehci: ignoring controllers after first %d, " + "at %#P\n", maxehci, io); + pciclrbme(p); + vunmap(capio, p->mem[0].size); + free(ctlr); + continue; + } + ctlrs[i++] = ctlr; + } +} + +static int +reset(Hci *hp) +{ + int i; + char *s; + Ctlr *ctlr; + Ecapio *capio; + Pcidev *p; + static Lock resetlck; + + s = getconf("*maxehci"); + if(s != nil){ + i = strtoul(s, &s, 0); + if(*s == 0) + maxehci = i; + } + if(maxehci == 0 || getconf("*nousbehci")) + return -1; + + ilock(&resetlck); + scanpci(); + + /* + * Any adapter matches if no hp->port is supplied, + * otherwise the ports must match. 
+ */ + ctlr = nil; + for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){ + ctlr = ctlrs[i]; + if(ctlr->active == 0) + if(hp->port == 0 || hp->port == (uintptr)ctlr->capio){ + ctlr->active = 1; + break; + } + } + iunlock(&resetlck); + if(i >= Nhcis || ctlrs[i] == nil) + return -1; + + p = ctlr->pcidev; + hp->aux = ctlr; + hp->port = (uintptr)ctlr->capio; + hp->irq = p->intl; + hp->tbdf = p->tbdf; + + capio = ctlr->capio; + hp->nports = capio->parms & Cnports; + + ddprint("echi: %s, ncc %ud npcc %ud\n", + capio->parms & 0x10000 ? "leds" : "no leds", + (capio->parms >> 12) & 0xf, (capio->parms >> 8) & 0xf); + ddprint("ehci: routing %s, %sport power ctl, %d ports\n", + capio->parms & 0x40 ? "explicit" : "automatic", + capio->parms & 0x10 ? "" : "no ", hp->nports); + + ehcireset(ctlr); + ehcimeminit(ctlr); + + /* + * Linkage to the generic HCI driver. + */ + ehcilinkage(hp); + hp->shutdown = shutdown; + hp->debug = setdebug; + return 0; +} + +void +usbehcilink(void) +{ + addhcitype("ehci", reset); +} diff -Nru 0/sys/src/nix/k10/usbohci.c 4/sys/src/nix/k10/usbohci.c --- 0/sys/src/nix/k10/usbohci.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/usbohci.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2589 @@ +/* + * USB Open Host Controller Interface (Ohci) driver + * + * BUGS: + * - not really 64-bit safe (Tds aren't necessarly in low memory) + * - Missing isochronous input streams. + * - Too many delays and ilocks. + * - bandwidth admission control must be done per-frame. + * - Buffering could be handled like in uhci, to avoid + * needed block allocation and avoid allocs for small Tds. + * - must warn of power overruns. 
+ */
+
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+#include	"../port/usb.h"
+
+typedef struct Ctlio Ctlio;
+typedef struct Ctlr Ctlr;
+typedef struct Ed Ed;
+typedef struct Edpool Edpool;
+typedef struct Epx Epx;
+typedef struct Hcca Hcca;
+typedef struct Isoio Isoio;
+typedef struct Ohci Ohci;
+typedef struct Qio Qio;
+typedef struct Qtree Qtree;
+typedef struct Td Td;
+typedef struct Tdpool Tdpool;
+
+/*
+ * Driver tunables, software queue states and the bit layouts
+ * of the descriptors and controller registers used below.
+ */
+enum
+{
+	Incr = 64,		/* for Td and Ed pools */
+
+	Align = 0x20,		/* OHCI only requires 0x10 */
+				/* use always a power of 2 */
+
+	Abortdelay = 1,		/* delay after cancelling Tds (ms) */
+	Tdatomic = 8,		/* max nb. of Tds per bulk I/O op. */
+	Enabledelay = 100,	/* waiting for a port to enable */
+
+
+	/* Queue states (software) */
+	Qidle = 0,
+	Qinstall,
+	Qrun,
+	Qdone,
+	Qclose,
+	Qfree,
+
+	/* Ed control bits */
+	Edmpsmask = 0x7ff,	/* max packet size */
+	Edmpsshift = 16,
+	Edlow = 1 << 13,	/* low speed */
+	Edskip = 1 << 14,	/* skip this ed */
+	Ediso = 1 << 15,	/* iso Tds used */
+	Edtddir = 0,		/* get dir from td */
+	Edin = 2 << 11,		/* direction in */
+	Edout = 1 << 11,	/* direction out */
+	Eddirmask = 3 << 11,	/* direction bits */
+	Edhalt = 1,		/* halted (in head ptr) */
+	Edtoggle = 2,		/* toggle (in head ptr) 1 == data1 */
+
+	/* Td control bits */
+	Tdround = 1<<18,	/* (rounding) short packets ok */
+	Tdtoksetup = 0<<19,	/* setup packet */
+	Tdtokin = 2<<19,	/* in packet */
+	Tdtokout = 1<<19,	/* out packet */
+	Tdtokmask = 3<<19,	/* in/out/setup bits */
+	Tdnoioc = 7<<21,	/* intr. cnt. value for no interrupt */
+	Tdusetog = 1<<25,	/* use toggle from Td (1) or Ed (0) */
+	Tddata1 = 1<<24,	/* data toggle (1 == data1) */
+	Tddata0 = 0<<24,
+	Tdfcmask = 7,		/* frame count (iso) */
+	Tdfcshift = 24,
+	Tdsfmask = 0xFFFF,	/* starting frame (iso) */
+	Tderrmask = 3,		/* error counter */
+	Tderrshift = 26,
+	Tdccmask = 0xf,		/* condition code (status) */
+	Tdccshift = 28,
+	Tdiccmask = 0xf,	/* condition code (iso, offsets) */
+	Tdiccshift = 12,
+
+	Ntdframes = 0x10000,	/* # of different iso frame numbers */
+
+	/* Td errors (condition code) */
+	Tdok = 0,
+	Tdcrc = 1,
+	Tdbitstuff = 2,
+	Tdbadtog = 3,
+	Tdstalled = 4,
+	Tdtmout = 5,
+	Tdpidchk = 6,
+	Tdbadpid = 7,
+	Tddataovr = 8,
+	Tddataund = 9,
+	Tdbufovr = 0xC,
+	Tdbufund = 0xD,
+	Tdnotacc = 0xE,
+
+	/* control register */
+	Cple = 0x04,		/* periodic list enable */
+	Cie = 0x08,		/* iso. list enable */
+	Ccle = 0x10,		/* ctl list enable */
+	Cble = 0x20,		/* bulk list enable */
+	Cfsmask = 3 << 6,	/* functional state... */
+	Cfsreset = 0 << 6,
+	Cfsresume = 1 << 6,
+	Cfsoper = 2 << 6,
+	Cfssuspend = 3 << 6,
+
+	/* command status */
+	Sblf = 1 << 2,		/* bulk list (load) flag */
+	Sclf = 1 << 1,		/* control list (load) flag */
+	Shcr = 1 << 0,		/* host controller reset */
+
+	/* intr enable */
+	Mie = 1 << 31,
+	Oc = 1 << 30,
+	Rhsc = 1 << 6,
+	Fno = 1 << 5,
+	Ue = 1 << 4,
+	Rd = 1 << 3,
+	Sf = 1 << 2,
+	Wdh = 1 << 1,
+	So = 1 << 0,
+
+	Fmaxpktmask = 0x7fff,
+	Fmaxpktshift = 16,
+	HcRhDescA_POTPGT_MASK = 0xff << 24,
+	HcRhDescA_POTPGT_SHIFT = 24,
+
+	/* Rh status */
+	Lps = 1 << 0,
+	Cgp = 1 << 0,		/* same bit as Lps */
+	Oci = 1 << 1,
+	Psm = 1 << 8,
+	Nps = 1 << 9,
+	Drwe = 1 << 15,
+	Srwe = 1 << 15,		/* same bit as Drwe */
+	Lpsc = 1 << 16,
+	Ccic = 1 << 17,
+	Crwe = 1 << 31,
+
+	/* port status */
+	Ccs = 0x00001,		/* current connect status */
+	Pes = 0x00002,		/* port enable status */
+	Pss = 0x00004,		/* port suspend status */
+	Poci = 0x00008,		/* over current indicator */
+	Prs = 0x00010,		/* port reset status */
+	Pps = 0x00100,		/* port power status */
+	Lsda = 0x00200,		/* low speed device attached */
+	Csc = 0x10000,		/* connect status change */
+	Pesc = 0x20000,		/* enable status change */
+	Pssc = 0x40000,		/* suspend status change */
+	Ocic = 0x80000,		/* over current ind. change */
+	Prsc = 0x100000,	/* reset status change */
+
+	/* port status write bits */
+	Cpe = 0x001,		/* clear port enable */
+	Spe = 0x002,		/* set port enable */
+	Spr = 0x010,		/* set port reset */
+	Spp = 0x100,		/* set port power */
+	Cpp = 0x200,		/* clear port power */
+
+};
+
+/*
+ * Endpoint descriptor. (first 4 words used by hardware)
+ */
+struct Ed {
+	u32int	ctrl;
+	u32int	tail;		/* transfer descriptor */
+	u32int	head;
+	u32int	nexted;
+
+	Ed*	next;		/* sw; in free list or next in list */
+	Td*	tds;		/* in use by current xfer; all for iso */
+	Ep*	ep;		/* debug/align */
+	Ed*	inext;		/* debug/align (dump interrupt eds). */
+};
+
+/*
+ * Endpoint I/O state (software), per direction.
+ */
+struct Qio
+{
+	QLock;			/* for the entire I/O process */
+	Rendez;			/* wait for completion */
+	Ed*	ed;		/* to place Tds on it */
+	int	sched;		/* queue number (intr/iso) */
+	int	toggle;		/* Tddata0/Tddata1 */
+	ulong	usbid;		/* device/endpoint address */
+	int	tok;		/* Tdtoksetup, Tdtokin, Tdtokout */
+	long	iotime;		/* last I/O time; to hold interrupt polls */
+	int	debug;		/* for the endpoint */
+	char*	err;		/* error status */
+	int	state;		/* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */
+	long	bw;		/* load (intr/iso) */
+};
+
+/*
+ * Control endpoint I/O state: a Qio plus the data kept
+ * from the last control request for a later read.
+ */
+struct Ctlio
+{
+	Qio;			/* single Ed for all transfers */
+	uchar*	data;		/* read from last ctl req. */
+	int	ndata;		/* number of bytes read */
+};
+
+/*
+ * Isochronous endpoint I/O state: a Qio plus the list of
+ * Tds available for further I/O and frame bookkeeping.
+ */
+struct Isoio
+{
+	Qio;
+	int	nframes;	/* number of frames for a full second */
+	Td*	atds;		/* Tds avail for further I/O */
+	int	navail;		/* number of avail Tds */
+	u32int	frno;		/* next frame number avail for I/O */
+	u32int	left;		/* remainder after rounding Hz to samples/ms */
+	int	nerrs;		/* consecutive errors on iso I/O */
+};
+
+/*
+ * Transfer descriptor.
Size must be multiple of 32 + * First block is used by hardware (aligned to 32). + */ +struct Td +{ + u32int ctrl; + u32int cbp; /* current buffer pointer */ + u32int nexttd; + u32int be; + u16int offsets[8]; /* used by Iso Tds only */ + + Td* next; /* in free or Ed tds list */ + Td* anext; /* in avail td list (iso) */ + Ep* ep; /* using this Td for I/O */ + Qio* io; /* using this Td for I/O */ + Block* bp; /* data for this Td */ + ulong nbytes; /* bytes in this Td */ + u32int cbp0; /* initial value for cbp */ + int last; /* true for last Td in Qio */ +}; + +/* + * Host controller communication area (hardware) + */ +struct Hcca +{ + u32int intrtable[32]; + u16int framenumber; + u16int pad1; + u32int donehead; + uchar reserved[116]; +}; + +/* + * I/O registers + */ +struct Ohci +{ + /* control and status group */ + u32int revision; /*00*/ + u32int control; /*04*/ + u32int cmdsts; /*08*/ + u32int intrsts; /*0c*/ + u32int intrenable; /*10*/ + u32int intrdisable; /*14*/ + + /* memory pointer group */ + u32int hcca; /*18*/ + u32int periodcurred; /*1c*/ + u32int ctlheaded; /*20*/ + u32int ctlcurred; /*24*/ + u32int bulkheaded; /*28*/ + u32int bulkcurred; /*2c*/ + u32int donehead; /*30*/ + + /* frame counter group */ + u32int fminterval; /*34*/ + u32int fmremaining; /*38*/ + u32int fmnumber; /*3c*/ + u32int periodicstart; /*40*/ + u32int lsthreshold; /*44*/ + + /* root hub group */ + u32int rhdesca; /*48*/ + u32int rhdescb; /*4c*/ + u32int rhsts; /*50*/ + u32int rhportsts[15]; /*54*/ + u32int pad25[20]; /*90*/ + + /* unknown */ + u32int hostueaddr; /*e0*/ + u32int hostuests; /*e4*/ + u32int hosttimeoutctrl; /*e8*/ + u32int pad59; /*ec*/ + u32int pad60; /*f0*/ + u32int hostrevision; /*f4*/ + u32int pad62[2]; + /*100*/ +}; + +/* + * Endpoint tree (software) + */ +struct Qtree +{ + int nel; + int depth; + ulong* bw; + Ed** root; +}; + +struct Tdpool +{ + Lock; + Td* free; + int nalloc; + int ninuse; + int nfree; +}; + +struct Edpool +{ + Lock; + Ed* free; + int nalloc; + int 
ninuse; + int nfree; +}; + +struct Ctlr +{ + Lock; /* for ilock; lists and basic ctlr I/O */ + QLock resetl; /* lock controller during USB reset */ + int active; + Ctlr* next; + int nports; + + Ohci* ohci; /* base I/O address */ + Hcca* hcca; /* intr/done Td lists (used by hardware) */ + int overrun; /* sched. overrun */ + Ed* intrhd; /* list of intr. eds in tree */ + Qtree* tree; /* tree for t Ep i/o */ + int ntree; /* number of dummy Eds in tree */ + Pcidev* pcidev; +}; + +#define dqprint if(debug || io && io->debug)print +#define ddqprint if(debug>1 || (io && io->debug>1))print +#define diprint if(debug || iso && iso->debug)print +#define ddiprint if(debug>1 || (iso && iso->debug>1))print +#define TRUNC(x, sz) ((x) & ((sz)-1)) + +static int ohciinterrupts[Nttypes]; +static char* iosname[] = { "idle", "install", "run", "done", "close", "FREE" }; + +static int debug; +static Edpool edpool; +static Tdpool tdpool; +static Ctlr* ctlrs[Nhcis]; + +static char EnotWritten[] = "usb write unfinished"; +static char EnotRead[] = "usb read unfinished"; +static char Eunderrun[] = "usb endpoint underrun"; + +static QLock usbhstate; /* protects name space state */ + +static int schedendpt(Ctlr *ub, Ep *ep); +static void unschedendpt(Ctlr *ub, Ep *ep); +static long qtd(Ctlr*, Ep*, int, Block*, uchar*, uchar*, int, ulong); + +static char* errmsgs[] = +{ +[Tdcrc] "crc error", +[Tdbitstuff] "bit stuffing error", +[Tdbadtog] "bad toggle", +[Tdstalled] Estalled, +[Tdtmout] "timeout error", +[Tdpidchk] "pid check error", +[Tdbadpid] "bad pid", +[Tddataovr] "data overrun", +[Tddataund] "data underrun", +[Tdbufovr] "buffer overrun", +[Tdbufund] "buffer underrun", +[Tdnotacc] "not accessed" +}; + +static void* +pa2ptr(uintmem pa) +{ + if(pa == 0) + return nil; + else if(pa > 0xffffffff) + panic("usb: ohci: highmem pa %#P", pa); + return KADDR(pa); +} + +static uintmem +ptr2pa(void *p) +{ + uintmem pa; + + if(p == nil) + return 0; + pa = PADDR(p); + if(pa > 0xffffffff) + panic("usb: ohci: 
highmemptr %#p", p); + return pa; +} + +static void +waitSOF(Ctlr *ub) +{ + int frame = ub->hcca->framenumber & 0x3f; + + do { + delay(2); + } while(frame == (ub->hcca->framenumber & 0x3f)); +} + +static char* +errmsg(int err) +{ + + if(err < nelem(errmsgs)) + return errmsgs[err]; + return nil; +} + +static Ed* +ctlhd(Ctlr *ctlr) +{ + return pa2ptr(ctlr->ohci->ctlheaded); +} + +static Ed* +bulkhd(Ctlr *ctlr) +{ + return pa2ptr(ctlr->ohci->bulkheaded); +} + +static void +edlinked(Ed *ed, Ed *next) +{ + if(ed == nil) + print("edlinked: nil ed: pc %#p\n", getcallerpc(&ed)); + ed->nexted = ptr2pa(next); + ed->next = next; +} + +static void +setctlhd(Ctlr *ctlr, Ed *ed) +{ + ctlr->ohci->ctlheaded = ptr2pa(ed); + if(ed != nil) + ctlr->ohci->cmdsts |= Sclf; /* reload it on next pass */ +} + +static void +setbulkhd(Ctlr *ctlr, Ed *ed) +{ + ctlr->ohci->bulkheaded = ptr2pa(ed); + if(ed != nil) + ctlr->ohci->cmdsts |= Sblf; /* reload it on next pass */ +} + +static void +unlinkctl(Ctlr *ctlr, Ed *ed) +{ + Ed *this, *prev, *next; + + ctlr->ohci->control &= ~Ccle; + waitSOF(ctlr); + this = ctlhd(ctlr); + ctlr->ohci->ctlcurred = 0; + prev = nil; + while(this != nil && this != ed){ + prev = this; + this = this->next; + } + if(this == nil){ + print("unlinkctl: not found\n"); + return; + } + next = this->next; + if(prev == nil) + setctlhd(ctlr, next); + else + edlinked(prev, next); + ctlr->ohci->control |= Ccle; + edlinked(ed, nil); /* wipe out next field */ +} + +static void +unlinkbulk(Ctlr *ctlr, Ed *ed) +{ + Ed *this, *prev, *next; + + ctlr->ohci->control &= ~Cble; + waitSOF(ctlr); + this = bulkhd(ctlr); + ctlr->ohci->bulkcurred = 0; + prev = nil; + while(this != nil && this != ed){ + prev = this; + this = this->next; + } + if(this == nil){ + print("unlinkbulk: not found\n"); + return; + } + next = this->next; + if(prev == nil) + setbulkhd(ctlr, next); + else + edlinked(prev, next); + ctlr->ohci->control |= Cble; + edlinked(ed, nil); /* wipe out next field */ +} + +static void 
+edsetaddr(Ed *ed, ulong addr) +{ + ulong ctrl; + + ctrl = ed->ctrl & ~((Epmax<<7)|Devmax); + ctrl |= (addr & ((Epmax<<7)|Devmax)); + ed->ctrl = ctrl; +} + +static void +edsettog(Ed *ed, int c) +{ + if(c != 0) + ed->head |= Edtoggle; + else + ed->head &= ~Edtoggle; +} + +static int +edtoggle(Ed *ed) +{ + return ed->head & Edtoggle; +} + +static int +edhalted(Ed *ed) +{ + return ed->head & Edhalt; +} + +static int +edmaxpkt(Ed *ed) +{ + return (ed->ctrl >> Edmpsshift) & Edmpsmask; +} + +static void +edsetmaxpkt(Ed *ed, int m) +{ + ulong c; + + c = ed->ctrl & ~(Edmpsmask << Edmpsshift); + ed->ctrl = c | ((m&Edmpsmask) << Edmpsshift); +} + +static int +tderrs(Td *td) +{ + return (td->ctrl >> Tdccshift) & Tdccmask; +} + +static int +tdtok(Td *td) +{ + return td->ctrl & Tdtokmask; +} + +static void* +lomallocalign(usize sz, usize align) +{ + void *va; + + va = mallocalign(sz, align, 0, 0); + if(PADDR(va) > 0xffffffff) + panic("usb: ohci: lomallocalign: mallocalign gives high mem %#p\n", va); + return va; +} + +static Td* +tdalloc(void) +{ + Td *td; + Td *pool; + int i; + + lock(&tdpool); + if(tdpool.free == nil){ + ddprint("ohci: tdalloc %d Tds\n", Incr); + pool = lomallocalign(Incr*sizeof(Td), Align); + if(pool == nil) + panic("tdalloc"); + for(i=Incr; --i>=0;){ + pool[i].next = tdpool.free; + tdpool.free = &pool[i]; + } + tdpool.nalloc += Incr; + tdpool.nfree += Incr; + } + tdpool.ninuse++; + tdpool.nfree--; + td = tdpool.free; + tdpool.free = td->next; + memset(td, 0, sizeof(Td)); + unlock(&tdpool); + + assert(((uintptr)td & 0xF) == 0); + return td; +} + +static void +tdfree(Td *td) +{ + if(td == 0) + return; + freeb(td->bp); + td->bp = nil; + lock(&tdpool); + if(td->nexttd == 0x77777777) + panic("ohci: tdfree: double free"); + memset(td, 7, sizeof(Td)); /* poison */ + td->next = tdpool.free; + tdpool.free = td; + tdpool.ninuse--; + tdpool.nfree++; + unlock(&tdpool); +} + +static Ed* +edalloc(void) +{ + Ed *ed, *pool; + int i; + + lock(&edpool); + if(edpool.free == 
nil){ + ddprint("ohci: edalloc %d Eds\n", Incr); + pool = lomallocalign(Incr*sizeof(Ed), Align); + if(pool == nil) + panic("edalloc"); + for(i=Incr; --i>=0;){ + pool[i].next = edpool.free; + edpool.free = &pool[i]; + } + edpool.nalloc += Incr; + edpool.nfree += Incr; + } + edpool.ninuse++; + edpool.nfree--; + ed = edpool.free; + edpool.free = ed->next; + memset(ed, 0, sizeof(Ed)); + unlock(&edpool); + + return ed; +} + +static void +edfree(Ed *ed) +{ + Td *td, *next; + int i; + + if(ed == 0) + return; + i = 0; + for(td = ed->tds; td != nil; td = next){ + next = td->next; + tdfree(td); + if(i++ > 2000){ + print("ohci: bug: ed with more than 2000 tds\n"); + break; + } + } + lock(&edpool); + if(ed->nexted == 0x99999999) + panic("ohci: edfree: double free"); + memset(ed, 9, sizeof(Ed)); /* poison */ + ed->next = edpool.free; + edpool.free = ed; + edpool.ninuse--; + edpool.nfree++; + unlock(&edpool); + ddprint("edfree: ed %#p\n", ed); +} + +/* + * return smallest power of 2 >= n + */ +static int +flog2(int n) +{ + int i; + + for(i = 0; (1 << i) < n; i++) + ; + return i; +} + +/* + * return smallest power of 2 <= n + */ +static int +flog2lower(int n) +{ + int i; + + for(i = 0; (1 << (i + 1)) <= n; i++) + ; + return i; +} + +static int +pickschedq(Qtree *qt, int pollival, ulong bw, ulong limit) +{ + int i, j, d, upperb, q; + ulong best, worst, total; + + d = flog2lower(pollival); + if(d > qt->depth) + d = qt->depth; + q = -1; + worst = 0; + best = ~0; + upperb = (1 << (d+1)) - 1; + for(i = (1 << d) - 1; i < upperb; i++){ + total = qt->bw[0]; + for(j = i; j > 0; j = (j - 1) / 2) + total += qt->bw[j]; + if(total < best){ + best = total; + q = i; + } + if(total > worst) + worst = total; + } + if(worst + bw >= limit) + return -1; + return q; +} + +static int +schedq(Ctlr *ctlr, Qio *io, int pollival) +{ + int q; + Ed *ted; + + q = pickschedq(ctlr->tree, pollival, io->bw, ~0); + ddqprint("ohci: sched %#p q %d, ival %d, bw %ld\n", io, q, pollival, io->bw); + if(q < 0){ + 
print("ohci: no room for ed\n"); + return -1; + } + ctlr->tree->bw[q] += io->bw; + ted = ctlr->tree->root[q]; + io->sched = q; + edlinked(io->ed, ted->next); + edlinked(ted, io->ed); + io->ed->inext = ctlr->intrhd; + ctlr->intrhd = io->ed; + return 0; +} + +static void +unschedq(Ctlr *ctlr, Qio *qio) +{ + int q; + Ed *prev, *this, *next; + Ed **l; + + q = qio->sched; + if(q < 0) + return; + ctlr->tree->bw[q] -= qio->bw; + + prev = ctlr->tree->root[q]; + this = prev->next; + while(this != nil && this != qio->ed){ + prev = this; + this = this->next; + } + if(this == nil) + print("ohci: unschedq %d: not found\n", q); + else{ + next = this->next; + edlinked(prev, next); + } + waitSOF(ctlr); + for(l = &ctlr->intrhd; *l != nil; l = &(*l)->inext) + if(*l == qio->ed){ + *l = (*l)->inext; + return; + } + print("ohci: unschedq: ed %#p not found\n", qio->ed); +} + +static char* +seprinttdtok(char *s, char *e, int tok) +{ + switch(tok){ + case Tdtoksetup: + s = seprint(s, e, " setup"); + break; + case Tdtokin: + s = seprint(s, e, " in"); + break; + case Tdtokout: + s = seprint(s, e, " out"); + break; + } + return s; +} + + +static char* +seprinttd(char *s, char *e, Td *td, int iso) +{ + int i; + Block *bp; + + if(td == nil) + return seprint(s, e, "\n"); + s = seprint(s, e, "%#p ep %#p ctrl %#.8ux", td, td->ep, td->ctrl); + s = seprint(s, e, " cc=%#ux", (td->ctrl >> Tdccshift) & Tdccmask); + if(iso == 0){ + if((td->ctrl & Tdround) != 0) + s = seprint(s, e, " rnd"); + s = seprinttdtok(s, e, td->ctrl & Tdtokmask); + if((td->ctrl & Tdusetog) != 0) + s = seprint(s, e, " d%d", (td->ctrl & Tddata1) ? 1 : 0); + else + s = seprint(s, e, " d-"); + s = seprint(s, e, " ec=%ud", (td->ctrl >> Tderrshift) & Tderrmask); + }else{ + s = seprint(s, e, " fc=%ud", (td->ctrl >> Tdfcshift) & Tdfcmask); + s = seprint(s, e, " sf=%ud", td->ctrl & Tdsfmask); + } + s = seprint(s, e, " cbp0 %#.8ux cbp %#.8ux next %#.8ux be %#.8ux %s", + td->cbp0, td->cbp, td->nexttd, td->be, td->last ? 
"last" : ""); + s = seprint(s, e, "\n\t\t%ld bytes", td->nbytes); + if((bp = td->bp) != nil){ + s = seprint(s, e, " rp %#p wp %#p ", bp->rp, bp->wp); + if(BLEN(bp) > 0) + s = seprintdata(s, e, bp->rp, bp->wp - bp->rp); + } + if(iso == 0) + return seprint(s, e, "\n"); + s = seprint(s, e, "\n\t\t"); + /* we use only offsets[0] */ + i = 0; + s = seprint(s, e, "[%d] %#ux cc=%#ux sz=%ud\n", i, td->offsets[i], + (td->offsets[i] >> Tdiccshift) & Tdiccmask, + td->offsets[i] & 0x7FF); + return s; +} + +static void +dumptd(Td *td, char *p, int iso) +{ + static char buf[512]; /* Too much */ + char *s; + + s = seprint(buf, buf+sizeof(buf), "%s: ", p); + s = seprinttd(s, buf+sizeof(buf), td, iso); + if(s > buf && s[-1] != '\n') + s[-1] = '\n'; + print("\t%s", buf); +} + +static void +dumptds(Td *td, char *p, int iso) +{ + int i; + + for(i = 0; td != nil; td = td->next){ + dumptd(td, p, iso); + if(td->last) + break; + if(tdtok(td) == Tdtokin && ++i > 2){ + print("\t\t...\n"); + break; + } + } +} + +static void +dumped(Ed *ed) +{ + char *buf, *s, *e; + + if(ed == nil){ + print("\n"); + return; + } + buf = malloc(512); + /* no waserror; may want to use from interrupt context */ + if(buf == nil) + return; + e = buf+512; + s = seprint(buf, e, "\ted %#p: ctrl %#.8ux", ed, ed->ctrl); + if((ed->ctrl & Edskip) != 0) + s = seprint(s, e, " skip"); + if((ed->ctrl & Ediso) != 0) + s = seprint(s, e, " iso"); + if((ed->ctrl & Edlow) != 0) + s = seprint(s, e, " low"); + s = seprint(s, e, " d%d", (ed->head & Edtoggle) ? 
1 : 0); + if((ed->ctrl & Eddirmask) == Edin) + s = seprint(s, e, " in"); + if((ed->ctrl & Eddirmask) == Edout) + s = seprint(s, e, " out"); + if(edhalted(ed)) + s = seprint(s, e, " hlt"); + s = seprint(s, e, " ep%ud.%ud", (ed->ctrl>>7)&Epmax, ed->ctrl&0x7f); + s = seprint(s, e, " maxpkt %ud", (ed->ctrl>>Edmpsshift)&Edmpsmask); + seprint(s, e, " tail %#.8ux head %#.8ux next %#.8ux\n", ed->tail, ed->head, ed->nexted); + print("%s", buf); + free(buf); + if(ed->tds != nil && (ed->ctrl & Ediso) == 0) + dumptds(ed->tds, "td", 0); +} + +static char* +seprintio(char *s, char *e, Qio *io, char *pref) +{ + s = seprint(s, e, "%s qio %#p ed %#p", pref, io, io->ed); + s = seprint(s, e, " tog %d iot %ld err %s id %#ulx", + io->toggle, io->iotime, io->err, io->usbid); + s = seprinttdtok(s, e, io->tok); + s = seprint(s, e, " %s\n", iosname[io->state]); + return s; +} + +static char* +seprintep(char* s, char* e, Ep *ep) +{ + Isoio *iso; + Qio *io; + Ctlio *cio; + + if(ep == nil) + return seprint(s, e, "\n"); + if(ep->aux == nil) + return seprint(s, e, "no mdep\n"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + s = seprintio(s, e, cio, "c"); + s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata); + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE) + s = seprintio(s, e, &io[OREAD], "r"); + if(ep->mode != OREAD) + s = seprintio(s, e, &io[OWRITE], "w"); + break; + case Tiso: + iso = ep->aux; + s = seprintio(s, e, iso, "w"); + s = seprint(s, e, "\tntds %d avail %d frno %ud left %ud next avail %#p\n", + iso->nframes, iso->navail, iso->frno, iso->left, iso->atds); + break; + } + return s; +} + +static char* +seprintctl(char *s, char *se, ulong ctl) +{ + s = seprint(s, se, "en="); + if((ctl&Cple) != 0) + s = seprint(s, se, "p"); + if((ctl&Cie) != 0) + s = seprint(s, se, "i"); + if((ctl&Ccle) != 0) + s = seprint(s, se, "c"); + if((ctl&Cble) != 0) + s = seprint(s, se, "b"); + switch(ctl & Cfsmask){ + case Cfsreset: + return seprint(s, se, " 
reset"); + case Cfsresume: + return seprint(s, se, " resume"); + case Cfsoper: + return seprint(s, se, " run"); + case Cfssuspend: + return seprint(s, se, " suspend"); + default: + return seprint(s, se, " ???"); + } +} + +static void +dump(Hci *hp) +{ + Ctlr *ctlr; + Ed *ed; + char cs[20]; + + ctlr = hp->aux; + ilock(ctlr); + seprintctl(cs, cs+sizeof(cs), ctlr->ohci->control); + print("ohci ctlr %#p: frno %#ux ctl %#ux %s sts %#ux intr %#ux\n", + ctlr, ctlr->hcca->framenumber, ctlr->ohci->control, cs, + ctlr->ohci->cmdsts, ctlr->ohci->intrsts); + print("ctlhd %#ux cur %#ux bulkhd %#ux cur %#ux done %#ux\n", + ctlr->ohci->ctlheaded, ctlr->ohci->ctlcurred, + ctlr->ohci->bulkheaded, ctlr->ohci->bulkcurred, + ctlr->ohci->donehead); + if(ctlhd(ctlr) != nil) + print("[ctl]\n"); + for(ed = ctlhd(ctlr); ed != nil; ed = ed->next) + dumped(ed); + if(bulkhd(ctlr) != nil) + print("[bulk]\n"); + for(ed = bulkhd(ctlr); ed != nil; ed = ed->next) + dumped(ed); + if(ctlr->intrhd != nil) + print("[intr]\n"); + for(ed = ctlr->intrhd; ed != nil; ed = ed->inext) + dumped(ed); + if(ctlr->tree->root[0]->next != nil) + print("[iso]"); + for(ed = ctlr->tree->root[0]->next; ed != nil; ed = ed->next) + dumped(ed); + print("%d eds in tree\n", ctlr->ntree); + iunlock(ctlr); + lock(&tdpool); + print("%d tds allocated = %d in use + %d free\n", + tdpool.nalloc, tdpool.ninuse, tdpool.nfree); + unlock(&tdpool); + lock(&edpool); + print("%d eds allocated = %d in use + %d free\n", + edpool.nalloc, edpool.ninuse, edpool.nfree); + unlock(&edpool); +} + +/* + * Compute size for the next iso Td and setup its + * descriptor for I/O according to the buffer size. 
+ * ep->hz samples per second are spread over polling intervals;
+ * the remainder of the division is carried in iso->left.
+ * The size is clamped to ep->maxpkt, the Td is programmed for
+ * frame iso->frno and iso->frno advances by ep->pollival.
+ */
+static void
+isodtdinit(Ep *ep, Isoio *iso, Td *td)
+{
+	Block *bp;
+	long size;
+	int i;
+
+	bp = td->bp;
+	assert(bp != nil && BLEN(bp) == 0);
+	size = (ep->hz+iso->left) * ep->pollival / 1000;
+	iso->left = (ep->hz+iso->left) * ep->pollival % 1000;
+	size *= ep->samplesz;
+	if(size > ep->maxpkt){
+		print("ohci: ep%d.%d: size > maxpkt\n",
+			ep->dev->nb, ep->nb);
+		print("size = %uld max = %ld\n", size, ep->maxpkt);
+		size = ep->maxpkt;
+	}
+	td->nbytes = size;
+	memset(bp->wp, 0, size);	/* in case we don't fill it on time */
+	/* cbp holds the page; offsets[0] the offset within the page */
+	td->cbp0 = td->cbp = ptr2pa(bp->rp) & ~0xFFF;
+	td->ctrl = TRUNC(iso->frno, Ntdframes);
+	td->offsets[0] = (ptr2pa(bp->rp) & 0xFFF);
+	td->offsets[0] |= (Tdnotacc << Tdiccshift);
+	/* in case the controller checks out the offsets... */
+	for(i = 1; i < nelem(td->offsets); i++)
+		td->offsets[i] = td->offsets[0];
+	td->be = ptr2pa(bp->rp + size - 1);
+	td->ctrl |= (0 << Tdfcshift);	/* frame count is 1 */
+
+	iso->frno = TRUNC(iso->frno + ep->pollival, Ntdframes);
+}
+
+/*
+ * start I/O on the dummy td and setup a new dummy to fill up.
+ * The new dummy is taken from the list of available Tds
+ * (iso->atds), linked after td and installed as the Ed tail.
+ */
+static void
+isoadvance(Ep *ep, Isoio *iso, Td *td)
+{
+	Td *dtd;
+
+	dtd = iso->atds;
+	iso->atds = dtd->anext;
+	iso->navail--;
+	dtd->anext = nil;
+	dtd->bp->wp = dtd->bp->rp;
+	dtd->nexttd = 0;
+	td->nexttd = ptr2pa(dtd);
+	isodtdinit(ep, iso, dtd);
+	iso->ed->tail = ptr2pa(dtd);
+}
+
+/*
+ * Sleep condition for iso writers: true when the endpoint is
+ * closed, has an error, or more than half of its Tds are
+ * available.
+ */
+static int
+isocanwrite(void *a)
+{
+	Isoio *iso;
+
+	iso = a;
+	return iso->state == Qclose || iso->err != nil ||
+		iso->navail > iso->nframes / 2;
+}
+
+/*
+ * Service a completed/failed Td from the done queue.
+ * It may be of any transfer type.
+ * The queue is not in completion order.
+ * (It's actually in reverse completion order).
+ *
+ * When an error, a short packet, or a last Td is found
+ * we awake the process waiting for the transfer.
+ * Although later we will process other Tds completed
+ * before, epio won't be able to touch the current Td
+ * until interrupt returns and releases the lock on the
+ * controller.
+ *
+ * Tds are ignored unless the Qio is in state Qrun.
+ * Condition codes not listed in the switch cause a panic.
+ */
+static void
+qhinterrupt(Ctlr *, Ep *ep, Qio *io, Td *td, int)
+{
+	Block *bp;
+	int mode, err;
+	Ed *ed;
+
+	ed = io->ed;
+	if(io->state != Qrun)
+		return;
+	if(tdtok(td) == Tdtokin)
+		mode = OREAD;
+	else
+		mode = OWRITE;
+	bp = td->bp;
+	err = tderrs(td);
+
+	switch(err){
+	case Tddataovr:			/* Overrun is not an error */
+		break;
+	case Tdok:
+		/* virtualbox doesn't always report underflow on short packets */
+		if(td->cbp == 0)
+			break;
+		/* fall through */
+	case Tddataund:
+		/* short input packets are ok */
+		if(mode == OREAD){
+			if(td->cbp == 0)
+				panic("ohci: short packet but cbp == 0");
+			/*
+			 * td->cbp and td->cbp0 are the real addresses
+			 * corresponding to virtual addresses bp->wp and
+			 * bp->rp respectively.
+			 */
+			bp->wp = bp->rp + (td->cbp - td->cbp0);
+			if(bp->wp < bp->rp)
+				panic("ohci: wp < rp");
+			/*
+			 * It's ok. clear error and flag as last in xfer.
+			 * epio must ignore following Tds.
+			 */
+			td->last = 1;
+			td->ctrl &= ~(Tdccmask << Tdccshift);
+			break;
+		}
+		/* else fall; it's an error */
+	case Tdcrc:
+	case Tdbitstuff:
+	case Tdbadtog:
+	case Tdstalled:
+	case Tdtmout:
+	case Tdpidchk:
+	case Tdbadpid:
+		bp->wp = bp->rp;	/* no bytes in xfer. */
+		io->err = errmsg(err);
+		if(debug || ep->debug){
+			print("tdinterrupt: failed err %d (%s)\n", err, io->err);
+			dumptd(td, "failed", ed->ctrl & Ediso);
+		}
+		td->last = 1;
+		break;
+	default:
+		panic("ohci: td cc %ud unknown", err);
+	}
+
+	if(td->last != 0){
+		/*
+		 * clear td list and halt flag.
+		 */
+		ed->head = (ed->head & Edtoggle) | ed->tail;
+		ed->tds = pa2ptr(ed->tail);
+		io->state = Qdone;
+		wakeup(io);
+	}
+}
+
+/*
+ * BUG: Iso input streams are not implemented.
+ *
+ * Service a done iso Td: record errors, put the Td back in the
+ * list of available Tds and wake up the writer if it can go on.
+ */
+static void
+isointerrupt(Ctlr *ctlr, Ep *ep, Qio *io, Td *td, int)
+{
+	Isoio *iso;
+	Block *bp;
+	Ed *ed;
+	int err, isoerr;
+
+	iso = ep->aux;
+	ed = io->ed;
+	if(io->state == Qclose)
+		return;
+	bp = td->bp;
+	/*
+	 * When we get more than half the frames consecutive errors
+	 * we signal an actual error. Errors in the entire Td are
+	 * more serious and are always signaled.
+	 * Errors like overrun are not really errors. In fact, for
+	 * output, errors cannot be really detected. The driver will
+	 * hopefully notice I/O errors on input endpoints and detach the device.
+	 */
+	err = tderrs(td);
+	isoerr = (td->offsets[0] >> Tdiccshift) & Tdiccmask;
+	if(isoerr == Tdok || isoerr == Tdnotacc)
+		iso->nerrs = 0;
+	else if(iso->nerrs++ > iso->nframes/2)
+		err = Tdstalled;
+	if(err != Tdok && err != Tddataovr){
+		bp->wp = bp->rp;
+		io->err = errmsg(err);
+		if(debug || ep->debug){
+			print("ohci: isointerrupt: ep%d.%d: err %d (%s) frnum %#ux\n",
+				ep->dev->nb, ep->nb,
+				err, errmsg(err), ctlr->ohci->fmnumber);
+			dumptd(td, "failed", ed->ctrl & Ediso);
+		}
+	}
+	/* empty the Td and make it available again */
+	td->bp->wp = td->bp->rp;
+	td->nbytes = 0;
+	td->anext = iso->atds;
+	iso->atds = td;
+	iso->navail++;
+	/*
+	 * If almost all Tds are avail the user is not doing I/O at the
+	 * required rate. We put another Td in place to keep the polling rate.
+	 */
+	if(iso->err == nil && iso->navail > iso->nframes - 10)
+		isoadvance(ep, iso, pa2ptr(iso->ed->tail));
+	/*
+	 * If there's enough buffering further I/O can be done.
+	 */
+	if(isocanwrite(iso))
+		wakeup(iso);
+}
+
+/*
+ * Interrupt handler. Walks the done queue published in the HCCA
+ * (at most 1024 Tds), dispatching each Td to isointerrupt or
+ * qhinterrupt, then acknowledges the interrupt bits and reports
+ * scheduling overruns, unrecoverable errors and unhandled bits.
+ */
+static void
+interrupt(Ureg *, void *arg)
+{
+	Td *td, *ntd, *td0;
+	Hci *hp;
+	Ctlr *ctlr;
+	u32int status, curred;
+	int i, frno;
+
+	hp = arg;
+	ctlr = hp->aux;
+	ilock(ctlr);
+	status = ctlr->ohci->intrsts;
+	status &= ctlr->ohci->intrenable;
+	status &= Oc|Rhsc|Fno|Ue|Rd|Sf|Wdh|So;
+	frno = TRUNC(ctlr->ohci->fmnumber, Ntdframes);
+	if((status & Wdh) != 0){
+		/* lsb of donehead has bit to flag other intrs. */
+		td = pa2ptr(ctlr->hcca->donehead & ~0xF);
+	}else
+		td = nil;
+	td0 = td;
+
+	for(i = 0; td != nil && i < 1024; i++){
+		if(0)ddprint("ohci tdinterrupt: td %#p\n", td);
+		ntd = pa2ptr(td->nexttd & ~0xF);
+		td->nexttd = 0;
+		if(td->ep == nil || td->io == nil)
+			panic("ohci: interrupt: ep %#p io %#p", td->ep, td->io);
+		ohciinterrupts[td->ep->ttype]++;
+		if(td->ep->ttype == Tiso)
+			isointerrupt(ctlr, td->ep, td->io, td, frno);
+		else
+			qhinterrupt(ctlr, td->ep, td->io, td, frno);
+		td = ntd;
+	}
+	if(i == 1024)
+		print("ohci: bug: more than 1024 done Tds?\n");
+
+	if(pa2ptr(ctlr->hcca->donehead & ~0xF) != td0)
+		print("ohci: bug: donehead changed before ack\n");
+	ctlr->hcca->donehead = 0;
+
+	/* acknowledge what we have seen; the bits left are reported below */
+	ctlr->ohci->intrsts = status;
+	status &= ~Wdh;
+	status &= ~Sf;
+	if(status & So){
+		print("ohci: sched overrun: too much load\n");
+		ctlr->overrun++;
+		status &= ~So;
+	}
+	if((status & Ue) != 0){
+		curred = ctlr->ohci->periodcurred;
+		print("ohci: unrecoverable error frame %#.8ux ed %#.8ux, "
+			"ints %d %d %d %d\n",
+			ctlr->ohci->fmnumber, curred,
+			ohciinterrupts[Tctl], ohciinterrupts[Tintr],
+			ohciinterrupts[Tbulk], ohciinterrupts[Tiso]);
+		if(curred != 0)
+			dumped(pa2ptr(curred));
+		status &= ~Ue;
+	}
+	if(status != 0)
+		print("ohci interrupt: unhandled sts %#.8ux\n", status);
+	iunlock(ctlr);
+}
+
+/*
+ * The old dummy Td is used to implement the new Td.
+ * A new dummy is linked at the end of the old one and
+ * returned, to link further Tds if needed.
+ */ +static Td* +epgettd(Ep *ep, Qio *io, Td **dtdp, int flags, void *a, int count) +{ + Td *td, *dtd; + Block *bp; + + if(count <= PGSZ) + bp = allocb(count); + else{ + if(count > 2*PGSZ) + panic("ohci: transfer > two pages"); + /* maximum of one physical page crossing allowed */ + bp = allocb(count+PGSZ); + bp->rp = (uchar*)ROUNDUP((uintptr)bp->rp, PGSZ); + bp->wp = bp->rp; + } + dtd = *dtdp; + td = dtd; + td->bp = bp; + if(count > 0){ + td->cbp0 = td->cbp = ptr2pa(bp->wp); + td->be = ptr2pa(bp->wp + count - 1); + if(a != nil){ + /* validaddr((uintptr)a, count, 0); DEBUG */ + assert(bp != nil); + assert(bp->wp != nil); + memmove(bp->wp, a, count); + } + bp->wp += count; + } + td->nbytes = count; + td->ctrl = io->tok|Tdusetog|io->toggle|flags; + if(io->toggle == Tddata0) + io->toggle = Tddata1; + else + io->toggle = Tddata0; + assert(td->ep == ep); + td->io = io; + dtd = tdalloc(); /* new dummy */ + dtd->ep = ep; + td->nexttd = ptr2pa(dtd); + td->next = dtd; + *dtdp = dtd; + return td; +} + +/* + * Try to get them idle + */ +static void +aborttds(Qio *io) +{ + Ed *ed; + Td *td; + + ed = io->ed; + if(ed == nil) + return; + ed->ctrl |= Edskip; + for(td = ed->tds; td != nil; td = td->next) + if(td->bp != nil) + td->bp->wp = td->bp->rp; + ed->head = (ed->head&0xF) | ed->tail; + if((ed->ctrl & Ediso) == 0) + ed->tds = pa2ptr(ed->tail); +} + +static int +epiodone(void *a) +{ + Qio *io; + + io = a; + return io->state != Qrun; +} + +static void +epiowait(Ctlr *ctlr, Qio *io, int tmout, ulong) +{ + Ed *ed; + int timedout; + + ed = io->ed; + if(0)ddqprint("ohci io %#p sleep on ed %#p state %s\n", + io, ed, iosname[io->state]); + timedout = 0; + if(waserror()){ + dqprint("ohci io %#p ed %#p timed out\n", io, ed); + timedout++; + }else{ + if(tmout == 0) + sleep(io, epiodone, io); + else + tsleep(io, epiodone, io, tmout); + poperror(); + } + ilock(ctlr); + if(io->state == Qrun) + timedout = 1; + else if(io->state != Qdone && io->state != Qclose) + panic("epio: ed not done and 
not closed"); + if(timedout){ + aborttds(io); + io->err = "request timed out"; + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + ilock(ctlr); + } + if(io->state != Qclose) + io->state = Qidle; + iunlock(ctlr); +} + +/* + * Non iso I/O. + * To make it work for control transfers, the caller may + * lock the Qio for the entire control transfer. + */ +static long +epio(Ep *ep, Qio *io, void *a, long count, int mustlock) +{ + Ed *ed; + Ctlr *ctlr; + char buf[80]; + char *err; + uchar *c; + Td *td, *ltd, *ntd, *td0; + int last, ntds, tmout; + long tot, n; + ulong load; + + ed = io->ed; + ctlr = ep->hp->aux; + io->debug = ep->debug; + tmout = ep->tmout; + ddeprint("ohci: %s ep%d.%d io %#p count %ld\n", + io->tok == Tdtokin ? "in" : "out", + ep->dev->nb, ep->nb, io, count); + if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("\t%s\n", buf); + } + if(mustlock){ + qlock(io); + if(waserror()){ + qunlock(io); + nexterror(); + } + } + io->err = nil; + ilock(ctlr); + if(io->state == Qclose){ /* Tds released by cancelio */ + iunlock(ctlr); + error(io->err ? 
io->err : Eio); + } + if(io->state != Qidle) + panic("epio: qio not idle"); + io->state = Qinstall; + + c = a; + ltd = td0 = ed->tds; + load = tot = 0; + do{ + n = 2*PGSZ; + if(count-tot < n) + n = count-tot; + if(c != nil && io->tok != Tdtokin) + td = epgettd(ep, io, <d, 0, c+tot, n); + else + td = epgettd(ep, io, <d, 0, nil, n); + tot += n; + load += ep->load; + }while(tot < count); + if(td0 == nil || ltd == nil || td0 == ltd) + panic("epio: no td"); + td->last = 1; + if(debug > 2 || ep->debug > 2) + dumptds(td0, "put td", ep->ttype == Tiso); + iunlock(ctlr); + + ilock(ctlr); + if(io->state != Qclose){ + io->iotime = TK2MS(sys->ticks); + io->state = Qrun; + ed->tail = ptr2pa(ltd); + if(ep->ttype == Tctl) + ctlr->ohci->cmdsts |= Sclf; + else if(ep->ttype == Tbulk) + ctlr->ohci->cmdsts |= Sblf; + } + iunlock(ctlr); + + epiowait(ctlr, io, tmout, load); + ilock(ctlr); + if(debug > 1 || ep->debug > 1) + dumptds(td0, "got td", 0); + iunlock(ctlr); + + tot = 0; + c = a; + ntds = last = 0; + for(td = td0; td != ltd; td = ntd){ + ntds++; + /* + * If the Td is flagged as last we must + * ignore any following Td. The block may + * seem to have bytes but interrupt has not seen + * those Tds through the done queue, and they are void. + */ + if(last == 0 && tderrs(td) == Tdok){ + n = BLEN(td->bp); + tot += n; + if(c != nil && tdtok(td) == Tdtokin && n > 0){ + memmove(c, td->bp->rp, n); + c += n; + } + } + last |= td->last; + ntd = td->next; + tdfree(td); + } + if(edtoggle(ed) == 0) + io->toggle = Tddata0; + else + io->toggle = Tddata1; + + err = io->err; + if(mustlock){ + qunlock(io); + poperror(); + } + ddeprint("ohci: io %#p: %d tds: return %ld err '%s'\n\n", + io, ntds, tot, err); + if(err != nil) + error(err); + if(tot < 0) + error(Eio); + return tot; +} + +/* + * halt condition was cleared on the endpoint. update our toggles. 
+ * Only bulk and interrupt endpoints are affected: the toggle of
+ * each direction in use is reset to Tddata0 under its qlock.
+ */
+static void
+clrhalt(Ep *ep)
+{
+	Qio *io;
+
+	ep->clrhalt = 0;
+	switch(ep->ttype){
+	case Tbulk:
+	case Tintr:
+		io = ep->aux;
+		if(ep->mode != OREAD){
+			qlock(&io[OWRITE]);
+			io[OWRITE].toggle = Tddata0;
+			deprint("ep clrhalt for io %#p\n", io+OWRITE);
+			qunlock(&io[OWRITE]);
+		}
+		if(ep->mode != OWRITE){
+			qlock(&io[OREAD]);
+			io[OREAD].toggle = Tddata0;
+			deprint("ep clrhalt for io %#p\n", io+OREAD);
+			qunlock(&io[OREAD]);
+		}
+		break;
+	}
+}
+
+/*
+ * Read from an endpoint.
+ * Control endpoints return the data kept by the last control
+ * request (then 0 once, then an error until a new request).
+ * Bulk and interrupt endpoints do the I/O through epio;
+ * interrupt reads are delayed so that they are at least
+ * about half a polling interval apart.
+ * Iso reads are not implemented and panic.
+ */
+static long
+epread(Ep *ep, void *a, long count)
+{
+	Ctlio *cio;
+	Qio *io;
+	char buf[80];
+	ulong delta;
+
+	if(ep->aux == nil)
+		panic("epread: not open");
+
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		qlock(cio);
+		if(waserror()){
+			qunlock(cio);
+			nexterror();
+		}
+		ddeprint("epread ctl ndata %d\n", cio->ndata);
+		if(cio->ndata < 0)
+			error("request expected");
+		else if(cio->ndata == 0){
+			cio->ndata = -1;
+			count = 0;
+		}else{
+			if(count > cio->ndata)
+				count = cio->ndata;
+			if(count > 0)
+				memmove(a, cio->data, count);
+			/* BUG for big transfers */
+			free(cio->data);
+			cio->data = nil;
+			cio->ndata = 0;	/* signal EOF next time */
+		}
+		qunlock(cio);
+		poperror();
+		if(debug>1 || ep->debug){
+			seprintdata(buf, buf+sizeof(buf), a, count);
+			print("epread: %s\n", buf);
+		}
+		return count;
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tintr:
+		io = ep->aux;
+		/* ms since the last I/O on this endpoint, plus one */
+		delta = TK2MS(sys->ticks) - io[OREAD].iotime + 1;
+		if(delta < ep->pollival / 2)
+			tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tiso:
+		panic("ohci: iso read not implemented");
+		break;
+	default:
+		panic("epread: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+
+/*
+ * Control transfers are one setup write (data0)
+ * plus zero or more reads/writes (data1, data0, ...)
+ * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform + * the entire transfer when the user writes the request, + * and keep any data read from the device for a later read. + * We call epio three times instead of placing all Tds at + * the same time because doing so leads to crc/tmout errors + * for some devices. + * Upon errors on the data phase we must still run the status + * phase or the device may cease responding in the future. + */ +static long +epctlio(Ep *ep, Ctlio *cio, void *a, long count) +{ /* a holds the setup packet (Rsetuplen bytes) followed by any outgoing data */ + uchar *c; + long len; + + ddeprint("epctlio: cio %#p ep%d.%d count %ld\n", + cio, ep->dev->nb, ep->nb, count); + if(count < Rsetuplen) + error("short usb command"); + qlock(cio); + free(cio->data); /* drop data left over from the previous request */ + cio->data = nil; + cio->ndata = 0; + if(waserror()){ + qunlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + nexterror(); + } + + /* set the address if unset and out of configuration state */ + if(ep->dev->state != Dconfig && ep->dev->state != Dreset) + if(cio->usbid == 0){ + cio->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax); + edsetaddr(cio->ed, cio->usbid); + } + /* adjust maxpkt if the user has learned a different one */ + if(edmaxpkt(cio->ed) != ep->maxpkt) + edsetmaxpkt(cio->ed, ep->maxpkt); + c = a; + cio->tok = Tdtoksetup; /* setup phase: DATA0, exactly Rsetuplen bytes */ + cio->toggle = Tddata0; + if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen) + error(Eio); + + a = c + Rsetuplen; + count -= Rsetuplen; + + cio->toggle = Tddata1; /* data phase starts at DATA1 */ + if(c[Rtype] & Rd2h){ + cio->tok = Tdtokin; + len = GET2(c+Rcount); /* reply length is taken from the setup packet */ + if(len <= 0) + error("bad length in d2h request"); + if(len > Maxctllen) + error("d2h data too large to fit in ohci"); + a = cio->data = smalloc(len+1); + }else{ + cio->tok = Tdtokout; + len = count; + } + if(len > 0) + if(waserror()) + len = -1; /* data phase failed; carry on so the status phase still runs */ + else{ + len = epio(ep, cio, a, len, 0); + poperror(); + } + if(c[Rtype] & Rd2h){ /* the status phase uses the token opposite to the data phase */ + count = Rsetuplen; + cio->ndata = len; + cio->tok = Tdtokout; + }else{ + if(len < 0) + count = -1; + else + count = Rsetuplen + len; + cio->tok = Tdtokin; + } +
cio->toggle = Tddata1; /* status phase: a zero-length transfer with DATA1 */ + epio(ep, cio, nil, 0, 0); + qunlock(cio); + poperror(); + ddeprint("epctlio cio %#p return %ld\n", cio, count); + return count; +} + +/* + * Put new samples in the dummy Td. + * BUG: This does only a transfer per Td. We could do up to 8. + */ +static long +putsamples(Ctlr *ctlr, Ep *ep, Isoio *iso, uchar *b, long count) +{ /* returns how many of the count bytes were taken, which may be fewer */ + Td *td; + ulong n; + + td = pa2ptr(iso->ed->tail); + n = count; + if(n > td->nbytes - BLEN(td->bp)) /* clamp to the room left in this Td */ + n = td->nbytes - BLEN(td->bp); + assert(td->bp->wp + n <= td->bp->lim); + memmove(td->bp->wp, b, n); + td->bp->wp += n; + if(BLEN(td->bp) == td->nbytes){ /* full Td: activate it */ + ilock(ctlr); + isoadvance(ep, iso, td); + iunlock(ctlr); + } + return n; +} + +static long +episowrite(Ep *ep, void *a, long count) +{ /* writes all count bytes or raises an error; holds the iso qlock throughout */ + long tot, nw; + char *err; + uchar *b; + Ctlr *ctlr; + Isoio *iso; + + ctlr = ep->hp->aux; + iso = ep->aux; + iso->debug = ep->debug; + + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + diprint("ohci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + b = a; + for(tot = 0; tot < count; tot += nw){ + while(isocanwrite(iso) == 0){ /* sleep with the ctlr lock released until there is room */ + iunlock(ctlr); + diprint("ohci: episowrite: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanwrite, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + err = iso->err; + iso->err = nil; + if(iso->state == Qclose || err != nil){ + iunlock(ctlr); + error(err ?
err : Eio); + } + if(iso->state != Qrun) + panic("episowrite: iso not running"); + iunlock(ctlr); /* We could page fault here */ + nw = putsamples(ctlr, ep, iso, b+tot, count-tot); + ilock(ctlr); + } + if(iso->state != Qclose) + iso->state = Qdone; + iunlock(ctlr); + err = iso->err; /* in case it failed early */ + iso->err = nil; + qunlock(iso); + poperror(); + if(err != nil) + error(err); + diprint("ohci: episowrite: %#p %ld bytes\n", iso, tot); + return tot; +} + +static long +epwrite(Ep *ep, void *a, long count) +{ /* write dispatch on ep->ttype; returns the byte count, errors are raised */ + Qio *io; + Ctlio *cio; + ulong delta; + uchar *b; + long tot, nw; + + if(ep->aux == nil) + panic("ohci: epwrite: not open"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + return epctlio(ep, cio, a, count); + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + /* + * Put at most Tdatomic Tds (512 bytes) at a time. + * Otherwise some devices produce babble errors. + */ + b = a; + assert(a != nil); + for(tot = 0; tot < count ; tot += nw){ + nw = count - tot; + if(nw > Tdatomic * ep->maxpkt) + nw = Tdatomic * ep->maxpkt; + nw = epio(ep, &io[OWRITE], b+tot, nw, 1); /* advance by what epio reports */ + } + return tot; + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OWRITE].iotime + 1; /* ms since the last write was started */ + if(delta < ep->pollival) + tsleep(&up->sleep, return0, 0, ep->pollival - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tiso: + return episowrite(ep, a, count); + default: + panic("ohci: epwrite: bad ep ttype %d", ep->ttype); + } + return -1; +} + +static Ed* +newed(Ctlr *ctlr, Ep *ep, Qio *io, char *) +{ /* allocate the Ed for io plus one Td, configure it and link it for ep->ttype */ + Ed *ed; + Td *td; + + ed = io->ed = edalloc(); /* no errors raised here, really */ + td = tdalloc(); + td->ep = ep; + td->io = io; + ed->tail = ptr2pa(td); /* head and tail both start at the single new Td */ + ed->head = ptr2pa(td); + ed->tds = td; + ed->ep = ep; + ed->ctrl = (ep->maxpkt & Edmpsmask) << Edmpsshift; + if(ep->ttype == Tiso) + ed->ctrl |= Ediso; + if(waserror()){ + edfree(ed); + io->ed = nil; + nexterror(); + } + /* For setup endpoints we start with the config
address */ + if(ep->ttype != Tctl) + edsetaddr(io->ed, io->usbid); + if(ep->dev->speed == Lowspeed) + ed->ctrl |= Edlow; + switch(io->tok){ /* fixed direction when the token is known, else left to each Td */ + case Tdtokin: + ed->ctrl |= Edin; + break; + case Tdtokout: + ed->ctrl |= Edout; + break; + default: + ed->ctrl |= Edtddir; /* Td will say */ + break; + } + + switch(ep->ttype){ + case Tctl: + ilock(ctlr); + edlinked(ed, ctlhd(ctlr)); + setctlhd(ctlr, ed); /* the new Ed becomes the control list head */ + iunlock(ctlr); + break; + case Tbulk: + ilock(ctlr); + edlinked(ed, bulkhd(ctlr)); + setbulkhd(ctlr, ed); /* the new Ed becomes the bulk list head */ + iunlock(ctlr); + break; + case Tintr: + case Tiso: + ilock(ctlr); + schedq(ctlr, io, ep->pollival); + iunlock(ctlr); + break; + default: + panic("ohci: newed: bad ttype"); + } + poperror(); + return ed; +} + +static void +isoopen(Ctlr *ctlr, Ep *ep) +{ /* set up the Isoio already allocated in ep->aux; only output streams are supported */ + Td *td, *edtds; + Isoio *iso; + int i; + + iso = ep->aux; + iso->usbid = (ep->nb<<7)|(ep->dev->nb & Devmax); /* endpoint number above bit 7, device number below */ + iso->bw = ep->hz * ep->samplesz; /* bytes/sec */ + if(ep->mode != OWRITE){ + print("ohci: bug: iso input streams not implemented\n"); + error("ohci iso input streams not implemented"); + }else + iso->tok = Tdtokout; + + iso->left = 0; + iso->nerrs = 0; + iso->frno = TRUNC(ctlr->ohci->fmnumber + 10, Ntdframes); + iso->nframes = 1000 / ep->pollival; + if(iso->nframes < 10){ + print("ohci: isoopen: less than 10 frames; using 10.\n"); + iso->nframes = 10; + } + iso->navail = iso->nframes; + iso->atds = edtds = nil; + for(i = 0; i < iso->nframes-1; i++){ /* -1 for dummy */ + td = tdalloc(); + td->ep = ep; + td->io = iso; + td->bp = allocb(ep->maxpkt); + td->anext = iso->atds; /* link as avail */ + iso->atds = td; + td->next = edtds; /* and also chain it on the list hung off the Ed below */ + edtds = td; + } + newed(ctlr, ep, iso, "iso"); /* allocates a dummy td */ + iso->ed->tds->bp = allocb(ep->maxpkt); /* but not its block */ + iso->ed->tds->next = edtds; + isodtdinit(ep, iso, iso->ed->tds); +} + +/* + * Allocate the endpoint and set it up for I/O + * in the controller.
This must follow what's said + * in Ep regarding configuration, including perhaps + * the saved toggles (saved on a previous close of + * the endpoint data file by epclose). + */ +static void +epopen(Ep *ep) +{ + Ctlr *ctlr; + Qio *io; + Ctlio *cio; + u32int usbid; + + ctlr = ep->hp->aux; + deprint("ohci: epopen ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux != nil) + panic("ohci: epopen called with open ep"); + if(waserror()){ + free(ep->aux); + ep->aux = nil; + nexterror(); + } + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tiso: + ep->aux = smalloc(sizeof(Isoio)); + isoopen(ctlr, ep); + break; + case Tctl: + cio = ep->aux = smalloc(sizeof(Ctlio)); + cio->debug = ep->debug; + cio->ndata = -1; + cio->data = nil; + cio->tok = -1; /* invalid; Tds will say */ + if(ep->dev->isroot != 0 && ep->nb == 0) /* root hub */ + break; + newed(ctlr, ep, cio, "epc"); + break; + case Tbulk: + ep->pollival = 1; /* assume this; doesn't really matter */ + /* and fall... */ + case Tintr: + io = ep->aux = smalloc(sizeof(Qio)*2); /* two Qios, indexed by OREAD and OWRITE */ + io[OREAD].debug = io[OWRITE].debug = ep->debug; + usbid = (ep->nb<<7)|(ep->dev->nb & Devmax); + if(ep->mode != OREAD){ /* writable: restore the toggle kept in ep->toggle */ + if(ep->toggle[OWRITE] != 0) + io[OWRITE].toggle = Tddata1; + else + io[OWRITE].toggle = Tddata0; + io[OWRITE].tok = Tdtokout; + io[OWRITE].usbid = usbid; + io[OWRITE].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + newed(ctlr, ep, io+OWRITE, "epw"); + } + if(ep->mode != OWRITE){ /* readable: same for the read side */ + if(ep->toggle[OREAD] != 0) + io[OREAD].toggle = Tddata1; + else + io[OREAD].toggle = Tddata0; + io[OREAD].tok = Tdtokin; + io[OREAD].usbid = usbid; + io[OREAD].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + newed(ctlr, ep, io+OREAD, "epr"); + } + break; + } + deprint("ohci: epopen done:\n"); + if(debug || ep->debug) + dump(ep->hp); + poperror(); +} + +static void +cancelio(Ep *ep, Qio *io) +{ /* abort pending I/O on io, unlink its Ed from the controller and free the Ed */ + Ed *ed; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + + ilock(ctlr); + if(io == nil || io->state == Qclose){ /* missing or already closed: nothing to do */ + assert(io == nil || io->ed ==
nil); + iunlock(ctlr); + return; + } + ed = io->ed; + io->state = Qclose; + io->err = Eio; + aborttds(io); + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + + wakeup(io); + qlock(io); + /* wait for epio if running */ + qunlock(io); + + ilock(ctlr); + switch(ep->ttype){ + case Tctl: + unlinkctl(ctlr, ed); + break; + case Tbulk: + unlinkbulk(ctlr, ed); + break; + case Tintr: + case Tiso: + unschedq(ctlr, io); + break; + default: + panic("ohci cancelio: bad ttype"); + } + iunlock(ctlr); + edfree(io->ed); + io->ed = nil; +} + +static void +epclose(Ep *ep) +{ + Ctlio *cio; + Isoio *iso; + Qio *io; + + deprint("ohci: epclose ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux == nil) + panic("ohci: epclose called with closed ep"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + cancelio(ep, cio); + free(cio->data); + cio->data = nil; + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE){ + cancelio(ep, &io[OREAD]); + if(io[OREAD].toggle == Tddata1) + ep->toggle[OREAD] = 1; + } + if(ep->mode != OREAD){ + cancelio(ep, &io[OWRITE]); + if(io[OWRITE].toggle == Tddata1) + ep->toggle[OWRITE] = 1; + } + break; + case Tiso: + iso = ep->aux; + cancelio(ep, iso); + break; + default: + panic("epclose: bad ttype %d", ep->ttype); + } + + deprint("ohci: epclose ep%d.%d: done\n", ep->dev->nb, ep->nb); + free(ep->aux); + ep->aux = nil; +} + +static int +portreset(Hci *hp, int port, int on) +{ + Ctlr *ctlr; + Ohci *ohci; + + if(on == 0) + return 0; + + ctlr = hp->aux; + qlock(&ctlr->resetl); + if(waserror()){ + qunlock(&ctlr->resetl); + nexterror(); + } + ilock(ctlr); + ohci = ctlr->ohci; + ohci->rhportsts[port - 1] = Spp; + if((ohci->rhportsts[port - 1] & Ccs) == 0){ + iunlock(ctlr); + error("port not connected"); + } + ohci->rhportsts[port - 1] = Spr; + while((ohci->rhportsts[port - 1] & Prsc) == 0){ + iunlock(ctlr); + dprint("ohci: portreset, wait for reset complete\n"); + ilock(ctlr); + } + 
ohci->rhportsts[port - 1] = Prsc; + iunlock(ctlr); + poperror(); + qunlock(&ctlr->resetl); + return 0; +} + +static int +portenable(Hci *hp, int port, int on) +{ /* port is used as an index biased by one into rhportsts; always returns 0 */ + Ctlr *ctlr; + + ctlr = hp->aux; + dprint("ohci: %#p port %d enable=%d\n", ctlr->ohci, port, on); + qlock(&ctlr->resetl); + if(waserror()){ + qunlock(&ctlr->resetl); + nexterror(); + } + ilock(ctlr); + if(on) + ctlr->ohci->rhportsts[port - 1] = Spe | Spp; + else + ctlr->ohci->rhportsts[port - 1] = Cpe; + iunlock(ctlr); + tsleep(&up->sleep, return0, 0, Enabledelay); /* give the port time to change state */ + poperror(); + qunlock(&ctlr->resetl); + return 0; +} + +static int +portstatus(Hci *hp, int port) +{ + int v; + Ctlr *ub; + u32int ohcistatus; + + /* + * We must return status bits as a + * get port status hub request would do. + */ + ub = hp->aux; + ohcistatus = ub->ohci->rhportsts[port - 1]; /* read once; the bits are mapped to HP flags below */ + v = 0; + if(ohcistatus & Ccs) + v |= HPpresent; + if(ohcistatus & Pes) + v |= HPenable; + if(ohcistatus & Pss) + v |= HPsuspend; + if(ohcistatus & Prs) + v |= HPreset; + else { + /* port is not in reset; these potential writes are ok */ + if(ohcistatus & Csc){ + v |= HPstatuschg; + ub->ohci->rhportsts[port - 1] = Csc; /* write the change bit back; presumably this acknowledges it -- confirm */ + } + if(ohcistatus & Pesc){ + v |= HPchange; + ub->ohci->rhportsts[port - 1] = Pesc; + } + } + if(ohcistatus & Lsda) + v |= HPslow; + if(v & (HPstatuschg|HPchange)) + ddprint("ohci port %d sts %#ux hub sts %#x\n", port, ohcistatus, v); + return v; +} + +static void +dumpohci(Ctlr *ctlr) +{ + int i; + u32int *ohci; + + ohci = &ctlr->ohci->revision; /* walk the register block as words, starting at revision */ + print("ohci registers: \n"); + for(i = 0; i < sizeof(Ohci)/sizeof *ohci; i++) + if(i < 3 || ohci[i] != 0) /* always show the first three, then only nonzero words */ + print("\t[%#2.2x]\t%#8.8ux\n", i * 4, ohci[i]); + print("\n"); +} + +static void +init(Hci *hp) +{ + Ctlr *ctlr; + Ohci *ohci; + int i; + u32int ival, ctrl, fmi; + + ctlr = hp->aux; + dprint("ohci %#p init\n", ctlr->ohci); + ohci = ctlr->ohci; + + fmi = ctlr->ohci->fminterval; /* saved so it can be written back after the reset */ + ctlr->ohci->cmdsts = Shcr; /* reset the block */ + while(ctlr->ohci->cmdsts & Shcr) + delay(1); /* wait till
reset complete, Ohci says 10us max. */ + ctlr->ohci->fminterval = fmi; /* put back the value read before the reset */ + + /* + * now that soft reset is done we are in suspend state. + * Setup registers which take in suspend state + * (will only be here for 2ms). + */ + + ctlr->ohci->hcca = ptr2pa(ctlr->hcca); + setctlhd(ctlr, nil); /* start with empty control and bulk lists */ + ctlr->ohci->ctlcurred = 0; + setbulkhd(ctlr, nil); + ctlr->ohci->bulkcurred = 0; + + ohci->intrenable = Mie | Wdh | Ue; + ohci->control |= Ccle | Cble | Cple | Cie | Cfsoper; + + /* set frame after operational */ + ohci->rhdesca = Nps; /* no power switching */ + if(ohci->rhdesca & Nps){ + dprint("ohci: ports are not power switched\n"); + }else{ + dprint("ohci: ports are power switched\n"); + ohci->rhdesca &= ~Psm; + ohci->rhsts &= ~Lpsc; + } + for(i = 0; i < ctlr->nports; i++) /* paranoia */ + ohci->rhportsts[i] = 0; /* this has no effect */ + delay(50); + + for(i = 0; i < ctlr->nports; i++){ + ohci->rhportsts[i] = Spp; + if((ohci->rhportsts[i] & Ccs) != 0) /* only ports reporting Ccs get Spr */ + ohci->rhportsts[i] |= Spr; + } + delay(100); + + ctrl = ohci->control; + if((ctrl & Cfsmask) != Cfsoper){ /* state field is not Cfsoper yet: set it again */ + ctrl = (ctrl & ~Cfsmask) | Cfsoper; + ohci->control = ctrl; + ohci->rhsts = Lpsc; + } + ival = ohci->fminterval & ~(Fmaxpktmask << Fmaxpktshift); /* replace only the Fmaxpkt field */ + ohci->fminterval = ival | (5120 << Fmaxpktshift); + + if(debug > 1) + dumpohci(ctlr); +} + +static void +scanpci(void) +{ /* fill ctlrs[] with one Ctlr per OHCI PCI function found; runs at most once */ + uintmem pa; + void *va; + Ctlr *ctlr; + Pcidev *p; + int i; + static int already = 0; + + if(already) + return; + already = 1; + for(p = nil; p = pcimatch(p, 0, 0); ) { + /* + * Find Ohci controllers (Programming Interface = 0x10).
+ */ + if(p->ccrb != 0xc || p->ccru != 3 || p->ccrp != 0x10) + continue; + pa = p->mem[0].bar & ~0x0F; + if(p->intl == 0xFF || p->intl == 0) { + print("usb: ohci: no irq assigned for port %#P\n", pa); + continue; + } + dprint("ohci: %.4ux/%.4ux port %#P size %#ux irq %d\n", + p->vid, p->did, pa, p->mem[0].size, p->intl); + va = vmap(pa, p->mem[0].size); + if(va == nil){ + print("ohci: failed to map registers\n"); + continue; + } + + ctlr = smalloc(sizeof(Ctlr)); + ctlr->pcidev = p; + ctlr->ohci = va; + dprint("scanpci: ctlr %#p, ohci %#p\n", ctlr, ctlr->ohci); + pcisetbme(p); + pcisetpms(p, 0); + for(i = 0; i < Nhcis; i++) /* take the first free slot */ + if(ctlrs[i] == nil){ + ctlrs[i] = ctlr; + break; + } + if(i == Nhcis) + print("ohci: bug: no more controllers\n"); + } +} + +static void +usbdebug(Hci*, int d) +{ + debug = d; /* the debug level is global, not per controller */ +} + +/* + * build the periodic scheduling tree: + * framesize must be a multiple of the tree size + */ +static void +mkqhtree(Ctlr *ctlr) +{ + int i, n, d, o, leaf0, depth; + Ed **tree; + Qtree *qt; + + depth = flog2(32); + n = (1 << (depth+1)) - 1; /* node count of a complete binary tree of that depth */ + qt = mallocz(sizeof(*qt), 1); + if(qt == nil) + panic("usb: can't allocate scheduling tree"); + qt->nel = n; + qt->depth = depth; + qt->bw = mallocz(n * sizeof(*qt->bw), 1); /* n elements: size by the element, not by the pointer */ + qt->root = tree = mallocz(n * sizeof(Ed *), 1); + if(qt->bw == nil || qt->root == nil) + panic("usb: can't allocate scheduling tree"); + for(i = 0; i < n; i++){ + if((tree[i] = edalloc()) == nil) + panic("mkqhtree"); + tree[i]->ctrl = (8 << Edmpsshift); /* not needed */ + tree[i]->ctrl |= Edskip; + + if(i > 0) + edlinked(tree[i], tree[(i-1)/2]); /* every node links to its parent, ending at the root */ + else + edlinked(tree[i], nil); + } + ctlr->ntree = i; + dprint("ohci: tree: %d endpoints allocated\n", i); + + /* distribute leaves evenly round the frame list */ + leaf0 = n / 2; + for(i = 0; i < 32; i++){ + o = 0; + for(d = 0; d < depth; d++){ /* o is i with its low depth bits reversed */ + o <<= 1; + if(i & (1 << d)) + o |= 1; + } + if(leaf0 + o >= n){ + print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n); + break; + } + ctlr->hcca->intrtable[i] =
ptr2pa(tree[leaf0 + o]); + } + ctlr->tree = qt; +} + +static void +ohcimeminit(Ctlr *ctlr) +{ /* allocate the zeroed, 256-byte aligned Hcca and build the periodic tree */ + Hcca *hcca; + + edfree(edalloc()); /* allocate pools now */ + tdfree(tdalloc()); + + hcca = lomallocalign(sizeof(Hcca), 256); + if(hcca == nil) + panic("usbhreset: no memory for Hcca"); + memset(hcca, 0, sizeof(*hcca)); + ctlr->hcca = hcca; + + mkqhtree(ctlr); +} + +static void +ohcireset(Ctlr *ctlr) +{ + ilock(ctlr); + dprint("ohci %#p reset\n", ctlr->ohci); + + /* + * usually enter here in reset, wait till its through, + * then do our own so we are on known timing conditions. + * Is this needed? + */ + delay(100); + ctlr->ohci->control = 0; + delay(100); + + /* legacy support register: turn off lunacy mode */ + pcicfgw16(ctlr->pcidev, 0xc0, 0x2000); + + iunlock(ctlr); +} + +static void +shutdown(Hci *hp) +{ /* mask the interrupts enabled by init and clear the control register */ + Ctlr *ctlr; + + ctlr = hp->aux; + + ilock(ctlr); + ctlr->ohci->intrdisable = Mie | Wdh | Ue; + ctlr->ohci->control = 0; + delay(100); + iunlock(ctlr); +} + +static int +reset(Hci *hp) +{ /* claim an inactive controller for hp and fill in its entry points; -1 if none */ + int i; + Ctlr *ctlr; + Pcidev *p; + static Lock resetlck; + + if(getconf("*nousbohci")) + return -1; + ilock(&resetlck); + scanpci(); + + /* + * Any adapter matches if no hp->port is supplied, + * otherwise the ports must match. + */ + ctlr = nil; + for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){ + ctlr = ctlrs[i]; + if(ctlr->active == 0) + if(hp->port == 0 || hp->port == (uintptr)ctlr->ohci){ + ctlr->active = 1; + break; + } + } + iunlock(&resetlck); + if(i == Nhcis || ctlrs[i] == nil) /* test the index first: ctlrs[Nhcis] is out of bounds */ + return -1; + if(ctlr->ohci->control == ~0) /* all ones: presumably no device answers at this address -- confirm */ + return -1; + + + p = ctlr->pcidev; + hp->aux = ctlr; + hp->port = (uintptr)ctlr->ohci; + hp->irq = p->intl; + hp->tbdf = p->tbdf; + ctlr->nports = hp->nports = ctlr->ohci->rhdesca & 0xff; + + ohcireset(ctlr); + ohcimeminit(ctlr); + + /* + * Linkage to the generic HCI driver.
+ */ + hp->init = init; + hp->dump = dump; + hp->interrupt = interrupt; + hp->epopen = epopen; + hp->epclose = epclose; + hp->epread = epread; + hp->epwrite = epwrite; + hp->seprintep = seprintep; + hp->portenable = portenable; + hp->portreset = portreset; + hp->portstatus = portstatus; + hp->shutdown = shutdown; + hp->debug = usbdebug; + hp->type = "ohci"; + return 0; +} + +void +usbohcilink(void) +{ + addhcitype("ohci", reset); /* registers reset under the same name reset stores in hp->type */ +} diff -Nru 0/sys/src/nix/k10/usbuhci.c 4/sys/src/nix/k10/usbuhci.c --- 0/sys/src/nix/k10/usbuhci.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/usbuhci.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2319 @@ +/* + * USB Universal Host Controller Interface (sic) driver. + * + * BUGS: + * - Too many delays and ilocks. + * - bandwidth admission control must be done per-frame. + * - interrupt endpoints should go on a tree like [oe]hci. + * - must warn of power overruns. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" + +typedef struct Ctlio Ctlio; +typedef struct Ctlr Ctlr; +typedef struct Isoio Isoio; +typedef struct Qh Qh; +typedef struct Qhpool Qhpool; +typedef struct Qio Qio; +typedef struct Td Td; +typedef struct Tdpool Tdpool; + +enum +{ + Resetdelay = 100, /* delay after a controller reset (ms) */ + Enabledelay = 100, /* waiting for a port to enable */ + Abortdelay = 5, /* delay after cancelling Tds (ms) */ + Incr = 64, /* for Td and Qh pools */ + + Tdatomic = 8, /* max nb. of Tds per bulk I/O op. */ + + /* Queue states (software) */ + Qidle = 0, /* the values index qhsname[], so keep both in the same order */ + Qinstall, + Qrun, + Qdone, + Qclose, + Qfree, + + /* + * HW constants + */ + + Nframes = 1024, /* 2ⁿ for xspanalloc; max 1024 */ + Align = 16, /* for data structures */ + + /* Size of small buffer kept within Tds.
(software) */ + /* Keep as a multiple of Align to maintain alignment of Tds in pool */ + Tdndata = 1*Align, + + /* i/o space + * Some ports are short, some are long, some are byte. + * We use ins[bsl] and not vmap. + */ + Cmd = 0, /* register offset added to ctlr->port by the I/O macros; bits follow */ + Crun = 0x01, /* set and cleared by uhcirun */ + Chcreset = 0x02, /* host controller reset */ + Cgreset = 0x04, /* global reset */ + Cegsm = 0x08, /* enter global suspend */ + Cfgr = 0x10, /* forge global resume */ + Cdbg = 0x20, /* single step, debug */ + Cmaxp = 0x80, /* max packet */ + + Status = 2, /* register offset; bits follow */ + Susbintr = 0x01, /* interrupt */ + Seintr = 0x02, /* error interrupt */ + Sresume = 0x04, /* resume detect */ + Shserr = 0x08, /* host system error */ + Shcerr = 0x10, /* host controller error */ + Shalted = 0x20, /* controller halted */ + Sall = 0x3F, + + Usbintr = 4, /* register offset; bits follow */ + Itmout = 0x01, /* timeout or crc */ + Iresume = 0x02, /* resume interrupt enable */ + Ioc = 0x04, /* interrupt on complete */ + Ishort = 0x08, /* short packet interrupt */ + Iall = 0x0F, + Frnum = 6, + Flbaseadd = 8, + SOFmod = 0xC, /* start of frame modifier register */ + + Portsc0 = 0x10, /* first port register; PORT(p) adds 2*p to it */ + PSpresent = 0x0001, /* device present */ + PSstatuschg = 0x0002, /* PSpresent changed */ + PSenable = 0x0004, /* device enabled */ + PSchange = 0x0008, /* PSenable changed */ + PSresume = 0x0040, /* resume detected */ + PSreserved1 = 0x0080, /* always read as 1; reserved */ + PSslow = 0x0100, /* device has low speed */ + PSreset = 0x0200, /* port reset */ + PSsuspend = 0x1000, /* port suspended */ + + /* Transfer descriptor link */ + Tdterm = 0x1, /* nil (terminate) */ + Tdlinkqh = 0x2, /* link refers to a QH */ + Tdvf = 0x4, /* run linked Tds first (depth-first)*/ + + /* Transfer status bits */ + Tdbitstuff = 0x00020000, /* bit stuffing error */ + Tdcrcto = 0x00040000, /* crc or timeout error */ + Tdnak = 0x00080000, /* nak packet received */ + Tdbabble = 0x00100000, /* babble detected */ + Tddberr = 0x00200000, /* data buf. error */ + Tdstalled = 0x00400000, /* serious error to ep.
*/ + Tdactive = 0x00800000, /* enabled/in use by hw */ + /* Transfer control bits */ + Tdioc = 0x01000000, /* interrupt on complete */ + Tdiso = 0x02000000, /* isochronous select */ + Tdlow = 0x04000000, /* low speed device */ + Tderr1 = 0x08000000, /* bit 0 of error counter */ + Tderr2 = 0x10000000, /* bit 1 of error counter */ + Tdspd = 0x20000000, /* short packet detect */ + + Tdlen = 0x000003FF, /* actual length field */ + + Tdfatalerr = Tdnak|Tdbabble|Tdstalled, /* hw retries others */ + Tderrors = Tdfatalerr|Tdbitstuff|Tdcrcto|Tddberr, /* every error bit of the status word */ + + /* Transfer descriptor token bits */ + Tddata0 = 0, + Tddata1 = 0x80000, /* data toggle (1==DATA1) */ + Tdtokin = 0x69, /* the three Tdtok values are what tdtok returns: the low byte of Td.token */ + Tdtokout = 0xE1, + Tdtoksetup = 0x2D, + + Tdmaxpkt = 0x800, /* max packet size */ + + /* Queue head bits */ + QHterm = 1<<0, /* nil (terminate) */ + QHlinkqh = 1<<1, /* link refers to a QH */ + QHvf = 1<<2, /* vertical first (depth first) */ +}; + +struct Ctlr +{ + Lock; /* for ilock. qh lists and basic ctlr I/O */ + QLock portlck; /* for port resets/enable... */ + Pcidev* pcidev; + int active; + int port; /* I/O address */ + Qh* qhs; /* list of Qhs for this controller */ + Qh* qh[Tmax]; /* Dummy Qhs to insert Qhs after */ + Isoio* iso; /* list of active iso I/O */ + u32int* frames; /* frame list (used by hw) */ + ulong load; /* max load for a single frame */ + ulong isoload; /* max iso load for a single frame */ + int nintr; /* number of interrupts attended */ + int ntdintr; /* number of intrs. with something to do */ + int nqhintr; /* number of intrs. for Qhs */ + int nisointr; /* number of intrs.
for iso transfers */ +}; + +struct Qio +{ + QLock; /* for the entire I/O process */ + Rendez; /* wait for completion */ + Qh* qh; /* Td list (field const after init) */ + int usbid; /* usb address for endpoint/device */ + int toggle; /* Tddata0/Tddata1 */ + int tok; /* Tdtoksetup, Tdtokin, Tdtokout */ + ulong iotime; /* time of last I/O */ + int debug; /* debug flag from the endpoint */ + char* err; /* error string */ +}; + +struct Ctlio +{ + Qio; /* a single Qio for each RPC */ + uchar* data; /* read from last ctl req. */ + int ndata; /* number of bytes read */ +}; + +struct Isoio +{ + QLock; + Rendez; /* wait for space/completion/errors */ + int usbid; /* address used for device/endpoint */ + int tok; /* Tdtokin or Tdtokout */ + int state; /* Qrun -> Qdone -> Qrun... -> Qclose */ + int nframes; /* Nframes/ep->pollival */ + uchar* data; /* iso data buffers if not embedded */ + int td0frno; /* frame number for first Td */ + Td* tdu; /* next td for user I/O in tdps */ + Td* tdi; /* next td processed by interrupt */ + char* err; /* error string */ + int nerrs; /* nb of consecutive I/O errors */ + long nleft; /* number of bytes left from last write */ + int debug; /* debug flag from the endpoint */ + Isoio* next; /* in list of active Isoios */ + Td* tdps[Nframes]; /* pointer to Td used for i-th frame or nil */ +}; + +struct Tdpool +{ + Lock; /* guards every field below */ + Td* free; /* free list, linked through Td.next */ + int nalloc; /* Tds created so far; grows by Incr */ + int ninuse; /* handed out by tdalloc and not yet given to tdfree */ + int nfree; /* length of the free list */ +}; + +struct Qhpool +{ + Lock; /* guards every field below */ + Qh* free; /* free list, linked through Qh.next */ + int nalloc; /* Qhs created so far; grows by Incr */ + int ninuse; /* handed out by qhalloc and not yet released */ + int nfree; /* length of the free list */ +}; + +/* + * HW data structures + */ + +/* + * Queue header (known by hw). + * 16-byte aligned. first two words used by hw. + * They are taken from the pool upon endpoint opening and + * queued after the dummy queue header for the endpoint type + * in the controller. Actual I/O happens as Tds are linked into it. + * The driver does I/O in lock-step.
+ * The user builds a list of Tds and links it into the Qh, + * then the Qh goes from Qidle to Qrun and nobody touches it until + * it becomes Qdone at interrupt time. + * At that point the user collects the Tds and it goes Qidle. + * A premature cancel may set the state to Qclose and abort I/O. + * The Ctlr lock protects change of state for Qhs in use. + */ +struct Qh +{ + u32int link; /* link to next horiz. item (eg. Qh) */ + u32int elink; /* link to element (eg. Td; updated by hw) */ + + u32int state; /* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */ + Qio* io; /* for this queue */ + + Qh* next; /* in active or free list */ + Td* tds; /* Td list in this Qh (initially, elink) */ + char* tag; /* debug and align, mostly */ + ulong align; /* NOTE(review): presumably padding only; no use is visible in this part of the file -- confirm */ +}; + +/* + * Transfer descriptor. + * 16-byte aligned. first two words used by hw. Next 4 by sw. + * We keep an embedded buffer for small I/O transfers. + * They are taken from the pool when buffers are needed for I/O + * and linked at the Qh/Isoio for the endpoint and direction requiring it. + * The block keeps actual data. They are protected from races by + * the queue or the pool keeping it. The owner of the link to the Td + * is free to use it and can be the only one using it.
+ */ +struct Td +{ + u32int link; /* Link to next Td or Qh */ + u32int csw; /* control and status word (updated by hw) */ + u32int token; /* endpt, device, pid */ + u32int buffer; /* buffer pointer */ + + Td* next; /* in qh or Isoio or free list */ + u32int ndata; /* bytes available/used at data */ + uchar* data; /* pointer to actual data */ + void* buff; /* allocated data, for large transfers */ + + uchar sbuff[Tdndata]; /* embedded buffer, for small transfers */ +}; + +#define INB(x) inb(ctlr->port+(x)) /* the six I/O macros expect a variable named ctlr in scope */ +#define INS(x) ins(ctlr->port+(x)) +#define INL(x) ((u32int)inl(ctlr->port+(x))) /* BOTCH: fix inl to return u32int */ +#define OUTB(x, v) outb(ctlr->port+(x), (v)) +#define OUTS(x, v) outs(ctlr->port+(x), (v)) +#define OUTL(x, v) outl(ctlr->port+(x), (v)) +#define TRUNC(x, sz) ((x) & ((sz)-1)) /* correct only when sz is a power of two */ +#define PTR(q) ((void*)KADDR((ulong)(q) & ~ (0xF|PCIWINDOW))) /* hw link word to kernel pointer: drops the low flag bits */ +#define QPTR(q) ((Qh*)PTR(q)) +#define TPTR(q) ((Td*)PTR(q)) +#define PORT(p) (Portsc0 + 2*(p)) +#define diprint if(debug || iso->debug)print +#define ddiprint if(debug>1 || iso->debug>1)print +#define dqprint if(debug || (qh->io && qh->io->debug))print +#define ddqprint if(debug>1 || (qh->io && qh->io->debug>1))print + +static Ctlr* ctlrs[Nhcis]; + +static Tdpool tdpool; +static Qhpool qhpool; +static int debug; + +static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" }; + +static void +uhcicmd(Ctlr *ctlr, int c) +{ + OUTS(Cmd, c); /* write c to the command register */ +} + +static void +uhcirun(Ctlr *ctlr, int on) +{ /* set or clear Crun, then wait for Shalted to agree */ + int i; + + ddprint("uhci %#ux setting run to %d\n", ctlr->port, on); + + if(on) + uhcicmd(ctlr, INS(Cmd)|Crun); + else + uhcicmd(ctlr, INS(Cmd) & ~Crun); + for(i = 0; i < 100; i++) /* poll for at most 100 delay(1) steps */ + if(on == 0 && (INS(Status) & Shalted) != 0) + break; + else if(on != 0 && (INS(Status) & Shalted) == 0) + break; + else + delay(1); + if(i == 100) + dprint("uhci %#x run cmd timed out\n", ctlr->port); + ddprint("uhci %#ux cmd %#ux sts %#ux\n", + ctlr->port, INS(Cmd), INS(Status)); +} + +static int +tdlen(Td *td) +{ + return
(td->csw+1) & Tdlen; /* presumably the hw keeps length-1 in this field -- confirm against the UHCI spec */ +} + +static int +maxtdlen(Td *td) +{ + return ((td->token>>21)+1) & (Tdmaxpkt-1); /* the field above bit 21 of the token, biased by one like tdlen */ +} + +static int +tdtok(Td *td) +{ + return td->token & 0xFF; /* low byte: compared against Tdtokin, Tdtokout, Tdtoksetup */ +} + +static char* +seprinttd(char *s, char *se, Td *td) +{ /* format td into [s, se): one letter per flag bit set; returns the new end of text */ + s = seprint(s, se, "%#p link %#ux", td, td->link); + if((td->link & Tdvf) != 0) + s = seprint(s, se, "V"); + if((td->link & Tdterm) != 0) + s = seprint(s, se, "T"); + if((td->link & Tdlinkqh) != 0) + s = seprint(s, se, "Q"); + s = seprint(s, se, " csw %#ux ", td->csw); + if(td->csw & Tdactive) + s = seprint(s, se, "a"); + if(td->csw & Tdiso) + s = seprint(s, se, "I"); + if(td->csw & Tdioc) + s = seprint(s, se, "i"); + if(td->csw & Tdlow) + s = seprint(s, se, "l"); + if((td->csw & (Tderr1|Tderr2)) == 0) /* both error counter bits clear */ + s = seprint(s, se, "z"); + if(td->csw & Tderrors) + s = seprint(s, se, " err %#ux", td->csw & Tderrors); + if(td->csw & Tdstalled) + s = seprint(s, se, "s"); + if(td->csw & Tddberr) + s = seprint(s, se, "d"); + if(td->csw & Tdbabble) + s = seprint(s, se, "b"); + if(td->csw & Tdnak) + s = seprint(s, se, "n"); + if(td->csw & Tdcrcto) + s = seprint(s, se, "c"); + if(td->csw & Tdbitstuff) + s = seprint(s, se, "B"); + s = seprint(s, se, " stslen %d", tdlen(td)); + + s = seprint(s, se, " token %#ux", td->token); + if(td->token == 0) /* the BWS loopback Td, ignore rest */ + return s; + s = seprint(s, se, " maxlen %d", maxtdlen(td)); + if(td->token & Tddata1) + s = seprint(s, se, " d1"); + else + s = seprint(s, se, " d0"); + s = seprint(s, se, " id %#ux:", (td->token>>15) & Epmax); + s = seprint(s, se, "%#ux", (td->token>>8) & Devmax); + switch(tdtok(td)){ + case Tdtokin: + s = seprint(s, se, " in"); + break; + case Tdtokout: + s = seprint(s, se, " out"); + break; + case Tdtoksetup: + s = seprint(s, se, " setup"); + break; + default: + s = seprint(s, se, " BADPID"); + } + s = seprint(s, se, "\n\t buffer %#ux data %#p", td->buffer, td->data); + s = seprint(s, se, " ndata %ud sbuff %#p buff %#p", + td->ndata, td->sbuff, td->buff); + if(td->ndata > 0)
+ s = seprintdata(s, se, td->data, td->ndata); + return s; +} + +static void +isodump(Isoio *iso, int all) +{ /* print the state of iso; a zero all limits the Td output to tdu and tdi */ + char buf[256]; + Td *td; + int i; + + print("iso %#p %s state %d nframes %d" + " td0 %#p tdu %#p tdi %#p data %#p\n", + iso, iso->tok == Tdtokin ? "in" : "out", + iso->state, iso->nframes, iso->tdps[iso->td0frno], + iso->tdu, iso->tdi, iso->data); + if(iso->err != nil) + print("\terr='%s'\n", iso->err); + if(all == 0){ + seprinttd(buf, buf+sizeof(buf), iso->tdu); + print("\ttdu %s\n", buf); + seprinttd(buf, buf+sizeof(buf), iso->tdi); + print("\ttdi %s\n", buf); + }else{ /* walk nframes Tds from the first frame, marking tdi and tdu */ + td = iso->tdps[iso->td0frno]; + for(i = 0; i < iso->nframes; i++){ + seprinttd(buf, buf+sizeof(buf), td); + if(td == iso->tdi) + print("i->"); + if(td == iso->tdu) + print("u->"); + print("\t%s\n", buf); + td = td->next; + } + } +} + +static int +sameptr(void *p, ulong l) +{ /* does the software pointer p agree with the hw link word l */ + if(l & QHterm) /* a terminated link matches only nil */ + return p == nil; + return PTR(l) == p; +} + +static void +dumptd(Td *td, char *pref) +{ /* print the Td list starting at td, each line prefixed with pref */ + char buf[256]; + char *s; + char *se; + int i; + + i = 0; + se = buf+sizeof(buf); + for(; td != nil; td = td->next){ + s = seprinttd(buf, se, td); + if(!sameptr(td->next, td->link)) /* flag software and hw links that disagree */ + seprint(s, se, " next %#p != link %#ux %#p", + td->next, td->link, TPTR(td->link)); + print("%std %s\n", pref, buf); + if(i++ > 20){ /* cap the output */ + print("...more tds...\n"); + break; + } + } +} + +static void +qhdump(Qh *qh, char *pref) +{ /* print qh, its software Td list and, if any, the hw element chain */ + char buf[256]; + char *s; + char *se; + ulong td; + int i; + + s = buf; + se = buf+sizeof(buf); + s = seprint(s, se, "%sqh %s %#p state %s link %#ux", pref, + qh->tag, qh, qhsname[qh->state], qh->link); + if(!sameptr(qh->tds, qh->elink)) + s = seprint(s, se, " [tds %#p != elink %#ux %#p]", + qh->tds, qh->elink, TPTR(qh->elink)); + if(!sameptr(qh->next, qh->link)) + s = seprint(s, se, " [next %#p != link %#ux %#p]", + qh->next, qh->link, QPTR(qh->link)); + if((qh->link & Tdterm) != 0) + s = seprint(s, se, "T"); + if((qh->link & Tdlinkqh) != 0) + s = seprint(s, se, "Q"); + s = seprint(s, se, " elink
%#ux", qh->elink); + if((qh->elink & Tdterm) != 0) + s = seprint(s, se, "T"); + if((qh->elink & Tdlinkqh) != 0) + s = seprint(s, se, "Q"); + s = seprint(s, se, " io %#p", qh->io); + if(qh->io != nil && qh->io->err != nil) + seprint(s, se, " err='%s'", qh->io->err); + print("%s\n", buf); + dumptd(qh->tds, "\t"); + if((qh->elink & QHterm) == 0){ /* follow the hw links, stopping on a self link or after about 40 */ + print("\thw tds:"); + i = 0; + for(td = qh->elink; (td & Tdterm) == 0; td = TPTR(td)->link){ + print(" %#ulx", td); + if(td == TPTR(td)->link) /* BWS Td */ + break; + if(i++ > 40){ + print("..."); + break; + } + } + print("\n"); + } +} + +static void +xdump(Ctlr *ctlr, int doilock) +{ /* dump pools, registers, iso streams and Qhs; doilock says whether to take the locks here */ + Isoio *iso; + Qh *qh; + int i; + + if(doilock){ + if(ctlr == ctlrs[0]){ /* the pools are shared by all controllers: print them once */ + lock(&tdpool); + print("tds: alloc %d = inuse %d + free %d\n", + tdpool.nalloc, tdpool.ninuse, tdpool.nfree); + unlock(&tdpool); + lock(&qhpool); + print("qhs: alloc %d = inuse %d + free %d\n", + qhpool.nalloc, qhpool.ninuse, qhpool.nfree); + unlock(&qhpool); + } + ilock(ctlr); + } + print("uhci port %#x frames %#p nintr %d ntdintr %d", + ctlr->port, ctlr->frames, ctlr->nintr, ctlr->ntdintr); + print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr); + print("cmd %#ux sts %#ux fl %#ux ps1 %#ux ps2 %#ux frames[0] %#ux\n", + INS(Cmd), INS(Status), + INL(Flbaseadd), INS(PORT(0)), INS(PORT(1)), + ctlr->frames[0]); + for(iso = ctlr->iso; iso != nil; iso = iso->next) + isodump(iso, 1); + i = 0; + for(qh = ctlr->qhs; qh != nil; qh = qh->next){ + qhdump(qh, ""); + if(i++ > 20){ /* guard against a looping list */ + print("qhloop\n"); + break; + } + } + print("\n"); + if(doilock) + iunlock(ctlr); +} + +static void +dump(Hci *hp) +{ + xdump(hp->aux, 1); +} + +static Td* +tdalloc(void) +{ /* take a Td from the pool, refilling the pool when it is empty */ + uchar *pool, *end; + int sz; + Td *td; + + lock(&tdpool); + if(tdpool.free == nil){ /* refill: carve Incr Tds out of one aligned allocation */ + ddprint("uhci: tdalloc %d Tds\n", Incr); + sz = ROUNDUP(sizeof *td, 16); + pool = mallocalign(Incr*sz, Align, 0, 0); + if(pool == nil) + panic("tdalloc"); + for(end = pool + Incr*sz; pool < end; pool += sz){ + td = (Td*)pool; +
td->next = tdpool.free; + tdpool.free = td; + } + tdpool.nalloc += Incr; + tdpool.nfree += Incr; + } + td = tdpool.free; + tdpool.free = td->next; + tdpool.ninuse++; + tdpool.nfree--; + unlock(&tdpool); + + memset(td, 0, sizeof(Td)); + td->link = Tdterm; + assert(((uintptr)td & 0xF) == 0); + return td; +} + +static void +tdfree(Td *td) +{ + if(td == nil) + return; + free(td->buff); + td->buff = nil; + lock(&tdpool); + td->next = tdpool.free; + tdpool.free = td; + tdpool.ninuse--; + tdpool.nfree++; + unlock(&tdpool); +} + +static void +qhlinkqh(Qh* qh, Qh* next) +{ + if(next == nil) + qh->link = QHterm; + else{ + next->link = qh->link; + next->next = qh->next; + qh->link = PCIWADDR(next)|QHlinkqh; + } + qh->next = next; +} + +static void +qhlinktd(Qh *qh, Td *td) +{ + qh->tds = td; + if(td == nil) + qh->elink = QHvf|QHterm; + else + qh->elink = PCIWADDR(td); +} + +static void +tdlinktd(Td *td, Td *next) +{ + td->next = next; + if(next == nil) + td->link = Tdterm; + else + td->link = PCIWADDR(next)|Tdvf; +} + +static Qh* +qhalloc(Ctlr *ctlr, Qh *prev, Qio *io, char *tag) +{ + uchar *pool, *end; + int sz; + Qh *qh; + + lock(&qhpool); + if(qhpool.free == nil){ + ddprint("uhci: qhalloc %d Qhs\n", Incr); + sz = ROUNDUP(sizeof(*qh), 16); + pool = mallocalign(Incr*sz, Align, 0, 0); + if(pool == nil) + panic("qhalloc"); + for(end = pool+Incr*sz; pool < end; pool += sz){ + qh = (Qh*)pool; + qh->next = qhpool.free; + qhpool.free = qh; + } + qhpool.nalloc += Incr; + qhpool.nfree += Incr; + } + qh = qhpool.free; + qhpool.free = qh->next; + qh->next = nil; + qh->link = QHterm; + qhpool.ninuse++; + qhpool.nfree--; + unlock(&qhpool); + + qh->tds = nil; + qh->elink = QHterm; + qh->state = Qidle; + qh->io = io; + qh->tag = nil; + kstrdup(&qh->tag, tag); + + if(prev != nil){ + coherence(); + ilock(ctlr); + qhlinkqh(prev, qh); + iunlock(ctlr); + } + + assert(((uintptr)qh & 0xF) == 0); + return qh; +} + +static void +qhfree(Ctlr *ctlr, Qh *qh) +{ + Td *td; + Td *ltd; + Qh *q; + + if(qh 
== nil) + return; + + ilock(ctlr); + for(q = ctlr->qhs; q != nil; q = q->next) + if(q->next == qh) + break; + if(q == nil) + panic("qhfree: nil q"); + q->next = qh->next; + q->link = qh->link; + iunlock(ctlr); + + for(td = qh->tds; td != nil; td = ltd){ + ltd = td->next; + tdfree(td); + } + lock(&qhpool); + qh->state = Qfree; /* paranoia */ + qh->next = qhpool.free; + qh->tag = nil; + qh->io = nil; + qhpool.free = qh; + qhpool.ninuse--; + qhpool.nfree++; + unlock(&qhpool); + ddprint("qhfree: qh %#p\n", qh); +} + +static char* +errmsg(int err) +{ + if(err == 0) + return "ok"; + if(err & Tdcrcto) + return "crc/timeout error"; + if(err & Tdbabble) + return "babble detected"; + if(err & Tddberr) + return "db error"; + if(err & Tdbitstuff) + return "bit stuffing error"; + if(err & Tdstalled) + return Estalled; + return Eio; +} + +static int +isocanread(void *a) +{ + Isoio *iso; + + iso = a; + return iso->state == Qclose || + (iso->state == Qrun && + iso->tok == Tdtokin && iso->tdi != iso->tdu); +} + +static int +isocanwrite(void *a) +{ + Isoio *iso; + + iso = a; + return iso->state == Qclose || + (iso->state == Qrun && + iso->tok == Tdtokout && iso->tdu->next != iso->tdi); +} + +static void +tdisoinit(Isoio *iso, Td *td, long count) +{ + td->ndata = count; + td->token = ((count-1)<<21)| ((iso->usbid & 0x7FF)<<8) | iso->tok; + td->csw = Tderr1|Tdiso|Tdactive|Tdioc; +} + +/* + * Process Iso i/o on interrupt. For writes update just error status. + * For reads update tds to reflect data and also error status. + * When tdi aproaches tdu, advance tdu; data may be lost. + * (If nframes is << Nframes tdu might be far away but this avoids + * races regarding frno.) + * If we suffer errors for more than half the frames we stall. 
+ */
+static void
+isointerrupt(Ctlr *ctlr, Isoio* iso)
+{
+	Td *tdi;
+	int err;
+	int i;
+	int nframes;
+
+	tdi = iso->tdi;
+	if((tdi->csw & Tdactive) != 0)		/* nothing new done */
+		return;
+	ctlr->nisointr++;
+	ddiprint("isointr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu);
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("isointr: iso state");
+	if(debug > 1 || iso->debug > 1)
+		isodump(iso, 0);
+
+	nframes = iso->nframes / 2;		/* limit how many we look */
+	if(nframes > 64)
+		nframes = 64;
+
+	for(i = 0; i < nframes && (tdi->csw & Tdactive) == 0; i++){
+		tdi->csw &= ~Tdioc;
+		err = tdi->csw & Tderrors;
+		if(err == 0)
+			iso->nerrs = 0;
+		else if(iso->nerrs++ > iso->nframes/2)
+			tdi->csw |= Tdstalled;	/* too many errors in a row */
+		if((tdi->csw & Tdstalled) != 0){
+			/* only the first error is recorded for the reader/writer */
+			if(iso->err == nil){
+				iso->err = errmsg(err);
+				diprint("isointerrupt: tdi %#p error %#ux %s\n",
+					tdi, err, iso->err);
+				diprint("ctlr load %uld\n", ctlr->load);
+			}
+			tdi->ndata = 0;
+		}else
+			tdi->ndata = tdlen(tdi);
+
+		/*
+		 * tdi is about to catch up with tdu: recycle the Td at tdu
+		 * (zeroed and re-armed) and push tdu forward.
+		 */
+		if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){
+			memset(iso->tdu->data, 0, maxtdlen(iso->tdu));
+			tdisoinit(iso, iso->tdu, maxtdlen(iso->tdu));
+			iso->tdu = iso->tdu->next;
+			iso->nleft = 0;
+		}
+		tdi = tdi->next;
+	}
+	/* report how many Tds were actually handled, not the limit */
+	ddiprint("isointr: %d frames processed\n", i);
+	if(i == nframes)
+		tdi->csw |= Tdioc;	/* stopped at the limit: ask for another interrupt */
+	iso->tdi = tdi;
+	if(isocanwrite(iso) || isocanread(iso)){
+		diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso,
+			iso->tdi, iso->tdu);
+		wakeup(iso);
+	}
+
+}
+
+/*
+ * Process a Qh upon interrupt. There's one per ongoing user I/O.
+ * User process releases resources later, that is not done here.
+ * We may find in this order one or more Tds:
+ * - none/many non active and completed Tds
+ * - none/one (usually(!) not active) and failed Td
+ * - none/many active Tds.
+ * Upon errors the entire transfer is aborted and error reported.
+ * Otherwise, the transfer is complete only when all Tds are done or
+ * when a read with less than maxpkt is found.
+ * Use the software list and not qh->elink to avoid races. + * We could use qh->elink to see if there's something new or not. + */ +static void +qhinterrupt(Ctlr *ctlr, Qh *qh) +{ + Td *td; + int err; + + ctlr->nqhintr++; + if(qh->state != Qrun) + panic("qhinterrupt: qh state"); + if(qh->tds == nil) + panic("qhinterrupt: no tds"); + if((qh->tds->csw & Tdactive) == 0) + ddqprint("qhinterrupt port %#ux qh %#p p0 %#x p1 %#x\n", + ctlr->port, qh, INS(PORT(0)), INS(PORT(1))); + for(td = qh->tds; td != nil; td = td->next){ + if(td->csw & Tdactive) + return; + td->csw &= ~Tdioc; + if((td->csw & Tdstalled) != 0){ + err = td->csw & Tderrors; + /* just stalled is end of xfer but not an error */ + if(err != Tdstalled && qh->io->err == nil){ + qh->io->err = errmsg(td->csw & Tderrors); + dqprint("qhinterrupt: td %#p error %#ux %s\n", + td, err, qh->io->err); + dqprint("ctlr load %uld\n", ctlr->load); + } + break; + } + if((td->csw & Tdnak) != 0){ /* retransmit; not serious */ + td->csw &= ~Tdnak; + if(td->next == nil) + td->csw |= Tdioc; + } + td->ndata = tdlen(td); + if(td->ndata < maxtdlen(td)){ /* EOT */ + td = td->next; + break; + } + } + + /* + * Done. Make void the Tds not used (errors or EOT) and wakeup epio. 
+ */ + qh->elink = QHterm; + for(; td != nil; td = td->next) + td->ndata = 0; + qh->state = Qdone; + wakeup(qh->io); +} + +static void +interrupt(Ureg*, void *a) +{ + Hci *hp; + Ctlr *ctlr; + int frptr; + int frno; + Qh *qh; + Isoio *iso; + int sts; + int cmd; + + hp = a; + ctlr = hp->aux; + ilock(ctlr); + ctlr->nintr++; + sts = INS(Status); + if((sts & Sall) == 0){ /* not for us; sharing irq */ + iunlock(ctlr); + return; + } + OUTS(Status, sts & Sall); + cmd = INS(Cmd); + if(cmd & Crun == 0){ + print("uhci %#ux: not running: uhci bug?\n", ctlr->port); + /* BUG: should abort everything in this case */ + } + if(debug > 1){ + frptr = INL(Flbaseadd); + frno = INL(Frnum); + frno = TRUNC(frno, Nframes); + print("cmd %#ux sts %#ux frptr %#ux frno %d\n", + cmd, sts, frptr, frno); + } + ctlr->ntdintr++; + /* + * Will we know in USB 3.0 who the interrupt was for?. + * Do they still teach indexing in CS? + * This is Intel's doing. + */ + for(iso = ctlr->iso; iso != nil; iso = iso->next) + if(iso->state == Qrun || iso->state == Qdone) + isointerrupt(ctlr, iso); + for(qh = ctlr->qhs; qh != nil; qh = qh->next) + if(qh->state == Qrun) + qhinterrupt(ctlr, qh); + else if(qh->state == Qclose) + qhlinktd(qh, nil); + iunlock(ctlr); +} + +/* + * iso->tdu is the next place to put data. When it gets full + * it is activated and tdu advanced. + */ +static long +putsamples(Isoio *iso, uchar *b, long count) +{ + long tot; + long n; + + for(tot = 0; isocanwrite(iso) && tot < count; tot += n){ + n = count-tot; + if(n > maxtdlen(iso->tdu) - iso->nleft) + n = maxtdlen(iso->tdu) - iso->nleft; + memmove(iso->tdu->data+iso->nleft, b+tot, n); + iso->nleft += n; + if(iso->nleft == maxtdlen(iso->tdu)){ + tdisoinit(iso, iso->tdu, iso->nleft); + iso->nleft = 0; + iso->tdu = iso->tdu->next; + } + } + return tot; +} + +/* + * Queue data for writing and return error status from + * last writes done, to maintain buffered data. 
+ */
+static long
+episowrite(Ep *ep, Isoio *iso, void *a, long count)
+{
+	Ctlr *ctlr;
+	uchar *b;
+	int tot;
+	int nw;
+	char *err;
+
+	iso->debug = ep->debug;
+	diprint("uhci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	ctlr = ep->hp->aux;
+	qlock(iso);			/* held until we return or raise */
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	b = a;
+	for(tot = 0; tot < count; tot += nw){
+		while(isocanwrite(iso) == 0){	/* no room: sleep without the ilock */
+			iunlock(ctlr);
+			diprint("uhci: episowrite: %#p sleep\n", iso);
+			if(waserror()){		/* error raised out of tsleep */
+				if(iso->err == nil)
+					iso->err = "I/O timed out";
+				ilock(ctlr);
+				break;
+			}
+			tsleep(iso, isocanwrite, iso, ep->tmout);
+			poperror();
+			ilock(ctlr);
+		}
+		err = iso->err;		/* consume any pending error */
+		iso->err = nil;
+		if(iso->state == Qclose || err != nil){
+			iunlock(ctlr);
+			error(err ? err : Eio);
+		}
+		if(iso->state != Qrun)
+			panic("episowrite: iso not running");
+		iunlock(ctlr);		/* We could page fault here */
+		nw = putsamples(iso, b+tot, count-tot);
+		ilock(ctlr);
+	}
+	if(iso->state != Qclose)
+		iso->state = Qdone;
+	iunlock(ctlr);
+	err = iso->err;		/* in case it failed early */
+	iso->err = nil;
+	qunlock(iso);
+	poperror();
+	if(err != nil)
+		error(err);
+	diprint("uhci: episowrite: %#p %d bytes\n", iso, tot);
+	return tot;
+}
+
+/*
+ * Available data is kept at tdu and following tds, up to tdi (excluded).
+ *
+ * Waits (up to ep->tmout per sleep) until isocanread holds, then copies
+ * at most count bytes into a, re-arming each Td once it is drained.
+ * Returns the number of bytes copied; raises iso->err if it is set.
+ * NOTE(review): if the sleep is abandoned through the waserror branch
+ * with no data available, tdu may still equal tdi when the assert
+ * below runs -- confirm that path cannot trip it.
+ */
+static long
+episoread(Ep *ep, Isoio *iso, void *a, int count)
+{
+	Ctlr *ctlr;
+	uchar *b;
+	int nr;
+	int tot;
+	Td *tdu;
+
+	iso->debug = ep->debug;
+	diprint("uhci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb);
+
+	b = a;
+	ctlr = ep->hp->aux;
+	qlock(iso);
+	if(waserror()){
+		qunlock(iso);
+		nexterror();
+	}
+	iso->err = nil;
+	iso->nerrs = 0;
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qrun;
+	while(isocanread(iso) == 0){	/* nothing to read: sleep without the ilock */
+		iunlock(ctlr);
+		diprint("uhci: episoread: %#p sleep\n", iso);
+		if(waserror()){		/* error raised out of tsleep */
+			if(iso->err == nil)
+				iso->err = "I/O timed out";
+			ilock(ctlr);
+			break;
+		}
+		tsleep(iso, isocanread, iso, ep->tmout);
+		poperror();
+		ilock(ctlr);
+	}
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		error(iso->err ? iso->err : Eio);
+	}
+	iso->state = Qdone;
+	assert(iso->tdu != iso->tdi);
+
+	for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){
+		tdu = iso->tdu;
+		if(tdu->csw & Tdactive){
+			diprint("uhci: episoread: %#p tdu active\n", iso);
+			break;
+		}
+		nr = tdu->ndata;
+		if(tot + nr > count)
+			nr = count - tot;
+		if(nr == 0)
+			print("uhci: ep%d.%d: too many polls\n",
+				ep->dev->nb, ep->nb);
+		else{
+			iunlock(ctlr);		/* We could page fault here */
+			memmove(b+tot, tdu->data, nr);
+			ilock(ctlr);
+			if(nr < tdu->ndata)	/* partial read: keep the rest at the front */
+				memmove(tdu->data, tdu->data+nr, tdu->ndata - nr);
+			tdu->ndata -= nr;
+		}
+		if(tdu->ndata == 0){		/* drained: re-arm and move on */
+			tdisoinit(iso, tdu, ep->maxpkt);
+			iso->tdu = tdu->next;
+		}
+	}
+	iunlock(ctlr);
+	qunlock(iso);
+	poperror();
+	diprint("uhci: episoread: %#p %d bytes err '%s'\n", iso, tot, iso->err);
+	if(iso->err != nil)
+		error(iso->err);
+	return tot;
+}
+/*
+ * Return the data toggle that follows tog:
+ * Tddata1 after Tddata0, Tddata0 after anything else.
+ */
+static int
+nexttoggle(int tog)
+{
+	if(tog == Tddata0)
+		return Tddata1;
+	else
+		return Tddata0;
+}
+/*
+ * Allocate and fill a Td for one packet of count bytes on io.
+ * The Td's own sbuff is used when count <= Tdndata; otherwise a
+ * buffer of ep->maxpkt bytes is allocated and remembered in td->buff.
+ * When a is not nil, count bytes are copied from it.
+ * The status word is built from flags (plus Tdlow for low speed
+ * devices) and the token from count, io->usbid, io->tok and the
+ * current toggle, which is then flipped for the next packet.
+ * Raises an error if count is larger than ep->maxpkt.
+ */
+static Td*
+epgettd(Ep *ep, Qio *io, int flags, void *a, int count)
+{
+	Td *td;
+	int tok;
+
+	if(ep->maxpkt < count)
+		error("maxpkt too short");
+	td = tdalloc();
+	if(count <= Tdndata)
+		td->data = td->sbuff;
+	else
+		td->data = td->buff = smalloc(ep->maxpkt);
+	td->buffer = PCIWADDR(td->data);
+	td->ndata = count;
+	if(a != nil && count > 0)
+		memmove(td->data, a, count);
+	td->csw = Tderr2|Tderr1|flags;
+	if(ep->dev->speed == Lowspeed)
+		td->csw |= Tdlow;
+	tok = io->tok | io->toggle;
+	io->toggle = nexttoggle(io->toggle);
+	td->token = ((count-1)<<21) | ((io->usbid&0x7FF)<<8) | tok;
+
+	return td;
+}
+
+/*
+ * Try to get them idle
+ */
+static void
+aborttds(Qh *qh)
+{
+	Td *td;
+
+	qh->state = Qdone;
+	qh->elink = QHterm;		/* terminate the queue's element link */
+	for(td = qh->tds; td != nil; td = td->next){
+		if(td->csw & Tdactive)
+			td->ndata = 0;	/* never completed: carries no data */
+		td->csw &= ~(Tdactive|Tdioc);
+	}
+}
+/*
+ * Sleep condition for epiowait: true once the Qh has left Qrun.
+ */
+static int
+epiodone(void *a)
+{
+	Qh *qh;
+
+	qh = a;
+	return qh->state != Qrun;
+}
+/*
+ * Wait for the transfer queued on io->qh to finish; tmout == 0 means
+ * wait without a time limit. If waiting raised an error, or the Qh is
+ * still in Qrun afterwards, the Tds are aborted, io->err is set to
+ * "request timed out" and we pause for Abortdelay.
+ * On return the Qh is Qidle (unless it was closed), its Td list is
+ * unlinked from the Qh and load has been subtracted from ctlr->load.
+ */
+static void
+epiowait(Ctlr *ctlr, Qio *io, int tmout, ulong load)
+{
+	Qh *qh;
+	int timedout;
+
+	qh = io->qh;
+	ddqprint("uhci io %#p sleep on qh %#p state %ud\n", io, qh, qh->state);
+	timedout = 0;
+	if(waserror()){
+		dqprint("uhci io %#p qh %#p timed out\n", io, qh);
+		timedout++;
+	}else{
+		if(tmout == 0)
+			sleep(io, epiodone, qh);
+		else
+			tsleep(io, epiodone, qh, tmout);
+		poperror();
+	}
+	ilock(ctlr);
+	if(qh->state == Qrun)
+		timedout = 1;
+	else if(qh->state != Qdone && qh->state != Qclose)
+		panic("epio: queue not done and not closed");
+	if(timedout){
+		aborttds(io->qh);
+		io->err = "request timed out";
+		iunlock(ctlr);
+		if(!waserror()){	/* ignore errors raised while pausing */
+			tsleep(&up->sleep, return0, 0, Abortdelay);
+			poperror();
+		}
+		ilock(ctlr);
+	}
+	if(qh->state != Qclose)
+		qh->state = Qidle;
+	qhlinktd(qh, nil);
+	ctlr->load -= load;
+	iunlock(ctlr);
+}
+
+/*
+ * Non iso I/O.
+ * To make it work for control transfers, the caller may
+ * lock the Qio for the entire control transfer.
+ *
+ * Builds one Td per packet of at most ep->maxpkt bytes (at least one
+ * Td, even when count is 0), hands the list to the Qh, waits in
+ * epiowait, then copies back the data of IN Tds and frees all Tds.
+ * When mustlock is set the Qio is qlocked for the whole call.
+ * Returns the number of bytes transferred; raises io->err if set.
+ * NOTE(review): epiowait subtracts load from ctlr->load even when the
+ * Qh was found in Qclose below and load was never added -- confirm.
+ */
+static long
+epio(Ep *ep, Qio *io, void *a, long count, int mustlock)
+{
+	Td *td, *ltd, *td0, *ntd;
+	Ctlr *ctlr;
+	Qh* qh;
+	long n, tot;
+	char buf[128];
+	uchar *c;
+	int saved, ntds, tmout;
+	ulong load;
+	char *err;
+
+	qh = io->qh;
+	ctlr = ep->hp->aux;
+	io->debug = ep->debug;
+	tmout = ep->tmout;
+	ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n",
+		io->tok == Tdtokin ? "in" : "out",
+		ep->dev->nb, ep->nb, io, count, ctlr->load);
+	if((debug > 1 || ep->debug > 1) && io->tok != Tdtokin){
+		seprintdata(buf, buf+sizeof(buf), a, count);
+		print("uchi epio: user data: %s\n", buf);
+	}
+	if(mustlock){
+		qlock(io);
+		if(waserror()){
+			qunlock(io);
+			nexterror();
+		}
+	}
+	io->err = nil;
+	ilock(ctlr);
+	if(qh->state == Qclose){	/* Tds released by cancelio */
+		iunlock(ctlr);
+		error(io->err ? io->err : Eio);
+	}
+	if(qh->state != Qidle)
+		panic("epio: qh not idle");
+	qh->state = Qinstall;
+	iunlock(ctlr);
+
+	c = a;
+	td0 = ltd = nil;
+	load = tot = 0;
+	do{				/* one Td per packet */
+		n = ep->maxpkt;
+		if(count-tot < n)
+			n = count-tot;
+		if(c != nil && io->tok != Tdtokin)
+			td = epgettd(ep, io, Tdactive, c+tot, n);
+		else
+			td = epgettd(ep, io, Tdactive|Tdspd, nil, n);
+		if(td0 == nil)
+			td0 = td;
+		else
+			tdlinktd(ltd, td);
+		ltd = td;
+		tot += n;
+		load += ep->load;
+	}while(tot < count);
+	if(td0 == nil || ltd == nil)
+		panic("epio: no td");
+
+	ltd->csw |= Tdioc;	/* the last one interrupts */
+	ddeprint("uhci: load %uld ctlr load %uld\n", load, ctlr->load);
+	ilock(ctlr);
+	if(qh->state != Qclose){
+		io->iotime = TK2MS(sys->ticks);
+		qh->state = Qrun;
+		coherence();
+		qhlinktd(qh, td0);
+		ctlr->load += load;
+	}
+	iunlock(ctlr);
+
+	epiowait(ctlr, io, tmout, load);
+
+	if(debug > 1 || ep->debug > 1)
+		dumptd(td0, "epio: got tds: ");
+
+	tot = 0;
+	c = a;
+	saved = 0;
+	ntds = 0;
+	for(td = td0; td != nil; td = ntd){
+		ntds++;
+		/*
+		 * Use td tok, not io tok, because of setup packets.
+		 * Also, if the Td was stalled or active (previous Td
+		 * was a short packet), we must save the toggle as it is.
+		 */
+		if(td->csw & (Tdstalled|Tdactive)){
+			if(saved++ == 0)
+				io->toggle = td->token & Tddata1;
+		}else{
+			tot += td->ndata;
+			if(c != nil && tdtok(td) == Tdtokin && td->ndata > 0){
+				memmove(c, td->data, td->ndata);
+				c += td->ndata;
+			}
+		}
+		ntd = td->next;		/* fetch before the Td goes back to the pool */
+		tdfree(td);
+	}
+	err = io->err;
+	if(mustlock){
+		qunlock(io);
+		poperror();
+	}
+	ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n",
+		io, ntds, tot, err);
+	if(err != nil)
+		error(err);
+	if(tot < 0)
+		error(Eio);
+	return tot;
+}
+
+/*
+ * halt condition was cleared on the endpoint. update our toggles.
+ * Only bulk and interrupt endpoints are touched: each direction the
+ * endpoint is open for gets its toggle reset to Tddata0 under its qlock.
+ */
+static void
+clrhalt(Ep *ep)
+{
+	Qio *io;
+
+	ep->clrhalt = 0;
+	switch(ep->ttype){
+	case Tbulk:
+	case Tintr:
+		io = ep->aux;
+		if(ep->mode != OREAD){
+			qlock(&io[OWRITE]);
+			io[OWRITE].toggle = Tddata0;
+			deprint("ep clrhalt for io %#p\n", io+OWRITE);
+			qunlock(&io[OWRITE]);
+		}
+		if(ep->mode != OWRITE){
+			qlock(&io[OREAD]);
+			io[OREAD].toggle = Tddata0;
+			deprint("ep clrhalt for io %#p\n", io+OREAD);
+			qunlock(&io[OREAD]);
+		}
+		break;
+	}
+}
+/*
+ * Read from an endpoint.
+ * Control: hand back the data kept by the last control request
+ * (see epctlio); a following read returns 0, and a read with no
+ * request pending raises "request expected".
+ * Bulk: run the transfer through epio.
+ * Interrupt: first sleep if less than half of ep->pollival has passed
+ * since the last I/O on the read Qio, then run epio.
+ * Iso: episoread.
+ * Panics if the endpoint is not open or has an unknown type.
+ */
+static long
+epread(Ep *ep, void *a, long count)
+{
+	Ctlio *cio;
+	Qio *io;
+	Isoio *iso;
+	char buf[160];
+	ulong delta;
+
+	ddeprint("uhci: epread\n");
+	if(ep->aux == nil)
+		panic("epread: not open");
+
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		qlock(cio);
+		if(waserror()){
+			qunlock(cio);
+			nexterror();
+		}
+		ddeprint("epread ctl ndata %d\n", cio->ndata);
+		if(cio->ndata < 0)
+			error("request expected");
+		else if(cio->ndata == 0){
+			cio->ndata = -1;
+			count = 0;
+		}else{
+			if(count > cio->ndata)
+				count = cio->ndata;
+			if(count > 0)
+				memmove(a, cio->data, count);
+			/* BUG for big transfers */
+			free(cio->data);
+			cio->data = nil;
+			cio->ndata = 0;	/* signal EOF next time */
+		}
+		qunlock(cio);
+		poperror();
+		if(debug>1 || ep->debug){
+			seprintdata(buf, buf+sizeof(buf), a, count);
+			print("epread: %s\n", buf);
+		}
+		return count;
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tintr:
+		io = ep->aux;
+		delta = TK2MS(sys->ticks) - io[OREAD].iotime + 1;
+		if(delta < ep->pollival / 2)
+			tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OREAD], a, count, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episoread(ep, iso, a, count);
+	default:
+		panic("epread: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+
+/*
+ * Control transfers are one setup write (data0)
+ * plus zero or more reads/writes (data1, data0, ...)
+ * plus a final write/read with data1 to ack.
+ * For both host to device and device to host we perform
+ * the entire transfer when the user writes the request,
+ * and keep any data read from the device for a later read.
+ * We call epio three times instead of placing all Tds at
+ * the same time because doing so leads to crc/tmout errors
+ * for some devices.
+ * Upon errors on the data phase we must still run the status
+ * phase or the device may cease responding in the future.
+ */
+static long
+epctlio(Ep *ep, Ctlio *cio, void *a, long count)
+{
+	uchar *c;
+	long len;
+
+	ddeprint("epctlio: cio %#p ep%d.%d count %ld\n",
+		cio, ep->dev->nb, ep->nb, count);
+	if(count < Rsetuplen)
+		error("short usb comand");
+	qlock(cio);
+	free(cio->data);		/* drop data left by a previous request */
+	cio->data = nil;
+	cio->ndata = 0;
+	if(waserror()){
+		qunlock(cio);
+		free(cio->data);
+		cio->data = nil;
+		cio->ndata = 0;
+		nexterror();
+	}
+
+	/* set the address if unset and out of configuration state */
+	if(ep->dev->state != Dconfig && ep->dev->state != Dreset)
+		if(cio->usbid == 0)
+			cio->usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb&Devmax);
+	c = a;
+	cio->tok = Tdtoksetup;		/* setup stage */
+	cio->toggle = Tddata0;
+	if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen)
+		error(Eio);
+	a = c + Rsetuplen;
+	count -= Rsetuplen;
+
+	cio->toggle = Tddata1;		/* data stage */
+	if(c[Rtype] & Rd2h){
+		cio->tok = Tdtokin;
+		len = GET2(c+Rcount);
+		if(len <= 0)
+			error("bad length in d2h request");
+		if(len > Maxctllen)
+			error("d2h data too large to fit in uhci");
+		a = cio->data = smalloc(len+1);
+	}else{
+		cio->tok = Tdtokout;
+		len = count;
+	}
+	if(len > 0)
+		if(waserror())		/* remember the failure; status stage still runs */
+			len = -1;
+		else{
+			len = epio(ep, cio, a, len, 0);
+			poperror();
+		}
+	if(c[Rtype] & Rd2h){
+		count = Rsetuplen;
+		cio->ndata = len;
+		cio->tok = Tdtokout;
+	}else{
+		if(len < 0)
+			count = -1;
+		else
+			count = Rsetuplen + len;
+		cio->tok = Tdtokin;
+	}
+	cio->toggle = Tddata1;		/* status stage: zero length, opposite direction */
+	epio(ep, cio, nil, 0, 0);
+	qunlock(cio);
+	poperror();
+	ddeprint("epctlio cio %#p return %ld\n", cio, count);
+	return count;
+}
+/*
+ * Write to an endpoint.
+ * Control: perform the whole control transfer (epctlio).
+ * Bulk: feed the data to epio in chunks of at most Tdatomic packets.
+ * Interrupt: first sleep if less than ep->pollival has passed since
+ * the last I/O on the write Qio, then run epio.
+ * Iso: episowrite.
+ * Panics if the endpoint is not open or has an unknown type.
+ */
+static long
+epwrite(Ep *ep, void *a, long count)
+{
+	Ctlio *cio;
+	Isoio *iso;
+	Qio *io;
+	ulong delta;
+	char *b;
+	int tot;
+	int nw;
+
+	ddeprint("uhci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux == nil)
+		panic("uhci: epwrite: not open");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		return epctlio(ep, cio, a, count);
+	case Tbulk:
+		io = ep->aux;
+		if(ep->clrhalt)
+			clrhalt(ep);
+		/*
+		 * Put at most Tdatomic Tds (512 bytes) at a time.
+		 * Otherwise some devices produce babble errors.
+		 */
+		b = a;
+		for(tot = 0; tot < count ; tot += nw){
+			nw = count - tot;
+			if(nw > Tdatomic * ep->maxpkt)
+				nw = Tdatomic * ep->maxpkt;
+			nw = epio(ep, &io[OWRITE], b+tot, nw, 1);
+		}
+		return tot;
+	case Tintr:
+		io = ep->aux;
+		delta = TK2MS(sys->ticks) - io[OWRITE].iotime + 1;
+		if(delta < ep->pollival)
+			tsleep(&up->sleep, return0, 0, ep->pollival - delta);
+		if(ep->clrhalt)
+			clrhalt(ep);
+		return epio(ep, &io[OWRITE], a, count, 1);
+	case Tiso:
+		iso = ep->aux;
+		return episowrite(ep, iso, a, count);
+	default:
+		panic("uhci: epwrite: bad ep ttype %d", ep->ttype);
+	}
+	return -1;
+}
+/*
+ * Set up an iso endpoint whose Isoio was already allocated in ep->aux.
+ * Allocates nframes = Nframes/pollival Tds, one every pollival frames
+ * starting a few frames ahead of the current frame number, links them
+ * in a ring, and then hooks each of them at the head of its frame
+ * list entry. The endpoint load is added to the controller totals.
+ * Raises an error for ORDWR mode or when fewer than 3 Tds would result.
+ */
+static void
+isoopen(Ep *ep)
+{
+	Ctlr *ctlr;
+	Isoio *iso;
+	int frno;
+	int i;
+	Td* td;
+	Td* ltd;
+	int size;
+	int left;
+
+	if(ep->mode == ORDWR)
+		error("iso i/o is half-duplex");
+	ctlr = ep->hp->aux;
+	iso = ep->aux;
+	iso->debug = ep->debug;
+	iso->next = nil;			/* paranoia */
+	if(ep->mode == OREAD)
+		iso->tok = Tdtokin;
+	else
+		iso->tok = Tdtokout;
+	iso->usbid = ((ep->nb & Epmax)<<7)|(ep->dev->nb & Devmax);
+	iso->state = Qidle;
+	iso->nframes = Nframes/ep->pollival;
+	if(iso->nframes < 3)
+		error("uhci isoopen bug");	/* we need at least 3 tds */
+
+	ilock(ctlr);
+	if(ctlr->load + ep->load > 800)
+		print("usb: uhci: bandwidth may be exceeded\n");
+	ctlr->load += ep->load;
+	ctlr->isoload += ep->load;
+	dprint("uhci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload);
+	iunlock(ctlr);
+
+	/*
+	 * From here on this cannot raise errors
+	 * unless we catch them and release here all memory allocated.
+	 */
+	if(ep->maxpkt > Tdndata)
+		iso->data = smalloc(iso->nframes*ep->maxpkt);
+	ilock(ctlr);
+	frno = INS(Frnum) + 10;			/* start 10ms ahead */
+	frno = TRUNC(frno, Nframes);
+	iunlock(ctlr);
+	iso->td0frno = frno;
+	ltd = nil;
+	left = 0;
+	for(i = 0; i < iso->nframes; i++){
+		td = iso->tdps[frno] = tdalloc();
+		if(ep->mode == OREAD)
+			size = ep->maxpkt;
+		else{
+			/* samples for this interval; the remainder is carried in left */
+			size = (ep->hz+left) * ep->pollival / 1000;
+			size *= ep->samplesz;
+			left = (ep->hz+left) * ep->pollival % 1000;
+			if(size > ep->maxpkt){
+				print("uhci: ep%d.%d: size > maxpkt\n",
+					ep->dev->nb, ep->nb);
+				print("size = %d max = %ld\n", size, ep->maxpkt);
+				size = ep->maxpkt;
+			}
+		}
+		if(size > Tdndata)
+			td->data = iso->data + i * ep->maxpkt;
+		else
+			td->data = td->sbuff;
+		td->buffer = PCIWADDR(td->data);
+		tdisoinit(iso, td, size);
+		if(ltd != nil)
+			ltd->next = td;
+		ltd = td;
+		frno = TRUNC(frno+ep->pollival, Nframes);
+	}
+	ltd->next = iso->tdps[iso->td0frno];	/* close the ring */
+	iso->tdi = iso->tdps[iso->td0frno];
+	iso->tdu = iso->tdi;	/* read: right now; write: 1s ahead */
+	ilock(ctlr);
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){	/* first point each Td at what its frame holds */
+		iso->tdps[frno]->link = ctlr->frames[frno];
+		frno = TRUNC(frno+ep->pollival, Nframes);
+	}
+	coherence();
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){	/* then put the Td at the head of its frame */
+		ctlr->frames[frno] = PCIWADDR(iso->tdps[frno]);
+		frno = TRUNC(frno+ep->pollival, Nframes);
+	}
+	iso->next = ctlr->iso;
+	ctlr->iso = iso;
+	iso->state = Qdone;
+	iunlock(ctlr);
+	if(debug > 1 || iso->debug >1)
+		isodump(iso, 0);
+}
+
+/*
+ * Allocate the endpoint and set it up for I/O
+ * in the controller. This must follow what's said
+ * in Ep regarding configuration, including perhaps
+ * the saved toggles (saved on a previous close of
+ * the endpoint data file by epclose).
+ */
+static void
+epopen(Ep *ep)
+{
+	Ctlr *ctlr;
+	Qh *cqh;
+	Qio *io;
+	Ctlio *cio;
+	int usbid;
+
+	ctlr = ep->hp->aux;
+	deprint("uhci: epopen ep%d.%d\n", ep->dev->nb, ep->nb);
+	if(ep->aux != nil)
+		panic("uhci: epopen called with open ep");
+	if(waserror()){
+		free(ep->aux);
+		ep->aux = nil;
+		nexterror();
+	}
+	if(ep->maxpkt > Tdmaxpkt){
+		print("uhci: maxkpkt too large: using %d\n", Tdmaxpkt);
+		ep->maxpkt = Tdmaxpkt;
+	}
+	cqh = ctlr->qh[ep->ttype];	/* new Qhs are linked after the Qh for this type */
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");
+	case Tiso:
+		ep->aux = smalloc(sizeof(Isoio));
+		isoopen(ep);
+		break;
+	case Tctl:
+		cio = ep->aux = smalloc(sizeof(Ctlio));
+		cio->debug = ep->debug;
+		cio->ndata = -1;
+		cio->data = nil;
+		if(ep->dev->isroot != 0 && ep->nb == 0)	/* root hub */
+			break;
+		cio->qh = qhalloc(ctlr, cqh, cio, "epc");
+		break;
+	case Tbulk:
+	case Tintr:
+		/* one Qio per direction, indexed by OREAD and OWRITE */
+		io = ep->aux = smalloc(sizeof(Qio)*2);
+		io[OREAD].debug = io[OWRITE].debug = ep->debug;
+		usbid = ((ep->nb&Epmax)<<7)|(ep->dev->nb &Devmax);
+		if(ep->mode != OREAD){
+			if(ep->toggle[OWRITE] != 0)
+				io[OWRITE].toggle = Tddata1;
+			else
+				io[OWRITE].toggle = Tddata0;
+			io[OWRITE].tok = Tdtokout;
+			io[OWRITE].qh = qhalloc(ctlr, cqh, io+OWRITE, "epw");
+			io[OWRITE].usbid = usbid;
+		}
+		if(ep->mode != OWRITE){
+			if(ep->toggle[OREAD] != 0)
+				io[OREAD].toggle = Tddata1;
+			else
+				io[OREAD].toggle = Tddata0;
+			io[OREAD].tok = Tdtokin;
+			io[OREAD].qh = qhalloc(ctlr, cqh, io+OREAD, "epr");
+			io[OREAD].usbid = usbid;
+		}
+		break;
+	}
+	if(debug>1 || ep->debug)
+		dump(ep->hp);
+	deprint("uhci: epopen done\n");
+	poperror();
+}
+
+/*
+ * Cancel any I/O in progress on io and release its Qh.
+ * Does nothing when io is nil, has no Qh, or its Qh is already closed.
+ * Otherwise the Tds are aborted, the Qh is marked Qclose, we pause
+ * for Abortdelay, wake up any process sleeping on io and wait
+ * (through the qlock) for a running epio to leave before freeing.
+ */
+static void
+cancelio(Ctlr *ctlr, Qio *io)
+{
+	Qh *qh;
+
+	ilock(ctlr);
+	/* test io before looking inside it; the old code read io->qh first */
+	if(io == nil || io->qh == nil || io->qh->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	qh = io->qh;
+	dqprint("uhci: cancelio for qh %#p state %s\n",
+		qh, qhsname[qh->state]);
+	aborttds(qh);
+	qh->state = Qclose;
+	iunlock(ctlr);
+	if(!waserror()){
+		tsleep(&up->sleep, return0, 0, Abortdelay);
+		poperror();
+	}
+
+	wakeup(io);
+	qlock(io);
+	/* wait for epio if running */
+	qunlock(io);
+
+	qhfree(ctlr, qh);
+	io->qh = nil;
+}
+
+static void
+cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, ulong load)
+{
+	Isoio **il;
+	u32int *lp;
+	int i;
+	int frno;
+	Td *td;
+
+	ilock(ctlr);
+	if(iso->state == Qclose){
+		iunlock(ctlr);
+		return;
+	}
+	if(iso->state != Qrun && iso->state != Qdone)
+		panic("bad iso state");
+	iso->state = Qclose;
+	if(ctlr->isoload < load)
+		panic("uhci: low isoload");
+	ctlr->isoload -= load;
+	ctlr->load -= load;
+	for(il = &ctlr->iso; *il != nil; il = &(*il)->next)
+		if(*il == iso)
+			break;
+	if(*il == nil)
+		panic("isocancel: not found");
+	*il = iso->next;
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){
+		td = iso->tdps[frno];
+		td->csw &= ~(Tdioc|Tdactive);
+		for(lp=&ctlr->frames[frno]; !(*lp & Tdterm);
+					lp = &TPTR(*lp)->link)
+			if(TPTR(*lp) == td)
+				break;
+		if(*lp & Tdterm)
+			panic("cancelisoio: td not found");
+		*lp = td->link;
+		frno = TRUNC(frno+pollival, Nframes);
+	}
+	iunlock(ctlr);
+
+	/*
+	 * wakeup anyone waiting for I/O and
+	 * wait to be sure no I/O is in progress in the controller.
+	 * and then wait to be sure episo-io is no longer running.
+	 */
+	wakeup(iso);
+	diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso);
+	tsleep(&up->sleep, return0, 0, 5);
+	qlock(iso);
+	qunlock(iso);
+	diprint("cancelisoio iso %#p releasing iso\n", iso);
+
+	frno = iso->td0frno;
+	for(i = 0; i < iso->nframes; i++){	/* same frame stride used to hook the Tds */
+		tdfree(iso->tdps[frno]);
+		iso->tdps[frno] = nil;
+		frno = TRUNC(frno+pollival, Nframes);
+	}
+	free(iso->data);
+	iso->data = nil;
+}
+/*
+ * Close an endpoint: cancel its I/O according to its type, save the
+ * data toggles of bulk/interrupt endpoints in ep->toggle (1 when the
+ * Qio toggle is Tddata1) and free ep->aux.
+ * Panics if the endpoint is not open or has an unknown type.
+ */
+static void
+epclose(Ep *ep)
+{
+	Ctlr *ctlr;
+	Ctlio *cio;
+	Isoio *iso;
+	Qio *io;
+
+	ctlr = ep->hp->aux;
+	deprint("uhci: epclose ep%d.%d\n", ep->dev->nb, ep->nb);
+
+	if(ep->aux == nil)
+		panic("uhci: epclose called with closed ep");
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		cancelio(ctlr, cio);
+		free(cio->data);
+		cio->data = nil;
+		break;
+	case Tbulk:
+	case Tintr:
+		io = ep->aux;
+		ep->toggle[OREAD] = ep->toggle[OWRITE] = 0;
+		if(ep->mode != OWRITE){
+			cancelio(ctlr, &io[OREAD]);
+			if(io[OREAD].toggle == Tddata1)
+				ep->toggle[OREAD] = 1;
+		}
+		if(ep->mode != OREAD){
+			cancelio(ctlr, &io[OWRITE]);
+			if(io[OWRITE].toggle == Tddata1)
+				ep->toggle[OWRITE] = 1;
+		}
+		break;
+	case Tiso:
+		iso = ep->aux;
+		cancelisoio(ctlr, iso, ep->pollival, ep->load);
+		break;
+	default:
+		panic("epclose: bad ttype %d", ep->ttype);
+	}
+
+	free(ep->aux);
+	ep->aux = nil;
+
+}
+/*
+ * Format the driver's state for endpoint ep into [s, e) and return
+ * the new end of the text. An endpoint that is not open yields an
+ * empty string. Runs with the controller ilocked.
+ */
+static char*
+seprintep(char *s, char *e, Ep *ep)
+{
+	Ctlio *cio;
+	Qio *io;
+	Isoio *iso;
+	Ctlr *ctlr;
+
+	ctlr = ep->hp->aux;
+	ilock(ctlr);
+	if(ep->aux == nil){
+		*s = 0;
+		iunlock(ctlr);
+		return s;
+	}
+	switch(ep->ttype){
+	case Tctl:
+		cio = ep->aux;
+		s = seprint(s,e,"cio %#p qh %#p"
+			" id %#x tog %#x tok %#x err %s\n",
+			cio, cio->qh, cio->usbid, cio->toggle,
+			cio->tok, cio->err);
+		break;
+	case Tbulk:
+	case Tintr:
+		io = ep->aux;
+		if(ep->mode != OWRITE)
+			s = seprint(s,e,"r: qh %#p id %#x tog %#x tok %#x err %s\n",
+				io[OREAD].qh, io[OREAD].usbid, io[OREAD].toggle,
+				io[OREAD].tok, io[OREAD].err);
+		if(ep->mode != OREAD)
+			s = seprint(s,e,"w: qh %#p id %#x tog %#x tok %#x err %s\n",
+				io[OWRITE].qh, io[OWRITE].usbid, io[OWRITE].toggle,
+				io[OWRITE].tok, io[OWRITE].err);
+		break;
+	case Tiso:
+		iso = ep->aux;
+		s = seprint(s,e,"iso %#p id %#x tok %#x tdu %#p tdi %#p err %s\n",
+			iso, iso->usbid, iso->tok, iso->tdu, iso->tdi, iso->err);
+		break;
+	}
+	iunlock(ctlr);
+	return s;
+}
+/*
+ * Set (on != 0) or clear the enable bit of a root hub port and then
+ * wait Enabledelay. Ports are numbered from 1. Always returns 0.
+ */
+static int
+portenable(Hci *hp, int port, int on)
+{
+	int s;
+	int ioport;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	dprint("uhci: %#x port %d enable=%d\n", ctlr->port, port, on);
+	ioport = PORT(port-1);
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	ilock(ctlr);
+	s = INS(ioport);
+	if(on)
+		OUTS(ioport, s | PSenable);
+	else
+		OUTS(ioport, s & ~PSenable);
+	microdelay(64);
+	iunlock(ctlr);
+	tsleep(&up->sleep, return0, 0, Enabledelay);
+	dprint("uhci %#ux port %d enable=%d: sts %#x\n",
+		ctlr->port, port, on, INS(ioport));
+	qunlock(&ctlr->portlck);
+	poperror();
+	return 0;
+}
+/*
+ * Reset a root hub port and leave it enabled. Nothing is done when
+ * on is 0. The whole sequence, including the 50ms delay, runs with
+ * the controller ilocked. Always returns 0.
+ */
+static int
+portreset(Hci *hp, int port, int on)
+{
+	int i, p;
+	Ctlr *ctlr;
+
+	if(on == 0)
+		return 0;
+	ctlr = hp->aux;
+	dprint("uhci: %#ux port %d reset\n", ctlr->port, port);
+	p = PORT(port-1);
+	ilock(ctlr);
+	OUTS(p, PSreset);
+	delay(50);
+	OUTS(p, INS(p) & ~PSreset);
+	OUTS(p, INS(p) | PSenable);
+	microdelay(64);
+	for(i=0; i<1000 && (INS(p) & PSenable) == 0; i++)	/* poll until enabled */
+		;
+	OUTS(p, (INS(p) & ~PSreset)|PSenable);
+	iunlock(ctlr);
+	dprint("uhci %#ux after port %d reset: sts %#x\n",
+		ctlr->port, port, INS(p));
+	return 0;
+}
+/*
+ * Read the status of a root hub port, write the value back when a
+ * change bit is set, and return the status translated to HP* bits.
+ */
+static int
+portstatus(Hci *hp, int port)
+{
+	int s;
+	int r;
+	int ioport;
+	Ctlr *ctlr;
+
+	ctlr = hp->aux;
+	ioport = PORT(port-1);
+	qlock(&ctlr->portlck);
+	if(waserror()){
+		iunlock(ctlr);	/* NOTE(review): handler is installed before ilock; confirm the lock is held whenever this runs */
+		qunlock(&ctlr->portlck);
+		nexterror();
+	}
+	ilock(ctlr);
+	s = INS(ioport);
+	if(s & (PSstatuschg | PSchange)){
+		OUTS(ioport, s);
+		ddprint("uhci %#ux port %d status %#x\n", ctlr->port, port, s);
+	}
+	iunlock(ctlr);
+	qunlock(&ctlr->portlck);
+	poperror();
+
+	/*
+	 * We must return status bits as a
+	 * get port status hub request would do.
+	 */
+	r = 0;
+	if(s & PSpresent)
+		r |= HPpresent;
+	if(s & PSenable)
+		r |= HPenable;
+	if(s & PSsuspend)
+		r |= HPsuspend;
+	if(s & PSreset)
+		r |= HPreset;
+	if(s & PSslow)
+		r |= HPslow;
+	if(s & PSstatuschg)
+		r |= HPstatuschg;
+	if(s & PSchange)
+		r |= HPchange;
+	return r;
+}
+/*
+ * Scan the PCI bus once for UHCI controllers and record each one
+ * found in the first free slot of ctlrs[]. Later calls return at once.
+ */
+static void
+scanpci(void)
+{
+	static int already = 0;
+	int io;
+	int i;
+	Ctlr *ctlr;
+	Pcidev *p;
+
+	if(already)
+		return;
+	already = 1;
+	p = nil;
+	while(p = pcimatch(p, 0, 0)){
+		/*
+		 * Find UHCI controllers (Programming Interface = 0).
+		 */
+		if(p->ccrb != 0xc || p->ccru != 3)
+			continue;
+		switch(p->ccrp){
+		case 0:
+			io = p->mem[4].bar & ~0x0F;
+			break;
+		default:
+			continue;
+		}
+		if(io == 0){
+			print("usbuhci: %#x %#x: failed to map registers\n",
+				p->vid, p->did);
+			continue;
+		}
+		if(ioalloc(io, p->mem[4].size, 0, "usbuhci") < 0){
+			print("usbuhci: port %#ux in use\n", io);
+			continue;
+		}
+		if(p->intl == 0xFF || p->intl == 0){
+			print("usbuhci: no irq assigned for port %#ux\n", io);
+			continue;
+		}
+
+		dprint("uhci: %#x %#x: port %#ux size %#x irq %d\n",
+			p->vid, p->did, io, p->mem[4].size, p->intl);
+
+		ctlr = smalloc(sizeof(Ctlr));
+		ctlr->pcidev = p;
+		ctlr->port = io;
+		for(i = 0; i < Nhcis; i++)
+			if(ctlrs[i] == nil){
+				ctlrs[i] = ctlr;
+				break;
+			}
+		if(i == Nhcis)
+			print("uhci: bug: no more controllers\n");
+	}
+}
+/*
+ * Allocate the controller's permanent data structures: the chain of
+ * Qhs CTL -> INT -> BLK -> BWS, the dummy Td for the BWS Qh, and the
+ * frame list, with every frame pointing at the first Qh.
+ */
+static void
+uhcimeminit(Ctlr *ctlr)
+{
+	Td* td;
+	Qh *qh;
+	int frsize;
+	int i;
+
+	ctlr->qhs = ctlr->qh[Tctl] = qhalloc(ctlr, nil, nil, "CTL");
+	ctlr->qh[Tintr] = qhalloc(ctlr, ctlr->qh[Tctl], nil, "INT");
+	ctlr->qh[Tbulk] = qhalloc(ctlr, ctlr->qh[Tintr], nil, "BLK");
+
+	/* idle Td from dummy Qh at the end. looped back to itself */
+	/* This is a workaround for PIIX4 errata 29773804.pdf */
+	qh = qhalloc(ctlr, ctlr->qh[Tbulk], nil, "BWS");
+	td = tdalloc();
+	td->link = PCIWADDR(td);
+	qhlinktd(qh, td);
+
+	/* loop (hw only) from the last qh back to control xfers.
+ * this may be done only for some of them. Disable until ehci comes. + */ + if(0) + qh->link = PCIWADDR(ctlr->qhs); + + frsize = Nframes*sizeof(ulong); + ctlr->frames = mallocalign(frsize, frsize, 0, 0); + if(ctlr->frames == nil) + panic("uhci reset: no memory"); + + ctlr->iso = nil; + for(i = 0; i < Nframes; i++) + ctlr->frames[i] = PCIWADDR(ctlr->qhs)|QHlinkqh; + OUTL(Flbaseadd, PCIWADDR(ctlr->frames)); + OUTS(Frnum, 0); + dprint("uhci %#ux flb %#ux frno %#ux\n", ctlr->port, + INL(Flbaseadd), INS(Frnum)); +} + +static void +init(Hci *hp) +{ + Ctlr *ctlr; + int sts; + int i; + + ctlr = hp->aux; + dprint("uhci %#ux init\n", ctlr->port); + coherence(); + ilock(ctlr); + OUTS(Usbintr, Itmout|Iresume|Ioc|Ishort); + uhcirun(ctlr, 1); + dprint("uhci: init: cmd %#ux sts %#ux sof %#ux", + INS(Cmd), INS(Status), INS(SOFmod)); + dprint(" flb %#ux frno %#ux psc0 %#ux psc1 %#ux", + INL(Flbaseadd), INS(Frnum), INS(PORT(0)), INS(PORT(1))); + /* guess other ports */ + for(i = 2; i < 6; i++){ + sts = INS(PORT(i)); + if(sts != 0xFFFF && (sts & PSreserved1) == 1){ + dprint(" psc%d %#ux", i, sts); + hp->nports++; + }else + break; + } + for(i = 0; i < hp->nports; i++) + OUTS(PORT(i), 0); + iunlock(ctlr); +} + +static void +uhcireset(Ctlr *ctlr) +{ + int i; + int sof; + + ilock(ctlr); + dprint("uhci %#ux reset\n", ctlr->port); + + /* + * Turn off legacy mode. Some controllers won't + * interrupt us as expected otherwise. 
+ */ + uhcirun(ctlr, 0); + pcicfgw16(ctlr->pcidev, 0xc0, 0x2000); + + OUTS(Usbintr, 0); + sof = INB(SOFmod); + uhcicmd(ctlr, Cgreset); /* global reset */ + delay(Resetdelay); + uhcicmd(ctlr, 0); /* all halt */ + uhcicmd(ctlr, Chcreset); /* controller reset */ + for(i = 0; i < 100; i++){ + if((INS(Cmd) & Chcreset) == 0) + break; + delay(1); + } + if(i == 100) + print("uhci %#x controller reset timed out\n", ctlr->port); + OUTB(SOFmod, sof); + iunlock(ctlr); +} + +static void +setdebug(Hci*, int d) +{ + debug = d; +} + +static void +shutdown(Hci *hp) +{ + Ctlr *ctlr; + + ctlr = hp->aux; + + ilock(ctlr); + uhcirun(ctlr, 0); + delay(100); + iunlock(ctlr); +} + +static int +reset(Hci *hp) +{ + static Lock resetlck; + int i; + Ctlr *ctlr; + Pcidev *p; + + if(getconf("*nousbuhci")) + return -1; + + ilock(&resetlck); + scanpci(); + + /* + * Any adapter matches if no hp->port is supplied, + * otherwise the ports must match. + */ + ctlr = nil; + for(i = 0; i < Nhcis && ctlrs[i] != nil; i++){ + ctlr = ctlrs[i]; + if(ctlr->active == 0) + if(hp->port == 0 || hp->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + iunlock(&resetlck); + if(ctlrs[i] == nil || i == Nhcis) + return -1; + + p = ctlr->pcidev; + hp->aux = ctlr; + hp->port = ctlr->port; + hp->irq = p->intl; + hp->tbdf = p->tbdf; + hp->nports = 2; /* default */ + + uhcireset(ctlr); + uhcimeminit(ctlr); + + /* + * Linkage to the generic HCI driver. 
+ */ + hp->init = init; + hp->dump = dump; + hp->interrupt = interrupt; + hp->epopen = epopen; + hp->epclose = epclose; + hp->epread = epread; + hp->epwrite = epwrite; + hp->seprintep = seprintep; + hp->portenable = portenable; + hp->portreset = portreset; + hp->portstatus = portstatus; + hp->shutdown = shutdown; + hp->debug = setdebug; + hp->type = "uhci"; + return 0; +} + +void +usbuhcilink(void) +{ + addhcitype("uhci", reset); +} diff -Nru 0/sys/src/nix/k10/vsvm.c 4/sys/src/nix/k10/vsvm.c --- 0/sys/src/nix/k10/vsvm.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/k10/vsvm.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,196 @@ +/* + * Vestigial Segmented Virtual Memory. + * To do: + * dynamic allocation and free of descriptors; + * IST should perhaps point to a different handler; + * user-level descriptors (if not dynamic). + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "amd64.h" +#include "ureg.h" + +typedef struct Gd Gd; +typedef u64int Sd; +typedef u16int Ss; +typedef struct Tss Tss; + +struct Gd { + Sd sd; + u64int hi; +}; + +struct Tss { + u32int _0_; + u32int rsp0[2]; + u32int rsp1[2]; + u32int rsp2[2]; + u32int _28_[2]; + u32int ist[14]; + u16int _92_[5]; + u16int iomap; +}; + +enum { + Ngdt = 16, /* max. entries in gdt */ + Nidt = 256, /* max. 
entries in idt */ +}; + +static Sd gdt64[Ngdt] = { + 0ull, /* NULL descriptor */ + SdL|SdP|SdDPL0|SdS|SdCODE, /* CS */ + SdG|SdD|SdP|SdDPL0|SdS|SdW, /* DS */ + SdG|SdD|SdP|SdDPL3|SdS|SdCODE|SdR|Sd4G, /* User CS 32-bit */ + SdG|SdD|SdP|SdDPL3|SdS|SdW|Sd4G, /* User DS */ + SdL|SdP|SdDPL3|SdS|SdCODE, /* User CS 64-bit */ + + 0ull, /* FS */ + 0ull, /* GS */ + + 0ull, /* TSS lower */ + 0ull, /* TSS upper */ +}; +static int ngdt64 = 10; + +static Gd idt64[Nidt]; +static Gd acidt64[Nidt]; /* NIX application core IDT */ + +static Sd +mksd(u64int base, u64int limit, u64int bits, u64int* upper) +{ + Sd sd; + + sd = bits; + sd |= (((limit & 0x00000000000f0000ull)>>16)<<48) + |(limit & 0x000000000000ffffull); + sd |= (((base & 0x00000000ff000000ull)>>24)<<56) + |(((base & 0x0000000000ff0000ull)>>16)<<32) + |((base & 0x000000000000ffffull)<<16); + if(upper != nil) + *upper = base>>32; + + return sd; +} + +static void +mkgd(Gd* gd, u64int offset, Ss ss, u64int bits, int ist) +{ + Sd sd; + + sd = bits; + sd |= (((offset & 0x00000000ffff0000ull)>>16)<<48) + |(offset & 0x000000000000ffffull); + sd |= ((ss & 0x000000000000ffffull)<<16); + sd |= (ist & (SdISTM>>32))<<32; + gd->sd = sd; + gd->hi = offset>>32; +} + +static void +idtinit(Gd *gd, uintptr offset) +{ + int ist, v; + u64int dpl; + + for(v = 0; v < Nidt; v++){ + ist = 0; + dpl = SdP|SdDPL0|SdIG; + switch(v){ + default: + break; + case IdtBP: /* #BP */ + dpl = SdP|SdDPL3|SdIG; + break; + case IdtUD: /* #UD */ + case IdtDF: /* #DF */ + ist = 1; + break; + } + mkgd(gd, offset, SSEL(SiCS, SsTIGDT|SsRPL0), dpl, ist); + gd++; + offset += 6; + } +} + +void +tssrsp0(uintptr sp) +{ + Tss *tss; + + tss = m->tss; + tss->rsp0[0] = sp; + tss->rsp0[1] = sp>>32; +} + +static void +tssinit(uintptr sp) +{ + int ist; + Tss *tss; + + tss = m->tss; + memset(tss, 0, sizeof(Tss)); + + tssrsp0(sp); + + sp = PTR2UINT(m->vsvm+PGSZ); + for(ist = 0; ist < 14; ist += 2){ + tss->ist[ist] = sp; + tss->ist[ist+1] = sp>>32; + } + tss->iomap = 0xdfff; +} + 
+void +vsvminit(int size, int nixrole) +{ + Sd *sd; + u64int r; + + if(m->machno == 0){ + idtinit(idt64, PTR2UINT(idthandlers)); + idtinit(acidt64, PTR2UINT(acidthandlers)); + } + + m->gdt = m->vsvm; + memmove(m->gdt, gdt64, sizeof(gdt64)); + m->tss = &m->vsvm[ROUNDUP(sizeof(gdt64), 16)]; + + sd = &((Sd*)m->gdt)[SiTSS]; + *sd = mksd(PTR2UINT(m->tss), sizeof(Tss)-1, SdP|SdDPL0|SdaTSS, sd+1); + + tssinit(m->stack+size); + + gdtput(sizeof(gdt64)-1, PTR2UINT(m->gdt), SSEL(SiCS, SsTIGDT|SsRPL0)); + if(nixrole != NIXAC) + idtput(sizeof(idt64)-1, PTR2UINT(idt64)); + else + idtput(sizeof(acidt64)-1, PTR2UINT(acidt64)); + trput(SSEL(SiTSS, SsTIGDT|SsRPL0)); + + wrmsr(FSbase, 0ull); + wrmsr(GSbase, PTR2UINT(&sys->machptr[m->machno])); + wrmsr(KernelGSbase, 0ull); + + r = rdmsr(Efer); + r |= Sce; + wrmsr(Efer, r); + r = ((u64int)SSEL(SiU32CS, SsRPL3))<<48; + r |= ((u64int)SSEL(SiCS, SsRPL0))<<32; + wrmsr(Star, r); + if(nixrole != NIXAC) + wrmsr(Lstar, PTR2UINT(syscallentry)); + else + wrmsr(Lstar, PTR2UINT(acsyscallentry)); + wrmsr(Sfmask, If); +} + +int +userureg(Ureg* ureg) +{ + return ureg->cs == SSEL(SiUCS, SsRPL3); +} diff -Nru 0/sys/src/nix/mk/bootmkfile 4/sys/src/nix/mk/bootmkfile --- 0/sys/src/nix/mk/bootmkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/mk/bootmkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,33 @@ +BOOTDIR=../boot +BOOTLIB=$BOOTDIR/libboot.a$O + +BOOTFILES=\ + bootauth.$O\ + aux.$O\ + boot.$O\ + bootcache.$O\ + bootip.$O\ + local.$O\ + embed.$O\ + settime.$O\ + paq.$O\ + printstub.$O\ + parts.$O\ + +$BOOTLIB(%.$O):N: %.$O + +$BOOTLIB: ${BOOTFILES:%=$BOOTLIB(%)} + names=`{membername $newprereq} + ar vu $BOOTLIB $names + rm $names + +$BOOTFILES: $BOOTDIR/boot.h + +%.$O: $BOOTDIR/%.c + $CC -I$BOOTDIR $CFLAGS $BOOTDIR/$stem.c + +boot$CONF.out: ../mk/parse $CONF print.$O $BOOTDIR/boot.c $BOOTLIB + awk -f ../mk/parse -- -mkbootconf $CONF > boot$CONF.c + $CC $CFLAGS boot$CONF.c + $CC $CFLAGS ../boot/printstub.c + $LD -o boot$CONF.out boot$CONF.$O $BOOTLIB 
printstub.$O
diff -Nru 0/sys/src/nix/mk/mkenum 4/sys/src/nix/mk/mkenum
--- 0/sys/src/nix/mk/mkenum Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/mk/mkenum Wed Feb 6 00:00:00 2013
@@ -0,0 +1,59 @@
+#!/bin/rc
+
+awk '
+BEGIN{
+	oargc = 0;
+	for(argc = 1; argc < ARGC; argc++){
+		if(ARGV[argc] !~ /^-.+/ || ARGV[argc] ~ /--/)
+			break;
+		if(ARGV[argc] != "-D")
+			oargv[ARGV[argc]] = oargc++;
+		else
+			DEBUG = 1;
+		ARGV[argc] = "";
+	}
+}
+
+/^enum([ \t]*{|$)/{
+	inenum = 1;
+	if(DEBUG)
+		printf "inenum = 1\n";
+	next;
+}
+
+inenum && /^};$/{
+	if(DEBUG)
+		printf "inenum = 0\n";
+	inenum = 0;
+}
+
+inenum && $0 ~ /^[ \t]+[_A-Za-z][_0-9A-Za-z]+[ \t]+=[ \t]+[0-9A-Z_a-z()<> ]+,/{
+	tab = "\t";
+	if(length($1) < 8)
+		sep = tab tab;
+	else
+		sep = tab;
+	split($3, a, ",");
+	printf "#define %s%s%s", $1, sep, a[1];
+	if(match($0, /\/\*.*\*\/$/)){
+		len = length(a[1]);
+		sep = "";
+		while(len < 24){
+			sep = sep tab;
+			len += 8;
+		}
+		printf "%s%s", sep, substr($0, RSTART);
+	}
+	printf "\n"
+}
+
+!inenum && /^#(define|include) /{
+	printf "%s\n", $0;
+}
+
+/^$/{
+	printf "\n";
+}
+
+END{
+}' $*
diff -Nru 0/sys/src/nix/mk/mkroot 4/sys/src/nix/mk/mkroot
--- 0/sys/src/nix/mk/mkroot Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/mk/mkroot Wed Feb 6 00:00:00 2013
@@ -0,0 +1,15 @@
+#!/bin/rc
+
+rfork e
+echo mkroot $*
+if(! ~ $#* 2){	# exactly two arguments: path name
+	echo usage: mkroot path name >[1=2]	# diagnostics go to stderr, as in mkrootall
+	exit 1
+}
+n=`{basename $1}
+cp $1 $2.out
+t=`{file $2.out}
+if(~ $"t *executable*)	# strip the copy only when file(1) reports an executable
+	strip $2.out
+aux/data2s $2 < $2.out > $2.root.s
+echo mkroot $* done
diff -Nru 0/sys/src/nix/mk/mkrootall 4/sys/src/nix/mk/mkrootall
--- 0/sys/src/nix/mk/mkrootall Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/mk/mkrootall Wed Feb 6 00:00:00 2013
@@ -0,0 +1,31 @@
+#!/bin/rc
+
+rfork e
+n=`{echo $#*^'%3' | hoc}
+if(! ~ $n 0){
+	echo 'usage: mkrootall [name cname file]...' >[1=2]
+	exit usage
+}
+
+tmp=mkroot.$pid.out
+fn sigexit {
+	rm -f $tmp
+}
+
+allcname=()
+while(! 
~ $#* 0){ + name=$1 + cname=$2 + file=$3 + shift + shift + shift + allcname=($allcname $cname) + cp $file $tmp + t=`{file $tmp} + # do not strip venti - it uses its own symbols + if(~ $"t *executable* && ! ~ $name venti) + strip $tmp + aux/data2s $cname < $tmp +} +exit 0 diff -Nru 0/sys/src/nix/mk/mkrr 4/sys/src/nix/mk/mkrr --- 0/sys/src/nix/mk/mkrr Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/mk/mkrr Wed Feb 6 00:00:00 2013 @@ -0,0 +1,45 @@ +#!/bin/rc + +rfork en + +switch($#*){ +case 1 + PROTO=$1.proto +case 2 + PROTO=$2 +case * + echo $0: usage: $0 conf [proto] + exit "usage" +} + +ramfs -S ramfs.$pid +mount -c /srv/ramfs.$pid /tmp +mkdir /tmp/mnt /tmp/empty + +# clean up files and procs on exit +fn sigexit { + echo sync>>/srv/flcons.$pid + unmount /tmp/mnt + unmount /tmp + echo halt>>/srv/flcons.$pid + rm -f /srv/*.$pid + kill ramfs fossil|rc +} + +{syscall seek 1 8388608 0; echo} >>/tmp/fldisk |[0=2] grep -v 'no error$' +fossil/flfmt -b 4096 -y /tmp/fldisk + +fossil/conf -w /tmp/fldisk < 0){ + if(/^[ \t]*$/ || /^#/) + continue; + + if(/^[^ \t]/){ + #section[$1] = 0; + tag = $1; + } + if(!tag) + continue; + sub(/^[ \t]*/, ""); + line[tag, section[tag]++] = $0; + } + + o = ""; + if(!oargc || ("-mkdevlist" in oargv)){ + s = mkdevlist(); + if(!("-mkdevlist" in oargv) || (oargc > 1)) + s = "DEVS=" s; + o = o s "\n"; + } + if((!oargc || ("-mkmach" in oargv)) && (objtype in section)){ + s = mkmach(); + if(!("-mkmach" in oargv) || (oargc > 1)) + s = "MACH=" s; + o = o s "\n"; + } + if((!oargc || ("-mklib" in oargv)) && ("lib" in section)){ + s = mklib(); + if(!("-mklib" in oargv) || (oargc > 1)) + s = "LIB=" s; + o = o s "\n"; + } + if((!oargc || ("-mkport" in oargv) ) && ("port" in section)){ + s = mkport(); + if(!("-mkport" in oargv) || (oargc > 1)) + s = "PORT=" s; + o = o s "\n"; + } + if("dbgflg" in section){ + for(i = 1; i < section["dbgflg"]; i++){ + n = split(line["dbgflg", i], a); + if(n < 2 || n > 4 || a[2] !~ /'[a-zA-Z]'/) + continue; + if(n > 2 && a[3] !~ 
/'[a-zA-Z]'/) + continue; + if(n == 4 && (a[4] < 1 || a[4] >= 128)) + continue; + dbgc[a[1]] = a[2]; + if(n == 4) + dbgflg[a[3]] = a[4]; + else if(n == 3) + dbgflg[a[3]] = 1; + } + } + if((!oargc || ("-mkrules" in oargv)) && ("dir" in section)){ + o = o mkrules(".", exists, a, c, "-I."); + for(i = 1; i < section["dir"]; i++){ + n = split(line["dir", i], a); + dir = "../" a[1]; + if(n == 1) + a[2] = "-I."; + s = a[2]; + o = o mkrules(dir, exists, a, c, s); + l = listolate(a, "|"); + if(l != ""){ + o = o "^(" l ")\\.$O:R: " dir "/\\1.s\n"; + o = o "\t$AS $AFLAGS " s " " dir "/$stem1.s\n"; + } + l = listolate(c, "|"); + if(l != ""){ + o = o "^(" l ")\\.$O:R: " dir "/\\1.c\n"; + o = o "\t$CC $CFLAGS " s " " dir "/$stem1.c\n"; + } + } + } + if((!oargc || ("-mkrootrules" in oargv)) && ("rootdir" in section)){ + mkrootrules(name, cname, src); + s = ARGV[argc] ".root.s:D:"; + for(i = 1; i < section["rootdir"]; i++) + s = s " " src[i]; + s = s "\n\t../mk/mkrootall\\\n"; + for(i = 1; i < section["rootdir"]; i++) + s = s "\t\t" name[i] " " cname[i] " " src[i] "\\\n"; + s = s "\t>$target\n"; + if(section["rootdir"] > 1) + o = o s; + } + if((!oargc || ("-mkrrrules" in oargv)) && ("rr" in section)){ + n = split(line["rr", 0], a); + if(n == 1) + a[2] = ARGV[argc] ".proto"; + s = "$CONF.rr:\t../mk/mkrr $CONF " a[2] "\n"; + s = s "\t../mk/mkrr $CONF " a[2] "\n"; + for(i = 1; i < section["rr"]; i++) + s = s "$CONF.rr:\t" line["rr", i] "\n"; + o = o s; + } + if("-mkdevc" in oargv) + o = o mkdevc(); + if("-mkerrstr" in oargv) + o = o mkerrstr(); + if("-mksystab" in oargv) + o = o mksystab(); + if("-mkbootconf" in oargv) + o = o mkbootconf(); + + # + # to do: + # bootmkfile + # mkrootall (can it be done at all?) 
+	#
+	printf o;
+
+	exit 0;
+}
+
+function mkbootconf(	a, n, s, t, u, c, d, p, r){	# returns the C source of boot$CONF.c built from the "boot" and "rootdir" sections
+	s = "#include <u.h>\n";
+	s = s "#include <libc.h>\n\n";
+	s = s "#include \"../boot/boot.h\"\n\n";
+	s = s "Method method[] = {\n";
+
+	c = "0";
+	d = "#S/sdC0/";
+	p = "boot";
+	r = "/root";
+
+	for(i = 0; i < section["boot"]; i++){	# NOTE: start at 0
+		n = split(line["boot", i], a);
+		if(a[1] == "boot"){
+			if(a[2] == "cpu"){
+				c = "1";
+				if(n == 4 && a[3] == "boot")
+					d = a[4];
+			}
+			else if(a[2] == "rootdir" && n == 3)
+				r = a[3];
+			else if(a[2] ~ /^(bboot|dosboot|romboot)$/){
+				c = "1";
+				p = a[2];
+			}
+			else if(a[2] == "boot" && n == 3)
+				d = a[3];
+			continue;
+		}
+		s = s "\t{ \"" a[1] "\", config" a[1] ", connect" a[1] ", ";
+		t = "nil";
+		if(n > 1){
+			u = line["boot", i];
+			if(sub(/^[_A-Za-z][_A-Za-z0-9]*[ \t]*/, "", u)){
+				if(match(u, /^".*"$/))
+					u = substr(u, RSTART+1, RLENGTH-2);
+				t = "\"" u "\"";
+			}
+		}
+		s = s t ", },\n";
+	}
+	s = s "\t{ nil },\n};\n\n";
+	s = s "int cpuflag = " c ";\n";
+	s = s "char* rootdir = \"" r "\";\n";
+	s = s "char* bootdisk = \"" d "\";\n";
+	s = s "extern void " p "(int, char**);\n\n";
+	s = s "void\nmain(int argc, char **argv)\n";
+	s = s "{\n\t" p "(argc, argv);\n}\n"
+
+	t = "int (*cfs)(int) = 0;\n";
+	for(i = 1; i < section["rootdir"]; i++){
+		if(split(line["rootdir", i], a) == 0 || a[1] !~ /\/bin\/cfs$/)	# test this entry's first field, not the stale $1
+			continue;
+		t = "int (*cfs)(int) = cache;\n";
+		break;
+	}
+	s = s t;
+
+	return s;
+}
+
+function mksystab(	a, i, f, n, s, t){
+	s = "#include \"u.h\"\n";
+	s = s "#include \"../port/lib.h\"\n";
+	s = s "#include \"mem.h\"\n";
+	s = s "#include \"dat.h\"\n";
+	s = s "#include \"fns.h\"\n\n";
+	s = s "#include \"/sys/src/libc/9syscall/sys.h\"\n\n";
+
+	t = "";
+	while((getline < "/sys/src/libc/9syscall/sys.h") > 0){	# getline yields -1 on error; a bare test would loop forever
+		if($1 != "#define" || NF != 3)
+			continue;
+
+		f = "sys" tolower($2);
+		if($2 == "SYSR1")
+			f = "sysr1";
+		if($2 == "RENDEZVOUS")
+			n = "Rendez";
+		else if($2 == "BRK_")
+			n = "Brk";
+		else
+			n = substr($2, 1, 1) tolower(substr($2, 2));
+
+		s = s "extern void " f "(Ar0*, 
va_list);\n"; + t = t "\t[" $2 "]\t"; + if(length($2) < 6) + t = t "\t"; + t = t "{ \"" n "\", " f ", "; + # + # The following should really be defined properly in the + # manual and code, but changing Plan 9 now is too awkward. + # It will matter more when sizeof(long) != sizeof(int). + # + # if($2 ~ "(FVERSION|STAT|FSTAT|WSTAT|FWSTAT|AWAIT)") + # t = t "{ .u = 0 } },\n"; + # + # if($2 ~ "(BIND|_MOUNT|MOUNT)") + # t = t "{ .l = -1 } },\n"; + # + if($2 ~ "(EXEC|SEGBRK|SEGATTACH|RENDEZVOUS)") + t = t "{ .v = (void*)-1 } },\n"; + else if($2 ~ "(ALARM|_READ|_WRITE|PREAD|PWRITE)") + t = t "{ .l = -1 } },\n"; + else + t = t "{ .i = -1 } },\n"; + } + if("syscall" in section){ + for(i = 1; i < section["syscall"]; i++){ + if(split(line["syscall", i], a) != 8) + continue; + if(line["syscall", i] !~ /#define.*{ \.[ilpuv] = .* }$/) + continue; + + f = "sys" tolower(a[2]); + n = substr(a[2], 1, 1) tolower(substr(a[2], 2)); + + s = s "\nSyscall " f ";\n"; + t = t a[1] " " a[2] "\t" a[3] "\n\t[" a[2] "]\t"; + if(length(a[2]) < 6) + t = t "\t"; + split(line["syscall", i], a, "{"); + t = t "{ \"" n "\", " f ", {" a[2] " },\n"; + } + } + s = s "struct {\n\tchar*\tn;\n\tvoid (*f)(Ar0*, va_list);\n\tAr0\tr;\n}"; + s = s " systab[] = {\n" t "};\n\nint nsyscall = nelem(systab);\n"; + + return s; +} + +function mkerrstr( a, s){ + FS="[ \t;]+"; + while(getline < "../port/error.h"){ + split($0, a, /\/\* | \*\//); + s = s $2 " " $3 " = \"" a[2] "\";\n"; + } + FS=" "; + + return s; +} + +function mkdevc( a, d, i, m, n, s, t, u, name, cname){ + s = "#include \"u.h\"\n"; + s = s "#include \"../port/lib.h\"\n"; + s = s "#include \"mem.h\"\n"; + s = s "#include \"dat.h\"\n"; + s = s "#include \"fns.h\"\n"; + s = s "#include \"../port/error.h\"\n\n"; + s = s "#include \"io.h\"\n\n"; + + t = ""; + for(i = 1; i < section["dev"]; i++){ + split(line["dev", i], a); + s = s "extern Dev " a[1] "devtab;\n"; + t = t "\t&" a[1] "devtab,\n"; + d[a[1]]++; + } + s = s "Dev* devtab[] = {\n" t 
"\tnil,\n};\n\n"; + + mkrootrules(name, cname, m); + t = ""; + for(i = 1; i < section["rootdir"]; i++){ + s = s "extern uchar " cname[i] "code[];\n"; + s = s "extern usize " cname[i] "len;\n"; + t = t "\taddbootfile(\"" name[i] "\", " cname[i] "code, " cname[i] "len);\n"; + } + for(i = 1; i < section["link"]; i++){ + split(line["link", i], a); + s = s "extern void " a[1] "link(void);\n"; + t = t "\t" a[1] "link();\n"; + } + s = s "void\nlinks(void)\n{\n" t "}\n\n"; + + if("ip" in d && "ip" in section){ + t = ""; + s = s "#include \"../ip/ip.h\"\n"; + for(i = 1; i < section["ip"]; i++){ + split(line["ip", i], a); + s = s "extern void " a[1] "init(Fs*);\n"; + t = t "\t" a[1] "init,\n"; + } + s = s "void (*ipprotoinit[])(Fs*) = {\n" t "\tnil,\n};\n\n"; + } + + if("sd" in d && "sd" in section){ + t = ""; + s = s "#include \"../port/sd.h\"\n"; + for(i = 1; i < section["sd"]; i++){ + split(line["sd", i], a); + s = s "extern SDifc " a[1] "ifc;\n"; + t = t "\t&" a[1] "ifc,\n"; + } + s = s "SDifc* sdifc[] = {\n" t "\tnil,\n};\n\n"; + } + + if("uart" in d && "uart" in section){ + t = ""; + for(i = 1; i < section["uart"]; i++){ + split(line["uart", i], a); + a[1] = substr(a[1], 5, length(a[1])-4) "physuart"; + s = s "extern PhysUart " a[1] ";\n"; + t = t "\t&" a[1] ",\n"; + } + s = s "PhysUart* physuart[] = {\n" t "\tnil,\n};\n\n"; + } + + t = ""; + n = 0; + if("physseg" in section){ + for(i = 1; i < section["physseg"]; i++){ + u = line["physseg", i]; + if(u ~ /^\.[_A-Za-z][_A-Za-z0-9]*/) + t = t "\t"; + t = t "\t" u "\n"; + if(sub(/.*\.pgalloc.*=[^_A-Za-z]*/, "", u)){ + if(match(u, /^[_A-Za-z][_A-Za-z0-9]*/)){ + u = substr(u, RSTART, RLENGTH); + s = s "extern Page *(*" u ")(Segment*, uintptr);\n"; + } + } + else if(sub(/.*\.pgfree.*=[^_A-Za-z]*/, "", u)){ + if(match(u, /^[_A-Za-z][_A-Za-z0-9]*/)){ + u = substr(u, RSTART, RLENGTH); + s = s "extern void (*" u ")(Page*);\n"; + } + } + if(match(u, /}/)) + n++; + } + } + s = s "Physseg physseg[" n+8 "] = {\n"; + s = s 
"\t{\t.attr\t= SG_SHARED,\n"; + s = s "\t\t.name\t= \"shared\",\n"; + s = s "\t\t.size\t= SEGMAXPG,\n\t},\n"; + s = s "\t{\t.attr\t= SG_BSS,\n"; + s = s "\t\t.name\t= \"memory\",\n"; + s = s "\t\t.size\t= SEGMAXPG,\n\t},\n"; + s = s t "};\nint nphysseg = " n+8 ";\n\n"; + + s = s "char dbgflg[256]"; + t = ""; + for(u in dbgflg) + t = t "\t[" u "]\t" dbgflg[u] ",\n"; + if(t != "") + s = s " = {\n" t "}"; + s = s ";\n\n"; + + for(i in m) + delete m[i]; + + for(i = 1; i < section["misc"]; i++){ + split(line["misc", i], a); + m[a[1]] = line["misc", i]; + } + if("cache" in m){ + s = s "extern void cinit(void);\n"; + s = s "extern void copen(Chan*);\n"; + s = s "extern int cread(Chan*, uchar*, int, vlong);\n"; + s = s "extern void cupdate(Chan*, uchar*, int, vlong);\n"; + s = s "extern void cwrite(Chan*, uchar*, int, vlong);\n\n"; + s = s "void (*mfcinit)(void) = cinit;\n"; + s = s "void (*mfcopen)(Chan*) = copen;\n"; + s = s "int (*mfcread)(Chan*, uchar*, int, vlong) = cread;\n"; + s = s "void (*mfcupdate)(Chan*, uchar*, int, vlong) = cupdate;\n"; + s = s "void (*mfcwrite)(Chan*, uchar*, int, vlong) = cwrite;\n\n"; + } + else{ + s = s "void (*mfcinit)(void) = nil;\n"; + s = s "void (*mfcopen)(Chan*) = nil;\n"; + s = s "int (*mfcread)(Chan*, uchar*, int, vlong) = nil;\n"; + s = s "void (*mfcupdate)(Chan*, uchar*, int, vlong) = nil;\n"; + s = s "void (*mfcwrite)(Chan*, uchar*, int, vlong) = nil;\n\n"; + } + if(!("rdb" in misc)){ + s = s "void\n"; + s = s "rdb(void)\n"; + s = s "{\n"; + s = s "\tsplhi();\n"; + s = s "\tiprint(\"rdb...not installed\\n\");\n"; + s = s "\tfor(;;);\n"; + s = s "}\n\n"; + } + if(objtype == "power"){ + for(i = 1; i < section[objtype]; i++){ + split(line[objtype, i], a); + m[a[1]] = line[objtype, i]; + } + if(!("cnksyscall" in m)){ + s = s "void\n"; + s = s "cnksyscall(Ureg*)\n"; + s = s "{\n"; + s = s "\tpanic(\"cnkemu...not installed\\n\");\n"; + s = s "\tfor(;;);\n"; + s = s "}\n\n"; + s = s "void*\n"; + s = s "cnksysexecregs(uintptr, ulong, 
ulong)\n"; + s = s "{\n"; + s = s "\tpanic(\"cnkemu...not installed\\n\");\n"; + s = s "\tfor(;;);\n"; + s = s "}\n\n"; + } + } + if("conf" in section){ + for(i = 1; i < section["conf"]; i++) + s = s line["conf", i] "\n"; + s = s "\n"; + } + t = "."; + while("pwd" | getline > 0){ + if($0 ~ /^\//) + t = $0; + } + s = s "char* conffile = \"" t "/" ARGV[argc] "\";\n"; + s = s "ulong kerndate = KERNDATE;\n"; + + return s; +} + +function mkrootrules(name, cname, src, a, i, n){ + for(i = 1; i < section["rootdir"]; i++){ + n = split(line["rootdir", i], a); + if(n >= 2) + name[i] = a[2]; + else + name[i] = a[1]; + sub(/.*\//, "", name[i]); + cname[i] = a[1]; + gsub(/[^a-zA-Z0-9_]/, "_", cname[i]); + src[i] = a[1]; + } +} + +function mkrules(dir, exists, ameta, cmeta, flags, f, i, s, t){ + for(i in ameta) + delete ameta[i]; + for(i in cmeta) + delete cmeta[i]; + + s = ""; + while("cd " dir "; /bin/ls *.[cs]" | getline > 0){ + if($0 !~ /^[A-Za-z0-9]*\.[cs]$/) + continue; + f = $0; + if(!sub(/\.[cs]$/, "")) + continue; + if($0 in exists) + continue; + exists[$0] = dir; + if(f ~ /\.c$/){ + if(!($0 in dbgc)){ + cmeta[$0]++; + continue; + } + t = "$CC $CFLAGS " flags; + } + else{ + if(!($0 in dbgc)){ + ameta[$0]++; + continue; + } + t = "$AS $AFLAGS " flags; + } + s = s $0 ".$O:\t" dir "/" f "\n"; + s = s "\t" t " -D'_DBGC_='" dbgc[$0] "'' " dir "/" f "\n"; + } + return s; +} + +function mkport( array){ + arrayify(array, "port", "", ".$O", 1); + + return listolate(array, " "); +} + +function mklib( array){ + arrayify(array, "lib", "/$objtype/lib/", ".a", 1); + + return listolate(array," "); +} + +function mkmach( a, i, s){ + s = ""; + for(i = 1; i < section[objtype]; i++){ + if(!split(line[objtype, i], a)) + continue; + if(s == "") + s = a[1] ".$O"; + else + s = s " " a[1] ".$O"; + } + + return s; +} + +function mkdevlist( a, array, i, j, n, s){ + for(s in section){ + if(line[s, 0] !~ /[ \t]\+dev[^_A-Za-z0-9]*/) + continue; + if(s == "dev") + arrayify(array, s, "dev", ".$O", 1); 
+		else if(s == objtype)
+			arrayify(array, s, "", ".$O", 0);
+		else
+			arrayify(array, s, "", ".$O", 1);
+	}
+
+	return listolate(array, " ");
+}
+
+function listolate(array, sep,	a, s){	# join the keys of array with sep; each key visited is prepended
+	s = "";
+	for(a in array){
+		if(s == "")
+			s = a;
+		else
+			s = a sep s;
+	}
+
+	return s;
+}
+
+function arrayify(array, tag, prefix, suffix, one,	a, i, j, n){	# record the names on each line of section tag as keys of array
+	for(i = 1; i < section[tag]; i++){
+		n = split(line[tag, i], a);
+		if(one)	# first field is recorded only when asked for, with prefix and suffix
+			array[prefix a[1] suffix]++;
+		for(j = 2; j <= n; j++){
+			if(a[j] ~ /[+=-].*/)	# index with j; $j would read field j of the current input record
+				continue;
+			array[a[j] suffix]++;
+		}
+	}
+}
diff -Nru 0/sys/src/nix/mk/portmkfile 4/sys/src/nix/mk/portmkfile
--- 0/sys/src/nix/mk/portmkfile Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/mk/portmkfile Wed Feb 6 00:00:00 2013
@@ -0,0 +1,128 @@
+%.$O: %.s
+	$AS $AFLAGS $stem.s
+
+%.$O: %.c
+	$CC $CFLAGS $stem.c
+
+%.m: %.$O
+	$LD -o $target -uX -l $prereq
+
+%.acid: %.c
+	$CC $CFLAGS -a $stem.c >$stem.acid
+
+%.acid: ../ip/%.c
+	$CC $CFLAGS -a -I. ../ip/$stem.c >$stem.acid
+
+%.acid: ../port/%.c
+	$CC $CFLAGS -a -I. 
../port/$stem.c >$stem.acid + +%.db: main.$O + $CC -s$stem main.c | dbfmt > $stem.db + +%.$O: /$objtype/include/u.h +%.$O: ../port/lib.h +%.$O: mem.h +%.$O: dat.h ../port/portdat.h +%.$O: fns.h ../port/portfns.h + +alloc.$O: /sys/include/pool.h +chan.$O: ../port/error.h +dev.$O: ../port/error.h +devcap.$O: ../port/error.h +devcap.$O: /sys/include/libsec.h +devcons.$O: /sys/include/authsrv.h /sys/include/pool.h +devdup.$O: ../port/error.h +devenv.$O: ../port/error.h +devkprof.$O: ../port/error.h +devmnt.$O: ../port/error.h +devpipe.$O: ../port/error.h +devprobe.$O: ../port/netif.h probe.h +devproc.$O: ../port/error.h ../port/edf.h +devproc.$O: /sys/include/tos.h /sys/include/trace.h /$objtype/include/ureg.h +devsd.$O: ../port/error.h ../port/sd.h +devsrv.$O: ../port/error.h +devssl.$O: ../port/error.h +devssl.$O: /sys/include/libsec.h +devtab.$O: ../port/error.h +devtls.$O: ../port/error.h +devtls.$O: /sys/include/libsec.h +devuart.$O: ../port/error.h +devwd.$O: ../port/error.h +edf.$O: ../port/error.h ../port/edf.h +edf.$O: /sys/include/trace.h +ethermii.$O: ../port/ethermii.h ../port/netif.h +fault.$O: ../port/error.h +image.$O: ../port/error.h +initcode.$O: /sys/include/libc.h +latin1.$O: ../port/latin1.h +netif.$O: ../port/error.h ../port/netif.h +parse.$O: ../port/error.h +pgrp.$O: ../port/error.h +portclock.$O: /$objtype/include/ureg.h +proc.$O: ../port/error.h ../port/edf.h errstr.h +proc.$O: /sys/include/trace.h +qio.$O: ../port/error.h +rdb.$O: /$objtype/include/ureg.h +rebootcmd.$O: ../port/error.h +rebootcmd.$O: /sys/include/a.out.h +segment.$O: ../port/error.h +swap.$O: ../port/error.h +sysauth.$O: ../port/error.h +sysauth.$O: /sys/include/authsrv.h +sysfile.$O: ../port/error.h +sysproc.$O: ../port/error.h ../port/edf.h +sysproc.$O: /sys/include/a.out.h +sysseg.$O: ../port/error.h +taslock.$O: ../port/edf.h + +../port/latin1.h: /lib/keyboard + aux/mklatinkbd /lib/keyboard > $target + +../port/systab.c: ../mk/parse /sys/src/libc/9syscall/sys.h + awk -f 
../mk/parse -- -mksystab /sys/src/libc/9syscall/sys.h $CONF > $target + +systab.$O: ../port/systab.c + $CC $CFLAGS -I. ../port/systab.c + +errstr.h: ../mk/parse ../port/error.h + awk -f ../mk/parse -- -mkerrstr > $target + +init.h: init.out + {echo 'uchar initcode[]={' + xd -1x $prereq | sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > init.h + +$CONF.$O: $CONF.c + $CC $CFLAGS '-DKERNDATE='`{date -n} $CONF.c + +$CONF.c: ../mk/parse $CONF + awk -f ../mk/parse -- -mkdevc $CONF > $CONF.c + +./root/$O.%: ./root/%.c + @{cd ./root; mk $O.$stem} + +../root/$O.%: ../root/%.c + @{cd ../root; mk $O.$stem} + +all:V: + for(i in $CONFLIST) + mk 'CONF='$i + +installall:V: + for(i in $CONFLIST) + mk 'CONF='$i install + +%.clean:V: + rm -f $stem.c [9bz]$stem 9$stem.elf [9bz]$stem.gz boot$stem.* + +clean:V: + rm -f *.[$OS] *.root.[cs] *.out *.m *.acid errstr.h init.h $objtype^l.h + for(i in $CONFLIST) + mk $i.clean + @{cd ../root; mk clean} + if(test -d ./root) @{cd ./root; mk clean}; status='' + +nuke:V: clean + rm -f ../boot/libboot.a$O *.elf *.rr + @{cd ../root; mk clean nuke} + if(test -d ./root) @{cd ./root; mk clean nuke}; status='' diff -Nru 0/sys/src/nix/port/alarm.c 4/sys/src/nix/port/alarm.c --- 0/sys/src/nix/port/alarm.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/alarm.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,101 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static Alarms alarms; +static Rendez alarmr; + +void +alarmkproc(void*) +{ + Proc *rp; + ulong now; + + for(;;){ + now = sys->ticks; + qlock(&alarms); + while((rp = alarms.head) && rp->alarm <= now){ + if(rp->alarm != 0L){ + if(canqlock(&rp->debug)){ + if(!waserror()){ + postnote(rp, 0, "alarm", NUser); + poperror(); + } + qunlock(&rp->debug); + rp->alarm = 0L; + }else + break; + } + alarms.head = rp->palarm; + } + qunlock(&alarms); + + sleep(&alarmr, return0, 0); + } +} + +/* + * called every clock tick + */ +void +checkalarms(void) +{ 
+ Proc *p; + ulong now; + + p = alarms.head; + now = sys->ticks; + + if(p && p->alarm <= now) + wakeup(&alarmr); +} + +ulong +procalarm(ulong time) +{ + Proc **l, *f; + ulong when, old; + + if(up->alarm) + old = tk2ms(up->alarm - sys->ticks); + else + old = 0; + if(time == 0) { + up->alarm = 0; + return old; + } + when = ms2tk(time)+sys->ticks; + + qlock(&alarms); + l = &alarms.head; + for(f = *l; f; f = f->palarm) { + if(up == f){ + *l = f->palarm; + break; + } + l = &f->palarm; + } + + up->palarm = 0; + if(alarms.head) { + l = &alarms.head; + for(f = *l; f; f = f->palarm) { + if(f->alarm > when) { + up->palarm = f; + *l = up; + goto done; + } + l = &f->palarm; + } + *l = up; + } + else + alarms.head = up; +done: + up->alarm = when; + qunlock(&alarms); + + return old; +} diff -Nru 0/sys/src/nix/port/allocb.c 4/sys/src/nix/port/allocb.c --- 0/sys/src/nix/port/allocb.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/allocb.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,173 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum +{ + Hdrspc = 64, /* leave room for high-level headers */ + Bdead = 0x51494F42, /* "QIOB" */ +}; + +struct +{ + Lock; + ulong bytes; +} ialloc; + +static Block* +_allocb(int size) +{ + Block *b; + uchar *p; + int n; + + n = BLOCKALIGN + ROUNDUP(size+Hdrspc, BLOCKALIGN) + sizeof(Block); + if((p = malloc(n)) == nil) + return nil; + + b = (Block*)(p + n - sizeof(Block)); /* block at end of allocated space */ + b->base = p; + b->next = nil; + b->list = nil; + b->free = 0; + b->flag = 0; + + /* align base and bounds of data */ + b->lim = (uchar*)(PTR2UINT(b) & ~(BLOCKALIGN-1)); + + /* align start of writable data, leaving space below for added headers */ + b->rp = b->lim - ROUNDUP(size, BLOCKALIGN); + b->wp = b->rp; + + if(b->rp < b->base || b->lim - b->rp < size) + panic("_allocb"); + + return b; +} + +Block* +allocb(int size) +{ + Block *b; + + /* + * Check in a process and wait until successful. 
+ * Can still error out of here, though. + */ + if(up == nil) + panic("allocb without up: %#p\n", getcallerpc(&size)); + if((b = _allocb(size)) == nil){ + mallocsummary(); + panic("allocb: no memory for %d bytes\n", size); + } + + return b; +} + +Block* +iallocb(int size) +{ + Block *b; + static int m1, m2, mp; + + if(ialloc.bytes > conf.ialloc){ + if((m1++%10000)==0){ + if(mp++ > 1000){ + active.exiting = 1; + exit(0); + } + iprint("iallocb: limited %lud/%lud\n", + ialloc.bytes, conf.ialloc); + } + return nil; + } + + if((b = _allocb(size)) == nil){ + if((m2++%10000)==0){ + if(mp++ > 1000){ + active.exiting = 1; + exit(0); + } + iprint("iallocb: no memory %lud/%lud\n", + ialloc.bytes, conf.ialloc); + } + return nil; + } + b->flag = BINTR; + + ilock(&ialloc); + ialloc.bytes += b->lim - b->base; + iunlock(&ialloc); + + return b; +} + +void +freeb(Block *b) +{ + void *dead = (void*)Bdead; + uchar *p; + + if(b == nil) + return; + + /* + * drivers which perform non cache coherent DMA manage their own buffer + * pool of uncached buffers and provide their own free routine. 
+ */ + if(b->free) { + b->free(b); + return; + } + if(b->flag & BINTR) { + ilock(&ialloc); + ialloc.bytes -= b->lim - b->base; + iunlock(&ialloc); + } + + p = b->base; + + /* poison the block in case someone is still holding onto it */ + b->next = dead; + b->rp = dead; + b->wp = dead; + b->lim = dead; + b->base = dead; + + free(p); +} + +void +checkb(Block *b, char *msg) +{ + void *dead = (void*)Bdead; + + if(b == dead) + panic("checkb b %s %#p", msg, b); + if(b->base == dead || b->lim == dead || b->next == dead + || b->rp == dead || b->wp == dead){ + print("checkb: base %#p lim %#p next %#p\n", + b->base, b->lim, b->next); + print("checkb: rp %#p wp %#p\n", b->rp, b->wp); + panic("checkb dead: %s\n", msg); + } + + if(b->base > b->lim) + panic("checkb 0 %s %#p %#p", msg, b->base, b->lim); + if(b->rp < b->base) + panic("checkb 1 %s %#p %#p", msg, b->base, b->rp); + if(b->wp < b->base) + panic("checkb 2 %s %#p %#p", msg, b->base, b->wp); + if(b->rp > b->lim) + panic("checkb 3 %s %#p %#p", msg, b->rp, b->lim); + if(b->wp > b->lim) + panic("checkb 4 %s %#p %#p", msg, b->wp, b->lim); +} + +void +iallocsummary(void) +{ + print("ialloc %lud/%lud\n", ialloc.bytes, conf.ialloc); +} diff -Nru 0/sys/src/nix/port/aoe.h 4/sys/src/nix/port/aoe.h --- 0/sys/src/nix/port/aoe.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/aoe.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,78 @@ +/* + * ATA-over-Ethernet (AoE) protocol + */ +enum { + ACata, + ACconfig, +}; + +enum { + AQCread, + AQCtest, + AQCprefix, + AQCset, + AQCfset, +}; + +enum { + AEcmd = 1, + AEarg, + AEdev, + AEcfg, + AEver, +}; + +enum { + Aoetype = 0x88a2, + Aoesectsz = 512, /* standard sector size */ + Aoever = 1, + + AFerr = 1<<2, + AFrsp = 1<<3, + + AAFwrite= 1, + AAFext = 1<<6, +}; + +typedef struct { + uchar dst[Eaddrlen]; + uchar src[Eaddrlen]; + uchar type[2]; + uchar verflag; + uchar error; + uchar major[2]; + uchar minor; + uchar cmd; + uchar tag[4]; + uchar payload[]; +} Aoehdr; + +#define AOEHDRSZ offsetof(Aoehdr, 
payload[0]) + +typedef struct { + Aoehdr; + uchar aflag; + uchar errfeat; + uchar scnt; + uchar cmdstat; + uchar lba[6]; + uchar res[2]; + uchar payload[]; +} Aoeata; + +#define AOEATASZ offsetof(Aoeata, payload[0]) + +typedef struct { + Aoehdr; + uchar bufcnt[2]; + uchar fwver[2]; + uchar scnt; + uchar verccmd; + uchar cslen[2]; + uchar payload[]; +} Aoeqc; + +#define AOEQCSZ offsetof(Aoeqc, payload[0]) + +extern char Echange[]; +extern char Enotup[]; diff -Nru 0/sys/src/nix/port/cache.c 4/sys/src/nix/port/cache.c --- 0/sys/src/nix/port/cache.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/cache.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,601 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum +{ + NHASH = 128, + MAXCACHE = 1024*1024, + NFILE = 4096, + NEXTENT = 200, /* extent allocation size */ +}; + +typedef struct Extent Extent; +struct Extent +{ + int bid; + ulong start; + int len; + Page *cache; + Extent *next; +}; + +typedef struct Mntcache Mntcache; +struct Mntcache +{ + Qid qid; + uint devno; + Dev* dev; + QLock; + Extent *list; + Mntcache *hash; + Mntcache *prev; + Mntcache *next; +}; + +typedef struct Cache Cache; +struct Cache +{ + Lock; + int pgno; + Mntcache *head; + Mntcache *tail; + Mntcache *hash[NHASH]; +}; + +typedef struct Ecache Ecache; +struct Ecache +{ + Lock; + int total; + int free; + Extent* head; +}; + +static Image fscache; +static Cache cache; +static Ecache ecache; +static int maxcache = MAXCACHE; + +static void +extentfree(Extent* e) +{ + lock(&ecache); + e->next = ecache.head; + ecache.head = e; + ecache.free++; + unlock(&ecache); +} + +static Extent* +extentalloc(void) +{ + Extent *e; + int i; + + lock(&ecache); + if(ecache.head == nil){ + e = malloc(NEXTENT*sizeof(Extent)); + if(e == nil){ + unlock(&ecache); + return nil; + } + for(i = 0; i < NEXTENT; i++){ + e->next = ecache.head; + ecache.head = e; + e++; + } + ecache.free += NEXTENT; + ecache.total += NEXTENT; + } + + e = 
ecache.head; + ecache.head = e->next; + memset(e, 0, sizeof(Extent)); + ecache.free--; + unlock(&ecache); + + return e; +} + +void +cinit(void) +{ + int i; + Mntcache *mc; + + if((cache.head = malloc(sizeof(Mntcache)*NFILE)) == nil) + panic("cinit: no memory"); + mc = cache.head; + + /* a better algorithm would be nice */ +// if(conf.npage*PGSZ > 200*MB) +// maxcache = 10*MAXCACHE; +// if(conf.npage*PGSZ > 400*MB) +// maxcache = 50*MAXCACHE; + + for(i = 0; i < NFILE-1; i++) { + mc->next = mc+1; + mc->prev = mc-1; + mc++; + } + + cache.tail = mc; + cache.tail->next = 0; + cache.head->prev = 0; + + fscache.notext = 1; +} + +Page* +cpage(Extent *e) +{ + /* Easy consistency check */ + if(e->cache->daddr != e->bid) + return 0; + + return lookpage(&fscache, e->bid); +} + +void +cnodata(Mntcache *mc) +{ + Extent *e, *n; + + /* + * Invalidate all extent data + * Image lru will waste the pages + */ + for(e = mc->list; e; e = n) { + n = e->next; + extentfree(e); + } + mc->list = 0; +} + +void +ctail(Mntcache *mc) +{ + /* Unlink and send to the tail */ + if(mc->prev) + mc->prev->next = mc->next; + else + cache.head = mc->next; + if(mc->next) + mc->next->prev = mc->prev; + else + cache.tail = mc->prev; + + if(cache.tail) { + mc->prev = cache.tail; + cache.tail->next = mc; + mc->next = 0; + cache.tail = mc; + } + else { + cache.head = mc; + cache.tail = mc; + mc->prev = 0; + mc->next = 0; + } +} + +void +copen(Chan *c) +{ + int h; + Extent *e, *next; + Mntcache *mc, *f, **l; + + /* directories aren't cacheable and append-only files confuse us */ + if(c->qid.type&(QTDIR|QTAPPEND)) + return; + + h = c->qid.path%NHASH; + lock(&cache); + for(mc = cache.hash[h]; mc != nil; mc = mc->hash) { + if(mc->qid.path == c->qid.path) + if(mc->qid.type == c->qid.type) + if(mc->devno == c->devno && mc->dev == c->dev) { + c->mc = mc; + ctail(mc); + unlock(&cache); + + /* File was updated, invalidate cache */ + if(mc->qid.vers != c->qid.vers) { + mc->qid.vers = c->qid.vers; + qlock(mc); + 
cnodata(mc); + qunlock(mc); + } + return; + } + } + + /* LRU the cache headers */ + mc = cache.head; + l = &cache.hash[mc->qid.path%NHASH]; + for(f = *l; f; f = f->hash) { + if(f == mc) { + *l = mc->hash; + break; + } + l = &f->hash; + } + + mc->qid = c->qid; + mc->devno = c->devno; + mc->dev = c->dev; + + l = &cache.hash[h]; + mc->hash = *l; + *l = mc; + ctail(mc); + + qlock(mc); + c->mc = mc; + e = mc->list; + mc->list = 0; + unlock(&cache); + + while(e) { + next = e->next; + extentfree(e); + e = next; + } + qunlock(mc); +} + +static int +cdev(Mntcache *mc, Chan *c) +{ + if(mc->qid.path != c->qid.path) + return 0; + if(mc->qid.type != c->qid.type) + return 0; + if(mc->devno != c->devno) + return 0; + if(mc->dev != c->dev) + return 0; + if(mc->qid.vers != c->qid.vers) + return 0; + return 1; +} + +int +cread(Chan *c, uchar *buf, int len, vlong off) +{ + KMap *k; + Page *p; + Mntcache *mc; + Extent *e, **t; + int o, l, total; + ulong offset; + + if(off+len > maxcache) + return 0; + + mc = c->mc; + if(mc == nil) + return 0; + + qlock(mc); + if(cdev(mc, c) == 0) { + qunlock(mc); + return 0; + } + + offset = off; + t = &mc->list; + for(e = *t; e; e = e->next) { + if(offset >= e->start && offset < e->start+e->len) + break; + t = &e->next; + } + + if(e == 0) { + qunlock(mc); + return 0; + } + + total = 0; + while(len) { + p = cpage(e); + if(p == 0) { + *t = e->next; + extentfree(e); + qunlock(mc); + return total; + } + + o = offset - e->start; + l = len; + if(l > e->len-o) + l = e->len-o; + + k = kmap(p); + if(waserror()) { + kunmap(k); + putpage(p); + qunlock(mc); + nexterror(); + } + + memmove(buf, (uchar*)VA(k) + o, l); + + poperror(); + kunmap(k); + + putpage(p); + + buf += l; + len -= l; + offset += l; + total += l; + t = &e->next; + e = e->next; + if(e == 0 || e->start != offset) + break; + } + + qunlock(mc); + return total; +} + +Extent* +cchain(uchar *buf, ulong offset, int len, Extent **tail) +{ + int l; + Page *p; + KMap *k; + Extent *e, *start, **t; + + start 
= 0; + *tail = 0; + t = &start; + while(len) { + e = extentalloc(); + if(e == 0) + break; + + p = auxpage(BIGPGSZ); + if(p == 0) { + extentfree(e); + break; + } + l = len; + if(l > BIGPGSZ) + l = BIGPGSZ; + + e->cache = p; + e->start = offset; + e->len = l; + + lock(&cache); + e->bid = cache.pgno; + cache.pgno += BIGPGSZ; + /* wrap the counter; low bits are unused by pghash but checked by lookpage */ + if((cache.pgno & ~(BIGPGSZ-1)) == 0){ + if(cache.pgno == BIGPGSZ-1){ + print("cache wrapped\n"); + cache.pgno = 0; + }else + cache.pgno++; + } + unlock(&cache); + + p->daddr = e->bid; + k = kmap(p); + if(waserror()) { /* buf may be virtual */ + kunmap(k); + nexterror(); + } + memmove((void*)VA(k), buf, l); + poperror(); + kunmap(k); + + cachepage(p, &fscache); + putpage(p); + + buf += l; + offset += l; + len -= l; + + *t = e; + *tail = e; + t = &e->next; + } + + return start; +} + +int +cpgmove(Extent *e, uchar *buf, int boff, int len) +{ + Page *p; + KMap *k; + + p = cpage(e); + if(p == 0) + return 0; + + k = kmap(p); + if(waserror()) { /* Since buf may be virtual */ + kunmap(k); + nexterror(); + } + + memmove((uchar*)VA(k)+boff, buf, len); + + poperror(); + kunmap(k); + putpage(p); + + return 1; +} + +void +cupdate(Chan *c, uchar *buf, int len, vlong off) +{ + Mntcache *mc; + Extent *tail; + Extent *e, *f, *p; + int o, ee, eblock; + ulong offset; + + if(off > maxcache || len == 0) + return; + + mc = c->mc; + if(mc == nil) + return; + qlock(mc); + if(cdev(mc, c) == 0) { + qunlock(mc); + return; + } + + /* + * Find the insertion point + */ + offset = off; + p = 0; + for(f = mc->list; f; f = f->next) { + if(f->start > offset) + break; + p = f; + } + + /* trim if there is a successor */ + eblock = offset+len; + if(f != 0 && eblock > f->start) { + len -= (eblock - f->start); + if(len <= 0) { + qunlock(mc); + return; + } + } + + if(p == 0) { /* at the head */ + e = cchain(buf, offset, len, &tail); + if(e != 0) { + mc->list = e; + tail->next = f; + } + qunlock(mc); + 
return; + } + + /* trim to the predecessor */ + ee = p->start+p->len; + if(offset < ee) { + o = ee - offset; + len -= o; + if(len <= 0) { + qunlock(mc); + return; + } + buf += o; + offset += o; + } + + /* try and pack data into the predecessor */ + if(offset == ee && p->len < BIGPGSZ) { + o = len; + if(o > BIGPGSZ - p->len) + o = BIGPGSZ - p->len; + if(cpgmove(p, buf, p->len, o)) { + p->len += o; + buf += o; + len -= o; + offset += o; + if(len <= 0) { +if(f && p->start + p->len > f->start) print("CACHE: p->start=%uld p->len=%d f->start=%uld\n", p->start, p->len, f->start); + qunlock(mc); + return; + } + } + } + + e = cchain(buf, offset, len, &tail); + if(e != 0) { + p->next = e; + tail->next = f; + } + qunlock(mc); +} + +void +cwrite(Chan* c, uchar *buf, int len, vlong off) +{ + int o, eo; + Mntcache *mc; + ulong eblock, ee; + Extent *p, *f, *e, *tail; + ulong offset; + + if(off > maxcache || len == 0) + return; + + mc = c->mc; + if(mc == nil) + return; + + qlock(mc); + if(cdev(mc, c) == 0) { + qunlock(mc); + return; + } + + offset = off; + mc->qid.vers++; + c->qid.vers++; + + p = 0; + for(f = mc->list; f; f = f->next) { + if(f->start >= offset) + break; + p = f; + } + + if(p != 0) { + ee = p->start+p->len; + eo = offset - p->start; + /* pack in predecessor if there is space */ + if(offset <= ee && eo < BIGPGSZ) { + o = len; + if(o > BIGPGSZ - eo) + o = BIGPGSZ - eo; + if(cpgmove(p, buf, eo, o)) { + if(eo+o > p->len) + p->len = eo+o; + buf += o; + len -= o; + offset += o; + } + } + } + + /* free the overlap -- it's a rare case */ + eblock = offset+len; + while(f && f->start < eblock) { + e = f->next; + extentfree(f); + f = e; + } + + /* link the block (if any) into the middle */ + e = cchain(buf, offset, len, &tail); + if(e != 0) { + tail->next = f; + f = e; + } + + if(p == 0) + mc->list = f; + else + p->next = f; + qunlock(mc); +} diff -Nru 0/sys/src/nix/port/chan.c 4/sys/src/nix/port/chan.c --- 0/sys/src/nix/port/chan.c Thu Jan 1 00:00:00 1970 +++ 
4/sys/src/nix/port/chan.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1703 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + PATHSLOP = 20, + PATHMSLOP = 20, +}; + +struct +{ + Lock; + int fid; + Chan *free; + Chan *list; +}chanalloc; + +typedef struct Elemlist Elemlist; + +struct Elemlist +{ + char *aname; /* original name */ + char *name; /* copy of name, so '/' can be overwritten */ + int nelems; + char **elems; + int *off; + int mustbedir; + int nerror; + int prefix; +}; + +char* +chanpath(Chan *c) +{ + if(c == nil) + return ""; + if(c->path == nil) + return ""; + if(c->path->s == nil) + return ""; + return c->path->s; +} + +int +isdotdot(char *p) +{ + return p[0]=='.' && p[1]=='.' && p[2]=='\0'; +} + +/* + * Rather than strncpy, which zeros the rest of the buffer, kstrcpy + * truncates if necessary, always zero terminates, does not zero fill, + * and puts ... at the end of the string if it's too long. Usually used to + * save a string in up->genbuf; + */ +void +kstrcpy(char *s, char *t, int ns) +{ + int nt; + + nt = strlen(t); + if(nt+1 <= ns){ + memmove(s, t, nt+1); + return; + } + /* too long */ + if(ns < 4){ + /* but very short! */ + strncpy(s, t, ns); + return; + } + /* truncate with ... 
at character boundary (very rare case) */ + memmove(s, t, ns-4); + ns -= 4; + s[ns] = '\0'; + /* look for first byte of UTF-8 sequence by skipping continuation bytes */ + while(ns>0 && (s[--ns]&0xC0)==0x80) + ; + strcpy(s+ns, "..."); +} + +int +emptystr(char *s) +{ + if(s == nil) + return 1; + if(s[0] == '\0') + return 1; + return 0; +} + +/* + * Atomically replace *p with copy of s + */ +void +kstrdup(char **p, char *s) +{ + int n; + char *t, *prev; + + n = strlen(s)+1; + /* if it's a user, we can wait for memory; if not, something's very wrong */ + if(up){ + t = smalloc(n); + setmalloctag(t, getcallerpc(&p)); + }else{ + t = malloc(n); + if(t == nil) + panic("kstrdup: no memory"); + } + memmove(t, s, n); + prev = *p; + *p = t; + free(prev); +} + +Chan* +newchan(void) +{ + Chan *c; + + lock(&chanalloc); + c = chanalloc.free; + if(c != 0) + chanalloc.free = c->next; + unlock(&chanalloc); + + if(c == nil){ + c = smalloc(sizeof(Chan)); + lock(&chanalloc); + c->fid = ++chanalloc.fid; + c->link = chanalloc.list; + chanalloc.list = c; + unlock(&chanalloc); + } + + c->dev = nil; + c->flag = 0; + c->ref = 1; + c->devno = 0; + c->offset = 0; + c->devoffset = 0; + c->iounit = 0; + c->umh = 0; + c->uri = 0; + c->dri = 0; + c->aux = 0; + c->mchan = 0; + c->mc = 0; + c->mux = 0; + memset(&c->mqid, 0, sizeof(c->mqid)); + c->path = 0; + c->ismtpt = 0; + + return c; +} + +Ref npath; + +Path* +newpath(char *s) +{ + int i; + Path *p; + + p = smalloc(sizeof(Path)); + i = strlen(s); + p->len = i; + p->alen = i+PATHSLOP; + p->s = smalloc(p->alen); + memmove(p->s, s, i+1); + p->ref = 1; + incref(&npath); + + /* + * Cannot use newpath for arbitrary names because the mtpt + * array will not be populated correctly. The names #/ and / are + * allowed, but other names with / in them draw warnings. 
+ */ + if(strchr(s, '/') && strcmp(s, "#/") != 0 && strcmp(s, "/") != 0) + print("newpath: %s from %#p\n", s, getcallerpc(&s)); + + p->mlen = 1; + p->malen = PATHMSLOP; + p->mtpt = smalloc(p->malen*sizeof p->mtpt[0]); + return p; +} + +static Path* +copypath(Path *p) +{ + int i; + Path *pp; + + pp = smalloc(sizeof(Path)); + pp->ref = 1; + incref(&npath); + DBG("copypath %s %#p => %#p\n", p->s, p, pp); + + pp->len = p->len; + pp->alen = p->alen; + pp->s = smalloc(p->alen); + memmove(pp->s, p->s, p->len+1); + + pp->mlen = p->mlen; + pp->malen = p->malen; + pp->mtpt = smalloc(p->malen*sizeof pp->mtpt[0]); + for(i=0; i<pp->mlen; i++){ /* copy each mount point pointer and take a reference on it */ + pp->mtpt[i] = p->mtpt[i]; + if(pp->mtpt[i]) + incref(pp->mtpt[i]); + } + + return pp; +} + +void +pathclose(Path *p) +{ + int i; + + if(p == nil) + return; +//XXX + DBG("pathclose %#p %s ref=%d =>", p, p->s, p->ref); + for(i=0; i<p->mlen; i++) + DBG(" %#p", p->mtpt[i]); + DBG("\n"); + + if(decref(p)) + return; + decref(&npath); + free(p->s); + for(i=0; i<p->mlen; i++) /* last reference gone: close every recorded mount point */ + if(p->mtpt[i]) + cclose(p->mtpt[i]); + free(p->mtpt); + free(p); +} + +/* + * In place, rewrite name to compress multiple /, eliminate ., and process .. + * (Really only called to remove a trailing .. that has been added. + * Otherwise would need to update n->mtpt as well.) + */ +static void +fixdotdotname(Path *p) +{ + char *r; + + if(p->s[0] == '#'){ + r = strchr(p->s, '/'); + if(r == nil) + return; + cleanname(r); + + /* + * The correct name is #i rather than #i/, + * but the correct name of #/ is #/. + */ + if(strcmp(r, "/")==0 && p->s[1] != '/') + *r = '\0'; + }else + cleanname(p->s); + p->len = strlen(p->s); +} + +static Path* +uniquepath(Path *p) +{ + Path *new; + + if(p->ref > 1){ + /* copy on write */ + new = copypath(p); + pathclose(p); + p = new; + } + return p; +} + +static Path* +addelem(Path *p, char *s, Chan *from) +{ + char *t; + int a, i; + Chan *c, **tt; + + if(s[0]=='.'
&& s[1]=='\0') + return p; + + p = uniquepath(p); + + i = strlen(s); + if(p->len+1+i+1 > p->alen){ + a = p->len+1+i+1 + PATHSLOP; + t = smalloc(a); + memmove(t, p->s, p->len+1); + free(p->s); + p->s = t; + p->alen = a; + } + /* don't insert extra slash if one is present */ + if(p->len>0 && p->s[p->len-1]!='/' && s[0]!='/') + p->s[p->len++] = '/'; + memmove(p->s+p->len, s, i+1); + p->len += i; + if(isdotdot(s)){ + fixdotdotname(p); + DBG("addelem %s .. => rm %#p\n", p->s, p->mtpt[p->mlen-1]); + if(p->mlen>1 && (c = p->mtpt[--p->mlen])){ + p->mtpt[p->mlen] = nil; + cclose(c); + } + }else{ + if(p->mlen >= p->malen){ + p->malen = p->mlen+1+PATHMSLOP; + tt = smalloc(p->malen*sizeof tt[0]); + memmove(tt, p->mtpt, p->mlen*sizeof tt[0]); + free(p->mtpt); + p->mtpt = tt; + } + DBG("addelem %s %s => add %#p\n", p->s, s, from); + p->mtpt[p->mlen++] = from; + if(from) + incref(from); + } + return p; +} + +void +chanfree(Chan *c) +{ + c->flag = CFREE; + + if(c->dirrock != nil){ + free(c->dirrock); + c->dirrock = 0; + c->nrock = 0; + c->mrock = 0; + } + if(c->umh != nil){ + putmhead(c->umh); + c->umh = nil; + } + if(c->umc != nil){ + cclose(c->umc); + c->umc = nil; + } + if(c->mux != nil){ + muxclose(c->mux); + c->mux = nil; + } + if(c->mchan != nil){ + cclose(c->mchan); + c->mchan = nil; + } + + if(c->dev != nil){ //XDYNX + //devtabdecr(c->dev); + c->dev = nil; + } + + pathclose(c->path); + c->path = nil; + + lock(&chanalloc); + c->next = chanalloc.free; + chanalloc.free = c; + unlock(&chanalloc); +} + +void +cclose(Chan *c) +{ + if(c->flag&CFREE) + panic("cclose %#p", getcallerpc(&c)); + + DBG("cclose %#p name=%s ref=%d\n", c, chanpath(c), c->ref); /* chanpath: c->path may still be nil */ + if(decref(c)) + return; + + if(!waserror()){ + if(c->dev != nil) //XDYNX + c->dev->close(c); + poperror(); + } + chanfree(c); +} + +/* + * Queue a chan to be closed by one of the clunk procs.
+ */ +struct { + Chan *head; + Chan *tail; + int nqueued; + int nclosed; + Lock l; + QLock q; + Rendez r; +} clunkq; + +static void closeproc(void*); + +void +ccloseq(Chan *c) +{ + if(c->flag&CFREE) + panic("ccloseq %#p", getcallerpc(&c)); + + DBG("ccloseq %#p name=%s ref=%d\n", c, chanpath(c), c->ref); /* chanpath: c->path may still be nil */ + + if(decref(c)) + return; + + lock(&clunkq.l); + clunkq.nqueued++; + c->next = nil; + if(clunkq.head) + clunkq.tail->next = c; + else + clunkq.head = c; + clunkq.tail = c; + unlock(&clunkq.l); + + if(!wakeup(&clunkq.r)) + kproc("closeproc", closeproc, nil); +} + +static int +clunkwork(void*) +{ + return clunkq.head != nil; +} + +static void +closeproc(void*) +{ + Chan *c; + + for(;;){ + qlock(&clunkq.q); + if(clunkq.head == nil){ + if(!waserror()){ + tsleep(&clunkq.r, clunkwork, nil, 5000); + poperror(); + } + if(clunkq.head == nil){ + qunlock(&clunkq.q); + pexit("no work", 1); + } + } + lock(&clunkq.l); + c = clunkq.head; + clunkq.head = c->next; + clunkq.nclosed++; + unlock(&clunkq.l); + qunlock(&clunkq.q); + if(!waserror()){ + if(c->dev != nil) //XDYNX + c->dev->close(c); + poperror(); + } + chanfree(c); + } +} + +/* + * Make sure we have the only copy of c. (Copy on write.)
+ */ +Chan* +cunique(Chan *c) +{ + Chan *nc; + + if(c->ref != 1){ + nc = cclone(c); + cclose(c); + c = nc; + } + + return c; +} + +int +eqqid(Qid a, Qid b) +{ + return a.path == b.path && a.vers == b.vers; +} + +static int +eqchan(Chan *a, Chan *b, int skipvers) +{ + if(a->qid.path != b->qid.path) + return 0; + if(!skipvers && a->qid.vers != b->qid.vers) + return 0; + if(a->dev->dc != b->dev->dc) + return 0; + if(a->devno != b->devno) + return 0; + return 1; +} + +int +eqchanddq(Chan *c, int dc, uint devno, Qid qid, int skipvers) +{ + if(c->qid.path != qid.path) + return 0; + if(!skipvers && c->qid.vers != qid.vers) + return 0; + if(c->dev->dc != dc) + return 0; + if(c->devno != devno) + return 0; + return 1; +} + +Mhead* +newmhead(Chan *from) +{ + Mhead *mh; + + mh = smalloc(sizeof(Mhead)); + mh->ref = 1; + mh->from = from; + incref(from); + return mh; +} + +int +cmount(Chan **newp, Chan *old, int flag, char *spec) +{ + int order, flg; + Chan *new; + Mhead *mhead, **l, *mh; + Mount *nm, *f, *um, **h; + Pgrp *pg; + + if(QTDIR & (old->qid.type^(*newp)->qid.type)) + error(Emount); + + if(old->umh) + print("cmount: unexpected umh, caller %#p\n", getcallerpc(&newp)); + + order = flag&MORDER; + + if(!(old->qid.type & QTDIR) && order != MREPL) + error(Emount); + + new = *newp; + mh = new->umh; + + /* + * Not allowed to bind when the old directory is itself a union. + * (Maybe it should be allowed, but I don't see what the semantics + * would be.) + * + * We need to check mh->mount->next to tell unions apart from + * simple mount points, so that things like + * mount -c fd /root + * bind -c /root / + * work. + * + * The check of mount->mflag allows things like + * mount fd /root + * bind -c /root / + * + * This is far more complicated than it should be, but I don't + * see an easier way at the moment. 
+ */ + if((flag&MCREATE) && mh && mh->mount + && (mh->mount->next || !(mh->mount->mflag&MCREATE))) + error(Emount); + + pg = up->pgrp; + wlock(&pg->ns); + + l = &MOUNTH(pg, old->qid); + for(mhead = *l; mhead; mhead = mhead->hash){ + if(eqchan(mhead->from, old, 1)) + break; + l = &mhead->hash; + } + + if(mhead == nil){ + /* + * nothing mounted here yet. create a mount + * head and add to the hash table. + */ + mhead = newmhead(old); + *l = mhead; + + /* + * if this is a union mount, add the old + * node to the mount chain. + */ + if(order != MREPL) + mhead->mount = newmount(mhead, old, 0, 0); + } + wlock(&mhead->lock); + if(waserror()){ + wunlock(&mhead->lock); + nexterror(); + } + wunlock(&pg->ns); + + nm = newmount(mhead, new, flag, spec); + if(mh != nil && mh->mount != nil){ + /* + * copy a union when binding it onto a directory + */ + flg = order; + if(order == MREPL) + flg = MAFTER; + h = &nm->next; + um = mh->mount; + for(um = um->next; um; um = um->next){ + f = newmount(mhead, um->to, flg, um->spec); + *h = f; + h = &f->next; + } + } + + if(mhead->mount && order == MREPL){ + mountfree(mhead->mount); + mhead->mount = 0; + } + + if(flag & MCREATE) + nm->mflag |= MCREATE; + + if(mhead->mount && order == MAFTER){ + for(f = mhead->mount; f->next; f = f->next) + ; + f->next = nm; + }else{ + for(f = nm; f->next; f = f->next) + ; + f->next = mhead->mount; + mhead->mount = nm; + } + + wunlock(&mhead->lock); + poperror(); + return nm->mountid; +} + +void +cunmount(Chan *mnt, Chan *mounted) +{ + Pgrp *pg; + Mhead *mh, **l; + Mount *f, **p; + + if(mnt->umh) /* should not happen */ + print("cunmount newp extra umh %#p has %#p\n", mnt, mnt->umh); + + /* + * It _can_ happen that mounted->umh is non-nil, + * because mounted is the result of namec(Aopen) + * (see sysfile.c:/^sysunmount). + * If we open a union directory, it will have a umh. + * Although surprising, this is okay, since the + * cclose will take care of freeing the umh. 
+ */ + + pg = up->pgrp; + wlock(&pg->ns); + + l = &MOUNTH(pg, mnt->qid); + for(mh = *l; mh; mh = mh->hash){ + if(eqchan(mh->from, mnt, 1)) + break; + l = &mh->hash; + } + + if(mh == 0){ + wunlock(&pg->ns); + error(Eunmount); + } + + wlock(&mh->lock); + if(mounted == 0){ + *l = mh->hash; + wunlock(&pg->ns); + mountfree(mh->mount); + mh->mount = nil; + cclose(mh->from); + wunlock(&mh->lock); + putmhead(mh); + return; + } + + p = &mh->mount; + for(f = *p; f; f = f->next){ + /* BUG: Needs to be 2 pass */ + if(eqchan(f->to, mounted, 1) || + (f->to->mchan && eqchan(f->to->mchan, mounted, 1))){ + *p = f->next; + f->next = 0; + mountfree(f); + if(mh->mount == nil){ + *l = mh->hash; + cclose(mh->from); + wunlock(&mh->lock); + wunlock(&pg->ns); + putmhead(mh); + return; + } + wunlock(&mh->lock); + wunlock(&pg->ns); + return; + } + p = &f->next; + } + wunlock(&mh->lock); + wunlock(&pg->ns); + error(Eunion); +} + +Chan* +cclone(Chan *c) +{ + Chan *nc; + Walkqid *wq; + + wq = c->dev->walk(c, nil, nil, 0); //XDYNX? + if(wq == nil) + error("clone failed"); + nc = wq->clone; + free(wq); + nc->path = c->path; + if(c->path) + incref(c->path); + return nc; +} + +/* also used by sysfile.c:/^mountfix */ +int +findmount(Chan **cp, Mhead **mp, int dc, uint devno, Qid qid) +{ + Pgrp *pg; + Mhead *mh; + + pg = up->pgrp; + rlock(&pg->ns); + for(mh = MOUNTH(pg, qid); mh; mh = mh->hash){ + rlock(&mh->lock); + if(mh->from == nil){ + print("mh %#p: mh->from nil\n", mh); + runlock(&mh->lock); + continue; + } + if(eqchanddq(mh->from, dc, devno, qid, 1)){ + runlock(&pg->ns); + if(mp != nil){ + incref(mh); + if(*mp != nil) + putmhead(*mp); + *mp = mh; + } + if(*cp != nil) + cclose(*cp); + incref(mh->mount->to); + *cp = mh->mount->to; + runlock(&mh->lock); + return 1; + } + runlock(&mh->lock); + } + + runlock(&pg->ns); + return 0; +} + +/* + * Calls findmount but also updates path. 
+ */ +static int +domount(Chan **cp, Mhead **mp, Path **path) +{ + Chan **lc; + Path *p; + + if(findmount(cp, mp, (*cp)->dev->dc, (*cp)->devno, (*cp)->qid) == 0) + return 0; + + if(path){ + p = *path; + p = uniquepath(p); + if(p->mlen <= 0) + print("domount: path %s has mlen==%d\n", p->s, p->mlen); + else{ + lc = &p->mtpt[p->mlen-1]; + DBG("domount %#p %s => add %#p (was %#p)\n", + p, p->s, (*mp)->from, p->mtpt[p->mlen-1]); + incref((*mp)->from); + if(*lc) + cclose(*lc); + *lc = (*mp)->from; + } + *path = p; + } + return 1; +} + +/* + * If c is the right-hand-side of a mount point, returns the left hand side. + * Changes name to reflect the fact that we've uncrossed the mountpoint, + * so name had better be ours to change! + */ +static Chan* +undomount(Chan *c, Path *path) +{ + Chan *nc; + + if(path->ref != 1 || path->mlen == 0) + print("undomount: path %s ref %d mlen %d caller %#p\n", + path->s, path->ref, path->mlen, getcallerpc(&c)); + + if(path->mlen>0 && (nc=path->mtpt[path->mlen-1]) != nil){ + DBG("undomount %#p %s => remove %p\n", path, path->s, nc); + cclose(c); + path->mtpt[path->mlen-1] = nil; + c = nc; + } + return c; +} + +/* + * Call dev walk but catch errors. + */ +static Walkqid* +ewalk(Chan *c, Chan *nc, char **name, int nname) +{ + Walkqid *wq; + + if(waserror()) + return nil; + wq = c->dev->walk(c, nc, name, nname); + poperror(); + return wq; +} + +/* + * Either walks all the way or not at all. No partial results in *cp. + * *nerror is the number of names to display in an error message. + */ +static char Edoesnotexist[] = "does not exist"; +int +walk(Chan **cp, char **names, int nnames, int nomount, int *nerror) +{ + int dc, devno, didmount, dotdot, i, n, nhave, ntry; + Chan *c, *nc, *mtpt; + Path *path; + Mhead *mh, *nmh; + Mount *f; + Walkqid *wq; + + c = *cp; + incref(c); + path = c->path; + incref(path); + mh = nil; + + /* + * While we haven't gotten all the way down the path: + * 1. step through a mount point, if any + * 2. 
send a walk request for initial dotdot or initial prefix without dotdot + * 3. move to the first mountpoint along the way. + * 4. repeat. + * + * Each time through the loop: + * + * If didmount==0, c is on the undomount side of the mount point. + * If didmount==1, c is on the domount side of the mount point. + * Either way, c's full path is path. + */ + didmount = 0; + for(nhave=0; nhaveqid.type & QTDIR)){ + if(nerror) + *nerror = nhave; + pathclose(path); + cclose(c); + strcpy(up->errstr, Enotdir); + if(mh != nil) + putmhead(mh); + return -1; + } + ntry = nnames - nhave; + if(ntry > MAXWELEM) + ntry = MAXWELEM; + dotdot = 0; + for(i=0; idev->dc; + devno = c->devno; + + if((wq = ewalk(c, nil, names+nhave, ntry)) == nil){ + /* try a union mount, if any */ + if(mh && !nomount){ + /* + * mh->mount->to == c, so start at mh->mount->next + */ + rlock(&mh->lock); + if(mh->mount){ + for(f = mh->mount->next; f != nil; f = f->next){ + if((wq = ewalk(f->to, nil, names+nhave, ntry)) != nil){ + dc = f->to->dev->dc; + devno = f->to->devno; + break; + } + } + } + runlock(&mh->lock); + } + if(wq == nil){ + cclose(c); + pathclose(path); + if(nerror) + *nerror = nhave+1; + if(mh != nil) + putmhead(mh); + return -1; + } + } + + nmh = nil; + didmount = 0; + if(dotdot){ + assert(wq->nqid == 1); + assert(wq->clone != nil); + + path = addelem(path, "..", nil); + nc = undomount(wq->clone, path); + n = 1; + }else{ + nc = nil; + if(!nomount){ + for(i=0; inqid && iqid[i])){ + didmount = 1; + break; + } + } + } + if(nc == nil){ /* no mount points along path */ + if(wq->clone == nil){ + cclose(c); + pathclose(path); + if(wq->nqid==0 || (wq->qid[wq->nqid-1].type & QTDIR)){ + if(nerror) + *nerror = nhave+wq->nqid+1; + strcpy(up->errstr, Edoesnotexist); + }else{ + if(nerror) + *nerror = nhave+wq->nqid; + strcpy(up->errstr, Enotdir); + } + free(wq); + if(mh != nil) + putmhead(mh); + return -1; + } + n = wq->nqid; + nc = wq->clone; + }else{ /* stopped early, at a mount point */ + didmount = 1; + 
if(wq->clone != nil){ + cclose(wq->clone); + wq->clone = nil; + } + n = i+1; + } + for(i=0; ifrom; + path = addelem(path, names[nhave+i], mtpt); + } + } + cclose(c); + c = nc; + putmhead(mh); + mh = nmh; + free(wq); + } + + putmhead(mh); + + c = cunique(c); + + if(c->umh != nil){ //BUG + print("walk umh\n"); + putmhead(c->umh); + c->umh = nil; + } + + pathclose(c->path); + c->path = path; + + cclose(*cp); + *cp = c; + if(nerror) + *nerror = nhave; + return 0; +} + +/* + * c is a mounted non-creatable directory. find a creatable one. + */ +Chan* +createdir(Chan *c, Mhead *mh) +{ + Chan *nc; + Mount *f; + + rlock(&mh->lock); + if(waserror()){ + runlock(&mh->lock); + nexterror(); + } + for(f = mh->mount; f; f = f->next){ + if(f->mflag&MCREATE){ + nc = cclone(f->to); + runlock(&mh->lock); + poperror(); + cclose(c); + return nc; + } + } + error(Enocreate); + return 0; +} + +static void +saveregisters(void) +{ +} + +static void +growparse(Elemlist *e) +{ + char **new; + int *inew; + enum { Delta = 8 }; + + if(e->nelems % Delta == 0){ + new = smalloc((e->nelems+Delta) * sizeof(char*)); + memmove(new, e->elems, e->nelems*sizeof(char*)); + free(e->elems); + e->elems = new; + inew = smalloc((e->nelems+Delta+1) * sizeof(int)); + memmove(inew, e->off, (e->nelems+1)*sizeof(int)); + free(e->off); + e->off = inew; + } +} + +/* + * The name is known to be valid. + * Copy the name so slashes can be overwritten. + * An empty string will set nelem=0. + * A path ending in / or /. or /.//./ etc. will have + * e.mustbedir = 1, so that we correctly + * reject, e.g., "/adm/users/." when /adm/users is a file + * rather than a directory. 
+ */ +static void +parsename(char *aname, Elemlist *e) +{ + char *name, *slash; + + kstrdup(&e->name, aname); + name = e->name; + e->nelems = 0; + e->elems = nil; + e->off = smalloc(sizeof(int)); + e->off[0] = skipslash(name) - name; + for(;;){ + name = skipslash(name); + if(*name == '\0'){ + e->off[e->nelems] = name+strlen(name) - e->name; + e->mustbedir = 1; + break; + } + growparse(e); + e->elems[e->nelems++] = name; + slash = utfrune(name, '/'); + if(slash == nil){ + e->off[e->nelems] = name+strlen(name) - e->name; + e->mustbedir = 0; + break; + } + e->off[e->nelems] = slash - e->name; + *slash++ = '\0'; + name = slash; + } + + if(DBGFLG > 1){ + int i; + + DBG("parsename %s:", e->name); + for(i=0; i<=e->nelems; i++) + DBG(" %d", e->off[i]); + DBG("\n"); + } +} + +static void* +memrchr(void *va, int c, long n) /* last byte equal to c in va[0..n), or nil if none */ +{ + uchar *a, *e; + + a = va; + for(e=a+n; e>a; ) /* start one past the end; pre-decrement so va[0] is examined and e never goes below a */ + if(*--e == c) + return e; + return nil; +} + +static void +namelenerror(char *aname, int len, char *err) +{ + char *ename, *name, *next; + int i, errlen; + + /* + * If the name is short enough, just use the whole thing. + */ + errlen = strlen(err); + if(len < ERRMAX/3 || len+errlen < 2*ERRMAX/3) + snprint(up->genbuf, sizeof up->genbuf, "%.*s", + utfnlen(aname, len), aname); + else{ + /* + * Print a suffix of the name, but try to get a little info. + */ + ename = aname+len; + next = ename; + do{ + name = next; + next = memrchr(aname, '/', name-aname); + if(next == nil) + next = aname; + len = ename-next; + }while(len < ERRMAX/3 || len + errlen < 2*ERRMAX/3); + + /* + * If the name is ridiculously long, chop it.
+ */ + if(name == ename){ + name = ename-ERRMAX/4; + if(name <= aname) + panic("bad math in namelenerror"); + /* walk out of current UTF sequence */ + for(i=0; (*name&0xC0)==0x80 && igenbuf, sizeof up->genbuf, "...%.*s", + utfnlen(name, ename-name), name); + } + snprint(up->errstr, ERRMAX, "%#q %s", up->genbuf, err); + nexterror(); +} + +void +nameerror(char *name, char *err) +{ + namelenerror(name, strlen(name), err); +} + +/* + * Turn a name into a channel. + * &name[0] is known to be a valid address. It may be a kernel address. + * + * Opening with amode Aopen, Acreate, Aremove, or Aaccess guarantees + * that the result will be the only reference to that particular fid. + * This is necessary since we might pass the result to + * devtab[]->remove(). + * + * Opening Atodir or Amount does not guarantee this. + * + * Under certain circumstances, opening Aaccess will cause + * an unnecessary clone in order to get a cunique Chan so it + * can attach the correct name. Sysstat and sys_stat need the + * correct name so they can rewrite the stat info. + */ +Chan* +namec(char *aname, int amode, int omode, int perm) +{ + int len, n, nomount; + Chan *c, *cnew; + Path *path; + Elemlist e; + Rune r; + Mhead *mh; + char *createerr, tmperrbuf[ERRMAX]; + char *name; + Dev *dev; + + if(aname[0] == '\0') + error("empty file name"); + aname = validnamedup(aname, 1); + if(waserror()){ + free(aname); + nexterror(); + } + DBG("namec %s %d %d\n", aname, amode, omode); + name = aname; + + /* + * Find the starting off point (the current slash, the root of + * a device tree, or the current dot) as well as the name to + * evaluate starting there. + */ + nomount = 0; + switch(name[0]){ + case '/': + c = up->slash; + incref(c); + break; + + case '#': + nomount = 1; + up->genbuf[0] = '\0'; + n = 0; + while(*name != '\0' && (*name != '/' || n < 2)){ + if(n >= sizeof(up->genbuf)-1) + error(Efilename); + up->genbuf[n++] = *name++; + } + up->genbuf[n] = '\0'; + /* + * noattach is sandboxing. 
+ * + * the OK exceptions are: + * | it only gives access to pipes you create + * d this process's file descriptors + * e this process's environment + * the iffy exceptions are: + * c time and pid, but also cons and consctl + * p control of your own processes (and unfortunately + * any others left unprotected) + */ + n = chartorune(&r, up->genbuf+1)+1; + /* actually / is caught by parsing earlier */ + if(utfrune("M", r)) + error(Enoattach); + if(up->pgrp->noattach && utfrune("|decp", r)==nil) + error(Enoattach); + dev = devtabget(r, 1); //XDYNX + if(dev == nil) + error(Ebadsharp); + //if(waserror()){ + // devtabdecr(dev); + // nexterror(); + //} + c = dev->attach(up->genbuf+n); + //poperror(); + //devtabdecr(dev); + break; + + default: + c = up->dot; + incref(c); + break; + } + + e.aname = aname; + e.prefix = name - aname; + e.name = nil; + e.elems = nil; + e.off = nil; + e.nelems = 0; + e.nerror = 0; + if(waserror()){ + cclose(c); + free(e.name); + free(e.elems); + /* + * Prepare nice error, showing first e.nerror elements of name. + */ + if(e.nerror == 0) + nexterror(); + strcpy(tmperrbuf, up->errstr); + if(e.off[e.nerror]==0) + print("nerror=%d but off=%d\n", + e.nerror, e.off[e.nerror]); + if(DBGFLG > 0){ + DBG("showing %d+%d/%d (of %d) of %s (%d %d)\n", + e.prefix, e.off[e.nerror], e.nerror, + e.nelems, aname, e.off[0], e.off[1]); + } + len = e.prefix+e.off[e.nerror]; + free(e.off); + namelenerror(aname, len, tmperrbuf); + } + + /* + * Build a list of elements in the name. + */ + parsename(name, &e); + + /* + * On create, .... + */ + if(amode == Acreate){ + /* perm must have DMDIR if last element is / or /. */ + if(e.mustbedir && !(perm&DMDIR)){ + e.nerror = e.nelems; + error("create without DMDIR"); + } + + /* don't try to walk the last path element just yet. 
*/ + if(e.nelems == 0) + error(Eexist); + e.nelems--; + } + + if(walk(&c, e.elems, e.nelems, nomount, &e.nerror) < 0){ + if(e.nerror < 0 || e.nerror > e.nelems){ + print("namec %s walk error nerror=%d\n", aname, e.nerror); + e.nerror = 0; + } + nexterror(); + } + + if(e.mustbedir && !(c->qid.type & QTDIR)) + error("not a directory"); + + if(amode == Aopen && (omode&3) == OEXEC && (c->qid.type & QTDIR)) + error("cannot exec directory"); + + switch(amode){ + case Abind: + /* no need to maintain path - cannot dotdot an Abind */ + mh = nil; + if(!nomount) + domount(&c, &mh, nil); + if(c->umh != nil) + putmhead(c->umh); + c->umh = mh; + break; + + case Aaccess: + case Aremove: + case Aopen: + Open: + /* save&update the name; domount might change c */ + path = c->path; + incref(path); + mh = nil; + if(!nomount) + domount(&c, &mh, &path); + + /* our own copy to open or remove */ + c = cunique(c); + + /* now it's our copy anyway, we can put the name back */ + pathclose(c->path); + c->path = path; + + /* record whether c is on a mount point */ + c->ismtpt = mh!=nil; + + switch(amode){ + case Aaccess: + case Aremove: + putmhead(mh); + break; + + case Aopen: + case Acreate: +if(c->umh != nil){ + print("cunique umh Open\n"); + putmhead(c->umh); + c->umh = nil; +} + /* only save the mount head if it's a multiple element union */ + if(mh && mh->mount && mh->mount->next) + c->umh = mh; + else + putmhead(mh); + + /* save registers else error() in open has wrong value of c saved */ + saveregisters(); + + if(omode == OEXEC) + c->flag &= ~CCACHE; + + +//open: //XDYNX +// get dev +// open +// if no error and read/write +// then fill in c->dev and +// don't put + c = c->dev->open(c, omode&~OCEXEC); + + if(omode & OCEXEC) + c->flag |= CCEXEC; + if(omode & ORCLOSE) + c->flag |= CRCLOSE; + break; + } + break; + + case Atodir: + /* + * Directories (e.g. for cd) are left before the mount point, + * so one may mount on / or . and see the effect. 
+ */ + if(!(c->qid.type & QTDIR)) + error(Enotdir); + break; + + case Amount: + /* + * When mounting on an already mounted upon directory, + * one wants subsequent mounts to be attached to the + * original directory, not the replacement. Don't domount. + */ + break; + + case Acreate: + /* + * We've already walked all but the last element. + * If the last exists, try to open it OTRUNC. + * If omode&OEXCL is set, just give up. + */ + e.nelems++; + e.nerror++; + if(walk(&c, e.elems+e.nelems-1, 1, nomount, nil) == 0){ + if(omode&OEXCL) + error(Eexist); + omode |= OTRUNC; + goto Open; + } + + /* + * The semantics of the create(2) system call are that if the + * file exists and can be written, it is to be opened with truncation. + * On the other hand, the create(5) message fails if the file exists. + * If we get two create(2) calls happening simultaneously, + * they might both get here and send create(5) messages, but only + * one of the messages will succeed. To provide the expected create(2) + * semantics, the call with the failed message needs to try the above + * walk again, opening for truncation. This correctly solves the + * create/create race, in the sense that any observable outcome can + * be explained as one happening before the other. + * The create/create race is quite common. For example, it happens + * when two rc subshells simultaneously update the same + * environment variable. + * + * The implementation still admits a create/create/remove race: + * (A) walk to file, fails + * (B) walk to file, fails + * (A) create file, succeeds, returns + * (B) create file, fails + * (A) remove file, succeeds, returns + * (B) walk to file, return failure. + * + * This is hardly as common as the create/create race, and is really + * not too much worse than what might happen if (B) got a hold of a + * file descriptor and then the file was removed -- either way (B) can't do + * anything with the result of the create call. So we don't care about this race. 
+ * + * Applications that care about more fine-grained decision of the races + * can use the OEXCL flag to get at the underlying create(5) semantics; + * by default we provide the common case. + * + * We need to stay behind the mount point in case we + * need to do the first walk again (should the create fail). + * + * We also need to cross the mount point and find the directory + * in the union in which we should be creating. + * + * The channel staying behind is c, the one moving forward is cnew. + */ + mh = nil; + cnew = nil; /* is this assignment necessary? */ + if(!waserror()){ /* try create */ + if(!nomount && findmount(&cnew, &mh, c->dev->dc, c->devno, c->qid)) + cnew = createdir(cnew, mh); + else{ + cnew = c; + incref(cnew); + } + + /* + * We need our own copy of the Chan because we're + * about to send a create, which will move it. Once we have + * our own copy, we can fix the name, which might be wrong + * if findmount gave us a new Chan. + */ + cnew = cunique(cnew); + pathclose(cnew->path); + cnew->path = c->path; + incref(cnew->path); + +//create: //XDYNX +// like open regarding read/write? + + cnew->dev->create(cnew, e.elems[e.nelems-1], omode&~(OEXCL|OCEXEC), perm); + poperror(); + if(omode & OCEXEC) + cnew->flag |= CCEXEC; + if(omode & ORCLOSE) + cnew->flag |= CRCLOSE; + if(mh) + putmhead(mh); + cclose(c); + c = cnew; + c->path = addelem(c->path, e.elems[e.nelems-1], nil); + break; + } + /* create failed */ + cclose(cnew); + if(mh) + putmhead(mh); + if(omode & OEXCL) + nexterror(); + /* save error */ + createerr = up->errstr; + up->errstr = tmperrbuf; + /* note: we depend that walk does not error */ + if(walk(&c, e.elems+e.nelems-1, 1, nomount, nil) < 0){ + up->errstr = createerr; + error(createerr); /* report true error */ + } + up->errstr = createerr; + omode |= OTRUNC; + goto Open; + + default: + panic("unknown namec access %d", amode); + } + + /* place final element in genbuf for e.g. 
exec */ + if(e.nelems > 0) + kstrcpy(up->genbuf, e.elems[e.nelems-1], sizeof up->genbuf); + else + kstrcpy(up->genbuf, ".", sizeof up->genbuf); + free(e.name); + free(e.elems); + free(e.off); + poperror(); /* e c */ + free(aname); + poperror(); /* aname */ + + return c; +} + +/* + * name is valid. skip leading / and ./ as much as possible + */ +char* +skipslash(char *name) +{ + while(name[0]=='/' || (name[0]=='.' && (name[1]==0 || name[1]=='/'))) + name++; + return name; +} + +char isfrog[256]={ + /*NUL*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*BKS*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*DLE*/ 1, 1, 1, 1, 1, 1, 1, 1, + /*CAN*/ 1, 1, 1, 1, 1, 1, 1, 1, + ['/'] 1, + [0x7f] 1, +}; + +/* + * Check that the name + * a) is in valid memory. + * b) is shorter than 2^16 bytes, so it can fit in a 9P string field. + * c) contains no frogs. + * The first byte is known to be addressable by the requester, so the + * routine works for kernel and user memory both. + * The parameter slashok flags whether a slash character is an error + * or a valid character. + * + * The parameter dup flags whether the string should be copied + * out of user space before being scanned the second time. + * (Otherwise a malicious thread could remove the NUL, causing us + * to access unchecked addresses.) 
+ */ +static char* +validname0(char *aname, int slashok, int dup, uintptr pc) /* returns the smalloc'd copy when dup is set, nil otherwise; raises an error on a bad name */ +{ + char *ename, *name, *s; + int c, n; + Rune r; + + name = aname; + if((PTR2UINT(name) & KZERO) != KZERO){ /* hmmmm: KZERO bits not all set, so the name is treated as a user pointer */ + if(!dup) + print("warning: validname* called from %#p with user pointer", pc); + ename = vmemchr(name, 0, (1<<16)); + }else + ename = memchr(name, 0, (1<<16)); + + if(ename==nil || ename-name>=(1<<16)) + error("name too long"); + + s = nil; /* stays nil unless dup; this is the return value */ + if(dup){ + n = ename-name; + s = smalloc(n+1); + memmove(s, name, n); + s[n] = 0; + aname = s; /* from here on scan and report the private copy */ + name = s; + setmalloctag(s, pc); + } + + while(*name){ + /* all characters above '~' are ok */ + c = *(uchar*)name; + if(c >= Runeself) + name += chartorune(&r, name); + else{ + if(isfrog[c]) + if(!slashok || c!='/'){ + snprint(up->genbuf, sizeof(up->genbuf), "%s: %q", Ebadchar, aname); + free(s); /* message is already in genbuf; s may be nil */ + error(up->genbuf); + } + name++; + } + } + return s; +} + +void +validname(char *aname, int slashok) /* check only (dup=0); the result of validname0 is discarded */ +{ + validname0(aname, slashok, 0, getcallerpc(&aname)); +} + +char* +validnamedup(char *aname, int slashok) /* check and copy (dup=1); returns the smalloc'd copy made by validname0 */ +{ + return validname0(aname, slashok, 1, getcallerpc(&aname)); +} + +void +isdir(Chan *c) /* raises Enotdir unless c's qid has QTDIR set */ +{ + if(c->qid.type & QTDIR) + return; + error(Enotdir); +} + +/* + * This is necessary because there are many + * pointers to the top of a given mount list: + * + * - the mhead in the namespace hash table + * - the mhead in chans returned from findmount: + * used in namec and then by unionread. + * - the mhead in chans returned from createdir: + * used in the open/create race protect, which is gone. + * + * The RWlock in the Mhead protects the mount list it contains. + * The mount list is deleted when we cunmount. + * The RWlock ensures that nothing is using the mount list at that time. + * + * It is okay to replace c->mh with whatever you want as + * long as you are sure you have a unique reference to it. + * + * This comment might belong somewhere else.
+ */ +void +putmhead(Mhead *mh) +{ + if(mh && decref(mh) == 0){ + mh->mount = (Mount*)0xCafeBeef; + free(mh); + } +} + diff -Nru 0/sys/src/nix/port/dev.c 4/sys/src/nix/port/dev.c --- 0/sys/src/nix/port/dev.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/dev.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,461 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +extern ulong kerndate; + +void +mkqid(Qid *q, vlong path, ulong vers, int type) +{ + q->type = type; + q->vers = vers; + q->path = path; +} + +void +devdir(Chan *c, Qid qid, char *n, vlong length, char *user, long perm, Dir *db) +{ + db->name = n; + if(c->flag&CMSG) + qid.type |= QTMOUNT; + db->qid = qid; + /* + * When called via devwalk c->dev is nil + * until the walk succeeds. + */ + if(c->dev != nil) + db->type = c->dev->dc; + else + db->type = -1; + db->dev = c->devno; + db->mode = perm; + db->mode |= qid.type << 24; + db->atime = seconds(); + db->mtime = kerndate; + db->length = length; + db->uid = user; + db->gid = eve; + db->muid = user; +} + +/* + * (here, Devgen is the prototype; devgen is the function in dev.c.) + * + * a Devgen is expected to return the directory entry for ".." + * if you pass it s==DEVDOTDOT (-1). otherwise... + * + * there are two contradictory rules. + * + * (i) if c is a directory, a Devgen is expected to list its children + * as you iterate s. + * + * (ii) whether or not c is a directory, a Devgen is expected to list + * its siblings as you iterate s. + * + * devgen always returns the list of children in the root + * directory. thus it follows (i) when c is the root and (ii) otherwise. + * many other Devgens follow (i) when c is a directory and (ii) otherwise. + * + * devwalk assumes (i). it knows that devgen breaks (i) + * for children that are themselves directories, and explicitly catches them. + * + * devstat assumes (ii). 
if the Devgen in question follows (i) + * for this particular c, devstat will not find the necessary info. + * with our particular Devgen functions, this happens only for + * directories, so devstat makes something up, assuming + * c->name, c->qid, eve, DMDIR|0555. + * + * devdirread assumes (i). the callers have to make sure + * that the Devgen satisfies (i) for the chan being read. + */ +/* + * the zeroth element of the table MUST be the directory itself for .. +*/ +int +devgen(Chan *c, char *name, Dirtab *tab, int ntab, int i, Dir *dp) +{ + if(tab == 0) + return -1; + if(i == DEVDOTDOT){ + /* nothing */ + }else if(name){ + for(i=1; i= ntab) + return -1; + tab += i; + } + devdir(c, tab->qid, tab->name, tab->length, eve, tab->perm, dp); + return 1; +} + +void +devreset(void) +{ +} + +void +devinit(void) +{ +} + +void +devshutdown(void) +{ +} + +Chan* +devattach(int dc, char *spec) +{ + Chan *c; + char *buf; + + /* + * There are no error checks here because + * this can only be called from the driver of dc + * which pretty much guarantees devtabget will + * succeed. + */ + c = newchan(); + mkqid(&c->qid, 0, 0, QTDIR); + c->dev = devtabget(dc, 0); + if(spec == nil) + spec = ""; + buf = smalloc(1+UTFmax+strlen(spec)+1); + sprint(buf, "#%C%s", dc, spec); + c->path = newpath(buf); + free(buf); + return c; +} + + +Chan* +devclone(Chan *c) +{ + Chan *nc; + + if(c->flag & COPEN){ + panic("devclone: file of type %C already open\n", + c->dev != nil? c->dev->dc: -1); + } + + nc = newchan(); + + /* + * The caller fills dev in if and when necessary. 
+ nc->dev = nil; //XDYNXX + */ + nc->devno = c->devno; + nc->mode = c->mode; + nc->qid = c->qid; + nc->offset = c->offset; + nc->umh = nil; + nc->aux = c->aux; + nc->mqid = c->mqid; + nc->mc = c->mc; + return nc; +} + +Walkqid* +devwalk(Chan *c, Chan *nc, char **name, int nname, Dirtab *tab, int ntab, Devgen *gen) +{ + int i, j, alloc; + Walkqid *wq; + char *n; + Dir dir; + + if(nname > 0) + isdir(c); + + alloc = 0; + wq = smalloc(sizeof(Walkqid)+(nname-1)*sizeof(Qid)); + if(waserror()){ + if(alloc && wq->clone!=nil) + cclose(wq->clone); + free(wq); + return nil; + } + if(nc == nil){ + nc = devclone(c); + /* + * nc->dev remains nil for now. //XDYNX + */ + alloc = 1; + } + wq->clone = nc; + + for(j=0; jqid.type & QTDIR)){ + if(j==0) + error(Enotdir); + goto Done; + } + n = name[j]; + if(strcmp(n, ".") == 0){ + Accept: + wq->qid[wq->nqid++] = nc->qid; + continue; + } + if(strcmp(n, "..") == 0){ + /* + * Use c->dev->name in the error because + * nc->dev should be nil here. + */ + if((*gen)(nc, nil, tab, ntab, DEVDOTDOT, &dir) != 1){ + print("devgen walk .. in dev%s %#llux broken\n", + c->dev->name, nc->qid.path); + error("broken devgen"); + } + nc->qid = dir.qid; + goto Accept; + } + /* + * Ugly problem: If we're using devgen, make sure we're + * walking the directory itself, represented by the first + * entry in the table, and not trying to step into a sub- + * directory of the table, e.g. /net/net. Devgen itself + * should take care of the problem, but it doesn't have + * the necessary information (that we're doing a walk). + */ + if(gen==devgen && nc->qid.path!=tab[0].qid.path) + goto Notfound; + for(i=0;; i++) { + switch((*gen)(nc, n, tab, ntab, i, &dir)){ + case -1: + Notfound: + if(j == 0) + error(Enonexist); + kstrcpy(up->errstr, Enonexist, ERRMAX); + goto Done; + case 0: + continue; + case 1: + if(strcmp(n, dir.name) == 0){ + nc->qid = dir.qid; + goto Accept; + } + continue; + } + } + } + /* + * We processed at least one name, so will return some data. 
+ * If we didn't process all nname entries successfully, we drop + * the cloned channel and return just the Qids of the walks. + */ +Done: + poperror(); + if(wq->nqid < nname){ + if(alloc) + cclose(wq->clone); + wq->clone = nil; + }else if(wq->clone){ + /* attach cloned channel to same device */ +//what goes here: //XDYNX +// ->dev must be nil because can't walk an open chan, right? +// what about ref count on dev? + wq->clone->dev = c->dev; + //if(wq->clone->dev) //XDYNX + // devtabincr(wq->clone->dev); + } + return wq; +} + +long +devstat(Chan *c, uchar *db, long n, Dirtab *tab, int ntab, Devgen *gen) +{ + int i; + Dir dir; + char *p, *elem; + + for(i=0;; i++){ + switch((*gen)(c, nil, tab, ntab, i, &dir)){ + case -1: + if(c->qid.type & QTDIR){ + if(c->path == nil) + elem = "???"; + else if(strcmp(c->path->s, "/") == 0) + elem = "/"; + else + for(elem=p=c->path->s; *p; p++) + if(*p == '/') + elem = p+1; + devdir(c, c->qid, elem, 0, eve, DMDIR|0555, &dir); + n = convD2M(&dir, db, n); + if(n == 0) + error(Ebadarg); + return n; + } + + error(Enonexist); + case 0: + break; + case 1: + if(c->qid.path == dir.qid.path) { + if(c->flag&CMSG) + dir.mode |= DMMOUNT; + n = convD2M(&dir, db, n); + if(n == 0) + error(Ebadarg); + return n; + } + break; + } + } +} + +long +devdirread(Chan *c, char *d, long n, Dirtab *tab, int ntab, Devgen *gen) /* returns the number of bytes of directory entries stored in d */ +{ + long m, dsz; + Dir dir; + + for(m=0; m<n; c->dri++) { /* m: bytes stored so far; c->dri: index of the next entry asked of gen */ + switch((*gen)(c, nil, tab, ntab, c->dri, &dir)){ + case -1: + return m; + + case 0: + break; + + case 1: + dsz = convD2M(&dir, (uchar*)d, n-m); + if(dsz <= BIT16SZ){ /* <= not < because this isn't stat; read is stuck */ + if(m == 0) + error(Eshort); + return m; + } + m += dsz; + d += dsz; + break; + } + } + + return m; +} + +/* + * error(Eperm) if open permission not granted for up->user.
+ */ +void +devpermcheck(char *fileuid, int perm, int omode) +{ + int t; + static int access[] = { 0400, 0200, 0600, 0100 }; + + if(strcmp(up->user, fileuid) == 0) + perm <<= 0; + else + if(strcmp(up->user, eve) == 0) + perm <<= 3; + else + perm <<= 6; + + t = access[omode&3]; + if((t&perm) != t) + error(Eperm); +} + +Chan* +devopen(Chan *c, int omode, Dirtab *tab, int ntab, Devgen *gen) +{ + int i; + Dir dir; + + for(i=0;; i++) { + switch((*gen)(c, nil, tab, ntab, i, &dir)){ + case -1: + goto Return; + case 0: + break; + case 1: + if(c->qid.path == dir.qid.path) { + devpermcheck(dir.uid, dir.mode, omode); + goto Return; + } + break; + } + } +Return: + c->offset = 0; + if((c->qid.type & QTDIR) && omode!=OREAD) + error(Eperm); + c->mode = openmode(omode); + c->flag |= COPEN; + return c; +} + +void +devcreate(Chan*, char*, int, int) +{ + error(Eperm); +} + +Block* +devbread(Chan *c, long n, vlong offset) +{ + Block *bp; + + bp = allocb(n); + if(bp == 0) + error(Enomem); + if(waserror()) { + freeb(bp); + nexterror(); + } + bp->wp += c->dev->read(c, bp->wp, n, offset); + poperror(); + return bp; +} + +long +devbwrite(Chan *c, Block *bp, vlong offset) +{ + long n; + + if(waserror()) { + freeb(bp); + nexterror(); + } + n = c->dev->write(c, bp->rp, BLEN(bp), offset); + poperror(); + freeb(bp); + + return n; +} + +void +devremove(Chan*) +{ + error(Eperm); +} + +long +devwstat(Chan*, uchar*, long) +{ + error(Eperm); + return 0; +} + +void +devpower(int) +{ + error(Eperm); +} + +int +devconfig(int, char *, DevConf *) +{ + error(Eperm); + return 0; +} diff -Nru 0/sys/src/nix/port/devaoe.c 4/sys/src/nix/port/devaoe.c --- 0/sys/src/nix/port/devaoe.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devaoe.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2508 @@ +/* + * © 2005-7 coraid + * aoe storage initiator + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" +#include "../ip/ip.h" + 
+#include "etherif.h" +#include "aoe.h" + +#pragma varargck argpos eventlog 1 + +#define dprint(...) if(debug) eventlog(__VA_ARGS__); else USED(debug); +#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__); + +enum { + Maxunits = 0xff, + Maxframes = 128, + Ndevlink = 6, + Nea = 6, + Nnetlink = 6, + +// ETHERMINTU = 60, /* minimum transmit size */ +// ETHERMAXTU = 1514, /* maximum transmit size */ +}; + +#define TYPE(q) ((ulong)(q).path & 0xf) +#define UNIT(q) (((ulong)(q).path>>4) & 0xff) +#define L(q) (((ulong)(q).path>>12) & 0xf) +#define QID(u, t) ((u)<<4 | (t)) +#define Q3(l, u, t) ((l)<<12 | QID(u, t)) /* l sits above the 8-bit unit field, at the bit L() decodes from */ +#define UP(d) ((d)->flag & Dup) + +#define Ms2tk(t) (((t)*HZ)/1000) +#define Tk2ms(t) (((t)*1000)/HZ) + +enum { + Qzero, + Qtopdir = 1, + Qtopbase, + Qtopctl = Qtopbase, + Qtoplog, + Qtopend, + + Qunitdir, + Qunitbase, + Qctl = Qunitbase, + Qdata, + Qconfig, + Qident, + + Qdevlinkdir, + Qdevlinkbase, + Qdevlink = Qdevlinkbase, + Qdevlinkend, + + Qtopfiles = Qtopend-Qtopbase, + Qdevlinkfiles = Qdevlinkend-Qdevlinkbase, + + Eventlen = 256, + Nevents = 64, + + Fread = 0, + Fwrite, + Tfree = -1, + Tmgmt, + + /* round trip bounds, timeouts, in ticks */ + Rtmax = Ms2tk(320), + Rtmin = Ms2tk(20), + Srbtimeout = 4*60*HZ, + + Dbcnt = 1024, + + Crd = 0x20, + Crdext = 0x24, + Cwr = 0x30, + Cwrext = 0x34, + Cid = 0xec, +}; + +enum { + Read, + Write, +}; + +/* + * unified set of flags + * a Netlink + Aoedev must both be jumbo capable + * to send jumbograms to that interface. + */ +enum { + /* sync with ahci.h */ + Dllba = 1<<0, + Dsmart = 1<<1, + Dpower = 1<<2, + Dnop = 1<<3, + Datapi = 1<<4, + Datapi16= 1<<5, + + /* aoe specific */ + Dup = 1<<6, + Djumbo = 1<<7, +}; + +static char *flagname[] = { + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", + + "up", + "jumbo", +}; + +typedef struct { + uchar flag; + uchar lostjumbo; + int datamtu; + + Chan *cc; + Chan *dc; + Chan *mtu; /* open early to prevent bind issues.
*/ + char path[Maxpath]; + uchar ea[Eaddrlen]; +} Netlink; + +typedef struct { + Netlink *nl; + int nea; + ulong eaidx; + uchar eatab[Nea][Eaddrlen]; + ulong npkt; + ulong resent; + uchar flag; + + ulong rttavg; + ulong mintimer; +} Devlink; + +typedef struct Srb Srb; +struct Srb { + Rendez; + Srb *next; + ulong ticksent; + ulong len; + vlong sector; + short write; + short nout; + char *error; + void *dp; + void *data; +}; + +typedef struct { + int tag; + ulong bcnt; + ulong dlen; + vlong lba; + ulong ticksent; + int nhdr; + uchar hdr[ETHERMINTU]; + void *dp; + Devlink *dl; + Netlink *nl; + int eaidx; + Srb *srb; +} Frame; + +typedef struct Aoedev Aoedev; +struct Aoedev { + QLock; + Aoedev *next; + + ulong vers; + + int ndl; + ulong dlidx; + Devlink *dl; + Devlink dltab[Ndevlink]; + + ushort fwver; + uchar flag; + int nopen; + int major; + int minor; + int unit; + int lasttag; + int nframes; + Frame *frames; + vlong bsize; + vlong realbsize; + + uint maxbcnt; + ulong lostjumbo; + ushort nout; + ushort maxout; + ulong lastwadj; + Srb *head; + Srb *tail; + Srb *inprocess; + + /* magic numbers 'R' us */ + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + int nconfig; + uchar config[1024]; + uchar ident[512]; +}; + +#pragma varargck type "æ" Aoedev* + +static struct { + Lock; + QLock; + Rendez; + char buf[Eventlen*Nevents]; + char *rp; + char *wp; +} events; + +static struct { + RWlock; + int nd; + Aoedev *d; +} devs; + +static struct { + Lock; + int reader[Nnetlink]; /* reader is running. */ + Rendez rendez[Nnetlink]; /* confirm exit. 
*/ + Netlink nl[Nnetlink]; +} netlinks; + +extern Dev aoedevtab; +static Ref units; +static Ref drivevers; +static int debug; +static int autodiscover = 1; +static int rediscover; + +char Enotup[] = "aoe device is down"; +char Echange[] = "media or partition has changed"; + +static Srb* +srballoc(ulong sz) /* new Srb with sz bytes of data space allocated right after it */ +{ + Srb *srb; + + srb = smalloc(sizeof *srb+sz); /* smalloc, as eventlogread uses, so srb is never dereferenced as nil */ + srb->dp = srb->data = srb+1; + srb->ticksent = sys->ticks; + return srb; +} + +static Srb* +srbkalloc(void *db, ulong) /* new Srb whose data pointers refer to the caller's buffer db */ +{ + Srb *srb; + + srb = smalloc(sizeof *srb); /* see srballoc */ + srb->dp = srb->data = db; + srb->ticksent = sys->ticks; + return srb; +} + +#define srbfree(srb) free(srb) + +static void +srberror(Srb *srb, char *s) +{ + srb->error = s; + srb->nout--; + if (srb->nout == 0) + wakeup(srb); +} + +static void +frameerror(Aoedev *d, Frame *f, char *s) +{ + Srb *srb; + + srb = f->srb; + if(f->tag == Tfree || !srb) + return; + f->srb = nil; + f->tag = Tfree; /* don't get fooled by way-slow responses */ + srberror(srb, s); + d->nout--; +} + +static char* +unitname(Aoedev *d) +{ + uprint("%d.%d", d->major, d->minor); + return up->genbuf; +} + +static int +eventlogready(void*) +{ + return *events.rp; +} + +static long +eventlogread(void *a, long n) +{ + int len; + char *p, *buf; + + buf = smalloc(Eventlen); + qlock(&events); + lock(&events); + p = events.rp; + len = *(uchar*)p; /* length byte: read unsigned so lengths of 128 or more are not seen as negative */ + if(len == 0){ + n = 0; + unlock(&events); + } else { + if(n > len) + n = len; + /* can't move directly into pageable space with events lock held */ + memmove(buf, p+1, n); + *p = 0; + events.rp = p += Eventlen; + if(p >= events.buf + sizeof events.buf) + events.rp = events.buf; + unlock(&events); + + /* the concern here is page faults in memmove below */ + if(waserror()){ + free(buf); + qunlock(&events); + nexterror(); + } + memmove(a, buf, n); + poperror(); + } + free(buf); + qunlock(&events); + return n; +} + +static int +eventlog(char *fmt, ...)
+{ + int dragrp, n; + char *p; + va_list arg; + + lock(&events); + p = events.wp; + dragrp = *p++; + va_start(arg, fmt); + n = vsnprint(p, Eventlen-1, fmt, arg); + *--p = n; + p = events.wp += Eventlen; + if(p >= events.buf + sizeof events.buf) + p = events.wp = events.buf; + if(dragrp) + events.rp = p; + unlock(&events); + wakeup(&events); + return n; +} + +static int +eventcount(void) /* number of unread event slots in the ring */ +{ + int n; + + lock(&events); + if(*events.rp == 0) + n = 0; + else if(events.wp <= events.rp) /* wrapped, or full when wp has caught up with a non-empty rp */ + n = sizeof events.buf - (events.rp - events.wp); /* bytes, like the branch below */ + else + n = events.wp - events.rp; + unlock(&events); + return n/Eventlen; /* bytes to slots */ +} + +static int +tsince(int tag) +{ + int n; + + n = sys->ticks & 0xffff; + n -= tag & 0xffff; + if(n < 0) + n += 1<<16; + return n; +} + +static int +newtag(Aoedev *d) +{ + int t; + + do { + t = ++d->lasttag << 16; + t |= sys->ticks & 0xffff; + } while (t == Tfree || t == Tmgmt); + return t; +} + +static void +downdev(Aoedev *d, char *err) +{ + Frame *f, *e; + + d->flag &= ~Dup; + f = d->frames; + e = f + d->nframes; + for(; f < e; f->tag = Tfree, f->srb = nil, f++) + frameerror(d, f, Enotup); + d->inprocess = nil; + eventlog("%æ: removed; %s\n", d, err); +} + +static Block* +allocfb(Frame *f) +{ + int len; + Block *b; + + len = f->nhdr + f->dlen; + if(len < ETHERMINTU) + len = ETHERMINTU; + b = allocb(len); + memmove(b->wp, f->hdr, f->nhdr); + if(f->dlen) + memmove(b->wp + f->nhdr, f->dp, f->dlen); + b->wp += len; + return b; +} + +static void +putlba(Aoeata *a, vlong lba) +{ + uchar *c; + + c = a->lba; + c[0] = lba; + c[1] = lba >> 8; + c[2] = lba >> 16; + c[3] = lba >> 24; + c[4] = lba >> 32; + c[5] = lba >> 40; +} + +static Devlink* +pickdevlink(Aoedev *d) +{ + ulong i, n; + Devlink *l; + + for(i = 0; i < d->ndl; i++){ + n = d->dlidx++ % d->ndl; + l = d->dl + n; + if(l && l->flag & Dup) + return l; + } + return 0; +} + +static int +pickea(Devlink *l) +{ + if(l == 0) + return -1; + if(l->nea == 0) + return -1; + return l->eaidx++ % l->nea; +} + +static int +hset(Aoedev
*d, Frame *f, Aoehdr *h, int cmd) +{ + int i; + Devlink *l; + + l = pickdevlink(d); + i = pickea(l); + if(i == -1){ + downdev(d, "resend fails; no netlink/ea"); + return -1; + } + if(f->srb && sys->ticks - f->srb->ticksent > Srbtimeout){ + eventlog("%æ: srb timeout\n", d); + frameerror(d, f, Etimedout); + return -1; + } + memmove(h->dst, l->eatab[i], Eaddrlen); + memmove(h->src, l->nl->ea, sizeof h->src); + hnputs(h->type, Aoetype); + h->verflag = Aoever << 4; + h->error = 0; + hnputs(h->major, d->major); + h->minor = d->minor; + h->cmd = cmd; + + hnputl(h->tag, f->tag = newtag(d)); + f->dl = l; + f->nl = l->nl; + f->eaidx = i; + f->ticksent = sys->ticks; + + return f->tag; +} + +static int +resend(Aoedev *d, Frame *f) +{ + ulong n; + Aoeata *a; + + a = (Aoeata*)f->hdr; + if(hset(d, f, a, a->cmd) == -1) + return -1; + n = f->bcnt; + if(n > d->maxbcnt){ + n = d->maxbcnt; /* mtu mismatch (jumbo fail?) */ + if(f->dlen > n) + f->dlen = n; + } + a->scnt = n / Aoesectsz; + f->dl->resent++; + f->dl->npkt++; + if(waserror()) + return -1; + f->nl->dc->dev->bwrite(f->nl->dc, allocfb(f), 0); + poperror(); + return 0; +} + +static void +discover(int major, int minor) +{ + Aoehdr *h; + Block *b; + Netlink *nl, *e; + + nl = netlinks.nl; + e = nl + nelem(netlinks.nl); + for(; nl < e; nl++){ + if(nl->cc == nil) + continue; + b = allocb(ETHERMINTU); + if(waserror()){ + freeb(b); + nexterror(); + } + b->wp = b->rp + ETHERMINTU; + memset(b->rp, 0, ETHERMINTU); + h = (Aoehdr*)b->rp; + memset(h->dst, 0xff, sizeof h->dst); + memmove(h->src, nl->ea, sizeof h->src); + hnputs(h->type, Aoetype); + h->verflag = Aoever << 4; + hnputs(h->major, major); + h->minor = minor; + h->cmd = ACconfig; + poperror(); + /* send b down the queue */ + nl->dc->dev->bwrite(nl->dc, b, 0); + } +} + +/* + * Check all frames on device and resend any frames that have been + * outstanding for 200% of the device round trip time average. 
+ */
+static void
+aoesweepproc(void*)
+{
+	ulong i, tx, timeout, nbc;
+	long ms;
+	vlong starttick;
+	enum { Nms = 100, Nbcms = 30*1000, };
+	uchar *ea;
+	Aoeata *a;
+	Aoedev *d;
+	Devlink *l;
+	Frame *f, *e;
+
+	nbc = Nbcms/Nms;
+loop:
+	/* once every Nbcms/Nms passes, broadcast a config query if rediscover is set */
+	if(nbc-- == 0){
+		if(rediscover && !waserror()){
+			discover(0xffff, 0xff);
+			poperror();
+		}
+		nbc = Nbcms/Nms;
+	}
+	starttick = sys->ticks;
+	rlock(&devs);
+	for(d = devs.d; d; d = d->next){
+		/* skip devices whose lock is held or that are not up */
+		if(!canqlock(d))
+			continue;
+		if(!UP(d)){
+			qunlock(d);
+			continue;
+		}
+		tx = 0;
+		f = d->frames;
+		e = f + d->nframes;
+		for (; f < e; f++){
+			if(f->tag == Tfree)
+				continue;
+			l = f->dl;
+			timeout = l->rttavg << 1;
+			i = tsince(f->tag);
+			if(i < timeout)
+				continue;
+			/* timed out with the window full: shrink the window */
+			if(d->nout == d->maxout){
+				if(d->maxout > 1)
+					d->maxout--;
+				d->lastwadj = sys->ticks;
+			}
+			a = (Aoeata*)f->hdr;
+			if(a->scnt > Dbcnt / Aoesectsz &&
+			   ++f->nl->lostjumbo > (d->nframes << 1)){
+				ea = f->dl->eatab[f->eaidx];
+				eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
+					d, f->nl->path, ea, f->lba);
+				d->maxbcnt = Dbcnt;
+				d->flag &= ~Djumbo;
+			}
+			resend(d, f);
+			/* double the link's rtt estimate once per device per pass */
+			if(tx++ == 0){
+				if((l->rttavg <<= 1) > Rtmax)
+					l->rttavg = Rtmax;
+				eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
+			}
+		}
+		/* window full and no adjustment for 10 seconds: grow it again */
+		if(d->nout == d->maxout && d->maxout < d->nframes &&
+		   TK2MS(sys->ticks - d->lastwadj) > 10*1000){
+			d->maxout++;
+			d->lastwadj = sys->ticks;
+		}
+		qunlock(d);
+	}
+	runlock(&devs);
+	/*
+	 * sleep out what is left of the Nms period.  the remainder is kept
+	 * in a signed variable: held in the unsigned i, a pass that took
+	 * longer than Nms wrapped to a huge value and stalled the sweeper.
+	 */
+	ms = Nms - (long)TK2MS(sys->ticks - starttick);
+	if(ms > 0)
+		tsleep(&up->sleep, return0, 0, ms);
+	goto loop;
+}
+
+/*
+ * Print verb æ: formats an Aoedev* argument as aoe<major>.<minor>.
+ */
+static int
+fmtæ(Fmt *f)
+{
+	char buf[16];	/* "aoe65535.255" needs 13 bytes; 8 truncated the name */
+	Aoedev *d;
+
+	d = va_arg(f->args, Aoedev*);
+	snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
+	return fmtstrcpy(f, buf);
+}
+
+static void netbind(char *path);
+
+/*
+ * Bind the interfaces named in the "aoeif" configuration variable.
+ * Entries of the form etherN or #lN are turned into #lN/etherN;
+ * anything else is skipped, and a failed bind is ignored.
+ */
+static void
+aoecfg(void)
+{
+	int n, i;
+	char *p, *f[32], buf[24];
+
+	if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
+		return;
+	/* goo!
*/
+	for(i = 0; i < n; i++){
+		p = f[i];
+		/* only the single character after the prefix names the interface */
+		if(strncmp(p, "ether", 5) == 0)
+			snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
+		else if(strncmp(p, "#l", 2) == 0)
+			snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
+		else
+			continue;
+		if(!waserror()){
+			netbind(buf);
+			poperror();
+		}
+	}
+}
+
+/*
+ * One-time driver setup: install the æ print verb, reset the event
+ * log pointers, start the sweep process and bind configured interfaces.
+ * Returns without doing anything if the guard lock is already held.
+ */
+static void
+aoeinit(void)
+{
+	static int init;
+	static QLock l;
+
+	if(!canqlock(&l))
+		return;
+	if(init == 0){
+		fmtinstall(L'æ', fmtæ);
+		events.rp = events.wp = events.buf;
+		kproc("aoesweep", aoesweepproc, nil);
+		aoecfg();
+		init = 1;
+	}
+	qunlock(&l);
+}
+
+/*
+ * Attach to the device.  A non-empty spec is rejected with Enonexist.
+ */
+static Chan*
+aoeattach(char *spec)
+{
+	Chan *c;
+
+	if(*spec)
+		error(Enonexist);
+	aoeinit();
+	c = devattach(L'æ', spec);
+	mkqid(&c->qid, Qzero, 0, QTDIR);
+	return c;
+}
+
+/*
+ * Return the unit'th device on the devs list (counting from 0).
+ * Raises an error naming the caller's pc if the list is too short.
+ */
+static Aoedev*
+unit2dev(ulong unit)
+{
+	int i;
+	Aoedev *d;
+
+	rlock(&devs);
+	i = 0;
+	for(d = devs.d; d; d = d->next)
+		if(i++ == unit){
+			runlock(&devs);
+			return d;
+		}
+	runlock(&devs);
+	uprint("unit lookup failure: %lux pc %p", unit, getcallerpc(&unit));
+	error(up->genbuf);
+	return nil;
+}
+
+/*
+ * Fill in dp for file `type' of the unit that c refers to.
+ * Returns 1 on success, -1 for a type that is not a unit file.
+ * Sizes are reported only while the device is up.
+ */
+static int
+unitgen(Chan *c, ulong type, Dir *dp)
+{
+	int perm, t;
+	ulong vers;
+	vlong size;
+	char *p;
+	Aoedev *d;
+	Qid q;
+
+	d = unit2dev(UNIT(c->qid));
+	perm = 0644;
+	size = 0;
+	vers = d->vers;
+	t = QTFILE;
+
+	switch(type){
+	default:
+		return -1;
+	case Qctl:
+		p = "ctl";
+		break;
+	case Qdata:
+		p = "data";
+		perm = 0640;
+		if(UP(d))
+			size = d->bsize;
+		break;
+	case Qconfig:
+		p = "config";
+		if(UP(d))
+			size = d->nconfig;
+		break;
+	case Qident:
+		p = "ident";
+		if(UP(d))
+			size = sizeof d->ident;
+		break;
+	case Qdevlinkdir:
+		p = "devlink";
+		t = QTDIR;
+		perm = 0555;
+		break;
+	}
+	mkqid(&q, QID(UNIT(c->qid), type), vers, t);
+	devdir(c, q, p, size, eve, perm, dp);
+	return 1;
+}
+
+/*
+ * Fill in d for one of the top-level files, ctl or log.
+ * Returns 1 on success, -1 for any other type.
+ */
+static int
+topgen(Chan *c, ulong type, Dir *d)
+{
+	int perm;
+	vlong size;
+	char *p;
+	Qid q;
+
+	perm = 0444;
+	size = 0;
+	switch(type){
+	default:
+		return -1;
+	case Qtopctl:
+		p = "ctl";
+		perm = 0644;
+		break;
+	case
Qtoplog:
+		p = "log";
+		/* the log's reported size is its current event count */
+		size = eventcount();
+		break;
+	}
+	mkqid(&q, type, 0, QTFILE);
+	devdir(c, q, p, size, eve, perm, d);
+	return 1;
+}
+
+/*
+ * Directory generator passed to devwalk, devstat, devopen and
+ * devdirread.  Produces entry s of the directory (or file) that c
+ * refers to; s == DEVDOTDOT asks for the parent.  Returns 1 when dp
+ * has been filled in and -1 when there is no such entry.
+ */
+static int
+aoegen(Chan *c, char *, Dirtab *, int, int s, Dir *dp)
+{
+	int i;
+	Aoedev *d;
+	Qid q;
+
+	/* qid path 0: its only entry is the "aoe" directory */
+	if(c->qid.path == 0){
+		switch(s){
+		case DEVDOTDOT:
+			q.path = 0;
+			q.type = QTDIR;
+			devdir(c, q, "#æ", 0, eve, 0555, dp);
+			break;
+		case 0:
+			q.path = Qtopdir;
+			q.type = QTDIR;
+			devdir(c, q, "aoe", 0, eve, 0555, dp);
+			break;
+		default:
+			return -1;
+		}
+		return 1;
+	}
+
+	switch(TYPE(c->qid)){
+	default:
+		return -1;
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, Qzero, 0, QTDIR);
+			devdir(c, q, "aoe", 0, eve, 0555, dp);
+			return 1;
+		}
+		/* the first Qtopfiles entries are the top files, then one directory per unit */
+		if(s < Qtopfiles)
+			return topgen(c, Qtopbase + s, dp);
+		s -= Qtopfiles;
+		if(s >= units.ref)
+			return -1;
+		mkqid(&q, QID(s, Qunitdir), 0, QTDIR);
+		d = unit2dev(s);
+		devdir(c, q, unitname(d), 0, eve, 0555, dp);
+		return 1;
+	case Qtopctl:
+	case Qtoplog:
+		return topgen(c, TYPE(c->qid), dp);
+	case Qunitdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
+			uprint("%uld", UNIT(c->qid));
+			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+			return 1;
+		}
+		return unitgen(c, Qunitbase+s, dp);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitgen(c, TYPE(c->qid), dp);
+	case Qdevlinkdir:
+		i = UNIT(c->qid);
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
+			devdir(c, q, "devlink", 0, eve, 0555, dp);
+			return 1;
+		}
+		if(i >= units.ref)
+			return -1;
+		d = unit2dev(i);
+		if(s >= d->ndl)
+			return -1;
+		/* devlink files are named by their index */
+		uprint("%d", s);
+		mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		return 1;
+	case Qdevlink:
+		uprint("%d", s);
+		mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		return 1;
+	}
+}
+
+/*
+ * Walk, delegated to devwalk with aoegen as the generator.
+ */
+static Walkqid*
+aoewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, nil, 0, aoegen);
+}
+
+/*
+ * Stat, delegated to devstat with aoegen as the generator.
+ */
+static long
+aoestat(Chan *c,
uchar *db, long n)
+{
+	return devstat(c, db, n, nil, 0, aoegen);
+}
+
+/*
+ * Open.  Only data files need extra work: the device must be up, and
+ * its open count is incremented under the device lock.
+ */
+static Chan*
+aoeopen(Chan *c, int omode)
+{
+	Aoedev *d;
+
+	if(TYPE(c->qid) != Qdata)
+		return devopen(c, omode, 0, 0, aoegen);
+
+	d = unit2dev(UNIT(c->qid));
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		nexterror();
+	}
+	if(!UP(d))
+		error(Enotup);
+	c = devopen(c, omode, 0, 0, aoegen);
+	d->nopen++;
+	poperror();
+	qunlock(d);
+	return c;
+}
+
+/*
+ * Close.  For an open data file, drop the device's open count; when it
+ * reaches zero send a config query for that device (errors ignored).
+ */
+static void
+aoeclose(Chan *c)
+{
+	Aoedev *d;
+
+	if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
+		return;
+
+	d = unit2dev(UNIT(c->qid));
+	qlock(d);
+	if(--d->nopen == 0 && !waserror()){
+		discover(d->major, d->minor);
+		poperror();
+	}
+	qunlock(d);
+}
+
+/*
+ * Load frame f with the next piece (at most maxbcnt bytes) of the
+ * device's in-process request, advance the request past that piece
+ * and transmit the frame.  Returns silently if hset fails.
+ */
+static void
+atarw(Aoedev *d, Frame *f)
+{
+	ulong bcnt;
+	char extbit, writebit;
+	Aoeata *ah;
+	Srb *srb;
+
+	/* command bits or'ed into 0x20 below; cleared when not applicable */
+	extbit = 0x4;
+	writebit = 0x10;
+
+	srb = d->inprocess;
+	bcnt = d->maxbcnt;
+	if(bcnt > srb->len)
+		bcnt = srb->len;
+	f->nhdr = AOEATASZ;
+	memset(f->hdr, 0, f->nhdr);
+	ah = (Aoeata*)f->hdr;
+	if(hset(d, f, ah, ACata) == -1)
+		return;
+	f->dp = srb->dp;
+	f->bcnt = bcnt;
+	f->lba = srb->sector;
+	f->srb = srb;
+
+	ah->scnt = bcnt / Aoesectsz;
+	putlba(ah, f->lba);
+	if(d->flag & Dllba)
+		ah->aflag |= AAFext;
+	else {
+		extbit = 0;
+		ah->lba[3] &= 0x0f;
+		ah->lba[3] |= 0xe0;	/* LBA bit+obsolete 0xa0 */
+	}
+	if(srb->write){
+		ah->aflag |= AAFwrite;
+		f->dlen = bcnt;
+	}else{
+		writebit = 0;
+		f->dlen = 0;
+	}
+	ah->cmdstat = 0x20 | writebit | extbit;
+
+	/* mark tracking fields and load out */
+	srb->nout++;
+	srb->dp = (uchar*)srb->dp + bcnt;
+	srb->len -= bcnt;
+	srb->sector += bcnt / Aoesectsz;
+	if(srb->len == 0)
+		d->inprocess = nil;
+	d->nout++;
+	f->dl->npkt++;
+	if(waserror()){
+		f->tag = Tfree;
+		d->inprocess = nil;
+		nexterror();
+	}
+	f->nl->dc->dev->bwrite(f->nl->dc, allocfb(f), 0);
+	poperror();
+}
+
+/*
+ * Map the error code of a response header to a message.
+ * Returns 0 when the header's AFerr flag is not set.
+ */
+static char*
+aoeerror(Aoehdr *h)
+{
+	int n;
+	static char *errs[] = {
+		"aoe protocol error: unknown",
+		"aoe protocol error: bad command code",
+		"aoe protocol
error: bad argument param",
+		"aoe protocol error: device unavailable",
+		"aoe protocol error: config string present",
+		"aoe protocol error: unsupported version",
+	};
+
+	if((h->verflag & AFerr) == 0)
+		return 0;
+	n = h->error;
+	/*
+	 * valid indices are 0..nelem(errs)-1; the test used to be `>',
+	 * which let n == nelem(errs) read one slot past the table.
+	 */
+	if(n >= nelem(errs))
+		n = 0;
+	return errs[n];
+}
+
+/*
+ * Fold a round trip measurement into the link's running average.
+ * A negative rtt is negated, clamped to [Rtmin, Rtmax] and also moves
+ * mintimer halfway toward it; a non-negative rtt is clamped to
+ * [mintimer, Rtmax].
+ */
+static void
+rtupdate(Devlink *l, int rtt)
+{
+	int n;
+
+	n = rtt;
+	if(rtt < 0){
+		n = -rtt;
+		if(n < Rtmin)
+			n = Rtmin;
+		else if(n > Rtmax)
+			n = Rtmax;
+		l->mintimer += (n - l->mintimer) >> 1;
+	} else if(n < l->mintimer)
+		n = l->mintimer;
+	else if(n > Rtmax)
+		n = Rtmax;
+
+	/* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
+	n -= l->rttavg;
+	l->rttavg += n >> 2;
+}
+
+/*
+ * Sleep condition for a request: true once it has failed, or has
+ * nothing outstanding and nothing left to send.
+ */
+static int
+srbready(void *v)
+{
+	Srb *s;
+
+	s = v;
+	return s->error || (!s->nout && !s->len);
+}
+
+/*
+ * Find the device's frame carrying tag; nil if there is none.
+ */
+static Frame*
+getframe(Aoedev *d, int tag)
+{
+	Frame *f, *e;
+
+	f = d->frames;
+	e = f + d->nframes;
+	for(; f < e; f++)
+		if(f->tag == tag)
+			return f;
+	return nil;
+}
+
+/*
+ * Return an unused frame, or nil when the device already has maxout
+ * frames outstanding or no frame is free.
+ */
+static Frame*
+freeframe(Aoedev *d)
+{
+	if(d->nout < d->maxout)
+		return getframe(d, Tfree);
+	return nil;
+}
+
+/*
+ * Keep issuing frames while one is available: continue the in-process
+ * request, or take the next one off the head of the device's queue.
+ */
+static void
+work(Aoedev *d)
+{
+	Frame *f;
+
+	while ((f = freeframe(d)) != nil) {
+		if(d->inprocess == nil){
+			if(d->head == nil)
+				return;
+			d->inprocess = d->head;
+			d->head = d->head->next;
+			if(d->head == nil)
+				d->tail = nil;
+		}
+		atarw(d, f);
+	}
+}
+
+/*
+ * Append srb to the device's queue, start work on it and sleep until
+ * srbready holds.  Errors raised during the sleep are swallowed and
+ * the sleep is retried.
+ */
+static void
+strategy(Aoedev *d, Srb *srb)
+{
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		nexterror();
+	}
+	srb->next = nil;
+	if(d->tail)
+		d->tail->next = srb;
+	d->tail = srb;
+	if(d->head == nil)
+		d->head = srb;
+	work(d);
+	poperror();
+	qunlock(d);
+
+	while(waserror())
+		;
+	sleep(srb, srbready, srb);
+	poperror();
+}
+
+#define iskaddr(a)	((uintptr)(a) > KZERO)
+
+/*
+ * Read or write len bytes at byte offset off of the device, in pieces
+ * of at most Srbsz bytes.  off and len must be multiples of Aoesectsz.
+ * Returns the byte count transferred, 0 for an offset past the end or
+ * a zero length.
+ */
+static long
+rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
+{
+	long n, nlen, copy;
+	enum { Srbsz = 1<<18, };
+	Srb *srb;
+
+	if((off|len) & (Aoesectsz-1))
+		error("offset and length must be sector multiple.\n");
+	if(off > d->bsize || len == 0)
+		return 0;
+	/* truncate a transfer that would run past the end of the device */
+	if(off + len > d->bsize)
+		len = d->bsize - off;
+	/* copy is set when the srb wraps db itself, so no staging memmove is done */
+	copy = 0;
+	if(iskaddr(db)){
+		srb = srbkalloc(db, len);
+		copy = 1;
+	}else
+		srb = srballoc(Srbsz <= len? Srbsz: len);
+	if(waserror()){
+		srbfree(srb);
+		nexterror();
+	}
+	nlen = len;
+	srb->write = write;
+	do {
+		if(!UP(d))
+			error(Eio);
+		srb->sector = off / Aoesectsz;
+		srb->dp = srb->data;
+		n = nlen;
+		if(n > Srbsz)
+			n = Srbsz;
+		srb->len = n;
+		if(write && !copy)
+			memmove(srb->data, db, n);
+		strategy(d, srb);
+		if(srb->error)
+			error(srb->error);
+		if(!write && !copy)
+			memmove(db, srb->data, n);
+		nlen -= n;
+		db += n;
+		off += n;
+	} while (nlen > 0);
+	poperror();
+	srbfree(srb);
+	return len;
+}
+
+/*
+ * Copy up to n bytes starting at offset off of the size-byte buffer
+ * src into dst.  Returns the number of bytes copied, 0 if off is at
+ * or past the end.
+ */
+static long
+readmem(ulong off, void *dst, long n, void *src, long size)
+{
+	if(off >= size)
+		return 0;
+	if(off + n > size)
+		n = size - off;
+	memmove(dst, (uchar*)src + off, n);
+	return n;
+}
+
+/*
+ * Append the name of each bit set in f, then a newline, to the buffer
+ * at s bounded by e.  Bits without an entry in flagname print as "oops".
+ * Returns the new end of the text.
+ */
+static char *
+pflag(char *s, char *e, uchar f)
+{
+	uchar i, m;
+
+	for(i = 0; i < 8; i++){
+		m = 1 << i;
+		if(f & m)
+			s = seprint(s, e, "%s ", flagname[i]?
flagname[i]: "oops");
+	}
+	return seprint(s, e, "\n");
+}
+
+/*
+ * Format the device's status into a READSTR-sized scratch buffer and
+ * copy the part selected by off and len into db via readstr.
+ * Returns readstr's result.
+ */
+static int
+pstat(Aoedev *d, char *db, int len, int off)
+{
+	int i;
+	char *state, *s, *p, *e;
+
+	s = p = malloc(READSTR);
+	e = p + READSTR;
+
+	state = "down";
+	if(UP(d))
+		state = "up";
+
+	p = seprint(p, e,
+		"state: %s\n" "nopen: %d\n" "nout: %d\n"
+		"nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d\n"
+		"fw: %.4ux\n"
+		"model: %s\n" "serial: %s\n" "firmware: %s\n",
+		state, d->nopen, d->nout,
+		d->maxout, d->nframes, d->maxbcnt,
+		d->fwver,
+		d->model, d->serial, d->firmware);
+	p = seprint(p, e, "flag: ");
+	p = pflag(p, e, d->flag);
+
+	/* never hand back more than was formatted */
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/*
+ * Read from one of a unit's files.  Raises Echange when the device's
+ * version no longer matches the one recorded in the channel's qid.
+ */
+static long
+unitread(Chan *c, void *db, long len, vlong off)
+{
+	Aoedev *d;
+
+	d = unit2dev(UNIT(c->qid));
+	if(d->vers != c->qid.vers)
+		error(Echange);
+	switch(TYPE(c->qid)){
+	default:
+		error(Ebadarg);
+	case Qctl:
+		return pstat(d, db, len, off);
+	case Qdata:
+		return rw(d, Read, db, len, off);
+	case Qconfig:
+		if (!UP(d))
+			error(Enotup);
+		return readmem(off, db, len, d->config, d->nconfig);
+	case Qident:
+		if (!UP(d))
+			error(Enotup);
+		return readmem(off, db, len, d->ident, sizeof d->ident);
+	}
+}
+
+/*
+ * Format the status of the devlink selected by the channel's qid,
+ * followed by that of its netlink, and copy the requested part to db.
+ * Returns 0 if the link index is beyond the device's link count.
+ */
+static int
+devlinkread(Chan *c, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Aoedev *d;
+	Devlink *l;
+
+	d = unit2dev(UNIT(c->qid));
+	i = L(c->qid);
+	if(i >= d->ndl)
+		return 0;
+	l = d->dl + i;
+
+	s = p = malloc(READSTR);
+	e = s + READSTR;
+
+	p = seprint(p, e, "addr: ");
+	for(i = 0; i < l->nea; i++)
+		p = seprint(p, e, "%E ", l->eatab[i]);
+	p = seprint(p, e, "\n");
+	p = seprint(p, e, "npkt: %uld\n", l->npkt);
+	p = seprint(p, e, "resent: %uld\n", l->resent);
+	p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
+	p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
+	p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
+
+	p = seprint(p, e, "nl path: %s\n", l->nl->path);
+	p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
+	p =
seprint(p, e, "nl flag: "); p = pflag(p, e, l->nl->flag);	/* the netlink's flags; l->flag is already printed as "flag:" */
+	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
+	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
+
+	/* never hand back more than was formatted */
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/*
+ * Format the driver-wide settings (debug, autodiscover, rediscover)
+ * and the state of every netlink slot with a cc channel, then copy
+ * the part selected by off and len into db.
+ */
+static long
+topctlread(Chan *, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Netlink *n;
+
+	s = p = malloc(READSTR);
+	e = s + READSTR;
+
+	p = seprint(p, e, "debug: %d\n", debug);
+	p = seprint(p, e, "autodiscover: %d\n", autodiscover);
+	p = seprint(p, e, "rediscover: %d\n", rediscover);
+
+	for(i = 0; i < Nnetlink; i++){
+		n = netlinks.nl+i;
+		if(n->cc == 0)
+			continue;
+		p = seprint(p, e, "if%d path: %s\n", i, n->path);
+		p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
+		p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
+		p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
+		p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
+	}
+
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/*
+ * Read entry point: dispatch on the type of file c refers to.
+ * Unknown types raise Eperm.
+ */
+static long
+aoeread(Chan *c, void *db, long n, vlong off)
+{
+	switch(TYPE(c->qid)){
+	default:
+		error(Eperm);
+	case Qzero:
+	case Qtopdir:
+	case Qunitdir:
+	case Qdevlinkdir:
+		return devdirread(c, db, n, 0, 0, aoegen);
+	case Qtopctl:
+		return topctlread(c, db, n, off);
+	case Qtoplog:
+		return eventlogread(db, n);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitread(c, db, n, off);
+	case Qdevlink:
+		return devlinkread(c, db, n, off);
+	}
+}
+
+/*
+ * Send the len bytes at db to the device as its new config string,
+ * wait for the reply, and record the string in d->config.  The device
+ * must be up and len may not exceed ETHERMAXTU - AOEQCSZ.
+ * Returns len.
+ */
+static long
+configwrite(Aoedev *d, void *db, long len)
+{
+	char *s;
+	Aoeqc *ch;
+	Frame *f;
+	Srb *srb;
+
+	if(!UP(d))
+		error(Enotup);
+	if(len > ETHERMAXTU - AOEQCSZ)
+		error(Etoobig);
+	srb = srballoc(len);
+	s = malloc(len);
+	memmove(s, db, len);
+	if(waserror()){
+		srbfree(srb);
+		free(s);
+		nexterror();
+	}
+	/* poll for a free frame; leaves the loop with d locked and a second error label pushed */
+	for (;;) {
+		qlock(d);
+		if(waserror()){
+			qunlock(d);
+			nexterror();
+		}
+		f = freeframe(d);
+		if(f != nil)
+			break;
+		poperror();
+		qunlock(d);
+
if(waserror())
+			nexterror();
+		tsleep(&up->sleep, return0, 0, 100);
+		poperror();
+	}
+	f->nhdr = AOEQCSZ;
+	memset(f->hdr, 0, f->nhdr);
+	ch = (Aoeqc*)f->hdr;
+	/*
+	 * on failure raise an error rather than return: a bare return here
+	 * left d locked, two error labels pushed and srb and s allocated.
+	 * error() unwinds through both handlers above, which unlock and free.
+	 * the frame has no srb yet, so hset can only fail by downing the
+	 * device; hence Enotup.
+	 */
+	if(hset(d, f, ch, ACconfig) == -1)
+		error(Enotup);
+	f->srb = srb;
+	f->dp = s;
+	ch->verccmd = AQCfset;
+	hnputs(ch->cslen, len);
+	d->nout++;
+	srb->nout++;
+	f->dl->npkt++;
+	f->dlen = len;
+	/*
+	 * these refer to qlock & waserror in the above for loop.
+	 * there's still the first waserror outstanding.
+	 */
+	poperror();
+	qunlock(d);
+
+	f->nl->dc->dev->bwrite(f->nl->dc, allocfb(f), 0);
+	sleep(srb, srbready, srb);
+	if(srb->error)
+		error(srb->error);
+
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		nexterror();
+	}
+	memmove(d->config, s, len);
+	d->nconfig = len;
+	poperror();
+	qunlock(d);
+
+	poperror();			/* pop first waserror */
+
+	srbfree(srb);
+	memmove(db, s, len);
+	free(s);
+	return len;
+}
+
+static int getmtu(Chan*);
+
+/*
+ * Smallest mtu over the device's links that are up on netlinks that
+ * are up, less the AoE ata header size.  With no such link the mtu is
+ * taken as 0 before the header size is subtracted.
+ */
+static int
+devmaxdata(Aoedev *d)
+{
+	int i, m, mtu;
+	Devlink *l;
+	Netlink *n;
+
+	mtu = 100000;
+	for(i = 0; i < d->ndl; i++){
+		l = d->dl + i;
+		n = l->nl;
+		if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
+			continue;
+		m = getmtu(n->mtu);
+		if(m < mtu)
+			mtu = m;
+	}
+	if(mtu == 100000)
+		mtu = 0;
+	mtu -= AOEATASZ;
+	return mtu;
+}
+
+/*
+ * New value for an on/off setting: the inverse of init when no
+ * argument is given, otherwise 1 exactly when the argument is "on".
+ */
+static int
+toggle(char *s, int init)
+{
+	if(s == nil)
+		return init ^ 1;
+	return strcmp(s, "on") == 0;
+}
+
+static void ataident(Aoedev*);
+
+/*
+ * Execute a command written to a unit's ctl file: failio, identify,
+ * jumbo, maxbno, mtu or setsize.  Runs with the device locked.
+ * Returns n.
+ */
+static long
+unitctlwrite(Aoedev *d, void *db, long n)
+{
+	uint maxbcnt, m;
+	uvlong bsize;
+	enum {
+		Failio,
+		Ident,
+		Jumbo,
+		Maxbno,
+		Mtu,
+		Setsize,
+	};
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	static Cmdtab cmds[] = {
+		{Failio, "failio", 1 },
+		{Ident, "identify", 1 },
+		{Jumbo, "jumbo", 0 },
+		{Maxbno, "maxbno", 0 },
+		{Mtu, "mtu", 0 },
+		{Setsize, "setsize", 0 },
+	};
+
+	cb = parsecmd(db, n);
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, cmds, nelem(cmds));
+	switch(ct->index){
+	case Failio:
+		downdev(d, "i/o failure");
+		break;
+	case Ident:
+		ataident(d);
+		break;
+	case Jumbo:
+		m = 0;
+		if(d->flag & Djumbo)
+			m = 1;
+		/* keep toggle's result: it used to be discarded, so the command changed nothing */
+		m = toggle(cb->f[1], m);
+		if(m)
+			d->flag |= Djumbo;
+		else
+			d->flag &= ~Djumbo;
+		break;
+	case Maxbno:
+	case Mtu:
+		/* with no argument, reset to what the links allow */
+		maxbcnt = devmaxdata(d);
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			m = strtoul(cb->f[1], 0, 0);
+			/* maxbno is given in sectors; mtu in bytes including the header */
+			if(ct->index == Maxbno)
+				m *= Aoesectsz;
+			else{
+				m -= AOEATASZ;
+				m &= ~(Aoesectsz-1);
+			}
+			if(m > maxbcnt)
+				cmderror(cb, "maxb greater than media mtu");
+			maxbcnt = m;
+		}
+		d->maxbcnt = maxbcnt;
+		break;
+	case Setsize:
+		/* with no argument, restore the size recorded in realbsize */
+		bsize = d->realbsize;
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			bsize = strtoull(cb->f[1], 0, 0);
+			if(bsize % Aoesectsz)
+				cmderror(cb, "disk size must be sector aligned");
+		}
+		d->bsize = bsize;
+		break;
+	default:
+		cmderror(cb, "unknown aoe control message");
+	}
+	poperror();
+	qunlock(d);
+	free(cb);
+	return n;
+}
+
+/*
+ * Write to one of a unit's files.  ident is read-only (Eperm).  A
+ * config write at offset off is merged into a copy of the current
+ * config string and the result is sent to the device.
+ */
+static long
+unitwrite(Chan *c, void *db, long n, vlong off)
+{
+	long rv;
+	char *buf;
+	Aoedev *d;
+
+	d = unit2dev(UNIT(c->qid));
+	switch(TYPE(c->qid)){
+	default:
+		error(Ebadarg);
+	case Qctl:
+		return unitctlwrite(d, db, n);
+	case Qident:
+		error(Eperm);
+	case Qdata:
+		return rw(d, Write, db, n, off);
+	case Qconfig:
+		if(off + n > sizeof d->config)
+			error(Etoobig);
+		buf = malloc(sizeof d->config);
+		/* configwrite raises errors; don't leak buf when it does */
+		if(waserror()){
+			free(buf);
+			nexterror();
+		}
+		memmove(buf, d->config, d->nconfig);
+		memmove(buf + off, db, n);
+		rv = configwrite(d, buf, n + off);
+		poperror();
+		free(buf);
+		return rv;
+	}
+}
+
+/*
+ * Claim the first netlink slot without a cc channel and fill it in.
+ * Raises an error when every slot is taken.  Returns the slot.
+ */
+static Netlink*
+addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
+{
+	Netlink *nl, *e;
+
+	lock(&netlinks);
+	if(waserror()){
+		unlock(&netlinks);
+		nexterror();
+	}
+	nl = netlinks.nl;
+	e = nl + nelem(netlinks.nl);
+	for(; nl < e && nl->cc; nl++)
+		continue;
+	if (nl >= e)
+		error("out of netlink structures");
+	nl->cc = cc;
+	nl->dc = dc;
+	nl->mtu = mtu;
+	/* kstrcpy always terminates; strncpy left a maximum-length path unterminated */
+	kstrcpy(nl->path, path, sizeof nl->path);
+	memmove(nl->ea, ea, sizeof nl->ea);
+	poperror();
+	nl->flag |= Dup;
+	unlock(&netlinks);
+	return nl;
+}
+
+/*
+ * Allocate the next unit number; -1 once Maxunits are in use.
+ */
+static int
+newunit(void)
+{
+	int
x;
+
+	lock(&units);
+	if(units.ref == Maxunits)
+		x = -1;
+	else
+		x = units.ref++;
+	unlock(&units);
+	return x;
+}
+
+/*
+ * Give back one unit; returns the remaining count.
+ */
+static int
+dropunit(void)
+{
+	int x;
+
+	lock(&units);
+	x = --units.ref;
+	unlock(&units);
+	return x;
+}
+
+/*
+ * Allocate and initialise a device with n frames, all marked Tfree,
+ * jumbo frames enabled and maxbcnt at Dbcnt.  Raises an error if the
+ * allocation fails or no unit number is left.
+ */
+static Aoedev*
+newdev(long major, long minor, int n)
+{
+	Aoedev *d;
+	Frame *f, *e;
+
+	d = mallocz(sizeof *d, 1);
+	f = mallocz(sizeof *f * n, 1);
+	if (!d || !f) {
+		free(d);
+		free(f);
+		error("aoe device allocation failure");
+	}
+	d->nframes = n;
+	d->frames = f;
+	for (e = f + n; f < e; f++)
+		f->tag = Tfree;
+	d->maxout = n;
+	d->major = major;
+	d->minor = minor;
+	d->maxbcnt = Dbcnt;
+	d->flag = Djumbo;
+	d->unit = newunit();	/* bzzt. inaccurate if units removed */
+	if(d->unit == -1){
+		/* frames first: d->frames can't be read once d has been freed */
+		free(d->frames);
+		free(d);
+		error("too many units");
+	}
+	d->dl = d->dltab;
+	return d;
+}
+
+/*
+ * Look a device up by major and minor number.  Logs the miss and
+ * returns nil when it is not on the list.
+ */
+static Aoedev*
+mm2dev(int major, int minor)
+{
+	Aoedev *d;
+
+	rlock(&devs);
+	for(d = devs.d; d; d = d->next)
+		if(d->major == major && d->minor == minor){
+			runlock(&devs);
+			return d;
+		}
+	runlock(&devs);
+	eventlog("mm2dev: %d.%d not found\n", major, minor);
+	return nil;
+}
+
+/* Find the device in our list.
If not known, add it */
+static Aoedev*
+getdev(long major, long minor, int n)
+{
+	Aoedev *d;
+
+	wlock(&devs);
+	if(waserror()){
+		wunlock(&devs);
+		nexterror();
+	}
+	for(d = devs.d; d; d = d->next)
+		if(d->major == major && d->minor == minor)
+			break;
+	if (d == nil) {
+		/* new devices go on the front of the list */
+		d = newdev(major, minor, n);
+		d->next = devs.d;
+		devs.d = d;
+	}
+	poperror();
+	wunlock(&devs);
+	return d;
+}
+
+/*
+ * Fetch a 16-bit little-endian value from a, one byte at a time.
+ */
+static ushort
+gbit16(void *a)
+{
+	uchar *i;
+
+	i = a;
+	return i[1] << 8 | i[0];
+}
+
+/*
+ * Fetch a 32-bit little-endian value from a, one byte at a time.
+ */
+static u32int
+gbit32(void *a)
+{
+	u32int j;
+	uchar *i;
+
+	i = a;
+	j = i[3] << 24;
+	j |= i[2] << 16;
+	j |= i[1] << 8;
+	j |= i[0];
+	return j;
+}
+
+/*
+ * Fetch a 64-bit little-endian value from a as two 32-bit halves.
+ */
+static uvlong
+gbit64(void *a)
+{
+	uchar *i;
+
+	i = a;
+	return (uvlong)gbit32(i+4) << 32 | gbit32(a);
+}
+
+/*
+ * Send an ata identify command (Cid) to the device.  Does nothing if
+ * no frame is free or hset fails.
+ */
+static void
+ataident(Aoedev *d)
+{
+	Aoeata *a;
+	Block *b;
+	Frame *f;
+
+	f = freeframe(d);
+	if(f == nil)
+		return;
+	f->nhdr = AOEATASZ;
+	memset(f->hdr, 0, f->nhdr);
+	a = (Aoeata*)f->hdr;
+	if(hset(d, f, a, ACata) == -1)
+		return;
+	a->cmdstat = Cid;	/* ata 6, page 110 */
+	a->scnt = 1;
+	a->lba[3] = 0xa0;
+	d->nout++;
+	f->dl->npkt++;
+	f->bcnt = 512;
+	f->dlen = 0;
+	b = allocfb(f);
+	f->nl->dc->dev->bwrite(f->nl->dc, b, 0);
+}
+
+/*
+ * Read the mtu from channel m: the number parsed from byte 12 onward
+ * of what the channel returns.  Falls back to 1514 when m is nil, the
+ * read raises an error, or 12 bytes or fewer come back.
+ */
+static int
+getmtu(Chan *m)
+{
+	int n, mtu;
+	char buf[36];
+
+	mtu = 1514;
+	if(m == nil || waserror())
+		return mtu;
+	n = m->dev->read(m, buf, sizeof buf - 1, 0);
+	if(n > 12){
+		buf[n] = 0;
+		mtu = strtoul(buf + 12, 0, 0);
+	}
+	poperror();
+	return mtu;
+}
+
+/*
+ * Return the index of address ea in the link's table, appending it if
+ * it is not there yet; -1 when it is absent and the table is full.
+ */
+static int
+newdlea(Devlink *l, uchar *ea)
+{
+	int i;
+	uchar *t;
+
+	for(i = 0; i < Nea; i++){
+		t = l->eatab[i];
+		if(i == l->nea){
+			memmove(t, ea, Eaddrlen);
+			return l->nea++;
+		}
+		if(memcmp(t, ea, Eaddrlen) == 0)
+			return i;
+	}
+	return -1;
+}
+
+/*
+ * Return the device's link for netlink n, creating it (marked up,
+ * with the responder's address from c->src) if the device has none.
+ * Logs and returns 0 when all Ndevlink slots are in use.
+ */
+static Devlink*
+newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
+{
+	int i;
+	Devlink *l;
+
+	for(i = 0; i < Ndevlink; i++){
+		l = d->dl + i;
+		if(i == d->ndl){
+			d->ndl++;
+			newdlea(l, c->src);
+			l->nl = n;
+			l->flag |= Dup;
+			l->mintimer = Rtmin;
+			l->rttavg = Rtmax;
+			return l;
+		}
+
if(l->nl == n)
+			return l;
+	}
+	eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
+	return 0;
+}
+
+/*
+ * Fail the frame that the response in b answers, with message s.
+ * Responses tagged Tmgmt or Tfree, or for an unknown device or tag,
+ * are ignored.
+ */
+static void
+errrsp(Block *b, char *s)
+{
+	int n;
+	Aoedev *d;
+	Aoehdr *h;
+	Frame *f;
+
+	h = (Aoehdr*)b->rp;
+	n = nhgetl(h->tag);
+	if(n == Tmgmt || n == Tfree)
+		return;
+	d = mm2dev(nhgets(h->major), h->minor);
+	if(d == 0)
+		return;
+	if(f = getframe(d, n))
+		frameerror(d, f, s);
+}
+
+/*
+ * Handle a config response received on netlink nl.  A response whose
+ * tag is not Tmgmt completes the frame carrying that tag.  A Tmgmt
+ * response finds or creates the device, records this link and the
+ * device's config string, and may adjust maxbcnt.
+ */
+static void
+qcfgrsp(Block *b, Netlink *nl)
+{
+	int major, cmd, cslen, blen;
+	unsigned n;
+	Aoedev *d;
+	Aoeqc *ch;
+	Devlink *l;
+	Frame *f;
+
+	ch = (Aoeqc*)b->rp;
+	major = nhgets(ch->major);
+	n = nhgetl(ch->tag);
+	if(n != Tmgmt){
+		d = mm2dev(major, ch->minor);
+		if(d == nil)
+			return;
+		qlock(d);
+		f = getframe(d, n);
+		if(f == nil){
+			qunlock(d);
+			eventlog("%æ: unknown response tag %ux\n", d, n);
+			return;
+		}
+		cslen = nhgets(ch->cslen);
+		blen = BLEN(b) - AOEQCSZ;
+		if(cslen < blen)
+			eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
+				d, n, cslen, blen);
+		if(cslen > blen){
+			eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
+				d, n, cslen, blen);
+			cslen = blen;
+		}
+		/* the string follows the header; copy it out and wake the waiter */
+		memmove(f->dp, ch + 1, cslen);
+		f->srb->nout--;
+		wakeup(f->srb);
+		d->nout--;
+		f->srb = nil;
+		f->tag = Tfree;
+		qunlock(d);
+		return;
+	}
+
+	cmd = ch->verccmd & 0xf;
+	if(cmd != 0){
+		eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
+		return;
+	}
+	/* the advertised buffer count, capped at Maxframes, sizes a new device */
+	n = nhgets(ch->bufcnt);
+	if(n > Maxframes)
+		n = Maxframes;
+
+	if(waserror()){
+		eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
+		return;
+	}
+	d = getdev(major, ch->minor, n);
+	poperror();
+
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		eventlog("%æ: %s\n", d, up->errstr);
+		nexterror();
+	}
+
+	l = newdevlink(d, nl, ch);		/* add this interface.
*/
+
+	d->fwver = nhgets(ch->fwver);
+	n = nhgets(ch->cslen);
+	if(n > sizeof d->config)
+		n = sizeof d->config;
+	d->nconfig = n;
+	memmove(d->config, ch + 1, n);
+	if(l != 0 && d->flag & Djumbo){
+		/* sectors per frame: the smaller of what the mtu and the target allow */
+		n = getmtu(nl->mtu) - AOEATASZ;
+		n /= Aoesectsz;
+		if(n > ch->scnt)
+			n = ch->scnt;
+		n = n? n * Aoesectsz: Dbcnt;
+		if(n != d->maxbcnt){
+			eventlog("%æ: setting %d byte data frames on %s:%E\n",
+				d, n, nl->path, nl->ea);
+			d->maxbcnt = n;
+		}
+	}
+	if(d->nopen == 0)
+		ataident(d);
+	poperror();
+	qunlock(d);
+}
+
+/*
+ * Copy the n-byte identify string at u into p, swapping the two bytes
+ * of each 16-bit word, NUL-terminating it and stripping leading and
+ * trailing blanks.  p is written up to p[n].
+ */
+void
+aoeidmove(char *p, ushort *u, unsigned n)
+{
+	int i;
+	char *op, *e, *s;
+
+	op = p;
+	/*
+	 * the ushort `u' is sometimes not aligned on a short boundary,
+	 * so dereferencing u[i] would cause an alignment exception on
+	 * some machines.
+	 */
+	s = (char *)u;
+	for(i = 0; i < n; i += 2){
+		*p++ = s[i + 1];
+		*p++ = s[i];
+	}
+	*p = 0;
+	while(p > op && *--p == ' ')
+		*p = 0;
+	e = p;
+	p = op;
+	while(*p == ' ')
+		p++;
+	memmove(op, p, n - (e - p));
+}
+
+/*
+ * Digest the identify data in id: set the device's capability flags
+ * (Dllba, Dpower, Dsmart, Dnop), mark it up and keep a copy of the
+ * data.  Returns the sector count, read from word 100 when bit 10 of
+ * word 83 or 86 is set and from word 60 otherwise.
+ */
+static vlong
+aoeidentify(Aoedev *d, ushort *id)
+{
+	int i;
+	vlong s;
+
+	d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
+
+	i = gbit16(id+83) | gbit16(id+86);
+	if(i & (1<<10)){
+		d->flag |= Dllba;
+		s = gbit64(id+100);
+	}else
+		s = gbit32(id+60);
+
+	i = gbit16(id+83);
+	if((i>>14) == 1) {
+		if(i & (1<<3))
+			d->flag |= Dpower;
+		i = gbit16(id+82);
+		if(i & 1)
+			d->flag |= Dsmart;
+		if(i & (1<<14))
+			d->flag |= Dnop;
+	}
+//	eventlog("%æ up\n", d);
+	d->flag |= Dup;
+	memmove(d->ident, id, sizeof d->ident);
+	return s;
+}
+
+/*
+ * Give the device the next value of the global version counter.
+ */
+static void
+newvers(Aoedev *d)
+{
+	lock(&drivevers);
+	d->vers = drivevers.ref++;
+	unlock(&drivevers);
+}
+
+/*
+ * Process an identify response: refresh flags, serial, firmware and
+ * model strings, and when both the size and the serial number differ
+ * from what was recorded (or no size was recorded yet), adopt the new
+ * size and bump the device version.  Returns 0, or -1 if aoeidentify
+ * returns -1.
+ */
+static int
+identify(Aoedev *d, ushort *id)
+{
+	vlong osectors, s;
+	uchar oserial[21];
+
+	s = aoeidentify(d, id);
+	if(s == -1)
+		return -1;
+	osectors = d->realbsize;
+	memmove(oserial, d->serial, sizeof d->serial);
+
+	aoeidmove(d->serial, id+10, 20);
+	aoeidmove(d->firmware, id+23, 8);
+	aoeidmove(d->model, id+27, 40);
+
+	s *= Aoesectsz;
+	if((osectors == 0 ||
osectors != s) &&
+	    memcmp(oserial, d->serial, sizeof oserial) != 0){
+		d->bsize = s;
+		d->realbsize = s;
+//		d->mediachange = 1;
+		newvers(d);
+	}
+	return 0;
+}
+
+/*
+ * Handle an ata response.  Updates the link's round trip estimate,
+ * then either copies read data out, continues a partially completed
+ * transfer with resend, or digests identify data.  When the frame is
+ * done it is freed, its request is woken if complete, and more work
+ * is started.
+ */
+static void
+atarsp(Block *b)
+{
+	unsigned n;
+	int major;	/* was short: shelves >= 32768 went negative and never matched in mm2dev */
+	Aoeata *ahin, *ahout;
+	Aoedev *d;
+	Frame *f;
+	Srb *srb;
+
+	ahin = (Aoeata*)b->rp;
+	major = nhgets(ahin->major);
+	d = mm2dev(major, ahin->minor);
+	if(d == nil)
+		return;
+	qlock(d);
+	if(waserror()){
+		qunlock(d);
+		nexterror();
+	}
+	n = nhgetl(ahin->tag);
+	f = getframe(d, n);
+	if(f == nil){
+		dprint("%æ: unexpected response; tag %ux\n", d, n);
+		goto bail;
+	}
+	rtupdate(f->dl, tsince(f->tag));
+	ahout = (Aoeata*)f->hdr;
+	srb = f->srb;
+
+	if(ahin->cmdstat & 0xa9){
+		eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
+			d, ahout->cmdstat, ahin->cmdstat);
+		if(srb)
+			srb->error = Eio;
+	} else {
+		/* n: the byte count we asked for in the request header */
+		n = ahout->scnt * Aoesectsz;
+		switch(ahout->cmdstat){
+		case Crd:
+		case Crdext:
+			if(BLEN(b) - AOEATASZ < n){
+				eventlog("%æ: runt read blen %ld expect %d\n",
+					d, BLEN(b), n);
+				goto bail;
+			}
+			memmove(f->dp, (uchar *)ahin + AOEATASZ, n);
+			/* fall through: reads share the bookkeeping below */
+		case Cwr:
+		case Cwrext:
+			if(n > Dbcnt)
+				f->nl->lostjumbo = 0;
+			/* frame covered only part of its range: send the rest */
+			if(f->bcnt -= n){
+				f->lba += n / Aoesectsz;
+				f->dp = (uchar*)f->dp + n;
+				resend(d, f);
+				goto bail;
+			}
+			break;
+		case Cid:
+			if(BLEN(b) - AOEATASZ < 512){
+				eventlog("%æ: runt identify blen %ld expect %d\n",
+					d, BLEN(b), n);
+				goto bail;
+			}
+			identify(d, (ushort*)((uchar *)ahin + AOEATASZ));
+			break;
+		default:
+			eventlog("%æ: unknown ata command %.2ux \n",
+				d, ahout->cmdstat);
+		}
+	}
+
+	if(srb && --srb->nout == 0 && srb->len == 0)
+		wakeup(srb);
+	f->srb = nil;
+	f->tag = Tfree;
+	d->nout--;
+
+	work(d);
+bail:
+	poperror();
+	qunlock(d);
+}
+
+/*
+ * Reader process for one netlink: reads packets from the link's data
+ * channel and dispatches AoE responses until an error is raised, at
+ * which point it clears its reader flag, wakes any waiter and exits.
+ */
+static void
+netrdaoeproc(void *v)
+{
+	int idx;
+	char name[Maxpath+1], *s;
+	Aoehdr *h;
+	Block *b;
+	Netlink *nl;
+
+	nl = (Netlink*)v;
+	idx = nl - netlinks.nl;
+	netlinks.reader[idx] = 1;
+	kstrcpy(name, nl->path, Maxpath);
+
+	if(waserror()){
+		eventlog("netrdaoe exiting:
%s\n", up->errstr);
+		netlinks.reader[idx] = 0;
+		wakeup(netlinks.rendez + idx);
+		pexit(up->errstr, 1);
+	}
+	if(autodiscover)
+		discover(0xffff, 0xff);
+	for (;;) {
+		if(!(nl->flag & Dup)) {
+			uprint("%s: netlink is down", name);
+			error(up->genbuf);
+		}
+		if (nl->dc == nil)
+			panic("netrdaoe: nl->dc == nil");
+		b = nl->dc->dev->bread(nl->dc, 1<<16, 0);
+		if(b == nil) {
+			uprint("%s: nil read from network", name);
+			error(up->genbuf);
+		}
+		h = (Aoehdr*)b->rp;
+		/* only responses are of interest; anything else is dropped */
+		if(h->verflag & AFrsp)
+			if(s = aoeerror(h)){
+				/* log the decoded AoE error; up->errstr holds nothing about this packet */
+				eventlog("%s: %s\n", nl->path, s);
+				errrsp(b, s);
+			}else
+				switch(h->cmd){
+				case ACata:
+					atarsp(b);
+					break;
+				case ACconfig:
+					qcfgrsp(b, nl);
+					break;
+				default:
+					eventlog("%s: unknown cmd %d\n",
+						nl->path, h->cmd);
+					errrsp(b, "unknown command");
+				}
+		freeb(b);
+	}
+}
+
+/*
+ * Read the file <path>/addr and parse its contents into ea as an
+ * ethernet address.  Raises an error if the parse fails.
+ */
+static void
+getaddr(char *path, uchar *ea)
+{
+	int n;
+	char buf[2*Eaddrlen+1];
+	Chan *c;
+	extern int parseether(uchar*, char*);
+
+	uprint("%s/addr", path);
+	c = namec(up->genbuf, Aopen, OREAD, 0);
+	if(waserror()) {
+		cclose(c);
+		nexterror();
+	}
+	if (c == nil)
+		panic("æ: getaddr: c == nil");
+	n = c->dev->read(c, buf, sizeof buf-1, 0);
+	poperror();
+	cclose(c);
+	buf[n] = 0;
+	if(parseether(ea, buf) < 0)
+		error("parseether failure");
+}
+
+/*
+ * Bind the interface at path: dial it for the AoE ethernet type, open
+ * its mtu file if possible, record the link with addnet and start a
+ * reader process for it.  The channels are closed if anything fails.
+ */
+static void
+netbind(char *path)
+{
+	char addr[Maxpath];
+	uchar ea[2*Eaddrlen+1];
+	Chan *dc, *cc, *mtu;
+	Netlink *nl;
+
+	snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
+	dc = chandial(addr, nil, nil, &cc);
+	snprint(addr, sizeof addr, "%s/mtu", path);
+	/* the mtu file is optional; getmtu copes with a nil channel */
+	if(waserror())
+		mtu = nil;
+	else {
+		mtu = namec(addr, Aopen, OREAD, 0);
+		poperror();
+	}
+
+	if(waserror()){
+		cclose(dc);
+		cclose(cc);
+		if(mtu)
+			cclose(mtu);
+		nexterror();
+	}
+	if(dc == nil || cc == nil)
+		error(Enonexist);
+	getaddr(path, ea);
+	nl = addnet(path, cc, dc, mtu, ea);
+	snprint(addr, sizeof addr, "netrdaoe@%s", path);
+	kproc(addr, netrdaoeproc, nl);
+	poperror();
+}
+
+/*
+ * Sleep condition used by netunbind on a netlink's reader flag:
+ * true while the flag is non-zero.
+ * NOTE(review): the reader sets its flag to 1 while running and to 0
+ * on exit, and netunbind sleeps on this "to confirm reader is down",
+ * so the test looks inverted.  Left unchanged because waiting for the
+ * flag to clear could block until the reader's pending read returns
+ * -- confirm the intent before changing it.
+ */
+static int
+unbound(void *v)
+{
+	return *(int*)v != 0;
+}
+
+static
void +netunbind(char *path) +{ + int i, idx; + Aoedev *d, *p, *next; + Chan *dc, *cc; + Devlink *l; + Frame *f; + Netlink *n, *e; + + n = netlinks.nl; + e = n + nelem(netlinks.nl); + + lock(&netlinks); + for(; n < e; n++) + if(n->dc && strcmp(n->path, path) == 0) + break; + unlock(&netlinks); + if (n >= e) + error("device not bound"); + + /* + * hunt down devices using this interface; disable + * this also terminates the reader. + */ + idx = n - netlinks.nl; + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + if(l->nl == n) + l->flag &= ~Dup; + } + qunlock(d); + } + n->flag &= ~Dup; + wunlock(&devs); + + /* confirm reader is down. */ + while(waserror()) + ; + sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx); + poperror(); + + /* reschedule packets. */ + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->nframes; i++){ + f = d->frames + i; + if(f->tag != Tfree && f->nl == n) + resend(d, f); + } + qunlock(d); + } + wunlock(&devs); + + /* squeeze devlink pool. (we assert nobody is using them now) */ + wlock(&devs); + for(d = devs.d; d; d = d->next){ + qlock(d); + for(i = 0; i < d->ndl; i++){ + l = d->dl + i; + if(l->nl == n) + memmove(l, l + 1, sizeof *l * (--d->ndl - i)); + } + qunlock(d); + } + wunlock(&devs); + + /* close device link. 
*/ + lock(&netlinks); + dc = n->dc; + cc = n->cc; + if(n->mtu) + cclose(n->mtu); + memset(n, 0, sizeof *n); + unlock(&netlinks); + + cclose(dc); + cclose(cc); + + /* squeeze orphan devices */ + wlock(&devs); + for(p = d = devs.d; d; p = d, d = next){ + next = d->next; + if(d->ndl > 0) + continue; + if(p != devs.d) + p->next = next; + else + devs.d = next; + free(d->frames); + free(d); + dropunit(); + } + wunlock(&devs); +} + +static void +removedev(char *name) +{ + int i; + Aoedev *d, *p; + + wlock(&devs); + for(p = d = devs.d; d; p = d, d = d->next) + if(strcmp(name, unitname(d)) == 0) + goto found; + wunlock(&devs); + error("device not bound"); +found: + d->flag &= ~Dup; + newvers(d); + d->ndl = 0; + + for(i = 0; i < d->nframes; i++) + frameerror(d, d->frames+i, Enotup); + + if(p != devs.d) + p->next = d->next; + else + devs.d = d->next; + free(d->frames); + free(d); + dropunit(); + wunlock(&devs); +} + +static void +discoverstr(char *f) +{ + ushort shelf, slot; + ulong sh; + char *s; + + if(f == 0){ + discover(0xffff, 0xff); + return; + } + + shelf = sh = strtol(f, &s, 0); + if(s == f || sh > 0xffff) + error("bad shelf"); + f = s; + if(*f++ == '.'){ + slot = strtol(f, &s, 0); + if(s == f || slot > 0xff) + error("bad shelf"); + }else + slot = 0xff; + discover(shelf, slot); +} + + +static long +topctlwrite(void *db, long n) +{ + enum { + Autodiscover, + Bind, + Debug, + Discover, + Closewait, + Rediscover, + Remove, + Unbind, + }; + char *f; + Cmdbuf *cb; + Cmdtab *ct; + static Cmdtab cmds[] = { + { Autodiscover, "autodiscover", 0 }, + { Bind, "bind", 2 }, + { Debug, "debug", 0 }, + { Discover, "discover", 0 }, + { Rediscover, "rediscover", 0 }, + { Remove, "remove", 2 }, + { Unbind, "unbind", 2 }, + }; + + cb = parsecmd(db, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, cmds, nelem(cmds)); + f = cb->f[1]; + switch(ct->index){ + case Autodiscover: + autodiscover = toggle(f, autodiscover); + break; + case Bind: + netbind(f); + break; + 
case Debug: + debug = toggle(f, debug); + break; + case Discover: + discoverstr(f); + break; + case Rediscover: + rediscover = toggle(f, rediscover); + break; + case Remove: + removedev(f); + break; + case Unbind: + netunbind(f); + break; + default: + cmderror(cb, "unknown aoe control message"); + } + poperror(); + free(cb); + return n; +} + +static long +aoewrite(Chan *c, void *db, long n, vlong off) +{ + switch(TYPE(c->qid)){ + default: + case Qzero: + case Qtopdir: + case Qunitdir: + case Qtoplog: + error(Eperm); + case Qtopctl: + return topctlwrite(db, n); + case Qctl: + case Qdata: + case Qconfig: + case Qident: + return unitwrite(c, db, n, off); + } +} + +Dev aoedevtab = { + L'æ', + "aoe", + + devreset, + devinit, + devshutdown, + aoeattach, + aoewalk, + aoestat, + aoeopen, + devcreate, + aoeclose, + aoeread, + devbread, + aoewrite, + devbwrite, + devremove, + devwstat, + devpower, + devconfig, +}; diff -Nru 0/sys/src/nix/port/devcap.c 4/sys/src/nix/port/devcap.c --- 0/sys/src/nix/port/devcap.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devcap.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,286 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +enum +{ + Hashlen= SHA1dlen, + Maxhash= 256, +}; + +/* + * if a process knows cap->cap, it can change user + * to capabilty->user. 
+ */ +typedef struct Caphash Caphash; +struct Caphash +{ + Caphash *next; + char hash[Hashlen]; + ulong ticks; +}; + +struct +{ + QLock; + Caphash *first; + int nhash; +} capalloc; + +enum +{ + Qdir, + Qhash, + Quse, +}; + +/* caphash must be last */ +Dirtab capdir[] = +{ + ".", {Qdir,0,QTDIR}, 0, DMDIR|0500, + "capuse", {Quse}, 0, 0222, + "caphash", {Qhash}, 0, 0200, +}; +int ncapdir = nelem(capdir); + +static Chan* +capattach(char *spec) +{ + return devattach(L'¤', spec); +} + +static Walkqid* +capwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, capdir, ncapdir, devgen); +} + +static void +capremove(Chan *c) +{ + if(iseve() && c->qid.path == Qhash) + ncapdir = nelem(capdir)-1; + else + error(Eperm); +} + + +static long +capstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, capdir, ncapdir, devgen); +} + +/* + * if the stream doesn't exist, create it + */ +static Chan* +capopen(Chan *c, int omode) +{ + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Ebadarg); + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; + } + + switch((ulong)c->qid.path){ + case Qhash: + if(!iseve()) + error(Eperm); + break; + } + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +/* +static char* +hashstr(uchar *hash) +{ + static char buf[2*Hashlen+1]; + int i; + + for(i = 0; i < Hashlen; i++) + sprint(buf+2*i, "%2.2ux", hash[i]); + buf[2*Hashlen] = 0; + return buf; +} + */ + +static Caphash* +remcap(uchar *hash) +{ + Caphash *t, **l; + + qlock(&capalloc); + + /* find the matching capability */ + for(l = &capalloc.first; *l != nil;){ + t = *l; + if(memcmp(hash, t->hash, Hashlen) == 0) + break; + l = &t->next; + } + t = *l; + if(t != nil){ + capalloc.nhash--; + *l = t->next; + } + qunlock(&capalloc); + + return t; +} + +/* add a capability, throwing out any old ones */ +static void +addcap(uchar *hash) +{ + Caphash *p, *t, **l; + + p = smalloc(sizeof *p); + memmove(p->hash, hash, 
Hashlen); + p->next = nil; + p->ticks = m->ticks; + + qlock(&capalloc); + + /* trim extras */ + while(capalloc.nhash >= Maxhash){ + t = capalloc.first; + if(t == nil) + panic("addcap"); + capalloc.first = t->next; + free(t); + capalloc.nhash--; + } + + /* add new one */ + for(l = &capalloc.first; *l != nil; l = &(*l)->next) + ; + *l = p; + capalloc.nhash++; + + qunlock(&capalloc); +} + +static void +capclose(Chan*) +{ +} + +static long +capread(Chan *c, void *va, long n, vlong) +{ + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, va, n, capdir, ncapdir, devgen); + + default: + error(Eperm); + break; + } + return n; +} + +static long +capwrite(Chan *c, void *va, long n, vlong) +{ + Caphash *p; + char *cp; + uchar hash[Hashlen]; + char *key, *from, *to; + char err[256]; + + switch((ulong)c->qid.path){ + case Qhash: + if(!iseve()) + error(Eperm); + if(n < Hashlen) + error(Eshort); + memmove(hash, va, Hashlen); + addcap(hash); + break; + + case Quse: + /* copy key to avoid a fault in hmac_xx */ + cp = nil; + if(waserror()){ + free(cp); + nexterror(); + } + cp = smalloc(n+1); + memmove(cp, va, n); + cp[n] = 0; + + from = cp; + key = strrchr(cp, '@'); + if(key == nil) + error(Eshort); + *key++ = 0; + + hmac_sha1((uchar*)from, strlen(from), (uchar*)key, strlen(key), hash, nil); + + p = remcap(hash); + if(p == nil){ + snprint(err, sizeof err, "invalid capability %s@%s", from, key); + error(err); + } + + /* if a from user is supplied, make sure it matches */ + to = strchr(from, '@'); + if(to == nil){ + to = from; + } else { + *to++ = 0; + if(strcmp(from, up->user) != 0) + error("capability must match user"); + } + + /* set user id */ + kstrdup(&up->user, to); + up->basepri = PriNormal; + + free(p); + free(cp); + poperror(); + break; + + default: + error(Eperm); + break; + } + + return n; +} + +Dev capdevtab = { + L'¤', + "cap", + + devreset, + devinit, + devshutdown, + capattach, + capwalk, + capstat, + capopen, + devcreate, + capclose, + capread, + 
/*
 * State of one console connection from a remote host.
 *
 * Two locks are embedded.  The QLock is the one taken with qlock(cp) by
 * findconn, connidx and cectimer.  The Lock is the one taken with
 * ilock(cp) by start and cecputs around accesses to cbuf.
 */
typedef struct {
	QLock;
	Lock;
	uchar	ea[6];		/* along with cno, the key to the connection */
	uchar	cno;		/* connection number on remote host */
	uchar	stalled;	/* cectimer needs to kick it */
	int	state;		/* connection state */
	int	idle;		/* idle ticks */
	int	to;		/* ticks to timeout */
	int	retries;	/* remaining retries */
	Block	*bp;		/* unacked message */
	If	*ifc;		/* interface for this connection */
	uchar	sndseq;		/* sequence number of last sent message */
	uchar	rcvseq;		/* sequence number of last rcv'd message */
	char	cbuf[Ncbuf];	/* circular buffer */
	int	r, w;		/* indexes into cbuf */
	int	pwi;		/* index into passwd */
	char	passwd[32];	/* password typed by connection */
} Conn;
CMreset, + CMsetshelf, + CMsetname, + CMtraceon, + CMtraceoff, + CMsetpasswd, + CMcecon, + CMcecoff, + CMwrite, +}; + +static If ifs[4]; +static char name[Namelen]; +static int shelf = -1; +static Conn conn[Nconns]; +static int tflag; +static char passwd[Namelen]; +static int xmit; +static int rsnd; +static Rendez trendez; +static int tcond; +static uchar broadcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +static Dirtab cecdir[] = { + ".", { Qdir, 0, QTDIR }, 0, DMDIR | 0555, + "cecstat", { Qstat}, 1, 0444, + "cecctl", { Qctl}, 1, 0777, +// "cecctl", { Qctl}, 1, 0664, + + "cecdbg", { Qdbg }, 1, 0444, + "ceccfg", { Qcfg }, 1, 0444, +}; +static Cmdtab ceccmd[] = { + CMsetname, "name", 2, + CMtraceon, "traceon", 1, + CMtraceoff, "traceoff", 1, + CMsetpasswd, "password", 2, + CMcecon, "cecon", 2, + CMcecoff, "cecoff", 2, + CMsetshelf, "shelf", 2, + CMwrite, "write", -1, +}; + +/* + * Avoid outputting debugging to ourselves. Assumes a serial port. + */ +static int +cecprint(char *fmt, ...) +{ + char buf[PRINTSIZE]; + int n; + va_list arg; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof buf, fmt, arg)-buf; + va_end(arg); + uartputs(buf, n); + return n; +} + +static void +getaddr(char *path, uchar *ea) +{ + char buf[6*2]; + int n; + Chan *c; + + snprint(up->genbuf, sizeof up->genbuf, "%s/addr", path); + c = namec(up->genbuf, Aopen, OREAD, 0); + if(waserror()) { + cclose(c); + nexterror(); + } + n = c->dev->read(c, buf, sizeof buf, 0); + if(n != sizeof buf) + error("getaddr"); + if(parseether(ea, buf) < 0) + error("parseether failure"); + poperror(); + cclose(c); +} + +static char *types[Tlast+1] = { + "Tinita", "Tinitb", "Tinitc", "Tdata", "Tack", + "Tdiscover", "Toffer", "Treset", "*GOK*", +}; + +static int +cbget(Conn *cp) +{ + int c; + + if(cp->r == cp->w) + return -1; + c = cp->cbuf[cp->r]; + cp->r = cp->r+1 & Ncmask; + return c; +} + +static void +cbput(Conn *cp, int c) +{ + if(cp->r == (cp->w+1 & Ncmask)) + return; + cp->cbuf[cp->w] = c; + cp->w = cp->w+1 & 
Ncmask; +} + +static void +trace(Block *bp) +{ + uint type; + Pkt *p; + + if(tflag == 0) + return; + p = (Pkt *)bp->rp; + type = p->type; + if(type > Treset) + type = Treset+1; + cecprint("%E > %E) seq %d, type %s, len %d, conn %d\n", + p->src, p->dst, p->seq, types[type], p->len, p->conn); +} + +static Block* +sethdr(If *ifc, uchar *ea, Pkt **npp, int len) +{ + Block *bp; + Pkt *np; + + len += Hdrsz; + if(len < 60) + len = 60; + bp = allocb(len); + bp->wp = bp->rp+len; + np = (Pkt *)bp->rp; + memmove(np->dst, ea, 6); + memmove(np->src, ifc->ea, 6); + np->etype[0] = 0xbc; + np->etype[1] = 0xbc; + np->seq = 0; + *npp = np; + return bp; +} + +static void +send(Conn *cp, Block *bp) +{ + Block *nbp; + + if(cp->bp != nil) + panic("cecsend: cp->bp not nil\n"); + nbp = allocb(BLEN(bp)); + memmove(nbp->wp, bp->rp, BLEN(bp)); + nbp->wp += BLEN(bp); + cp->bp = nbp; + trace(bp); + cp->ifc->d->bwrite(cp->ifc->dc, bp, 0); + xmit++; + cp->to = 4; + cp->retries = 3; + xmit++; +} + +static void +senddata(Conn *cp, void *data, int len) +{ + Block *bp; + Pkt *p; + + bp = sethdr(cp->ifc, cp->ea, &p, len); + memmove(p->data, data, len); + p->len = len; + p->seq = ++cp->sndseq; + p->conn = cp->cno; + p->type = Tdata; + send(cp, bp); +} + +static void +resend(Conn *cp) +{ + Block *nbp; + + rsnd++; + nbp = allocb(BLEN(cp->bp)); + memmove(nbp->wp, cp->bp->rp, BLEN(cp->bp)); + nbp->wp += BLEN(cp->bp); + trace(nbp); + cp->ifc->d->bwrite(cp->ifc->dc, nbp, 0); + cp->to = 4; +} + +static void +reset(If *ifc, uchar conn) +{ + Block *bp; + Pkt *p; + + bp = sethdr(ifc, ifc->ea, &p, 0); + p->type = Treset; + p->conn = conn; + trace(bp); + ifc->d->bwrite(ifc->dc, bp, 0); +} + +static void +ack(Conn *cp) +{ + if(cp->bp) + freeb(cp->bp); + cp->bp = 0; + cp->to = 0; + cp->retries = 0; +} + +static void +start(Conn *cp) +{ + char buf[250]; + int n, c; + + if(cp->bp) + return; + ilock(cp); + for(n = 0; n < sizeof buf; n++){ + if((c = cbget(cp)) == -1) + break; + buf[n] = c; + } + iunlock(cp); + if(n != 
0) + senddata(cp, buf, n); +} + +static void +cecputs(char *str, int n) +{ + int i, c, wake; + Conn *cp; + + wake = 0; + for(cp = conn; cp < conn+Nconns; cp++){ + ilock(cp); + if(cp->state == Copen){ + for (i = 0; i < n; i++){ + c = str[i]; + if(c == 0) + continue; /* BOTCH */ + if(c == '\n') + cbput(cp, '\r'); + cbput(cp, c); + } + cp->stalled = 1; + wake = 1; + } + iunlock(cp); + } + if(wake){ + tcond = 1; + wakeup(&trendez); + } +} + +static void +conputs(Conn *c, char *s) +{ + for(; *s; s++) + cbput(c, *s); +} + +static int +icansleep(void*) +{ + return tcond != 0; +} + +static void +cectimer(void *) +{ + Conn *cp; + + for(;;){ + tsleep(&trendez, icansleep, 0, 250); + tcond = 0; + for(cp = conn; cp < conn + Nconns; cp++){ + qlock(cp); + if(cp->bp != nil){ + if(--cp->to <= 0){ + if(--cp->retries <= 0){ + freeb(cp->bp); + cp->bp = 0; +// cp->state = Cunused; + }else + resend(cp); + } + }else if(cp->stalled){ + cp->stalled = 0; + start(cp); + } + qunlock(cp); + } + } +} + +static int +cecqlen(void*) +{ + int n; + Conn *c; + + n = 0; + for(c = conn; c < conn+Nconns; c++){ + if(!canqlock(c)) + continue; + if(c->bp) + n += BLEN(c->bp); + if(c->r > c->w) + n += c->r-c->w; + else + n += c->w-c->r; + qunlock(c); + } + if(n){ + tcond = 1; + wakeup(&trendez); + } + return n; +} + +static void +discover(If *ifc, Pkt *p) +{ + uchar *addr; + Block *bp; + Pkt *np; + + if(p) + addr = p->src; + else + addr = broadcast; + bp = sethdr(ifc, addr, &np, 0); + np->type = Toffer; + np->len = snprint((char *)np->data, sizeof np->data, "%d %s", shelf, name); + trace(bp); + ifc->d->bwrite(ifc->dc, bp, 0); +} + +static Conn* +connidx(int cno) +{ + Conn *c; + + for(c = conn; c < conn + Nconns; c++) + if(cno == c->cno){ + qlock(c); + return c; + } + return nil; +} + +static Conn* +findconn(uchar *ea, uchar cno) +{ + Conn *cp, *ncp; + + ncp = nil; + for(cp = conn; cp < conn + Nconns; cp++){ + if(ncp == nil && cp->state == Cunused) + ncp = cp; + if(memcmp(ea, cp->ea, 6) == 0 && cno == 
cp->cno){ + qlock(cp); + return cp; + } + } + if(ncp != nil) + qlock(ncp); + return ncp; +} + +static void +checkpw(Conn *cp, char *str, int len) +{ + int i, c; + + if(passwd[0] == 0) + return; + for(i = 0; i < len; i++){ + c = str[i]; + if(c != '\n' && c != '\r'){ + if(cp->pwi < (sizeof cp->passwd)-1) + cp->passwd[cp->pwi++] = c; + cbput(cp, '#'); + cecprint("%c", c); + continue; + } + /* is newline; check password */ + cp->passwd[cp->pwi] = 0; + if(strcmp(cp->passwd, passwd) == 0){ + cp->state = Copen; + cp->pwi = 0; + print("\r\n%E logged in\r\n", cp->ea); + }else{ + conputs(cp, "\r\nBad password\r\npassword: "); + cp->pwi = 0; + } + } + start(cp); +} + +static void +incoming(Conn *cp, If *ifc, Pkt *p) +{ + int i; + Block *bp; + Pkt *np; + + /* ack it no matter what its sequence number */ + bp = sethdr(ifc, p->src, &np, 0); + np->type = Tack; + np->seq = p->seq; + np->conn = cp->cno; + np->len = 0; + trace(bp); + ifc->d->bwrite(ifc->dc, bp, 0); + + if(cp->state == Cunused){ + /* connection */ + discover(ifc, p); + return; + } + if(p->seq == cp->rcvseq) + return; + + cp->rcvseq = p->seq; + if(cp->state == Copen){ + for (i = 0; i < p->len; i++) + kbdcr2nl(nil, (char)p->data[i]); + }else if(cp->state == Clogin) + checkpw(cp, (char *)p->data, p->len); +} + +static void +inita(Conn *ncp, If *ifc, Pkt *p) +{ + Block *bp; + Pkt *np; + + ncp->ifc = ifc; + ncp->state = Cinitb; + memmove(ncp->ea, p->src, 6); + ncp->cno = p->conn; + bp = sethdr(ifc, p->src, &np, 0); + np->type = Tinitb; + np->conn = ncp->cno; + np->len = 0; + send(ncp, bp); +} + + +static void +cecrdr(void *vp) +{ + Block *bp; + Conn *cp; + If *ifc; + Pkt *p; + + ifc = vp; + if(waserror()) + goto exit; + + discover(ifc, 0); + for(;;){ + bp = ifc->d->bread(ifc->dc, 1514, 0); // do we care about making the MTU non magic? 
+ if(bp == nil) + nexterror(); + p = (Pkt *)bp->rp; + if(p->etype[0] != 0xbc || p->etype[1] != 0xbc){ + freeb(bp); + continue; + } + trace(bp); + cp = findconn(p->src, p->conn); + if(cp == nil){ + cecprint("cec: out of connection structures\n"); + freeb(bp); + continue; + } + if (waserror()){ + freeb(bp); + qunlock(cp); + continue; + } + switch(p->type){ + case Tinita: + if(cp->bp){ + cecprint("cec: reset with bp!? ask quanstro\n"); + freeb(cp->bp); + cp->bp = 0; + } + inita(cp, ifc, p); + break; + case Tinitb: + cecprint("cec: unexpected initb\n"); + break; + case Tinitc: + if(cp->state == Cinitb){ + ack(cp); + if(cp->passwd[0]){ + cp->state = Clogin; + conputs(cp, "password: "); + start(cp); + }else + cp->state = Copen; + } + break; + case Tdata: + incoming(cp, ifc, p); + break; + case Tack: + if(cp->state == Clogin || cp->state == Copen){ + ack(cp); + start(cp); + } + break; + case Tdiscover: + discover(ifc, p); + break; + case Toffer: + // cecprint("cec: unexpected offer\n"); from ourselves. 
+ break; + case Treset: + if(cp->bp) + freeb(cp->bp); + cp->bp = 0; + cp->state = Cunused; + break; + default: + cecprint("bad cec type: %d\n", p->type); + break; + } + nexterror(); + } + +exit: + for(cp = conn; cp < conn+nelem(conn); cp++) + if(cp->ifc == ifc){ + if(cp->bp) + freeb(cp->bp); + memset(cp, 0, sizeof *cp); + break; + } + + memset(ifc, 0, sizeof *ifc); + pexit("cec exiting", 1); +} + +static Chan * +cecattach(char *spec) +{ + Chan *c; + static QLock q; + static int inited; + + qlock(&q); + if(inited == 0){ + kproc("cectimer", cectimer, nil); + inited++; + } + qunlock(&q); + c = devattach(L'©', spec); + c->qid.path = Qdir; + return c; +} + +static Walkqid* +cecwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, cecdir, nelem(cecdir), devgen); +} + +static long +cecstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, cecdir, nelem(cecdir), devgen); +} + +static Chan* +cecopen(Chan *c, int omode) +{ + return devopen(c, omode, cecdir, nelem(cecdir), devgen); +} + +static void +cecclose(Chan*) +{ +} + +static long +cecread(Chan *c, void *a, long n, vlong offset) +{ + char *p; + int j; + Conn *cp; + If *ifc; + + switch((int)c->qid.path){ + case Qdir: + return devdirread(c, a, n, cecdir, nelem(cecdir), devgen); + case Qstat: + p = smalloc(Size); + j = 0; + for(cp = conn; cp < conn+Nconns; cp++) + if(cp->state != Cunused) + j += snprint(p+j, Size-j, + "%E %3d %-6s %12d %d %d %.8lux\n", + cp->ea, cp->cno, cstate[cp->state], cp->idle, + cp->to, cp->retries, (uintptr)cp->bp); + n = readstr(offset, a, n, p); + free(p); + return n; + case Qdbg: + cecprint("xmit %d, rsnd %d\n", xmit, rsnd); + return 0; + case Qcfg: + case Qctl: + p = smalloc(Size); + j = 0; + for(ifc = ifs; ifc < ifs+nelem(ifs); ifc++) + if(ifc->d) + j += snprint(p+j, Size-j, "%s\n", ifc->path); + n = readstr(offset, a, n, p); + free(p); + return n; + } + error(Egreg); + return 0; +} + +/* +static void +cecfixup(void) +{ + char *p; + int len; + + p = 
malloc(128*1024); + snprint(p, 6, "write "); + len = readfile("/dev/kmesg", p+6, 128*1024-6); + writefile("#©/cecctl", p, len+6); + free(p); +} +*/ + +static void +cecon(char *path) +{ + char buf[64]; + uchar ea[6]; + Chan *dc, *cc; + If *ifc, *nifc; + + nifc = nil; + for(ifc = ifs; ifc < ifs+nelem(ifs); ifc++) + if(ifc->d == nil) + nifc = ifc; + else if(strcmp(ifc->path, path) == 0) + return; + ifc = nifc; + if(ifc == nil) + error("out of interface structures"); + + getaddr(path, ea); + snprint(buf, sizeof buf, "%s!0xbcbc", path); + dc = chandial(buf, nil, nil, &cc); + if(dc == nil || cc == nil){ + if (cc) + cclose(cc); + if (dc) + cclose(dc); + snprint(up->genbuf, sizeof up->genbuf, "can't dial %s", buf); + error(up->genbuf); + } + ifc->d = cc->dev; + ifc->cc = cc; + ifc->dc = dc; + strncpy(ifc->path, path, sizeof ifc->path); + memmove(ifc->ea, ea, 6); + snprint(up->genbuf, sizeof up->genbuf, "cec:%s", path); + kproc(up->genbuf, cecrdr, ifc); +} + +static void +cecoff(char *path) +{ + int all, n; + If *ifc, *e; + + all = strcmp(path, "*") == 0; + n = 0; + ifc = ifs; + e = ifc+nelem(ifs); + for(; ifc < e; ifc++) + if(ifc->d && (all || strcmp(path, ifc->path) == 0)){ + cclose(ifc->cc); + cclose(ifc->dc); + memset(ifc, 0, sizeof *ifc); + n++; + } + if(all + n == 0) + error("cec not found"); +} + +static void +rst(Conn *c) +{ + if(c == nil) + error("no such index"); + if(c->bp) + freeb(c->bp); + c->bp = 0; + c->state = Cunused; + qunlock(c); +} + +static long +cecwrite(Chan *c, void *a, long n, vlong) +{ + Cmdbuf *cb; + Cmdtab *cp; + + if(c->qid.path == Qctl){ + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + cp = lookupcmd(cb, ceccmd, nelem(ceccmd)); + switch(cp->index){ + case CMsetname: + strecpy(name, name+sizeof name-1, cb->f[1]); + break; + case CMtraceon: + tflag = 1; + break; + case CMtraceoff: + tflag = 0; + break; + case CMsetpasswd: + strcpy(passwd, cb->f[1]); + break; + case CMcecon: + cecon(cb->f[1]); + break; + case CMcecoff: + 
cecoff(cb->f[1]); + break; + case CMsetshelf: + shelf = atoi(cb->f[1]); + break; + case CMwrite: + cecputs((char*)a+6,n-6); + break; + case CMreset: + rst(connidx(atoi(cb->f[1]))); + break; + default: + cmderror(cb, "bad control message"); + break; + } + free(cb); + poperror(); + return n; + } + error(Egreg); + return 0; +} + +static void +cecinit(void) +{ + addconsdev(nil, cecputs, -1, 0); +} + +Dev cecdevtab = { + L'©', + "cec", + + devreset, + cecinit, + devshutdown, + cecattach, + cecwalk, + cecstat, + cecopen, + devcreate, + cecclose, + cecread, + devbread, + cecwrite, + devbwrite, + devremove, + devwstat, + devpower, + devconfig, +}; diff -Nru 0/sys/src/nix/port/devcmd.c 4/sys/src/nix/port/devcmd.c --- 0/sys/src/nix/port/devcmd.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devcmd.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,733 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "kexec.h" + +enum +{ + Qtopdir, /* top level directory */ + Qcmd, + Qclonus, + Qconvdir, + Qconvbase, + Qdata = Qconvbase, + Qstderr, + Qctl, + Qalloc, + Qexec, + Qstatus, + Qwait, + + Debug=0 /* to help debug os.c */ +}; +#define TYPE(x) ((ulong)(x).path & 0xf) +#define CONV(x) (((ulong)(x).path >> 4)&0xfff) +#define QID(c, y) (((c)<<4) | (y)) + +typedef struct Conv Conv; +struct Conv +{ + int x; + int inuse; + int fd[3]; /* stdin, stdout, and stderr */ + int count[3]; /* number of readers on stdin/stdout/stderr */ + int perm; + ulong esz; + char* owner; + char* state; + Cmdbuf* cmd; + char* dir; + QLock l; /* protects state changes */ + Queue* waitq; + void* child; + char* error; /* on start up */ + int nice; + short killonclose; + short killed; + Rendez startr; + Proc *p; +}; + +static struct +{ + QLock l; + int nc; + int maxconv; + Conv** conv; +} cmd; + +static Conv* cmdclone(char*); +static void cmdproc(void*); + +static int +cmd3gen(Chan *c, int i, Dir *dp) +{ + Qid q; + Conv *cv; + + cv = 
cmd.conv[CONV(c->qid)]; + switch(i){ + default: + return -1; + case Qdata: + mkqid(&q, QID(CONV(c->qid), Qdata), 0, QTFILE); + devdir(c, q, "data", 0, cv->owner, cv->perm, dp); + return 1; + case Qstderr: + mkqid(&q, QID(CONV(c->qid), Qstderr), 0, QTFILE); + devdir(c, q, "stderr", 0, cv->owner, 0444, dp); + return 1; + case Qalloc: + mkqid(&q, QID(CONV(c->qid), Qalloc), 0, QTFILE); + devdir(c, q, "alloc", 0, cv->owner, cv->perm, dp); + return 1; + case Qexec: + mkqid(&q, QID(CONV(c->qid), Qexec), 0, QTFILE); + devdir(c, q, "exec", 0, cv->owner, cv->perm, dp); + return 1; + case Qctl: + mkqid(&q, QID(CONV(c->qid), Qctl), 0, QTFILE); + devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp); + return 1; + case Qstatus: + mkqid(&q, QID(CONV(c->qid), Qstatus), 0, QTFILE); + devdir(c, q, "status", 0, cv->owner, 0444, dp); + return 1; + case Qwait: + mkqid(&q, QID(CONV(c->qid), Qwait), 0, QTFILE); + devdir(c, q, "wait", 0, cv->owner, 0444, dp); + return 1; + } +} + +static int +cmdgen(Chan *c, char *name, Dirtab *d, int nd, int s, Dir *dp) +{ + Qid q; + Conv *cv; + + USED(name); + USED(nd); + USED(d); + + if(s == DEVDOTDOT){ + switch(TYPE(c->qid)){ + case Qtopdir: + case Qcmd: + mkqid(&q, QID(0, Qtopdir), 0, QTDIR); + devdir(c, q, "#C", 0, eve, DMDIR|0555, dp); + break; + case Qconvdir: + mkqid(&q, QID(0, Qcmd), 0, QTDIR); + devdir(c, q, "cmd", 0, eve, DMDIR|0555, dp); + break; + default: + panic("cmdgen %llux", c->qid.path); + } + return 1; + } + + switch(TYPE(c->qid)) { + case Qtopdir: + if(s >= 1) + return -1; + mkqid(&q, QID(0, Qcmd), 0, QTDIR); + devdir(c, q, "cmd", 0, "cmd", DMDIR|0555, dp); + return 1; + case Qcmd: + if(s < cmd.nc) { + cv = cmd.conv[s]; + mkqid(&q, QID(s, Qconvdir), 0, QTDIR); + sprint(up->genbuf, "%d", s); + devdir(c, q, up->genbuf, 0, cv->owner, DMDIR|0555, dp); + return 1; + } + s -= cmd.nc; + if(s == 0){ + mkqid(&q, QID(0, Qclonus), 0, QTFILE); + devdir(c, q, "clone", 0, "cmd", 0666, dp); + return 1; + } + return -1; + case Qclonus: + if(s == 0){ + 
mkqid(&q, QID(0, Qclonus), 0, QTFILE); + devdir(c, q, "clone", 0, "cmd", 0666, dp); + return 1; + } + return -1; + case Qconvdir: + return cmd3gen(c, Qconvbase+s, dp); + case Qdata: + case Qstderr: + case Qalloc: + case Qexec: + case Qctl: + case Qstatus: + case Qwait: + return cmd3gen(c, TYPE(c->qid), dp); + } + return -1; +} + +static void +cmdinit(void) +{ + cmd.maxconv = 1000; + cmd.conv = mallocz(sizeof(Conv*)*(cmd.maxconv+1), 1); + /* cmd.conv is checked by cmdattach, below */ +} + +static Chan * +cmdattach(char *spec) +{ + Chan *c; + + if(cmd.conv == nil) + error(Enomem); + c = devattach('C', spec); + mkqid(&c->qid, QID(0, Qtopdir), 0, QTDIR); + return c; +} + +static Walkqid* +cmdwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, cmdgen); +} + +static long +cmdstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, cmdgen); +} + +static Chan * +cmdopen(Chan *c, int omode) +{ + int perm; + Conv *cv; + char *user; + + perm = 0; + omode = openmode(omode); + switch(omode) { + case OREAD: + perm = 4; + break; + case OWRITE: + perm = 2; + break; + case ORDWR: + perm = 6; + break; + } + + switch(TYPE(c->qid)) { + default: + break; + case Qtopdir: + case Qcmd: + case Qconvdir: + case Qstatus: + if(omode != OREAD) + error(Eperm); + break; + case Qclonus: + qlock(&cmd.l); + if(waserror()){ + qunlock(&cmd.l); + nexterror(); + } + cv = cmdclone(up->user); + poperror(); + qunlock(&cmd.l); + if(cv == 0) + error(Enodev); + mkqid(&c->qid, QID(cv->x, Qctl), 0, QTFILE); + break; + case Qdata: + case Qstderr: + case Qctl: + case Qalloc: + case Qexec: + case Qwait: + qlock(&cmd.l); + cv = cmd.conv[CONV(c->qid)]; + qlock(&cv->l); + if(waserror()){ + qunlock(&cv->l); + qunlock(&cmd.l); + nexterror(); + } + user = up->user; + if((perm & (cv->perm>>6)) != perm) { + if(strcmp(user, cv->owner) != 0 || + (perm & cv->perm) != perm) + error(Eperm); + } + switch(TYPE(c->qid)){ + case Qdata: + if(omode == OWRITE || omode == ORDWR) + 
cv->count[0]++; + if(omode == OREAD || omode == ORDWR) + cv->count[1]++; + break; + case Qstderr: + if(omode != OREAD) + error(Eperm); + cv->count[2]++; + break; + case Qwait: + if(cv->waitq == nil) + cv->waitq = qopen(1024, Qmsg, nil, 0); + break; + } + cv->inuse++; + if(cv->inuse == 1) { + cv->state = "Open"; + kstrdup(&cv->owner, user); + cv->perm = 0660; + cv->nice = 0; + } + poperror(); + qunlock(&cv->l); + qunlock(&cmd.l); + break; + } + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +closeconv(Conv *c) +{ + kstrdup(&c->owner, "cmd"); + kstrdup(&c->dir, "FIXME"); + c->perm = 0666; + c->state = "Closed"; + c->killonclose = 0; + c->killed = 0; + c->nice = 0; + free(c->cmd); + c->cmd = nil; + if(c->waitq != nil){ + qfree(c->waitq); + c->waitq = nil; + } + free(c->error); + c->error = nil; +} + +static void +cmdfdclose(Conv *c, int fd) +{ + if(--c->count[fd] == 0 && c->fd[fd] != -1){ +// close(c->fd[fd]); + c->fd[fd] = -1; + } +} + +static void +cmdclose(Chan *c) +{ + Conv *cc; + int r; + + if((c->flag & COPEN) == 0) + return; + + switch(TYPE(c->qid)) { + case Qctl: + case Qalloc: + case Qexec: + case Qdata: + case Qstderr: + case Qwait: + cc = cmd.conv[CONV(c->qid)]; + qlock(&cc->l); + if(TYPE(c->qid) == Qdata){ + if(c->mode == OWRITE || c->mode == ORDWR) + cmdfdclose(cc, 0); + if(c->mode == OREAD || c->mode == ORDWR) + cmdfdclose(cc, 1); + }else if(TYPE(c->qid) == Qstderr) + cmdfdclose(cc, 2); + + r = --cc->inuse; + if(cc->child != nil){ + if(!cc->killed) + if(r == 0 || (cc->killonclose && TYPE(c->qid) == Qctl)){ + // oscmdkill(cc->child); + cc->killed = 1; + } + }else if(r == 0) + closeconv(cc); + + qunlock(&cc->l); + break; + } +} + +static long +cmdread(Chan *ch, void *a, long n, vlong offset) +{ + Conv *c; + Proc *p; + char *s, *cmds; + int fd; + char buf[256]; + + USED(offset); + + s = a; + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qcmd: + case Qtopdir: + case Qconvdir: + return devdirread(ch, a, n, 0, 
0, cmdgen); + case Qctl: + sprint(up->genbuf, "%ld", CONV(ch->qid)); + return readstr(offset, s, n, up->genbuf); + case Qalloc: + c = cmd.conv[CONV(ch->qid)]; + p = c->p; + snprint(buf, sizeof(buf), "%#p %#p %#p %#p %#p %#p %#p %#p", + p->seg[TSEG]->base, p->seg[TSEG]->top, + p->seg[DSEG]->base, p->seg[DSEG]->top, + p->seg[BSEG]->base, p->seg[BSEG]->top, + p->seg[SSEG]->base, p->seg[SSEG]->top); + return readstr(offset, s, n, buf); + case Qexec: + c = cmd.conv[CONV(ch->qid)]; + snprint(up->genbuf, sizeof(up->genbuf), "%ld", c->esz); + return readstr(offset, s, n, up->genbuf); + case Qstatus: + c = cmd.conv[CONV(ch->qid)]; + cmds = ""; + if(c->cmd != nil) + cmds = c->cmd->f[1]; + snprint(up->genbuf, sizeof(up->genbuf), "cmd/%d %d %s %q %q\n", + c->x, c->inuse, c->state, c->dir, cmds); + return readstr(offset, s, n, up->genbuf); + case Qdata: + case Qstderr: + fd = 1; + if(TYPE(ch->qid) == Qstderr) + fd = 2; + c = cmd.conv[CONV(ch->qid)]; + qlock(&c->l); + if(c->fd[fd] == -1){ + qunlock(&c->l); + return 0; + } + qunlock(&c->l); + // osenter(); +// n = read(c->fd[fd], a, n); +// osleave(); +// if(n < 0) +// oserror(); + return n; + case Qwait: + c = cmd.conv[CONV(ch->qid)]; + return qread(c->waitq, a, n); + } +} + +static int +cmdstarted(void *a) +{ + Conv *c; + + c = a; + return c->child != nil || c->error != nil || strcmp(c->state, "Execute") != 0; +} + +enum +{ + CMdir, + CMstart, + CMexec, + CMkill, + CMnice, + CMkillonclose +}; + +static +Cmdtab cmdtab[] = { + CMdir, "dir", 2, + CMstart, "start", 0, + CMexec, "exec", 0, + CMkill, "kill", 1, + CMnice, "nice", 0, + CMkillonclose, "killonclose", 0, +}; + +static long +cmdwrite(Chan *ch, void *a, long n, vlong offset) +{ + int i, r; + Conv *c; + Segment *s; + Cmdbuf *cb; + Cmdtab *ct; + + USED(offset); + + switch(TYPE(ch->qid)) { + default: + error(Eperm); + case Qctl: + c = cmd.conv[CONV(ch->qid)]; + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, cmdtab, nelem(cmdtab)); + 
switch(ct->index){ + case CMdir: + kstrdup(&c->dir, cb->f[1]); + break; + case CMstart: + // so what do we do with this? + // we need to do the process. + if(cb->nf < 2) + error(Ebadctl); + c = cmd.conv[CONV(ch->qid)]; + s = c->p->seg[TSEG]; + // XXX: set the text name? + //kstrdup(&c->p->text, cb->f[1]); + kforkexecac(c->p, atoi(cb->f[2]), nil, cb->f+3); + break; + case CMexec: + poperror(); /* cb */ + qlock(&c->l); + if(waserror()){ + qunlock(&c->l); + free(cb); + nexterror(); + } + if(c->child != nil || c->cmd != nil) + error(Einuse); + for(i = 0; i < nelem(c->fd); i++) + if(c->fd[i] != -1) + error(Einuse); + if(cb->nf < 1) + error(Etoosmall); +// kproc("cmdproc", cmdproc, c, 0); /* cmdproc held back until unlock below */ + free(c->cmd); + c->cmd = cb; /* don't free cb */ + c->state = "Execute"; + poperror(); + qunlock(&c->l); + while(waserror()) + ; +// Sleep(&c->startr, cmdstarted, c); + poperror(); + if(c->error) + error(c->error); + return n; /* avoid free(cb) below */ + } + poperror(); + free(cb); + break; + case Qexec: + c = cmd.conv[CONV(ch->qid)]; + s = c->p->seg[TSEG]; + if(s->base+offset+n > s->top) + error(Etoobig); + memmove((void*)(s->base + offset), a, n); + if(offset+n > c->esz) + c->esz = offset+n; + // XXX: can this every not be n? + return n; + case Qdata: + c = cmd.conv[CONV(ch->qid)]; + qlock(&c->l); + if(c->fd[0] == -1){ + qunlock(&c->l); + error(Ehungup); + } + qunlock(&c->l); +// osenter(); +// r = write(c->fd[0], a, n); +// osleave(); + if(r == 0) + error(Ehungup); + if(r < 0) { + /* XXX perhaps should kill writer "write on closed pipe" here, 2nd time around? 
*/
+//			oserror();
+		}
+		return r;
+	}
+	return n;
+}
+
+/*
+ * wstat on a conversation's ctl, data or stderr file; any other file
+ * gets Eperm.  The stat buffer is decoded with convM2D, and only eve or
+ * the conversation's owner may go on.  A non-empty uid replaces the
+ * owner; a mode other than ~0 replaces perm (masked to 0777).
+ * Returns the count reported by convM2D.
+ */
+static long
+cmdwstat(Chan *c, uchar *dp, long n)
+{
+	Dir *d;
+	Conv *cv;
+
+	switch(TYPE(c->qid)){
+	default:
+		error(Eperm);
+	case Qctl:
+	case Qdata:
+	case Qstderr:
+		/* the n extra bytes after the Dir hold the strings convM2D unpacks */
+		d = malloc(sizeof(*d)+n);
+		if(d == nil)
+			error(Enomem);
+		if(waserror()){
+			free(d);
+			nexterror();
+		}
+		n = convM2D(dp, n, d, (char*)&d[1]);
+		if(n == 0)
+			error(Eshortstat);
+		cv = cmd.conv[CONV(c->qid)];
+		if(!iseve() && strcmp(up->user, cv->owner) != 0)
+			error(Eperm);
+		if(!emptystr(d->uid))
+			kstrdup(&cv->owner, d->uid);
+		if(d->mode != ~0UL)
+			cv->perm = d->mode & 0777;
+		poperror();
+		free(d);
+		break;
+	}
+	return n;
+}
+
+/*
+ * Claim a conversation for user: allocate a new Conv in the first empty
+ * slot of cmd.conv, or reuse an existing one whose lock can be taken and
+ * that has neither inuse set nor a child.  Returns nil when no slot
+ * qualifies.  The chosen Conv's lock is held while it is (re)initialised.
+ */
+static Conv*
+cmdclone(char *user)
+{
+	Conv *c, **pp, **ep;
+	int i;
+
+	c = nil;
+	ep = &cmd.conv[cmd.maxconv];
+	for(pp = cmd.conv; pp < ep; pp++) {
+		c = *pp;
+		if(c == nil) {
+			c = malloc(sizeof(Conv));
+			if(c == nil)
+				error(Enomem);
+			qlock(&c->l);
+			c->inuse = 1;
+			c->x = pp - cmd.conv;
+			cmd.nc++;
+			*pp = c;
+			break;
+		}
+		if(canqlock(&c->l)){
+			if(c->inuse == 0 && c->child == nil)
+				break;
+			qunlock(&c->l);
+		}
+	}
+	if(pp >= ep)
+		return nil;
+
+	c->inuse = 1;
+	kstrdup(&c->owner, user);
+	kstrdup(&c->dir, "FIXME");
+	c->perm = 0660;
+	c->state = "Closed";
+	c->esz = 0;
+	/* -1 is the value cmdread/cmdwrite test for before using an fd slot */
+	for(i=0; i<nelem(c->fd); i++)
+		c->fd[i] = -1;
+	// XXX: this should go somewhere else. 
+	c->p = setupseg(0);
+
+	qunlock(&c->l);
+	return c;
+}
+
+/*
+ * Kernel-process body for one conversation (a is the Conv).  The host
+ * command start, wait and free calls are all commented out, so what
+ * remains is: record the (absent) child, wait on the conversation's
+ * core with mwait, then post the status string to c->waitq or close the
+ * conversation if it is no longer in use.
+ */
+static void
+cmdproc(void *a)
+{
+	Conv *c;
+	int n;
+	char status[ERRMAX];
+	void *t;
+
+	c = a;
+	/*
+	 * With oscmd/oscmdwait stubbed out nothing assigns t or n, yet both
+	 * are read below; give them defined values (no child, empty status).
+	 */
+	t = nil;
+	n = 0;
+	qlock(&c->l);
+	if(Debug)
+		print("f[0]=%q f[1]=%q\n", c->cmd->f[0], c->cmd->f[1]);
+	if(waserror()){
+		if(Debug)
+			print("failed: %q\n", up->errstr);
+		kstrdup(&c->error, up->errstr);
+		c->state = "Done";
+		qunlock(&c->l);
+//		Wakeup(&c->startr);
+		pexit("cmdproc", 0);
+	}
+//	t = oscmd(c->cmd->f+1, c->nice, c->dir, c->fd);
+//	if(t == nil)
+//		oserror();
+	c->child = t;	/* to allow oscmdkill */
+	poperror();
+	qunlock(&c->l);
+//	Wakeup(&c->startr);
+	if(Debug)
+		print("started\n");
+
+//	while(waserror())
+//		oscmdkill(t);
+//	osenter();
+	mwait(&c->p->ac->icc->fn);
+
+//	n = oscmdwait(t, status, sizeof(status));
+//	osleave();
+	if(n < 0){
+//		oserrstr(up->genbuf, sizeof(up->genbuf));
+		n = snprint(status, sizeof(status), "0 0 0 0 %q", up->genbuf);
+	}
+	qlock(&c->l);
+	c->child = nil;
+//	oscmdfree(t);
+	if(Debug){
+		status[n]=0;
+		print("done %d %d %d: %q\n", c->fd[0], c->fd[1], c->fd[2], status);
+	}
+	if(c->inuse > 0){
+		c->state = "Done";
+		if(c->waitq != nil)
+			qproduce(c->waitq, status, n);
+	}else
+		closeconv(c);
+	qunlock(&c->l);
+	pexit("", 0);
+}
+
+Dev cmddevtab = {
+	'C',
+	"cmd",
+
+	devreset,
+	cmdinit,
+	devshutdown,
+	cmdattach,
+	cmdwalk,
+	cmdstat,
+	cmdopen,
+	devcreate,
+	cmdclose,
+	cmdread,
+	devbread,
+	cmdwrite,
+	devbwrite,
+	devremove,
+	cmdwstat
+};
diff -Nru 0/sys/src/nix/port/devcons.c 4/sys/src/nix/port/devcons.c
--- 0/sys/src/nix/port/devcons.c Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/devcons.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,1463 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include <authsrv.h>	/* NOTE(review): header name was lost in extraction; restored from upstream devcons.c -- confirm */
+
+enum
+{
+	Nconsdevs = 64,	/* max number of consoles */
+
+	/* Consdev flags */
+	Ciprint = 2,	/* call this fn from iprint */
+	Cntorn = 4,	/* change \n to \r\n */
+};
+
+typedef struct Consdev 
Consdev; + +struct Consdev +{ + Chan* c; /* external file */ + Queue* q; /* i/o queue, if any */ + void (*fn)(char*, int); /* i/o function when no queue */ + int flags; +}; + +void (*consdebug)(void) = nil; +void (*consputs)(char*, int) = nil; + +static void kmesgputs(char *, int); +static void kprintputs(char*, int); + +static Lock consdevslock; +static int nconsdevs = 3; +static Consdev consdevs[Nconsdevs] = /* keep this order */ +{ + {nil, nil, kmesgputs, 0}, /* kmesg */ + {nil, nil, kprintputs, Ciprint}, /* kprint */ + {nil, nil, uartputs, Ciprint|Cntorn}, /* serial */ +}; + +static int nkbdqs; +static int nkbdprocs; +static Queue* kbdqs[Nconsdevs]; +static int kbdprocs[Nconsdevs]; +static Queue* kbdq; /* unprocessed console input */ +static Queue* lineq; /* processed console input */ +static Queue* serialoq; /* serial console output */ +static Queue* kprintoq; /* console output, for /dev/kprint */ +static ulong kprintinuse; /* test and set whether /dev/kprint is open */ + +int panicking; + +static struct +{ + QLock; + + int raw; /* true if we shouldn't process input */ + Ref ctl; /* number of opens to the control file */ + int x; /* index into line */ + char line[1024]; /* current input line */ + + int count; + int ctlpoff; + + /* + * A place to save up characters at interrupt time + * before dumping them in the queue + */ + Lock lockputc; + char istage[1024]; + char *iw; + char *ir; + char *ie; +} kbd = { + .iw = kbd.istage, + .ir = kbd.istage, + .ie = kbd.istage + sizeof(kbd.istage), +}; + +char *sysname; +vlong fasthz; + +static void seedrand(void); +static int readtime(ulong, char*, int); +static int readbintime(char*, int); +static int writetime(char*, int); +static int writebintime(char*, int); + +enum +{ + CMhalt, + CMreboot, + CMpanic, +}; + +Cmdtab rebootmsg[] = +{ + CMhalt, "halt", 1, + CMreboot, "reboot", 0, + CMpanic, "panic", 0, +}; + +/* role debugging */ +enum +{ + CMoc, + CMac, + CMtc, +}; + +Cmdtab rolemsg[] = +{ + CMoc, "oc", 2, + CMac, "ac", 
2, + CMtc, "tc", 2, +}; + +/* To keep the rest of the kernel unware of new consdevs for now */ +static void +kprintputs(char *s, int n) +{ + if(consputs != nil) + consputs(s, n); +} + +int +addconsdev(Queue *q, void (*fn)(char*,int), int i, int flags) +{ + Consdev *c; + + ilock(&consdevslock); + if(i < 0) + i = nconsdevs; + else + flags |= consdevs[i].flags; + if(nconsdevs == Nconsdevs) + panic("Nconsdevs too small"); + c = &consdevs[i]; + c->flags = flags; + c->q = q; + c->fn = fn; + if(i == nconsdevs) + nconsdevs++; + iunlock(&consdevslock); + return i; +} + +void +delconsdevs(void) +{ + nconsdevs = 2; /* throw away serial consoles and kprint */ + consdevs[1].q = nil; +} + +static void +conskbdqproc(void *a) +{ + char buf[64]; + Queue *q; + int nr; + + q = a; + while((nr = qread(q, buf, sizeof(buf))) > 0) + qwrite(kbdq, buf, nr); + pexit("hangup", 1); +} + +static void +kickkbdq(void) +{ + int i; + + if(up != nil && nkbdqs > 1 && nkbdprocs != nkbdqs){ + lock(&consdevslock); + if(nkbdprocs == nkbdqs){ + unlock(&consdevslock); + return; + } + for(i = 0; i < nkbdqs; i++) + if(kbdprocs[i] == 0){ + kbdprocs[i] = 1; + kproc("conskbdq", conskbdqproc, kbdqs[i]); + } + unlock(&consdevslock); + } +} + +int +addkbdq(Queue *q, int i) +{ + int n; + + ilock(&consdevslock); + if(i < 0) + i = nkbdqs++; + if(nkbdqs == Nconsdevs) + panic("Nconsdevs too small"); + kbdqs[i] = q; + n = nkbdqs; + iunlock(&consdevslock); + switch(n){ + case 1: + /* if there's just one, pull directly from it. 
*/ + kbdq = q; + break; + case 2: + /* later we'll merge bytes from all kbdqs into a single kbdq */ + kbdq = qopen(4*1024, 0, 0, 0); + if(kbdq == nil) + panic("no kbdq"); + /* fall */ + default: + kickkbdq(); + } + return i; +} + +void +printinit(void) +{ + lineq = qopen(2*1024, 0, nil, nil); + if(lineq == nil) + panic("printinit"); + qnoblock(lineq, 1); +} + +int +consactive(void) +{ + int i; + Queue *q; + + for(i = 0; i < nconsdevs; i++) + if((q = consdevs[i].q) != nil && qlen(q) > 0) + return 1; + return 0; +} + +void +prflush(void) +{ + ulong now; + + now = m->ticks; + while(consactive()) + if(m->ticks - now >= HZ) + break; +} + +/* + * Log console output so it can be retrieved via /dev/kmesg. + * This is good for catching boot-time messages after the fact. + */ +struct { + Lock lk; + char buf[16384]; + uint n; +} kmesg; + +static void +kmesgputs(char *str, int n) +{ + uint nn, d; + + ilock(&kmesg.lk); + /* take the tail of huge writes */ + if(n > sizeof kmesg.buf){ + d = n - sizeof kmesg.buf; + str += d; + n -= d; + } + + /* slide the buffer down to make room */ + nn = kmesg.n; + if(nn + n >= sizeof kmesg.buf){ + d = nn + n - sizeof kmesg.buf; + if(d) + memmove(kmesg.buf, kmesg.buf+d, sizeof kmesg.buf-d); + nn -= d; + } + + /* copy the data in */ + memmove(kmesg.buf+nn, str, n); + nn += n; + kmesg.n = nn; + iunlock(&kmesg.lk); +} + +static void +consdevputs(Consdev *c, char *s, int n, int usewrite) +{ + Chan *cc; + Queue *q; + + if((cc = c->c) != nil && usewrite) + cc->dev->write(cc, s, n, 0); + else if((q = c->q) != nil && !qisclosed(q)) + if(usewrite) + qwrite(q, s, n); + else + qiwrite(q, s, n); + else if(c->fn != nil) + c->fn(s, n); +} + +/* + * Print a string on the console. Convert \n to \r\n for serial + * line consoles. Locking of the queues is left up to the screen + * or uart code. Multi-line messages to serial consoles may get + * interspersed with other messages. 
+ */ +static void +putstrn0(char *str, int n, int usewrite) +{ + Consdev *c; + char *s, *t; + int i, len, m; + + if(!islo()) + usewrite = 0; + + for(i = 0; i < nconsdevs; i++){ + c = &consdevs[i]; + len = n; + s = str; + while(len > 0){ + t = nil; + if((c->flags&Cntorn) && !kbd.raw) + t = memchr(s, '\n', len); + if(t != nil && !kbd.raw){ + m = t-s; + consdevputs(c, s, m, usewrite); + consdevputs(c, "\r\n", 2, usewrite); + len -= m+1; + s = t+1; + }else{ + consdevputs(c, s, len, usewrite); + break; + } + } + } +} + +void +putstrn(char *str, int n) +{ + putstrn0(str, n, 0); +} + +int +print(char *fmt, ...) +{ + int n; + va_list arg; + char buf[PRINTSIZE]; + + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + putstrn(buf, n); + + return n; +} + +/* + * Want to interlock iprints to avoid interlaced output on + * multiprocessor, but don't want to deadlock if one processor + * dies during print and another has something important to say. + * Make a good faith effort. + */ +static Lock iprintlock; + +static int +iprintcanlock(Lock *l) +{ + int i; + + for(i=0; i<1000; i++){ + if(canlock(l)) + return 1; + if(l->m == m) + return 0; + microdelay(100); + } + return 0; +} + +int +iprint(char *fmt, ...) +{ + Mpl pl; + int i, n, locked; + va_list arg; + char buf[PRINTSIZE]; + + pl = splhi(); + va_start(arg, fmt); + n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + locked = iprintcanlock(&iprintlock); + for(i = 0; i < nconsdevs; i++) + if((consdevs[i].flags&Ciprint) != 0){ + if(consdevs[i].q != nil) + qiwrite(consdevs[i].q, buf, n); + else + consdevs[i].fn(buf, n); + } + if(locked) + unlock(&iprintlock); + splx(pl); + + return n; +} + +#pragma profile 0 +void +panic(char *fmt, ...) 
+{ + int n; + Mpl pl; + va_list arg; + char buf[PRINTSIZE]; + + consdevs[1].q = nil; /* don't try to write to /dev/kprint */ + + if(panicking) + for(;;); + panicking = 1; + + pl = splhi(); + seprint(buf, buf+sizeof buf, "panic: cpu%d: ", m->machno); + va_start(arg, fmt); + n = vseprint(buf+strlen(buf), buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + iprint("%s\n", buf); + if(consdebug) + (*consdebug)(); + splx(pl); + prflush(); + buf[n] = '\n'; + putstrn(buf, n+1); + dumpstack(); + delay(1000); /* give time to consoles */ + + exit(1); +} +#pragma profile 1 +/* libmp at least contains a few calls to sysfatal; simulate with panic */ +void +sysfatal(char *fmt, ...) +{ + char err[256]; + va_list arg; + + va_start(arg, fmt); + vseprint(err, err + sizeof err, fmt, arg); + va_end(arg); + panic("sysfatal: %s", err); +} + +void +_assert(char *fmt) +{ + panic("assert failed at %#p: %s", getcallerpc(&fmt), fmt); +} + +int +pprint(char *fmt, ...) +{ + int n; + Chan *c; + va_list arg; + char buf[2*PRINTSIZE]; + + if(up == nil || up->fgrp == nil) + return 0; + + c = up->fgrp->fd[2]; + if(c==0 || (c->mode!=OWRITE && c->mode!=ORDWR)) + return 0; + n = snprint(buf, sizeof buf, "%s %d: ", up->text, up->pid); + va_start(arg, fmt); + n = vseprint(buf+n, buf+sizeof(buf), fmt, arg) - buf; + va_end(arg); + + if(waserror()) + return 0; + c->dev->write(c, buf, n, c->offset); + poperror(); + + lock(c); + c->offset += n; + unlock(c); + + return n; +} + +static void +echo(char *buf, int n) +{ + Mpl pl; + static int ctrlt, pid; + char *e, *p; + + if(n == 0) + return; + + e = buf+n; + for(p = buf; p < e; p++){ + switch(*p){ + case 0x10: /* ^P */ + if(cpuserver && !kbd.ctlpoff){ + active.exiting = 1; + return; + } + break; + case 0x14: /* ^T */ + ctrlt++; + if(ctrlt > 2) + ctrlt = 2; + continue; + } + + if(ctrlt != 2) + continue; + + /* ^T escapes */ + ctrlt = 0; + switch(*p){ + case 'S': + pl = splhi(); + dumpstack(); + procdump(); + splx(pl); + return; + case 's': + dumpstack(); + return; + 
case 'x': + ixsummary(); + mallocsummary(); +// memorysummary(); + pagersummary(); + return; + case 'd': + if(consdebug == nil) + consdebug = rdb; + else + consdebug = nil; + print("consdebug now %#p\n", consdebug); + return; + case 'D': + if(consdebug == nil) + consdebug = rdb; + consdebug(); + return; + case 'p': + pl = spllo(); + procdump(); + splx(pl); + return; + case 'q': + scheddump(); + return; + case 'k': + killbig("^t ^t k"); + return; + case 'r': + exit(0); + return; + } + } + + if(kbdq != nil) + qproduce(kbdq, buf, n); + if(kbd.raw == 0) + putstrn(buf, n); +} + +/* + * Called by a uart interrupt for console input. + * + * turn '\r' into '\n' before putting it into the queue. + */ +int +kbdcr2nl(Queue*, int ch) +{ + char *next; + + ilock(&kbd.lockputc); /* just a mutex */ + if(ch == '\r' && !kbd.raw) + ch = '\n'; + next = kbd.iw+1; + if(next >= kbd.ie) + next = kbd.istage; + if(next != kbd.ir){ + *kbd.iw = ch; + kbd.iw = next; + } + iunlock(&kbd.lockputc); + return 0; +} + +/* + * Put character, possibly a rune, into read queue at interrupt time. + * Called at interrupt time to process a character. + */ +int +kbdputc(Queue*, int ch) +{ + int i, n; + char buf[3]; + Rune r; + char *next; + + if(kbd.ir == nil) + return 0; /* in case we're not inited yet */ + + ilock(&kbd.lockputc); /* just a mutex */ + r = ch; + n = runetochar(buf, &r); + for(i = 0; i < n; i++){ + next = kbd.iw+1; + if(next >= kbd.ie) + next = kbd.istage; + if(next == kbd.ir) + break; + *kbd.iw = buf[i]; + kbd.iw = next; + } + iunlock(&kbd.lockputc); + return 0; +} + +/* + * we save up input characters till clock time to reduce + * per character interrupt overhead. 
+ */ +static void +kbdputcclock(void) +{ + char *iw; + + /* this amortizes cost of qproduce */ + if(kbd.iw != kbd.ir){ + iw = kbd.iw; + if(iw < kbd.ir){ + echo(kbd.ir, kbd.ie-kbd.ir); + kbd.ir = kbd.istage; + } + if(kbd.ir != iw){ + echo(kbd.ir, iw-kbd.ir); + kbd.ir = iw; + } + } +} + +enum{ + Qdir, + Qbintime, + Qcons, + Qconsctl, + Qcputime, + Qdrivers, + Qkmesg, + Qkprint, + Qhostdomain, + Qhostowner, + Qnull, + Qosversion, + Qpgrpid, + Qpid, + Qppid, + Qrandom, + Qreboot, + Qswap, + Qsysname, + Qsysstat, + Qtime, + Quser, + Qzero, + Qdebug, +}; + +enum +{ + VLNUMSIZE= 22, +}; + +static Dirtab consdir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "bintime", {Qbintime}, 24, 0664, + "cons", {Qcons}, 0, 0660, + "consctl", {Qconsctl}, 0, 0220, + "cputime", {Qcputime}, 6*NUMSIZE, 0444, + "drivers", {Qdrivers}, 0, 0444, + "hostdomain", {Qhostdomain}, DOMLEN, 0664, + "hostowner", {Qhostowner}, 0, 0664, + "kmesg", {Qkmesg}, 0, 0440, + "kprint", {Qkprint, 0, QTEXCL}, 0, DMEXCL|0440, + "null", {Qnull}, 0, 0666, + "osversion", {Qosversion}, 0, 0444, + "pgrpid", {Qpgrpid}, NUMSIZE, 0444, + "pid", {Qpid}, NUMSIZE, 0444, + "ppid", {Qppid}, NUMSIZE, 0444, + "random", {Qrandom}, 0, 0444, + "reboot", {Qreboot}, 0, 0664, + "swap", {Qswap}, 0, 0664, + "sysname", {Qsysname}, 0, 0664, + "sysstat", {Qsysstat}, 0, 0666, + "time", {Qtime}, NUMSIZE+3*VLNUMSIZE, 0664, + "user", {Quser}, 0, 0666, + "zero", {Qzero}, 0, 0444, + "debug", {Qdebug}, 0, 0666, +}; + +int +readnum(ulong off, char *buf, ulong n, ulong val, int size) +{ + char tmp[64]; + + snprint(tmp, sizeof(tmp), "%*lud", size-1, val); + tmp[size-1] = ' '; + if(off >= size) + return 0; + if(off+n > size) + n = size-off; + memmove(buf, tmp+off, n); + return n; +} + +long +readstr(long offset, char *buf, long n, char *str) +{ + long size; + + size = strlen(str); + if(offset >= size) + return 0; + if(offset+n > size) + n = size-offset; + memmove(buf, str+offset, n); + return n; +} + +static void +consinit(void) +{ + todinit(); + 
randominit(); + /* + * at 115200 baud, the 1024 char buffer takes 56 ms to process, + * processing it every 22 ms should be fine + */ + addclock0link(kbdputcclock, 22); + kickkbdq(); +} + +static Chan* +consattach(char *spec) +{ + return devattach('c', spec); +} + +static Walkqid* +conswalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name,nname, consdir, nelem(consdir), devgen); +} + +static long +consstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, consdir, nelem(consdir), devgen); +} + +static Chan* +consopen(Chan *c, int omode) +{ + c->aux = nil; + c = devopen(c, omode, consdir, nelem(consdir), devgen); + switch((ulong)c->qid.path){ + case Qconsctl: + incref(&kbd.ctl); + break; + + case Qkprint: + if(TAS(&kprintinuse) != 0){ + c->flag &= ~COPEN; + error(Einuse); + } + if(kprintoq == nil){ + kprintoq = qopen(8*1024, Qcoalesce, 0, 0); + if(kprintoq == nil){ + c->flag &= ~COPEN; + error(Enomem); + } + qnoblock(kprintoq, 1); + consdevs[1].q = kprintoq; + }else + qreopen(kprintoq); + c->iounit = qiomaxatomic; + break; + } + return c; +} + +static void +consclose(Chan *c) +{ + switch((ulong)c->qid.path){ + /* last close of control file turns off raw */ + case Qconsctl: + if(c->flag&COPEN){ + if(decref(&kbd.ctl) == 0) + kbd.raw = 0; + } + break; + + /* close of kprint allows other opens */ + case Qkprint: + if(c->flag & COPEN){ + kprintinuse = 0; + qhangup(kprintoq, nil); + } + break; + } +} + +static long +consread(Chan *c, void *buf, long n, vlong off) +{ + ulong l; + Mach *mp; + char *b, *bp, ch, *s, *e; + char tmp[512]; /* Qswap is 381 bytes at clu */ + int i, k, id, send; + long offset; + + + if(n <= 0) + return n; + + offset = off; + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, buf, n, consdir, nelem(consdir), devgen); + + case Qcons: + qlock(&kbd); + if(waserror()) { + qunlock(&kbd); + nexterror(); + } + while(!qcanread(lineq)){ + if(qread(kbdq, &ch, 1) == 0) + continue; + send = 0; + if(ch == 0){ + /* 
flush output on rawoff -> rawon */
+				if(kbd.x > 0)
+					send = !qcanread(kbdq);
+			}else if(kbd.raw){
+				/* raw mode: pass bytes through, flushing once kbdq is drained */
+				kbd.line[kbd.x++] = ch;
+				send = !qcanread(kbdq);
+			}else{
+				/* cooked mode: minimal line editing */
+				switch(ch){
+				case '\b':
+					if(kbd.x > 0)
+						kbd.x--;
+					break;
+				case 0x15:	/* ^U */
+					kbd.x = 0;
+					break;
+				case '\n':
+				case 0x04:	/* ^D */
+					send = 1;
+					/* fall through: newline is stored, ^D is not */
+				default:
+					if(ch != 0x04)
+						kbd.line[kbd.x++] = ch;
+					break;
+				}
+			}
+			/* hand the line over when complete or when the buffer is full */
+			if(send || kbd.x == sizeof kbd.line){
+				qwrite(lineq, kbd.line, kbd.x);
+				kbd.x = 0;
+			}
+		}
+		n = qread(lineq, buf, n);
+		qunlock(&kbd);
+		poperror();
+		return n;
+
+	case Qcputime:
+		k = offset;
+		if(k >= 6*NUMSIZE)
+			return 0;
+		if(k+n > 6*NUMSIZE)
+			n = 6*NUMSIZE - k;
+		/* easiest to format in a separate buffer and copy out */
+		/* only the fields that overlap [k, k+n) need formatting */
+		for(i=0; i<6 && NUMSIZE*i<k+n; i++){
+			l = up->time[i];
+			if(i == TReal)
+				l = sys->ticks - l;
+			l = TK2MS(l);
+			readnum(0, tmp+NUMSIZE*i, NUMSIZE, l, NUMSIZE);
+		}
+		memmove(buf, tmp+k, n);
+		return n;
+
+	case Qkmesg:
+		/*
+		 * This is unlocked to avoid tying up a process
+		 * that's writing to the buffer. kmesg.n never
+		 * gets smaller, so worst case the reader will
+		 * see a slurred buffer. 
+		 */
+		if(off >= kmesg.n)
+			n = 0;
+		else{
+			if(off+n > kmesg.n)
+				n = kmesg.n - off;
+			memmove(buf, kmesg.buf+off, n);
+		}
+		return n;
+
+	case Qkprint:
+		return qread(kprintoq, buf, n);
+
+	case Qpgrpid:
+		return readnum(offset, buf, n, up->pgrp->pgrpid, NUMSIZE);
+
+	case Qpid:
+		return readnum(offset, buf, n, up->pid, NUMSIZE);
+
+	case Qppid:
+		return readnum(offset, buf, n, up->parentpid, NUMSIZE);
+
+	case Qtime:
+		return readtime(offset, buf, n);
+
+	case Qbintime:
+		return readbintime(buf, n);
+
+	case Qhostowner:
+		return readstr(offset, buf, n, eve);
+
+	case Qhostdomain:
+		return readstr(offset, buf, n, hostdomain);
+
+	case Quser:
+		return readstr(offset, buf, n, up->user);
+
+	case Qnull:
+		return 0;
+
+	case Qsysstat:
+		/*
+		 * One row per mach: 11 NUMSIZE-wide numbers, the role name and
+		 * a newline.  The scratch size is kept in l, not n: n is the
+		 * caller's byte count and must still bound the readstr copy
+		 * into buf below.
+		 */
+		l = MACHMAX*(NUMSIZE*11+2+1);
+		b = smalloc(l + 1);	/* +1 for NUL */
+		bp = b;
+		e = bp + l;
+		for(id = 0; id < MACHMAX; id++)
+			if((mp = sys->machptr[id]) != nil && mp->nixrole != NIXUC){
+				readnum(0, bp, NUMSIZE, mp->machno, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->cs, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->intr, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->syscall, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->pfault, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->tlbfault, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, mp->tlbpurge, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, sys->load, NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE,
+					(mp->perf.avg_inidle*100)/mp->perf.period,
+					NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE,
+					(mp->perf.avg_inintr*100)/mp->perf.period,
+					NUMSIZE);
+				bp += NUMSIZE;
+				readnum(0, bp, NUMSIZE, 0, NUMSIZE);	/* sched # */
+				bp += NUMSIZE;
+				bp = strecpy(bp, e, rolename[mp->nixrole]);
+				*bp++ = '\n';
+			}
+		if(waserror()){
+			free(b);
+			nexterror();
+		}
+		n = readstr(offset, buf, n, b);
+		free(b);
+		poperror();
+		return n;
+
+	case Qswap:
+		tmp[0] = 0;
+		s = seprintpagestats(tmp, tmp + sizeof tmp);
+		s =
seprintphysstats(s, tmp + sizeof tmp);
+		b = buf;
+		l = s - tmp;
+		/*
+		 * Copy at most n bytes of the formatted statistics.  Passing l
+		 * here (as before) copied all of tmp regardless of the caller's
+		 * count and could leave n negative for mallocreadsummary.
+		 */
+		i = readstr(offset, b, n, tmp);
+		b += i;
+		n -= i;
+		/* the malloc summary follows tmp: rebase the offset past it */
+		if(offset > l)
+			offset -= l;
+		else
+			offset = 0;
+
+		return i + mallocreadsummary(c, b, n, offset);
+
+	case Qsysname:
+		if(sysname == nil)
+			return 0;
+		return readstr(offset, buf, n, sysname);
+
+	case Qrandom:
+		return randomread(buf, n);
+
+	case Qdrivers:
+		return devtabread(c, buf, n, off);
+
+	case Qzero:
+		memset(buf, 0, n);
+		return n;
+
+	case Qosversion:
+		snprint(tmp, sizeof tmp, "2000");
+		n = readstr(offset, buf, n, tmp);
+		return n;
+
+	case Qdebug:
+		s = seprint(tmp, tmp + sizeof tmp, "locks %llud\n", lockstats.locks);
+		s = seprint(s, tmp + sizeof tmp, "glare %llud\n", lockstats.glare);
+		s = seprint(s, tmp + sizeof tmp, "inglare %llud\n", lockstats.inglare);
+		s = seprint(s, tmp + sizeof tmp, "qlock %llud\n", qlockstats.qlock);
+		seprint(s, tmp + sizeof tmp, "qlockq %llud\n", qlockstats.qlockq);
+		return readstr(offset, buf, n, tmp);
+		break;
+	default:
+		print("consread %#llux\n", c->qid.path);
+		error(Egreg);
+	}
+	return -1;		/* never reached */
+}
+
+/* role debugging */
+/*
+ * Map a rolemsg command index to the matching NIX role constant;
+ * anything unrecognised maps to NIXAC.
+ */
+static int
+cmrole(int cm)
+{
+	switch(cm) {
+	case CMtc:
+		return NIXTC;
+		break;
+	case CMoc:
+		return NIXOC;
+	case CMac:
+		return NIXAC;
+		break;
+	}
+	return NIXAC;
+}
+
+
+
+static long
+conswrite(Chan *c, void *va, long n, vlong off)
+{
+	char buf[256], ch;
+	long l, bp;
+	char *a;
+	Mach *mp;
+	int i, core, role;
+	ulong offset;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	a = va;
+	offset = off;
+
+	switch((ulong)c->qid.path){
+	case Qcons:
+		/*
+		 * Can't page fault in putstrn, so copy the data locally. 
+ */ + l = n; + while(l > 0){ + bp = l; + if(bp > sizeof buf) + bp = sizeof buf; + memmove(buf, a, bp); + putstrn0(buf, bp, 1); + a += bp; + l -= bp; + } + break; + + case Qconsctl: + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + strncpy(buf, a, n); + buf[n] = 0; + for(a = buf; a;){ + if(strncmp(a, "rawon", 5) == 0){ + kbd.raw = 1; + /* clumsy hack - wake up reader */ + ch = 0; + qwrite(kbdq, &ch, 1); + } + else if(strncmp(a, "rawoff", 6) == 0) + kbd.raw = 0; + else if(strncmp(a, "ctlpon", 6) == 0) + kbd.ctlpoff = 0; + else if(strncmp(a, "ctlpoff", 7) == 0) + kbd.ctlpoff = 1; + if(a = strchr(a, ' ')) + a++; + } + break; + + case Qtime: + if(!iseve()) + error(Eperm); + return writetime(a, n); + + case Qbintime: + if(!iseve()) + error(Eperm); + return writebintime(a, n); + + case Qhostowner: + return hostownerwrite(a, n); + + case Qhostdomain: + return hostdomainwrite(a, n); + + case Quser: + return userwrite(a, n); + + case Qnull: + break; + + case Qreboot: + if(!iseve()) + error(Eperm); + cb = parsecmd(a, n); + + if(waserror()) { + free(cb); + nexterror(); + } + ct = lookupcmd(cb, rebootmsg, nelem(rebootmsg)); + switch(ct->index) { + case CMhalt: + reboot(nil, 0, 0); + break; + case CMreboot: + rebootcmd(cb->nf-1, cb->f+1); + break; + case CMpanic: + *(ulong*)0=0; + panic("/dev/reboot"); + } + poperror(); + free(cb); + break; + + case Qsysstat: + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->nixrole != NIXUC){ + mp = sys->machptr[i]; + mp->cs = 0; + mp->intr = 0; + mp->syscall = 0; + mp->pfault = 0; + mp->tlbfault = 0; /* not updated */ + mp->tlbpurge = 0; /* # mmuflushtlb */ + } + break; + + case Qswap: + if(n >= sizeof buf) + error(Egreg); + memmove(buf, va, n); /* so we can NUL-terminate */ + buf[n] = 0; + if(!iseve()) + error(Eperm); + if(buf[0]<'0' || '9'= sizeof buf) + error(Ebadarg); + strncpy(buf, a, n); + buf[n] = 0; + if(buf[n-1] == '\n') + buf[n-1] = 0; + kstrdup(&sysname, buf); + break; + + case Qdebug: + /* role debugging for now 
*/
+		if(!iseve())
+			error(Eperm);
+		cb = parsecmd(a, n);
+		if(waserror()) {
+			free(cb);
+			nexterror();
+		}
+		if(cb->nf != 2)
+			error(Ecmdargs);
+		core = strtoul(cb->f[1], 0, 0);
+
+		ct = lookupcmd(cb, rolemsg, nelem(rolemsg));
+		role = cmrole(ct->index);
+		l = changerole(role, core);
+		if(l < 0)
+			error(Ebadarg);
+		poperror();
+		free(cb);
+		break;
+	default:
+		print("conswrite: %#llux\n", c->qid.path);
+		error(Egreg);
+	}
+	return n;
+}
+
+Dev consdevtab = {
+	'c',
+	"cons",
+
+	devreset,
+	consinit,
+	devshutdown,
+	consattach,
+	conswalk,
+	consstat,
+	consopen,
+	devcreate,
+	consclose,
+	consread,
+	devbread,
+	conswrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
+static ulong randn;
+
+/*
+ * Fill randn from randomread.  An error raised by randomread is
+ * swallowed, leaving randn as it was.
+ */
+static void
+seedrand(void)
+{
+	if(!waserror()){
+		randomread((void*)&randn, sizeof(randn));
+		poperror();
+	}
+}
+
+/*
+ * Step the generator (multiply/add plus sys->ticks) and return the
+ * state shifted right by 16, modulo n.  Seeds first while randn is 0.
+ */
+int
+nrand(int n)
+{
+	if(randn == 0)
+		seedrand();
+	randn = randn*1103515245 + 12345 + sys->ticks;
+	return (randn>>16) % n;
+}
+
+/* Step the generator once and return the raw state. */
+int
+rand(void)
+{
+	nrand(1);
+	return randn;
+}
+
+static uvlong uvorder = 0x0001020304050607ULL;
+
+/*
+ * Copy sizeof(vlong) bytes from f into *to, storing f[i] at byte index
+ * o[i], where o is the in-memory byte image of uvorder.  Returns the
+ * address just past the bytes consumed.
+ */
+static uchar*
+le2vlong(vlong *to, uchar *f)
+{
+	uchar *t, *o;
+	int i;
+
+	t = (uchar*)to;
+	o = (uchar*)&uvorder;
+	for(i = 0; i < sizeof(vlong); i++)
+		t[o[i]] = f[i];
+	return f+sizeof(vlong);
+}
+
+/*
+ * Inverse of le2vlong: t[i] receives byte o[i] of from.  Returns the
+ * address just past the bytes written.
+ */
+static uchar*
+vlong2le(uchar *t, vlong from)
+{
+	uchar *f, *o;
+	int i;
+
+	f = (uchar*)&from;
+	o = (uchar*)&uvorder;
+	for(i = 0; i < sizeof(vlong); i++)
+		t[i] = f[o[i]];
+	return t+sizeof(vlong);
+}
+
+static long order = 0x00010203;
+
+/*
+ * As le2vlong, for a long: f[i] is stored at byte index o[i] of *to,
+ * with o the in-memory byte image of order.
+ */
+static uchar*
+le2long(long *to, uchar *f)
+{
+	uchar *t, *o;
+	int i;
+
+	t = (uchar*)to;
+	o = (uchar*)&order;
+	for(i = 0; i < sizeof(long); i++)
+		t[o[i]] = f[i];
+	return f+sizeof(long);
+}
+
+/*
+ * As vlong2le, for a long: t[i] receives byte o[i] of from.
+ */
+static uchar*
+long2le(uchar *t, long from)
+{
+	uchar *f, *o;
+	int i;
+
+	f = (uchar*)&from;
+	o = (uchar*)&order;
+	for(i = 0; i < sizeof(long); i++)
+		t[i] = f[o[i]];
+	return t+sizeof(long);
+}
+
+char *Ebadtimectl = "bad time control";
+
+/*
+ * like the old #c/time but with added info. 
Return + * + * secs nanosecs fastticks fasthz + */ +static int +readtime(ulong off, char *buf, int n) +{ + vlong nsec, ticks; + long sec; + char str[7*NUMSIZE]; + + nsec = todget(&ticks); + if(fasthz == 0LL) + fastticks((uvlong*)&fasthz); + sec = nsec/1000000000ULL; + snprint(str, sizeof(str), "%*lud %*llud %*llud %*llud ", + NUMSIZE-1, sec, + VLNUMSIZE-1, nsec, + VLNUMSIZE-1, ticks, + VLNUMSIZE-1, fasthz); + return readstr(off, buf, n, str); +} + +/* + * set the time in seconds + */ +static int +writetime(char *buf, int n) +{ + char b[13]; + long i; + vlong now; + + if(n >= sizeof(b)) + error(Ebadtimectl); + strncpy(b, buf, n); + b[n] = 0; + i = strtol(b, 0, 0); + if(i <= 0) + error(Ebadtimectl); + now = i*1000000000LL; + todset(now, 0, 0); + return n; +} + +/* + * read binary time info. all numbers are little endian. + * ticks and nsec are syncronized. + */ +static int +readbintime(char *buf, int n) +{ + int i; + vlong nsec, ticks; + uchar *b = (uchar*)buf; + + i = 0; + if(fasthz == 0LL) + fastticks((uvlong*)&fasthz); + nsec = todget(&ticks); + if(n >= 3*sizeof(uvlong)){ + vlong2le(b+2*sizeof(uvlong), fasthz); + i += sizeof(uvlong); + } + if(n >= 2*sizeof(uvlong)){ + vlong2le(b+sizeof(uvlong), ticks); + i += sizeof(uvlong); + } + if(n >= 8){ + vlong2le(b, nsec); + i += sizeof(vlong); + } + return i; +} + +/* + * set any of the following + * - time in nsec + * - nsec trim applied over some seconds + * - clock frequency + */ +static int +writebintime(char *buf, int n) +{ + uchar *p; + vlong delta; + long period; + + n--; + p = (uchar*)buf + 1; + switch(*buf){ + case 'n': + if(n < sizeof(vlong)) + error(Ebadtimectl); + le2vlong(&delta, p); + todset(delta, 0, 0); + break; + case 'd': + if(n < sizeof(vlong)+sizeof(long)) + error(Ebadtimectl); + p = le2vlong(&delta, p); + le2long(&period, p); + todset(-1, delta, period); + break; + case 'f': + if(n < sizeof(uvlong)) + error(Ebadtimectl); + le2vlong(&fasthz, p); + todsetfreq(fasthz); + break; + } + return n; +} diff 
-Nru 0/sys/src/nix/port/devdup.c 4/sys/src/nix/port/devdup.c --- 0/sys/src/nix/port/devdup.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devdup.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,145 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* Qid is (2*fd + (file is ctl))+1 */ + +static int +dupgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) +{ + Fgrp *fgrp = up->fgrp; + Chan *f; + static int perm[] = { 0400, 0200, 0600, 0 }; + int p; + Qid q; + + if(s == DEVDOTDOT){ + devdir(c, c->qid, ".", 0, eve, DMDIR|0555, dp); + return 1; + } + if(s == 0) + return 0; + s--; + if(s/2 > fgrp->maxfd) + return -1; + if((f=fgrp->fd[s/2]) == nil) + return 0; + if(s & 1){ + p = 0400; + sprint(up->genbuf, "%dctl", s/2); + }else{ + p = perm[f->mode&3]; + sprint(up->genbuf, "%d", s/2); + } + mkqid(&q, s+1, 0, QTFILE); + devdir(c, q, up->genbuf, 0, eve, p, dp); + return 1; +} + +static Chan* +dupattach(char *spec) +{ + return devattach('d', spec); +} + +static Walkqid* +dupwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, (Dirtab *)0, 0, dupgen); +} + +static long +dupstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, (Dirtab *)0, 0L, dupgen); +} + +static Chan* +dupopen(Chan *c, int omode) +{ + Chan *f; + int fd, twicefd; + + if(c->qid.type & QTDIR){ + if(omode != 0) + error(Eisdir); + c->mode = 0; + c->flag |= COPEN; + c->offset = 0; + return c; + } + if(c->qid.type & QTAUTH) + error(Eperm); + twicefd = c->qid.path - 1; + fd = twicefd/2; + if((twicefd & 1)){ + /* ctl file */ + f = c; + f->mode = openmode(omode); + f->flag |= COPEN; + f->offset = 0; + }else{ + /* fd file */ + f = fdtochan(fd, openmode(omode), 0, 1); + cclose(c); + } + if(omode & OCEXEC) + f->flag |= CCEXEC; + return f; +} + +static void +dupclose(Chan*) +{ +} + +static long +dupread(Chan *c, void *va, long n, vlong off) +{ + char buf[256]; + int fd, twicefd; + + if(c->qid.type & QTDIR) + 
return devdirread(c, va, n, (Dirtab *)0, 0L, dupgen); + twicefd = c->qid.path - 1; + fd = twicefd/2; + if(twicefd & 1){ + c = fdtochan(fd, -1, 0, 1); + procfdprint(c, fd, 0, buf, sizeof buf); + cclose(c); + return readstr(off, va, n, buf); + } + panic("dupread"); + return 0; +} + +static long +dupwrite(Chan*, void*, long, vlong) +{ + error(Eperm); + return 0; /* not reached */ +} + +Dev dupdevtab = { + 'd', + "dup", + + devreset, + devinit, + devshutdown, + dupattach, + dupwalk, + dupstat, + dupopen, + devcreate, + dupclose, + dupread, + devbread, + dupwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devenv.c 4/sys/src/nix/port/devenv.c --- 0/sys/src/nix/port/devenv.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devenv.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,439 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + Maxenvsize = 16300, +}; + +static Egrp *envgrp(Chan *c); +static int envwriteable(Chan *c); + +static Egrp confegrp; /* global environment group containing the kernel configuration */ + +static Evalue* +envlookup(Egrp *eg, char *name, ulong qidpath) +{ + Evalue *e; + int i; + + for(i=0; inent; i++){ + e = eg->ent[i]; + if(e->qid.path == qidpath || (name && e->name[0]==name[0] && strcmp(e->name, name) == 0)) + return e; + } + return nil; +} + +static int +envgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp) +{ + Egrp *eg; + Evalue *e; + + if(s == DEVDOTDOT){ + devdir(c, c->qid, "#e", 0, eve, DMDIR|0775, dp); + return 1; + } + + eg = envgrp(c); + rlock(eg); + e = 0; + if(name) + e = envlookup(eg, name, -1); + else if(s < eg->nent) + e = eg->ent[s]; + + if(e == 0) { + runlock(eg); + return -1; + } + + /* make sure name string continues to exist after we release lock */ + kstrcpy(up->genbuf, e->name, sizeof up->genbuf); + devdir(c, e->qid, up->genbuf, e->len, eve, 0666, dp); + runlock(eg); + return 1; +} + +static Chan* +envattach(char 
*spec) +{ + Chan *c; + Egrp *egrp = nil; + + if(spec && *spec) { + if(strcmp(spec, "c") == 0) + egrp = &confegrp; + if(egrp == nil) + error(Ebadarg); + } + + c = devattach('e', spec); + c->aux = egrp; + return c; +} + +static Walkqid* +envwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, envgen); +} + +static long +envstat(Chan *c, uchar *db, long n) +{ + if(c->qid.type & QTDIR) + c->qid.vers = envgrp(c)->vers; + return devstat(c, db, n, 0, 0, envgen); +} + +static Chan* +envopen(Chan *c, int omode) +{ + Egrp *eg; + Evalue *e; + int trunc; + + eg = envgrp(c); + if(c->qid.type & QTDIR) { + if(omode != OREAD) + error(Eperm); + } + else { + trunc = omode & OTRUNC; + if(omode != OREAD && !envwriteable(c)) + error(Eperm); + if(trunc) + wlock(eg); + else + rlock(eg); + e = envlookup(eg, nil, c->qid.path); + if(e == 0) { + if(trunc) + wunlock(eg); + else + runlock(eg); + error(Enonexist); + } + if(trunc && e->value) { + e->qid.vers++; + free(e->value); + e->value = 0; + e->len = 0; + } + if(trunc) + wunlock(eg); + else + runlock(eg); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +envcreate(Chan *c, char *name, int omode, int) +{ + Egrp *eg; + Evalue *e; + Evalue **ent; + + if(c->qid.type != QTDIR) + error(Eperm); + + omode = openmode(omode); + eg = envgrp(c); + + wlock(eg); + if(waserror()) { + wunlock(eg); + nexterror(); + } + + if(envlookup(eg, name, -1)) + error(Eexist); + + e = smalloc(sizeof(Evalue)); + e->name = smalloc(strlen(name)+1); + strcpy(e->name, name); + + if(eg->nent == eg->ment){ + eg->ment += 32; + ent = smalloc(sizeof(eg->ent[0])*eg->ment); + if(eg->nent) + memmove(ent, eg->ent, sizeof(eg->ent[0])*eg->nent); + free(eg->ent); + eg->ent = ent; + } + e->qid.path = ++eg->path; + e->qid.vers = 0; + eg->vers++; + eg->ent[eg->nent++] = e; + c->qid = e->qid; + + wunlock(eg); + poperror(); + + c->offset = 0; + c->mode = omode; + c->flag |= COPEN; +} + +static void 
+envremove(Chan *c) +{ + int i; + Egrp *eg; + Evalue *e; + + if(c->qid.type & QTDIR) + error(Eperm); + + eg = envgrp(c); + wlock(eg); + e = 0; + for(i=0; inent; i++){ + if(eg->ent[i]->qid.path == c->qid.path){ + e = eg->ent[i]; + eg->nent--; + eg->ent[i] = eg->ent[eg->nent]; + eg->vers++; + break; + } + } + wunlock(eg); + if(e == 0) + error(Enonexist); + free(e->name); + if(e->value) + free(e->value); + free(e); +} + +static void +envclose(Chan *c) +{ + /* + * cclose can't fail, so errors from remove will be ignored. + * since permissions aren't checked, + * envremove can't not remove it if its there. + */ + if(c->flag & CRCLOSE) + envremove(c); +} + +static long +envread(Chan *c, void *a, long n, vlong off) +{ + Egrp *eg; + Evalue *e; + long offset; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, envgen); + + eg = envgrp(c); + rlock(eg); + e = envlookup(eg, nil, c->qid.path); + if(e == 0) { + runlock(eg); + error(Enonexist); + } + + offset = off; + if(offset > e->len) /* protects against overflow converting vlong to long */ + n = 0; + else if(offset + n > e->len) + n = e->len - offset; + if(n <= 0) + n = 0; + else + memmove(a, e->value+offset, n); + runlock(eg); + return n; +} + +static long +envwrite(Chan *c, void *a, long n, vlong off) +{ + char *s; + Egrp *eg; + Evalue *e; + long len, offset; + + if(n <= 0) + return 0; + offset = off; + if(offset > Maxenvsize || n > (Maxenvsize - offset)) + error(Etoobig); + + eg = envgrp(c); + wlock(eg); + e = envlookup(eg, nil, c->qid.path); + if(e == 0) { + wunlock(eg); + error(Enonexist); + } + + len = offset+n; + if(len > e->len) { + s = smalloc(len); + if(e->value){ + memmove(s, e->value, e->len); + free(e->value); + } + e->value = s; + e->len = len; + } + memmove(e->value+offset, a, n); + e->qid.vers++; + eg->vers++; + wunlock(eg); + return n; +} + +Dev envdevtab = { + 'e', + "env", + + devreset, + devinit, + devshutdown, + envattach, + envwalk, + envstat, + envopen, + envcreate, + envclose, + envread, + 
devbread, + envwrite, + devbwrite, + envremove, + devwstat, +}; + +void +envcpy(Egrp *to, Egrp *from) +{ + int i; + Evalue *ne, *e; + + rlock(from); + to->ment = (from->nent+31)&~31; + to->ent = smalloc(to->ment*sizeof(to->ent[0])); + for(i=0; inent; i++){ + e = from->ent[i]; + ne = smalloc(sizeof(Evalue)); + ne->name = smalloc(strlen(e->name)+1); + strcpy(ne->name, e->name); + if(e->value){ + ne->value = smalloc(e->len); + memmove(ne->value, e->value, e->len); + ne->len = e->len; + } + ne->qid.path = ++to->path; + to->ent[i] = ne; + } + to->nent = from->nent; + runlock(from); +} + +void +closeegrp(Egrp *eg) +{ + int i; + Evalue *e; + + if(decref(eg) == 0){ + for(i=0; inent; i++){ + e = eg->ent[i]; + free(e->name); + if(e->value) + free(e->value); + free(e); + } + free(eg->ent); + free(eg); + } +} + +static Egrp* +envgrp(Chan *c) +{ + if(c->aux == nil) + return up->egrp; + return c->aux; +} + +static int +envwriteable(Chan *c) +{ + return iseve() || c->aux == nil; +} + +/* + * to let the kernel set environment variables + */ +void +ksetenv(char *ename, char *eval, int conf) +{ + Chan *c; + char buf[2*KNAMELEN]; + + snprint(buf, sizeof(buf), "#e%s/%s", conf?"c":"", ename); + c = namec(buf, Acreate, OWRITE, 0600); + c->dev->write(c, eval, strlen(eval), 0); + cclose(c); +} + +/* + * Return a copy of configuration environment as a sequence of strings. + * The strings alternate between name and value. 
A zero length name string + * indicates the end of the list + */ +char * +getconfenv(void) +{ + Egrp *eg = &confegrp; + Evalue *e; + char *p, *q; + int i, n; + + rlock(eg); + if(waserror()) { + runlock(eg); + nexterror(); + } + + /* determine size */ + n = 0; + for(i=0; inent; i++){ + e = eg->ent[i]; + n += strlen(e->name) + e->len + 2; + } + p = malloc(n + 1); + if(p == nil) + error(Enomem); + q = p; + for(i=0; inent; i++){ + e = eg->ent[i]; + strcpy(q, e->name); + q += strlen(q) + 1; + memmove(q, e->value, e->len); + q[e->len] = 0; + /* move up to the first null */ + q += strlen(q) + 1; + } + *q = 0; + + poperror(); + runlock(eg); + return p; +} diff -Nru 0/sys/src/nix/port/devkbin.c 4/sys/src/nix/port/devkbin.c --- 0/sys/src/nix/port/devkbin.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devkbin.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,120 @@ +/* + * keyboard scan code input from outside the kernel. + * to avoid duplication of keyboard map processing for usb. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +extern void kbdputsc(int, int); + +enum { + Qdir, + Qkbd, +}; + +Dirtab kbintab[] = { + ".", {Qdir, 0, QTDIR}, 0, 0555, + "kbin", {Qkbd, 0}, 0, 0200, +}; + +Lock kbinlck; +int kbinbusy; + +static Chan * +kbinattach(char *spec) +{ + return devattach(L'Ι', spec); +} + +static Walkqid* +kbinwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, kbintab, nelem(kbintab), devgen); +} + +static long +kbinstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, kbintab, nelem(kbintab), devgen); +} + +static Chan* +kbinopen(Chan *c, int omode) +{ + if(!iseve()) + error(Eperm); + if(c->qid.path == Qkbd){ + lock(&kbinlck); + if(kbinbusy){ + unlock(&kbinlck); + error(Einuse); + } + kbinbusy++; + unlock(&kbinlck); + } + return devopen(c, omode, kbintab, nelem(kbintab), devgen); +} + +static void +kbinclose(Chan *c) +{ + if(c->aux){ + free(c->aux); + 
c->aux = nil; + } + if(c->qid.path == Qkbd) + kbinbusy = 0; +} + +static long +kbinread(Chan *c, void *a, long n, vlong ) +{ + if(c->qid.type == QTDIR) + return devdirread(c, a, n, kbintab, nelem(kbintab), devgen); + return 0; +} + +static long +kbinwrite(Chan *c, void *a, long n, vlong) +{ + int i; + uchar *p = a; + + if(c->qid.type == QTDIR) + error(Eisdir); + switch((int)c->qid.path){ + case Qkbd: + for(i = 0; i < n; i++) + kbdputsc(*p++, 1); /* external source */ + break; + default: + error(Egreg); + } + return n; +} + +Dev kbindevtab = { + L'Ι', + "kbin", + + devreset, + devinit, + devshutdown, + kbinattach, + kbinwalk, + kbinstat, + kbinopen, + devcreate, + kbinclose, + kbinread, + devbread, + kbinwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devkbmap.c 4/sys/src/nix/port/devkbmap.c --- 0/sys/src/nix/port/devkbmap.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devkbmap.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,179 @@ +/* + * keyboard map + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum{ + Qdir, + Qdata, +}; +Dirtab kbmaptab[]={ + ".", {Qdir, 0, QTDIR}, 0, 0555, + "kbmap", {Qdata, 0}, 0, 0600, +}; +#define NKBFILE sizeof(kbmaptab)/sizeof(kbmaptab[0]) + +#define KBLINELEN (3*NUMSIZE+1) /* t code val\n */ + +static Chan * +kbmapattach(char *spec) +{ + return devattach(L'κ', spec); +} + +static Walkqid* +kbmapwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, kbmaptab, NKBFILE, devgen); +} + +static long +kbmapstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, kbmaptab, NKBFILE, devgen); +} + +static Chan* +kbmapopen(Chan *c, int omode) +{ + if(!iseve()) + error(Eperm); + return devopen(c, omode, kbmaptab, NKBFILE, devgen); +} + +static void +kbmapclose(Chan *c) +{ + if(c->aux){ + free(c->aux); + c->aux = nil; + } +} + +static long +kbmapread(Chan *c, void *a, long n, vlong offset) +{ + char *bp; + 
char tmp[KBLINELEN+1]; + int t, sc; + Rune r; + + if(c->qid.type == QTDIR) + return devdirread(c, a, n, kbmaptab, NKBFILE, devgen); + + switch((int)(c->qid.path)){ + case Qdata: + if(kbdgetmap(offset/KBLINELEN, &t, &sc, &r)) { + bp = tmp; + bp += readnum(0, bp, NUMSIZE, t, NUMSIZE); + bp += readnum(0, bp, NUMSIZE, sc, NUMSIZE); + bp += readnum(0, bp, NUMSIZE, r, NUMSIZE); + *bp++ = '\n'; + *bp = 0; + n = readstr(offset%KBLINELEN, a, n, tmp); + } else + n = 0; + break; + default: + n=0; + break; + } + return n; +} + +static long +kbmapwrite(Chan *c, void *a, long n, vlong) +{ + char line[100], *lp, *b; + int key, m, l; + Rune r; + + if(c->qid.type == QTDIR) + error(Eperm); + + switch((int)(c->qid.path)){ + case Qdata: + b = a; + l = n; + lp = line; + if(c->aux){ + strcpy(line, c->aux); + lp = line+strlen(line); + free(c->aux); + c->aux = nil; + } + while(--l >= 0) { + *lp++ = *b++; + if(lp[-1] == '\n' || lp == &line[sizeof(line)-1]) { + *lp = 0; + if(*line == 0) + error(Ebadarg); + if(*line == '\n' || *line == '#'){ + lp = line; + continue; + } + lp = line; + while(*lp == ' ' || *lp == '\t') + lp++; + m = strtoul(line, &lp, 0); + key = strtoul(lp, &lp, 0); + while(*lp == ' ' || *lp == '\t') + lp++; + r = 0; + if(*lp == '\'' && lp[1]) + chartorune(&r, lp+1); + else if(*lp == '^' && lp[1]){ + chartorune(&r, lp+1); + if(0x40 <= r && r < 0x60) + r -= 0x40; + else + error(Ebadarg); + }else if(*lp == 'M' && ('1' <= lp[1] && lp[1] <= '5')) + r = 0xF900+lp[1]-'0'; + else if(*lp>='0' && *lp<='9') /* includes 0x... 
*/ + r = strtoul(lp, &lp, 0); + else + error(Ebadarg); + kbdputmap(m, key, r); + lp = line; + } + } + if(lp != line){ + l = lp-line; + c->aux = lp = smalloc(l+1); + memmove(lp, line, l); + lp[l] = 0; + } + break; + default: + error(Ebadusefd); + } + return n; +} + +Dev kbmapdevtab = { + L'κ', + "kbmap", + + devreset, + devinit, + devshutdown, + kbmapattach, + kbmapwalk, + kbmapstat, + kbmapopen, + devcreate, + kbmapclose, + kbmapread, + devbread, + kbmapwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devkexec.c 4/sys/src/nix/port/devkexec.c --- 0/sys/src/nix/port/devkexec.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devkexec.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,407 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "kexec.h" + +enum +{ + Maxkexecsize = 16300, +}; + +int kxdbg = 0; +#define KXDBG if(!kxdbg) {} else print + + + +static Kexecgrp *kexecgrp(Chan *c); +static int kexecwriteable(Chan *c); + + +static Kexecgrp kgrp; /* global kexec group containing the kernel configuration */ + +static Kvalue* +kexeclookup(Kexecgrp *kg, uintptr addr, ulong qidpath) +{ + Kvalue *e; + int i; + + for(i=0; inent; i++){ + e = kg->ent[i]; + if(e->qid.path == qidpath || e->addr==addr) + return e; + } + return nil; +} + +static int +kexecgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp) +{ + Kexecgrp *kg; + Kvalue *e; + uintptr addr; + + print("starting gen name %s\n", name); + + if(s == DEVDOTDOT){ + devdir(c, c->qid, "#§", 0, eve, DMDIR|0775, dp); + return 1; + } + print("getting kg name %s\n", name); + + kg = kexecgrp(c); + rlock(kg); + e = 0; + if(name) { + addr = strtoull(name, nil, 0); + print("got addr %p\n", addr); + + e = kexeclookup(kg, addr, -1); + }else if(s < kg->nent) + e = kg->ent[s]; + + if(e == 0) { + runlock(kg); + return -1; + } + + /* make sure name string continues to exist after we release lock */ + // how will we free this? 
+ snprint(up->genbuf, sizeof up->genbuf, "0x%p", addr); + print("up->genbuf %s e 0x%p\n", up->genbuf, e); + print("e qid %d e->addr 0x%p size %ld len %ld\n", e->qid, e->addr, e->size, e->len); + + devdir(c, e->qid, up->genbuf, e->len, eve, 0666, dp); + runlock(kg); + print("finished gen\n"); + + return 1; +} + +#define QPATH(p,d,t) ((p)<<16 | (d)<<8 | (t)<<0) + +static Chan* +kexecattach(char *spec) +{ + Chan *c; +// Kexecgrp *kgrp = nil; + Qid qid; + + + c = devattach(L'§', spec); + c->aux = &kgrp; + return c; +} + +static Walkqid* +kexecwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, kexecgen); +} + + +static long +kexecstat(Chan *c, uchar *db, long n) +{ + long nn; + + if(c->qid.type & QTDIR) + c->qid.vers = kexecgrp(c)->vers; + nn = devstat(c, db, n, 0, 0, kexecgen); + + return nn; +} + +static Chan* +kexecopen(Chan *c, int omode) +{ + Kexecgrp *kg; + Kvalue *e; + int trunc; + + kg = kexecgrp(c); + if(c->qid.type & QTDIR) { + if(omode != OREAD) + error(Eperm); + }else { + trunc = omode & OTRUNC; + if(omode != OREAD && !kexecwriteable(c)) + error(Eperm); + if(trunc) + wlock(kg); + else + rlock(kg); + e = kexeclookup(kg, 0, c->qid.path); + if(e == 0) { + if(trunc) + wunlock(kg); + else + runlock(kg); + error(Enonexist); + } + if(trunc && e->size) { // better validity check? 
+ e->qid.vers++; + e->size = 0; + e->len = 0; + } + if(trunc) + wunlock(kg); + else + runlock(kg); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +kexeccreate(Chan *c, char *name, int omode, int) +{ + Kexecgrp *kg; + Kvalue *e; + Kvalue **ent; + uintptr addr; + + addr = strtoull(name, nil, 0); + + if(c->qid.type != QTDIR) + error(Eperm); + + omode = openmode(omode); + kg = kexecgrp(c); + + wlock(kg); + if(waserror()) { + wunlock(kg); + nexterror(); + } + + if(kexeclookup(kg, addr, -1)) + error(Eexist); + + e = smalloc(sizeof(Kvalue)); + e->addr = addr; + + if(kg->nent == kg->ment){ + kg->ment += 32; + ent = smalloc(sizeof(kg->ent[0])*kg->ment); + if(kg->nent) + memmove(ent, kg->ent, sizeof(kg->ent[0])*kg->nent); + free(kg->ent); + kg->ent = ent; + } + e->qid.path = ++kg->path; + e->qid.vers = 0; + kg->vers++; + kg->ent[kg->nent++] = e; + c->qid = e->qid; + + wunlock(kg); + poperror(); + + c->offset = 0; + c->mode = omode; + c->flag |= COPEN; +} + +static void +kexecremove(Chan *c) +{ + int i; + Kexecgrp *kg; + Kvalue *e; + + if(c->qid.type & QTDIR) + error(Eperm); + + kg = kexecgrp(c); + wlock(kg); + e = 0; + for(i=0; inent; i++){ + if(kg->ent[i]->qid.path == c->qid.path){ + e = kg->ent[i]; + kg->nent--; + kg->ent[i] = kg->ent[kg->nent]; + kg->vers++; + break; + } + } + wunlock(kg); + if(e == 0) + error(Enonexist); + free(e); +} + +static void +kexecclose(Chan *c) +{ + /* + * cclose can't fail, so errors from remove will be ignored. + * since permissions aren't checked, + * kexecremove can't not remove it if its there. 
+ */ + if(c->flag & CRCLOSE) + kexecremove(c); +} + +static long +kexecread(Chan *c, void *a, long n, vlong off) +{ + Kexecgrp *kg; + Kvalue *e; + long offset; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, kexecgen); + + kg = kexecgrp(c); + rlock(kg); + e = kexeclookup(kg, 0, c->qid.path); + if(e == 0) { + runlock(kg); + error(Enonexist); + } + + offset = off; + if(offset > e->len) /* protects against overflow converting vlong to long */ + n = 0; + else if(offset + n > e->len) + n = e->len - offset; + if(n <= 0) + n = 0; +// else +// memmove(a, e->value+offset, n); + runlock(kg); + return n; +} + +/* + +need to make slots. the slots themselves can be set somewhere else. + +need make the writes + +open will handle the parsing of the hex numbers. + +no, do it the other way around. just define the slots. +can work on the interface later. + +kmap the space where the values need to stay safe. + +then when that is correct you can do it the other. + +kmap address range +put it in + + +write is going to be significantly different. + +the first thing to do is to make this just work. + add to the kernel cfg. + +*/ + +static long +kexecwrite(Chan *c, void *a, long n, vlong off) +{ + Kexecgrp *kg; + Kvalue *e; + long offset; + + if(n <= 0) + return 0; + offset = off; + if(offset > Maxkexecsize || n > (Maxkexecsize - offset)) + error(Etoobig); + print("a: %s\n", a); + kg = kexecgrp(c); + wlock(kg); + e = kexeclookup(kg, 0, c->qid.path); + if(e == 0) { + wunlock(kg); + error(Enonexist); + } + + // XXX: what to do with what is written? 
+ + e->qid.vers++; + kg->vers++; + wunlock(kg); + return n; +} + +Dev kexecdevtab = { + L'§', + "kexec", + + devreset, + devinit, + devshutdown, + kexecattach, + kexecwalk, + kexecstat, + kexecopen, + kexeccreate, + kexecclose, + kexecread, + devbread, + kexecwrite, + devbwrite, + kexecremove, + devwstat, +}; + +void +kexeccpy(Kexecgrp *to, Kexecgrp *from) +{ + int i; + Kvalue *ne, *e; + + rlock(from); + to->ment = (from->nent+31)&~31; + to->ent = smalloc(to->ment*sizeof(to->ent[0])); + for(i=0; inent; i++){ + e = from->ent[i]; + ne = smalloc(sizeof(Kvalue)); + ne->addr = e->addr; + ne->size = e->size; + ne->qid.path = ++to->path; + to->ent[i] = ne; + } + to->nent = from->nent; + runlock(from); +} + +void +closekgrp(Kexecgrp *kg) +{ + int i; + Kvalue *e; + + if(decref(kg) == 0){ + for(i=0; inent; i++){ + e = kg->ent[i]; + free(e); + } + free(kg->ent); + free(kg); + } +} + +static Kexecgrp* +kexecgrp(Chan *c) +{ + if(c->aux == nil) + return &kgrp; + return c->aux; +} + +static int +kexecwriteable(Chan *c) +{ + return iseve() || c->aux == nil; +} + diff -Nru 0/sys/src/nix/port/devkprof.c 4/sys/src/nix/port/devkprof.c --- 0/sys/src/nix/port/devkprof.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devkprof.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,199 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + + +#define LRES 3 /* log of PC resolution */ +#define SZ 4 /* sizeof of count cell; well known as 4 */ + +struct +{ + uintptr minpc; + uintptr maxpc; + int nbuf; + int time; + ulong *buf; + Lock; +}kprof; + +enum{ + Kprofdirqid, + Kprofdataqid, + Kprofctlqid, +}; +Dirtab kproftab[]={ + ".", {Kprofdirqid, 0, QTDIR},0, DMDIR|0550, + "kpdata", {Kprofdataqid}, 0, 0600, + "kpctl", {Kprofctlqid}, 0, 0600, +}; + +static Chan* +kprofattach(char *spec) +{ + ulong n; + + /* allocate when first used */ + kprof.minpc = KTZERO; + kprof.maxpc = PTR2UINT(etext); + kprof.nbuf = (kprof.maxpc-kprof.minpc) >> LRES; + 
n = kprof.nbuf*SZ; + if(kprof.buf == 0) { + kprof.buf = malloc(n); + if(kprof.buf == 0) + error(Enomem); + } + kproftab[1].length = n; + return devattach('K', spec); +} + +static void +_kproftimer(uintptr pc) +{ + if(kprof.time == 0) + return; + + /* + * if the pc corresponds to the idle loop, don't consider it. + */ + if(m->inidle) + return; + /* + * if the pc is coming out of spllo or splx, + * use the pc saved when we went splhi. + */ + if(pc>=PTR2UINT(spllo) && pc<=PTR2UINT(spldone)) + pc = m->splpc; + +// ilock(&kprof); + kprof.buf[0] += TK2MS(1); + if(kprof.minpc<=pc && pc>= LRES; + kprof.buf[pc] += TK2MS(1); + }else + kprof.buf[1] += TK2MS(1); +// iunlock(&kprof); +} + +static void +kprofinit(void) +{ + if(SZ != sizeof kprof.buf[0]) + panic("kprof size"); + kproftimer = _kproftimer; +} + +static Walkqid* +kprofwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, kproftab, nelem(kproftab), devgen); +} + +static long +kprofstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, kproftab, nelem(kproftab), devgen); +} + +static Chan* +kprofopen(Chan *c, int omode) +{ + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Eperm); + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static void +kprofclose(Chan*) +{ +} + +static long +kprofread(Chan *c, void *va, long n, vlong off) +{ + ulong end; + ulong w, *bp; + uchar *a, *ea; + ulong offset = off; + + switch((int)c->qid.path){ + case Kprofdirqid: + return devdirread(c, va, n, kproftab, nelem(kproftab), devgen); + + case Kprofdataqid: + end = kprof.nbuf*SZ; + if(offset & (SZ-1)) + error(Ebadarg); + if(offset >= end){ + n = 0; + break; + } + if(offset+n > end) + n = end-offset; + n &= ~(SZ-1); + a = va; + ea = a + n; + bp = kprof.buf + offset/SZ; + while(a < ea){ + w = *bp++; + *a++ = w>>24; + *a++ = w>>16; + *a++ = w>>8; + *a++ = w>>0; + } + break; + + default: + n = 0; + break; + } + return n; +} + +static long +kprofwrite(Chan *c, 
void *a, long n, vlong) +{ + switch((int)(c->qid.path)){ + case Kprofctlqid: + if(strncmp(a, "startclr", 8) == 0){ + memset((char *)kprof.buf, 0, kprof.nbuf*SZ); + kprof.time = 1; + }else if(strncmp(a, "start", 5) == 0) + kprof.time = 1; + else if(strncmp(a, "stop", 4) == 0) + kprof.time = 0; + break; + default: + error(Ebadusefd); + } + return n; +} + +Dev kprofdevtab = { + 'K', + "kprof", + + devreset, + kprofinit, + devshutdown, + kprofattach, + kprofwalk, + kprofstat, + kprofopen, + devcreate, + kprofclose, + kprofread, + devbread, + kprofwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devmnt.c 4/sys/src/nix/port/devmnt.c --- 0/sys/src/nix/port/devmnt.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devmnt.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1204 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * References are managed as follows: + * The channel to the server - a network connection or pipe - has one + * reference for every Chan open on the server. The server channel has + * c->mux set to the Mnt used for muxing control to that server. Mnts + * have no reference count; they go away when c goes away. + * Each channel derived from the mount point has mchan set to c, + * and increfs/decrefs mchan to manage references on the server + * connection. 
+ */ + +#define MAXRPC (IOHDRSZ+8192) + +struct Mntrpc +{ + Chan* c; /* Channel for whom we are working */ + Mntrpc* list; /* Free/pending list */ + Fcall request; /* Outgoing file system protocol message */ + Fcall reply; /* Incoming reply */ + Mnt* m; /* Mount device during rpc */ + Rendez r; /* Place to hang out */ + uchar* rpc; /* I/O Data buffer */ + uint rpclen; /* len of buffer */ + Block *b; /* reply blocks */ + char done; /* Rpc completed */ + uvlong stime; /* start time for mnt statistics */ + ulong reqlen; /* request length for mnt statistics */ + ulong replen; /* reply length for mnt statistics */ + Mntrpc* flushed; /* message this one flushes */ +}; + +enum +{ + TAGSHIFT = 5, /* ulong has to be 32 bits */ + TAGMASK = (1<>TAGSHIFT, +}; + +struct Mntalloc +{ + Lock; + Mnt* list; /* Mount devices in use */ + Mnt* mntfree; /* Free list */ + Mntrpc* rpcfree; + int nrpcfree; + int nrpcused; + uint id; + ulong tagmask[NMASK]; +}mntalloc; + +Mnt* mntchk(Chan*); +void mntdirfix(uchar*, Chan*); +Mntrpc* mntflushalloc(Mntrpc*, ulong); +void mntflushfree(Mnt*, Mntrpc*); +void mntfree(Mntrpc*); +void mntgate(Mnt*); +void mntpntfree(Mnt*); +void mntqrm(Mnt*, Mntrpc*); +Mntrpc* mntralloc(Chan*, ulong); +long mntrdwr(int, Chan*, void*, long, vlong); +int mntrpcread(Mnt*, Mntrpc*); +void mountio(Mnt*, Mntrpc*); +void mountmux(Mnt*, Mntrpc*); +void mountrpc(Mnt*, Mntrpc*); +int rpcattn(void*); +Chan* mntchan(void); + +char Esbadstat[] = "invalid directory entry received from server"; +char Enoversion[] = "version not established for mount channel"; + + +void (*mntstats)(int, Chan*, uvlong, ulong); + +static void +mntreset(void) +{ + mntalloc.id = 1; + mntalloc.tagmask[0] = 1; /* don't allow 0 as a tag */ + mntalloc.tagmask[NMASK-1] = 0x80000000UL; /* don't allow NOTAG */ + fmtinstall('F', fcallfmt); + fmtinstall('D', dirfmt); +/* We can't install %M since eipfmt does and is used in the kernel [sape] */ + + if(mfcinit != nil) + mfcinit(); +} + +/* + * Version is not 
multiplexed: message sent only once per connection. + */ +usize +mntversion(Chan *c, u32int msize, char *version, usize returnlen) +{ + Fcall f; + uchar *msg; + Mnt *mnt; + char *v; + long l, n; + usize k; + vlong oo; + char buf[128]; + + qlock(&c->umqlock); /* make sure no one else does this until we've established ourselves */ + if(waserror()){ + qunlock(&c->umqlock); + nexterror(); + } + + /* defaults */ + if(msize == 0) + msize = MAXRPC; + if(msize > c->iounit && c->iounit != 0) + msize = c->iounit; + v = version; + if(v == nil || v[0] == '\0') + v = VERSION9P; + + /* validity */ + if(msize < 0) + error("bad iounit in version call"); + if(strncmp(v, VERSION9P, strlen(VERSION9P)) != 0) + error("bad 9P version specification"); + + mnt = c->mux; + + if(mnt != nil){ + qunlock(&c->umqlock); + poperror(); + + strecpy(buf, buf+sizeof buf, mnt->version); + k = strlen(buf); + if(strncmp(buf, v, k) != 0){ + snprint(buf, sizeof buf, "incompatible 9P versions %s %s", mnt->version, v); + error(buf); + } + if(returnlen != 0){ + if(returnlen < k) + error(Eshort); + memmove(version, buf, k); + } + return k; + } + + f.type = Tversion; + f.tag = NOTAG; + f.msize = msize; + f.version = v; + msg = malloc(8192+IOHDRSZ); + if(msg == nil) + exhausted("version memory"); + if(waserror()){ + free(msg); + nexterror(); + } + k = convS2M(&f, msg, 8192+IOHDRSZ); + if(k == 0) + error("bad fversion conversion on send"); + + lock(c); + oo = c->offset; + c->offset += k; + unlock(c); + + l = c->dev->write(c, msg, k, oo); + + if(l < k){ + lock(c); + c->offset -= k - l; + unlock(c); + error("short write in fversion"); + } + + /* message sent; receive and decode reply */ + n = c->dev->read(c, msg, 8192+IOHDRSZ, c->offset); + if(n <= 0) + error("EOF receiving fversion reply"); + + lock(c); + c->offset += n; + unlock(c); + + l = convM2S(msg, n, &f); + if(l != n) + error("bad fversion conversion on reply"); + if(f.type != Rversion){ + if(f.type == Rerror) + error(f.ename); + error("unexpected reply 
type in fversion"); + } + if(f.msize > msize) + error("server tries to increase msize in fversion"); + if(f.msize<256 || f.msize>1024*1024) + error("nonsense value of msize in fversion"); + k = strlen(f.version); + if(strncmp(f.version, v, k) != 0) + error("bad 9P version returned from server"); + + /* now build Mnt associated with this connection */ + lock(&mntalloc); + mnt = mntalloc.mntfree; + if(mnt != nil) + mntalloc.mntfree = mnt->list; + else { + mnt = malloc(sizeof(Mnt)); + if(mnt == nil) { + unlock(&mntalloc); + exhausted("mount devices"); + } + } + mnt->list = mntalloc.list; + mntalloc.list = mnt; + mnt->version = nil; + kstrdup(&mnt->version, f.version); + mnt->id = mntalloc.id++; + mnt->q = qopen(10*MAXRPC, 0, nil, nil); + mnt->msize = f.msize; + unlock(&mntalloc); + + if(returnlen != 0){ + if(returnlen < k) + error(Eshort); + memmove(version, f.version, k); + } + + poperror(); /* msg */ + free(msg); + + lock(mnt); + mnt->queue = 0; + mnt->rip = 0; + + c->flag |= CMSG; + c->mux = mnt; + mnt->c = c; + unlock(mnt); + + poperror(); /* c */ + qunlock(&c->umqlock); + + return k; +} + +Chan* +mntauth(Chan *c, char *spec) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = c->mux; + + if(mnt == nil){ + mntversion(c, MAXRPC, VERSION9P, 0); + mnt = c->mux; + if(mnt == nil) + error(Enoversion); + } + + c = mntchan(); + if(waserror()) { + /* Close must not be called since it will + * call mnt recursively + */ + chanfree(c); + nexterror(); + } + + r = mntralloc(0, mnt->msize); + + if(waserror()) { + mntfree(r); + nexterror(); + } + + r->request.type = Tauth; + r->request.afid = c->fid; + r->request.uname = up->user; + r->request.aname = spec; + mountrpc(mnt, r); + + c->qid = r->reply.aqid; + c->mchan = mnt->c; + incref(mnt->c); + c->mqid = c->qid; + c->mode = ORDWR; + + poperror(); /* r */ + mntfree(r); + + poperror(); /* c */ + + return c; + +} + +static Chan* +mntattach(char *muxattach) +{ + Mnt *mnt; + Chan *c; + Mntrpc *r; + struct bogus{ + Chan *chan; + Chan *authchan; + 
char *spec; + int flags; + }bogus; + + bogus = *((struct bogus *)muxattach); + c = bogus.chan; + + mnt = c->mux; + + if(mnt == nil){ + mntversion(c, 0, nil, 0); + mnt = c->mux; + if(mnt == nil) + error(Enoversion); + } + + c = mntchan(); + if(waserror()) { + /* Close must not be called since it will + * call mnt recursively + */ + chanfree(c); + nexterror(); + } + + r = mntralloc(0, mnt->msize); + + if(waserror()) { + mntfree(r); + nexterror(); + } + + r->request.type = Tattach; + r->request.fid = c->fid; + if(bogus.authchan == nil) + r->request.afid = NOFID; + else + r->request.afid = bogus.authchan->fid; + r->request.uname = up->user; + r->request.aname = bogus.spec; + mountrpc(mnt, r); + + c->qid = r->reply.qid; + c->mchan = mnt->c; + incref(mnt->c); + c->mqid = c->qid; + + poperror(); /* r */ + mntfree(r); + + poperror(); /* c */ + + if((bogus.flags & MCACHE) && mfcinit != nil) + c->flag |= CCACHE; + return c; +} + +Chan* +mntchan(void) +{ + Chan *c; + + c = devattach('M', 0); + lock(&mntalloc); + c->devno = mntalloc.id++; + unlock(&mntalloc); + + if(c->mchan) + panic("mntchan non-zero %#p", c->mchan); + return c; +} + +static Walkqid* +mntwalk(Chan *c, Chan *nc, char **name, int nname) +{ + int i, alloc; + Mnt *mnt; + Mntrpc *r; + Walkqid *wq; + + if(nc != nil) + print("mntwalk: nc != nil\n"); + if(nname > MAXWELEM) + error("devmnt: too many name elements"); + alloc = 0; + wq = smalloc(sizeof(Walkqid)+(nname-1)*sizeof(Qid)); + if(waserror()){ + if(alloc && wq->clone!=nil) + cclose(wq->clone); + free(wq); + return nil; + } + + alloc = 0; + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(nc == nil){ + nc = devclone(c); + /* + * Until the other side accepts this fid, + * we can't mntclose it. + * nc->dev remains nil for now. 
+ */ + alloc = 1; + } + wq->clone = nc; + + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = Twalk; + r->request.fid = c->fid; + r->request.newfid = nc->fid; + r->request.nwname = nname; + memmove(r->request.wname, name, nname*sizeof(char*)); + + mountrpc(mnt, r); + + if(r->reply.nwqid > nname) + error("too many QIDs returned by walk"); + if(r->reply.nwqid < nname){ + if(alloc) + cclose(nc); + wq->clone = nil; + if(r->reply.nwqid == 0){ + free(wq); + wq = nil; + goto Return; + } + } + + /* move new fid onto mnt device and update its qid */ + if(wq->clone != nil){ + if(wq->clone != c){ + wq->clone->dev = c->dev; + //if(wq->clone->dev != nil) //XDYNX + // devtabincr(wq->clone->dev); + wq->clone->mchan = c->mchan; + incref(c->mchan); + } + if(r->reply.nwqid > 0) + wq->clone->qid = r->reply.wqid[r->reply.nwqid-1]; + } + wq->nqid = r->reply.nwqid; + for(i=0; i<wq->nqid; i++) + wq->qid[i] = r->reply.wqid[i]; + + Return: + poperror(); + mntfree(r); + poperror(); + return wq; +} + +static long +mntstat(Chan *c, uchar *dp, long n) +{ + Mnt *mnt; + Mntrpc *r; + usize nstat; + + if(n < BIT16SZ) + error(Eshortstat); + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = Tstat; + r->request.fid = c->fid; + mountrpc(mnt, r); + + if(r->reply.nstat > n){ + nstat = BIT16SZ; + PBIT16(dp, r->reply.nstat-2); + }else{ + nstat = r->reply.nstat; + memmove(dp, r->reply.stat, nstat); + validstat(dp, nstat); + mntdirfix(dp, c); + } + poperror(); + mntfree(r); + + return nstat; +} + +static Chan* +mntopencreate(int type, Chan *c, char *name, int omode, int perm) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = type; + r->request.fid = c->fid; + r->request.mode = omode; + if(type == Tcreate){ + r->request.perm = perm; + r->request.name = name; + } + mountrpc(mnt, r); + + c->qid = r->reply.qid; + c->offset = 
0; + c->mode = openmode(omode); + c->iounit = r->reply.iounit; + if(c->iounit == 0 || c->iounit > mnt->msize-IOHDRSZ) + c->iounit = mnt->msize-IOHDRSZ; + c->flag |= COPEN; + poperror(); + mntfree(r); + + if(c->flag & CCACHE) + mfcopen(c); + + return c; +} + +static Chan* +mntopen(Chan *c, int omode) +{ + return mntopencreate(Topen, c, nil, omode, 0); +} + +static void +mntcreate(Chan *c, char *name, int omode, int perm) +{ + mntopencreate(Tcreate, c, name, omode, perm); +} + +static void +mntclunk(Chan *c, int t) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()){ + mntfree(r); + nexterror(); + } + + r->request.type = t; + r->request.fid = c->fid; + mountrpc(mnt, r); + mntfree(r); + poperror(); +} + +void +muxclose(Mnt *mnt) +{ + Mntrpc *q, *r; + + for(q = mnt->queue; q; q = r) { + r = q->list; + mntfree(q); + } + mnt->id = 0; + free(mnt->version); + mnt->version = nil; + mntpntfree(mnt); +} + +void +mntpntfree(Mnt *mnt) +{ + Mnt *f, **l; + Queue *q; + + lock(&mntalloc); + l = &mntalloc.list; + for(f = *l; f; f = f->list) { + if(f == mnt) { + *l = mnt->list; + break; + } + l = &f->list; + } + mnt->list = mntalloc.mntfree; + mntalloc.mntfree = mnt; + q = mnt->q; + unlock(&mntalloc); + + qfree(q); +} + +static void +mntclose(Chan *c) +{ + mntclunk(c, Tclunk); +} + +static void +mntremove(Chan *c) +{ + mntclunk(c, Tremove); +} + +static long +mntwstat(Chan *c, uchar *dp, long n) +{ + Mnt *mnt; + Mntrpc *r; + + mnt = mntchk(c); + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = Twstat; + r->request.fid = c->fid; + r->request.nstat = n; + r->request.stat = dp; + mountrpc(mnt, r); + poperror(); + mntfree(r); + return n; +} + +static long +mntread(Chan *c, void *buf, long n, vlong off) +{ + uchar *p, *e; + int nc, cache, isdir; + usize dirlen; + + isdir = 0; + cache = c->flag & CCACHE; + if(c->qid.type & QTDIR) { + cache = 0; + isdir = 1; + } + + p = buf; + if(cache) { + nc 
= mfcread(c, buf, n, off); + if(nc > 0) { + n -= nc; + if(n == 0) + return nc; + p += nc; + off += nc; + } + n = mntrdwr(Tread, c, p, n, off); + mfcupdate(c, p, n, off); + return n + nc; + } + + n = mntrdwr(Tread, c, buf, n, off); + if(isdir) { + for(e = &p[n]; p+BIT16SZ < e; p += dirlen){ + dirlen = BIT16SZ+GBIT16(p); + if(p+dirlen > e) + break; + validstat(p, dirlen); + mntdirfix(p, c); + } + if(p != e) + error(Esbadstat); + } + return n; +} + +static long +mntwrite(Chan *c, void *buf, long n, vlong off) +{ + return mntrdwr(Twrite, c, buf, n, off); +} + +long +mntrdwr(int type, Chan *c, void *buf, long n, vlong off) +{ + Mnt *mnt; + Mntrpc *r; + char *uba; + int cache; + ulong cnt, nr, nreq; + + mnt = mntchk(c); + uba = buf; + cnt = 0; + cache = c->flag & CCACHE; + if(c->qid.type & QTDIR) + cache = 0; + for(;;) { + r = mntralloc(c, mnt->msize); + if(waserror()) { + mntfree(r); + nexterror(); + } + r->request.type = type; + r->request.fid = c->fid; + r->request.offset = off; + r->request.data = uba; + nr = n; + if(nr > mnt->msize-IOHDRSZ) + nr = mnt->msize-IOHDRSZ; + r->request.count = nr; + mountrpc(mnt, r); + nreq = r->request.count; + nr = r->reply.count; + if(nr > nreq) + nr = nreq; + + if(type == Tread) + r->b = bl2mem((uchar*)uba, r->b, nr); + else if(cache) + mfcwrite(c, (uchar*)uba, nr, off); + + poperror(); + mntfree(r); + off += nr; + uba += nr; + cnt += nr; + n -= nr; + if(nr != nreq || n == 0 || up->nnote) + break; + } + return cnt; +} + +void +mountrpc(Mnt *mnt, Mntrpc *r) +{ + char *sn, *cn; + int t; + + r->reply.tag = 0; + r->reply.type = Tmax; /* can't ever be a valid message type */ + + mountio(mnt, r); + + t = r->reply.type; + switch(t) { + case Rerror: + error(r->reply.ename); + case Rflush: + error(Eintr); + default: + if(t == r->request.type+1) + break; + sn = "?"; + if(mnt->c->path != nil) + sn = mnt->c->path->s; + cn = "?"; + if(r->c != nil && r->c->path != nil) + cn = r->c->path->s; + print("mnt: proc %s %d: mismatch from %s %s rep %#p tag 
%d fid %d T%d R%d rp %d\n", + up->text, up->pid, sn, cn, + r, r->request.tag, r->request.fid, r->request.type, + r->reply.type, r->reply.tag); + error(Emountrpc); + } +} + +void +mountio(Mnt *mnt, Mntrpc *r) +{ + int n; + + while(waserror()) { + if(mnt->rip == up) + mntgate(mnt); + if(strcmp(up->errstr, Eintr) != 0){ + mntflushfree(mnt, r); + nexterror(); + } + r = mntflushalloc(r, mnt->msize); + } + + lock(mnt); + r->m = mnt; + r->list = mnt->queue; + mnt->queue = r; + unlock(mnt); + + /* Transmit a file system rpc */ + if(mnt->msize == 0) + panic("msize"); + n = convS2M(&r->request, r->rpc, mnt->msize); + if(n < 0) + panic("bad message type in mountio"); + if(mnt->c->dev->write(mnt->c, r->rpc, n, 0) != n) + error(Emountrpc); + r->stime = fastticks(nil); + r->reqlen = n; + + /* Gate readers onto the mount point one at a time */ + for(;;) { + lock(mnt); + if(mnt->rip == 0) + break; + unlock(mnt); + sleep(&r->r, rpcattn, r); + if(r->done){ + poperror(); + mntflushfree(mnt, r); + return; + } + } + mnt->rip = up; + unlock(mnt); + while(r->done == 0) { + if(mntrpcread(mnt, r) < 0) + error(Emountrpc); + mountmux(mnt, r); + } + mntgate(mnt); + poperror(); + mntflushfree(mnt, r); +} + +static int +doread(Mnt *mnt, int len) +{ + Block *b; + + while(qlen(mnt->q) < len){ + b = mnt->c->dev->bread(mnt->c, mnt->msize, 0); + if(b == nil) + return -1; + if(blocklen(b) == 0){ + freeblist(b); + return -1; + } + qaddlist(mnt->q, b); + } + return 0; +} + +int +mntrpcread(Mnt *mnt, Mntrpc *r) +{ + int i, t, len, hlen; + Block *b, **l, *nb; + + r->reply.type = 0; + r->reply.tag = 0; + + /* read at least length, type, and tag and pullup to a single block */ + if(doread(mnt, BIT32SZ+BIT8SZ+BIT16SZ) < 0) + return -1; + nb = pullupqueue(mnt->q, BIT32SZ+BIT8SZ+BIT16SZ); + + /* read in the rest of the message, avoid ridiculous (for now) message sizes */ + len = GBIT32(nb->rp); + if(len > mnt->msize){ + qdiscard(mnt->q, qlen(mnt->q)); + return -1; + } + if(doread(mnt, len) < 0) + return -1; + 
+ /* pullup the header (i.e. everything except data) */ + t = nb->rp[BIT32SZ]; + switch(t){ + case Rread: + hlen = BIT32SZ+BIT8SZ+BIT16SZ+BIT32SZ; + break; + default: + hlen = len; + break; + } + nb = pullupqueue(mnt->q, hlen); + + if(convM2S(nb->rp, len, &r->reply) <= 0){ + /* bad message, dump it */ + print("mntrpcread: convM2S failed\n"); + qdiscard(mnt->q, len); + return -1; + } + + /* hang the data off of the fcall struct */ + l = &r->b; + *l = nil; + do { + b = qremove(mnt->q); + if(hlen > 0){ + b->rp += hlen; + len -= hlen; + hlen = 0; + } + i = BLEN(b); + if(i <= len){ + len -= i; + *l = b; + l = &(b->next); + } else { + /* split block and put unused bit back */ + nb = allocb(i-len); + memmove(nb->wp, b->rp+len, i-len); + b->wp = b->rp+len; + nb->wp += i-len; + qputback(mnt->q, nb); + *l = b; + return 0; + } + }while(len > 0); + + return 0; +} + +void +mntgate(Mnt *mnt) +{ + Mntrpc *q; + + lock(mnt); + mnt->rip = 0; + for(q = mnt->queue; q; q = q->list) { + if(q->done == 0) + if(wakeup(&q->r)) + break; + } + unlock(mnt); +} + +void +mountmux(Mnt *mnt, Mntrpc *r) +{ + Mntrpc **l, *q; + + lock(mnt); + l = &mnt->queue; + for(q = *l; q; q = q->list) { + /* look for a reply to a message */ + if(q->request.tag == r->reply.tag) { + *l = q->list; + if(q != r) { + /* + * Completed someone else. + * Trade pointers to receive buffer. 
+ */ + q->reply = r->reply; + q->b = r->b; + r->b = nil; + } + q->done = 1; + unlock(mnt); + if(mntstats != nil) + (*mntstats)(q->request.type, + mnt->c, q->stime, + q->reqlen + r->replen); + if(q != r) + wakeup(&q->r); + return; + } + l = &q->list; + } + unlock(mnt); + print("unexpected reply tag %ud; type %d\n", r->reply.tag, r->reply.type); +} + +/* + * Create a new flush request and chain the previous + * requests from it + */ +Mntrpc* +mntflushalloc(Mntrpc *r, ulong iounit) +{ + Mntrpc *fr; + + fr = mntralloc(0, iounit); + + fr->request.type = Tflush; + if(r->request.type == Tflush) + fr->request.oldtag = r->request.oldtag; + else + fr->request.oldtag = r->request.tag; + fr->flushed = r; + + return fr; +} + +/* + * Free a chain of flushes. Remove each unanswered + * flush and the original message from the unanswered + * request queue. Mark the original message as done + * and if it hasn't been answered set the reply to + * Rflush. + */ +void +mntflushfree(Mnt *mnt, Mntrpc *r) +{ + Mntrpc *fr; + + while(r){ + fr = r->flushed; + if(!r->done){ + r->reply.type = Rflush; + mntqrm(mnt, r); + } + if(fr) + mntfree(r); + r = fr; + } +} + +int +alloctag(void) +{ + int i, j; + ulong v; + + for(i = 0; i < NMASK; i++){ + v = mntalloc.tagmask[i]; + if(v == ~0UL) + continue; + for(j = 0; j < 1<<TAGSHIFT; j++) + if((v & (1<<j)) == 0){ + mntalloc.tagmask[i] |= 1<<j; + return (i<<TAGSHIFT) + j; + } + } + panic("no friggin tags left"); + return NOTAG; +} + +void +freetag(int t) +{ + mntalloc.tagmask[t>>TAGSHIFT] &= ~(1<<(t&TAGMASK)); +} + +Mntrpc* +mntralloc(Chan *c, ulong msize) +{ + Mntrpc *new; + + lock(&mntalloc); + new = mntalloc.rpcfree; + if(new == nil){ + new = malloc(sizeof(Mntrpc)); + if(new == nil) { + unlock(&mntalloc); + exhausted("mount rpc header"); + } + /* + * The header is split from the data buffer as + * mountmux may swap the buffer with another header. 
+ */ + new->rpc = mallocz(msize, 0); + if(new->rpc == nil){ + free(new); + unlock(&mntalloc); + exhausted("mount rpc buffer"); + } + new->rpclen = msize; + new->request.tag = alloctag(); + } + else { + mntalloc.rpcfree = new->list; + mntalloc.nrpcfree--; + if(new->rpclen < msize){ + free(new->rpc); + new->rpc = mallocz(msize, 0); + if(new->rpc == nil){ + free(new); + mntalloc.nrpcused--; + unlock(&mntalloc); + exhausted("mount rpc buffer"); + } + new->rpclen = msize; + } + } + mntalloc.nrpcused++; + unlock(&mntalloc); + new->c = c; + new->done = 0; + new->flushed = nil; + new->b = nil; + return new; +} + +void +mntfree(Mntrpc *r) +{ + if(r->b != nil) + freeblist(r->b); + lock(&mntalloc); + if(mntalloc.nrpcfree >= 10){ + free(r->rpc); + freetag(r->request.tag); + free(r); + } + else{ + r->list = mntalloc.rpcfree; + mntalloc.rpcfree = r; + mntalloc.nrpcfree++; + } + mntalloc.nrpcused--; + unlock(&mntalloc); +} + +void +mntqrm(Mnt *mnt, Mntrpc *r) +{ + Mntrpc **l, *f; + + lock(mnt); + r->done = 1; + + l = &mnt->queue; + for(f = *l; f; f = f->list) { + if(f == r) { + *l = r->list; + break; + } + l = &f->list; + } + unlock(mnt); +} + +Mnt* +mntchk(Chan *c) +{ + Mnt *mnt; + + /* This routine is mostly vestiges of prior lives; now it's just sanity checking */ + + if(c->mchan == nil) + panic("mntchk 1: nil mchan c %s\n", chanpath(c)); + + mnt = c->mchan->mux; + + if(mnt == nil) + print("mntchk 2: nil mux c %s c->mchan %s \n", chanpath(c), chanpath(c->mchan)); + + /* + * Was it closed and reused (was error(Eshutdown); now, it cannot happen) + */ + if(mnt->id == 0 || mnt->id >= c->devno) + panic("mntchk 3: can't happen"); + + return mnt; +} + +/* + * Rewrite channel type and dev for in-flight data to + * reflect local values. These entries are known to be + * the first two in the Dir encoding after the count. 
+ */ +void +mntdirfix(uchar *dirbuf, Chan *c) +{ + uint r; + + r = c->dev->dc; + dirbuf += BIT16SZ; /* skip count */ + PBIT16(dirbuf, r); + dirbuf += BIT16SZ; + PBIT32(dirbuf, c->devno); +} + +int +rpcattn(void *v) +{ + Mntrpc *r; + + r = v; + return r->done || r->m->rip == 0; +} + +Dev mntdevtab = { + 'M', + "mnt", + + mntreset, + devinit, + devshutdown, + mntattach, + mntwalk, + mntstat, + mntopen, + mntcreate, + mntclose, + mntread, + devbread, + mntwrite, + devbwrite, + mntremove, + mntwstat, +}; diff -Nru 0/sys/src/nix/port/devpci.c 4/sys/src/nix/port/devpci.c --- 0/sys/src/nix/port/devpci.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devpci.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,256 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +enum { + Qtopdir = 0, + + Qpcidir, + Qpcictl, + Qpciraw, +}; + +#define TYPE(q) ((ulong)(q).path & 0x0F) +#define QID(c, t) (((c)<<4)|(t)) + +static Dirtab topdir[] = { + ".", { Qtopdir, 0, QTDIR }, 0, 0555, + "pci", { Qpcidir, 0, QTDIR }, 0, 0555, +}; + +extern Dev pcidevtab; + +static int +pcidirgen(Chan *c, int t, int tbdf, Dir *dp) +{ + Qid q; + + q = (Qid){BUSBDF(tbdf)|t, 0, 0}; + switch(t) { + case Qpcictl: + snprint(up->genbuf, sizeof up->genbuf, "%d.%d.%dctl", + BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + devdir(c, q, up->genbuf, 0, eve, 0444, dp); + return 1; + case Qpciraw: + snprint(up->genbuf, sizeof up->genbuf, "%d.%d.%draw", + BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf)); + devdir(c, q, up->genbuf, 128, eve, 0664, dp); + return 1; + } + return -1; +} + +static int +pcigen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) +{ + int tbdf; + Pcidev *p; + Qid q; + + switch(TYPE(c->qid)){ + case Qtopdir: + if(s == DEVDOTDOT){ + q = (Qid){QID(0, Qtopdir), 0, QTDIR}; + snprint(up->genbuf, sizeof up->genbuf, "#%C", pcidevtab.dc); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + return devgen(c, nil, topdir, 
nelem(topdir), s, dp); + case Qpcidir: + if(s == DEVDOTDOT){ + q = (Qid){QID(0, Qtopdir), 0, QTDIR}; + snprint(up->genbuf, sizeof up->genbuf, "#%C", pcidevtab.dc); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + return 1; + } + p = pcimatch(nil, 0, 0); + while(s >= 2 && p != nil) { + p = pcimatch(p, 0, 0); + s -= 2; + } + if(p == nil) + return -1; + return pcidirgen(c, s+Qpcictl, p->tbdf, dp); + case Qpcictl: + case Qpciraw: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + return -1; + return pcidirgen(c, TYPE(c->qid), tbdf, dp); + default: + break; + } + return -1; +} + +static Chan* +pciattach(char *spec) +{ + return devattach(pcidevtab.dc, spec); +} + +Walkqid* +pciwalk(Chan* c, Chan *nc, char** name, int nname) +{ + return devwalk(c, nc, name, nname, (Dirtab *)0, 0, pcigen); +} + +static long +pcistat(Chan* c, uchar* dp, long n) +{ + return devstat(c, dp, n, (Dirtab *)0, 0L, pcigen); +} + +static Chan* +pciopen(Chan *c, int omode) +{ + c = devopen(c, omode, (Dirtab*)0, 0, pcigen); + switch(TYPE(c->qid)){ + default: + break; + } + return c; +} + +static void +pciclose(Chan*) +{ +} + +static long +pciread(Chan *c, void *va, long n, vlong offset) +{ + char buf[256], *ebuf, *w, *a; + int i, tbdf, r; + ulong x; + Pcidev *p; + + a = va; + switch(TYPE(c->qid)){ + case Qtopdir: + case Qpcidir: + return devdirread(c, a, n, (Dirtab *)0, 0L, pcigen); + case Qpcictl: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + error(Egreg); + ebuf = buf+sizeof buf-1; /* -1 for newline */ + w = seprint(buf, ebuf, "%.2x.%.2x.%.2x %.4x/%.4x %3d", + p->ccrb, p->ccru, p->ccrp, p->vid, p->did, p->intl); + for(i=0; i<nelem(p->mem); i++){ + if(p->mem[i].size == 0) + continue; + w = seprint(w, ebuf, " %d:%.8lux %d", i, p->mem[i].bar, p->mem[i].size); + } + *w++ = '\n'; + *w = '\0'; + return readstr(offset, a, n, buf); + case Qpciraw: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p 
= pcimatchtbdf(tbdf); + if(p == nil) + error(Egreg); + if(n+offset > 256) + n = 256-offset; + if(n < 0) + return 0; + r = offset; + if(!(r & 3) && n == 4){ + x = pcicfgr32(p, r); + PBIT32(a, x); + return 4; + } + if(!(r & 1) && n == 2){ + x = pcicfgr16(p, r); + PBIT16(a, x); + return 2; + } + for(i = 0; i < n; i++){ + x = pcicfgr8(p, r); + PBIT8(a, x); + a++; + r++; + } + return i; + default: + error(Egreg); + } + return n; +} + +static long +pciwrite(Chan *c, void *va, long n, vlong offset) +{ + char buf[256]; + uchar *a; + int i, r, tbdf; + ulong x; + Pcidev *p; + + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + a = va; + strncpy(buf, (char*)a, n); + buf[n] = 0; + + switch(TYPE(c->qid)){ + case Qpciraw: + tbdf = MKBUS(BusPCI, 0, 0, 0)|BUSBDF((ulong)c->qid.path); + p = pcimatchtbdf(tbdf); + if(p == nil) + error(Egreg); + if(offset > 256) + return 0; + if(n+offset > 256) + n = 256-offset; + r = offset; + if(!(r & 3) && n == 4){ + x = GBIT32(a); + pcicfgw32(p, r, x); + return 4; + } + if(!(r & 1) && n == 2){ + x = GBIT16(a); + pcicfgw16(p, r, x); + return 2; + } + for(i = 0; i < n; i++){ + x = GBIT8(a); + pcicfgw8(p, r, x); + a++; + r++; + } + return i; + default: + error(Egreg); + } + return n; +} + +Dev pcidevtab = { + '$', + "pci", + + devreset, + devinit, + devshutdown, + pciattach, + pciwalk, + pcistat, + pciopen, + devcreate, + pciclose, + pciread, + devbread, + pciwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devpipe.c 4/sys/src/nix/port/devpipe.c --- 0/sys/src/nix/port/devpipe.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devpipe.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,394 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +typedef struct Pipe Pipe; +struct Pipe +{ + QLock; + Pipe *next; + int ref; + ulong path; + Queue *q[2]; + int qref[2]; +}; + +struct +{ + Lock; + ulong path; +} pipealloc; + +enum +{ + Qdir, + Qdata0, + Qdata1, +}; + +Dirtab 
pipedir[] = +{ + ".", {Qdir,0,QTDIR}, 0, DMDIR|0500, + "data", {Qdata0}, 0, 0600, + "data1", {Qdata1}, 0, 0600, +}; +#define NPIPEDIR 3 + +#define PIPETYPE(x) (((unsigned)x)&0x1f) +#define PIPEID(x) ((((unsigned)x))>>5) +#define PIPEQID(i, t) ((((unsigned)i)<<5)|(t)) + + +enum +{ + /* Plan 9 default for nmach > 1 */ + Pipeqsize = 256*1024 +}; + +static void +pipeinit(void) +{ +} + +/* + * create a pipe, no streams are created until an open + */ +static Chan* +pipeattach(char *spec) +{ + Pipe *p; + Chan *c; + + c = devattach('|', spec); + p = malloc(sizeof(Pipe)); + if(p == 0) + exhausted("memory"); + p->ref = 1; + + p->q[0] = qopen(Pipeqsize, 0, 0, 0); + if(p->q[0] == 0){ + free(p); + exhausted("memory"); + } + p->q[1] = qopen(Pipeqsize, 0, 0, 0); + if(p->q[1] == 0){ + free(p->q[0]); + free(p); + exhausted("memory"); + } + + lock(&pipealloc); + p->path = ++pipealloc.path; + unlock(&pipealloc); + + mkqid(&c->qid, PIPEQID(2*p->path, Qdir), 0, QTDIR); + c->aux = p; + c->devno = 0; + return c; +} + +static int +pipegen(Chan *c, char*, Dirtab *tab, int ntab, int i, Dir *dp) +{ + Qid q; + int len; + Pipe *p; + + if(i == DEVDOTDOT){ + devdir(c, c->qid, "#|", 0, eve, DMDIR|0555, dp); + return 1; + } + i++; /* skip . 
*/ + if(tab==0 || i>=ntab) + return -1; + + tab += i; + p = c->aux; + switch((ulong)tab->qid.path){ + case Qdata0: + len = qlen(p->q[0]); + break; + case Qdata1: + len = qlen(p->q[1]); + break; + default: + len = tab->length; + break; + } + mkqid(&q, PIPEQID(PIPEID(c->qid.path), tab->qid.path), 0, QTFILE); + devdir(c, q, tab->name, len, eve, tab->perm, dp); + return 1; +} + + +static Walkqid* +pipewalk(Chan *c, Chan *nc, char **name, int nname) +{ + Walkqid *wq; + Pipe *p; + + wq = devwalk(c, nc, name, nname, pipedir, NPIPEDIR, pipegen); + if(wq != nil && wq->clone != nil && wq->clone != c){ + p = c->aux; + qlock(p); + p->ref++; + if(c->flag & COPEN){ + print("channel open in pipewalk\n"); + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + p->qref[0]++; + break; + case Qdata1: + p->qref[1]++; + break; + } + } + qunlock(p); + } + return wq; +} + +static long +pipestat(Chan *c, uchar *db, long n) +{ + Pipe *p; + Dir dir; + + p = c->aux; + + switch(PIPETYPE(c->qid.path)){ + case Qdir: + devdir(c, c->qid, ".", 0, eve, DMDIR|0555, &dir); + break; + case Qdata0: + devdir(c, c->qid, "data", qlen(p->q[0]), eve, 0600, &dir); + break; + case Qdata1: + devdir(c, c->qid, "data1", qlen(p->q[1]), eve, 0600, &dir); + break; + default: + panic("pipestat"); + } + n = convD2M(&dir, db, n); + if(n < BIT16SZ) + error(Eshortstat); + return n; +} + +/* + * if the stream doesn't exist, create it + */ +static Chan* +pipeopen(Chan *c, int omode) +{ + Pipe *p; + + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Ebadarg); + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; + } + + p = c->aux; + qlock(p); + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + p->qref[0]++; + break; + case Qdata1: + p->qref[1]++; + break; + } + qunlock(p); + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->iounit = qiomaxatomic; + return c; +} + +static void +pipeclose(Chan *c) +{ + Pipe *p; + + p = c->aux; + qlock(p); + + if(c->flag & COPEN){ + /* + * closing either side 
hangs up the stream + */ + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + p->qref[0]--; + if(p->qref[0] == 0){ + qhangup(p->q[1], 0); + qclose(p->q[0]); + } + break; + case Qdata1: + p->qref[1]--; + if(p->qref[1] == 0){ + qhangup(p->q[0], 0); + qclose(p->q[1]); + } + break; + } + } + + + /* + * if both sides are closed, they are reusable + */ + if(p->qref[0] == 0 && p->qref[1] == 0){ + qreopen(p->q[0]); + qreopen(p->q[1]); + } + + /* + * free the structure on last close + */ + p->ref--; + if(p->ref == 0){ + qunlock(p); + free(p->q[0]); + free(p->q[1]); + free(p); + } else + qunlock(p); +} + +static long +piperead(Chan *c, void *va, long n, vlong) +{ + Pipe *p; + + p = c->aux; + + switch(PIPETYPE(c->qid.path)){ + case Qdir: + return devdirread(c, va, n, pipedir, NPIPEDIR, pipegen); + case Qdata0: + return qread(p->q[0], va, n); + case Qdata1: + return qread(p->q[1], va, n); + default: + panic("piperead"); + } + return -1; /* not reached */ +} + +static Block* +pipebread(Chan *c, long n, vlong offset) +{ + Pipe *p; + + p = c->aux; + + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + return qbread(p->q[0], n); + case Qdata1: + return qbread(p->q[1], n); + } + + return devbread(c, n, offset); +} + +/* + * a write to a closed pipe causes a note to be sent to + * the process. 
+ */ +static long +pipewrite(Chan *c, void *va, long n, vlong) +{ + Pipe *p; + + if(!islo()) + print("pipewrite hi %#p\n", getcallerpc(&c)); + if(waserror()) { + /* avoid notes when pipe is a mounted queue */ + if((c->flag & CMSG) == 0) + postnote(up, 1, "sys: write on closed pipe", NUser); + nexterror(); + } + + p = c->aux; + + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + n = qwrite(p->q[1], va, n); + break; + + case Qdata1: + n = qwrite(p->q[0], va, n); + break; + + default: + panic("pipewrite"); + } + + poperror(); + return n; +} + +static long +pipebwrite(Chan *c, Block *bp, vlong) +{ + long n; + Pipe *p; + + if(waserror()) { + /* avoid notes when pipe is a mounted queue */ + if((c->flag & CMSG) == 0) + postnote(up, 1, "sys: write on closed pipe", NUser); + nexterror(); + } + + p = c->aux; + switch(PIPETYPE(c->qid.path)){ + case Qdata0: + n = qbwrite(p->q[1], bp); + break; + + case Qdata1: + n = qbwrite(p->q[0], bp); + break; + + default: + n = 0; + panic("pipebwrite"); + } + + poperror(); + return n; +} + +Dev pipedevtab = { + '|', + "pipe", + + devreset, + pipeinit, + devshutdown, + pipeattach, + pipewalk, + pipestat, + pipeopen, + devcreate, + pipeclose, + piperead, + pipebread, + pipewrite, + pipebwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devpmc.c 4/sys/src/nix/port/devpmc.c --- 0/sys/src/nix/port/devpmc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devpmc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,441 @@ +/* + * Performance counters + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "amd64.h" +#include "pmc.h" + +enum{ + Qdir = 0, + Qgctl, + Qcore, + + Qctr, + Qdata, + Qctl, + + PmcCtlRdStr = 4*1024, +}; + +#define PMCTYPE(x) (((unsigned)x)&0xffful) +#define PMCID(x) (((unsigned)x)>>12) +#define PMCQID(i, t) ((((unsigned)i)<<12)|(t)) + +Dirtab *pmctab; +static int npmctab; +Dirtab *toptab; +static int ntoptab; +int pmcdebug; + +static void 
+topdirinit(int ncores) +{ + int i; + Dirtab *d; + + ntoptab = 2 + ncores; + toptab = malloc(ntoptab * sizeof(Dirtab)); + if (toptab == nil) + return; + d = toptab; + strncpy(d->name, ".", KNAMELEN); + mkqid(&d->qid, Qdir, 0, QTDIR); + d->perm = DMDIR|0555; + d++; + strncpy(d->name, "ctrdesc", KNAMELEN); + mkqid(&d->qid, Qgctl, 0, 0); + d->perm = 0444; + for (i = 2; i < ncores + 2; i++) { + d = &toptab[i]; + snprint(d->name, KNAMELEN, "core%4.4ud", i - 2); + mkqid(&d->qid, PMCQID(i - 2, Qcore), 0, QTDIR); + d->perm = DMDIR|0555; + } + +} + +static void +ctrdirinit(void) +{ + int nr, i; + Dirtab *d; + + nr = pmcnregs(); + + npmctab = 1 + 2*nr; + pmctab = malloc(npmctab * sizeof(Dirtab)); + if (pmctab == nil){ + free(toptab); + toptab = nil; + return; + } + + d = pmctab; + strncpy(d->name, ".", KNAMELEN); + mkqid(&d->qid, Qctr, 0, QTDIR); + d->perm = DMDIR|0555; + for (i = 1; i < nr + 1; i++) { + d = &pmctab[i]; + snprint(d->name, KNAMELEN, "ctr%2.2ud", i - 1); + mkqid(&d->qid, PMCQID(i - 1, Qdata), 0, 0); + d->perm = 0600; + + d = &pmctab[nr + i]; + snprint(d->name, KNAMELEN, "ctr%2.2udctl", i - 1); + mkqid(&d->qid, PMCQID(i - 1, Qctl), 0, 0); + d->perm = 0600; + } + +} + +static void +pmcnull(PmcCtl *p) +{ + memset(p, 0xff, sizeof(PmcCtl)); + p->enab = PmcCtlNullval; + p->user = PmcCtlNullval; + p->os = PmcCtlNullval; + p->reset = PmcCtlNullval; + p->nodesc = 1; +} + +static void +pmcinit(void) +{ + int i, j, ncores, nr; + Mach *mp; + + _pmcupdate = pmcupdate; + ncores = 0; + nr = pmcnregs(); + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->nixrole != NIXUC){ + ncores++; + for(j = 0; j < nr; j++) + pmcnull(&mp->pmc[j]); + } + topdirinit(ncores); + ctrdirinit(); +} + +static Chan * +pmcattach(char *spec) +{ + if (pmctab == nil) + error(Enomem); + return devattach(L'ε', spec); +} +int +pmcgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp) +{ + int t, i, n; + Dirtab *l, *d; + + if(s == DEVDOTDOT){ + devdir(c, (Qid){Qdir, 0, QTDIR}, "#ε", 0, 
eve, 0555, dp); + c->aux = nil; + return 1; + } + /* first, for directories, generate children */ + switch((int)PMCTYPE(c->qid.path)){ + case Qdir: + return devgen(c, name, toptab, ntoptab, s, dp); + case Qctr: + return devgen(c, name, pmctab, npmctab, s, dp); + case Qcore: + c->aux = (void *)PMCID(c->qid.path); /* core no */ + return devgen(c, name, pmctab, npmctab, s, dp); + default: + if(s != 0) + return -1; + + t = PMCTYPE(c->qid.path); + if(t < Qctr){ + i = t; + l = toptab; + n = ntoptab; + }else{ + i = PMCID(t); + if (t == Qctl) + i += (npmctab - 1)/2; + l = pmctab; + n = npmctab; + } + if(i >=n) + return -1; + + d = &l[i]; + + devdir(c, d->qid, d->name, d->length, eve, d->perm, dp); + return 1; + } +} + +static Walkqid* +pmcwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, pmcgen); +} + +static long +pmcstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, nil, 0, pmcgen); +} + +static Chan* +pmcopen(Chan *c, int omode) +{ + if (!iseve()) + error(Eperm); + return devopen(c, omode, nil, 0, pmcgen); +} + +static void +pmcclose(Chan *) +{ +} + + + +static long +pmcread(Chan *c, void *a, long n, vlong offset) +{ + ulong type, id; + PmcCtl p; + char *s; + u64int v; + u64int coreno; + + type = PMCTYPE(c->qid.path); + id = PMCID(c->qid.path); + + switch(type){ + case Qcore: + case Qdir: + case Qctr: + return devdirread(c, a, n, nil, 0, pmcgen); + } + + s = malloc(PmcCtlRdStr); + if(waserror()){ + free(s); + nexterror(); + } + coreno = (u64int)c->aux; + p.coreno = coreno; + switch(type){ + case Qdata: + v = pmcgetctr(coreno, id); + snprint(s, PmcCtlRdStr, "%#ullx", v); + break; + case Qctl: + if (pmcgetctl(coreno, &p, id) < 0) + error("bad ctr"); + if (pmcctlstr(s, PmcCtlRdStr, &p) < 0) + error("bad pmc"); + break; + case Qgctl: + if (pmcdescstr(s, PmcCtlRdStr) < 0) + error("bad pmc"); + break; + default: + error(Eperm); + } + n = readstr(offset, a, n, s); + free(s); + poperror(); + return n; +} + +enum{ + 
Enable, + Disable, + User, + Os, + NoUser, + NoOs, + Reset, + Debug, +}; + +static Cmdtab pmcctlmsg[] = +{ + Enable, "enable", 0, + Disable, "disable", 0, + User, "user", 0, + Os, "os", 0, + NoUser, "nouser", 0, + NoOs, "noos", 0, + Reset, "reset", 0, + Debug, "debug", 0, +}; + +typedef void (*APfunc)(void); + +typedef struct AcPmcArg AcPmcArg; +struct AcPmcArg { + int regno; + int coreno; + PmcCtl; +}; + +typedef struct AcCtrArg AcCtrArg; +struct AcCtrArg { + int regno; + int coreno; + u64int v; +}; + +void +acpmcsetctl(void) +{ + AcPmcArg p; + Mach *mp; + + mp = up->ac; + memmove(&p, mp->icc->data, sizeof(AcPmcArg)); + + mp->icc->rc = pmcsetctl(p.coreno, &p, p.regno); + return; +} + +void +acpmcsetctr(void) +{ + AcCtrArg ctr; + Mach *mp; + + mp = up->ac; + memmove(&ctr, mp->icc->data, sizeof(AcCtrArg)); + + mp->icc->rc = pmcsetctr(ctr.coreno, ctr.v, ctr.regno); + return; +} + + +static long +pmcwrite(Chan *c, void *a, long n, vlong) +{ + Cmdbuf *cb; + Cmdtab *ct; + ulong type; + char str[64]; /* 0x0000000000000000\0 */ + AcPmcArg p; + AcCtrArg ctr; + u64int coreno; + Mach *mp; + + if (c->qid.type == QTDIR) + error(Eperm); + if (c->qid.path == Qgctl) + error(Eperm); + if (n >= sizeof(str)) + error(Ebadctl); + + pmcnull(&p); + coreno = (u64int)c->aux; + p.coreno = coreno; + type = PMCTYPE(c->qid.path); + p.regno = PMCID(c->qid.path); + memmove(str, a, n); + str[n] = '\0'; + mp = up->ac; + + ctr.coreno = coreno; + ctr.regno = p.regno; + if (type == Qdata) { + /* I am a handler for a proc in the core, run an RPC*/ + if (mp != nil && mp->machno == coreno) { + if (runac(mp, acpmcsetctr, 0, &ctr, sizeof(AcCtrArg)) < 0) + n = -1; + } else { + if (pmcsetctr(coreno, strtoull(str, 0, 0), p.regno) < 0) + n = -1; + } + return n; + } + + + /* TODO: should iterate through multiple lines */ + if (strncmp(str, "set ", 4) == 0){ + memmove(p.descstr, (char *)str + 4, n - 4); + p.descstr[n - 4] = '\0'; + p.nodesc = 0; + } else { + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + 
nexterror(); + } + ct = lookupcmd(cb, pmcctlmsg, nelem(pmcctlmsg)); + switch(ct->index){ + case Enable: + p.enab = 1; + break; + case Disable: + p.enab = 0; + break; + case User: + p.user = 1; + break; + case Os: + p.os = 1; + break; + case NoUser: + p.user = 0; + break; + case NoOs: + p.os = 0; + break; + case Reset: + p.reset = 1; + break; + case Debug: + pmcdebug = ~pmcdebug; + break; + default: + cmderror(cb, "invalid ctl"); + break; + } + free(cb); + poperror(); + } + /* I am a handler for a proc in the core, run an RPC*/ + if (mp != nil && mp->machno == coreno) { + if (runac(mp, acpmcsetctl, 0, &p, sizeof(AcPmcArg)) < 0) + n = -1; + } else { + if (pmcsetctl(coreno, &p, p.regno) < 0) + n = -1; + } + return n; +} + + +Dev pmcdevtab = { + L'ε', + "pmc", + + pmcinit, + devinit, + devshutdown, + pmcattach, + pmcwalk, + pmcstat, + pmcopen, + devcreate, + pmcclose, + pmcread, + devbread, + pmcwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devprobe.c 4/sys/src/nix/port/devprobe.c --- 0/sys/src/nix/port/devprobe.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devprobe.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,400 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "probe.h" + +enum { + Qdir, + Qctl, + Qdata, +}; + +enum { + ProbeEntry = 1, + ProbeExit +}; + +/* fix me make this programmable */ +enum { + defaultlogsize = 1024, + printsize = 64, +}; + +typedef struct Probelog Probelog; +struct Probelog { + uvlong ticks; + /* yeah, waste a whole int on something stupid but ... */ + int info; + ulong pc; + /* these are different depending on type */ + long dat[4]; +}; + +static Rendez probesleep; +static QLock probeslk; +static Probe *probes; +static Lock loglk; +static Probelog *probelog = nil; +/* probe indices. These are just unsigned longs. You mask them + * to get an index. This makes fifo empty/full etc. trivial. 
+ */ +static ulong pw = 0, pr = 0; +static int probesactive = 0; +static unsigned long logsize = defaultlogsize, logmask = defaultlogsize - 1; + +static char eventname[] = { + [ProbeEntry] = 'E', + [ProbeExit] = 'X' +}; + +static Dirtab probedir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "probectl", {Qctl}, 0, 0664, + "probe", {Qdata}, 0, 0440, +}; + +char hex[] = { + '0', + '1', + '2', + '3', + '4', + '5', + '6', + '7', + '8', + '9', + 'A', + 'B', + 'C', + 'D', + 'E', + 'F', +}; + +/* big-endian ... */ +void +hex32(ulong l, char *c) +{ + int i; + for(i = 8; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex64(uvlong l, char *c) +{ + hex32(l>>32, c); + hex32(l, &c[8]); +} +static int +lognonempty(void *) +{ + return pw - pr; +} + +static int +logfull(void) +{ + return (pw - pr) >= logsize; +} + +static ulong +idx(ulong f) +{ + return f & logmask; +} + +/* can return NULL, meaning, no record for you */ +static struct Probelog * +newpl(void) +{ + ulong index; + + if (logfull()){ + wakeup(&probesleep); + return nil; + } + + ilock(&loglk); + index = pw++; + iunlock(&loglk); + + return &probelog[idx(index)]; + +} + +static void +probeentry(Probe *p) +{ + struct Probelog *pl; +//print("probeentry %p p %p func %p argp %p\n", &p, p, p->func, p->argp); + pl = newpl(); + if (! pl) + return; + cycles(&pl->ticks); + pl->pc = (ulong)p->func; + pl->dat[0] = p->argp[0]; + pl->dat[1] = p->argp[1]; + pl->dat[2] = p->argp[2]; + pl->dat[3] = p->argp[3]; + pl->info = ProbeEntry; +} + +static void +probeexit(Probe *p) +{ +//print("probeexit %p p %p func %p argp %p\n", &p, p, p->func, p->argp); + struct Probelog *pl; + pl = newpl(); + if (! 
pl) + return; + cycles(&pl->ticks); + pl->pc = (ulong)p->func; + pl->dat[0] = p->rval; + pl->info = ProbeExit; +} + +static Chan* +probeattach(char *spec) +{ + return devattach('+', spec); +} + +static Walkqid* +probewalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, probedir, nelem(probedir), devgen); +} + +static long +probestat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, probedir, nelem(probedir), devgen); +} + +static Chan* +probeopen(Chan *c, int omode) +{ + /* if there is no probelog, allocate one. Open always fails + * if the basic alloc fails. You can resize it later. + */ + if (! probelog) + probelog = malloc(sizeof(*probelog)*logsize); + /* I guess malloc doesn't toss an error */ + if (! probelog) + error("probelog malloc failed"); + + c = devopen(c, omode, probedir, nelem(probedir), devgen); + return c; +} + +static void +probeclose(Chan *) +{ +} + +static long +proberead(Chan *c, void *a, long n, vlong offset) +{ + char *buf; + char *cp = a; + struct Probelog *pl; + Probe *p; + int i; + static QLock gate; + if(c->qid.type == QTDIR) + return devdirread(c, a, n, probedir, nelem(probedir), devgen); + switch((ulong)c->qid.path){ + default: + error("proberead: bad qid"); + case Qctl: + buf = malloc(READSTR); + i = 0; + qlock(&probeslk); + i += snprint(buf + i, READSTR - i, "logsize %lud\n", logsize); + for(p = probes; p != nil; p = p->next) + i += snprint(buf + i, READSTR - i, "probe %p new %s\n", + p->func, p->name); + + for(p = probes; p != nil; p = p->next) + if (p->enabled) + i += snprint(buf + i, READSTR - i, "probe %s on\n", + p->name); + i += snprint(buf + i, READSTR - i, "#probehits %lud, in queue %lud\n", + pw, pw-pr); + snprint(buf + i, READSTR - i, "#probelog %p\n", probelog); + qunlock(&probeslk); + n = readstr(offset, a, n, buf); + free(buf); + break; + case Qdata: + qlock(&gate); + if(waserror()){ + qunlock(&gate); + nexterror(); + } + while(!lognonempty(nil)) + tsleep(&probesleep, lognonempty, 
nil, 5000); + i = 0; + while(lognonempty((void *)0)){ + int j; + pl = probelog + idx(pr); + + if ((i + printsize) >= n) + break; + /* simple format */ + cp[0] = eventname[pl->info]; + cp ++; + *cp++ = ' '; + hex32(pl->pc, cp); + cp[8] = ' '; + cp += 9; + hex64(pl->ticks, cp); + cp[16] = ' '; + cp += 17; + for(j = 0; j < 4; j++){ + hex32(pl->dat[j], cp); + cp[8] = ' '; + cp += 9; + } + /* adjust for extra skip above */ + cp--; + *cp++ = '\n'; + pr++; + i += printsize; + } + poperror(); + qunlock(&gate); + n = i; + break; + } + return n; +} + +static long +probewrite(Chan *c, void *a, long n, vlong) +{ + char *tok[5]; + char *ep, *s = nil; + Probe *p, **pp; + int ntok; + + qlock(&probeslk); + if(waserror()){ + qunlock(&probeslk); + if(s != nil) free(s); + nexterror(); + } + switch((ulong)c->qid.path){ + default: + error("proberead: bad qid"); + case Qctl: + s = malloc(n + 1); + memmove(s, a, n); + s[n] = 0; + ntok = tokenize(s, tok, nelem(tok)); + if(!strcmp(tok[0], "probe")){ /* 'probe' ktextaddr 'on'|'off'|'mk'|'del' [name] */ + if(ntok < 3) + error("devprobe: usage: 'probe' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name]"); + for(pp = &probes; *pp != nil; pp = &(*pp)->next) + if(!strcmp(tok[1], (*pp)->name)) + break; + p = *pp; + if(!strcmp(tok[2], "new")){ + ulong addr; + void *func; + addr = strtoul(tok[1], &ep, 0); + func = (void*)addr; + if(*ep) + error("devprobe: address not in recognized format"); + // if(addr < ((ulong) start) || addr > ((ulong) end)) + // error("devprobe: address out of bounds"); + if(p != nil) + error("devprobe: 0x%p already has probe"); + p = mkprobe(func, probeentry, probeexit); + p->next = probes; + if(ntok < 4) + snprint(p->name, sizeof p->name, "%p", func); + else + strncpy(p->name, tok[3], sizeof p->name); + probes = p; + } else if(!strcmp(tok[2], "on")){ + if(p == nil) + error("devprobe: probe not found"); + if(!p->enabled) + probeinstall(p); +print("probeinstall in devprobe\n"); + probesactive++; + } else if(!strcmp(tok[2], "off")){ 
+ if(p == nil) + error("devprobe: probe not found"); + if(p->enabled) + probeuninstall(p); + probesactive--; + } else if(!strcmp(tok[2], "del")){ + if(p == nil) + error("devprobe: probe not found"); + if(p->enabled) + probeuninstall(p); + probesactive--; + *pp = p->next; + freeprobe(p); + } else if(!strcmp(tok[2], "mv")){ + if(p == nil) + error("devprobe: probe not found"); + if(ntok < 4) + error("devprobe: rename without new name?"); + strncpy(p->name, tok[3], sizeof p->name); + } + } else if(!strcmp(tok[0], "size")){ + int l, size; + struct Probelog *newprobelog; + l = strtoul(tok[1], &ep, 0); + if(*ep) + error("devprobe: size not in recognized format"); + size = 1 << l; + /* sort of foolish. Alloc new probe first, then free old. */ + /* and too bad if there are unread probes */ + newprobelog = malloc(sizeof(*newprobelog)*size); + /* does malloc throw waserror? I don't know */ + free(probelog); + probelog = newprobelog; + logsize = size; + pr = pw = 0; + } else { + error("devprobe: usage: 'probe' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name] or: 'size' buffersize (power of 2)"); + } + free(s); + break; + } + poperror(); + qunlock(&probeslk); + return n; +} + +Dev probedevtab = { + '+', + "probe", + devreset, + devinit, + devshutdown, + probeattach, + probewalk, + probestat, + probeopen, + devcreate, + probeclose, + proberead, + devbread, + probewrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devproc.c 4/sys/src/nix/port/devproc.c --- 0/sys/src/nix/port/devproc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devproc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1797 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include "tos.h" +#include +#include "ureg.h" + +enum +{ + Qdir, + Qtrace, + Qtracepids, + Qargs, + Qctl, + Qfd, + Qfpregs, + Qkregs, + Qmem, + Qnote, + Qnoteid, + Qnotepg, + Qns, + Qproc, + Qregs, + Qsegment, + Qstatus, + 
Qtext, + Qwait, + Qprofile, + Qsyscall, + Qcore, +}; + +enum +{ + CMclose, + CMclosefiles, + CMfixedpri, + CMhang, + CMkill, + CMnohang, + CMnoswap, + CMpri, + CMprivate, + CMprofile, + CMstart, + CMstartstop, + CMstartsyscall, + CMstop, + CMwaitstop, + CMwired, + CMtrace, + /* real time */ + CMperiod, + CMdeadline, + CMcost, + CMsporadic, + CMdeadlinenotes, + CMadmit, + CMextra, + CMexpel, + CMevent, + CMcore, +}; + +enum{ + Nevents = 0x4000, + Emask = Nevents - 1, + Ntracedpids = 1024, +}; + +/* + 6 * 12 for extra NIX counters. */ +#define STATSIZE (2*KNAMELEN+12+9*12 +6*12) + +/* + * Status, fd, and ns are left fully readable (0444) because of their use in debugging, + * particularly on shared servers. + * Arguably, ns and fd shouldn't be readable; if you'd prefer, change them to 0000 + */ +Dirtab procdir[] = +{ + "args", {Qargs}, 0, 0660, + "ctl", {Qctl}, 0, 0000, + "fd", {Qfd}, 0, 0444, + "fpregs", {Qfpregs}, 0, 0000, + "kregs", {Qkregs}, sizeof(Ureg), 0600, + "mem", {Qmem}, 0, 0000, + "note", {Qnote}, 0, 0000, + "noteid", {Qnoteid}, 0, 0664, + "notepg", {Qnotepg}, 0, 0000, + "ns", {Qns}, 0, 0444, + "proc", {Qproc}, 0, 0400, + "regs", {Qregs}, sizeof(Ureg), 0000, + "segment", {Qsegment}, 0, 0444, + "status", {Qstatus}, STATSIZE, 0444, + "text", {Qtext}, 0, 0000, + "wait", {Qwait}, 0, 0400, + "profile", {Qprofile}, 0, 0400, + "syscall", {Qsyscall}, 0, 0400, + "core", {Qcore}, 0, 0444, +}; + +static +Cmdtab proccmd[] = { + CMclose, "close", 2, + CMclosefiles, "closefiles", 1, + CMfixedpri, "fixedpri", 2, + CMhang, "hang", 1, + CMnohang, "nohang", 1, + CMnoswap, "noswap", 1, + CMkill, "kill", 1, + CMpri, "pri", 2, + CMprivate, "private", 1, + CMprofile, "profile", 1, + CMstart, "start", 1, + CMstartstop, "startstop", 1, + CMstartsyscall, "startsyscall", 1, + CMstop, "stop", 1, + CMwaitstop, "waitstop", 1, + CMwired, "wired", 2, + CMtrace, "trace", 0, + CMperiod, "period", 2, + CMdeadline, "deadline", 2, + CMcost, "cost", 2, + CMsporadic, "sporadic", 1, + 
CMdeadlinenotes, "deadlinenotes", 1, + CMadmit, "admit", 1, + CMextra, "extra", 1, + CMexpel, "expel", 1, + CMevent, "event", 1, + CMcore, "core", 2, +}; + +/* Segment type from portdat.h */ +static char *sname[]={ "Text", "Data", "Bss", "Stack", "Shared", "Phys", }; + +/* + * Qids are, in path: + * 4 bits of file type (qids above) + * 23 bits of process slot number + 1 + * in vers, + * 32 bits of pid, for consistency checking + * If notepg, c->pgrpid.path is pgrp slot, .vers is noteid. + */ +#define QSHIFT 5 /* location in qid of proc slot # */ +#define SLOTBITS 23 /* number of bits in the slot */ +#define QIDMASK ((1<>0) +#define SLOT(q) (((((ulong)(q).path)&SLOTMASK)>>QSHIFT)-1) +#define PID(q) ((q).vers) +#define NOTEID(q) ((q).vers) + +static void procctlreq(Proc*, char*, int); +static int procctlmemio(Proc*, uintptr, int, void*, int); +static Chan* proctext(Chan*, Proc*); +static Segment* txt2data(Proc*, Segment*); +static int procstopped(void*); +static void mntscan(Mntwalk*, Proc*); + +static Traceevent *tevents; +static char *tpids, *tpidsc, *tpidse; +static Lock tlock; +static int topens; +static int tproduced, tconsumed; +static void notrace(Proc*, int, vlong); + +void (*proctrace)(Proc*, int, vlong) = notrace; + +static void +profclock(Ureg *ur, Timer *) +{ + Tos *tos; + + if(up == nil || up->state != Running) + return; + + /* user profiling clock */ + if(userureg(ur)){ + tos = (Tos*)(USTKTOP-sizeof(Tos)); + tos->clock += TK2MS(1); + segclock(userpc(ur)); + } +} + +static int +procgen(Chan *c, char *name, Dirtab *tab, int, int s, Dir *dp) +{ + Qid qid; + Proc *p; + char *ename; + Segment *q; + int pid; + ulong path, perm, len; + + if(s == DEVDOTDOT){ + mkqid(&qid, Qdir, 0, QTDIR); + devdir(c, qid, "#p", 0, eve, 0555, dp); + return 1; + } + + if(c->qid.path == Qdir){ + if(s == 0){ + strcpy(up->genbuf, "trace"); + mkqid(&qid, Qtrace, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + if(s == 1){ + strcpy(up->genbuf, 
"tracepids"); + mkqid(&qid, Qtracepids, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + s -= 2; + if(name != nil){ + /* ignore s and use name to find pid */ + pid = strtol(name, &ename, 10); + if(pid<=0 || ename[0]!='\0') + return -1; + s = psindex(pid); + if(s < 0) + return -1; + } + else if(s >= conf.nproc) + return -1; + + if((p = psincref(s)) == nil || (pid = p->pid) == 0) + return 0; + snprint(up->genbuf, sizeof up->genbuf, "%ud", pid); + /* + * String comparison is done in devwalk so + * name must match its formatted pid. + */ + if(name != nil && strcmp(name, up->genbuf) != 0) + return -1; + mkqid(&qid, (s+1)<genbuf, 0, p->user, DMDIR|0555, dp); + psdecref(p); + return 1; + } + if(c->qid.path == Qtrace){ + strcpy(up->genbuf, "trace"); + mkqid(&qid, Qtrace, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + if(c->qid.path == Qtracepids){ + strcpy(up->genbuf, "tracepids"); + mkqid(&qid, Qtrace, -1, QTFILE); + devdir(c, qid, up->genbuf, 0, eve, 0444, dp); + return 1; + } + if(s >= nelem(procdir)) + return -1; + if(tab) + panic("procgen"); + + tab = &procdir[s]; + path = c->qid.path&~(((1<qid))) == nil) + return -1; + perm = tab->perm; + if(perm == 0) + perm = p->procmode; + else /* just copy read bits */ + perm |= p->procmode & 0444; + + len = tab->length; + switch(QID(c->qid)) { + case Qwait: + len = p->nwait; /* incorrect size, but >0 means there's something to read */ + break; + case Qprofile: + q = p->seg[TSEG]; + if(q && q->profile) { + len = (q->top-q->base)>>LRESPROF; + len *= sizeof(*q->profile); + } + break; + } + + mkqid(&qid, path|tab->qid.path, c->qid.vers, QTFILE); + devdir(c, qid, tab->name, len, p->user, perm, dp); + psdecref(p); + return 1; +} + +static void +notrace(Proc*, Tevent, vlong) +{ +} +static Lock tlck; + +static void +_proctrace(Proc* p, Tevent etype, vlong ts) +{ + Traceevent *te; + int tp; + + ilock(&tlck); + if (p->trace == 0 || topens == 0 || + tproduced - tconsumed >= 
Nevents){ + iunlock(&tlck); + return; + } + tp = tproduced++; + iunlock(&tlck); + + te = &tevents[tp&Emask]; + te->pid = p->pid; + te->etype = etype; + if (ts == 0) + te->time = todget(nil); + else + te->time = ts; + te->core = m->machno; +} + +void +proctracepid(Proc *p) +{ + if(p->trace == 1 && proctrace != notrace){ + p->trace = 2; + ilock(&tlck); + tpidsc = seprint(tpidsc, tpidse, "%d %s\n", p->pid, p->text); + iunlock(&tlck); + } +} + +static void +procinit(void) +{ + if(conf.nproc >= (SLOTMASK>>QSHIFT) - 1) + print("warning: too many procs for devproc\n"); + addclock0link((void (*)(void))profclock, 113); /* Relative prime to HZ */ +} + +static Chan* +procattach(char *spec) +{ + return devattach('p', spec); +} + +static Walkqid* +procwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, procgen); +} + +static long +procstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, procgen); +} + +/* + * none can't read or write state on other + * processes. This is to contain access of + * servers running as none should they be + * subverted by, for example, a stack attack. 
+ */ +static void +nonone(Proc *p) +{ + if(p == up) + return; + if(strcmp(up->user, "none") != 0) + return; + if(iseve()) + return; + error(Eperm); +} + +static Chan* +procopen(Chan *c, int omode) +{ + Proc *p; + Pgrp *pg; + Chan *tc; + int pid; + + if(c->qid.type & QTDIR) + return devopen(c, omode, 0, 0, procgen); + + if(QID(c->qid) == Qtrace){ + if (omode != OREAD) + error(Eperm); + lock(&tlock); + if (waserror()){ + unlock(&tlock); + nexterror(); + } + if (topens > 0) + error("already open"); + topens++; + if (tevents == nil){ + tevents = (Traceevent*)malloc(sizeof(Traceevent) * Nevents); + if(tevents == nil) + error(Enomem); + tpids = malloc(Ntracedpids * 20); + if(tpids == nil){ + free(tpids); + tpids = nil; + error(Enomem); + } + tpidsc = tpids; + tpidse = tpids + Ntracedpids * 20; + *tpidsc = 0; + tproduced = tconsumed = 0; + } + proctrace = _proctrace; + poperror(); + unlock(&tlock); + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; + } + if(QID(c->qid) == Qtracepids){ + if (omode != OREAD) + error(Eperm); + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; + } + if((p = psincref(SLOT(c->qid))) == nil) + error(Eprocdied); + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + pid = PID(c->qid); + if(p->pid != pid) + error(Eprocdied); + + omode = openmode(omode); + + switch(QID(c->qid)){ + case Qtext: + if(omode != OREAD) + error(Eperm); + tc = proctext(c, p); + tc->offset = 0; + poperror(); + qunlock(&p->debug); + psdecref(p); + cclose(c); + return tc; + + case Qproc: + case Qsegment: + case Qprofile: + case Qfd: + if(omode != OREAD) + error(Eperm); + break; + + case Qnote: + if(p->privatemem) + error(Eperm); + break; + + case Qmem: + case Qctl: + if(p->privatemem) + error(Eperm); + nonone(p); + break; + + case Qargs: + case Qnoteid: + case Qstatus: + case Qwait: + case Qregs: + case Qfpregs: + case Qkregs: + case Qsyscall: + case Qcore: + nonone(p); + break; + 
+ case Qns: + if(omode != OREAD) + error(Eperm); + c->aux = malloc(sizeof(Mntwalk)); + break; + + case Qnotepg: + nonone(p); + pg = p->pgrp; + if(pg == nil) + error(Eprocdied); + if(omode!=OWRITE || pg->pgrpid == 1) + error(Eperm); + c->pgrpid.path = pg->pgrpid+1; + c->pgrpid.vers = p->noteid; + break; + + default: + poperror(); + qunlock(&p->debug); + psdecref(p); + pprint("procopen %#llux\n", c->qid.path); + error(Egreg); + } + + /* Affix pid to qid */ + if(p->state != Dead) + c->qid.vers = p->pid; + + /* make sure the process slot didn't get reallocated while we were playing */ + coherence(); + if(p->pid != pid) + error(Eprocdied); + + tc = devopen(c, omode, 0, 0, procgen); + poperror(); + qunlock(&p->debug); + psdecref(p); + + return tc; +} + +static long +procwstat(Chan *c, uchar *db, long n) +{ + Proc *p; + Dir *d; + + if(c->qid.type & QTDIR) + error(Eperm); + + if(QID(c->qid) == Qtrace) + return devwstat(c, db, n); + + if((p = psincref(SLOT(c->qid))) == nil) + error(Eprocdied); + nonone(p); + d = nil; + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + free(d); + nexterror(); + } + + if(p->pid != PID(c->qid)) + error(Eprocdied); + + if(strcmp(up->user, p->user) != 0 && strcmp(up->user, eve) != 0) + error(Eperm); + + d = smalloc(sizeof(Dir)+n); + n = convM2D(db, n, &d[0], (char*)&d[1]); + if(n == 0) + error(Eshortstat); + if(!emptystr(d->uid) && strcmp(d->uid, p->user) != 0){ + if(strcmp(up->user, eve) != 0) + error(Eperm); + else + kstrdup(&p->user, d->uid); + } + if(d->mode != ~0UL) + p->procmode = d->mode&0777; + + poperror(); + qunlock(&p->debug); + psdecref(p); + free(d); + + return n; +} + + +static long +procoffset(long offset, char *va, int *np) +{ + if(offset > 0) { + offset -= *np; + if(offset < 0) { + memmove(va, va+*np+offset, -offset); + *np = -offset; + } + else + *np = 0; + } + return offset; +} + +static int +procqidwidth(Chan *c) +{ + char buf[32]; + + return sprint(buf, "%lud", c->qid.vers); +} + +int 
/*
 * Build a printable copy of an argument area.
 *
 * s..p holds NUL-separated strings.  Starting from p and working back
 * towards s, each string is appended to the result with %q quoting,
 * followed by a space.  At most 128 bytes of output are produced.
 *
 * Returns a freshly smalloc'ed, NUL-terminated string with no trailing
 * separator; the caller owns it.
 */
static char*
argcpy(char *s, char *p)
{
	char *t, *tp, *te;
	int n;

	n = p - s;
	if(n > 128)
		n = 128;
	if(n <= 0){
		t = smalloc(1);
		*t = 0;
		return t;
	}
	t = smalloc(n);
	tp = t;
	te = t+n;

	while(tp + 1 < te){
		/* step back to the start of the preceding string */
		for(p--; p>s && p[-1] != 0; p--)
			;
		tp = seprint(tp, te, "%q ", p);
		if(p == s)
			break;
	}
	/*
	 * seprint returns a pointer to the terminating NUL, so the
	 * separator to trim, if any, is the byte before tp.
	 */
	if(tp > t && tp[-1] == ' ')
		tp[-1] = 0;
	return t;
}
+ if(offset >= j) + return 0; + if(offset+n > j) + n = j-offset; + memmove(va, &up->genbuf[offset], n); + return n; + + case Qsyscall: + if(p->syscalltrace == nil) + return 0; + return readstr(offset, va, n, p->syscalltrace); + + case Qcore: + i = 0; + ac = p->ac; + wired = p->wired; + if(ac != nil) + i = ac->machno; + else if(wired != nil) + i = wired->machno; + snprint(statbuf, sizeof statbuf, "%d\n", i); + return readstr(offset, va, n, statbuf); + + case Qmem: + if(offset < KZERO + || (offset >= USTKTOP-USTKSIZE && offset < USTKTOP)){ + r = procctlmemio(p, offset, n, va, 1); + psdecref(p); + return r; + } + + if(!iseve()){ + psdecref(p); + error(Eperm); + } + + /* validate kernel addresses */ + if(offset < PTR2UINT(end)) { + if(offset+n > PTR2UINT(end)) + n = PTR2UINT(end) - offset; + memmove(va, UINT2PTR(offset), n); + psdecref(p); + return n; + } + for(i=0; ikbase <= offset && offset <= cm->klimit-1){ + if(offset+n >= cm->klimit-1) + n = cm->klimit - offset; + memmove(va, UINT2PTR(offset), n); + psdecref(p); + return n; + } + } + psdecref(p); + error(Ebadarg); + + case Qprofile: + s = p->seg[TSEG]; + if(s == 0 || s->profile == 0) + error("profile is off"); + i = (s->top-s->base)>>LRESPROF; + i *= sizeof(*s->profile); + if(offset >= i){ + psdecref(p); + return 0; + } + if(offset+n > i) + n = i - offset; + memmove(va, ((char*)s->profile)+offset, n); + psdecref(p); + return n; + + case Qnote: + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + if(p->pid != PID(c->qid)) + error(Eprocdied); + if(n < 1) /* must accept at least the '\0' */ + error(Etoosmall); + if(p->nnote == 0) + n = 0; + else { + i = strlen(p->note[0].msg) + 1; + if(i > n) + i = n; + rptr = va; + memmove(rptr, p->note[0].msg, i); + rptr[i-1] = '\0'; + p->nnote--; + memmove(p->note, p->note+1, p->nnote*sizeof(Note)); + n = i; + } + if(p->nnote == 0) + p->notepending = 0; + poperror(); + qunlock(&p->debug); + psdecref(p); + return n; + + case Qproc: + 
if(offset >= sizeof(Proc)){ + psdecref(p); + return 0; + } + if(offset+n > sizeof(Proc)) + n = sizeof(Proc) - offset; + memmove(va, ((char*)p)+offset, n); + psdecref(p); + return n; + + case Qregs: + rptr = (uchar*)p->dbgreg; + rsize = sizeof(Ureg); + regread: + if(rptr == 0){ + psdecref(p); + error(Enoreg); + } + if(offset >= rsize){ + psdecref(p); + return 0; + } + if(offset+n > rsize) + n = rsize - offset; + memmove(va, rptr+offset, n); + psdecref(p); + return n; + + case Qkregs: + memset(&kur, 0, sizeof(Ureg)); + setkernur(&kur, p); + rptr = (uchar*)&kur; + rsize = sizeof(Ureg); + goto regread; + + case Qfpregs: + r = fpudevprocio(p, va, n, offset, 0); + psdecref(p); + return r; + + case Qstatus: + if(offset >= STATSIZE){ + psdecref(p); + return 0; + } + if(offset+n > STATSIZE) + n = STATSIZE - offset; + + sps = p->psstate; + if(sps == 0) + sps = statename[p->state]; + memset(statbuf, ' ', sizeof statbuf); + j = 2*KNAMELEN + 12; + snprint(statbuf, j+1, "%-*.*s%-*.*s%-12.11s", + KNAMELEN, KNAMELEN-1, p->text, + KNAMELEN, KNAMELEN-1, p->user, + sps); + + for(i = 0; i < 6; i++) { + l = p->time[i]; + if(i == TReal) + l = sys->ticks - l; + l = TK2MS(l); + readnum(0, statbuf+j+NUMSIZE*i, NUMSIZE, l, NUMSIZE); + } + /* ignore stack, which is mostly non-existent */ + u = 0; + for(i=1; iseg[i]; + if(s) + u += s->top - s->base; + } + readnum(0, statbuf+j+NUMSIZE*6, NUMSIZE, u>>10u, NUMSIZE); /* wrong size */ + readnum(0, statbuf+j+NUMSIZE*7, NUMSIZE, p->basepri, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*8, NUMSIZE, p->priority, NUMSIZE); + + /* + * NIX: added # of traps, syscalls, and iccs + */ + readnum(0, statbuf+j+NUMSIZE*9, NUMSIZE, p->ntrap, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*10, NUMSIZE, p->nintr, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*11, NUMSIZE, p->nsyscall, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*12, NUMSIZE, p->nicc, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*13, NUMSIZE, p->nactrap, NUMSIZE); + readnum(0, statbuf+j+NUMSIZE*14, NUMSIZE, p->nacsyscall, 
NUMSIZE); + memmove(va, statbuf+offset, n); + psdecref(p); + return n; + + case Qsegment: + j = 0; + for(i = 0; i < NSEG; i++) { + sg = p->seg[i]; + if(sg == 0) + continue; + j += sprint(statbuf+j, "%-6s %c%c %p %p %4d\n", + sname[sg->type&SG_TYPE], + sg->type&SG_RONLY ? 'R' : ' ', + sg->profile ? 'P' : ' ', + sg->base, sg->top, sg->ref); + } + psdecref(p); + if(offset >= j) + return 0; + if(offset+n > j) + n = j-offset; + if(n == 0 && offset == 0) + exhausted("segments"); + memmove(va, &statbuf[offset], n); + return n; + + case Qwait: + if(!canqlock(&p->qwaitr)){ + psdecref(p); + error(Einuse); + } + + if(waserror()) { + qunlock(&p->qwaitr); + psdecref(p); + nexterror(); + } + + lock(&p->exl); + if(up == p && p->nchild == 0 && p->waitq == 0) { + unlock(&p->exl); + error(Enochild); + } + pid = p->pid; + while(p->waitq == 0) { + unlock(&p->exl); + sleep(&p->waitr, haswaitq, p); + if(p->pid != pid) + error(Eprocdied); + lock(&p->exl); + } + wq = p->waitq; + p->waitq = wq->next; + p->nwait--; + unlock(&p->exl); + + poperror(); + qunlock(&p->qwaitr); + psdecref(p); + n = snprint(va, n, "%d %lud %lud %lud %q", + wq->w.pid, + wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal], + wq->w.msg); + free(wq); + return n; + + case Qns: + qlock(&p->debug); + if(waserror()){ + qunlock(&p->debug); + psdecref(p); + nexterror(); + } + if(p->pgrp == nil || p->pid != PID(c->qid)) + error(Eprocdied); + mw = c->aux; + if(mw->cddone){ + poperror(); + qunlock(&p->debug); + psdecref(p); + return 0; + } + mntscan(mw, p); + if(mw->mh == 0){ + mw->cddone = 1; + i = snprint(va, n, "cd %s\n", p->dot->path->s); + poperror(); + qunlock(&p->debug); + psdecref(p); + return i; + } + int2flag(mw->cm->mflag, flag); + if(strcmp(mw->cm->to->path->s, "#M") == 0){ + srv = srvname(mw->cm->to->mchan); + i = snprint(va, n, "mount %s %s %s %s\n", flag, + srv==nil? mw->cm->to->mchan->path->s : srv, + mw->mh->from->path->s, mw->cm->spec? 
mw->cm->spec : "");
+			free(srv);
+		}else
+			i = snprint(va, n, "bind %s %s %s\n", flag,
+				mw->cm->to->path->s, mw->mh->from->path->s);
+		poperror();
+		qunlock(&p->debug);
+		psdecref(p);
+		return i;
+
+	case Qnoteid:
+		r = readnum(offset, va, n, p->noteid, NUMSIZE);
+		psdecref(p);
+		return r;
+	case Qfd:
+		r = procfds(p, va, n, offset);
+		psdecref(p);
+		return r;
+	}
+	error(Egreg);
+	return 0;			/* not reached */
+}
+
+/*
+ * Step a walk of p's mount table.  Selects the mount whose
+ * mountid is the smallest one greater than that of the current
+ * entry (mw->cm); when the walk is just starting (mw->mh == 0)
+ * the first entry scanned seeds the search.  mw->mh is cleared
+ * when no entry qualifies.  The name space is read-locked for
+ * the duration of the scan.
+ */
+static void
+mntscan(Mntwalk *mw, Proc *p)
+{
+	Pgrp *pg;
+	Mount *t;
+	Mhead *f;
+	int best, i, last, nxt;
+
+	pg = p->pgrp;
+	rlock(&pg->ns);
+
+	nxt = 0;
+	best = (int)(~0U>>1);	/* largest 2's complement int */
+
+	last = 0;
+	if(mw->mh)
+		last = mw->cm->mountid;
+
+	for(i = 0; i < MNTHASH; i++) {
+		for(f = pg->mnthash[i]; f; f = f->hash) {
+			for(t = f->mount; t; t = t->next) {
+				if(mw->mh == 0 ||
+				  (t->mountid > last && t->mountid < best)) {
+					mw->cm = t;
+					mw->mh = f;
+					best = mw->cm->mountid;
+					nxt = 1;
+				}
+			}
+		}
+	}
+	if(nxt == 0)
+		mw->mh = 0;
+
+	runlock(&pg->ns);
+}
+
+/*
+ * Write n bytes from va to the proc file named by c.
+ * Writes to Qnotepg are posted to the note group remembered in
+ * the channel.  Every other file operates on the process in slot
+ * SLOT(c->qid), with a reference held and p->debug locked for the
+ * duration; the pid is rechecked so a recycled slot is rejected.
+ * Returns the byte count consumed; failures are raised with error().
+ *
+ * va is n bytes long and is not assumed to be NUL-terminated:
+ * the Qargs and Qnoteid cases copy it into a local buffer and
+ * terminate it before handing it to snprint or atoi.
+ */
+static long
+procwrite(Chan *c, void *va, long n, vlong off)
+{
+	Proc *p, *t;
+	int i, id, l;
+	char *args, buf[ERRMAX], tmp[ERRMAX];
+	uintptr offset;
+
+	if(c->qid.type & QTDIR)
+		error(Eisdir);
+
+	/* Use the remembered noteid in the channel rather
+	 * than the process pgrpid
+	 */
+	if(QID(c->qid) == Qnotepg) {
+		pgrpnote(NOTEID(c->pgrpid), va, n, NUser);
+		return n;
+	}
+
+	if((p = psincref(SLOT(c->qid))) == nil)
+		error(Eprocdied);
+
+	qlock(&p->debug);
+	if(waserror()){
+		qunlock(&p->debug);
+		psdecref(p);
+		nexterror();
+	}
+	if(p->pid != PID(c->qid))
+		error(Eprocdied);
+
+	offset = off;
+
+	switch(QID(c->qid)){
+	case Qargs:
+		if(n == 0)
+			error(Eshort);
+		if(n >= sizeof tmp || n >= sizeof buf - strlen(p->text) - 1)
+			error(Etoobig);
+		/* bounded, terminated copy: %s must not run past va+n */
+		memmove(tmp, va, n);
+		tmp[n] = 0;
+		l = snprint(buf, sizeof buf, "%s [%s]", p->text, tmp);
+		args = malloc(l+1);
+		if(args == nil)
+			error(Enomem);
+		memmove(args, buf, l);
+		args[l] = 0;
+		free(p->args);
+		p->nargs = l;
+		p->args = args;
+		p->setargs = 1;
+		break;
+
+	case Qmem:
+		if(p->state != Stopped)
+			error(Ebadctl);
+
+		n = procctlmemio(p, offset, n, va, 0);
+		break;
+
+	case Qregs:
+		if(offset >= sizeof(Ureg))
+			n = 0;
+		else if(offset+n > sizeof(Ureg))
+			n = sizeof(Ureg) - offset;
+		if(p->dbgreg == 0)
+			error(Enoreg);
+		setregisters(p->dbgreg, (char*)(p->dbgreg)+offset, va, n);
+		break;
+
+	case Qfpregs:
+		n = fpudevprocio(p, va, n, offset, 1);
+		break;
+
+	case Qctl:
+		procctlreq(p, va, n);
+		break;
+
+	case Qnote:
+		if(p->kp)
+			error(Eperm);
+		if(n >= ERRMAX-1)
+			error(Etoobig);
+		memmove(buf, va, n);
+		buf[n] = 0;
+		if(!postnote(p, 0, buf, NUser))
+			error("note not posted");
+		break;
+	case Qnoteid:
+		/* terminate the text before parsing it, as Qnote does */
+		if(n >= sizeof buf)
+			error(Etoobig);
+		memmove(buf, va, n);
+		buf[n] = 0;
+		id = atoi(buf);
+		if(id == p->pid) {
+			p->noteid = id;
+			break;
+		}
+		/* joining another group requires a live member owned by the same user */
+		for(i = 0; (t = psincref(i)) != nil; i++){
+			if(t->state == Dead || t->noteid != id){
+				psdecref(t);
+				continue;
+			}
+			if(strcmp(p->user, t->user) != 0){
+				psdecref(t);
+				error(Eperm);
+			}
+			psdecref(t);
+			p->noteid = id;
+			break;
+		}
+		if(p->noteid != id)
+			error(Ebadarg);
+		break;
+	default:
+		/* release everything by hand so pprint runs without p->debug held */
+		poperror();
+		qunlock(&p->debug);
+		psdecref(p);
+		pprint("unknown qid %#llux in procwrite\n", c->qid.path);
+		error(Egreg);
+	}
+	poperror();
+	qunlock(&p->debug);
+	psdecref(p);
+	return n;
+}
+
+Dev procdevtab = {
+	'p',
+	"proc",
+
+	devreset,
+	procinit,
+	devshutdown,
+	procattach,
+	procwalk,
+	procstat,
+	procopen,
+	devcreate,
+	procclose,
+	procread,
+	devbread,
+	procwrite,
+	devbwrite,
+	devremove,
+	procwstat,
+};
+
+static Chan*
+proctext(Chan *c, Proc *p)
+{
+	Chan *tc;
+	Image *i;
+	Segment *s;
+
+	s = p->seg[TSEG];
+	if(s == 0)
+		error(Enonexist);
+	if(p->state==Dead)
+		error(Eprocdied);
+
+	lock(s);
+	i = s->image;
+	if(i == 0) {
+		unlock(s);
+		error(Eprocdied);
+	}
+	unlock(s);
+
+	lock(i);
+	if(waserror()) {
+		unlock(i);
+		nexterror();
+	}
+
+	tc = i->c;
+	if(tc == 0)
+		error(Eprocdied);
+
+	if(incref(tc) == 1 || (tc->flag&COPEN) == 0 || tc->mode!=OREAD) {
+		cclose(tc);
+		error(Eprocdied);
+	}
+
+	if(p->pid != PID(c->qid)){
+		cclose(tc);
+		error(Eprocdied);
+	}
+
+
poperror(); + unlock(i); + + return tc; +} + +void +procstopwait(Proc *p, int ctl) +{ + int pid; + + if(p->pdbg) + error(Einuse); + if(procstopped(p) || p->state == Broken) + return; + + if(ctl != 0) + p->procctl = ctl; + p->pdbg = up; + pid = p->pid; + qunlock(&p->debug); + up->psstate = "Stopwait"; + if(waserror()) { + p->pdbg = 0; + qlock(&p->debug); + nexterror(); + } + sleep(&up->sleep, procstopped, p); + poperror(); + qlock(&p->debug); + if(p->pid != pid) + error(Eprocdied); +} + +static void +procctlcloseone(Proc *p, Fgrp *f, int fd) +{ + Chan *c; + + c = f->fd[fd]; + if(c == nil) + return; + f->fd[fd] = nil; + unlock(f); + qunlock(&p->debug); + cclose(c); + qlock(&p->debug); + lock(f); +} + +void +procctlclosefiles(Proc *p, int all, int fd) +{ + int i; + Fgrp *f; + + f = p->fgrp; + if(f == nil) + error(Eprocdied); + + lock(f); + f->ref++; + if(all) + for(i = 0; i < f->maxfd; i++) + procctlcloseone(p, f, i); + else + procctlcloseone(p, f, fd); + unlock(f); + closefgrp(f); +} + +static char * +parsetime(vlong *rt, char *s) +{ + uvlong ticks; + ulong l; + char *e, *p; + static int p10[] = {100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; + + if (s == nil) + return("missing value"); + ticks=strtoul(s, &e, 10); + if (*e == '.'){ + p = e+1; + l = strtoul(p, &e, 10); + if(e-p > nelem(p10)) + return "too many digits after decimal point"; + if(e-p == 0) + return "ill-formed number"; + l *= p10[e-p-1]; + }else + l = 0; + if (*e == '\0' || strcmp(e, "s") == 0){ + ticks = 1000000000 * ticks + l; + }else if (strcmp(e, "ms") == 0){ + ticks = 1000000 * ticks + l/1000; + }else if (strcmp(e, "µs") == 0 || strcmp(e, "us") == 0){ + ticks = 1000 * ticks + l/1000000; + }else if (strcmp(e, "ns") != 0) + return "unrecognized unit"; + *rt = ticks; + return nil; +} + +static void +procctlreq(Proc *p, char *va, int n) +{ + Segment *s; + int npc, pri, core; + Cmdbuf *cb; + Cmdtab *ct; + vlong time; + char *e; + + if(p->kp) /* no ctl requests to kprocs */ + 
error(Eperm); + + cb = parsecmd(va, n); + if(waserror()){ + free(cb); + nexterror(); + } + + ct = lookupcmd(cb, proccmd, nelem(proccmd)); + + switch(ct->index){ + case CMclose: + procctlclosefiles(p, 0, atoi(cb->f[1])); + break; + case CMclosefiles: + procctlclosefiles(p, 1, 0); + break; + case CMhang: + p->hang = 1; + break; + case CMkill: + switch(p->state) { + case Broken: + unbreak(p); + break; + case Stopped: + case Semdown: + case Semalt: + p->procctl = Proc_exitme; + postnote(p, 0, "sys: killed", NExit); + ready(p); + break; + default: + p->procctl = Proc_exitme; + postnote(p, 0, "sys: killed", NExit); + } + break; + case CMnohang: + p->hang = 0; + break; + case CMnoswap: + p->noswap = 1; + break; + case CMpri: + pri = atoi(cb->f[1]); + if(pri > PriNormal && !iseve()) + error(Eperm); + procpriority(p, pri, 0); + break; + case CMfixedpri: + pri = atoi(cb->f[1]); + if(pri > PriNormal && !iseve()) + error(Eperm); + procpriority(p, pri, 1); + break; + case CMprivate: + p->privatemem = 1; + break; + case CMprofile: + s = p->seg[TSEG]; + if(s == 0 || (s->type&SG_TYPE) != SG_TEXT) + error(Ebadctl); + if(s->profile != 0) + free(s->profile); + npc = (s->top-s->base)>>LRESPROF; + s->profile = malloc(npc*sizeof(*s->profile)); + if(s->profile == 0) + error(Enomem); + break; + case CMstart: + if(p->state != Stopped) + error(Ebadctl); + ready(p); + break; + case CMstartstop: + if(p->state != Stopped) + error(Ebadctl); + p->procctl = Proc_traceme; + ready(p); + procstopwait(p, Proc_traceme); + break; + case CMstartsyscall: + if(p->state != Stopped) + error(Ebadctl); + p->procctl = Proc_tracesyscall; + ready(p); + procstopwait(p, Proc_tracesyscall); + break; + case CMstop: + procstopwait(p, Proc_stopme); + break; + case CMwaitstop: + procstopwait(p, 0); + break; + case CMwired: + core = atoi(cb->f[1]); + procwired(p, core); + sched(); + break; + case CMtrace: + switch(cb->nf){ + case 1: + p->trace ^= 1; + break; + case 2: + p->trace = (atoi(cb->f[1]) != 0); + break; + 
default: + error("args"); + } + break; + /* real time */ + case CMperiod: + if(p->edf == nil) + edfinit(p); + if(e=parsetime(&time, cb->f[1])) /* time in ns */ + error(e); + edfstop(p); + p->edf->T = time/1000; /* Edf times are in µs */ + break; + case CMdeadline: + if(p->edf == nil) + edfinit(p); + if(e=parsetime(&time, cb->f[1])) + error(e); + edfstop(p); + p->edf->D = time/1000; + break; + case CMcost: + if(p->edf == nil) + edfinit(p); + if(e=parsetime(&time, cb->f[1])) + error(e); + edfstop(p); + p->edf->C = time/1000; + break; + case CMsporadic: + if(p->edf == nil) + edfinit(p); + p->edf->flags |= Sporadic; + break; + case CMdeadlinenotes: + if(p->edf == nil) + edfinit(p); + p->edf->flags |= Sendnotes; + break; + case CMadmit: + if(p->edf == 0) + error("edf params"); + if(e = edfadmit(p)) + error(e); + break; + case CMextra: + if(p->edf == nil) + edfinit(p); + p->edf->flags |= Extratime; + break; + case CMexpel: + if(p->edf) + edfstop(p); + break; + case CMevent: + if(up->trace) + proctrace(up, SUser, 0); + break; + case CMcore: + core = atoi(cb->f[1]); + if(core >= MACHMAX) + error("wrong core number"); + else if(core == 0){ + if(p->ac == nil) + error("not running in an ac"); + p->procctl = Proc_totc; + if(p != up && p->state == Exotic){ + /* see the comment in postnote */ + intrac(p); + } + }else{ + if(p->ac != nil) + error("running in an ac"); + if(core < 0) + p->ac = getac(p, -1); + else + p->ac = getac(p, core); + p->procctl = Proc_toac; + p->prepagemem = 1; + } + break; + } + poperror(); + free(cb); +} + +static int +procstopped(void *a) +{ + Proc *p = a; + return p->state == Stopped; +} + +static int +procctlmemio(Proc *p, uintptr offset, int n, void *va, int read) +{ + KMap *k; + Pte *pte; + Page *pg; + Segment *s; + uintptr soff, l; /* hmmmm */ + uchar *b; + uintmem pgsz; + + for(;;) { + s = seg(p, offset, 1); + if(s == 0) + error(Ebadarg); + + if(offset+n >= s->top) + n = s->top-offset; + + if(!read && (s->type&SG_TYPE) == SG_TEXT) + s = txt2data(p, 
s); + + s->steal++; + soff = offset-s->base; + if(waserror()) { + s->steal--; + nexterror(); + } + if(fixfault(s, offset, read, 0, s->color) == 0) + break; + poperror(); + s->steal--; + } + poperror(); + pte = s->map[soff/PTEMAPMEM]; + if(pte == 0) + panic("procctlmemio"); + pgsz = m->pgsz[s->pgszi]; + pg = pte->pages[(soff&(PTEMAPMEM-1))/pgsz]; + if(pagedout(pg)) + panic("procctlmemio1"); + + l = pgsz - (offset&(pgsz-1)); + if(n > l) + n = l; + + k = kmap(pg); + if(waserror()) { + s->steal--; + kunmap(k); + nexterror(); + } + b = (uchar*)VA(k); + b += offset&(pgsz-1); + if(read == 1) + memmove(va, b, n); /* This can fault */ + else + memmove(b, va, n); + poperror(); + kunmap(k); + + /* Ensure the process sees text page changes */ + if(s->flushme) + memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); + + s->steal--; + + if(read == 0) + p->newtlb = 1; + + return n; +} + +static Segment* +txt2data(Proc *p, Segment *s) +{ + int i; + Segment *ps; + + ps = newseg(SG_DATA, s->base, s->size); + ps->image = s->image; + incref(ps->image); + ps->fstart = s->fstart; + ps->flen = s->flen; + ps->flushme = 1; + + qlock(&p->seglock); + for(i = 0; i < NSEG; i++) + if(p->seg[i] == s) + break; + if(i == NSEG) + panic("segment gone"); + + qunlock(&s->lk); + putseg(s); + qlock(&ps->lk); + p->seg[i] = ps; + qunlock(&p->seglock); + + return ps; +} + +Segment* +data2txt(Segment *s) +{ + Segment *ps; + + ps = newseg(SG_TEXT, s->base, s->size); + ps->image = s->image; + incref(ps->image); + ps->fstart = s->fstart; + ps->flen = s->flen; + ps->flushme = 1; + + return ps; +} diff -Nru 0/sys/src/nix/port/devroot.c 4/sys/src/nix/port/devroot.c --- 0/sys/src/nix/port/devroot.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devroot.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,254 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + Qdir = 0, + Qboot = 0x1000, + + Nrootfiles = 32, + Nbootfiles = 32, +}; + +typedef 
struct Dirlist Dirlist;
+/*
+ * One directory of the root device: a table of entries and,
+ * in parallel, a pointer to each entry's contents.
+ */
+struct Dirlist
+{
+	uint base;		/* added to the entry number to form qid.path */
+	Dirtab *dir;		/* directory entries */
+	uchar **data;		/* contents, indexed like dir */
+	int ndir;		/* entries in use */
+	int mdir;		/* capacity of dir and data */
+};
+
+static Dirtab rootdir[Nrootfiles] = {
+	"#/",		{Qdir, 0, QTDIR},	0,		DMDIR|0555,
+	"boot",	{Qboot, 0, QTDIR},	0,		DMDIR|0555,
+};
+static uchar *rootdata[Nrootfiles];
+static Dirlist rootlist =
+{
+	0,
+	rootdir,
+	rootdata,
+	2,
+	Nrootfiles
+};
+
+static Dirtab bootdir[Nbootfiles] = {
+	"boot",	{Qboot, 0, QTDIR},	0,		DMDIR|0555,
+};
+static uchar *bootdata[Nbootfiles];
+static Dirlist bootlist =
+{
+	Qboot,
+	bootdir,
+	bootdata,
+	1,
+	Nbootfiles
+};
+
+/*
+ *  add a file to the list
+ *
+ *  The new entry's qid.path is its 1-based position plus l->base;
+ *  rootgen and rootread undo that mapping to find it again.
+ *  Panics when the table is full.
+ */
+static void
+addlist(Dirlist *l, char *name, uchar *contents, ulong len, int perm)
+{
+	Dirtab *d;
+
+	if(l->ndir >= l->mdir)
+		panic("too many root files");
+	l->data[l->ndir] = contents;
+	d = &l->dir[l->ndir];
+	strcpy(d->name, name);
+	d->length = len;
+	d->perm = perm;
+	d->qid.type = 0;
+	d->qid.vers = 0;
+	d->qid.path = ++l->ndir + l->base;
+	if(perm & DMDIR)
+		d->qid.type |= QTDIR;
+}
+
+/*
+ *  add a file to the boot directory
+ */
+void
+addbootfile(char *name, uchar *contents, ulong len)
+{
+	addlist(&bootlist, name, contents, len, 0555);
+}
+
+/*
+ *  add a root directory
+ */
+static void
+addrootdir(char *name)
+{
+	addlist(&rootlist, name, nil, 0, DMDIR|0555);
+}
+
+static void
+rootreset(void)
+{
+	addrootdir("bin");
+	addrootdir("dev");
+	addrootdir("env");
+	addrootdir("fd");
+	addrootdir("mnt");
+	addrootdir("net");
+	addrootdir("net.alt");
+	addrootdir("proc");
+	addrootdir("root");
+	addrootdir("srv");
+}
+
+static Chan*
+rootattach(char *spec)
+{
+	return devattach('/', spec);
+}
+
+/*
+ * Directory generator.  For the two directories (Qdir, Qboot) it
+ * defers to devgen over the matching table; for any other qid it
+ * describes the single entry the path refers to (only s == 0 is
+ * valid there).  Paths below Qboot belong to rootlist, the rest
+ * to bootlist.
+ */
+static int
+rootgen(Chan *c, char *name, Dirtab*, int, int s, Dir *dp)
+{
+	int t;
+	Dirtab *d;
+	Dirlist *l;
+
+	switch((int)c->qid.path){
+	case Qdir:
+		if(s == DEVDOTDOT){
+			devdir(c, (Qid){Qdir, 0, QTDIR}, "#/", 0, eve, 0555, dp);
+			return 1;
+		}
+		return devgen(c, name, rootlist.dir, rootlist.ndir, s, dp);
+	case Qboot:
+		if(s == DEVDOTDOT){
+			devdir(c, (Qid){Qdir, 0, QTDIR}, "#/", 0, eve, 0555, dp);
+			return 1;
+		}
+		return devgen(c, name, bootlist.dir, bootlist.ndir, s, dp);
+	default:
+		if(s == DEVDOTDOT){
+			if((int)c->qid.path < Qboot)
+				devdir(c, (Qid){Qdir, 0, QTDIR}, "#/", 0, eve, 0555, dp);
+			else
+				devdir(c, (Qid){Qboot, 0, QTDIR}, "#/", 0, eve, 0555, dp);
+			return 1;
+		}
+		if(s != 0)
+			return -1;
+		if((int)c->qid.path < Qboot){
+			t = c->qid.path-1;
+			l = &rootlist;
+		}else{
+			t = c->qid.path - Qboot - 1;
+			l = &bootlist;
+		}
+		if(t >= l->ndir)
+			return -1;
+if(t < 0){
+print("rootgen %#llux %d %d\n", c->qid.path, s, t);
+panic("whoops");
+}
+		d = &l->dir[t];
+		devdir(c, d->qid, d->name, d->length, eve, d->perm, dp);
+		return 1;
+	}
+}
+
+static Walkqid*
+rootwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c,  nc, name, nname, nil, 0, rootgen);
+}
+
+static long
+rootstat(Chan *c, uchar *dp, long n)
+{
+	return devstat(c, dp, n, nil, 0, rootgen);
+}
+
+static Chan*
+rootopen(Chan *c, int omode)
+{
+	return devopen(c, omode, nil, 0, devgen);
+}
+
+/*
+ * sysremove() knows this is a nop
+ */
+static void
+rootclose(Chan*)
+{
+}
+
+/*
+ * Read from a root or boot file.  Directories are read through
+ * devdirread.  For a plain file the qid path is mapped back to a
+ * table index exactly as rootgen does (path - base - 1); an index
+ * beyond the table raises Egreg.  Reads at or past the end of the
+ * file return 0, and reads crossing the end are shortened.
+ */
+static long
+rootread(Chan *c, void *buf, long n, vlong off)
+{
+	ulong t;
+	Dirtab *d;
+	Dirlist *l;
+	uchar *data;
+	ulong offset = off;
+
+	t = c->qid.path;
+	switch(t){
+	case Qdir:
+	case Qboot:
+		return devdirread(c, buf, n, nil, 0, rootgen);
+	}
+
+	/* paths below Qboot index rootlist, the others bootlist */
+	if(t < Qboot)
+		l = &rootlist;
+	else{
+		t -= Qboot;
+		l = &bootlist;
+	}
+
+	t--;		/* addlist numbers entries from 1 */
+	if(t >= l->ndir)
+		error(Egreg);
+
+	d = &l->dir[t];
+	data = l->data[t];
+	if(offset >= d->length)
+		return 0;
+	if(offset+n > d->length)
+		n = d->length - offset;
+	memmove(buf, data+offset, n);
+	return n;
+}
+
+static long
+rootwrite(Chan*, void*, long, vlong)
+{
+	error(Egreg);
+	return 0;
+}
+
+Dev rootdevtab = {
+	'/',
+	"root",
+
+	rootreset,
+	devinit,
+	devshutdown,
+	rootattach,
+	rootwalk,
+	rootstat,
+	rootopen,
+	devcreate,
+	rootclose,
+	rootread,
+	devbread,
+	rootwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
diff -Nru 0/sys/src/nix/port/devsd.c 4/sys/src/nix/port/devsd.c
--- 0/sys/src/nix/port/devsd.c Thu Jan 1
00:00:00 1970 +++ 4/sys/src/nix/port/devsd.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1892 @@ +/* + * Storage Device. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#include "../port/sd.h" + +extern Dev sddevtab; +extern SDifc* sdifc[]; + +static char Echange[] = "media or partition has changed"; +static char Enoata[] = "raw ata commands not supported"; +static char Enoscsi[] = "raw scsi commands not supported"; + +static char devletters[] = "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +static SDev *devs[sizeof devletters-1]; +static QLock devslock; +static SDunit topctlunit; + +enum { + Ahdrsz = 2, +}; + +enum { + Rawcmd, + Rawdata, + Rawstatus, +}; + +enum { + Qtopdir = 1, /* top level directory */ + Qtopbase, + Qtopctl = Qtopbase, + + Qunitdir, /* directory per unit */ + Qunitbase, + Qctl = Qunitbase, + Qraw, + Qpart, + Qextra, + + TypeLOG = 4, + NType = (1<>TypeSHIFT) & TypeMASK) +#define PART(q) ((((ulong)(q).path)>>PartSHIFT) & PartMASK) +#define UNIT(q) ((((ulong)(q).path)>>UnitSHIFT) & UnitMASK) +#define DEV(q) ((((ulong)(q).path)>>DevSHIFT) & DevMASK) +#define QID(d,u, p, t) (((d)<part != nil){ + partno = -1; + for(i = 0; i < unit->npart; i++){ + pp = &unit->part[i]; + if(!pp->valid){ + if(partno == -1) + partno = i; + break; + } + if(strcmp(name, pp->name) == 0){ + if(pp->start == start && pp->end == end) + return; + error(Ebadctl); + } + } + } + else{ + if((unit->part = malloc(sizeof(SDpart)*SDnpart)) == nil) + error(Enomem); + unit->npart = SDnpart; + partno = 0; + } + + /* + * If no free slot found then increase the + * array size (can't get here with unit->part == nil). 
+ */ + if(partno == -1){ + if(unit->npart >= NPart) + error(Enomem); + if((pp = malloc(sizeof(SDpart)*(unit->npart+SDnpart))) == nil) + error(Enomem); + memmove(pp, unit->part, sizeof(SDpart)*unit->npart); + free(unit->part); + unit->part = pp; + partno = unit->npart; + unit->npart += SDnpart; + } + + /* + * Check size and extent are valid. + */ + if(start > end || end > unit->sectors) + error(Eio); + pp = &unit->part[partno]; + pp->start = start; + pp->end = end; + kstrdup(&pp->name, name); + kstrdup(&pp->user, eve); + pp->perm = 0640; + pp->valid = 1; +} + +static void +sddelpart(SDunit* unit, char* name) +{ + int i; + SDpart *pp; + + /* + * Look for the partition to delete. + * Can't delete if someone still has it open. + */ + pp = unit->part; + for(i = 0; i < unit->npart; i++){ + if(strcmp(name, pp->name) == 0) + break; + pp++; + } + if(i >= unit->npart) + error(Ebadctl); + if(strcmp(up->user, pp->user) && !iseve()) + error(Eperm); + pp->valid = 0; + pp->vers++; +} + +static void +sdincvers(SDunit *unit) +{ + int i; + + unit->vers++; + if(unit->part){ + for(i = 0; i < unit->npart; i++){ + unit->part[i].valid = 0; + unit->part[i].vers++; + } + } +} + +static int +sdinitpart(SDunit* unit) +{ + int nf; + ulong vers0; + uvlong start, end; + char *f[4], *p, *q, buf[10]; + + vers0 = unit->vers; + if(unit->sectors > 0){ + unit->sectors = unit->secsize = 0; + sdincvers(unit); + } + + if(unit->inquiry[0] & 0xC0) + return 0; + switch(unit->inquiry[0] & 0x1F){ + case 0x00: /* DA */ + case 0x04: /* WORM */ + case 0x05: /* CD-ROM */ + case 0x07: /* MO */ + break; + default: + return 0; + } + + if(unit->dev->ifc->online) + unit->dev->ifc->online(unit); + if(unit->sectors){ + sdincvers(unit); + sdaddpart(unit, "data", 0, unit->sectors); + } + if(unit->sectors && vers0 == 0){ + /* + * Use partitions passed from boot program, + * e.g. 
+		 *	sdC0part=dos 63 123123/plan9 123123 456456
+		 * This happens before /boot sets hostname so the
+		 * partitions will have the null-string for user.
+		 * The gen functions patch it up.
+		 */
+		snprint(buf, sizeof buf, "%spart", unit->name);
+		for(p = getconf(buf); p != nil; p = q){
+			if(q = strchr(p, '/'))
+				*q++ = '\0';
+			nf = tokenize(p, f, nelem(f));
+			if(nf < 3)
+				continue;
+
+			start = strtoull(f[1], 0, 0);
+			end = strtoull(f[2], 0, 0);
+			if(!waserror()){
+				sdaddpart(unit, f[0], start, end);
+				poperror();
+			}
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Map a device id letter to its position in devletters,
+ * or -1 if it is not one of them.
+ */
+static int
+sdindex(int idno)
+{
+	char *p;
+
+	p = strchr(devletters, idno);
+	if(p == nil)
+		return -1;
+	return p-devletters;
+}
+
+/*
+ * Look up the device registered under id letter idno and return
+ * it with its reference count incremented, or nil if there is
+ * none.  The caller must decref(&sdev->r) when done.
+ */
+static SDev*
+sdgetdev(int idno)
+{
+	SDev *sdev;
+	int i;
+
+	/*
+	 * strchr matches the terminating NUL when idno is 0, which
+	 * yields an index one past the end of devs[]; reject it.
+	 */
+	if((i = sdindex(idno)) < 0 || i >= nelem(devs))
+		return nil;
+
+	qlock(&devslock);
+	if(sdev = devs[i])
+		incref(&sdev->r);
+	qunlock(&devslock);
+	return sdev;
+}
+
+/*
+ * Return unit subno of sdev, probing and allocating it on first
+ * use.  Returns nil if subno is out of range, if an earlier probe
+ * was already attempted, if allocation fails or if the
+ * interface's verify routine rejects the unit.
+ */
+static SDunit*
+sdgetunit(SDev* sdev, int subno)
+{
+	SDunit *unit;
+	char buf[32];
+
+	/*
+	 * Associate a unit with a given device and sub-unit
+	 * number on that device.
+	 * The device will be probed if it has not already been
+	 * successfully accessed.
+	 */
+	qlock(&sdev->unitlock);
+	/* unit[] and unitflg[] hold exactly nunit entries */
+	if(subno < 0 || subno >= sdev->nunit){
+		qunlock(&sdev->unitlock);
+		return nil;
+	}
+
+	unit = sdev->unit[subno];
+	if(unit == nil){
+		/*
+		 * Probe the unit only once. This decision
+		 * may be a little severe and reviewed later.
+		 */
+		if(sdev->unitflg[subno]){
+			qunlock(&sdev->unitlock);
+			return nil;
+		}
+		if((unit = malloc(sizeof(SDunit))) == nil){
+			qunlock(&sdev->unitlock);
+			return nil;
+		}
+		sdev->unitflg[subno] = 1;
+
+		snprint(buf, sizeof buf, "%s%x", sdev->name, subno);
+		kstrdup(&unit->name, buf);
+		kstrdup(&unit->user, eve);
+		unit->perm = 0555;
+		unit->subno = subno;
+		unit->dev = sdev;
+
+		if(sdev->enabled == 0 && sdev->ifc->enable)
+			sdev->ifc->enable(sdev);
+		sdev->enabled = 1;
+
+		/*
+		 * No need to lock anything here as this is only
+		 * called before the unit is made available in the
+		 * sdunit[] array.
+		 */
+		if(unit->dev->ifc->verify(unit) == 0){
+			qunlock(&sdev->unitlock);
+			free(unit);
+			return nil;
+		}
+		sdev->unit[subno] = unit;
+	}
+	qunlock(&sdev->unitlock);
+	return unit;
+}
+
+static void
+sdreset(void)
+{
+	int i;
+
+	/*
+	 * Probe all known controller types and register any devices found.
+	 */
+	for(i = 0; sdifc[i] != nil; i++){
+		if(sdifc[i]->pnp == nil)
+			continue;
+		sdadddevs(sdifc[i]->pnp());
+	}
+}
+
+/*
+ * Register each device on the list sdev.  A device asks for the
+ * letter in sdev->idno; if that slot of devs[] is taken the next
+ * free one (wrapping around) is used instead, and idno and name
+ * are rewritten to match.  Devices that cannot be registered are
+ * cleared through the interface and freed.
+ */
+void
+sdadddevs(SDev *sdev)
+{
+	int i, j, id;
+	SDev *next;
+
+	for(; sdev; sdev=next){
+		next = sdev->next;
+
+		sdev->unit = malloc(sdev->nunit * sizeof(SDunit*));
+		sdev->unitflg = malloc(sdev->nunit * sizeof(int));
+		if(sdev->unit == nil || sdev->unitflg == nil){
+			print("sdadddevs: out of memory\n");
+		giveup:
+			free(sdev->unit);
+			free(sdev->unitflg);
+			if(sdev->ifc->clear)
+				sdev->ifc->clear(sdev);
+			free(sdev);
+			continue;
+		}
+		id = sdindex(sdev->idno);
+		if(id == -1){
+			/* report the id that was asked for, not the failed index */
+			print("sdadddevs: bad id number %d (%C)\n", sdev->idno, sdev->idno);
+			goto giveup;
+		}
+		qlock(&devslock);
+		for(i=0; i<nelem(devs); i++){
+			if(devs[j = (id+i)%nelem(devs)] == nil){
+				sdev->idno = devletters[j];
+				devs[j] = sdev;
+				snprint(sdev->name, sizeof sdev->name, "sd%c", devletters[j]);
+				break;
+			}
+		}
+		qunlock(&devslock);
+		if(i == nelem(devs)){
+			print("sdadddevs: out of device letters\n");
+			goto giveup;
+		}
+	}
+}
+
+// void
+// sdrmdevs(SDev *sdev)
+// {
+// 	char buf[2];
+//
+// 	snprint(buf, sizeof buf, "%c", sdev->idno);
+// 	unconfigure(buf);
+// }
+
+static int
+sd2gen(Chan* c, int i, Dir* dp)
+{
+	Qid q;
+	uvlong l;
+	SDfile *e;
+	SDpart *pp;
+	SDperm *perm;
+	SDunit *unit;
+	SDev *sdev;
+	int rv, t;
+
+	sdev = sdgetdev(DEV(c->qid));
+	assert(sdev);
+	unit = sdev->unit[UNIT(c->qid)];
+
+	rv = -1;
+	switch(i){
+	case Qctl:
+		mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), PART(c->qid), Qctl),
+			unit->vers, QTFILE);
+		perm = &unit->ctlperm;
+		if(emptystr(perm->user)){
+			kstrdup(&perm->user, eve);
+			perm->perm = 0644;
+		}
+		devdir(c, q, "ctl", 0, perm->user, perm->perm, dp);
+		rv = 1;
+		break;
+
+	case Qraw:
+		mkqid(&q, QID(DEV(c->qid), UNIT(c->qid),
PART(c->qid), Qraw),
+			unit->vers, QTFILE);
+		perm = &unit->rawperm;
+		if(emptystr(perm->user)){
+			kstrdup(&perm->user, eve);
+			perm->perm = DMEXCL|0600;
+		}
+		devdir(c, q, "raw", 0, perm->user, perm->perm, dp);
+		rv = 1;
+		break;
+
+	case Qpart:
+		pp = &unit->part[PART(c->qid)];
+		l = (pp->end - pp->start) * unit->secsize;
+		mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), PART(c->qid), Qpart),
+			unit->vers+pp->vers, QTFILE);
+		if(emptystr(pp->user))
+			kstrdup(&pp->user, eve);
+		devdir(c, q, pp->name, l, pp->user, pp->perm, dp);
+		rv = 1;
+		break;
+	case Qextra:
+		t = PART(c->qid);
+		if(t >= unit->nefile)
+			break;
+		mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), PART(c->qid), Qextra),
+			unit->vers, QTFILE);
+		e = unit->efile + t;
+		if(emptystr(e->user))
+			kstrdup(&e->user, eve);
+		devdir(c, q, e->name, 0, e->user, e->perm, dp);
+		rv = 1;
+		break;
+	}
+
+	decref(&sdev->r);
+	return rv;
+}
+
+/*
+ * Generate the directory entry for a top-level file; only the
+ * top-level ctl file exists.  Its name, owner and mode are
+ * filled in on first use.
+ */
+static int
+sd1gen(Chan* c, int i, Dir* dp)
+{
+	Qid q;
+	SDperm *p;
+
+	switch(i){
+	case Qtopctl:
+		mkqid(&q, QID(0, 0, 0, Qtopctl), 0, QTFILE);
+		qlock(&topctlunit.ctl);
+		p = &topctlunit.ctlperm;
+		if(p->user == nil || p->user[0] == 0){
+			kstrdup(&p->name, "sdctl");
+			kstrdup(&p->user, eve);
+			p->perm = 0640;
+		}
+		devdir(c, q, p->name, 0, p->user, p->perm, dp);
+		qunlock(&topctlunit.ctl);
+		return 1;
+	}
+	return -1;
+}
+
+/*
+ * Generate the entry for one of a unit's extra files.  i arrives
+ * as a partition slot number; extra files follow the first
+ * SDnpart slots.  Returns -1 past the last extra file and 0 for
+ * a slot that falls before them.
+ */
+static int
+efilegen(Chan *c, SDunit *unit, int i, Dir *dp)
+{
+	Qid q;
+	SDfile *e;
+
+	i -= SDnpart;
+	if(unit->nefile == 0 || i >= unit->nefile)
+		return -1;
+	if(i < 0)
+		return 0;
+	e = unit->efile + i;
+	if(emptystr(e->user))
+		kstrdup(&e->user, eve);
+	mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), i, Qextra),
+		unit->vers, QTFILE);
+	devdir(c, q, e->name, 0, e->user, e->perm, dp);
+	return 1;
+}
+
+/*
+ * Directory generator for the sd device.
+ *	Qtopdir:  the top-level ctl file, then one directory per unit
+ *		of every registered device, in devs[] order;
+ *	Qunitdir: ctl and raw, then the unit's partitions, then its
+ *		extra files;
+ *	files:    the single entry describing the file itself.
+ * Returns 1 with dp filled in, 0 for a slot to be skipped and -1
+ * at the end of the directory.
+ */
+static int
+sdgen(Chan* c, char*, Dirtab*, int, int s, Dir* dp)
+{
+	Qid q;
+	uvlong l;
+	int i, r;
+	SDpart *pp;
+	SDunit *unit;
+	SDev *sdev;
+
+	switch(TYPE(c->qid)){
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#%C", sddevtab.dc);
+			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+			return 1;
+		}
+
+		if(s+Qtopbase < Qunitdir)
+			return sd1gen(c, s+Qtopbase, dp);
+		s -= (Qunitdir-Qtopbase);
+
+		/* find the device whose range of units contains s */
+		qlock(&devslock);
+		for(i=0; i<nelem(devs); i++){
+			if(devs[i]){
+				if(s < devs[i]->nunit)
+					break;
+				s -= devs[i]->nunit;
+			}
+		}
+
+		if(i == nelem(devs)){
+			/* Run off the end of the list */
+			qunlock(&devslock);
+			return -1;
+		}
+
+		if((sdev = devs[i]) == nil){
+			qunlock(&devslock);
+			return 0;
+		}
+
+		incref(&sdev->r);
+		qunlock(&devslock);
+
+		if((unit = sdev->unit[s]) == nil)
+			if((unit = sdgetunit(sdev, s)) == nil){
+				decref(&sdev->r);
+				return 0;
+			}
+
+		mkqid(&q, QID(sdev->idno, s, 0, Qunitdir), 0, QTDIR);
+		if(emptystr(unit->user))
+			kstrdup(&unit->user, eve);
+		devdir(c, q, unit->name, 0, unit->user, unit->perm, dp);
+		decref(&sdev->r);
+		return 1;
+
+	case Qunitdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#%C", sddevtab.dc);
+			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+			return 1;
+		}
+
+		if((sdev = sdgetdev(DEV(c->qid))) == nil){
+			devdir(c, c->qid, "unavailable", 0, eve, 0, dp);
+			return 1;
+		}
+
+		unit = sdev->unit[UNIT(c->qid)];
+		qlock(&unit->ctl);
+
+		/*
+		 * Check for media change.
+		 * If one has already been detected, sectors will be zero.
+		 * If there is one waiting to be detected, online
+		 * will return > 1.
+		 * Online is a bit of a large hammer but does the job.
+		 */
+		if(unit->sectors == 0
+		|| (unit->dev->ifc->online && unit->dev->ifc->online(unit) > 1))
+			sdinitpart(unit);
+
+		i = s+Qunitbase;
+		if(i < Qpart){
+			r = sd2gen(c, i, dp);
+			qunlock(&unit->ctl);
+			decref(&sdev->r);
+			return r;
+		}
+		i -= Qpart;
+		if(unit->part == nil || i >= unit->npart){
+			r = efilegen(c, unit, i, dp);
+			qunlock(&unit->ctl);
+			decref(&sdev->r);
+			return r;
+		}
+		pp = &unit->part[i];
+		if(!pp->valid || unit->sectors == 0){
+			qunlock(&unit->ctl);
+			decref(&sdev->r);
+			return 0;
+		}
+		l = (pp->end - pp->start) * (uvlong)unit->secsize;
+		mkqid(&q, QID(DEV(c->qid), UNIT(c->qid), i, Qpart),
+			unit->vers+pp->vers, QTFILE);
+		if(emptystr(pp->user))
+			kstrdup(&pp->user, eve);
+		devdir(c, q, pp->name, l, pp->user, pp->perm, dp);
+		qunlock(&unit->ctl);
+		decref(&sdev->r);
+		return 1;
+	case Qraw:
+	case Qctl:
+	case Qpart:
+	case Qextra:
+		if((sdev = sdgetdev(DEV(c->qid))) == nil){
+			/* q is not set on this path; use the channel's own qid as Qunitdir does */
+			devdir(c, c->qid, "unavailable", 0, eve, 0, dp);
+			return 1;
+		}
+		unit = sdev->unit[UNIT(c->qid)];
+		qlock(&unit->ctl);
+		r = sd2gen(c, TYPE(c->qid), dp);
+		qunlock(&unit->ctl);
+		decref(&sdev->r);
+		return r;
+	case Qtopctl:
+		return sd1gen(c, TYPE(c->qid), dp);
+	default:
+		break;
+	}
+
+	return -1;
+}
+
+/*
+ * Attach to the device.  An empty spec names the top-level
+ * directory; otherwise spec must be "sd", a device letter and a
+ * hexadecimal unit number (e.g. sdC0) and the channel is attached
+ * directly to that unit's directory.
+ */
+static Chan*
+sdattach(char* spec)
+{
+	Chan *c;
+	char *p;
+	SDev *sdev;
+	int idno, subno;
+
+	if(*spec == '\0'){
+		c = devattach(sddevtab.dc, spec);
+		mkqid(&c->qid, QID(0, 0, 0, Qtopdir), 0, QTDIR);
+		return c;
+	}
+
+	/* spec[2] must exist before &spec[3] may be examined */
+	if(spec[0] != 's' || spec[1] != 'd' || spec[2] == '\0')
+		error(Ebadspec);
+	idno = spec[2];
+	subno = strtol(&spec[3], &p, 16);
+	if(p == &spec[3] || subno < 0)
+		error(Ebadspec);
+
+	if((sdev=sdgetdev(idno)) == nil)
+		error(Enonexist);
+	if(sdgetunit(sdev, subno) == nil){
+		decref(&sdev->r);
+		error(Enonexist);
+	}
+
+	c = devattach(sddevtab.dc, spec);
+	mkqid(&c->qid, QID(sdev->idno, subno, 0, Qunitdir), 0, QTDIR);
+	c->devno = (sdev->idno << UnitLOG) + subno;
+	decref(&sdev->r);
+	return c;
+}
+
+static Walkqid*
+sdwalk(Chan* c, Chan* nc, char** name, int nname)
+{
+	return
devwalk(c, nc, name, nname, nil, 0, sdgen);
+}
+
+static long
+sdstat(Chan* c, uchar* db, long n)
+{
+	return devstat(c, db, n, nil, 0, sdgen);
+}
+
+/*
+ * Open a file.  For ctl, raw and partition files the channel's
+ * qid version is set from the unit (plus the partition for Qpart)
+ * so that later I/O can detect a change.  The raw file is
+ * exclusive: a second open fails with Einuse.
+ */
+static Chan*
+sdopen(Chan* c, int omode)
+{
+	SDpart *pp;
+	SDunit *unit;
+	SDev *sdev;
+	uchar tp;
+
+	c = devopen(c, omode, 0, 0, sdgen);
+	if((tp = TYPE(c->qid)) != Qctl && tp != Qraw && tp != Qpart)
+		return c;
+
+	sdev = sdgetdev(DEV(c->qid));
+	if(sdev == nil)
+		error(Enonexist);
+
+	unit = sdev->unit[UNIT(c->qid)];
+
+	switch(TYPE(c->qid)){
+	case Qctl:
+		c->qid.vers = unit->vers;
+		break;
+	case Qraw:
+		c->qid.vers = unit->vers;
+		if(tas32(&unit->rawinuse) != 0){
+			c->flag &= ~COPEN;
+			decref(&sdev->r);
+			error(Einuse);
+		}
+		unit->state = Rawcmd;
+		break;
+	case Qpart:
+		qlock(&unit->ctl);
+		if(waserror()){
+			qunlock(&unit->ctl);
+			c->flag &= ~COPEN;
+			decref(&sdev->r);
+			nexterror();
+		}
+		pp = &unit->part[PART(c->qid)];
+		c->qid.vers = unit->vers+pp->vers;
+		qunlock(&unit->ctl);
+		poperror();
+		break;
+	}
+	decref(&sdev->r);
+	return c;
+}
+
+/*
+ * Close a file; only the raw file has state to release
+ * (its in-use flag).
+ */
+static void
+sdclose(Chan* c)
+{
+	SDunit *unit;
+	SDev *sdev;
+
+	if(c->qid.type & QTDIR)
+		return;
+	if(!(c->flag & COPEN))
+		return;
+
+	switch(TYPE(c->qid)){
+	default:
+		break;
+	case Qraw:
+		sdev = sdgetdev(DEV(c->qid));
+		if(sdev){
+			unit = sdev->unit[UNIT(c->qid)];
+			unit->rawinuse = 0;
+			decref(&sdev->r);
+		}
+		break;
+	}
+}
+
+#define iskaddr(a)	((uintptr)(a) > KZERO)
+
+/*
+ * Transfer up to len bytes between a and the partition named by
+ * c, starting at byte offset off within the partition; write
+ * selects the direction.  The request is clipped to the partition
+ * and to SDmaxio.  Transfers that are not sector aligned, or whose
+ * buffer is not a kernel address, go through a bounce buffer; an
+ * unaligned write first reads the sectors it only partly covers.
+ * Returns the number of bytes transferred (0 for a read at or
+ * past the end of the partition); failures are raised with error().
+ *
+ * A reference to the device is held for the duration and must be
+ * dropped on every exit path.
+ */
+static long
+sdbio(Chan* c, int write, char* a, long len, uvlong off)
+{
+	int nchange, hard, allocd;
+	long l;
+	uchar *b;
+	SDpart *pp;
+	SDunit *unit;
+	SDev *sdev;
+	ulong max, nb, offset;
+	uvlong bno;
+
+	sdev = sdgetdev(DEV(c->qid));
+	if(sdev == nil)
+		error(Enonexist);	/* no reference was taken; nothing to drop */
+	unit = sdev->unit[UNIT(c->qid)];
+	if(unit == nil){
+		decref(&sdev->r);
+		error(Enonexist);
+	}
+
+	nchange = 0;
+	qlock(&unit->ctl);
+	while(waserror()){
+		/* notification of media change; go around again */
+		if(strcmp(up->errstr, Eio) == 0 && unit->sectors == 0 && nchange++ == 0){
+			sdinitpart(unit);
+			continue;
+		}
+
+		/* other errors; give up */
+		qunlock(&unit->ctl);
+		decref(&sdev->r);
+		nexterror();
+	}
+	pp = &unit->part[PART(c->qid)];
+	if(unit->vers+pp->vers != c->qid.vers)
+		error(Echange);
+
+	/*
+	 * Check the request is within bounds.
+	 * Removeable drives are locked throughout the I/O
+	 * in case the media changes unexpectedly.
+	 * Non-removeable drives are not locked during the I/O
+	 * to allow the hardware to optimise if it can; this is
+	 * a little fast and loose.
+	 * It's assumed that non-removeable media parameters
+	 * (sectors, secsize) can't change once the drive has
+	 * been brought online.
+	 */
+	bno = (off/unit->secsize) + pp->start;
+	nb = ((off+len+unit->secsize-1)/unit->secsize) + pp->start - bno;
+	max = SDmaxio/unit->secsize;
+	if(nb > max)
+		nb = max;
+	if(bno+nb > pp->end)
+		nb = pp->end - bno;
+	if(bno >= pp->end || nb == 0){
+		if(write)
+			error(Eio);
+		qunlock(&unit->ctl);
+		decref(&sdev->r);
+		poperror();
+		return 0;
+	}
+	if(!(unit->inquiry[1] & 0x80)){
+		qunlock(&unit->ctl);
+		poperror();
+	}
+
+	offset = off%unit->secsize;
+	if(offset+len > nb*unit->secsize)
+		len = nb*unit->secsize - offset;
+	hard = offset || write && len%unit->secsize;
+
+	if(iskaddr(a) && !hard) {
+		b = (uchar*)a;
+		allocd = 0;
+	}else{
+		b = sdmalloc(nb*unit->secsize);
+		if(b == nil){
+			/*
+			 * For non-removeable media the handler above
+			 * has already been popped, so the device
+			 * reference must be dropped here.
+			 */
+			if(!(unit->inquiry[1] & 0x80))
+				decref(&sdev->r);
+			error(Enomem);
+		}
+		allocd = 1;
+	}
+	if(waserror()){
+		if(allocd)
+			sdfree(b);
+		if(!(unit->inquiry[1] & 0x80))
+			decref(&sdev->r);		/* gadverdamme! */
+		nexterror();
+	}
+
+	if(write){
+		if(hard){
+			/* read-modify-write: fetch the sectors being partly overwritten */
+			l = unit->dev->ifc->bio(unit, 0, 0, b, nb, bno);
+			if(l < 0)
+				error(Eio);
+			if(l < (nb*unit->secsize)){
+				nb = l/unit->secsize;
+				l = nb*unit->secsize - offset;
+				if(len > l)
+					len = l;
+			}
+		}
+		if(allocd)
+			memmove(b+offset, a, len);
+		l = unit->dev->ifc->bio(unit, 0, 1, b, nb, bno);
+		if(l < 0)
+			error(Eio);
+		if(l < offset)
+			len = 0;
+		else if(len > l - offset)
+			len = l - offset;
+	}
+	else{
+		l = unit->dev->ifc->bio(unit, 0, 0, b, nb, bno);
+		if(l < 0)
+			error(Eio);
+		if(l < offset)
+			len = 0;
+		else if(len > l - offset)
+			len = l - offset;
+		if(allocd)
+			memmove(a, b+offset, len);
+	}
+	if(allocd)
+		sdfree(b);
+	poperror();
+
+	if(unit->inquiry[1] & 0x80){
+		qunlock(&unit->ctl);
+		poperror();
+	}
+
+	decref(&sdev->r);
+	return len;
+}
+
+/*
+ * Issue the raw request r with an n-byte data phase to or from a.
+ * A pending sense record saved from an earlier command is
+ * returned directly for a request sense (0x03) without touching
+ * the device.  Returns the number of bytes transferred.
+ */
+static long
+sdrio(SDreq* r, void* a, long n)
+{
+	char *errstr;
+	int rv;
+	void *data;
+	SDunit *u;
+	int (*f)(SDreq*);
+
+	if(n >= SDmaxio || n < 0)
+		error(Etoobig);
+	u = r->unit;
+	if(u->haversense && r->cmd[0] == 0x03){
+		u->haversense = 0;
+		r->rlen = sizeof u->rsense;
+		if(r->rlen > n)
+			r->rlen = n;
+		memmove(a, u->rsense, r->rlen);
+		r->status = SDok;
+		return r->rlen;
+	}
+
+	data = nil;
+	if(n > 0 && (data = sdmalloc(n)) == nil)
+		error(Enomem);
+	if(waserror()){
+		sdfree(data);
+		r->data = nil;
+		nexterror();
+	}
+	if(r->write && n > 0)
+		memmove(data, a, n);
+	r->data = data;
+	r->dlen = n;
+
+	if(r->proto == SData){
+		f = u->dev->ifc->ataio;
+		errstr = Enoata;
+	}else{
+		f = u->dev->ifc->rio;
+		errstr = Enoscsi;
+	}
+	if(f == nil)
+		error(errstr);
+	rv = f(r);
+	if(r->flags & SDvalidsense){
+		memmove(u->rsense, r->sense, sizeof u->rsense);
+		u->haversense = 1;
+	}
+	if(rv != SDok)
+		error(Eio);
+
+	if(!r->write && r->rlen > 0)
+		memmove(a, data, r->rlen);
+	poperror();
+	sdfree(data);
+	r->data = nil;
+
+	return r->rlen;
+}
+
+/*
+ * SCSI simulation for non-SCSI devices
+ *
+ * see /sys/src/cmd/scuzz/sense.c for information on key.
+ * see /sys/lib/scsicodes for asc:ascq codes + */ +int +sdsetsense(SDreq *r, int status, int key, int asc, int ascq) +{ + int len; + SDunit *unit; + + unit = r->unit; + unit->sense[0] = 0x80 | 0x70; /* valid; fixed-format */ + unit->sense[2] = key; + unit->sense[12] = asc; + unit->sense[13] = ascq; + + r->status = status; + if(status == SDcheck && !(r->flags & SDnosense)){ + /* request sense case from sdfakescsi */ + len = sizeof unit->sense; + if(len > sizeof r->sense-1) + len = sizeof r->sense-1; + memmove(r->sense, unit->sense, len); + unit->sense[2] = 0; + unit->sense[12] = 0; + unit->sense[13] = 0; + r->flags |= SDvalidsense; + return SDok; + } + return status; +} + +int +sdfakescsi(SDreq *r) +{ + uchar *cmd, *p; + uvlong len; + SDunit *unit; + + cmd = r->cmd; + r->rlen = 0; + unit = r->unit; + + /* + * Map SCSI commands into ATA commands for discs. + * Fail any command with a LUN except INQUIRY which + * will return 'logical unit not supported'. + */ + if((cmd[1]>>5) && cmd[0] != 0x12) + return sdsetsense(r, SDcheck, 0x05, 0x25, 0); + + switch(cmd[0]){ + default: + return sdsetsense(r, SDcheck, 0x05, 0x20, 0); + + case 0x00: /* test unit ready */ + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x03: /* request sense */ + if(cmd[4] < sizeof unit->sense) + len = cmd[4]; + else + len = sizeof unit->sense; + if(r->data && r->dlen >= len){ + memmove(r->data, unit->sense, len); + r->rlen = len; + } + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x12: /* inquiry */ + if(cmd[4] < sizeof unit->inquiry) + len = cmd[4]; + else + len = sizeof unit->inquiry; + if(r->data && r->dlen >= len){ + memmove(r->data, unit->inquiry, len); + r->rlen = len; + } + return sdsetsense(r, SDok, 0, 0, 0); + + case 0x1B: /* start/stop unit */ + /* + * nop for now, can use power management later. 
+		 */
+		return sdsetsense(r, SDok, 0, 0, 0);
+
+	case 0x25:			/* read capacity */
+		if((cmd[1] & 0x01) || cmd[2] || cmd[3])
+			return sdsetsense(r, SDcheck, 0x05, 0x24, 0);
+		if(r->data == nil || r->dlen < 8)
+			return sdsetsense(r, SDcheck, 0x05, 0x20, 1);
+
+		/*
+		 * Read capacity returns the LBA of the last sector.
+		 * Only four bytes are available for it here, so an
+		 * LBA that does not fit is reported as 0xFFFFFFFF
+		 * rather than silently truncated.
+		 */
+		len = unit->sectors;
+		if(len > 0)
+			len--;
+		if(len > 0xFFFFFFFFULL)
+			len = 0xFFFFFFFFULL;
+		p = r->data;
+		*p++ = len>>24;
+		*p++ = len>>16;
+		*p++ = len>>8;
+		*p++ = len;
+		len = unit->secsize;
+		*p++ = len>>24;
+		*p++ = len>>16;
+		*p++ = len>>8;
+		*p++ = len;
+		r->rlen = p - (uchar*)r->data;
+		return sdsetsense(r, SDok, 0, 0, 0);
+
+	case 0x9E:			/* long read capacity */
+		if((cmd[1] & 0x01) || cmd[2] || cmd[3])
+			return sdsetsense(r, SDcheck, 0x05, 0x24, 0);
+		/* 8 bytes of LBA plus 4 bytes of sector size are stored below */
+		if(r->data == nil || r->dlen < 12)
+			return sdsetsense(r, SDcheck, 0x05, 0x20, 1);
+		/*
+		 * Read capacity returns the LBA of the last sector.
+		 */
+		len = unit->sectors;
+		if(len > 0)
+			len--;
+		p = r->data;
+		*p++ = len>>56;
+		*p++ = len>>48;
+		*p++ = len>>40;
+		*p++ = len>>32;
+		*p++ = len>>24;
+		*p++ = len>>16;
+		*p++ = len>>8;
+		*p++ = len;
+		len = unit->secsize;
+		*p++ = len>>24;
+		*p++ = len>>16;
+		*p++ = len>>8;
+		*p++ = len;
+		r->rlen = p - (uchar*)r->data;
+		return sdsetsense(r, SDok, 0, 0, 0);
+	case 0x08:			/* read6 */
+	case 0x0a:			/* write6 */
+	case 0x28:			/* read10 */
+	case 0x2a:			/* write10 */
+	case 0xa8:			/* read12 */
+	case 0xaa:			/* write12 */
+	case 0x88:			/* read16 */
+	case 0x8a:			/* write16 */
+		return SDnostatus;
+	}
+}
+
+/*
+ * Decode the CDB of a SCSI read or write (6, 10, 12 or 16 byte
+ * form) into a starting LBA, a sector count and a direction.
+ * Returns SDnostatus once *llba, *nsec and, if rwp is not nil,
+ * *rwp have been filled in; SDok without touching them when the
+ * request carries no data buffer; for a 16-byte command whose LBA
+ * needs more than 48 bits, whatever sdsetsense returns; and SDcheck
+ * for any other opcode.
+ */
+int
+sdfakescsirw(SDreq *r, uvlong *llba, int *nsec, int *rwp)
+{
+	uchar *c;
+	int rw, count;
+	uvlong lba;
+
+	c = r->cmd;
+	rw = SDread;
+	if((c[0] & 0xf) == 0xa)
+		rw = SDwrite;
+	/*
+	 * A uchar shifted left by 24 is a (possibly negative) int, which
+	 * sign-extends when widened to uvlong; the top byte of each
+	 * 32-bit LBA is therefore cast before it is shifted.
+	 */
+	switch(c[0]){
+	case 0x08:	/* read6 */
+	case 0x0a:
+		/* 21-bit LBA: only the top three bits of c[1] are the LUN field */
+		lba = (c[1] & 0x1f)<<16 | c[2]<<8 | c[3];
+		count = c[4];
+		break;
+	case 0x28:	/* read10 */
+	case 0x2a:
+		lba = (uvlong)c[2]<<24 | c[3]<<16 | c[4]<<8 | c[5];
+		count = c[7]<<8 | c[8];
+		break;
+	case 0xa8:	/* read12 */
+	case 0xaa:
+		lba = (uvlong)c[2]<<24 | c[3]<<16 | c[4]<<8 | c[5];
+		count = c[6]<<24 | c[7]<<16 | c[8]<<8 | c[9];
+		break;
+	case 0x88:	/* read16 */
+	case 0x8a:
+		/* ata commands only go to 48-bit lba */
+		if(c[2] || c[3])
+			return sdsetsense(r, SDcheck, 3, 0xc, 2);
+		lba = (uvlong)c[4]<<40 | (uvlong)c[5]<<32;
+		lba |= (uvlong)c[6]<<24 | c[7]<<16 | c[8]<<8 | c[9];
+		count = c[10]<<24 | c[11]<<16 | c[12]<<8 | c[13];
+		break;
+	default:
+		print("%s: bad cmd 0x%.2ux\n", r->unit->name, c[0]);
+		r->status = sdsetsense(r, SDcheck, 0x05, 0x20, 0);
+		return SDcheck;
+	}
+	if(r->data == nil)
+		return SDok;
+	/* never ask for more sectors than the data buffer can hold */
+	if(r->dlen < count * r->unit->secsize)
+		count = r->dlen/r->unit->secsize;
+	if(rwp)
+		*rwp = rw;
+	*llba = lba;
+	*nsec = count;
+	return SDnostatus;
+}
+
+/*
+ * Read (write == 0) or write one of a unit's extra files by calling
+ * the r or w routine stored in unit->efile[] for the part number in
+ * the qid.  A reference to the controller is held across the call and
+ * dropped on both the normal and the error path.
+ */
+static long
+extrarw(int write, Chan *c, void *a, long n, vlong off)
+{
+	int i;
+	SDrw *f;
+	SDev *sdev;
+	SDunit *unit;
+
+	sdev = sdgetdev(DEV(c->qid));
+	if(sdev == nil)
+		error(Enonexist);
+	if(waserror()){
+		decref(&sdev->r);
+		nexterror();
+	}
+	unit = sdev->unit[UNIT(c->qid)];
+	if(unit->vers != c->qid.vers)
+		error(Echange);
+	i = PART(c->qid);
+	if(i >= unit->nefile)
+		error(Enonexist);
+	f = unit->efile[i].r;
+	if(write)
+		f = unit->efile[i].w;
+	if(f == nil)
+		error(Eperm);
+	n = f(unit, c, a, n, off);
+	poperror();
+	decref(&sdev->r);
+	return n;
+}
+
+/*
+ * Default one-line summary of a controller for the top-level ctl
+ * file, used when its interface supplies no rtopctl routine.
+ */
+static char*
+deftopctl(SDev *s, char *p, char *e)
+{
+	return seprint(p, e, "sd%c %s %d units\n", s->idno, s->ifc->name, s->nunit);
+}
+
+/*
+ * Read from an sd file; the file type encoded in the qid selects
+ * what is produced.
+ */
+static long
+sdread(Chan *c, void *a, long n, vlong off)
+{
+	char *p, *e, *buf;
+	SDev *sdev;
+	SDpart *pp;
+	SDreq *r;
+	SDunit *unit;
+	ulong offset;
+	int i, l, m, status;
+
+	offset = off;
+	switch(TYPE(c->qid)){
+	default:
+		error(Eperm);
+	case Qtopctl:
+		m = 64*1024;	/* room for register dumps */
+		p = buf = malloc(m);
+		assert(p);
+		e = p + m;
+		qlock(&devslock);
+		for(i = 0; i < nelem(devs); i++){
+			sdev = devs[i];
+			if(sdev && sdev->ifc->rtopctl)
+				p = sdev->ifc->rtopctl(sdev, p, e);
+			else if(sdev)
+				p = deftopctl(sdev, p,
e);
+		}
+		qunlock(&devslock);
+		n = readstr(off, a, n, buf);
+		free(buf);
+		return n;
+
+	case Qtopdir:
+	case Qunitdir:
+		return devdirread(c, a, n, 0, 0, sdgen);
+
+	case Qctl:
+		sdev = sdgetdev(DEV(c->qid));
+		if(sdev == nil)
+			error(Enonexist);
+
+		unit = sdev->unit[UNIT(c->qid)];
+		m = 16*1024;	/* room for register dumps */
+		p = malloc(m);
+		if(p == nil){
+			/* snprint below would write through nil; give the ref back first */
+			decref(&sdev->r);
+			error(Enomem);
+		}
+		l = snprint(p, m, "inquiry %.48s\n",
+			(char*)unit->inquiry+8);
+		qlock(&unit->ctl);
+		/*
+		 * If there's a device specific routine it must
+		 * provide all information pertaining to night geometry
+		 * and the garscadden trains.
+		 */
+		if(unit->dev->ifc->rctl)
+			l += unit->dev->ifc->rctl(unit, p+l, m-l);
+		if(unit->sectors == 0)
+			sdinitpart(unit);
+		if(unit->sectors){
+			if(unit->dev->ifc->rctl == nil)
+				l += snprint(p+l, m-l,
+					"geometry %llud %lud\n",
+					unit->sectors, unit->secsize);
+			pp = unit->part;
+			for(i = 0; i < unit->npart; i++){
+				if(pp->valid)
+					l += snprint(p+l, m-l,
+						"part %s %llud %llud\n",
+						pp->name, pp->start, pp->end);
+				pp++;
+			}
+		}
+		qunlock(&unit->ctl);
+		decref(&sdev->r);
+		l = readstr(offset, a, n, p);
+		free(p);
+		return l;
+
+	case Qraw:
+		sdev = sdgetdev(DEV(c->qid));
+		if(sdev == nil)
+			error(Enonexist);
+
+		unit = sdev->unit[UNIT(c->qid)];
+		qlock(&unit->raw);
+		if(waserror()){
+			qunlock(&unit->raw);
+			decref(&sdev->r);
+			nexterror();
+		}
+		if(unit->state == Rawdata){
+			/* data phase: run the pending request, reading into a */
+			unit->state = Rawstatus;
+			r = unit->req;
+			r->timeout = 0;
+			i = sdrio(r, a, n);
+		}
+		else if(unit->state == Rawstatus){
+			/* status phase: report the result and retire the request */
+			r = unit->req;
+			unit->req = nil;
+			unit->state = Rawcmd;
+			status = r->status;
+			if(r->proto == SData){
+				p = a;
+				i = 16 + Ahdrsz;
+				if(n < i)
+					i = n;
+				if(i > 0)
+					p[0] = status;
+				if(i > Ahdrsz)
+					memmove(p + Ahdrsz, r->cmd, i - Ahdrsz);
+			}else
+				i = readnum(0, a, n, status, NUMSIZE);
+			free(r);
+		} else
+			i = 0;
+		poperror();
+		qunlock(&unit->raw);
+		decref(&sdev->r);
+		return i;
+
+	case Qpart:
+		return sdbio(c, 0, a, n, off);
+	case Qextra:
+		return extrarw(0, c, a, n, off);
+	}
+}
+
+static void
legacytopctl(Cmdbuf*); + +static long +sdwrite(Chan* c, void* a, long n, vlong off) +{ + char *f0; + int i, atacdb, proto, ataproto; + uchar *u; + uvlong end, start; + Cmdbuf *cb; + SDifc *ifc; + SDreq *req; + SDunit *unit; + SDev *sdev; + + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qtopctl: + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf == 0) + error("empty control message"); + f0 = cb->f[0]; + cb->f++; + cb->nf--; + if(strcmp(f0, "config") == 0){ + /* wormhole into ugly legacy interface */ + legacytopctl(cb); + poperror(); + free(cb); + break; + } + /* + * "ata arg..." invokes sdifc[i]->wtopctl(nil, cb), + * where sdifc[i]->name=="ata" and cb contains the args. + */ + ifc = nil; + sdev = nil; + for(i=0; sdifc[i]; i++){ + if(strcmp(sdifc[i]->name, f0) == 0){ + ifc = sdifc[i]; + sdev = nil; + goto subtopctl; + } + } + /* + * "sd1 arg..." invokes sdifc[i]->wtopctl(sdev, cb), + * where sdifc[i] and sdev match controller letter "1", + * and cb contains the args. 
+		 */
+		if(f0[0]=='s' && f0[1]=='d' && f0[2] && f0[3] == 0){
+			if((sdev = sdgetdev(f0[2])) != nil){
+				ifc = sdev->ifc;
+				goto subtopctl;
+			}
+		}
+		error("unknown interface");
+
+	subtopctl:
+		/* sdev is nil when the message named an interface, not a controller */
+		if(waserror()){
+			if(sdev)
+				decref(&sdev->r);
+			nexterror();
+		}
+		if(ifc->wtopctl)
+			ifc->wtopctl(sdev, cb);
+		else
+			error(Ebadctl);
+		poperror();
+		poperror();
+		if(sdev)
+			decref(&sdev->r);
+		free(cb);
+		break;
+
+	case Qctl:
+		cb = parsecmd(a, n);
+		sdev = sdgetdev(DEV(c->qid));
+		if(sdev == nil){
+			/* no error handler owns cb yet, so release it here */
+			free(cb);
+			error(Enonexist);
+		}
+		unit = sdev->unit[UNIT(c->qid)];
+
+		qlock(&unit->ctl);
+		if(waserror()){
+			qunlock(&unit->ctl);
+			decref(&sdev->r);
+			free(cb);
+			nexterror();
+		}
+		if(unit->vers != c->qid.vers)
+			error(Echange);
+
+		if(cb->nf < 1)
+			error(Ebadctl);
+		if(strcmp(cb->f[0], "part") == 0){
+			if(cb->nf != 4)
+				error(Ebadctl);
+			if(unit->sectors == 0 && !sdinitpart(unit))
+				error(Eio);
+			start = strtoull(cb->f[2], 0, 0);
+			end = strtoull(cb->f[3], 0, 0);
+			sdaddpart(unit, cb->f[1], start, end);
+		}
+		else if(strcmp(cb->f[0], "delpart") == 0){
+			if(cb->nf != 2 || unit->part == nil)
+				error(Ebadctl);
+			sddelpart(unit, cb->f[1]);
+		}
+		else if(unit->dev->ifc->wctl)
+			unit->dev->ifc->wctl(unit, cb);
+		else
+			error(Ebadctl);
+		qunlock(&unit->ctl);
+		decref(&sdev->r);
+		poperror();
+		free(cb);
+		break;
+
+	case Qraw:
+		proto = SDcdb;
+		ataproto = 0;
+		atacdb = 0;
+		sdev = sdgetdev(DEV(c->qid));
+		if(sdev == nil)
+			error(Enonexist);
+		unit = sdev->unit[UNIT(c->qid)];
+		qlock(&unit->raw);
+		if(waserror()){
+			qunlock(&unit->raw);
+			decref(&sdev->r);
+			nexterror();
+		}
+		switch(unit->state){
+		case Rawcmd:
+			/* sneaky ata commands */
+			u = a;
+			if(n > 1 && *u == 0xff){
+				proto = SData;
+				ataproto = u[1];
+				a = u + 2;
+				atacdb = Ahdrsz;
+				n -= Ahdrsz;
+			}
+			if(n < 6 || n > sizeof(req->cmd))
+				error(Ebadarg);
+			if((req = malloc(sizeof(SDreq))) == nil)
+				error(Enomem);
+			req->unit = unit;
+			/* copying from a can raise an error; don't leak req if it does */
+			if(waserror()){
+				free(req);
+				nexterror();
+			}
+			memmove(req->cmd, a, n);
+			poperror();
+			req->clen =
n; + /* req->flags = SDnosense; */ + req->status = ~0; + req->proto = proto; + req->ataproto = ataproto; + unit->req = req; + unit->state = Rawdata; + n += atacdb; + break; + + case Rawstatus: + unit->state = Rawcmd; + free(unit->req); + unit->req = nil; + error(Ebadusefd); + + case Rawdata: + unit->state = Rawstatus; + req = unit->req; + req->write = 1; + n = sdrio(req, a, n); + } + poperror(); + qunlock(&unit->raw); + decref(&sdev->r); + break; + case Qpart: + return sdbio(c, 1, a, n, off); + case Qextra: + return extrarw(1, c, a, n, off); + } + + return n; +} + +static long +sdwstat(Chan* c, uchar* dp, long n) +{ + Dir *d; + SDpart *pp; + SDperm *perm; + SDunit *unit; + SDev *sdev; + + if(c->qid.type & QTDIR) + error(Eperm); + if(TYPE(c->qid) == Qtopctl){ + unit = &topctlunit; + sdev = nil; + }else{ + sdev = sdgetdev(DEV(c->qid)); + if(sdev == nil) + error(Enonexist); + unit = sdev->unit[UNIT(c->qid)]; + } + qlock(&unit->ctl); + + d = nil; + if(waserror()){ + free(d); + qunlock(&unit->ctl); + if(sdev != nil) + decref(&sdev->r); + nexterror(); + } + + switch(TYPE(c->qid)){ + default: + error(Eperm); + case Qtopctl: + case Qctl: + perm = &unit->ctlperm; + break; + case Qraw: + perm = &unit->rawperm; + break; + case Qpart: + pp = &unit->part[PART(c->qid)]; + if(unit->vers+pp->vers != c->qid.vers) + error(Enonexist); + perm = &pp->SDperm; + break; + } + + if(strcmp(up->user, perm->user) && !iseve()) + error(Eperm); + + d = smalloc(sizeof(Dir)+n); + n = convM2D(dp, n, &d[0], (char*)&d[1]); + if(n == 0) + error(Eshortstat); + if(d->atime != ~0 || d->mtime != ~0 || d->length != ~0) + error(Eperm); + if(!emptystr(d[0].muid) || !emptystr(d[0].name)) + error(Eperm); + if(!emptystr(d[0].uid)) + kstrdup(&perm->user, d[0].uid); + if(!emptystr(d[0].gid) && strcmp(d[0].gid, eve) != 0) + error(Eperm); + if(d[0].mode != ~0UL) + perm->perm = (perm->perm & ~0777) | (d[0].mode & 0777); + + free(d); + d = nil; USED(d); + qunlock(&unit->ctl); + if(sdev != nil) + decref(&sdev->r); + 
poperror(); + return n; +} + +static int +configure(char* spec, DevConf* cf) +{ + SDev *s, *sdev; + char *p; + int i; + + if(sdindex(*spec) < 0) + error("bad sd spec"); + + if((p = strchr(cf->type, '/')) != nil) + *p++ = '\0'; + + for(i = 0; sdifc[i] != nil; i++) + if(strcmp(sdifc[i]->name, cf->type) == 0) + break; + if(sdifc[i] == nil) + error("sd type not found"); + if(p) + *(p-1) = '/'; + + if(sdifc[i]->probe == nil) + error("sd type cannot probe"); + + sdev = sdifc[i]->probe(cf); + for(s=sdev; s; s=s->next) + s->idno = *spec; + sdadddevs(sdev); + return 0; +} + +static int +unconfigure(char* spec) +{ + int i; + SDev *sdev; + SDunit *unit; + + if((i = sdindex(*spec)) < 0) + error(Enonexist); + + qlock(&devslock); + if((sdev = devs[i]) == nil){ + qunlock(&devslock); + error(Enonexist); + } + if(sdev->r.ref){ + qunlock(&devslock); + error(Einuse); + } + devs[i] = nil; + qunlock(&devslock); + + /* make sure no interrupts arrive anymore before removing resources */ + if(sdev->enabled && sdev->ifc->disable) + sdev->ifc->disable(sdev); + + for(i = 0; i != sdev->nunit; i++){ + if(unit = sdev->unit[i]){ + free(unit->name); + free(unit->user); + free(unit); + } + } + + if(sdev->ifc->clear) + sdev->ifc->clear(sdev); + free(sdev); + return 0; +} + +static int +sdconfig(int on, char* spec, DevConf* cf) +{ + if(on) + return configure(spec, cf); + return unconfigure(spec); +} + +int +sdaddfile(SDunit *unit, char *s, int perm, char *u, SDrw *r, SDrw *w) +{ + int i; + SDfile *e; + static Lock lk; + + if(unit == nil) + return -1; + lock(&lk); + for(i = 0; i < unit->nefile; i++) + if(strcmp(unit->efile[i].name, s) == 0) + break; + if(i >= nelem(unit->efile)){ + unlock(&lk); + return -1; + } + if(i >= unit->nefile) + unit->nefile = i + 1; + e = unit->efile + i; + if(e->name == nil) + kstrdup(&e->name, s); + if(e->user == nil) + kstrdup(&e->user, u); + e->perm = perm; + e->r = r; + e->w = w; + unlock(&lk); + return 0; +} + +static void +sdshutdown(void) +{ + int i; + SDev *sd; + + 
for(i = 0; i < nelem(devs); i++){ + sd = devs[i]; + if(sd == nil) + continue; + if(sd->ifc->disable == nil){ + print("#S/sd%c: no disable function\n", devletters[i]); + continue; + } + sd->ifc->disable(sd); + } +} + +Dev sddevtab = { + 'S', + "sd", + + sdreset, + devinit, + sdshutdown, + sdattach, + sdwalk, + sdstat, + sdopen, + devcreate, + sdclose, + sdread, + devbread, + sdwrite, + devbwrite, + devremove, + sdwstat, + devpower, + sdconfig, +}; + +/* + * This is wrong for so many reasons. This code must go. + */ +typedef struct Confdata Confdata; +struct Confdata { + int on; + char* spec; + DevConf cf; +}; + +static void +parseswitch(Confdata* cd, char* option) +{ + if(!strcmp("on", option)) + cd->on = 1; + else if(!strcmp("off", option)) + cd->on = 0; + else + error(Ebadarg); +} + +static void +parsespec(Confdata* cd, char* option) +{ + if(strlen(option) > 1) + error(Ebadarg); + cd->spec = option; +} + +static Devport* +getnewport(DevConf* dc) +{ + Devport *p; + + p = malloc((dc->nports + 1) * sizeof(Devport)); + if(dc->nports > 0){ + memmove(p, dc->ports, dc->nports * sizeof(Devport)); + free(dc->ports); + } + dc->ports = p; + p = &dc->ports[dc->nports++]; + p->size = -1; + p->port = (ulong)-1; + return p; +} + +static void +parseport(Confdata* cd, char* option) +{ + char *e; + Devport *p; + + if(cd->cf.nports == 0 || cd->cf.ports[cd->cf.nports-1].port != (ulong)-1) + p = getnewport(&cd->cf); + else + p = &cd->cf.ports[cd->cf.nports-1]; + p->port = strtol(option, &e, 0); + if(e == nil || *e != '\0') + error(Ebadarg); +} + +static void +parsesize(Confdata* cd, char* option) +{ + char *e; + Devport *p; + + if(cd->cf.nports == 0 || cd->cf.ports[cd->cf.nports-1].size != -1) + p = getnewport(&cd->cf); + else + p = &cd->cf.ports[cd->cf.nports-1]; + p->size = (int)strtol(option, &e, 0); + if(e == nil || *e != '\0') + error(Ebadarg); +} + +static void +parseirq(Confdata* cd, char* option) +{ + char *e; + + cd->cf.intnum = strtoul(option, &e, 0); + if(e == nil || *e != 
'\0')
+		error(Ebadarg);
+}
+
+/* "type" option: remember the interface type string */
+static void
+parsetype(Confdata* cd, char* option)
+{
+	cd->cf.type = option;
+}
+
+/* option keywords accepted by legacytopctl and the parser for each value */
+static struct {
+	char	*name;
+	void	(*parse)(Confdata*, char*);
+} options[] = {
+	"switch",	parseswitch,
+	"spec",		parsespec,
+	"port",		parseport,
+	"size",		parsesize,
+	"irq",		parseirq,
+	"type",		parsetype,
+};
+
+/*
+ * Handle the arguments of a "config" message: cb holds option/value
+ * pairs.  Each option is looked up in options[] and its value parsed
+ * into a Confdata, which is then handed to sdconfig.  An odd field
+ * count, an unknown option, or a missing switch, spec or (when
+ * switching on) type raises Ebadarg.
+ */
+static void
+legacytopctl(Cmdbuf *cb)
+{
+	char *opt;
+	int i, j;
+	Confdata cd;
+
+	memset(&cd, 0, sizeof cd);
+	cd.on = -1;	/* "no switch seen yet" */
+	for(i=0; i<cb->nf; i+=2){
+		if(i+2 > cb->nf)
+			error(Ebadarg);
+		opt = cb->f[i];
+		for(j=0; j<nelem(options); j++)
+			if(strcmp(opt, options[j].name) == 0){
+				options[j].parse(&cd, cb->f[i+1]);
+				break;
+			}
+		if(j == nelem(options))
+			error(Ebadarg);
+	}
+	if(cd.on < 0 || cd.spec == 0)
+		error(Ebadarg);
+	if(cd.on && cd.cf.type == nil)
+		error(Ebadarg);
+	sdconfig(cd.on, cd.spec, &cd.cf);
+}
diff -Nru 0/sys/src/nix/port/devsegment.c 4/sys/src/nix/port/devsegment.c
--- 0/sys/src/nix/port/devsegment.c Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/devsegment.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,765 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+enum
+{
+	Qtopdir,
+	Qsegdir,
+	Qctl,
+	Qdata,
+	Qfree,
+
+	/* commands to kproc */
+	Cnone=0,
+	Cread,
+	Cwrite,
+	Cstart,
+	Cdie,
+};
+
+/* qid.path layout: low 3 bits file type, next 6 bits index into globalseg[] */
+#define TYPE(x) 	(int)( (x)->qid.path & 0x7 )
+#define SEG(x)	 	( ((x)->qid.path >> 3) & 0x3f )
+#define PATH(s, t) 	( ((s)<<3) | (t) )
+
+typedef struct Globalseg Globalseg;
+typedef struct Freemsg Freemsg;
+
+struct Freemsg
+{
+	Freemsg *next;
+};
+
+struct Globalseg
+{
+	Ref;
+	Segment	*s;
+
+	char	*name;
+	char	*uid;
+	vlong	length;
+	long	perm;
+
+	Freemsg	*free;
+
+	/* kproc to do reading and writing */
+	QLock	l;		/* sync kproc access */
+	Rendez	cmdwait;	/* where kproc waits */
+	Rendez	replywait;	/* where requestor waits */
+	Proc	*kproc;
+	char	*data;
+	uintptr	off;		/* virtual address (segment base + file offset) */
+	int	dlen;
+	int	cmd;
+	char	err[64];
+};
+
+static Globalseg *globalseg[100];
+static Lock globalseglock;
+Segment *heapseg;
+
+	Segment* (*_globalsegattach)(Proc*, char*);
+static
Segment* globalsegattach(Proc*, char*); +static int cmddone(void*); +static void segmentkproc(void*); +static void docmd(Globalseg*, int); + +/* + * returns with globalseg incref'd + */ +static Globalseg* +getgseg(Chan *c) +{ + int x; + Globalseg *g; + + x = SEG(c); + lock(&globalseglock); + if(x >= nelem(globalseg)) + panic("getgseg"); + g = globalseg[x]; + if(g != nil) + incref(g); + unlock(&globalseglock); + if(g == nil) + error("global segment disappeared"); + return g; +} + +static void +putgseg(Globalseg *g) +{ + if(decref(g) > 0) + return; + if(g->s == heapseg) + heapseg = nil; + if(g->s != nil) + putseg(g->s); + if(g->kproc) + docmd(g, Cdie); + free(g->name); + free(g->uid); + free(g); +} + +static int +segmentgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp) +{ + Qid q; + Globalseg *g; + ulong size; + + switch(TYPE(c)) { + case Qtopdir: + if(s == DEVDOTDOT){ + q.vers = 0; + q.path = PATH(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, "#g", 0, eve, DMDIR|0777, dp); + break; + } + + if(s >= nelem(globalseg)) + return -1; + + lock(&globalseglock); + g = globalseg[s]; + if(g == nil){ + unlock(&globalseglock); + return 0; + } + q.vers = 0; + q.path = PATH(s, Qsegdir); + q.type = QTDIR; + devdir(c, q, g->name, 0, g->uid, DMDIR|0777, dp); + unlock(&globalseglock); + + break; + case Qsegdir: + if(s == DEVDOTDOT){ + q.vers = 0; + q.path = PATH(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, "#g", 0, eve, DMDIR|0777, dp); + break; + } + /* fall through */ + case Qctl: + case Qdata: + case Qfree: + g = getgseg(c); + if(waserror()){ + putgseg(g); + nexterror(); + } + q.vers = 0; + q.type = QTFILE; + switch(s){ + case 0: + q.path = PATH(SEG(c), Qctl); + devdir(c, q, "ctl", 0, g->uid, g->perm, dp); + break; + case 1: + q.path = PATH(SEG(c), Qdata); + if(g->s != nil) + size = g->s->top - g->s->base; + else + size = 0; + devdir(c, q, "data", size, g->uid, g->perm, dp); + break; + case 2: + q.path = PATH(SEG(c), Qfree); + devdir(c, q, "free", 0, g->uid, g->perm&0444, dp); + 
break; + + default: + poperror(); + putgseg(g); + return -1; + } + poperror(); + putgseg(g); + break; + } + return 1; +} + +static void +segmentinit(void) +{ + _globalsegattach = globalsegattach; +} + +static Chan* +segmentattach(char *spec) +{ + return devattach('g', spec); +} + +static Walkqid* +segmentwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, segmentgen); +} + +static long +segmentstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, segmentgen); +} + +static int +cmddone(void *arg) +{ + Globalseg *g = arg; + + return g->cmd == Cnone; +} + +static Chan* +segmentopen(Chan *c, int omode) +{ + Globalseg *g; + + switch(TYPE(c)){ + case Qtopdir: + case Qsegdir: + if(omode != 0) + error(Eisdir); + break; + case Qctl: + case Qfree: + g = getgseg(c); + if(waserror()){ + putgseg(g); + nexterror(); + } + devpermcheck(g->uid, g->perm, omode); + c->aux = g; + poperror(); + c->flag |= COPEN; + break; + case Qdata: + g = getgseg(c); + if(waserror()){ + putgseg(g); + nexterror(); + } + devpermcheck(g->uid, g->perm, omode); + if(g->s == nil) + error("segment not yet allocated"); + if(g->kproc == nil){ + qlock(&g->l); + if(waserror()){ + qunlock(&g->l); + nexterror(); + } + if(g->kproc == nil){ + g->cmd = Cnone; + kproc(g->name, segmentkproc, g); + docmd(g, Cstart); + } + poperror(); + qunlock(&g->l); + } + c->aux = g; + poperror(); + c->flag |= COPEN; + break; + default: + panic("segmentopen"); + } + c->mode = openmode(omode); + c->offset = 0; + return c; +} + +static void +segmentclose(Chan *c) +{ + if(TYPE(c) == Qtopdir) + return; + if(c->flag & COPEN) + putgseg(c->aux); +} + +static void +segmentcreate(Chan *c, char *name, int omode, int perm) +{ + int x, xfree; + Globalseg *g; + char *ep; + + if(TYPE(c) != Qtopdir) + error(Eperm); + + if(isphysseg(name)) + error(Eexist); + + if((perm & DMDIR) == 0) + error("must create directory"); + + if(waserror()){ + unlock(&globalseglock); + nexterror(); + } + xfree = 
-1;
+	/*
+	 * The error handler installed above unlocks globalseglock, so
+	 * the lock must already be held before anything can call error().
+	 */
+	lock(&globalseglock);
+	if(name[0] == '#' && name[1] >= '0' && name[1] <= '9'){
+		/* hack for cnk: if #n, treat it as index n */
+		xfree = strtoul(name+1, &ep, 0);
+		if(*ep)
+			xfree = -1;
+		else if(xfree < 0 || xfree >= nelem(globalseg))
+			error("invalid global segment index");
+	}
+	if(xfree < 0){
+		/* take the first free slot, refusing a duplicate name */
+		for(x = 0; x < nelem(globalseg); x++){
+			g = globalseg[x];
+			if(g == nil){
+				if(xfree < 0)
+					xfree = x;
+			} else {
+				if(strcmp(g->name, name) == 0)
+					error(Eexist);
+			}
+		}
+		if(xfree < 0)
+			error("too many global segments");
+	}else{
+		g = globalseg[xfree];
+		if(g != nil)
+			error(Eexist);
+	}
+	g = smalloc(sizeof(Globalseg));
+	g->ref = 1;
+	kstrdup(&g->name, name);
+	kstrdup(&g->uid, up->user);
+	g->perm = 0660;
+	globalseg[xfree] = g;
+	unlock(&globalseglock);
+	poperror();
+
+	c->qid.path = PATH(xfree, Qsegdir);
+	c->qid.type = QTDIR;
+	c->qid.vers = 0;
+	c->mode = openmode(omode);
+	c->mode = OWRITE;
+
+	DBG("segmentcreate(%s, %#o %#ux)\n", name, omode, perm);
+}
+
+enum{PTRSIZE = 19};	/* "0x1234567812345678 " */
+
+/*
+ * Format val in hexadecimal into a field PTRSIZE-1 wide at buf and
+ * put a space in the last (PTRSIZE'th) byte, so no NUL is left in
+ * the record.  Returns PTRSIZE, or 0 without writing anything when
+ * n says fewer than PTRSIZE bytes are available.
+ */
+static int
+readptr(char *buf, long n, uintptr val)
+{
+	if(n < PTRSIZE)
+		return 0;
+	/* buf is a pointer, so sizeof buf is not the room available */
+	snprint(buf, PTRSIZE, "%*#ullx", PTRSIZE-1, (uvlong)val);
+	buf[PTRSIZE-1] = ' ';
+	return PTRSIZE;
+}
+
+/* sleep condition for readers of the free file: zs->end is non-zero */
+static int
+znotempty(void *x)
+{
+	Zseg *zs;
+
+	zs = x;
+	return zs->end != 0;
+}
+
+/*
+ * Read from a segment file.
+ *	free: waits until zgetaddr yields an address, then returns as
+ *		many PTRSIZE-byte address records as are available and fit.
+ *	ctl:  one line giving the segment kind, base and size.
+ *	data: segment contents at offset voff, copied by the segment's
+ *		kproc; reads are clipped to the end of the segment.
+ * Returns the number of bytes stored at a.
+ */
+static long
+segmentread(Chan *c, void *a, long n, vlong voff)
+{
+	Globalseg *g;
+	Zseg *zs;
+	uintptr va;
+	char *p, *s;
+	long tot;
+	vlong len;
+	char buf[64];
+
+	if(c->qid.type == QTDIR)
+		return devdirread(c, a, n, (Dirtab *)0, 0L, segmentgen);
+
+	g = c->aux;
+	switch(TYPE(c)){
+	case Qfree:
+		if(g->s == nil)
+			error("segment not yet allocated");
+		if(n < PTRSIZE)
+			error("read buffer too small");
+		zs = &g->s->zseg;
+		qlock(&g->s->lk);
+		if(waserror()){
+			qunlock(&g->s->lk);
+			nexterror();
+		}
+		while((va = zgetaddr(g->s)) == 0ULL){
+			qunlock(&g->s->lk);
+			sleep(&zs->rr, znotempty, zs);
+			qlock(&g->s->lk);
+		}
+		/*
+		 * Count every record actually written, and fetch another
+		 * address only while there is still room to return it.
+		 */
+		p = a;
+		tot = 0;
+		for(;;){
+			p += readptr(p, n-tot, va);
+			tot += PTRSIZE;
+			if(n-tot < PTRSIZE)
+				break;
+			if((va = zgetaddr(g->s)) == 0ULL)
+				break;
+		}
+		poperror();
+		qunlock(&g->s->lk);
+		return tot;
+	case Qctl:
+		if(g->s == nil)
+			error("segment not yet allocated");
+		if(g->s->type&SG_KZIO)
+			s = "kmsg";
+		else if(g->s->type&SG_ZIO)
+			s = "umsg";
+		else
+			s = "addr";
+		snprint(buf, sizeof(buf), "%s %#p %#p\n",
+			s, g->s->base, (uintptr)(g->s->top-g->s->base));
+		return readstr(voff, a, n, buf);
+	case Qdata:
+		if(voff < 0)
+			error(Enegoff);
+		/* voff is relative to the base, so clip against the size, not top */
+		len = g->s->top - g->s->base;
+		if(voff >= len || n <= 0)
+			break;
+		if(n > len - voff)
+			n = len - voff;
+		qlock(&g->l);
+		if(waserror()){
+			qunlock(&g->l);
+			nexterror();
+		}
+
+		g->off = voff + g->s->base;
+		g->data = smalloc(n);
+		if(waserror()){
+			free(g->data);
+			nexterror();
+		}
+		g->dlen = n;
+		docmd(g, Cread);
+		memmove(a, g->data, g->dlen);
+		poperror();
+		free(g->data);
+
+		poperror();
+		qunlock(&g->l);
+		return n;
+	default:
+		panic("segmentread");
+	}
+	return 0;	/* not reached */
+}
+
+/*
+ * BUG: we allocate virtual addresses but never give them
+ * back when the segment is destroyed.
+ * BUG: what if we overlap other segments attached by the user?
+ */
+static ulong
+placeseg(ulong len)
+{
+	static Lock lck;
+	static ulong va = HEAPTOP;
+	ulong v;
+
+	len += BIGPGSZ;	/* so we fault upon overflows */
+	lock(&lck);
+	len = BIGPGROUND(len);
+	va -= len;
+	v = va;
+	unlock(&lck);
+
+	return v;
+}
+
+/*
+ * Write to a segment file.
+ *	ctl:  "kmsg|umsg|addr va len" gives the segment a virtual
+ *		address range (va 0 lets placeseg choose one);
+ *		"heap" makes the segment the heap segment.
+ *	data: copies n bytes from a into the segment at offset voff
+ *		through the segment's kproc, clipped to the segment end.
+ *	free: not writable.
+ * Returns the number of bytes accepted.
+ */
+static long
+segmentwrite(Chan *c, void *a, long n, vlong voff)
+{
+	Cmdbuf *cb;
+	Globalseg *g;
+	uintptr va, len, top;
+	vlong size;
+	int i;
+	struct{
+		char *name;
+		int type;
+	}segs[] = {
+		{"kmsg", SG_SHARED|SG_ZIO|SG_KZIO},
+		{"umsg", SG_SHARED|SG_ZIO},
+		{"addr", SG_SHARED},
+	};
+
+	if(c->qid.type == QTDIR)
+		error(Eperm);
+
+	switch(TYPE(c)){
+	case Qfree:
+		error(Eperm);
+		break;
+	case Qctl:
+		g = c->aux;
+		cb = parsecmd(a, n);
+		/* cb is ours to free, on error as well as on success */
+		if(waserror()){
+			free(cb);
+			nexterror();
+		}
+		if(cb->nf < 1)
+			error(Ebadctl);
+		for(i = 0; i < nelem(segs); i++)
+			if(strcmp(cb->f[0], segs[i].name) == 0)
+				break;
+		if(i < nelem(segs)){
+			if(g->s != nil)
+				error("already has a virtual address");
+			if(cb->nf < 3)
+				cmderror(cb, Ebadarg);
+			/* va and len are uintptr: parse them at full width */
+			va = strtoull(cb->f[1], 0, 0);
+			len = strtoull(cb->f[2], 0, 0);
+			if(va == 0)
+				va = placeseg(len);
+			/* widen [va, va+len) to big-page boundaries; len becomes a page count */
+			top = BIGPGROUND(va + len);
+			va = va&~(BIGPGSZ-1);
+			len = (top - va) / BIGPGSZ;
+			if(len == 0)
+				cmderror(cb, "empty segment");
+
+			g->s = newseg(segs[i].type, va, len);
+			if(i == 0)
+				newzmap(g->s);
+			else if(i == 1)
+				zgrow(g->s);
+			DBG("newseg %s base %#ullx len %#ullx\n",
+				cb->f[0], va, len*BIGPGSZ);
+			if(i == 0 || i == 1)
+				dumpzseg(g->s);
+		}else if(strcmp(cb->f[0], "heap") == 0){
+			if(g == nil)
+				error("no globalseg");
+			if(g->s == nil)
+				error("no segment");
+			if(heapseg)
+				error("heap already set");
+			else
+				heapseg = g->s;
+		}else
+			error(Ebadctl);
+		poperror();
+		free(cb);
+		break;
+	case Qdata:
+		g = c->aux;
+		if(voff < 0)
+			error(Enegoff);
+		/* voff is relative to the base, so clip against the size, not top */
+		size = g->s->top - g->s->base;
+		if(voff >= size || n <= 0){
+			n = 0;
+			break;
+		}
+		if(n > size - voff)
+			n = size - voff;
+		qlock(&g->l);
+		if(waserror()){
+			qunlock(&g->l);
+			nexterror();
+		}
+
+		g->off = voff + g->s->base;
+		g->data = smalloc(n);
+		if(waserror()){
+			free(g->data);
+			nexterror();
+		}
+		g->dlen = n;
+		memmove(g->data, a, g->dlen);
+		docmd(g, Cwrite);
+		poperror();
+		free(g->data);
+
+		poperror();
+		qunlock(&g->l);
+		break;
+	default:
+		panic("segmentwrite");
+	}
+	return n;
+}
+
+/*
+ * Change a global segment's owner and/or permission bits from a
+ * wire-format stat buffer.  Only the current owner or eve may do so.
+ */
+static long
+segmentwstat(Chan *c, uchar *dp, long n)
+{
+	Globalseg *g;
+	Dir *d;
+
+	if(c->qid.type == QTDIR)
+		error(Eperm);
+
+	g = getgseg(c);
+	if(waserror()){
+		putgseg(g);
+		nexterror();
+	}
+
+	if(strcmp(g->uid, up->user)!=0 && !iseve())
+		error(Eperm);
+	d = smalloc(sizeof(Dir)+n);
+	if(waserror()){
+		free(d);
+		nexterror();
+	}
+	n = convM2D(dp, n, &d[0], (char*)&d[1]);
+	/* a zero result means nothing was unpacked into d */
+	if(n == 0)
+		error(Eshortstat);
+	if(!emptystr(d->uid) && strcmp(d->uid, g->uid) != 0)
+		kstrdup(&g->uid, d->uid);
+	if(d->mode != ~0UL)
+		g->perm = d->mode & 0777;
+	poperror();
+	free(d);
+
+	poperror();
+	putgseg(g);
+
+	return n;
+}
+
+/*
+ * Remove a segment directory: clear its slot in globalseg[] and drop
+ * the reference the table held.
+ */
+static void
+segmentremove(Chan *c)
+{
+	Globalseg *g;
+	int x;
+
+	if(TYPE(c) != Qsegdir)
+		error(Eperm);
+	lock(&globalseglock);
+	x = SEG(c);
+	g = globalseg[x];
+	globalseg[x] = nil;
+	unlock(&globalseglock);
+	if(g != nil)
+		putgseg(g);
+}
+
+/*
+ * called by segattach()
+ *
+ * Look up a global segment by name.  Returns nil if there is none of
+ * that name; otherwise checks permission, that the segment has been
+ * given an address and that it does not overlap p's segments, and
+ * returns it with its reference count incremented.
+ */
+static Segment*
+globalsegattach(Proc *p, char *name)
+{
+	int x;
+	Globalseg *g;
+	Segment *s;
+
+	g = nil;
+	if(waserror()){
+		unlock(&globalseglock);
+		nexterror();
+	}
+	lock(&globalseglock);
+	for(x = 0; x < nelem(globalseg); x++){
+		g = globalseg[x];
+		if(g != nil && strcmp(g->name, name) == 0)
+			break;
+	}
+	if(x == nelem(globalseg)){
+		unlock(&globalseglock);
+		poperror();
+		return nil;
+	}
+	devpermcheck(g->uid, g->perm, ORDWR);
+	s = g->s;
+	if(s == nil)
+		error("global segment not assigned a virtual address");
+	if(isoverlap(p, s->base, s->top - s->base) != nil)
+		error("overlaps existing segment");
+	incref(s);
+	unlock(&globalseglock);
+	poperror();
+	return s;
+}
+
+/*
+ * Post cmd to the segment's kproc and wait until it has been
+ * consumed (g->cmd back to Cnone).  If the kproc left a message in
+ * g->err, raise it as an error in the caller.
+ */
+static void
+docmd(Globalseg *g, int cmd)
+{
+	g->err[0] = 0;
+	g->cmd = cmd;
+	wakeup(&g->cmdwait);
+	while(waserror())
+		{}		/* no interrupts */
+	sleep(&g->replywait, cmddone, g);
+	poperror();
+	if(g->err[0])
+		error(g->err);
+}
+
+/* sleep condition for the kproc: a command has been posted */
+static int
+cmdready(void *arg)
+{
+	Globalseg *g = arg;
+
+	return g->cmd != Cnone;
+} + +/* + * TO DO: better approach is to send segment with command, + * temporarily add it to segment array at SEG1, do the operation, then putseg. + * otherwise there are as many kprocs as segments. + */ +static void +segmentkproc(void *arg) +{ + Globalseg *g = arg; + int done; + int sno; + + for(sno = 0; sno < NSEG; sno++) + if(up->seg[sno] == nil && sno != ESEG) + break; + if(sno == NSEG) + panic("segmentkproc"); + g->kproc = up; + + incref(g->s); + up->seg[sno] = g->s; + + for(done = 0; !done;){ + sleep(&g->cmdwait, cmdready, g); + if(waserror()){ + strncpy(g->err, up->errstr, sizeof(g->err)); + } else { + switch(g->cmd){ + case Cstart: + break; + case Cdie: + done = 1; + break; + case Cread: + memmove(g->data, (char*)g->off, g->dlen); + break; + case Cwrite: + memmove((char*)g->off, g->data, g->dlen); + break; + } + poperror(); + } + g->cmd = Cnone; + wakeup(&g->replywait); + } +} + +Dev segmentdevtab = { + 'g', + "segment", + + devreset, + segmentinit, + devshutdown, + segmentattach, + segmentwalk, + segmentstat, + segmentopen, + segmentcreate, + segmentclose, + segmentread, + devbread, + segmentwrite, + devbwrite, + segmentremove, + segmentwstat, +}; + diff -Nru 0/sys/src/nix/port/devsrv.c 4/sys/src/nix/port/devsrv.c --- 0/sys/src/nix/port/devsrv.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devsrv.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,358 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + + +typedef struct Srv Srv; +struct Srv +{ + char *name; + char *owner; + ulong perm; + Chan *chan; + Srv *link; + ulong path; +}; + +static QLock srvlk; +static Srv *srv; +static int qidpath; + +static int +srvgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp) +{ + Srv *sp; + Qid q; + + if(s == DEVDOTDOT){ + devdir(c, c->qid, "#s", 0, eve, 0555, dp); + return 1; + } + + qlock(&srvlk); + for(sp = srv; sp && s; sp = sp->link) + s--; + + if(sp == 0) { + qunlock(&srvlk); + return -1; + } + + 
mkqid(&q, sp->path, 0, QTFILE); + /* make sure name string continues to exist after we release lock */ + kstrcpy(up->genbuf, sp->name, sizeof up->genbuf); + devdir(c, q, up->genbuf, 0, sp->owner, sp->perm, dp); + qunlock(&srvlk); + return 1; +} + +static void +srvinit(void) +{ + qidpath = 1; +} + +static Chan* +srvattach(char *spec) +{ + return devattach('s', spec); +} + +static Walkqid* +srvwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, 0, 0, srvgen); +} + +static Srv* +srvlookup(char *name, ulong qidpath) +{ + Srv *sp; + for(sp = srv; sp; sp = sp->link) + if(sp->path == qidpath || (name && strcmp(sp->name, name) == 0)) + return sp; + return nil; +} + +static long +srvstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, 0, 0, srvgen); +} + +char* +srvname(Chan *c) +{ + Srv *sp; + char *s; + + for(sp = srv; sp; sp = sp->link) + if(sp->chan == c){ + s = smalloc(3+strlen(sp->name)+1); + sprint(s, "#s/%s", sp->name); + return s; + } + return nil; +} + +static Chan* +srvopen(Chan *c, int omode) +{ + Srv *sp; + + if(c->qid.type == QTDIR){ + if(omode & ORCLOSE) + error(Eperm); + if(omode != OREAD) + error(Eisdir); + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; + } + qlock(&srvlk); + if(waserror()){ + qunlock(&srvlk); + nexterror(); + } + + sp = srvlookup(nil, c->qid.path); + if(sp == 0 || sp->chan == 0) + error(Eshutdown); + + if(omode&OTRUNC) + error("srv file already exists"); + if(openmode(omode)!=sp->chan->mode && sp->chan->mode!=ORDWR) + error(Eperm); + devpermcheck(sp->owner, sp->perm, omode); + + cclose(c); + incref(sp->chan); + qunlock(&srvlk); + poperror(); + return sp->chan; +} + +static void +srvcreate(Chan *c, char *name, int omode, int perm) +{ + Srv *sp; + char *sname; + + if(openmode(omode) != OWRITE) + error(Eperm); + + if(omode & OCEXEC) /* can't happen */ + panic("someone broke namec"); + + sp = smalloc(sizeof *sp); + sname = smalloc(strlen(name)+1); + + qlock(&srvlk); + 
if(waserror()){ + free(sname); + free(sp); + qunlock(&srvlk); + nexterror(); + } + if(sp == nil || sname == nil) + error(Enomem); + if(srvlookup(name, -1)) + error(Eexist); + + sp->path = qidpath++; + sp->link = srv; + strcpy(sname, name); + sp->name = sname; + c->qid.type = QTFILE; + c->qid.path = sp->path; + srv = sp; + qunlock(&srvlk); + poperror(); + + kstrdup(&sp->owner, up->user); + sp->perm = perm&0777; + + c->flag |= COPEN; + c->mode = OWRITE; +} + +static void +srvremove(Chan *c) +{ + Srv *sp, **l; + + if(c->qid.type == QTDIR) + error(Eperm); + + qlock(&srvlk); + if(waserror()){ + qunlock(&srvlk); + nexterror(); + } + l = &srv; + for(sp = *l; sp; sp = sp->link) { + if(sp->path == c->qid.path) + break; + + l = &sp->link; + } + if(sp == 0) + error(Enonexist); + + /* + * Only eve can remove system services. + * No one can remove #s/boot. + */ + if(strcmp(sp->owner, eve) == 0 && !iseve()) + error(Eperm); + if(strcmp(sp->name, "boot") == 0) + error(Eperm); + + /* + * No removing personal services. 
+ */ + if((sp->perm&7) != 7 && strcmp(sp->owner, up->user) && !iseve()) + error(Eperm); + + *l = sp->link; + qunlock(&srvlk); + poperror(); + + if(sp->chan) + cclose(sp->chan); + free(sp->owner); + free(sp->name); + free(sp); +} + +static long +srvwstat(Chan *c, uchar *dp, long n) +{ + Dir d; + Srv *sp; + char *strs; + + if(c->qid.type & QTDIR) + error(Eperm); + + strs = nil; + qlock(&srvlk); + if(waserror()){ + qunlock(&srvlk); + free(strs); + nexterror(); + } + + sp = srvlookup(nil, c->qid.path); + if(sp == 0) + error(Enonexist); + + if(strcmp(sp->owner, up->user) != 0 && !iseve()) + error(Eperm); + + strs = smalloc(n); + n = convM2D(dp, n, &d, strs); + if(n == 0) + error(Eshortstat); + if(d.mode != ~0UL) + sp->perm = d.mode & 0777; + if(d.uid && *d.uid) + kstrdup(&sp->owner, d.uid); + if(d.name && *d.name && strcmp(sp->name, d.name) != 0) { + if(strchr(d.name, '/') != nil) + error(Ebadchar); + kstrdup(&sp->name, d.name); + } + + qunlock(&srvlk); + free(strs); + poperror(); + return n; +} + +static void +srvclose(Chan *c) +{ + /* + * in theory we need to override any changes in removability + * since open, but since all that's checked is the owner, + * which is immutable, all is well. 
+ */ + if(c->flag & CRCLOSE){ + if(waserror()) + return; + srvremove(c); + poperror(); + } +} + +static long +srvread(Chan *c, void *va, long n, vlong) +{ + isdir(c); + return devdirread(c, va, n, 0, 0, srvgen); +} + +static long +srvwrite(Chan *c, void *va, long n, vlong) +{ + Srv *sp; + Chan *c1; + int fd; + char buf[32]; + + if(n >= sizeof buf) + error(Egreg); + memmove(buf, va, n); /* so we can NUL-terminate */ + buf[n] = 0; + fd = strtoul(buf, 0, 0); + + c1 = fdtochan(fd, -1, 0, 1); /* error check and inc ref */ + + qlock(&srvlk); + if(waserror()) { + qunlock(&srvlk); + cclose(c1); + nexterror(); + } + if(c1->flag & (CCEXEC|CRCLOSE)) + error("posted fd has remove-on-close or close-on-exec"); + if(c1->qid.type & QTAUTH) + error("cannot post auth file in srv"); + sp = srvlookup(nil, c->qid.path); + if(sp == 0) + error(Enonexist); + + if(sp->chan) + error(Ebadusefd); + + sp->chan = c1; + qunlock(&srvlk); + poperror(); + return n; +} + +Dev srvdevtab = { + 's', + "srv", + + devreset, + srvinit, + devshutdown, + srvattach, + srvwalk, + srvstat, + srvopen, + srvcreate, + srvclose, + srvread, + devbread, + srvwrite, + devbwrite, + srvremove, + srvwstat, +}; diff -Nru 0/sys/src/nix/port/devssl.c 4/sys/src/nix/port/devssl.c --- 0/sys/src/nix/port/devssl.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devssl.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1506 @@ +/* + * devssl - secure sockets layer + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +#define NOSPOOKS 1 + +typedef struct OneWay OneWay; +struct OneWay +{ + QLock q; + QLock ctlq; + + void *state; /* encryption state */ + int slen; /* hash data length */ + uchar *secret; /* secret */ + ulong mid; /* message id */ +}; + +enum +{ + /* connection states */ + Sincomplete= 0, + Sclear= 1, + Sencrypting= 2, + Sdigesting= 4, + Sdigenc= Sencrypting|Sdigesting, + + /* encryption algorithms */ + Noencryption= 0, + DESCBC= 1, + DESECB= 
2, + RC4= 3 +}; + +typedef struct Dstate Dstate; +struct Dstate +{ + Chan *c; /* io channel */ + uchar state; /* state of connection */ + int ref; /* serialized by dslock for atomic destroy */ + + uchar encryptalg; /* encryption algorithm */ + ushort blocklen; /* blocking length */ + + ushort diglen; /* length of digest */ + DigestState *(*hf)(uchar*, ulong, uchar*, DigestState*); /* hash func */ + + /* for SSL format */ + int max; /* maximum unpadded data per msg */ + int maxpad; /* maximum padded data per msg */ + + /* input side */ + OneWay in; + Block *processed; + Block *unprocessed; + + /* output side */ + OneWay out; + + /* protections */ + char *user; + int perm; +}; + +enum +{ + Maxdmsg= 1<<16, + Maxdstate= 128, /* must be a power of 2 */ +}; + +Lock dslock; +int dshiwat; +char *dsname[Maxdstate]; +Dstate *dstate[Maxdstate]; +char *encalgs; +char *hashalgs; + +enum{ + Qtopdir = 1, /* top level directory */ + Qprotodir, + Qclonus, + Qconvdir, /* directory for a conversation */ + Qdata, + Qctl, + Qsecretin, + Qsecretout, + Qencalgs, + Qhashalgs, +}; + +#define TYPE(x) ((x).path & 0xf) +#define CONV(x) (((x).path >> 5)&(Maxdstate-1)) +#define QID(c, y) (((c)<<5) | (y)) + +static void ensure(Dstate*, Block**, int); +static void consume(Block**, uchar*, int); +static void setsecret(OneWay*, uchar*, int); +static Block* encryptb(Dstate*, Block*, int); +static Block* decryptb(Dstate*, Block*); +static Block* digestb(Dstate*, Block*, int); +static void checkdigestb(Dstate*, Block*); +static Chan* buftochan(char*); +static void sslhangup(Dstate*); +static Dstate* dsclone(Chan *c); +static void dsnew(Chan *c, Dstate **); +static long sslput(Dstate *s, Block * volatile b); + +char *sslnames[] = { +[Qclonus] "clone", +[Qdata] "data", +[Qctl] "ctl", +[Qsecretin] "secretin", +[Qsecretout] "secretout", +[Qencalgs] "encalgs", +[Qhashalgs] "hashalgs", +}; + +static int +sslgen(Chan *c, char*, Dirtab *d, int nd, int s, Dir *dp) +{ + Qid q; + Dstate *ds; + char name[16], *p, 
*nm; + int ft; + + USED(nd); + USED(d); + + q.type = QTFILE; + q.vers = 0; + + ft = TYPE(c->qid); + switch(ft) { + case Qtopdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, "#D", 0, eve, 0555, dp); + return 1; + } + if(s > 0) + return -1; + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "ssl", 0, eve, 0555, dp); + return 1; + case Qprotodir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, ".", 0, eve, 0555, dp); + return 1; + } + if(s < dshiwat) { + q.path = QID(s, Qconvdir); + q.type = QTDIR; + ds = dstate[s]; + if(ds != 0) + nm = ds->user; + else + nm = eve; + if(dsname[s] == nil){ + sprint(name, "%d", s); + kstrdup(&dsname[s], name); + } + devdir(c, q, dsname[s], 0, nm, 0555, dp); + return 1; + } + if(s > dshiwat) + return -1; + q.path = QID(0, Qclonus); + devdir(c, q, "clone", 0, eve, 0555, dp); + return 1; + case Qconvdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "ssl", 0, eve, 0555, dp); + return 1; + } + ds = dstate[CONV(c->qid)]; + if(ds != 0) + nm = ds->user; + else + nm = eve; + switch(s) { + default: + return -1; + case 0: + q.path = QID(CONV(c->qid), Qctl); + p = "ctl"; + break; + case 1: + q.path = QID(CONV(c->qid), Qdata); + p = "data"; + break; + case 2: + q.path = QID(CONV(c->qid), Qsecretin); + p = "secretin"; + break; + case 3: + q.path = QID(CONV(c->qid), Qsecretout); + p = "secretout"; + break; + case 4: + q.path = QID(CONV(c->qid), Qencalgs); + p = "encalgs"; + break; + case 5: + q.path = QID(CONV(c->qid), Qhashalgs); + p = "hashalgs"; + break; + } + devdir(c, q, p, 0, nm, 0660, dp); + return 1; + case Qclonus: + devdir(c, c->qid, sslnames[TYPE(c->qid)], 0, eve, 0555, dp); + return 1; + default: + ds = dstate[CONV(c->qid)]; + if(ds != 0) + nm = ds->user; + else + nm = eve; + devdir(c, c->qid, sslnames[TYPE(c->qid)], 0, nm, 0660, dp); + return 1; + } +} + +static Chan* +sslattach(char *spec) +{ + Chan *c; + + c = 
devattach('D', spec); + c->qid.path = QID(0, Qtopdir); + c->qid.vers = 0; + c->qid.type = QTDIR; + return c; +} + +static Walkqid* +sslwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, sslgen); +} + +static long +sslstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, sslgen); +} + +static Chan* +sslopen(Chan *c, int omode) +{ + Dstate *s, **pp; + int perm; + int ft; + + perm = 0; + omode &= 3; + switch(omode) { + case OREAD: + perm = 4; + break; + case OWRITE: + perm = 2; + break; + case ORDWR: + perm = 6; + break; + } + + ft = TYPE(c->qid); + switch(ft) { + default: + panic("sslopen"); + case Qtopdir: + case Qprotodir: + case Qconvdir: + if(omode != OREAD) + error(Eperm); + break; + case Qclonus: + s = dsclone(c); + if(s == 0) + error(Enodev); + break; + case Qctl: + case Qdata: + case Qsecretin: + case Qsecretout: + if(waserror()) { + unlock(&dslock); + nexterror(); + } + lock(&dslock); + pp = &dstate[CONV(c->qid)]; + s = *pp; + if(s == 0) + dsnew(c, pp); + else { + if((perm & (s->perm>>6)) != perm + && (strcmp(up->user, s->user) != 0 + || (perm & s->perm) != perm)) + error(Eperm); + + s->ref++; + } + unlock(&dslock); + poperror(); + break; + case Qencalgs: + case Qhashalgs: + if(omode != OREAD) + error(Eperm); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + return c; +} + +static long +sslwstat(Chan *c, uchar *db, long n) +{ + Dir *dir; + Dstate *s; + int l; + + s = dstate[CONV(c->qid)]; + if(s == 0) + error(Ebadusefd); + if(strcmp(s->user, up->user) != 0) + error(Eperm); + + dir = smalloc(sizeof(Dir)+n); + l = convM2D(db, n, &dir[0], (char*)&dir[1]); + if(l == 0){ + free(dir); + error(Eshortstat); + } + + if(!emptystr(dir->uid)) + kstrdup(&s->user, dir->uid); + if(dir->mode != ~0UL) + s->perm = dir->mode; + + free(dir); + return l; +} + +static void +sslclose(Chan *c) +{ + Dstate *s; + int ft; + + ft = TYPE(c->qid); + switch(ft) { + case Qctl: + case Qdata: + case 
Qsecretin: + case Qsecretout: + if((c->flag & COPEN) == 0) + break; + + s = dstate[CONV(c->qid)]; + if(s == 0) + break; + + lock(&dslock); + if(--s->ref > 0) { + unlock(&dslock); + break; + } + dstate[CONV(c->qid)] = 0; + unlock(&dslock); + + if(s->user != nil) + free(s->user); + sslhangup(s); + if(s->c) + cclose(s->c); + if(s->in.secret) + free(s->in.secret); + if(s->out.secret) + free(s->out.secret); + if(s->in.state) + free(s->in.state); + if(s->out.state) + free(s->out.state); + free(s); + + } +} + +/* + * make sure we have at least 'n' bytes in list 'l' + */ +static void +ensure(Dstate *s, Block **l, int n) +{ + int sofar, i; + Block *b, *bl; + + sofar = 0; + for(b = *l; b; b = b->next){ + sofar += BLEN(b); + if(sofar >= n) + return; + l = &b->next; + } + + while(sofar < n){ + bl = s->c->dev->bread(s->c, Maxdmsg, 0); + if(bl == 0) + nexterror(); + *l = bl; + i = 0; + for(b = bl; b; b = b->next){ + i += BLEN(b); + l = &b->next; + } + if(i == 0) + error(Ehungup); + sofar += i; + } +} + +/* + * copy 'n' bytes from 'l' into 'p' and free + * the bytes in 'l' + */ +static void +consume(Block **l, uchar *p, int n) +{ + Block *b; + int i; + + for(; *l && n > 0; n -= i){ + b = *l; + i = BLEN(b); + if(i > n) + i = n; + memmove(p, b->rp, i); + b->rp += i; + p += i; + if(BLEN(b) < 0) + panic("consume"); + if(BLEN(b)) + break; + *l = b->next; + freeb(b); + } +} + +/* + * give back n bytes +static void +regurgitate(Dstate *s, uchar *p, int n) +{ + Block *b; + + if(n <= 0) + return; + b = s->unprocessed; + if(s->unprocessed == nil || b->rp - b->base < n) { + b = allocb(n); + memmove(b->wp, p, n); + b->wp += n; + b->next = s->unprocessed; + s->unprocessed = b; + } else { + b->rp -= n; + memmove(b->rp, p, n); + } +} + */ + +/* + * remove at most n bytes from the queue, if discard is set + * dump the remainder + */ +static Block* +qtake(Block **l, int n, int discard) +{ + Block *nb, *b, *first; + int i; + + first = *l; + for(b = first; b; b = b->next){ + i = BLEN(b); + if(i == 
n){
+			if(discard){
+				freeblist(b->next);
+				*l = 0;
+			} else
+				*l = b->next;
+			b->next = 0;
+			return first;
+		} else if(i > n){
+			i -= n;
+			if(discard){
+				freeblist(b->next);
+				b->wp -= i;
+				*l = 0;
+			} else {
+				nb = allocb(i);
+				memmove(nb->wp, b->rp+n, i);
+				nb->wp += i;
+				b->wp -= i;
+				nb->next = b->next;
+				*l = nb;
+			}
+			b->next = 0;
+			if(BLEN(b) < 0)
+				panic("qtake");
+			return first;
+		} else
+			n -= i;
+		if(BLEN(b) < 0)
+			panic("qtake");
+	}
+	*l = 0;
+	return first;
+}
+
+/*
+ *  We can't let Eintr's lose data since the program
+ *  doing the read may be able to handle it.  The only
+ *  places Eintr is possible is during the read's in consume.
+ *  Therefore, we make sure we can always put back the bytes
+ *  consumed before the last ensure.
+ *
+ *  Returns at most n bytes of the current decoded message as a
+ *  block list; whatever is left stays on s->processed for the
+ *  next call.  Holds s->in.q for the whole call and s->in.ctlq
+ *  while the record is checked and decrypted.
+ */
+static Block*
+sslbread(Chan *c, long n, vlong)
+{
+	Dstate * volatile s;
+	Block *b;
+	uchar consumed[3], *p;
+	int toconsume;
+	int len, pad;
+
+	s = dstate[CONV(c->qid)];
+	if(s == 0)
+		panic("sslbread");
+	if(s->state == Sincomplete)
+		error(Ebadusefd);
+
+	qlock(&s->in.q);
+	if(waserror()){
+		qunlock(&s->in.q);
+		nexterror();
+	}
+
+	if(s->processed == 0){
+		/*
+		 * Read in the whole message.  Until we've got it all,
+		 * it stays on s->unprocessed, so that if we get Eintr,
+		 * we'll pick up where we left off.
+		 */
+		ensure(s, &s->unprocessed, 3);
+		s->unprocessed = pullupblock(s->unprocessed, 2);
+		p = s->unprocessed->rp;
+		if(p[0] & 0x80){
+			/* 2 byte header: 15 bit length, no padding */
+			len = ((p[0] & 0x7f)<<8) | p[1];
+			ensure(s, &s->unprocessed, len);
+			pad = 0;
+			toconsume = 2;
+		} else {
+			/* 3 byte header: 14 bit length, then pad count */
+			s->unprocessed = pullupblock(s->unprocessed, 3);
+			/*
+			 * pullupblock may have gathered the header into a
+			 * different block; the old p must not be used.
+			 */
+			p = s->unprocessed->rp;
+			len = ((p[0] & 0x3f)<<8) | p[1];
+			pad = p[2];
+			if(pad > len){
+				print("pad %d buf len %d\n", pad, len);
+				error("bad pad in ssl message");
+			}
+			toconsume = 3;
+		}
+		ensure(s, &s->unprocessed, toconsume+len);
+
+		/* skip header */
+		consume(&s->unprocessed, consumed, toconsume);
+
+		/* grab the next message and decode/decrypt it */
+		b = qtake(&s->unprocessed, len, 0);
+
+		if(blocklen(b) != len)
+			print("devssl: sslbread got wrong count %d != %d", blocklen(b), len);
+
+		if(waserror()){
+			qunlock(&s->in.ctlq);
+			/* b is a list: qtake can return several blocks */
+			if(b != nil)
+				freeblist(b);
+			nexterror();
+		}
+		qlock(&s->in.ctlq);
+		switch(s->state){
+		case Sencrypting:
+			if(b == nil)
+				error("ssl message too short (encrypting)");
+			b = decryptb(s, b);
+			break;
+		case Sdigesting:
+			b = pullupblock(b, s->diglen);
+			if(b == nil)
+				error("ssl message too short (digesting)");
+			checkdigestb(s, b);
+			pullblock(&b, s->diglen);
+			len -= s->diglen;
+			break;
+		case Sdigenc:
+			b = decryptb(s, b);
+			b = pullupblock(b, s->diglen);
+			if(b == nil)
+				error("ssl message too short (dig+enc)");
+			checkdigestb(s, b);
+			pullblock(&b, s->diglen);
+			len -= s->diglen;
+			break;
+		}
+
+		/* remove pad */
+		if(pad){
+			/*
+			 * len no longer includes the digest, so the check
+			 * made on the header is not enough: a pad larger
+			 * than the remaining data would hand qtake a
+			 * negative count.
+			 */
+			if(pad > len)
+				error("bad pad in ssl message");
+			s->processed = qtake(&b, len - pad, 1);
+		} else
+			s->processed = b;
+		b = nil;
+		s->in.mid++;
+		qunlock(&s->in.ctlq);
+		poperror();
+	}
+
+	/* return at most what was asked for */
+	b = qtake(&s->processed, n, 0);
+
+	qunlock(&s->in.q);
+	poperror();
+
+	return b;
+}
+
+static long
+sslread(Chan *c, void *a, long n, vlong off)
+{
+	Block * volatile b;
+	Block *nb;
+	uchar *va;
+	int i;
+	char buf[128];
+	long offset;
+	int ft;
+
+	if(c->qid.type & QTDIR)
+		return devdirread(c, a, n, 0, 0, sslgen);
+
+	ft = TYPE(c->qid);
+	offset = off;
+
switch(ft) { + default: + error(Ebadusefd); + case Qctl: + ft = CONV(c->qid); + sprint(buf, "%d", ft); + return readstr(offset, a, n, buf); + case Qdata: + b = sslbread(c, n, offset); + break; + case Qencalgs: + return readstr(offset, a, n, encalgs); + break; + case Qhashalgs: + return readstr(offset, a, n, hashalgs); + break; + } + + if(waserror()){ + freeblist(b); + nexterror(); + } + + n = 0; + va = a; + for(nb = b; nb; nb = nb->next){ + i = BLEN(nb); + memmove(va+n, nb->rp, i); + n += i; + } + + freeblist(b); + poperror(); + + return n; +} + +/* + * this algorithm doesn't have to be great since we're just + * trying to obscure the block fill + */ +static void +randfill(uchar *buf, int len) +{ + while(len-- > 0) + *buf++ = nrand(256); +} + +static long +sslbwrite(Chan *c, Block *b, vlong) +{ + Dstate * volatile s; + long rv; + + s = dstate[CONV(c->qid)]; + if(s == nil) + panic("sslbwrite"); + + if(s->state == Sincomplete){ + freeb(b); + error(Ebadusefd); + } + + /* lock so split writes won't interleave */ + if(waserror()){ + qunlock(&s->out.q); + nexterror(); + } + qlock(&s->out.q); + + rv = sslput(s, b); + + poperror(); + qunlock(&s->out.q); + + return rv; +} + +/* + * use SSL record format, add in count, digest and/or encrypt. + * the write is interruptable. if it is interrupted, we'll + * get out of sync with the far side. not much we can do about + * it since we don't know if any bytes have been written. 
+ */ +static long +sslput(Dstate *s, Block * volatile b) +{ + Block *nb; + int h, n, l, pad, rv; + uchar *p; + int offset; + + if(waserror()){ + if(b != nil) + freeb(b); + nexterror(); + } + + rv = 0; + while(b != nil){ + l = n = BLEN(b); + h = s->diglen + 2; + + /* trim to maximum block size */ + pad = 0; + if(l > s->max){ + l = s->max; + } else if(s->blocklen != 1){ + pad = (l + s->diglen)%s->blocklen; + if(pad){ + if(l > s->maxpad){ + pad = 0; + l = s->maxpad; + } else { + pad = s->blocklen - pad; + h++; + } + } + } + + rv += l; + if(l != n){ + nb = allocb(l + h + pad); + memmove(nb->wp + h, b->rp, l); + nb->wp += l + h; + b->rp += l; + } else { + /* add header space */ + nb = padblock(b, h); + b = 0; + } + l += s->diglen; + + /* SSL style count */ + if(pad){ + nb = padblock(nb, -pad); + randfill(nb->wp, pad); + nb->wp += pad; + l += pad; + + p = nb->rp; + p[0] = (l>>8); + p[1] = l; + p[2] = pad; + offset = 3; + } else { + p = nb->rp; + p[0] = (l>>8) | 0x80; + p[1] = l; + offset = 2; + } + + switch(s->state){ + case Sencrypting: + nb = encryptb(s, nb, offset); + break; + case Sdigesting: + nb = digestb(s, nb, offset); + break; + case Sdigenc: + nb = digestb(s, nb, offset); + nb = encryptb(s, nb, offset); + break; + } + + s->out.mid++; + + l = BLEN(nb); + s->c->dev->bwrite(s->c, nb, s->c->offset); + s->c->offset += l; + } + + poperror(); + return rv; +} + +static void +setsecret(OneWay *w, uchar *secret, int n) +{ + if(w->secret) + free(w->secret); + + w->secret = smalloc(n); + memmove(w->secret, secret, n); + w->slen = n; +} + +static void +initDESkey(OneWay *w) +{ + if(w->state){ + free(w->state); + w->state = 0; + } + + w->state = smalloc(sizeof(DESstate)); + if(w->slen >= 16) + setupDESstate(w->state, w->secret, w->secret+8); + else if(w->slen >= 8) + setupDESstate(w->state, w->secret, 0); + else + error("secret too short"); +} + +/* + * 40 bit DES is the same as 56 bit DES. However, + * 16 bits of the key are masked to zero. 
+ */ +static void +initDESkey_40(OneWay *w) +{ + uchar key[8]; + + if(w->state){ + free(w->state); + w->state = 0; + } + + if(w->slen >= 8){ + memmove(key, w->secret, 8); + key[0] &= 0x0f; + key[2] &= 0x0f; + key[4] &= 0x0f; + key[6] &= 0x0f; + } + + w->state = smalloc(sizeof(DESstate)); + if(w->slen >= 16) + setupDESstate(w->state, key, w->secret+8); + else if(w->slen >= 8) + setupDESstate(w->state, key, 0); + else + error("secret too short"); +} + +static void +initRC4key(OneWay *w) +{ + if(w->state){ + free(w->state); + w->state = 0; + } + + w->state = smalloc(sizeof(RC4state)); + setupRC4state(w->state, w->secret, w->slen); +} + +/* + * 40 bit RC4 is the same as n-bit RC4. However, + * we ignore all but the first 40 bits of the key. + */ +static void +initRC4key_40(OneWay *w) +{ + if(w->state){ + free(w->state); + w->state = 0; + } + + if(w->slen > 5) + w->slen = 5; + + w->state = smalloc(sizeof(RC4state)); + setupRC4state(w->state, w->secret, w->slen); +} + +/* + * 128 bit RC4 is the same as n-bit RC4. However, + * we ignore all but the first 128 bits of the key. 
+ */ +static void +initRC4key_128(OneWay *w) +{ + if(w->state){ + free(w->state); + w->state = 0; + } + + if(w->slen > 16) + w->slen = 16; + + w->state = smalloc(sizeof(RC4state)); + setupRC4state(w->state, w->secret, w->slen); +} + + +typedef struct Hashalg Hashalg; +struct Hashalg +{ + char *name; + int diglen; + DigestState *(*hf)(uchar*, ulong, uchar*, DigestState*); +}; + +Hashalg hashtab[] = +{ + { "md4", MD4dlen, md4, }, + { "md5", MD5dlen, md5, }, + { "sha1", SHA1dlen, sha1, }, + { "sha", SHA1dlen, sha1, }, + { 0 } +}; + +static int +parsehashalg(char *p, Dstate *s) +{ + Hashalg *ha; + + for(ha = hashtab; ha->name; ha++){ + if(strcmp(p, ha->name) == 0){ + s->hf = ha->hf; + s->diglen = ha->diglen; + s->state &= ~Sclear; + s->state |= Sdigesting; + return 0; + } + } + return -1; +} + +typedef struct Encalg Encalg; +struct Encalg +{ + char *name; + int blocklen; + int alg; + void (*keyinit)(OneWay*); +}; + +#ifdef NOSPOOKS +Encalg encrypttab[] = +{ + { "descbc", 8, DESCBC, initDESkey, }, /* DEPRECATED -- use des_56_cbc */ + { "desecb", 8, DESECB, initDESkey, }, /* DEPRECATED -- use des_56_ecb */ + { "des_56_cbc", 8, DESCBC, initDESkey, }, + { "des_56_ecb", 8, DESECB, initDESkey, }, + { "des_40_cbc", 8, DESCBC, initDESkey_40, }, + { "des_40_ecb", 8, DESECB, initDESkey_40, }, + { "rc4", 1, RC4, initRC4key_40, }, /* DEPRECATED -- use rc4_X */ + { "rc4_256", 1, RC4, initRC4key, }, + { "rc4_128", 1, RC4, initRC4key_128, }, + { "rc4_40", 1, RC4, initRC4key_40, }, + { 0 } +}; +#else +Encalg encrypttab[] = +{ + { "des_40_cbc", 8, DESCBC, initDESkey_40, }, + { "des_40_ecb", 8, DESECB, initDESkey_40, }, + { "rc4", 1, RC4, initRC4key_40, }, /* DEPRECATED -- use rc4_X */ + { "rc4_40", 1, RC4, initRC4key_40, }, + { 0 } +}; +#endif NOSPOOKS + +static int +parseencryptalg(char *p, Dstate *s) +{ + Encalg *ea; + + for(ea = encrypttab; ea->name; ea++){ + if(strcmp(p, ea->name) == 0){ + s->encryptalg = ea->alg; + s->blocklen = ea->blocklen; + (*ea->keyinit)(&s->in); + 
(*ea->keyinit)(&s->out); + s->state &= ~Sclear; + s->state |= Sencrypting; + return 0; + } + } + return -1; +} + +static long +sslwrite(Chan *c, void *a, long n, vlong) +{ + Dstate * volatile s; + Block * volatile b; + int l, t; + char *p, *np, *e, buf[128]; + uchar *x; + + s = dstate[CONV(c->qid)]; + if(s == 0) + panic("sslwrite"); + + t = TYPE(c->qid); + if(t == Qdata){ + if(s->state == Sincomplete) + error(Ebadusefd); + + /* lock should a write gets split over multiple records */ + if(waserror()){ + qunlock(&s->out.q); + nexterror(); + } + qlock(&s->out.q); + + p = a; + e = p + n; + do { + l = e - p; + if(l > s->max) + l = s->max; + + b = allocb(l); + if(waserror()){ + freeb(b); + nexterror(); + } + memmove(b->wp, p, l); + poperror(); + b->wp += l; + + sslput(s, b); + + p += l; + } while(p < e); + + poperror(); + qunlock(&s->out.q); + return n; + } + + /* mutex with operations using what we're about to change */ + if(waserror()){ + qunlock(&s->in.ctlq); + qunlock(&s->out.q); + nexterror(); + } + qlock(&s->in.ctlq); + qlock(&s->out.q); + + switch(t){ + default: + panic("sslwrite"); + case Qsecretin: + setsecret(&s->in, a, n); + goto out; + case Qsecretout: + setsecret(&s->out, a, n); + goto out; + case Qctl: + break; + } + + if(n >= sizeof(buf)) + error("arg too long"); + strncpy(buf, a, n); + buf[n] = 0; + p = strchr(buf, '\n'); + if(p) + *p = 0; + p = strchr(buf, ' '); + if(p) + *p++ = 0; + + if(strcmp(buf, "fd") == 0){ + s->c = buftochan(p); + + /* default is clear (msg delimiters only) */ + s->state = Sclear; + s->blocklen = 1; + s->diglen = 0; + s->maxpad = s->max = (1<<15) - s->diglen - 1; + s->in.mid = 0; + s->out.mid = 0; + } else if(strcmp(buf, "alg") == 0 && p != 0){ + s->blocklen = 1; + s->diglen = 0; + + if(s->c == 0) + error("must set fd before algorithm"); + + s->state = Sclear; + s->maxpad = s->max = (1<<15) - s->diglen - 1; + if(strcmp(p, "clear") == 0){ + goto out; + } + + if(s->in.secret && s->out.secret == 0) + setsecret(&s->out, s->in.secret, 
s->in.slen); + if(s->out.secret && s->in.secret == 0) + setsecret(&s->in, s->out.secret, s->out.slen); + if(s->in.secret == 0 || s->out.secret == 0) + error("algorithm but no secret"); + + s->hf = 0; + s->encryptalg = Noencryption; + s->blocklen = 1; + + for(;;){ + np = strchr(p, ' '); + if(np) + *np++ = 0; + + if(parsehashalg(p, s) < 0) + if(parseencryptalg(p, s) < 0) + error("bad algorithm"); + + if(np == 0) + break; + p = np; + } + + if(s->hf == 0 && s->encryptalg == Noencryption) + error("bad algorithm"); + + if(s->blocklen != 1){ + s->max = (1<<15) - s->diglen - 1; + s->max -= s->max % s->blocklen; + s->maxpad = (1<<14) - s->diglen - 1; + s->maxpad -= s->maxpad % s->blocklen; + } else + s->maxpad = s->max = (1<<15) - s->diglen - 1; + } else if(strcmp(buf, "secretin") == 0 && p != 0) { + l = (strlen(p)*3)/2; + x = smalloc(l); + t = dec64(x, l, p, strlen(p)); + setsecret(&s->in, x, t); + free(x); + } else if(strcmp(buf, "secretout") == 0 && p != 0) { + l = (strlen(p)*3)/2 + 1; + x = smalloc(l); + t = dec64(x, l, p, strlen(p)); + setsecret(&s->out, x, t); + free(x); + } else + error(Ebadarg); + +out: + qunlock(&s->in.ctlq); + qunlock(&s->out.q); + poperror(); + return n; +} + +static void +sslinit(void) +{ + struct Encalg *e; + struct Hashalg *h; + int n; + char *cp; + + n = 1; + for(e = encrypttab; e->name != nil; e++) + n += strlen(e->name) + 1; + cp = encalgs = smalloc(n); + for(e = encrypttab;;){ + strcpy(cp, e->name); + cp += strlen(e->name); + e++; + if(e->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; + + n = 1; + for(h = hashtab; h->name != nil; h++) + n += strlen(h->name) + 1; + cp = hashalgs = smalloc(n); + for(h = hashtab;;){ + strcpy(cp, h->name); + cp += strlen(h->name); + h++; + if(h->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; +} + +Dev ssldevtab = { + 'D', + "ssl", + + devreset, + sslinit, + devshutdown, + sslattach, + sslwalk, + sslstat, + sslopen, + devcreate, + sslclose, + sslread, + sslbread, + sslwrite, + sslbwrite, + devremove, 
+ sslwstat, +}; + +static Block* +encryptb(Dstate *s, Block *b, int offset) +{ + uchar *p, *ep, *p2, *ip, *eip; + DESstate *ds; + + switch(s->encryptalg){ + case DESECB: + ds = s->out.state; + ep = b->rp + BLEN(b); + for(p = b->rp + offset; p < ep; p += 8) + block_cipher(ds->expanded, p, 0); + break; + case DESCBC: + ds = s->out.state; + ep = b->rp + BLEN(b); + for(p = b->rp + offset; p < ep; p += 8){ + p2 = p; + ip = ds->ivec; + for(eip = ip+8; ip < eip; ) + *p2++ ^= *ip++; + block_cipher(ds->expanded, p, 0); + memmove(ds->ivec, p, 8); + } + break; + case RC4: + rc4(s->out.state, b->rp + offset, BLEN(b) - offset); + break; + } + return b; +} + +static Block* +decryptb(Dstate *s, Block *bin) +{ + Block *b, **l; + uchar *p, *ep, *tp, *ip, *eip; + DESstate *ds; + uchar tmp[8]; + int i; + + l = &bin; + for(b = bin; b; b = b->next){ + /* make sure we have a multiple of s->blocklen */ + if(s->blocklen > 1){ + i = BLEN(b); + if(i % s->blocklen){ + *l = b = pullupblock(b, i + s->blocklen - (i%s->blocklen)); + if(b == 0) + error("ssl encrypted message too short"); + } + } + l = &b->next; + + /* decrypt */ + switch(s->encryptalg){ + case DESECB: + ds = s->in.state; + ep = b->rp + BLEN(b); + for(p = b->rp; p < ep; p += 8) + block_cipher(ds->expanded, p, 1); + break; + case DESCBC: + ds = s->in.state; + ep = b->rp + BLEN(b); + for(p = b->rp; p < ep;){ + memmove(tmp, p, 8); + block_cipher(ds->expanded, p, 1); + tp = tmp; + ip = ds->ivec; + for(eip = ip+8; ip < eip; ){ + *p++ ^= *ip; + *ip++ = *tp++; + } + } + break; + case RC4: + rc4(s->in.state, b->rp, BLEN(b)); + break; + } + } + return bin; +} + +static Block* +digestb(Dstate *s, Block *b, int offset) +{ + uchar *p; + DigestState ss; + uchar msgid[4]; + ulong n, h; + OneWay *w; + + w = &s->out; + + memset(&ss, 0, sizeof(ss)); + h = s->diglen + offset; + n = BLEN(b) - h; + + /* hash secret + message */ + (*s->hf)(w->secret, w->slen, 0, &ss); + (*s->hf)(b->rp + h, n, 0, &ss); + + /* hash message id */ + p = msgid; + n = 
w->mid; + *p++ = n>>24; + *p++ = n>>16; + *p++ = n>>8; + *p = n; + (*s->hf)(msgid, 4, b->rp + offset, &ss); + + return b; +} + +static void +checkdigestb(Dstate *s, Block *bin) +{ + uchar *p; + DigestState ss; + uchar msgid[4]; + int n, h; + OneWay *w; + uchar digest[128]; + Block *b; + + w = &s->in; + + memset(&ss, 0, sizeof(ss)); + + /* hash secret */ + (*s->hf)(w->secret, w->slen, 0, &ss); + + /* hash message */ + h = s->diglen; + for(b = bin; b; b = b->next){ + n = BLEN(b) - h; + if(n < 0) + panic("checkdigestb"); + (*s->hf)(b->rp + h, n, 0, &ss); + h = 0; + } + + /* hash message id */ + p = msgid; + n = w->mid; + *p++ = n>>24; + *p++ = n>>16; + *p++ = n>>8; + *p = n; + (*s->hf)(msgid, 4, digest, &ss); + + if(memcmp(digest, bin->rp, s->diglen) != 0) + error("bad digest"); +} + +/* get channel associated with an fd */ +static Chan* +buftochan(char *p) +{ + Chan *c; + int fd; + + if(p == 0) + error(Ebadarg); + fd = strtoul(p, 0, 0); + if(fd < 0) + error(Ebadarg); + c = fdtochan(fd, -1, 0, 1); /* error check and inc ref */ + if(c->dev == &ssldevtab){ + cclose(c); + error("cannot ssl encrypt devssl files"); + } + return c; +} + +/* hand up a digest connection */ +static void +sslhangup(Dstate *s) +{ + Block *b; + + qlock(&s->in.q); + for(b = s->processed; b; b = s->processed){ + s->processed = b->next; + freeb(b); + } + if(s->unprocessed){ + freeb(s->unprocessed); + s->unprocessed = 0; + } + s->state = Sincomplete; + qunlock(&s->in.q); +} + +static Dstate* +dsclone(Chan *ch) +{ + int i; + Dstate *ret; + + if(waserror()) { + unlock(&dslock); + nexterror(); + } + lock(&dslock); + ret = nil; + for(i=0; i= dshiwat) + dshiwat++; + memset(s, 0, sizeof(*s)); + s->state = Sincomplete; + s->ref = 1; + kstrdup(&s->user, up->user); + s->perm = 0660; + t = TYPE(ch->qid); + if(t == Qclonus) + t = Qctl; + ch->qid.path = QID(pp - dstate, t); + ch->qid.vers = 0; +} diff -Nru 0/sys/src/nix/port/devtab.c 4/sys/src/nix/port/devtab.c --- 0/sys/src/nix/port/devtab.c Thu Jan 1 00:00:00 
1970 +++ 4/sys/src/nix/port/devtab.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,89 @@ +/* + * Stub. + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +extern Dev* devtab[]; + +void +devtabreset(void) +{ + int i; + + for(i = 0; devtab[i] != nil; i++) + devtab[i]->reset(); +} + +void +devtabinit(void) +{ + int i; + + for(i = 0; devtab[i] != nil; i++) + devtab[i]->init(); +} + +void +devtabshutdown(void) +{ + int i; + + /* + * Shutdown in reverse order. + */ + for(i = 0; devtab[i] != nil; i++) + ; + for(i--; i >= 0; i--) + devtab[i]->shutdown(); +} + + +Dev* +devtabget(int dc, int user) +{ + int i; + + for(i = 0; devtab[i] != nil; i++){ + if(devtab[i]->dc == dc) + return devtab[i]; + } + + if(user == 0) + panic("devtabget %C\n", dc); + + return nil; +} + +long +devtabread(Chan*, void* buf, long n, vlong off) +{ + int i; + Dev *dev; + char *alloc, *e, *p; + + alloc = malloc(READSTR); + if(alloc == nil) + error(Enomem); + + p = alloc; + e = p + READSTR; + for(i = 0; devtab[i] != nil; i++){ + dev = devtab[i]; + p = seprint(p, e, "#%C %s\n", dev->dc, dev->name); + } + + if(waserror()){ + free(alloc); + nexterror(); + } + n = readstr(off, buf, n, alloc); + free(alloc); + poperror(); + + return n; +} diff -Nru 0/sys/src/nix/port/devtls.c 4/sys/src/nix/port/devtls.c --- 0/sys/src/nix/port/devtls.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devtls.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2181 @@ +/* + * devtls - record layer for transport layer security 1.0 and secure sockets layer 3.0 + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +typedef struct OneWay OneWay; +typedef struct Secret Secret; +typedef struct TlsRec TlsRec; +typedef struct TlsErrs TlsErrs; + +enum { + Statlen= 1024, /* max. 
length of status or stats message */ + /* buffer limits */ + MaxRecLen = 1<<14, /* max payload length of a record layer message */ + MaxCipherRecLen = MaxRecLen + 2048, + RecHdrLen = 5, + MaxMacLen = SHA1dlen, + + /* protocol versions we can accept */ + TLSVersion = 0x0301, + SSL3Version = 0x0300, + ProtocolVersion = 0x0301, /* maximum version we speak */ + MinProtoVersion = 0x0300, /* limits on version we accept */ + MaxProtoVersion = 0x03ff, + + /* connection states */ + SHandshake = 1 << 0, /* doing handshake */ + SOpen = 1 << 1, /* application data can be sent */ + SRClose = 1 << 2, /* remote side has closed down */ + SLClose = 1 << 3, /* sent a close notify alert */ + SAlert = 1 << 5, /* sending or sent a fatal alert */ + SError = 1 << 6, /* some sort of error has occured */ + SClosed = 1 << 7, /* it is all over */ + + /* record types */ + RChangeCipherSpec = 20, + RAlert, + RHandshake, + RApplication, + + SSL2ClientHello = 1, + HSSL2ClientHello = 9, /* local convention; see tlshand.c */ + + /* alerts */ + ECloseNotify = 0, + EUnexpectedMessage = 10, + EBadRecordMac = 20, + EDecryptionFailed = 21, + ERecordOverflow = 22, + EDecompressionFailure = 30, + EHandshakeFailure = 40, + ENoCertificate = 41, + EBadCertificate = 42, + EUnsupportedCertificate = 43, + ECertificateRevoked = 44, + ECertificateExpired = 45, + ECertificateUnknown = 46, + EIllegalParameter = 47, + EUnknownCa = 48, + EAccessDenied = 49, + EDecodeError = 50, + EDecryptError = 51, + EExportRestriction = 60, + EProtocolVersion = 70, + EInsufficientSecurity = 71, + EInternalError = 80, + EUserCanceled = 90, + ENoRenegotiation = 100, + + EMAX = 256 +}; + +struct Secret +{ + char *encalg; /* name of encryption alg */ + char *hashalg; /* name of hash alg */ + int (*enc)(Secret*, uchar*, int); + int (*dec)(Secret*, uchar*, int); + int (*unpad)(uchar*, int, int); + DigestState *(*mac)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*); + int block; /* encryption block len, 0 if none */ + int maclen; + 
void *enckey; /* cipher state allocated by the encalg's initkey */ + uchar mackey[MaxMacLen]; +}; + +struct OneWay +{ + QLock io; /* locks io access */ + QLock seclock; /* locks secret parameters */ + ulong seq; /* record sequence number fed to the mac */ + Secret *sec; /* cipher in use */ + Secret *new; /* cipher waiting for enable */ +}; + +struct TlsRec +{ + Chan *c; /* io channel */ + int ref; /* serialized by tdlock for atomic destroy */ + int version; /* version of the protocol we are speaking */ + char verset; /* version has been set */ + char opened; /* opened command ever issued? */ + char err[ERRMAX]; /* error message to return to handshake requests */ + vlong handin; /* bytes communicated by the record layer */ + vlong handout; + vlong datain; + vlong dataout; + + Lock statelk; /* protects state */ + int state; + int debug; /* nonzero enables pprint tracing */ + + /* record layer mac functions for different protocol versions */ + void (*packMac)(Secret*, uchar*, uchar*, uchar*, uchar*, int, uchar*); + + /* input side -- protected by in.io */ + OneWay in; + Block *processed; /* next bunch of application data */ + Block *unprocessed; /* data read from c but not parsed into records */ + + /* handshake queue */ + Lock hqlock; /* protects hqref, alloc & free of handq, hprocessed */ + int hqref; + Queue *handq; /* queue of handshake messages */ + Block *hprocessed; /* remainder of last block read from handq */ + QLock hqread; /* protects reads for hprocessed, handq */ + + /* output side */ + OneWay out; + + /* protections */ + char *user; /* owner name checked by tlsopen and tlswstat */ + int perm; /* permission bits checked by tlsopen */ +}; + +struct TlsErrs{ + int err; /* alert code as used inside this file */ + int sslerr; /* code sent on the wire when speaking SSL 3.0 */ + int tlserr; /* code sent on the wire otherwise */ + int fatal; /* nonzero if the alert is fatal */ + char *msg; +}; + +static TlsErrs tlserrs[] = { + {ECloseNotify, ECloseNotify, ECloseNotify, 0, "close notify"}, + {EUnexpectedMessage, EUnexpectedMessage, EUnexpectedMessage, 1, "unexpected message"}, + {EBadRecordMac, EBadRecordMac, EBadRecordMac, 1, "bad record mac"}, + {EDecryptionFailed, EIllegalParameter, EDecryptionFailed, 1, "decryption failed"}, + {ERecordOverflow, EIllegalParameter, ERecordOverflow, 1, "record too long"}, + {EDecompressionFailure, EDecompressionFailure, 
EDecompressionFailure, 1, "decompression failed"}, + {EHandshakeFailure, EHandshakeFailure, EHandshakeFailure, 1, "could not negotiate acceptable security parameters"}, + {ENoCertificate, ENoCertificate, ECertificateUnknown, 1, "no appropriate certificate available"}, + {EBadCertificate, EBadCertificate, EBadCertificate, 1, "corrupted or invalid certificate"}, + {EUnsupportedCertificate, EUnsupportedCertificate, EUnsupportedCertificate, 1, "unsupported certificate type"}, + {ECertificateRevoked, ECertificateRevoked, ECertificateRevoked, 1, "revoked certificate"}, + {ECertificateExpired, ECertificateExpired, ECertificateExpired, 1, "expired certificate"}, + {ECertificateUnknown, ECertificateUnknown, ECertificateUnknown, 1, "unacceptable certificate"}, + {EIllegalParameter, EIllegalParameter, EIllegalParameter, 1, "illegal parameter"}, + {EUnknownCa, EHandshakeFailure, EUnknownCa, 1, "unknown certificate authority"}, + {EAccessDenied, EHandshakeFailure, EAccessDenied, 1, "access denied"}, + {EDecodeError, EIllegalParameter, EDecodeError, 1, "error decoding message"}, + {EDecryptError, EIllegalParameter, EDecryptError, 1, "error decrypting message"}, + {EExportRestriction, EHandshakeFailure, EExportRestriction, 1, "export restriction violated"}, + {EProtocolVersion, EIllegalParameter, EProtocolVersion, 1, "protocol version not supported"}, + {EInsufficientSecurity, EHandshakeFailure, EInsufficientSecurity, 1, "stronger security routines required"}, + {EInternalError, EHandshakeFailure, EInternalError, 1, "internal error"}, + {EUserCanceled, ECloseNotify, EUserCanceled, 0, "handshake canceled by user"}, + {ENoRenegotiation, EUnexpectedMessage, ENoRenegotiation, 0, "no renegotiation"}, +}; + +enum +{ + /* max. 
open tls connections */ + MaxTlsDevs = 1024 +}; + +static Lock tdlock; +static int tdhiwat; +static int maxtlsdevs = 128; +static TlsRec **tlsdevs; +static char **trnames; +static char *encalgs; +static char *hashalgs; + +enum{ + Qtopdir = 1, /* top level directory */ + Qprotodir, + Qclonus, + Qencalgs, + Qhashalgs, + Qconvdir, /* directory for a conversation */ + Qdata, + Qctl, + Qhand, + Qstatus, + Qstats, +}; + +#define TYPE(x) ((x).path & 0xf) +#define CONV(x) (((x).path >> 5)&(MaxTlsDevs-1)) +#define QID(c, y) (((c)<<5) | (y)) + +static void checkstate(TlsRec *, int, int); +static void ensure(TlsRec*, Block**, int); +static void consume(Block**, uchar*, int); +static Chan* buftochan(char*); +static void tlshangup(TlsRec*); +static void tlsError(TlsRec*, char *); +static void alertHand(TlsRec*, char *); +static TlsRec *newtls(Chan *c); +static TlsRec *mktlsrec(void); +static DigestState*sslmac_md5(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s); +static DigestState*sslmac_sha1(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s); +static DigestState*nomac(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s); +static void sslPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac); +static void tlsPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac); +static void put64(uchar *p, vlong x); +static void put32(uchar *p, u32int); +static void put24(uchar *p, int); +static void put16(uchar *p, int); +static u32int get32(uchar *p); +static int get16(uchar *p); +static void tlsSetState(TlsRec *tr, int new, int old); +static void rcvAlert(TlsRec *tr, int err); +static void sendAlert(TlsRec *tr, int err); +static void rcvError(TlsRec *tr, int err, char *msg, ...); +static int rc4enc(Secret *sec, uchar *buf, int n); +static int des3enc(Secret *sec, uchar *buf, int n); +static int des3dec(Secret *sec, uchar *buf, int 
n); +static int noenc(Secret *sec, uchar *buf, int n); +static int sslunpad(uchar *buf, int n, int block); +static int tlsunpad(uchar *buf, int n, int block); +static void freeSec(Secret *sec); +static char *tlsstate(int s); +static void pdump(int, void*, char*); + +#pragma varargck argpos rcvError 3 + +static char *tlsnames[] = { +[Qclonus] "clone", +[Qencalgs] "encalgs", +[Qhashalgs] "hashalgs", +[Qdata] "data", +[Qctl] "ctl", +[Qhand] "hand", +[Qstatus] "status", +[Qstats] "stats", +}; + +static int convdir[] = { Qctl, Qdata, Qhand, Qstatus, Qstats }; + +static int +tlsgen(Chan *c, char*, Dirtab *, int, int s, Dir *dp) +{ + Qid q; + TlsRec *tr; + char *name, *nm; + int perm, t; + + q.vers = 0; + q.type = QTFILE; + + t = TYPE(c->qid); + switch(t) { + case Qtopdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, "#a", 0, eve, 0555, dp); + return 1; + } + if(s > 0) + return -1; + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "tls", 0, eve, 0555, dp); + return 1; + case Qprotodir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qtopdir); + q.type = QTDIR; + devdir(c, q, ".", 0, eve, 0555, dp); + return 1; + } + if(s < 3){ + switch(s) { + default: + return -1; + case 0: + q.path = QID(0, Qclonus); + break; + case 1: + q.path = QID(0, Qencalgs); + break; + case 2: + q.path = QID(0, Qhashalgs); + break; + } + perm = 0444; + if(TYPE(q) == Qclonus) + perm = 0555; + devdir(c, q, tlsnames[TYPE(q)], 0, eve, perm, dp); + return 1; + } + s -= 3; + if(s >= tdhiwat) + return -1; + q.path = QID(s, Qconvdir); + q.type = QTDIR; + lock(&tdlock); + tr = tlsdevs[s]; + if(tr != nil) + nm = tr->user; + else + nm = eve; + if((name = trnames[s]) == nil){ + name = trnames[s] = smalloc(16); + sprint(name, "%d", s); + } + devdir(c, q, name, 0, nm, 0555, dp); + unlock(&tdlock); + return 1; + case Qconvdir: + if(s == DEVDOTDOT){ + q.path = QID(0, Qprotodir); + q.type = QTDIR; + devdir(c, q, "tls", 0, eve, 0555, dp); + return 1; + } + if(s < 0 || s >= 
nelem(convdir)) + return -1; + lock(&tdlock); + tr = tlsdevs[CONV(c->qid)]; + if(tr != nil){ + nm = tr->user; + perm = tr->perm; + }else{ + perm = 0; + nm = eve; + } + t = convdir[s]; + if(t == Qstatus || t == Qstats) + perm &= 0444; + q.path = QID(CONV(c->qid), t); + devdir(c, q, tlsnames[t], 0, nm, perm, dp); + unlock(&tdlock); + return 1; + case Qclonus: + case Qencalgs: + case Qhashalgs: + perm = 0444; + if(t == Qclonus) + perm = 0555; + devdir(c, c->qid, tlsnames[t], 0, eve, perm, dp); + return 1; + default: + lock(&tdlock); + tr = tlsdevs[CONV(c->qid)]; + if(tr != nil){ + nm = tr->user; + perm = tr->perm; + }else{ + perm = 0; + nm = eve; + } + if(t == Qstatus || t == Qstats) + perm &= 0444; + devdir(c, c->qid, tlsnames[t], 0, nm, perm, dp); + unlock(&tdlock); + return 1; + } +} + +static Chan* +tlsattach(char *spec) +{ + Chan *c; + + c = devattach('a', spec); + c->qid.path = QID(0, Qtopdir); + c->qid.type = QTDIR; + c->qid.vers = 0; + return c; +} + +static Walkqid* +tlswalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, tlsgen); +} + +static long +tlsstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, tlsgen); +} + +static Chan* +tlsopen(Chan *c, int omode) +{ + TlsRec *tr, **pp; + int t, perm; + + perm = 0; + omode &= 3; + switch(omode) { + case OREAD: + perm = 4; + break; + case OWRITE: + perm = 2; + break; + case ORDWR: + perm = 6; + break; + } + + t = TYPE(c->qid); + switch(t) { + default: + panic("tlsopen"); + case Qtopdir: + case Qprotodir: + case Qconvdir: + if(omode != OREAD) + error(Eperm); + break; + case Qclonus: + tr = newtls(c); + if(tr == nil) + error(Enodev); + break; + case Qctl: + case Qdata: + case Qhand: + case Qstatus: + case Qstats: + if((t == Qstatus || t == Qstats) && omode != OREAD) + error(Eperm); + if(waserror()) { + unlock(&tdlock); + nexterror(); + } + lock(&tdlock); + pp = &tlsdevs[CONV(c->qid)]; + tr = *pp; + if(tr == nil) + error("must open connection using clone"); 
+ if((perm & (tr->perm>>6)) != perm + && (strcmp(up->user, tr->user) != 0 + || (perm & tr->perm) != perm)) + error(Eperm); + if(t == Qhand){ + if(waserror()){ + unlock(&tr->hqlock); + nexterror(); + } + lock(&tr->hqlock); + if(tr->handq != nil) + error(Einuse); + tr->handq = qopen(2 * MaxCipherRecLen, 0, nil, nil); + if(tr->handq == nil) + error("cannot allocate handshake queue"); + tr->hqref = 1; + unlock(&tr->hqlock); + poperror(); + } + tr->ref++; + unlock(&tdlock); + poperror(); + break; + case Qencalgs: + case Qhashalgs: + if(omode != OREAD) + error(Eperm); + break; + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->iounit = qiomaxatomic; + return c; +} + +static long +tlswstat(Chan *c, uchar *dp, long n) +{ + Dir *d; + TlsRec *tr; + int rv; + + d = nil; + if(waserror()){ + free(d); + unlock(&tdlock); + nexterror(); + } + + lock(&tdlock); + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + error(Ebadusefd); + if(strcmp(tr->user, up->user) != 0) + error(Eperm); + + d = smalloc(n + sizeof *d); + rv = convM2D(dp, n, &d[0], (char*) &d[1]); + if(rv == 0) + error(Eshortstat); + if(!emptystr(d->uid)) + kstrdup(&tr->user, d->uid); + if(d->mode != ~0UL) + tr->perm = d->mode; + + free(d); + poperror(); + unlock(&tdlock); + + return rv; +} + +static void +dechandq(TlsRec *tr) +{ + lock(&tr->hqlock); + if(--tr->hqref == 0){ + if(tr->handq != nil){ + qfree(tr->handq); + tr->handq = nil; + } + if(tr->hprocessed != nil){ + freeb(tr->hprocessed); + tr->hprocessed = nil; + } + } + unlock(&tr->hqlock); +} + +static void +tlsclose(Chan *c) +{ + TlsRec *tr; + int t; + + t = TYPE(c->qid); + switch(t) { + case Qctl: + case Qdata: + case Qhand: + case Qstatus: + case Qstats: + if((c->flag & COPEN) == 0) + break; + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + break; + + if(t == Qhand) + dechandq(tr); + + lock(&tdlock); + if(--tr->ref > 0) { + unlock(&tdlock); + return; + } + tlsdevs[CONV(c->qid)] = nil; + unlock(&tdlock); + + if(tr->c != nil && !waserror()){ + 
checkstate(tr, 0, SOpen|SHandshake|SRClose); + sendAlert(tr, ECloseNotify); + poperror(); + } + tlshangup(tr); + if(tr->c != nil) + cclose(tr->c); + freeSec(tr->in.sec); + freeSec(tr->in.new); + freeSec(tr->out.sec); + freeSec(tr->out.new); + free(tr->user); + free(tr); + break; + } +} + +/* + * make sure we have at least 'n' bytes in list 'l' + */ +static void +ensure(TlsRec *s, Block **l, int n) +{ + int sofar, i; + Block *b, *bl; + + sofar = 0; + for(b = *l; b; b = b->next){ + sofar += BLEN(b); + if(sofar >= n) + return; + l = &b->next; + } + + while(sofar < n){ + bl = s->c->dev->bread(s->c, MaxCipherRecLen + RecHdrLen, 0); + if(bl == 0) + error(Ehungup); + *l = bl; + i = 0; + for(b = bl; b; b = b->next){ + i += BLEN(b); + l = &b->next; + } + if(i == 0) + error(Ehungup); + sofar += i; + } +if(s->debug) pprint("ensure read %d\n", sofar); +} + +/* + * copy 'n' bytes from 'l' into 'p' and free + * the bytes in 'l' + */ +static void +consume(Block **l, uchar *p, int n) +{ + Block *b; + int i; + + for(; *l && n > 0; n -= i){ + b = *l; + i = BLEN(b); + if(i > n) + i = n; + memmove(p, b->rp, i); + b->rp += i; + p += i; + if(BLEN(b) < 0) + panic("consume"); + if(BLEN(b)) + break; + *l = b->next; + freeb(b); + } +} + +/* + * give back n bytes + */ +static void +regurgitate(TlsRec *s, uchar *p, int n) +{ + Block *b; + + if(n <= 0) + return; + b = s->unprocessed; + if(s->unprocessed == nil || b->rp - b->base < n) { + b = allocb(n); + memmove(b->wp, p, n); + b->wp += n; + b->next = s->unprocessed; + s->unprocessed = b; + } else { + b->rp -= n; + memmove(b->rp, p, n); + } +} + +/* + * remove at most n bytes from the queue + */ +static Block* +qgrab(Block **l, int n) +{ + Block *bb, *b; + int i; + + b = *l; + if(BLEN(b) == n){ + *l = b->next; + b->next = nil; + return b; + } + + i = 0; + for(bb = b; bb != nil && i < n; bb = bb->next) + i += BLEN(bb); + if(i > n) + i = n; + + bb = allocb(i); + consume(l, bb->wp, i); + bb->wp += i; + return bb; +} + +static void 
+tlsclosed(TlsRec *tr, int new) +{ + lock(&tr->statelk); + if(tr->state == SOpen || tr->state == SHandshake) + tr->state = new; + else if((new | tr->state) == (SRClose|SLClose)) + tr->state = SClosed; + unlock(&tr->statelk); + alertHand(tr, "close notify"); +} + +/* + * read and process one tls record layer message + * must be called with tr->in.io held + * We can't let Eintrs lose data, since doing so will get + * us out of sync with the sender and break the reliablity + * of the channel. Eintr only happens during the reads in + * consume. Therefore we put back any bytes consumed before + * the last call to ensure. + */ +static void +tlsrecread(TlsRec *tr) +{ + OneWay *volatile in; + Block *volatile b; + uchar *p, seq[8], header[RecHdrLen], hmac[MD5dlen]; + int volatile nconsumed; + int len, type, ver, unpad_len; + + nconsumed = 0; + if(waserror()){ + if(strcmp(up->errstr, Eintr) == 0 && !waserror()){ + regurgitate(tr, header, nconsumed); + poperror(); + }else + tlsError(tr, "channel error"); + nexterror(); + } + ensure(tr, &tr->unprocessed, RecHdrLen); + consume(&tr->unprocessed, header, RecHdrLen); +if(tr->debug)pprint("consumed %d header\n", RecHdrLen); + nconsumed = RecHdrLen; + + if((tr->handin == 0) && (header[0] & 0x80)){ + /* Cope with an SSL3 ClientHello expressed in SSL2 record format. + This is sent by some clients that we must interoperate + with, such as Java's JSSE and Microsoft's Internet Explorer. 
*/ + len = (get16(header) & ~0x8000) - 3; + type = header[2]; + ver = get16(header + 3); + if(type != SSL2ClientHello || len < 22) + rcvError(tr, EProtocolVersion, "invalid initial SSL2-like message"); + }else{ /* normal SSL3 record format */ + type = header[0]; + ver = get16(header+1); + len = get16(header+3); + } + if(ver != tr->version && (tr->verset || ver < MinProtoVersion || ver > MaxProtoVersion)) + rcvError(tr, EProtocolVersion, "devtls expected ver=%x%s, saw (len=%d) type=%x ver=%x '%.12s'", + tr->version, tr->verset?"/set":"", len, type, ver, (char*)header); + if(len > MaxCipherRecLen || len < 0) + rcvError(tr, ERecordOverflow, "record message too long %d", len); + ensure(tr, &tr->unprocessed, len); + nconsumed = 0; + poperror(); + + /* + * If an Eintr happens after this, we'll get out of sync. + * Make sure nothing we call can sleep. + * Errors are ok, as they kill the connection. + * Luckily, allocb won't sleep, it'll just error out. + */ + b = nil; + if(waserror()){ + if(b != nil) + freeb(b); + tlsError(tr, "channel error"); + nexterror(); + } + b = qgrab(&tr->unprocessed, len); +if(tr->debug) pprint("consumed unprocessed %d\n", len); + + in = &tr->in; + if(waserror()){ + qunlock(&in->seclock); + nexterror(); + } + qlock(&in->seclock); + p = b->rp; + if(in->sec != nil) { + /* to avoid Canvel-Hiltgen-Vaudenay-Vuagnoux attack, all errors here + should look alike, including timing of the response. 
*/ + unpad_len = (*in->sec->dec)(in->sec, p, len); + if(unpad_len >= in->sec->maclen) + len = unpad_len - in->sec->maclen; +if(tr->debug) pprint("decrypted %d\n", unpad_len); +if(tr->debug) pdump(unpad_len, p, "decrypted:"); + + /* update length */ + put16(header+3, len); + put64(seq, in->seq); + in->seq++; + (*tr->packMac)(in->sec, in->sec->mackey, seq, header, p, len, hmac); + if(unpad_len < in->sec->maclen) + rcvError(tr, EBadRecordMac, "short record mac"); + if(memcmp(hmac, p+len, in->sec->maclen) != 0) + rcvError(tr, EBadRecordMac, "record mac mismatch"); + b->wp = b->rp + len; + } + qunlock(&in->seclock); + poperror(); + if(len < 0) + rcvError(tr, EDecodeError, "runt record message"); + + switch(type) { + default: + rcvError(tr, EIllegalParameter, "invalid record message %#x", type); + break; + case RChangeCipherSpec: + if(len != 1 || p[0] != 1) + rcvError(tr, EDecodeError, "invalid change cipher spec"); + qlock(&in->seclock); + if(in->new == nil){ + qunlock(&in->seclock); + rcvError(tr, EUnexpectedMessage, "unexpected change cipher spec"); + } + freeSec(in->sec); + in->sec = in->new; + in->new = nil; + in->seq = 0; + qunlock(&in->seclock); + break; + case RAlert: + if(len != 2) + rcvError(tr, EDecodeError, "invalid alert"); + if(p[0] == 2) + rcvAlert(tr, p[1]); + if(p[0] != 1) + rcvError(tr, EIllegalParameter, "invalid alert fatal code"); + + /* + * propate non-fatal alerts to handshaker + */ + if(p[1] == ECloseNotify) { + tlsclosed(tr, SRClose); + if(tr->opened) + error("tls hungup"); + error("close notify"); + } + if(p[1] == ENoRenegotiation) + alertHand(tr, "no renegotiation"); + else if(p[1] == EUserCanceled) + alertHand(tr, "handshake canceled by user"); + else + rcvError(tr, EIllegalParameter, "invalid alert code"); + break; + case RHandshake: + /* + * don't worry about dropping the block + * qbwrite always queues even if flow controlled and interrupted. 
+ * + * if there isn't any handshaker, ignore the request, + * but notify the other side we are doing so. + */ + lock(&tr->hqlock); + if(tr->handq != nil){ + tr->hqref++; + unlock(&tr->hqlock); + if(waserror()){ + dechandq(tr); + nexterror(); + } + b = padblock(b, 1); + *b->rp = RHandshake; + qbwrite(tr->handq, b); + b = nil; + poperror(); + dechandq(tr); + }else{ + unlock(&tr->hqlock); + if(tr->verset && tr->version != SSL3Version && !waserror()){ + sendAlert(tr, ENoRenegotiation); + poperror(); + } + } + break; + case SSL2ClientHello: + lock(&tr->hqlock); + if(tr->handq != nil){ + tr->hqref++; + unlock(&tr->hqlock); + if(waserror()){ + dechandq(tr); + nexterror(); + } + /* Pass the SSL2 format data, so that the handshake code can compute + the correct checksums. HSSL2ClientHello = HandshakeType 9 is + unused in RFC2246. */ + b = padblock(b, 8); + b->rp[0] = RHandshake; + b->rp[1] = HSSL2ClientHello; + put24(&b->rp[2], len+3); + b->rp[5] = SSL2ClientHello; + put16(&b->rp[6], ver); + qbwrite(tr->handq, b); + b = nil; + poperror(); + dechandq(tr); + }else{ + unlock(&tr->hqlock); + if(tr->verset && tr->version != SSL3Version && !waserror()){ + sendAlert(tr, ENoRenegotiation); + poperror(); + } + } + break; + case RApplication: + if(!tr->opened) + rcvError(tr, EUnexpectedMessage, "application message received before handshake completed"); + if(BLEN(b) > 0){ + tr->processed = b; + b = nil; + } + break; + } + if(b != nil) + freeb(b); + poperror(); +} + +/* + * got a fatal alert message + */ +static void +rcvAlert(TlsRec *tr, int err) +{ + char *s; + int i; + + s = "unknown error"; + for(i=0; i < nelem(tlserrs); i++){ + if(tlserrs[i].err == err){ + s = tlserrs[i].msg; + break; + } + } +if(tr->debug) pprint("rcvAlert: %s\n", s); + + tlsError(tr, s); + if(!tr->opened) + error(s); + error("tls error"); +} + +/* + * found an error while decoding the input stream + */ +static void +rcvError(TlsRec *tr, int err, char *fmt, ...) 
+{ + char msg[ERRMAX]; + va_list arg; + + va_start(arg, fmt); + vseprint(msg, msg+sizeof(msg), fmt, arg); + va_end(arg); +if(tr->debug) pprint("rcvError: %s\n", msg); + + sendAlert(tr, err); + + if(!tr->opened) + error(msg); + error("tls error"); +} + +/* + * make sure the next hand operation returns with a 'msg' error + */ +static void +alertHand(TlsRec *tr, char *msg) +{ + Block *b; + int n; + + lock(&tr->hqlock); + if(tr->handq == nil){ + unlock(&tr->hqlock); + return; + } + tr->hqref++; + unlock(&tr->hqlock); + + n = strlen(msg); + if(waserror()){ + dechandq(tr); + nexterror(); + } + b = allocb(n + 2); + *b->wp++ = RAlert; + memmove(b->wp, msg, n + 1); + b->wp += n + 1; + + qbwrite(tr->handq, b); + + poperror(); + dechandq(tr); +} + +static void +checkstate(TlsRec *tr, int ishand, int ok) +{ + int state; + + lock(&tr->statelk); + state = tr->state; + unlock(&tr->statelk); + if(state & ok) + return; + switch(state){ + case SHandshake: + case SOpen: + break; + case SError: + case SAlert: + if(ishand) + error(tr->err); + error("tls error"); + case SRClose: + case SLClose: + case SClosed: + error("tls hungup"); + } + error("tls improperly configured"); +} + +static Block* +tlsbread(Chan *c, long n, vlong offset) +{ + int ty; + Block *b; + TlsRec *volatile tr; + + ty = TYPE(c->qid); + switch(ty) { + default: + return devbread(c, n, offset); + case Qhand: + case Qdata: + break; + } + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + panic("tlsbread"); + + if(waserror()){ + qunlock(&tr->in.io); + nexterror(); + } + qlock(&tr->in.io); + if(ty == Qdata){ + checkstate(tr, 0, SOpen); + while(tr->processed == nil) + tlsrecread(tr); + + /* return at most what was asked for */ + b = qgrab(&tr->processed, n); +if(tr->debug) pprint("consumed processed %ld\n", BLEN(b)); +if(tr->debug) pdump(BLEN(b), b->rp, "consumed:"); + qunlock(&tr->in.io); + poperror(); + tr->datain += BLEN(b); + }else{ + checkstate(tr, 1, SOpen|SHandshake|SLClose); + + /* + * it's ok to look at state without 
the lock + * since it only protects reading records, + * and we have that tr->in.io held. + */ + while(!tr->opened && tr->hprocessed == nil && !qcanread(tr->handq)) + tlsrecread(tr); + + qunlock(&tr->in.io); + poperror(); + + if(waserror()){ + qunlock(&tr->hqread); + nexterror(); + } + qlock(&tr->hqread); + if(tr->hprocessed == nil){ + b = qbread(tr->handq, MaxRecLen + 1); + if(*b->rp++ == RAlert){ + kstrcpy(up->errstr, (char*)b->rp, ERRMAX); + freeb(b); + nexterror(); + } + tr->hprocessed = b; + } + b = qgrab(&tr->hprocessed, n); + poperror(); + qunlock(&tr->hqread); + tr->handin += BLEN(b); + } + + return b; +} + +static long +tlsread(Chan *c, void *a, long n, vlong off) +{ + Block *volatile b; + Block *nb; + uchar *va; + int i, ty; + char *buf, *s, *e; + long offset; + TlsRec * tr; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, 0, 0, tlsgen); + + offset = off; + tr = tlsdevs[CONV(c->qid)]; + ty = TYPE(c->qid); + switch(ty) { + default: + error(Ebadusefd); + case Qstatus: + buf = smalloc(Statlen); + qlock(&tr->in.seclock); + qlock(&tr->out.seclock); + s = buf; + e = buf + Statlen; + s = seprint(s, e, "State: %s\n", tlsstate(tr->state)); + s = seprint(s, e, "Version: %#x\n", tr->version); + if(tr->in.sec != nil) + s = seprint(s, e, "EncIn: %s\nHashIn: %s\n", tr->in.sec->encalg, tr->in.sec->hashalg); + if(tr->in.new != nil) + s = seprint(s, e, "NewEncIn: %s\nNewHashIn: %s\n", tr->in.new->encalg, tr->in.new->hashalg); + if(tr->out.sec != nil) + s = seprint(s, e, "EncOut: %s\nHashOut: %s\n", tr->out.sec->encalg, tr->out.sec->hashalg); + if(tr->out.new != nil) + seprint(s, e, "NewEncOut: %s\nNewHashOut: %s\n", tr->out.new->encalg, tr->out.new->hashalg); + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + n = readstr(offset, a, n, buf); + free(buf); + return n; + case Qstats: + buf = smalloc(Statlen); + s = buf; + e = buf + Statlen; + s = seprint(s, e, "DataIn: %lld\n", tr->datain); + s = seprint(s, e, "DataOut: %lld\n", tr->dataout); + s = seprint(s, 
e, "HandIn: %lld\n", tr->handin); + seprint(s, e, "HandOut: %lld\n", tr->handout); + n = readstr(offset, a, n, buf); + free(buf); + return n; + case Qctl: + buf = smalloc(Statlen); + snprint(buf, Statlen, "%llud", CONV(c->qid)); + n = readstr(offset, a, n, buf); + free(buf); + return n; + case Qdata: + case Qhand: + b = tlsbread(c, n, offset); + break; + case Qencalgs: + return readstr(offset, a, n, encalgs); + case Qhashalgs: + return readstr(offset, a, n, hashalgs); + } + + if(waserror()){ + freeblist(b); + nexterror(); + } + + n = 0; + va = a; + for(nb = b; nb; nb = nb->next){ + i = BLEN(nb); + memmove(va+n, nb->rp, i); + n += i; + } + + freeblist(b); + poperror(); + + return n; +} + +/* + * write a block in tls records + */ +static void +tlsrecwrite(TlsRec *tr, int type, Block *b) +{ + Block *volatile bb; + Block *nb; + uchar *p, seq[8]; + OneWay *volatile out; + int n, maclen, pad, ok; + + out = &tr->out; + bb = b; + if(waserror()){ + qunlock(&out->io); + if(bb != nil) + freeb(bb); + nexterror(); + } + qlock(&out->io); +if(tr->debug)pprint("send %ld\n", BLEN(b)); +if(tr->debug)pdump(BLEN(b), b->rp, "sent:"); + + + ok = SHandshake|SOpen|SRClose; + if(type == RAlert) + ok |= SAlert; + while(bb != nil){ + checkstate(tr, type != RApplication, ok); + + /* + * get at most one maximal record's input, + * with padding on the front for header and + * back for mac and maximal block padding. 
+ */ + if(waserror()){ + qunlock(&out->seclock); + nexterror(); + } + qlock(&out->seclock); + maclen = 0; + pad = 0; + if(out->sec != nil){ + maclen = out->sec->maclen; + pad = maclen + out->sec->block; + } + n = BLEN(bb); + if(n > MaxRecLen){ + n = MaxRecLen; + nb = allocb(n + pad + RecHdrLen); + memmove(nb->wp + RecHdrLen, bb->rp, n); + bb->rp += n; + }else{ + /* + * carefully reuse bb so it will get freed if we're out of memory + */ + bb = padblock(bb, RecHdrLen); + if(pad) + nb = padblock(bb, -pad); + else + nb = bb; + bb = nil; + } + + p = nb->rp; + p[0] = type; + put16(p+1, tr->version); + put16(p+3, n); + + if(out->sec != nil){ + put64(seq, out->seq); + out->seq++; + (*tr->packMac)(out->sec, out->sec->mackey, seq, p, p + RecHdrLen, n, p + RecHdrLen + n); + n += maclen; + + /* encrypt */ + n = (*out->sec->enc)(out->sec, p + RecHdrLen, n); + nb->wp = p + RecHdrLen + n; + + /* update length */ + put16(p+3, n); + } + if(type == RChangeCipherSpec){ + if(out->new == nil) + error("change cipher without a new cipher"); + freeSec(out->sec); + out->sec = out->new; + out->new = nil; + out->seq = 0; + } + qunlock(&out->seclock); + poperror(); + + /* + * if bwrite error's, we assume the block is queued. + * if not, we're out of sync with the receiver and will not recover. 
+ */ + if(waserror()){ + if(strcmp(up->errstr, "interrupted") != 0) + tlsError(tr, "channel error"); + nexterror(); + } + tr->c->dev->bwrite(tr->c, nb, 0); + poperror(); + } + qunlock(&out->io); + poperror(); +} + +static long +tlsbwrite(Chan *c, Block *b, vlong offset) +{ + int ty; + ulong n; + TlsRec *tr; + + n = BLEN(b); + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + panic("tlsbread"); + + ty = TYPE(c->qid); + switch(ty) { + default: + return devbwrite(c, b, offset); + case Qhand: + tlsrecwrite(tr, RHandshake, b); + tr->handout += n; + break; + case Qdata: + checkstate(tr, 0, SOpen); + tlsrecwrite(tr, RApplication, b); + tr->dataout += n; + break; + } + + return n; +} + +typedef struct Hashalg Hashalg; +struct Hashalg +{ + char *name; + int maclen; + void (*initkey)(Hashalg *, int, Secret *, uchar*); +}; + +static void +initmd5key(Hashalg *ha, int version, Secret *s, uchar *p) +{ + s->maclen = ha->maclen; + if(version == SSL3Version) + s->mac = sslmac_md5; + else + s->mac = hmac_md5; + memmove(s->mackey, p, ha->maclen); +} + +static void +initclearmac(Hashalg *, int, Secret *s, uchar *) +{ + s->maclen = 0; + s->mac = nomac; +} + +static void +initsha1key(Hashalg *ha, int version, Secret *s, uchar *p) +{ + s->maclen = ha->maclen; + if(version == SSL3Version) + s->mac = sslmac_sha1; + else + s->mac = hmac_sha1; + memmove(s->mackey, p, ha->maclen); +} + +static Hashalg hashtab[] = +{ + { "clear", 0, initclearmac, }, + { "md5", MD5dlen, initmd5key, }, + { "sha1", SHA1dlen, initsha1key, }, + { 0 } +}; + +static Hashalg* +parsehashalg(char *p) +{ + Hashalg *ha; + + for(ha = hashtab; ha->name; ha++) + if(strcmp(p, ha->name) == 0) + return ha; + error("unsupported hash algorithm"); + return nil; +} + +typedef struct Encalg Encalg; +struct Encalg +{ + char *name; + int keylen; + int ivlen; + void (*initkey)(Encalg *ea, Secret *, uchar*, uchar*); +}; + +static void +initRC4key(Encalg *ea, Secret *s, uchar *p, uchar *) +{ + s->enckey = smalloc(sizeof(RC4state)); + 
s->enc = rc4enc; + s->dec = rc4enc; + s->block = 0; + setupRC4state(s->enckey, p, ea->keylen); +} + +static void +initDES3key(Encalg *, Secret *s, uchar *p, uchar *iv) +{ + s->enckey = smalloc(sizeof(DES3state)); + s->enc = des3enc; + s->dec = des3dec; + s->block = 8; + setupDES3state(s->enckey, (uchar (*)[8])p, iv); +} + +static void +initclearenc(Encalg *, Secret *s, uchar *, uchar *) +{ + s->enc = noenc; + s->dec = noenc; + s->block = 0; +} + +static Encalg encrypttab[] = +{ + { "clear", 0, 0, initclearenc }, + { "rc4_128", 128/8, 0, initRC4key }, + { "3des_ede_cbc", 3 * 8, 8, initDES3key }, + { 0 } +}; + +static Encalg* +parseencalg(char *p) +{ + Encalg *ea; + + for(ea = encrypttab; ea->name; ea++) + if(strcmp(p, ea->name) == 0) + return ea; + error("unsupported encryption algorithm"); + return nil; +} + +static long +tlswrite(Chan *c, void *a, long n, vlong off) +{ + Encalg *ea; + Hashalg *ha; + TlsRec *volatile tr; + Secret *volatile tos, *volatile toc; + Block *volatile b; + Cmdbuf *volatile cb; + int m, ty; + char *p, *e; + uchar *volatile x; + ulong offset = off; + + tr = tlsdevs[CONV(c->qid)]; + if(tr == nil) + panic("tlswrite"); + + ty = TYPE(c->qid); + switch(ty){ + case Qdata: + case Qhand: + p = a; + e = p + n; + do{ + m = e - p; + if(m > MaxRecLen) + m = MaxRecLen; + + b = allocb(m); + if(waserror()){ + freeb(b); + nexterror(); + } + memmove(b->wp, p, m); + poperror(); + b->wp += m; + + tlsbwrite(c, b, offset); + + p += m; + }while(p < e); + return n; + case Qctl: + break; + default: + error(Ebadusefd); + return -1; + } + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf < 1) + error("short control request"); + + /* mutex with operations using what we're about to change */ + if(waserror()){ + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + nexterror(); + } + qlock(&tr->in.seclock); + qlock(&tr->out.seclock); + + if(strcmp(cb->f[0], "fd") == 0){ + if(cb->nf != 3) + error("usage: fd open-fd version"); + 
if(tr->c != nil) + error(Einuse); + m = strtol(cb->f[2], nil, 0); + if(m < MinProtoVersion || m > MaxProtoVersion) + error("unsupported version"); + tr->c = buftochan(cb->f[1]); + tr->version = m; + tlsSetState(tr, SHandshake, SClosed); + }else if(strcmp(cb->f[0], "version") == 0){ + if(cb->nf != 2) + error("usage: version vers"); + if(tr->c == nil) + error("must set fd before version"); + if(tr->verset) + error("version already set"); + m = strtol(cb->f[1], nil, 0); + if(m == SSL3Version) + tr->packMac = sslPackMac; + else if(m == TLSVersion) + tr->packMac = tlsPackMac; + else + error("unsupported version"); + tr->verset = 1; + tr->version = m; + }else if(strcmp(cb->f[0], "secret") == 0){ + if(cb->nf != 5) + error("usage: secret hashalg encalg isclient secretdata"); + if(tr->c == nil || !tr->verset) + error("must set fd and version before secrets"); + + if(tr->in.new != nil){ + freeSec(tr->in.new); + tr->in.new = nil; + } + if(tr->out.new != nil){ + freeSec(tr->out.new); + tr->out.new = nil; + } + + ha = parsehashalg(cb->f[1]); + ea = parseencalg(cb->f[2]); + + p = cb->f[4]; + m = (strlen(p)*3)/2; + x = smalloc(m); + tos = nil; + toc = nil; + if(waserror()){ + freeSec(tos); + freeSec(toc); + free(x); + nexterror(); + } + m = dec64(x, m, p, strlen(p)); + if(m < 2 * ha->maclen + 2 * ea->keylen + 2 * ea->ivlen) + error("not enough secret data provided"); + + tos = smalloc(sizeof(Secret)); + toc = smalloc(sizeof(Secret)); + if(!ha->initkey || !ea->initkey) + error("misimplemented secret algorithm"); + (*ha->initkey)(ha, tr->version, tos, &x[0]); + (*ha->initkey)(ha, tr->version, toc, &x[ha->maclen]); + (*ea->initkey)(ea, tos, &x[2 * ha->maclen], &x[2 * ha->maclen + 2 * ea->keylen]); + (*ea->initkey)(ea, toc, &x[2 * ha->maclen + ea->keylen], &x[2 * ha->maclen + 2 * ea->keylen + ea->ivlen]); + + if(!tos->mac || !tos->enc || !tos->dec + || !toc->mac || !toc->enc || !toc->dec) + error("missing algorithm implementations"); + if(strtol(cb->f[3], nil, 0) == 0){ + tr->in.new 
= tos; + tr->out.new = toc; + }else{ + tr->in.new = toc; + tr->out.new = tos; + } + if(tr->version == SSL3Version){ + toc->unpad = sslunpad; + tos->unpad = sslunpad; + }else{ + toc->unpad = tlsunpad; + tos->unpad = tlsunpad; + } + toc->encalg = ea->name; + toc->hashalg = ha->name; + tos->encalg = ea->name; + tos->hashalg = ha->name; + + free(x); + poperror(); + }else if(strcmp(cb->f[0], "changecipher") == 0){ + if(cb->nf != 1) + error("usage: changecipher"); + if(tr->out.new == nil) + error("cannot change cipher spec without setting secret"); + + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + poperror(); + free(cb); + poperror(); + + /* + * the real work is done as the message is written + * so the stream is encrypted in sync. + */ + b = allocb(1); + *b->wp++ = 1; + tlsrecwrite(tr, RChangeCipherSpec, b); + return n; + }else if(strcmp(cb->f[0], "opened") == 0){ + if(cb->nf != 1) + error("usage: opened"); + if(tr->in.sec == nil || tr->out.sec == nil) + error("cipher must be configured before enabling data messages"); + lock(&tr->statelk); + if(tr->state != SHandshake && tr->state != SOpen){ + unlock(&tr->statelk); + error("cannot enable data messages"); + } + tr->state = SOpen; + unlock(&tr->statelk); + tr->opened = 1; + }else if(strcmp(cb->f[0], "alert") == 0){ + if(cb->nf != 2) + error("usage: alert n"); + if(tr->c == nil) + error("must set fd before sending alerts"); + m = strtol(cb->f[1], nil, 0); + + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + poperror(); + free(cb); + poperror(); + + sendAlert(tr, m); + + if(m == ECloseNotify) + tlsclosed(tr, SLClose); + + return n; + } else if(strcmp(cb->f[0], "debug") == 0){ + if(cb->nf == 2){ + if(strcmp(cb->f[1], "on") == 0) + tr->debug = 1; + else + tr->debug = 0; + } else + tr->debug = 1; + } else + error(Ebadarg); + + qunlock(&tr->in.seclock); + qunlock(&tr->out.seclock); + poperror(); + free(cb); + poperror(); + + return n; +} + +static void +tlsinit(void) +{ + struct Encalg *e; + struct Hashalg 
*h; + int n; + char *cp; + static int already; + + if(!already){ +// fmtinstall('H', encodefmt); + already = 1; + } + + tlsdevs = smalloc(sizeof(TlsRec*) * maxtlsdevs); + trnames = smalloc((sizeof *trnames) * maxtlsdevs); + + n = 1; + for(e = encrypttab; e->name != nil; e++) + n += strlen(e->name) + 1; + cp = encalgs = smalloc(n); + for(e = encrypttab;;){ + strcpy(cp, e->name); + cp += strlen(e->name); + e++; + if(e->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; + + n = 1; + for(h = hashtab; h->name != nil; h++) + n += strlen(h->name) + 1; + cp = hashalgs = smalloc(n); + for(h = hashtab;;){ + strcpy(cp, h->name); + cp += strlen(h->name); + h++; + if(h->name == nil) + break; + *cp++ = ' '; + } + *cp = 0; +} + +Dev tlsdevtab = { + 'a', + "tls", + + devreset, + tlsinit, + devshutdown, + tlsattach, + tlswalk, + tlsstat, + tlsopen, + devcreate, + tlsclose, + tlsread, + tlsbread, + tlswrite, + tlsbwrite, + devremove, + tlswstat, +}; + +/* get channel associated with an fd */ +static Chan* +buftochan(char *p) +{ + Chan *c; + int fd; + + if(p == 0) + error(Ebadarg); + fd = strtoul(p, 0, 0); + if(fd < 0) + error(Ebadarg); + c = fdtochan(fd, -1, 0, 1); /* error check and inc ref */ + return c; +} + +static void +sendAlert(TlsRec *tr, int err) +{ + Block *b; + int i, fatal; + char *msg; + +if(tr->debug)pprint("sendAlert %d\n", err); + fatal = 1; + msg = "tls unknown alert"; + for(i=0; i < nelem(tlserrs); i++) { + if(tlserrs[i].err == err) { + msg = tlserrs[i].msg; + if(tr->version == SSL3Version) + err = tlserrs[i].sslerr; + else + err = tlserrs[i].tlserr; + fatal = tlserrs[i].fatal; + break; + } + } + + if(!waserror()){ + b = allocb(2); + *b->wp++ = fatal + 1; + *b->wp++ = err; + if(fatal) + tlsSetState(tr, SAlert, SOpen|SHandshake|SRClose); + tlsrecwrite(tr, RAlert, b); + poperror(); + } + if(fatal) + tlsError(tr, msg); +} + +static void +tlsError(TlsRec *tr, char *msg) +{ + int s; + +if(tr->debug)pprint("tleError %s\n", msg); + lock(&tr->statelk); + s = tr->state; + 
tr->state = SError; + if(s != SError){ + strncpy(tr->err, msg, ERRMAX - 1); + tr->err[ERRMAX - 1] = '\0'; + } + unlock(&tr->statelk); + if(s != SError) + alertHand(tr, msg); +} + +static void +tlsSetState(TlsRec *tr, int new, int old) +{ + lock(&tr->statelk); + if(tr->state & old) + tr->state = new; + unlock(&tr->statelk); +} + +/* hand up a digest connection */ +static void +tlshangup(TlsRec *tr) +{ + Block *b; + + qlock(&tr->in.io); + for(b = tr->processed; b; b = tr->processed){ + tr->processed = b->next; + freeb(b); + } + if(tr->unprocessed != nil){ + freeb(tr->unprocessed); + tr->unprocessed = nil; + } + qunlock(&tr->in.io); + + tlsSetState(tr, SClosed, ~0); +} + +static TlsRec* +newtls(Chan *ch) +{ + TlsRec **pp, **ep, **np; + char **nmp; + int t, newmax; + + if(waserror()) { + unlock(&tdlock); + nexterror(); + } + lock(&tdlock); + ep = &tlsdevs[maxtlsdevs]; + for(pp = tlsdevs; pp < ep; pp++) + if(*pp == nil) + break; + if(pp >= ep) { + if(maxtlsdevs >= MaxTlsDevs) { + unlock(&tdlock); + poperror(); + return nil; + } + newmax = 2 * maxtlsdevs; + if(newmax > MaxTlsDevs) + newmax = MaxTlsDevs; + np = smalloc(sizeof(TlsRec*) * newmax); + memmove(np, tlsdevs, sizeof(TlsRec*) * maxtlsdevs); + tlsdevs = np; + pp = &tlsdevs[maxtlsdevs]; + memset(pp, 0, sizeof(TlsRec*)*(newmax - maxtlsdevs)); + + nmp = smalloc(sizeof *nmp * newmax); + memmove(nmp, trnames, sizeof *nmp * maxtlsdevs); + trnames = nmp; + + maxtlsdevs = newmax; + } + *pp = mktlsrec(); + if(pp - tlsdevs >= tdhiwat) + tdhiwat++; + t = TYPE(ch->qid); + if(t == Qclonus) + t = Qctl; + ch->qid.path = QID(pp - tlsdevs, t); + ch->qid.vers = 0; + unlock(&tdlock); + poperror(); + return *pp; +} + +static TlsRec * +mktlsrec(void) +{ + TlsRec *tr; + + tr = mallocz(sizeof(*tr), 1); + if(tr == nil) + error(Enomem); + tr->state = SClosed; + tr->ref = 1; + kstrdup(&tr->user, up->user); + tr->perm = 0660; + return tr; +} + +static char* +tlsstate(int s) +{ + switch(s){ + case SHandshake: + return "Handshaking"; + case 
SOpen: + return "Established"; + case SRClose: + return "RemoteClosed"; + case SLClose: + return "LocalClosed"; + case SAlert: + return "Alerting"; + case SError: + return "Errored"; + case SClosed: + return "Closed"; + } + return "Unknown"; +} + +static void +freeSec(Secret *s) +{ + if(s != nil){ + free(s->enckey); + free(s); + } +} + +static int +noenc(Secret *, uchar *, int n) +{ + return n; +} + +static int +rc4enc(Secret *sec, uchar *buf, int n) +{ + rc4(sec->enckey, buf, n); + return n; +} + +static int +tlsunpad(uchar *buf, int n, int block) +{ + int pad, nn; + + pad = buf[n - 1]; + nn = n - 1 - pad; + if(nn <= 0 || n % block) + return -1; + while(--n > nn) + if(pad != buf[n - 1]) + return -1; + return nn; +} + +static int +sslunpad(uchar *buf, int n, int block) +{ + int pad, nn; + + pad = buf[n - 1]; + nn = n - 1 - pad; + if(nn <= 0 || n % block) + return -1; + return nn; +} + +static int +blockpad(uchar *buf, int n, int block) +{ + int pad, nn; + + nn = n + block; + nn -= nn % block; + pad = nn - (n + 1); + while(n < nn) + buf[n++] = pad; + return nn; +} + +static int +des3enc(Secret *sec, uchar *buf, int n) +{ + n = blockpad(buf, n, 8); + des3CBCencrypt(buf, n, sec->enckey); + return n; +} + +static int +des3dec(Secret *sec, uchar *buf, int n) +{ + des3CBCdecrypt(buf, n, sec->enckey); + return (*sec->unpad)(buf, n, 8); +} +static DigestState* +nomac(uchar *, ulong, uchar *, ulong, uchar *, DigestState *) +{ + return nil; +} + +/* + * sslmac: mac calculations for ssl 3.0 only; tls 1.0 uses the standard hmac. 
+ */ +static DigestState* +sslmac_x(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s, + DigestState*(*x)(uchar*, ulong, uchar*, DigestState*), int xlen, int padlen) +{ + int i; + uchar pad[48], innerdigest[20]; + + if(xlen > sizeof(innerdigest) + || padlen > sizeof(pad)) + return nil; + + if(klen>64) + return nil; + + /* first time through */ + if(s == nil){ + for(i=0; imac)(buf, 11, mackey, sec->maclen, 0, 0); + (*sec->mac)(body, len, mackey, sec->maclen, mac, s); +} + +static void +tlsPackMac(Secret *sec, uchar *mackey, uchar *seq, uchar *header, uchar *body, int len, uchar *mac) +{ + DigestState *s; + uchar buf[13]; + + memmove(buf, seq, 8); + memmove(&buf[8], header, 5); + + s = (*sec->mac)(buf, 13, mackey, sec->maclen, 0, 0); + (*sec->mac)(body, len, mackey, sec->maclen, mac, s); +} + +static void +put32(uchar *p, u32int x) +{ + p[0] = x>>24; + p[1] = x>>16; + p[2] = x>>8; + p[3] = x; +} + +static void +put64(uchar *p, vlong x) +{ + put32(p, (u32int)(x >> 32)); + put32(p+4, (u32int)x); +} + +static void +put24(uchar *p, int x) +{ + p[0] = x>>16; + p[1] = x>>8; + p[2] = x; +} + +static void +put16(uchar *p, int x) +{ + p[0] = x>>8; + p[1] = x; +} + +static u32int +get32(uchar *p) +{ + return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3]; +} + +static int +get16(uchar *p) +{ + return (p[0]<<8)|p[1]; +} + +static char *charmap = "0123456789abcdef"; + +static void +pdump(int len, void *a, char *tag) +{ + uchar *p; + int i; + char buf[65+32]; + char *q; + + p = a; + strcpy(buf, tag); + while(len > 0){ + q = buf + strlen(tag); + for(i = 0; len > 0 && i < 32; i++){ + if(*p >= ' ' && *p < 0x7f){ + *q++ = ' '; + *q++ = *p; + } else { + *q++ = charmap[*p>>4]; + *q++ = charmap[*p & 0xf]; + } + len--; + p++; + } + *q = 0; + + if(len > 0) + pprint("%s...\n", buf); + else + pprint("%s\n", buf); + } +} diff -Nru 0/sys/src/nix/port/devtrace.c 4/sys/src/nix/port/devtrace.c --- 0/sys/src/nix/port/devtrace.c Thu Jan 1 00:00:00 1970 +++ 
4/sys/src/nix/port/devtrace.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,887 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" +#include "netif.h" + +#pragma profile 0 + +typedef struct Trace Trace; +/* This is a trace--a segment of memory to watch for entries and exits */ +struct Trace { + struct Trace *next; + void *func; + void *start; + void *end; + int enabled; + char name[16]; +}; + +enum { + Qdir, + Qctl, + Qdata, +}; + +enum { + TraceEntry = 1, + TraceExit, +}; + +/* fix me make this programmable */ +enum { + defaultlogsize = 8192, +}; + +/* This represents a trace "hit" or event */ +typedef struct Tracelog Tracelog; +struct Tracelog { + uvlong ticks; + int info; + uintptr pc; + /* these are different depending on type */ + uintptr dat[5]; + int machno; +}; + + +static Rendez tracesleep; +static QLock traceslock; +/* this will contain as many entries as there are valid pc values */ +static Trace **tracemap; +static Trace *traces; /* This stores all the traces */ +static Lock loglk; +static Tracelog *tracelog = nil; +int traceactive = 0; +/* trace indices. These are just unsigned longs. You mask them + * to get an index. This makes fifo empty/full etc. trivial. + */ +static uint pw = 0, pr = 0; +static int tracesactive = 0; +static int all = 0; +static int watching = 0; +static int slothits = 0; +static unsigned int traceinhits = 0; +static unsigned int newplfail = 0; +static unsigned long logsize = defaultlogsize, logmask = defaultlogsize - 1; + +static int printsize = 0; //The length of a line being printed + +/* These are for observing a single process */ +static int *pidwatch = nil; +static int numpids = 0; +static const PIDWATCHSIZE = 32; /* The number of PIDS that can be watched. Pretty arbitrary. 
*/ + +int codesize = 0; + +static uvlong lastestamp; /* last entry timestamp */ +static uvlong lastxstamp; /* last exit timestamp */ + +/* Trace events can be either Entries or Exits */ +static char eventname[] = { + [TraceEntry] = 'E', + [TraceExit] = 'X', +}; + +static Dirtab tracedir[]={ + ".", {Qdir, 0, QTDIR}, 0, DMDIR|0555, + "tracectl", {Qctl}, 0, 0664, + "trace", {Qdata}, 0, 0440, +}; + +char hex[] = { + '0', + '1', + '2', + '3', + '4', + '5', + '6', + '7', + '8', + '9', + 'a', + 'b', + 'c', + 'd', + 'e', + 'f', +}; + +/* big-endian ... */ +void +hex8(u32int l, char *c) +{ + int i; + for(i = 2; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex16(u32int l, char *c) +{ + int i; + for(i = 4; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex32(u32int l, char *c) +{ + int i; + for(i = 8; i; i--){ + c[i-1] = hex[l&0xf]; + l >>= 4; + } +} + +void +hex64(u64int l, char *c) +{ + hex32(l>>32, c); + hex32(l, &c[8]); +} + +static int +lognonempty(void *) +{ + return pw - pr; +} + +static int +logfull(void) +{ + return (pw - pr) >= logsize; +} + +static u64int +idx(u64int f) +{ + return f & logmask; +} + +/* + * Check if the given trace overlaps any others + * Returns 1 if there is overlap, 0 if clear. 
+ */ +int +overlapping(Trace *p) { + Trace *curr; + + curr = traces; + + if (!curr) + return 0; + + do { + if ((curr->start < p->start && p->start < curr->end) || + (curr->start < p->end && p->end < curr->end)) + return 1; + curr = curr->next; + } while (curr != nil); + + return 0; +} + +/* Make sure a PC is valid and traced; if so, return its Trace */ +/* if dopanic == 1, the kernel will panic on an invalid PC */ +struct Trace ** +traceslot(void *pc, int dopanic) +{ + int index; + struct Trace **p; + + if (pc > etext) { + if (dopanic) + panic("Bad PC %p", pc); + + print("Invalid PC %p\n", pc); + return nil; + } + index = (int)((uintptr)pc - KTZERO); + if (index > codesize){ + if (dopanic) { + panic("Bad PC %p", pc); + while(1); + } + print("Invalid PC %p\n", pc); + return nil; + } + p = &tracemap[index]; + if (tracemap[index]) + ainc(&slothits); + return p; +} + +/* Check if the given PC is traced and return a Trace if so */ +struct Trace * +traced(void *pc, int dopanic) +{ + struct Trace **p; + + p = traceslot(pc, dopanic); + + if (p == nil) + return nil; + + return *p; +} + +/* + * Return 1 if pid is being watched or no pids are being watched. + * Return 0 if pids are being watched and the argument is not + * among them. + */ +int +watchingpid(int pid) { + int i; + + if (pidwatch[0] == 0) + return 1; + + for (i = 0; i < numpids; i++) { + if (pidwatch[i] == pid) + return 1; + } + return 0; +} + +/* + * Remove a trace. 
+ * Clears the tracemap slots p covers, unlinks p from the traces
+ * list and frees it.  A p that is not on the list is left alone.
+ */
+void
+removetrace(Trace *p) {
+	unsigned char *cp;
+	struct Trace **link;
+	struct Trace **slot;
+
+	/* make sure no lookup can find p once it has been freed */
+	slot = traceslot(p->start, 0);
+	if (slot != nil)
+		for(cp = p->start; cp <= p->end; slot++, cp++)
+			*slot = nil;
+
+	/* an enabled trace was counted by traceon(); undo that */
+	if (p->enabled) {
+		p->enabled = 0;
+		tracesactive--;
+	}
+
+	/*
+	 * Walk the links rather than the nodes, so the head, a middle
+	 * element and the tail are all unlinked the same way, and an
+	 * empty list or a missing p simply falls out of the loop.
+	 */
+	for (link = &traces; *link != nil; link = &(*link)->next) {
+		if (*link == p) {
+			*link = p->next;
+			free(p);
+			return;
+		}
+	}
+}
+
+/* it is recommended that you call these with something sane. */
+/* these next two functions assume you locked traceslock */
+
+/*
+ * Turn on a trace: point every tracemap slot in [start, end] at p.
+ * Does nothing if traceslot() rejects p->start.
+ */
+void
+traceon(struct Trace *p)
+{
+	unsigned char *cp;
+	struct Trace **slot;
+
+	slot = traceslot(p->start, 0);
+	if (slot == nil)
+		return;
+	for(cp = p->start; cp <= p->end; slot++, cp++)
+		*slot = p;
+	p->enabled = 1;
+	tracesactive++;
+}
+
+/*
+ * Turn off a trace: clear every tracemap slot in [start, end].
+ * Does nothing if traceslot() rejects p->start.
+ */
+void
+traceoff(struct Trace *p)
+{
+	unsigned char *cp;
+	struct Trace **slot;
+
+	slot = traceslot(p->start, 0);
+	if (slot == nil)
+		return;
+	for(cp = p->start; cp <= p->end; slot++, cp++)
+		*slot = nil;
+	p->enabled = 0;
+	tracesactive--;
+}
+
+/*
+ * Make a new tracelog (an event).
+ * Advances the write position and returns the log slot it maps to.
+ * As written this always returns a slot: the position is masked
+ * into the log, so a full log wraps around.  Callers still test
+ * for nil.
+ */
+static struct Tracelog *
+newpl(void)
+{
+	uint index;
+
+	index = ainc((int *)&pw);
+
+	return &tracelog[idx(index)];
+
+}
+
+/* Called every time a (traced) function starts */
+/* this is not really smp safe. FIX */
+void
+tracein(void* pc, uintptr a1, uintptr a2, uintptr a3, uintptr a4)
+{
+	struct Tracelog *pl;
+
+	/* if we are here, tracing is active. Turn it off. */
+	traceactive = 0;
+	if (! traced(pc, 1)){
+		traceactive = 1;
+		return;
+	}
+
+	ainc((int *)&traceinhits);
+	/* Continue if we are watching this pid or we're not watching any */
+	if (!all)
+		if (!up || !watchingpid(up->pid)){
+			traceactive = 1;
+			return;
+		}
+
+	pl = newpl();
+
+	if (!
pl) {
+		ainc((int *)&newplfail);
+		traceactive = 1;
+		return;
+	}
+
+	cycles(&pl->ticks);
+
+	pl->pc = (uintptr)pc;
+	/* -1 stands in for the pid when there is no current process */
+	if (up)
+		pl->dat[0] = up->pid;
+	else
+		pl->dat[0] = (unsigned long)-1;
+
+	pl->dat[1] = a1;
+	pl->dat[2] = a2;
+	pl->dat[3] = a3;
+	pl->dat[4] = a4;
+
+	pl->info = TraceEntry;
+	pl->machno = m->machno;
+	traceactive = 1;
+}
+
+/*
+ * Called every time a traced function exits.
+ * Logs a TraceExit record carrying pc, the pid (or -1) and retval.
+ * Tracing is switched off while the record is built and back on
+ * before every return.
+ */
+void
+traceout(void* pc, uintptr retval)
+{
+	struct Tracelog *pl;
+	/* if we are here, tracing is active. Turn it off. */
+	traceactive = 0;
+	if (! traced(pc, 1)){
+		traceactive = 1;
+		return;
+	}
+
+	/* Continue if we are watching this pid or we're not watching any */
+	if (!all)
+		if (!up || !watchingpid(up->pid)){
+			traceactive = 1;
+			return;
+		}
+
+	pl = newpl();
+	if (! pl){
+		traceactive = 1;
+		return;
+	}
+
+	cycles(&pl->ticks);
+
+	pl->pc = (uintptr)pc;
+	if (up)
+		pl->dat[0] = up->pid;
+	else
+		pl->dat[0] = (unsigned long)-1;
+
+	pl->dat[1] = retval;
+	pl->dat[2] = 0;
+	pl->dat[3] = 0;
+	/*
+	 * log slots are reused and the 64-bit reader prints dat[1..4],
+	 * so clear the last one too or an exit record shows the fourth
+	 * argument of whatever entry record used this slot before.
+	 */
+	pl->dat[4] = 0;
+
+	pl->info = TraceExit;
+	pl->machno = m->machno;
+	traceactive = 1;
+}
+
+/*
+ * Create a new trace with the given range.
+ * The trace comes back zeroed apart from func, start and end; it is
+ * neither named, linked into the list nor enabled.  Raises an error
+ * if the allocation fails.
+ */
+static Trace *
+mktrace(void *func, void *start, void *end)
+{
+	Trace *p;
+
+	p = mallocz(sizeof p[0], 1);
+	if (p == nil)
+		error("devtrace: can't allocate trace");
+	p->func = func;
+	p->start = start;
+	p->end = end;
+	return p;
+}
+
+/* Get rid of an old trace */
+static void
+freetrace(Trace *p)
+{
+	free(p);
+}
+
+
+/* attach to the device under the letter 'T' */
+static Chan*
+traceattach(char *spec)
+{
+	return devattach('T', spec);
+}
+
+/* walk within the fixed tracedir table */
+static Walkqid*
+tracewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, tracedir, nelem(tracedir), devgen);
+}
+
+/* stat an entry of the fixed tracedir table */
+static long
+tracestat(Chan *c, uchar *db, long n)
+{
+	return devstat(c, db, n, tracedir, nelem(tracedir), devgen);
+}
+
+static Chan*
+traceopen(Chan *c, int omode)
+{
+
+	/* if there is no tracelog, allocate one. Open always fails
+	 * if the basic alloc fails. You can resize it later.
+	 */
+
+	codesize = (uintptr)etext - (uintptr)KTZERO;
+	if (!
tracemap) + //tracemap = mallocz(sizeof(struct tracemap *)*codesize, 1); + tracemap = mallocz(sizeof(struct Trace *)*codesize, 1); + if (! tracemap) + error("tracemap malloc failed"); + if (! tracelog) + tracelog = mallocz(sizeof(*tracelog)*logsize, 1); + /* I guess malloc doesn't toss an error */ + if (! tracelog) + error("tracelog malloc failed"); + if (! pidwatch) + pidwatch = mallocz(sizeof(int)*PIDWATCHSIZE, 1); + if (! pidwatch) + error("pidwatch malloc failed"); + c = devopen(c, omode, tracedir, nelem(tracedir), devgen); + return c; +} + +static void +traceclose(Chan *) +{ +} + +/* + * Reading from the device, either the data or control files. + * The data reading involves deep rminnich magic so we don't have + * to call print(), which is traced. + */ +static long +traceread(Chan *c, void *a, long n, vlong offset) +{ + char *buf; + char *cp = a; + struct Tracelog *pl; + Trace *p; + int i, j; + int saveactive = traceactive; + traceactive = 0; + static QLock gate; + + if (waserror()) { + traceactive = saveactive; + nexterror(); + } + + if(c->qid.type == QTDIR) { + long l = devdirread(c, a, n, tracedir, nelem(tracedir), devgen); + poperror(); + traceactive = saveactive; + return l; + } + + switch((int) c->qid.path){ + default: + error("traceread: bad qid"); + case Qctl: + i = 0; + qlock(&traceslock); + buf = malloc(READSTR); + i += snprint(buf + i, READSTR - i, "logsize %lud\n", logsize); + for(p = traces; p != nil; p = p->next) + i += snprint(buf + i, READSTR - i, "trace %p %p new %s\n", + p->start, p->end, p->name); + + for(p = traces; p != nil; p = p->next) + i += snprint(buf + i, READSTR - i, "#trace %p traced? 
%p\n", + p->func, traced(p->func, 0)); + + for(p = traces; p != nil; p = p->next) + if (p->enabled) + i += snprint(buf + i, READSTR - i, "trace %s on\n", + p->name); + i += snprint(buf + i, READSTR - i, "#tracehits %d, in queue %d\n", + pw, pw-pr); + i += snprint(buf + i, READSTR - i, "#tracelog %p\n", tracelog); + i += snprint(buf + i, READSTR - i, "#traceactive %d\n", saveactive); + i += snprint(buf + i, READSTR - i, "#slothits %d\n", slothits); + i += snprint(buf + i, READSTR - i, "#traceinhits %d\n", traceinhits); + for (j = 0; j < numpids - 1; j++) + i += snprint(buf + i, READSTR - i, "watch %d\n", pidwatch[j]); + snprint(buf + i, READSTR - i, "watch %d\n", pidwatch[numpids - 1]); + n = readstr(offset, a, n, buf); + free(buf); + qunlock(&traceslock); + break; + case Qdata: + + // Set the printsize + /* 32-bit E PCPCPCPC TIMETIMETIMETIME PID# CR XXARG1XX XXARG2XX XXARG3XX XXARG4XX\n */ + if (sizeof(uintptr) == 4) { + printsize = 73; // 32-bit format + } else { + printsize = 121; // must be 64-bit + } + + i = 0; + while(lognonempty((void *)0)){ + int j; + + if ((pw - pr) > logsize) + pr = pw - logsize; + + pl = tracelog + idx(pr); + + if ((i + printsize) > n) + break; + /* simple format */ + if (sizeof(uintptr) == 4) { + cp[0] = eventname[pl->info]; + cp ++; + *cp++ = ' '; + hex32((uint)pl->pc, cp); + cp[8] = ' '; + cp += 9; + hex64(pl->ticks, cp); + cp[16] = ' '; + cp += 17; + hex16(pl->dat[0], cp); + cp += 4; + cp[0] = ' '; + cp++; + hex8(pl->machno, cp); + cp += 2; + cp[0] = ' '; + cp++; + for(j = 1; j < 4; j++){ + hex32(pl->dat[j], cp); + cp[8] = ' '; + cp += 9; + } + /* adjust for extra skip above */ + cp--; + *cp++ = '\n'; + pr++; + i += printsize; + } else { + cp[0] = eventname[pl->info]; + cp ++; + *cp++ = ' '; + hex64((u64int)pl->pc, cp); + cp[16] = ' '; + cp += 17; + hex64(pl->ticks, cp); + cp[16] = ' '; + cp += 17; + hex32(pl->dat[0], cp); + cp += 8; + cp[0] = ' '; + cp++; + cp[0] = ' '; + cp++; + cp[0] = ' '; + cp++; + cp[0] = ' '; + cp++; + 
hex8(pl->machno, cp); + cp += 4; + for (j = 1; j < 5; j++) { + hex64(pl->dat[j], cp); + cp[16] = ' '; + cp += 17; + } + cp--; + *cp++ = '\n'; + pr++; + i += printsize; + } + } + n = i; + break; + } + poperror(); + traceactive = saveactive; + return n; +} + +/* + * Process commands sent to the ctl file. + */ +static long +tracewrite(Chan *c, void *a, long n, vlong) +{ + char *tok[6]; //changed this so "tracein" works with the new 4th arg + char *ep, *s = nil; + Trace *p, **pp, *foo; + int ntok; + int saveactive = traceactive; + traceactive = 0; + + qlock(&traceslock); + if(waserror()){ + qunlock(&traceslock); + if(s != nil) free(s); + traceactive = saveactive; + nexterror(); + } + switch((uintptr)c->qid.path){ + default: + error("tracewrite: bad qid"); + case Qctl: + s = malloc(n + 1); + memmove(s, a, n); + s[n] = 0; + ntok = tokenize(s, tok, nelem(tok)); + if(!strcmp(tok[0], "trace")){ /* 'trace' ktextaddr 'on'|'off'|'mk'|'del' [name] */ + if(ntok < 3) { + error("devtrace: usage: 'trace' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name]"); + } + for(pp = &traces; *pp != nil; pp = &(*pp)->next){ + if(!strcmp(tok[1], (*pp)->name)) + break; +} + p = *pp; + if((ntok > 3) && (!strcmp(tok[3], "new"))){ + uintptr addr; + void *start, *end, *func; + if (ntok != 5) { + error("devtrace: usage: trace new "); + } + addr = (uintptr)strtoul(tok[1], &ep, 16); + if (addr < KTZERO) + addr |= KTZERO; + func = start = (void *)addr; + if(*ep) { + error("devtrace: start address not in recognized format"); + } + addr = (uintptr)strtoul(tok[2], &ep, 16); + if (addr < KTZERO) + addr |= KTZERO; + end = (void *)addr; + if(*ep) { + error("devtrace: end address not in recognized format"); + } + + if (start > end || start > etext || end > etext) + error("devtrace: invalid address range"); + + /* What do we do here? 
start and end are weird * + if((addr < (uintptr)start) || (addr > (uintptr)end) + error("devtrace: address out of bounds"); + */ + if(p) { + error("devtrace: trace already exists"); + } + p = mktrace(func, start, end); + for (foo = traces; foo != nil; foo = foo->next) { + if (!strcmp(tok[4], foo->name)) + error("devtrace: trace with that name already exists"); + } + + if (!overlapping(p)) { + p->next = traces; + if(ntok < 5) + snprint(p->name, sizeof p->name, "%p", func); + else + strncpy(p->name, tok[4], sizeof p->name); + traces = p; + } else { + error("devtrace: given range overlaps with existing trace"); + } + } else if(!strcmp(tok[2], "remove")){ + if (ntok != 3) + error("devtrace: usage: trace remove"); + if (p == nil) { + error("devtrace: trace not found"); + } + removetrace(p); + } else if(!strcmp(tok[2], "on")){ + if (ntok != 3) + error("devtrace: usage: trace on"); + + if(p == nil) { + error("devtrace: trace not found"); + } + if (! traced(p->func, 0)){ + traceon(p); + } + } else if(!strcmp(tok[2], "off")){ + if (ntok != 3) + error("devtrace: usage: trace off"); + if(p == nil) { + error("devtrace: trace not found"); + } + if(traced(p->func, 0)){ + traceoff(p); + } + } + } else if(!strcmp(tok[0], "query")){ + /* See if addr is being traced */ + Trace* p; + uintptr addr; + if (ntok != 2) { + error("devtrace: usage: query "); + } + addr = (uintptr)strtoul(tok[1], &ep, 16); + if (addr < KTZERO) + addr |= KTZERO; + p = traced((void *)addr, 0); + if (p) { + print("Probing is enabled\n"); + } else { + print("Probing is disabled\n"); + } + } else if(!strcmp(tok[0], "size")){ + int l, size; + struct Tracelog *newtracelog; + + if (ntok != 2) + error("devtrace: usage: size "); + + l = strtoul(tok[1], &ep, 0); + if(*ep) { + error("devtrace: size not in recognized format"); + } + size = 1 << l; + /* sort of foolish. Alloc new trace first, then free old. 
*/ + /* and too bad if there are unread traces */ + newtracelog = mallocz(sizeof(*newtracelog)*size, 1); + /* does malloc throw waserror? I don't know */ + if (newtracelog){ + free(tracelog); + tracelog = newtracelog; + logsize = size; + logmask = size - 1; + pr = pw = 0; + } else { + error("devtrace: can't allocate that much"); + } + } else if (!strcmp(tok[0], "testtracein")) { + /* Manually jump to a certain bit of traced code */ + uintptr pc, a1, a2, a3, a4; + int x; + + if (ntok != 6) + error("devtrace: usage: testtracein "); + + pc = (uintptr)strtoul(tok[1], &ep, 16); + if (pc < KTZERO) + pc |= KTZERO; + a1 = (uintptr)strtoul(tok[2], &ep, 16); + a2 = (uintptr)strtoul(tok[3], &ep, 16); + a3 = (uintptr)strtoul(tok[4], &ep, 16); + a4 = (uintptr)strtoul(tok[5], &ep, 16); + + if (traced((void *)pc, 0)) { + x = splhi(); + watching = 1; + tracein((void *)pc, a1, a2, a3, a4); + watching = 0; + splx(x); + } + } else if (!strcmp(tok[0], "watch")) { + /* Watch a certain PID */ + int pid; + + if (ntok != 2) { + error("devtrace: usage: watch [0|]"); + } + + pid = atoi(tok[1]); + if (pid == 0) { + pidwatch = mallocz(sizeof(int)*PIDWATCHSIZE, 1); + numpids = 0; + } else if (pid < 0) { + error("PID must be greater than zero."); + } else if (numpids < PIDWATCHSIZE) { + pidwatch[numpids] = pid; + ainc(&numpids); + } else { + error("pidwatch array full!"); + } + } else if (!strcmp(tok[0], "start")) { + if (ntok != 1) + error("devtrace: usage: start"); + saveactive = 1; + } else if (!strcmp(tok[0], "stop")) { + if (ntok != 1) + error("devtrace: usage: stop"); + saveactive = 0; + all = 0; + } else if (!strcmp(tok[0], "all")) { + if (ntok != 1) + error("devtrace: usage: all"); + saveactive = 1; + all = 1; + } else { + error("devtrace: usage: 'trace' [ktextaddr|name] 'on'|'off'|'mk'|'del' [name] or: 'size' buffersize (power of 2)"); + } + free(s); + break; + } + poperror(); + qunlock(&traceslock); + traceactive = saveactive; + return n; +} + +Dev tracedevtab = { + 'T', + "trace", + 
devreset, + devinit, + devshutdown, + traceattach, + tracewalk, + tracestat, + traceopen, + devcreate, + traceclose, + traceread, + devbread, + tracewrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devuart.c 4/sys/src/nix/port/devuart.c --- 0/sys/src/nix/port/devuart.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devuart.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,794 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +enum { + Qdir = 0, + Qdata, + Qctl, + Qstat, +}; + +#define UARTTYPE(x) (((unsigned)x)&0x1f) +#define UARTID(x) ((((unsigned)x))>>5) +#define UARTQID(i, t) ((((unsigned)i)<<5)|(t)) + +enum +{ + /* soft flow control chars */ + CTLS= 023, + CTLQ= 021, +}; + +extern Dev uartdevtab; +extern PhysUart* physuart[]; + +static Uart* uartlist; +static Uart** uart; +static int uartnuart; +static Dirtab *uartdir; +static int uartndir; +static Timer *uarttimer; + +struct Uartalloc { + Lock; + Uart *elist; /* list of enabled interfaces */ +} uartalloc; + +static void uartclock(void); +static void uartflow(void*); + +/* + * enable/disable uart and add/remove to list of enabled uarts + */ +static Uart* +uartenable(Uart *p) +{ + Uart **l; + + if(p->iq == nil){ + if((p->iq = qopen(8*1024, 0, uartflow, p)) == nil) + return nil; + } + else + qreopen(p->iq); + if(p->oq == nil){ + if((p->oq = qopen(8*1024, 0, uartkick, p)) == nil){ + qfree(p->iq); + p->iq = nil; + return nil; + } + } + else + qreopen(p->oq); + + p->ir = p->istage; + p->iw = p->istage; + p->ie = &p->istage[Stagesize]; + p->op = p->ostage; + p->oe = p->ostage; + + p->hup_dsr = p->hup_dcd = 0; + p->dsr = p->dcd = 0; + + /* assume we can send */ + p->cts = 1; + p->ctsbackoff = 0; + + if(p->bits == 0) + uartctl(p, "l8"); + if(p->stop == 0) + uartctl(p, "s1"); + if(p->parity == 0) + uartctl(p, "pn"); + if(p->baud == 0) + uartctl(p, "b9600"); + (*p->phys->enable)(p, 1); + + lock(&uartalloc); + 
for(l = &uartalloc.elist; *l; l = &(*l)->elist){ + if(*l == p) + break; + } + if(*l == 0){ + p->elist = uartalloc.elist; + uartalloc.elist = p; + } + p->enabled = 1; + unlock(&uartalloc); + + return p; +} + +static void +uartdisable(Uart *p) +{ + Uart **l; + + (*p->phys->disable)(p); + + lock(&uartalloc); + for(l = &uartalloc.elist; *l; l = &(*l)->elist){ + if(*l == p){ + *l = p->elist; + break; + } + } + p->enabled = 0; + unlock(&uartalloc); +} + +Uart* +uartconsole(int i, char *cmd) +{ + Uart *p; + + if(i >= uartnuart || (p = uart[i]) == nil) + return nil; + + qlock(p); + if(!p->console){ + if(p->opens == 0 && uartenable(p) == nil){ + qunlock(p); + return nil; + } + p->opens++; + + addkbdq(p->iq, -1); + addconsdev(p->oq, uartputs, 2, 0); + p->putc = kbdcr2nl; + if(cmd != nil && *cmd != '\0') + uartctl(p, cmd); + + p->console = 1; + } + qunlock(p); + + return p; +} + +static void +uartsetlength(int i) +{ + Uart *p; + + if(i > 0){ + p = uart[i]; + if(p && p->opens && p->iq) + uartdir[1+3*i].length = qlen(p->iq); + } else for(i = 0; i < uartnuart; i++){ + p = uart[i]; + if(p && p->opens && p->iq) + uartdir[1+3*i].length = qlen(p->iq); + } +} + +/* + * set up the '#t' directory + */ +static void +uartreset(void) +{ + int i; + Dirtab *dp; + Uart *p, *tail; + + tail = nil; + for(i = 0; physuart[i] != nil; i++){ + if(physuart[i]->pnp == nil) + continue; + if((p = physuart[i]->pnp()) == nil) + continue; + if(uartlist != nil) + tail->next = p; + else + uartlist = p; + for(tail = p; tail->next != nil; tail = tail->next) + uartnuart++; + uartnuart++; + } + + if(uartnuart) + uart = malloc(uartnuart*sizeof(Uart*)); + + uartndir = 1 + 3*uartnuart; + uartdir = malloc(uartndir * sizeof(Dirtab)); + if(uartnuart > 0 && uart == nil || uartdir == nil) + panic("uartreset: no memory"); + dp = uartdir; + strcpy(dp->name, "."); + mkqid(&dp->qid, 0, 0, QTDIR); + dp->length = 0; + dp->perm = DMDIR|0555; + dp++; + p = uartlist; + for(i = 0; i < uartnuart; i++){ + /* 3 directory entries per 
port */ + sprint(dp->name, "eia%d", i); + dp->qid.path = UARTQID(i, Qdata); + dp->perm = 0660; + dp++; + sprint(dp->name, "eia%dctl", i); + dp->qid.path = UARTQID(i, Qctl); + dp->perm = 0660; + dp++; + sprint(dp->name, "eia%dstatus", i); + dp->qid.path = UARTQID(i, Qstat); + dp->perm = 0444; + dp++; + + uart[i] = p; + p->dev = i; + if(p->console || p->special){ + /* + * No qlock here, only called at boot time. + */ + if(uartenable(p) != nil){ + if(p->console){ + addkbdq(p->iq, -1); + addconsdev(p->oq, uartputs, 2, 0); + p->putc = kbdcr2nl; + } + p->opens++; + } + } + p = p->next; + } + + if(uartnuart){ + /* + * at 115200 baud, the 1024 char buffer takes 56 ms to process, + * processing it every 22 ms should be fine. + */ + uarttimer = addclock0link(uartclock, 22); + } +} + + +static Chan* +uartattach(char *spec) +{ + return devattach('t', spec); +} + +static Walkqid* +uartwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, uartdir, uartndir, devgen); +} + +static long +uartstat(Chan *c, uchar *dp, long n) +{ + if(UARTTYPE(c->qid.path) == Qdata) + uartsetlength(UARTID(c->qid.path)); + return devstat(c, dp, n, uartdir, uartndir, devgen); +} + +static Chan* +uartopen(Chan *c, int omode) +{ + Uart *p; + + c = devopen(c, omode, uartdir, uartndir, devgen); + + switch(UARTTYPE(c->qid.path)){ + case Qctl: + case Qdata: + p = uart[UARTID(c->qid.path)]; + qlock(p); + if(p->opens == 0 && uartenable(p) == nil){ + qunlock(p); + c->flag &= ~COPEN; + error(Enodev); + } + p->opens++; + qunlock(p); + break; + } + + c->iounit = qiomaxatomic; + return c; +} + +static int +uartdrained(void* arg) +{ + Uart *p; + + p = arg; + return qlen(p->oq) == 0 && p->op == p->oe; +} + +static void +uartdrainoutput(Uart *p) +{ + if(!p->enabled) + return; + + p->drain = 1; + if(waserror()){ + p->drain = 0; + nexterror(); + } + sleep(&p->r, uartdrained, p); + poperror(); +} + +static void +uartclose(Chan *c) +{ + Uart *p; + + if(c->qid.type & QTDIR) + return; + 
if((c->flag & COPEN) == 0) + return; + switch(UARTTYPE(c->qid.path)){ + case Qdata: + case Qctl: + p = uart[UARTID(c->qid.path)]; + qlock(p); + if(--(p->opens) == 0){ + qclose(p->iq); + ilock(&p->rlock); + p->ir = p->iw = p->istage; + iunlock(&p->rlock); + + /* + */ + qhangup(p->oq, nil); + if(!waserror()){ + uartdrainoutput(p); + poperror(); + } + qclose(p->oq); + uartdisable(p); + p->dcd = p->dsr = p->dohup = 0; + } + qunlock(p); + break; + } +} + +static long +uartread(Chan *c, void *buf, long n, vlong off) +{ + Uart *p; + ulong offset = off; + + if(c->qid.type & QTDIR){ + uartsetlength(-1); + return devdirread(c, buf, n, uartdir, uartndir, devgen); + } + + p = uart[UARTID(c->qid.path)]; + switch(UARTTYPE(c->qid.path)){ + case Qdata: + return qread(p->iq, buf, n); + case Qctl: + return readnum(offset, buf, n, UARTID(c->qid.path), NUMSIZE); + case Qstat: + return (*p->phys->status)(p, buf, n, offset); + } + + return 0; +} + +int +uartctl(Uart *p, char *cmd) +{ + char *f[16]; + int i, n, nf; + + nf = tokenize(cmd, f, nelem(f)); + for(i = 0; i < nf; i++){ + if(strncmp(f[i], "break", 5) == 0){ + (*p->phys->dobreak)(p, 0); + continue; + } + + n = atoi(f[i]+1); + switch(*f[i]){ + case 'B': + case 'b': + uartdrainoutput(p); + if((*p->phys->baud)(p, n) < 0) + return -1; + break; + case 'C': + case 'c': + p->hup_dcd = n; + break; + case 'D': + case 'd': + uartdrainoutput(p); + (*p->phys->dtr)(p, n); + break; + case 'E': + case 'e': + p->hup_dsr = n; + break; + case 'F': + case 'f': + if(p->oq != nil) + qflush(p->oq); + break; + case 'H': + case 'h': + if(p->iq != nil) + qhangup(p->iq, 0); + if(p->oq != nil) + qhangup(p->oq, 0); + break; + case 'I': + case 'i': + uartdrainoutput(p); + (*p->phys->fifo)(p, n); + break; + case 'K': + case 'k': + uartdrainoutput(p); + (*p->phys->dobreak)(p, n); + break; + case 'L': + case 'l': + uartdrainoutput(p); + if((*p->phys->bits)(p, n) < 0) + return -1; + break; + case 'M': + case 'm': + uartdrainoutput(p); + (*p->phys->modemctl)(p, 
n); + break; + case 'N': + case 'n': + if(p->oq != nil) + qnoblock(p->oq, n); + break; + case 'P': + case 'p': + uartdrainoutput(p); + if((*p->phys->parity)(p, *(f[i]+1)) < 0) + return -1; + break; + case 'Q': + case 'q': + if(p->iq != nil) + qsetlimit(p->iq, n); + if(p->oq != nil) + qsetlimit(p->oq, n); + break; + case 'R': + case 'r': + uartdrainoutput(p); + (*p->phys->rts)(p, n); + break; + case 'S': + case 's': + uartdrainoutput(p); + if((*p->phys->stop)(p, n) < 0) + return -1; + break; + case 'W': + case 'w': + if(uarttimer == nil || n < 1) + return -1; + uarttimer->tns = (vlong)n * 100000LL; + break; + case 'X': + case 'x': + if(p->enabled){ + ilock(&p->tlock); + p->xonoff = n; + iunlock(&p->tlock); + } + break; + } + } + return 0; +} + +static long +uartwrite(Chan *c, void *buf, long n, vlong) +{ + Uart *p; + char *cmd; + + if(c->qid.type & QTDIR) + error(Eperm); + + p = uart[UARTID(c->qid.path)]; + + switch(UARTTYPE(c->qid.path)){ + case Qdata: + qlock(p); + if(waserror()){ + qunlock(p); + nexterror(); + } + + n = qwrite(p->oq, buf, n); + + qunlock(p); + poperror(); + break; + case Qctl: + cmd = malloc(n+1); + memmove(cmd, buf, n); + cmd[n] = 0; + qlock(p); + if(waserror()){ + qunlock(p); + free(cmd); + nexterror(); + } + + /* let output drain */ + if(uartctl(p, cmd) < 0) + error(Ebadarg); + + qunlock(p); + poperror(); + free(cmd); + break; + } + + return n; +} + +static long +uartwstat(Chan *c, uchar *dp, long n) +{ + Dir d; + Dirtab *dt; + + if(!iseve()) + error(Eperm); + if(QTDIR & c->qid.type) + error(Eperm); + if(UARTTYPE(c->qid.path) == Qstat) + error(Eperm); + + dt = &uartdir[1 + 3 * UARTID(c->qid.path)]; + n = convM2D(dp, n, &d, nil); + if(n == 0) + error(Eshortstat); + if(d.mode != ~0UL) + dt[0].perm = dt[1].perm = d.mode; + return n; +} + +void +uartpower(int on) +{ + Uart *p; + + for(p = uartlist; p != nil; p = p->next) { + if(p->phys->power) + (*p->phys->power)(p, on); + } +} + +Dev uartdevtab = { + 't', + "uart", + + uartreset, + devinit, + 
devshutdown, + uartattach, + uartwalk, + uartstat, + uartopen, + devcreate, + uartclose, + uartread, + devbread, + uartwrite, + devbwrite, + devremove, + uartwstat, + uartpower, +}; + +/* + * restart input if it's off + */ +static void +uartflow(void *v) +{ + Uart *p; + + p = v; + if(p->modem) + (*p->phys->rts)(p, 1); +} + +/* + * put some bytes into the local queue to avoid calling + * qconsume for every character + */ +int +uartstageoutput(Uart *p) +{ + int n; + + n = qconsume(p->oq, p->ostage, Stagesize); + if(n <= 0) + return 0; + p->op = p->ostage; + p->oe = p->ostage + n; + return n; +} + +/* + * restart output + */ +void +uartkick(void *v) +{ + Uart *p = v; + + if(p->blocked) + return; + + ilock(&p->tlock); + (*p->phys->kick)(p); + iunlock(&p->tlock); + + if(p->drain && uartdrained(p)){ + p->drain = 0; + wakeup(&p->r); + } +} + +/* + * Move data from the interrupt staging area to + * the input Queue. + */ +static void +uartstageinput(Uart *p) +{ + int n; + uchar *ir, *iw; + + while(p->ir != p->iw){ + ir = p->ir; + if(p->ir > p->iw){ + iw = p->ie; + p->ir = p->istage; + } + else{ + iw = p->iw; + p->ir = p->iw; + } + if((n = qproduce(p->iq, ir, iw - ir)) < 0){ + p->serr++; + (*p->phys->rts)(p, 0); + } + else if(n == 0) + p->berr++; + } +} + +/* + * receive a character at interrupt time + */ +void +uartrecv(Uart *p, char ch) +{ + uchar *next; + + /* software flow control */ + if(p->xonoff){ + if(ch == CTLS){ + p->blocked = 1; + }else if(ch == CTLQ){ + p->blocked = 0; + p->ctsbackoff = 2; /* clock gets output going again */ + } + } + + /* receive the character */ + if(p->putc) + p->putc(p->iq, ch); + else{ + ilock(&p->rlock); + next = p->iw + 1; + if(next == p->ie) + next = p->istage; + if(next == p->ir) + uartstageinput(p); + if(next != p->ir){ + *p->iw = ch; + p->iw = next; + } + iunlock(&p->rlock); + } +} + +/* + * we save up input characters till clock time to reduce + * per character interrupt overhead. 
+ */ +static void +uartclock(void) +{ + Uart *p; + + lock(&uartalloc); + for(p = uartalloc.elist; p; p = p->elist){ + + if(p->phys->poll != nil) + (*p->phys->poll)(p); + + /* this hopefully amortizes cost of qproduce to many chars */ + if(p->iw != p->ir){ + ilock(&p->rlock); + uartstageinput(p); + iunlock(&p->rlock); + } + + /* hang up if requested */ + if(p->dohup){ + qhangup(p->iq, 0); + qhangup(p->oq, 0); + p->dohup = 0; + } + + /* this adds hysteresis to hardware/software flow control */ + if(p->ctsbackoff){ + ilock(&p->tlock); + if(p->ctsbackoff){ + if(--(p->ctsbackoff) == 0) + (*p->phys->kick)(p); + } + iunlock(&p->tlock); + } + } + unlock(&uartalloc); +} + +/* + * polling console input, output + */ + +Uart* consuart; + +int +uartgetc(void) +{ + if(consuart == nil || consuart->phys->getc == nil) + return -1; + return consuart->phys->getc(consuart); +} + +void +uartputc(int c) +{ + if(consuart == nil || consuart->phys->putc == nil) + return; + consuart->phys->putc(consuart, c); +} + +void +uartputs(char *s, int n) +{ + char *e; + + if(consuart == nil || consuart->phys->putc == nil) + return; + + e = s+n; + for(; sphys->putc(consuart, '\r'); + consuart->phys->putc(consuart, *s); + } +} diff -Nru 0/sys/src/nix/port/devusb.c 4/sys/src/nix/port/devusb.c --- 0/sys/src/nix/port/devusb.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devusb.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1473 @@ +/* + * USB device driver framework. + * + * This is in charge of providing access to actual HCIs + * and providing I/O to the various endpoints of devices. + * A separate user program (usbd) is in charge of + * enumerating the bus, setting up endpoints and + * starting devices (also user programs). + * + * The interface provided is a violation of the standard: + * you're welcome. + * + * The interface consists of a root directory with several files + * plus a directory (epN.M) with two files per endpoint. 
 * A device is represented by its first endpoint, which
 * is a control endpoint automatically allocated for each device.
 * Device control endpoints may be used to create new endpoints.
 * Devices corresponding to hubs may also allocate new devices,
 * perhaps also hubs. Initially, a hub device is allocated for
 * each controller present, to represent its root hub. Those can
 * never be removed.
 *
 * All endpoints refer to the first endpoint (epN.0) of the device,
 * which keeps per-device information, and also to the HCI used
 * to reach them. Although all endpoints cache that information.
 *
 * epN.M/data files permit I/O and are considered DMEXCL.
 * epN.M/ctl files provide status info and accept control requests.
 *
 * Endpoints may be given file names to be listed also at #u,
 * for those drivers that have nothing to do after configuring the
 * device and its endpoints.
 *
 * Drivers for different controllers are kept at usb[oue]hci.c
 * It's likely we could factor out much from controllers into
 * a generic controller driver, the problem is that details
 * regarding how to handle toggles, tokens, Tds, etc. will
 * get in the way. Thus, code is probably easier the way it is.
 */

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "../port/usb.h"

typedef struct Hcitype Hcitype;

enum
{
	/* Qid numbers */
	Qdir = 0,		/* #u */
	Qusbdir,		/* #u/usb */
	Qctl,			/* #u/usb/ctl - control requests */

	Qep0dir,		/* #u/usb/ep0.0 - endpoint 0 dir */
	Qep0io,			/* #u/usb/ep0.0/data - endpoint 0 I/O */
	Qep0ctl,		/* #u/usb/ep0.0/ctl - endpoint 0 ctl. */
	Qep0dummy,		/* give 4 qids to each endpoint */

	Qepdir = 0,		/* (qid-qep0dir)&3 is one of these */
	Qepio,			/* to identify which file for the endpoint */
	Qepctl,

	/* ... */

	/* Usb ctls. */
	CMdebug = 0,		/* debug on|off */
	CMdump,			/* dump (data structures for debug) */

	/* Ep. ctls */
	CMnew = 0,		/* new nb ctl|bulk|intr|iso r|w|rw (endpoint) */
	CMnewdev,		/* newdev full|low|high portnb (allocate new devices) */
	CMhub,			/* hub (set the device as a hub) */
	CMspeed,		/* speed full|low|high|no */
	CMmaxpkt,		/* maxpkt size */
	CMntds,			/* ntds nb (max nb. of tds per µframe) */
	CMclrhalt,		/* clrhalt (halt was cleared on endpoint) */
	CMpollival,		/* pollival interval (interrupt/iso) */
	CMhz,			/* hz n (samples/sec; iso) */
	CMsamplesz,		/* samplesz n (sample size; iso) */
	CMinfo,			/* info infostr (ke.ep info for humans) */
	CMdetach,		/* detach (abort I/O forever on this ep). */
	CMaddress,		/* address (address is assigned) */
	CMdebugep,		/* debug n (set/clear debug for this ep) */
	CMname,			/* name str (show up as #u/name as well) */
	CMtmout,		/* timeout n (activate timeouts for ep) */
	CMpreset,		/* reset the port */

	/* Hub feature selectors */
	Rportenable = 1,
	Rportreset = 4,

};

/* One registered host controller type: its name and its reset routine. */
struct Hcitype
{
	char* type;
	int (*reset)(Hci*);
};

#define QID(q) ((int)(q).path)

static char Edetach[] = "device is detached";
static char Enotconf[] = "endpoint not configured";
char Estalled[] = "endpoint stalled";

/* requests accepted by usbctl(): {index, name, number of fields} */
static Cmdtab usbctls[] =
{
	{CMdebug, "debug", 2},
	{CMdump, "dump", 1},
};

/* requests accepted by epctl() on endpoint ctl files */
static Cmdtab epctls[] =
{
	{CMnew, "new", 4},
	{CMnewdev, "newdev", 3},
	{CMhub, "hub", 1},
	{CMspeed, "speed", 2},
	{CMmaxpkt, "maxpkt", 2},
	{CMntds, "ntds", 2},
	{CMpollival, "pollival", 2},
	{CMsamplesz, "samplesz", 2},
	{CMhz, "hz", 2},
	{CMinfo, "info", 0},
	{CMdetach, "detach", 1},
	{CMaddress, "address", 1},
	{CMdebugep, "debug", 2},
	{CMclrhalt, "clrhalt", 1},
	{CMname, "name", 2},
	{CMtmout, "timeout", 2},
	{CMpreset, "reset", 1},
};

/* fixed entries of #u/usb; endpoint directories are generated by usbgen */
static Dirtab usbdir[] =
{
	"ctl", {Qctl}, 0, 0666,
};

/* open mode -> name; searched by name2mode, printed by seprintep */
char *usbmodename[] =
{
	[OREAD] "r",
	[OWRITE] "w",
	[ORDWR] "rw",
};

/* transfer type -> name; searched by name2ttype, printed by seprintep */
static char *ttname[] =
{
	[Tnone] "none",
	[Tctl] "control",
	[Tiso] "iso",
	[Tintr] "interrupt",
	[Tbulk] "bulk",
};

/* device speed -> name; searched by name2speed, printed by seprintep */
static char *spname[] =
{
	[Fullspeed] "full",
	[Lowspeed] "low",
	[Highspeed] "high",
	[Nospeed] "no",
};

static int debug;
static Hcitype hcitypes[Nhcis];
static Hci* hcis[Nhcis];
static QLock epslck; /* add, del, lookup endpoints */
static Ep* eps[Neps]; /* all endpoints known */
static int epmax; /* 1 + last endpoint index used */
static int usbidgen; /* device address generator */

/*
 * Is there something like this in a library? should it be?
 */
char*
seprintdata(char *s, char *se, uchar *d, int n)
{
	if(n > 10)
		return seprint(s, se, " %#p[%d]: %.10H...", d, n, d);
	else
		return seprint(s, se, " %#p[%d]: %.*H", d, n, n, d);
}

/*
 * Return the index in spname of the given speed name,
 * or Nospeed when no entry matches.
 */
static int
name2speed(char *name)
{
	int i;

	for(i = 0; i < nelem(spname); i++)
		if(strcmp(name, spname[i]) == 0)
			return i;
	return Nospeed;
}

/*
 * Return the index in ttname of the given transfer type name.
 * When no name matches, the string is parsed as a number n and
 * n+1 is returned if it is one of Tctl, Tiso, Tbulk or Tintr;
 * anything else yields Tnone.
 */
static int
name2ttype(char *name)
{
	int i;

	for(i = 0; i < nelem(ttname); i++)
		if(strcmp(name, ttname[i]) == 0)
			return i;
	/* may be a std. USB ep. type */
	i = strtol(name, nil, 0);
	switch(i+1){
	case Tctl:
	case Tiso:
	case Tbulk:
	case Tintr:
		return i+1;
	default:
		return Tnone;
	}
}

/*
 * Return the index in usbmodename of the given mode name,
 * or -1 when no entry matches.
 */
static int
name2mode(char *mode)
{
	int i;

	for(i = 0; i < nelem(usbmodename); i++)
		if(strcmp(mode, usbmodename[i]) == 0)
			return i;
	return -1;
}

/*
 * Convert a qid number to an index into eps (four qids per endpoint,
 * starting at Qep0dir). Returns -1 when the index is out of range
 * or the slot is empty. eps is read here without taking epslck.
 */
static int
qid2epidx(int q)
{
	q = (q-Qep0dir)/4;
	if(q < 0 || q >= epmax || eps[q] == nil)
		return -1;
	return q;
}

/*
 * Report whether q is an endpoint qid (q >= Qep0dir) whose two low
 * bits, relative to Qep0dir, equal type (Qepdir, Qepio or Qepctl).
 */
static int
isqtype(int q, int type)
{
	if(q < Qep0dir)
		return 0;
	q -= Qep0dir;
	return (q & 3) == type;
}

/*
 * Record a host controller type name t and its reset routine r in
 * the next free slot of hcitypes. Panics once Nhcis types are registered.
 */
void
addhcitype(char* t, int (*r)(Hci*))
{
	static int ntype;

	if(ntype == Nhcis)
		panic("too many USB host interface types");
	hcitypes[ntype].type = t;
	hcitypes[ntype].reset = r;
	ntype++;
}

/*
 * Format a one-line description of ep into [s, se) and return the
 * new end of the string. ep is kept qlocked while formatting; the
 * waserror frame releases the lock if anything raises an error.
 * With all set, load, ref count, index, name, timeout and (for an
 * endpoint 0) the controller number and endpoint list are added.
 */
static char*
seprintep(char *s, char *se, Ep *ep, int all)
{
	static char* dsnames[] = { "config", "enabled", "detached", "reset" };
	Udev *d;
	int i;
	int di;

	d = ep->dev;

	qlock(ep);
	if(waserror()){
		qunlock(ep);
		nexterror();
	}
	di = ep->dev->nb;
	if(all)
		s = seprint(s, se, "dev %d ep %d ", di, ep->nb);
	s = seprint(s, se, "%s", dsnames[ep->dev->state]);
	s = seprint(s, se, " %s", ttname[ep->ttype]);
	assert(ep->mode == OREAD || ep->mode == OWRITE || ep->mode == ORDWR);
	s = seprint(s, se, " %s", usbmodename[ep->mode]);
	s = seprint(s, se, " speed %s", spname[d->speed]);
	s = seprint(s, se, " maxpkt %ld", ep->maxpkt);
	s = seprint(s, se, " pollival %ld", ep->pollival);
	s = seprint(s, se, " samplesz %ld", ep->samplesz);
	s = seprint(s, se, " hz %ld", ep->hz);
	s = seprint(s, se, " hub %d", ep->dev->hub);
	s = seprint(s, se, " port %d", ep->dev->port);
	if(ep->inuse)
		s = seprint(s, se, " busy");
	else
		s = seprint(s, se, " idle");
	if(all){
		s = seprint(s, se, " load %uld", ep->load);
		s = seprint(s, se, " ref %d addr %#p", ep->ref, ep);
		s = seprint(s, se, " idx %d", ep->idx);
		if(ep->name != nil)
			s = seprint(s, se, " name '%s'", ep->name);
		if(ep->tmout != 0)
			s = seprint(s, se, " tmout");
		if(ep == ep->ep0){
			s = seprint(s, se, " ctlrno %#x", ep->hp->ctlrno);
			s = seprint(s, se, " eps:");
			for(i = 0; i < nelem(d->eps); i++)
				if(d->eps[i] != nil)
					s = seprint(s, se, " ep%d.%d", di, i);
		}
	}
	if(ep->info != nil)
		s = seprint(s, se, "\n%s %s\n", ep->info, ep->hp->type);
	else
		s = seprint(s, se, "\n");
	qunlock(ep);
	poperror();
	return s;
}

/*
 * Allocate an endpoint for controller hp with one reference, place
 * it in the first free slot of eps (under epslck) and set default
 * maxpkt and ntds. Returns nil, after printing a message and freeing
 * the endpoint, when all Neps slots are taken.
 */
static Ep*
epalloc(Hci *hp)
{
	Ep *ep;
	int i;

	ep = smalloc(sizeof(Ep));
	ep->ref = 1;
	qlock(&epslck);
	for(i = 0; i < Neps; i++)
		if(eps[i] == nil)
			break;
	if(i == Neps){
		qunlock(&epslck);
		free(ep);
		print("usb: bug: too few endpoints.\n");
		return nil;
	}
	ep->idx = i;
	if(epmax <= i)
		epmax = i+1;
	eps[i] = ep;
	ep->hp = hp;
	ep->maxpkt = 8;
	ep->ntds = 1;
	ep->samplesz = ep->pollival = ep->hz = 0; /* make them void */
	qunlock(&epslck);
	return ep;
}

/*
 * Return endpoint number i with its reference count incremented,
 * or nil if i is out of range or the slot is empty. The caller
 * must release the reference with putep.
 */
static Ep*
getep(int i)
{
	Ep *ep;

	/* quick check without the lock; the slot is read again under epslck */
	if(i < 0 || i >= epmax || eps[i] == nil)
		return nil;
	qlock(&epslck);
	ep = eps[i];
	if(ep != nil)
		incref(ep);
	qunlock(&epslck);
	return ep;
}

/*
 * Drop one reference to ep (nil is accepted). When the count reaches
 * zero the endpoint is removed from eps, its slot in the device's
 * endpoint table is cleared, putep is called on ep0 if ep is not
 * itself endpoint 0, and the endpoint is freed.
 * NOTE(review): the Udev is not freed here; confirm where it is released.
 */
static void
putep(Ep *ep)
{
	Udev *d;

	if(ep != nil && decref(ep) == 0){
		d = ep->dev;
		deprint("usb: ep%d.%d %#p released\n", d->nb, ep->nb, ep);
		qlock(&epslck);
		eps[ep->idx] = nil;
		if(ep->idx == epmax-1)
			epmax--;
		/* give the address back if this device holds the last one issued */
		if(ep == ep->ep0 && ep->dev != nil && ep->dev->nb == usbidgen)
			usbidgen--;
		qunlock(&epslck);
		if(d != nil){
			qlock(ep->ep0);
			d->eps[ep->nb] = nil;
			qunlock(ep->ep0);
		}
		if(ep->ep0 != ep){
			putep(ep->ep0);
			ep->ep0 = nil;
		}
		free(ep->info);
		free(ep->name);
		free(ep);
	}
}

/*
 * Print every known endpoint (portable description followed by the
 * controller's own seprintep output) and then call each controller's
 * dump routine. A reference is held on each endpoint while printing.
 */
static void
dumpeps(void)
{
	int i;
	static char buf[512];
	char *s;
	char *e;
	Ep *ep;

	print("usb dump eps: epmax %d Neps %d (ref=1+ for dump):\n", epmax, Neps);
	for(i = 0; i < epmax; i++){
		s = buf;
		e = buf+sizeof(buf);
		ep = getep(i);
		if(ep != nil){
			if(waserror()){
				putep(ep);
				nexterror();
			}
			s = seprint(s, e, "ep%d.%d ", ep->dev->nb, ep->nb);
			seprintep(s, e, ep, 1);
			print("%s", buf);
			ep->hp->seprintep(buf, e, ep);
			print("%s", buf);
			poperror();
			putep(ep);
		}
	}
	print("usb dump hcis:\n");
	for(i = 0; i < Nhcis; i++)
		if(hcis[i] != nil)
			hcis[i]->dump(hcis[i]);
}

/*
 * Return the next device number, obtained by incrementing usbidgen
 * under epslck. A warning is printed when the number reaches 0x7F,
 * but the number is still returned.
 */
static int
newusbid(Hci *)
{
	int id;

	qlock(&epslck);
	id = ++usbidgen;
	if(id >= 0x7F)
		print("#u: too many device addresses; reuse them more\n");
	qunlock(&epslck);
	return id;
}

/*
 * Create endpoint 0 for a new device
 * NOTE(review): the result of epalloc() is used without a nil check.
 */
static Ep*
newdev(Hci *hp, int ishub, int isroot)
{
	Ep *ep;
	Udev *d;

	ep = epalloc(hp);
	d = ep->dev = smalloc(sizeof(Udev));
	d->nb = newusbid(hp);
	d->eps[0] = ep;
	ep->nb = 0;
	ep->toggle[0] = ep->toggle[1] = 0;
	d->ishub = ishub;
	d->isroot = isroot;
	if(hp->highspeed != 0)
		d->speed = Highspeed;
	else
		d->speed = Fullspeed;
	d->state = Dconfig; /* address not yet set */
	ep->dev = d;
	ep->ep0 = ep; /* no ref counted here */
	ep->ttype = Tctl;
	ep->tmout = Xfertmout;
	ep->mode = ORDWR;
	dprint("newdev %#p ep%d.%d %#p\n", d, d->nb, ep->nb, ep);
	return ep;
}

/*
 * Create a new endpoint for the device
 * accessed via the given endpoint 0.
+ */ +static Ep* +newdevep(Ep *ep, int i, int tt, int mode) +{ + Ep *nep; + Udev *d; + + d = ep->dev; + if(d->eps[i] != nil) + error("endpoint already in use"); + nep = epalloc(ep->hp); + incref(ep); + d->eps[i] = nep; + nep->nb = i; + nep->toggle[0] = nep->toggle[1] = 0; + nep->ep0 = ep; + nep->dev = ep->dev; + nep->mode = mode; + nep->ttype = tt; + nep->debug = ep->debug; + /* set defaults */ + switch(tt){ + case Tctl: + nep->tmout = Xfertmout; + break; + case Tintr: + nep->pollival = 10; + break; + case Tiso: + nep->tmout = Xfertmout; + nep->pollival = 10; + nep->samplesz = 4; + nep->hz = 44100; + break; + } + deprint("newdevep ep%d.%d %#p\n", d->nb, nep->nb, nep); + return ep; +} + +static int +epdataperm(int mode) +{ + + switch(mode){ + case OREAD: + return 0440|DMEXCL; + break; + case OWRITE: + return 0220|DMEXCL; + break; + default: + return 0660|DMEXCL; + } +} + +static int +usbgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) +{ + Qid q; + Dirtab *dir; + int perm; + char *se; + Ep *ep; + int nb; + int mode; + + if(0)ddprint("usbgen q %#x s %d...", QID(c->qid), s); + if(s == DEVDOTDOT){ + if(QID(c->qid) <= Qusbdir){ + mkqid(&q, Qdir, 0, QTDIR); + devdir(c, q, "#u", 0, eve, 0555, dp); + }else{ + mkqid(&q, Qusbdir, 0, QTDIR); + devdir(c, q, "usb", 0, eve, 0555, dp); + } + if(0)ddprint("ok\n"); + return 1; + } + + switch(QID(c->qid)){ + case Qdir: /* list #u */ + if(s == 0){ + mkqid(&q, Qusbdir, 0, QTDIR); + devdir(c, q, "usb", 0, eve, 0555, dp); + if(0)ddprint("ok\n"); + return 1; + } + s--; + if(s < 0 || s >= epmax) + goto Fail; + ep = getep(s); + if(ep == nil || ep->name == nil){ + if(ep != nil) + putep(ep); + if(0)ddprint("skip\n"); + return 0; + } + if(waserror()){ + putep(ep); + nexterror(); + } + mkqid(&q, Qep0io+s*4, 0, QTFILE); + devdir(c, q, ep->name, 0, eve, epdataperm(ep->mode), dp); + putep(ep); + poperror(); + if(0)ddprint("ok\n"); + return 1; + + case Qusbdir: /* list #u/usb */ + Usbdir: + if(s < nelem(usbdir)){ + dir = &usbdir[s]; + mkqid(&q, 
dir->qid.path, 0, QTFILE); + devdir(c, q, dir->name, dir->length, eve, dir->perm, dp); + if(0)ddprint("ok\n"); + return 1; + } + s -= nelem(usbdir); + if(s < 0 || s >= epmax) + goto Fail; + ep = getep(s); + if(ep == nil){ + if(0)ddprint("skip\n"); + return 0; + } + if(waserror()){ + putep(ep); + nexterror(); + } + se = up->genbuf+sizeof(up->genbuf); + seprint(up->genbuf, se, "ep%d.%d", ep->dev->nb, ep->nb); + mkqid(&q, Qep0dir+4*s, 0, QTDIR); + putep(ep); + poperror(); + devdir(c, q, up->genbuf, 0, eve, 0755, dp); + if(0)ddprint("ok\n"); + return 1; + + case Qctl: + s = 0; + goto Usbdir; + + default: /* list #u/usb/epN.M */ + nb = qid2epidx(QID(c->qid)); + ep = getep(nb); + if(ep == nil) + goto Fail; + mode = ep->mode; + putep(ep); + if(isqtype(QID(c->qid), Qepdir)){ + Epdir: + switch(s){ + case 0: + mkqid(&q, Qep0io+nb*4, 0, QTFILE); + perm = epdataperm(mode); + devdir(c, q, "data", 0, eve, perm, dp); + break; + case 1: + mkqid(&q, Qep0ctl+nb*4, 0, QTFILE); + devdir(c, q, "ctl", 0, eve, 0664, dp); + break; + default: + goto Fail; + } + }else if(isqtype(QID(c->qid), Qepctl)){ + s = 1; + goto Epdir; + }else{ + s = 0; + goto Epdir; + } + if(0)ddprint("ok\n"); + return 1; + } +Fail: + if(0)ddprint("fail\n"); + return -1; +} + +static Hci* +hciprobe(int cardno, int ctlrno) +{ + Hci *hp; + char *type; + char name[64]; + static int epnb = 1; /* guess the endpoint nb. 
for the controller */ + + ddprint("hciprobe %d %d\n", cardno, ctlrno); + hp = smalloc(sizeof(Hci)); + hp->ctlrno = ctlrno; + hp->tbdf = BUSUNKNOWN; + + if(cardno < 0){ + if(isaconfig("usb", ctlrno, hp) == 0){ + free(hp); + return nil; + } + for(cardno = 0; cardno < Nhcis; cardno++){ + if(hcitypes[cardno].type == nil) + break; + type = hp->type; + if(type==nil || *type==0) + type = "uhci"; + if(cistrcmp(hcitypes[cardno].type, type) == 0) + break; + } + } + + if(cardno >= Nhcis || hcitypes[cardno].type == nil){ + free(hp); + return nil; + } + dprint("%s...", hcitypes[cardno].type); + if(hcitypes[cardno].reset(hp) < 0){ + free(hp); + return nil; + } + + /* + * IRQ2 doesn't really exist, it's used to gang the interrupt + * controllers together. A device set to IRQ2 will appear on + * the second interrupt controller as IRQ9. + */ +/*port*/ if(hp->irq == 2) +/*port*/ hp->irq = 9; + snprint(name, sizeof(name), "usb%s", hcitypes[cardno].type); + intrenable(hp->irq, hp->interrupt, hp, hp->tbdf, name); + + /* + * modern machines have too many usb controllers to list on + * the console. 
+ */ + dprint("#u/usb/ep%d.0: %s: port %#p irq %d\n", + epnb, hcitypes[cardno].type, hp->port, hp->irq); + epnb++; + return hp; +} + +static void +usbreset(void) +{ + int cardno, ctlrno; + Hci *hp; + + if(getconf("*nousbprobe")) + return; + dprint("usbreset\n"); + + for(ctlrno = 0; ctlrno < Nhcis; ctlrno++) + if((hp = hciprobe(-1, ctlrno)) != nil) + hcis[ctlrno] = hp; + cardno = ctlrno = 0; + while(cardno < Nhcis && ctlrno < Nhcis && hcitypes[cardno].type != nil) + if(hcis[ctlrno] != nil) + ctlrno++; + else{ + hp = hciprobe(cardno, ctlrno); + if(hp == nil) + cardno++; + hcis[ctlrno++] = hp; + } + if(hcis[Nhcis-1] != nil) + print("usbreset: bug: Nhcis (%d) too small\n", Nhcis); +} + +/* need to move this to arch directory */ +static void +usbinit(void) +{ + Hci *hp; + int ctlrno; + Ep *d; + char info[40]; + + dprint("usbinit\n"); + for(ctlrno = 0; ctlrno < Nhcis; ctlrno++){ + hp = hcis[ctlrno]; + if(hp != nil){ + if(hp->init != nil) + hp->init(hp); + d = newdev(hp, 1, 1); /* new root hub */ + d->dev->state = Denabled; /* although addr == 0 */ + d->maxpkt = 64; + snprint(info, sizeof(info), "ports %d", hp->nports); + kstrdup(&d->info, info); + } + } +} + +static Chan* +usbattach(char *spec) +{ + return devattach(L'u', spec); +} + +static Walkqid* +usbwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, nil, 0, usbgen); +} + +static long +usbstat(Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, nil, 0, usbgen); +} + +/* + * µs for the given transfer, for bandwidth allocation. + * This is a very rough worst case for what 5.11.3 + * of the usb 2.0 spec says. + * Also, we are using maxpkt and not actual transfer sizes. + * Only when we are sure we + * are not exceeding b/w might we consider adjusting it. 
 */
static ulong
usbload(int speed, int maxpkt)
{
	enum{ Hostns = 1000, Hubns = 333 };
	ulong l;
	ulong bs;

	l = 0;
	bs = 10UL * maxpkt;
	/* l is accumulated in ns and converted on return */
	switch(speed){
	case Highspeed:
		l = 55*8*2 + 2 * (3 + bs) + Hostns;
		break;
	case Fullspeed:
		l = 9107 + 84 * (4 + bs) + Hostns;
		break;
	case Lowspeed:
		l = 64107 + 2 * Hubns + 667 * (3 + bs) + Hostns;
		break;
	default:
		print("usbload: bad speed %d\n", speed);
		/* let it run */
	}
	return l / 1000UL;		/* in µs */
}

/*
 * Open a file in #u. Directories and ctl files go through devopen.
 * A data file marks its endpoint in use (raising Einuse if it already
 * is), checks the requested mode against the endpoint's mode, computes
 * the load if not yet known and asks the controller to open the
 * endpoint. The endpoint reference taken here is kept until usbclose.
 */
static Chan*
usbopen(Chan *c, int omode)
{
	int q;
	Ep *ep;
	int mode;

	mode = openmode(omode);
	q = QID(c->qid);

	if(q >= Qep0dir && qid2epidx(q) < 0)
		error(Eio);
	if(q < Qep0dir || isqtype(q, Qepctl) || isqtype(q, Qepdir))
		return devopen(c, omode, nil, 0, usbgen);

	ep = getep(qid2epidx(q));
	if(ep == nil)
		error(Eio);
	deprint("usbopen q %#x fid %d omode %d\n", q, c->fid, mode);
	/* outer frame: drop the reference on failure */
	if(waserror()){
		putep(ep);
		nexterror();
	}
	qlock(ep);
	if(ep->inuse){
		qunlock(ep);
		error(Einuse);
	}
	ep->inuse = 1;
	qunlock(ep);
	/* inner frame: undo the in-use mark on failure */
	if(waserror()){
		ep->inuse = 0;
		nexterror();
	}
	if(mode != OREAD && ep->mode == OREAD)
		error(Eperm);
	if(mode != OWRITE && ep->mode == OWRITE)
		error(Eperm);
	if(ep->ttype == Tnone)
		error(Enotconf);
	ep->clrhalt = 0;
	ep->rhrepl = -1;
	if(ep->load == 0)
		ep->load = usbload(ep->dev->speed, ep->maxpkt);
	ep->hp->epopen(ep);

	poperror();	/* ep->inuse */
	poperror();	/* don't putep(): ref kept for fid using the ep. */

	c->mode = mode;
	c->flag |= COPEN;
	c->offset = 0;
	c->aux = nil;	/* paranoia */
	return c;
}

/*
 * If ep is in use, ask the controller to close it and clear the
 * in-use mark. Runs with ep qlocked; the lock is released on error.
 */
static void
epclose(Ep *ep)
{
	qlock(ep);
	if(waserror()){
		qunlock(ep);
		nexterror();
	}
	if(ep->inuse){
		ep->hp->epclose(ep);
		ep->inuse = 0;
	}
	qunlock(ep);
	poperror();
}

/*
 * Close a file in #u. Only open data files need work: the endpoint
 * is closed and the reference kept since usbopen is released, in
 * addition to the reference taken here for the duration of the call.
 */
static void
usbclose(Chan *c)
{
	int q;
	Ep *ep;

	q = QID(c->qid);
	if(q < Qep0dir || isqtype(q, Qepctl) || isqtype(q, Qepdir))
		return;

	ep = getep(qid2epidx(q));
	if(ep == nil)
		return;
	deprint("usbclose q %#x fid %d ref %d\n", q, c->fid, ep->ref);
	if(waserror()){
		putep(ep);
		nexterror();
	}
	if(c->flag & COPEN){
		free(c->aux);
		c->aux = nil;
		epclose(ep);
		putep(ep);	/* release ref kept since usbopen */
		c->flag &= ~COPEN;
	}
	poperror();
	putep(ep);
}

/*
 * Read a ctl file. #u/usb/ctl yields one line per known endpoint.
 * An endpoint ctl file yields either the string saved in c->aux
 * (which is then discarded) or the description of that endpoint.
 * The text is built in a READSTR buffer and copied out with readstr.
 */
static long
ctlread(Chan *c, void *a, long n, vlong offset)
{
	int q;
	char *s;
	char *us;
	char *se;
	Ep *ep;
	int i;

	q = QID(c->qid);
	us = s = smalloc(READSTR);
	se = s + READSTR;
	if(waserror()){
		free(us);
		nexterror();
	}
	if(q == Qctl)
		for(i = 0; i < epmax; i++){
			ep = getep(i);
			if(ep != nil){
				if(waserror()){
					putep(ep);
					nexterror();
				}
				s = seprint(s, se, "ep%d.%d ", ep->dev->nb, ep->nb);
				s = seprintep(s, se, ep, 0);
				poperror();
			}
			putep(ep);	/* putep ignores nil */
		}
	else{
		ep = getep(qid2epidx(q));
		if(ep == nil)
			error(Eio);
		if(waserror()){
			putep(ep);
			nexterror();
		}
		if(c->aux != nil){
			/* After a new endpoint request we read
			 * the new endpoint name back.
			 */
			strecpy(s, se, c->aux);
			free(c->aux);
			c->aux = nil;
		}else
			seprintep(s, se, ep, 0);
		poperror();
		putep(ep);
	}
	n = readstr(offset, a, n, us);
	poperror();
	free(us);
	return n;
}

/*
 * Fake root hub emulation.
+ */ +static long +rhubread(Ep *ep, void *a, long n) +{ + char *b; + + if(ep->dev->isroot == 0 || ep->nb != 0 || n < 2) + return -1; + if(ep->rhrepl < 0) + return -1; + + b = a; + memset(b, 0, n); + PUT2(b, ep->rhrepl); + ep->rhrepl = -1; + return n; +} + +static long +rhubwrite(Ep *ep, void *a, long n) +{ + uchar *s; + int cmd; + int feature; + int port; + Hci *hp; + + if(ep->dev == nil || ep->dev->isroot == 0 || ep->nb != 0) + return -1; + if(n != Rsetuplen) + error("root hub is a toy hub"); + ep->rhrepl = -1; + s = a; + if(s[Rtype] != (Rh2d|Rclass|Rother) && s[Rtype] != (Rd2h|Rclass|Rother)) + error("root hub is a toy hub"); + hp = ep->hp; + cmd = s[Rreq]; + feature = GET2(s+Rvalue); + port = GET2(s+Rindex); + if(port < 1 || port > hp->nports) + error("bad hub port number"); + switch(feature){ + case Rportenable: + ep->rhrepl = hp->portenable(hp, port, cmd == Rsetfeature); + break; + case Rportreset: + ep->rhrepl = hp->portreset(hp, port, cmd == Rsetfeature); + break; + case Rgetstatus: + ep->rhrepl = hp->portstatus(hp, port); + break; + default: + ep->rhrepl = 0; + } + return n; +} + +static long +usbread(Chan *c, void *a, long n, vlong offset) +{ + int q; + Ep *ep; + int nr; + + q = QID(c->qid); + + if(c->qid.type == QTDIR) + return devdirread(c, a, n, nil, 0, usbgen); + + if(q == Qctl || isqtype(q, Qepctl)) + return ctlread(c, a, n, offset); + + ep = getep(qid2epidx(q)); + if(ep == nil) + error(Eio); + if(waserror()){ + putep(ep); + nexterror(); + } + if(ep->dev->state == Ddetach) + error(Edetach); + if(ep->mode == OWRITE || ep->inuse == 0) + error(Ebadusefd); + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tctl: + nr = rhubread(ep, a, n); + if(nr >= 0){ + n = nr; + break; + } + /* else fall */ + default: + ddeprint("\nusbread q %#x fid %d cnt %ld off %lld\n",q,c->fid,n,offset); + n = ep->hp->epread(ep, a, n); + break; + } + poperror(); + putep(ep); + return n; +} + +static long +pow2(int n) +{ + return 1 << n; +} + +static void 
+setmaxpkt(Ep *ep, char* s) +{ + long spp; /* samples per packet */ + + if(ep->dev->speed == Highspeed) + spp = (ep->hz * ep->pollival * ep->ntds + 7999) / 8000; + else + spp = (ep->hz * ep->pollival + 999) / 1000; + ep->maxpkt = spp * ep->samplesz; + deprint("usb: %s: setmaxpkt: hz %ld poll %ld" + " ntds %d %s speed -> spp %ld maxpkt %ld\n", s, + ep->hz, ep->pollival, ep->ntds, spname[ep->dev->speed], + spp, ep->maxpkt); + if(ep->maxpkt > 1024){ + print("usb: %s: maxpkt %ld > 1024. truncating\n", s, ep->maxpkt); + ep->maxpkt = 1024; + } +} + +/* + * Many endpoint ctls. simply update the portable representation + * of the endpoint. The actual controller driver will look + * at them to setup the endpoints as dictated. + */ +static long +epctl(Ep *ep, Chan *c, void *a, long n) +{ + int i, l, mode, nb, tt; + char *b, *s; + Cmdbuf *cb; + Cmdtab *ct; + Ep *nep; + Udev *d; + static char *Info = "info "; + + d = ep->dev; + + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + ct = lookupcmd(cb, epctls, nelem(epctls)); + if(ct == nil) + error(Ebadctl); + i = ct->index; + if(i == CMnew || i == CMspeed || i == CMhub || i == CMpreset) + if(ep != ep->ep0) + error("allowed only on a setup endpoint"); + if(i != CMclrhalt && i != CMdetach && i != CMdebugep && i != CMname) + if(ep != ep->ep0 && ep->inuse != 0) + error("must configure before using"); + switch(i){ + case CMnew: + deprint("usb epctl %s\n", cb->f[0]); + nb = strtol(cb->f[1], nil, 0); + if(nb < 0 || nb >= Ndeveps) + error("bad endpoint number"); + tt = name2ttype(cb->f[2]); + if(tt == Tnone) + error("unknown endpoint type"); + mode = name2mode(cb->f[3]); + if(mode < 0) + error("unknown i/o mode"); + newdevep(ep, nb, tt, mode); + break; + case CMnewdev: + deprint("usb epctl %s\n", cb->f[0]); + if(ep != ep->ep0 || d->ishub == 0) + error("not a hub setup endpoint"); + l = name2speed(cb->f[1]); + if(l == Nospeed) + error("speed must be full|low|high"); + nep = newdev(ep->hp, 0, 0); + nep->dev->speed = 
l; + if(nep->dev->speed != Lowspeed) + nep->maxpkt = 64; /* assume full speed */ + nep->dev->hub = d->nb; + nep->dev->port = atoi(cb->f[2]); + /* next read request will read + * the name for the new endpoint + */ + l = sizeof(up->genbuf); + snprint(up->genbuf, l, "ep%d.%d", nep->dev->nb, nep->nb); + kstrdup(&c->aux, up->genbuf); + break; + case CMhub: + deprint("usb epctl %s\n", cb->f[0]); + d->ishub = 1; + break; + case CMspeed: + l = name2speed(cb->f[1]); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l == Nospeed) + error("speed must be full|low|high"); + qlock(ep->ep0); + d->speed = l; + qunlock(ep->ep0); + break; + case CMmaxpkt: + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l < 1 || l > 1024) + error("maxpkt not in [1:1024]"); + qlock(ep); + ep->maxpkt = l; + qunlock(ep); + break; + case CMntds: + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l < 1 || l > 3) + error("ntds not in [1:3]"); + qlock(ep); + ep->ntds = l; + qunlock(ep); + break; + case CMpollival: + if(ep->ttype != Tintr && ep->ttype != Tiso) + error("not an intr or iso endpoint"); + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(ep->ttype == Tiso || + (ep->ttype == Tintr && ep->dev->speed == Highspeed)){ + if(l < 1 || l > 16) + error("pollival power not in [1:16]"); + l = pow2(l-1); + }else + if(l < 1 || l > 255) + error("pollival not in [1:255]"); + qlock(ep); + ep->pollival = l; + if(ep->ttype == Tiso) + setmaxpkt(ep, "pollival"); + qunlock(ep); + break; + case CMsamplesz: + if(ep->ttype != Tiso) + error("not an iso endpoint"); + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); + if(l <= 0 || l > 8) + error("samplesz not in [1:8]"); + qlock(ep); + ep->samplesz = l; + setmaxpkt(ep, "samplesz"); + qunlock(ep); + break; + case CMhz: + if(ep->ttype != Tiso) + error("not an iso endpoint"); + l = strtoul(cb->f[1], nil, 0); + deprint("usb epctl %s %d\n", cb->f[0], l); 
+ if(l <= 0 || l > 100000) + error("hz not in [1:100000]"); + qlock(ep); + ep->hz = l; + setmaxpkt(ep, "hz"); + qunlock(ep); + break; + case CMclrhalt: + qlock(ep); + deprint("usb epctl %s\n", cb->f[0]); + ep->clrhalt = 1; + qunlock(ep); + break; + case CMinfo: + deprint("usb epctl %s\n", cb->f[0]); + l = strlen(Info); + s = a; + if(n < l+2 || strncmp(Info, s, l) != 0) + error(Ebadctl); + if(n > 1024) + n = 1024; + b = smalloc(n); + memmove(b, s+l, n-l); + b[n-l] = 0; + if(b[n-l-1] == '\n') + b[n-l-1] = 0; + qlock(ep); + free(ep->info); + ep->info = b; + qunlock(ep); + break; + case CMaddress: + deprint("usb epctl %s\n", cb->f[0]); + ep->dev->state = Denabled; + break; + case CMdetach: + if(ep->dev->isroot != 0) + error("can't detach a root hub"); + deprint("usb epctl %s ep%d.%d\n", + cb->f[0], ep->dev->nb, ep->nb); + ep->dev->state = Ddetach; + /* Release file system ref. for its endpoints */ + for(i = 0; i < nelem(ep->dev->eps); i++) + putep(ep->dev->eps[i]); + break; + case CMdebugep: + if(strcmp(cb->f[1], "on") == 0) + ep->debug = 1; + else if(strcmp(cb->f[1], "off") == 0) + ep->debug = 0; + else + ep->debug = strtoul(cb->f[1], nil, 0); + print("usb: ep%d.%d debug %d\n", + ep->dev->nb, ep->nb, ep->debug); + break; + case CMname: + deprint("usb epctl %s %s\n", cb->f[0], cb->f[1]); + validname(cb->f[1], 0); + kstrdup(&ep->name, cb->f[1]); + break; + case CMtmout: + deprint("usb epctl %s\n", cb->f[0]); + if(ep->ttype == Tiso || ep->ttype == Tctl) + error("ctl ignored for this endpoint type"); + ep->tmout = strtoul(cb->f[1], nil, 0); + if(ep->tmout != 0 && ep->tmout < Xfertmout) + ep->tmout = Xfertmout; + break; + case CMpreset: + deprint("usb epctl %s\n", cb->f[0]); + if(ep->ttype != Tctl) + error("not a control endpoint"); + if(ep->dev->state != Denabled) + error("forbidden on devices not enabled"); + ep->dev->state = Dreset; + break; + default: + panic("usb: unknown epctl %d", ct->index); + } + free(cb); + poperror(); + return n; +} + +static long +usbctl(void 
*a, long n)
+{	/* usbctl: parse one request from a[0:n] for the global ctl file and run it; returns n */
+	Cmdtab *ct;
+	Cmdbuf *cb;
+	Ep *ep;
+	int i;
+
+	cb = parsecmd(a, n);	/* freed both in the error handler and before the normal return */
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, usbctls, nelem(usbctls));
+	dprint("usb ctl %s\n", cb->f[0]);
+	switch(ct->index){
+	case CMdebug:	/* "on" -> 1, "off" -> 0, anything else goes through strtol */
+		if(strcmp(cb->f[1], "on") == 0)
+			debug = 1;
+		else if(strcmp(cb->f[1], "off") == 0)
+			debug = 0;
+		else
+			debug = strtol(cb->f[1], nil, 0);
+		print("usb: debug %d\n", debug);
+		for(i = 0; i < epmax; i++)	/* pass the new level to the controller of every existing endpoint */
+			if((ep = getep(i)) != nil){
+				ep->hp->debug(ep->hp, debug);
+				putep(ep);	/* drop the reference taken by getep */
+			}
+		break;
+	case CMdump:
+		dumpeps();
+		break;
+	}
+	free(cb);
+	poperror();
+	return n;
+}
+
+static long
+ctlwrite(Chan *c, void *a, long n)	/* write to the global ctl file (usbctl) or to an endpoint ctl file (epctl) */
+{
+	int q;
+	Ep *ep;
+
+	q = QID(c->qid);
+	if(q == Qctl)
+		return usbctl(a, n);	/* global ctl: no endpoint is looked up */
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);	/* release the endpoint on any error raised below */
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(isqtype(q, Qepctl) && c->aux != nil){
+		/* Be sure we don't keep a cloned ep name */
+		free(c->aux);
+		c->aux = nil;
+		error("read, not write, expected");
+	}
+	n = epctl(ep, c, a, n);
+	putep(ep);
+	poperror();
+	return n;
+}
+
+static long
+usbwrite(Chan *c, void *a, long n, vlong off)	/* device write entry: directories refuse, ctl files go to ctlwrite, the rest is endpoint data */
+{
+	int nr, q;
+	Ep *ep;
+
+	if(c->qid.type == QTDIR)
+		error(Eisdir);
+
+	q = QID(c->qid);
+
+	if(q == Qctl || isqtype(q, Qepctl))
+		return ctlwrite(c, a, n);
+
+	ep = getep(qid2epidx(q));
+	if(ep == nil)
+		error(Eio);
+	if(waserror()){
+		putep(ep);
+		nexterror();
+	}
+	if(ep->dev->state == Ddetach)
+		error(Edetach);
+	if(ep->mode == OREAD || ep->inuse == 0)	/* endpoint must be in use and not read-only */
+		error(Ebadusefd);
+
+	switch(ep->ttype){
+	case Tnone:
+		error("endpoint not configured");	/* no break: presumably error() does not return here - confirm */
+	case Tctl:
+		nr = rhubwrite(ep, a, n);
+		if(nr >= 0){	/* rhubwrite took the request; its count becomes the result */
+			n = nr;
+			break;
+		}
+		/* else fall */
+	default:
+		ddeprint("\nusbwrite q %#x fid %d cnt %ld off %lld\n",q, c->fid, n, off);
+		ep->hp->epwrite(ep, a, n);	/* result ignored: the caller is always told n */
+	}
+	putep(ep);
+	poperror();
+	return n;
+}
+
+void
+usbshutdown(void)
+{
+	Hci *hp;
+	int i;
+
+	for(i = 0;
i < Nhcis; i++){ + hp = hcis[i]; + if(hp == nil) + continue; + if(hp->shutdown == nil) + print("#u: no shutdown function for %s\n", hp->type); + else + hp->shutdown(hp); + } +} + +Dev usbdevtab = { + L'u', + "usb", + + usbreset, + usbinit, + usbshutdown, + usbattach, + usbwalk, + usbstat, + usbopen, + devcreate, + usbclose, + usbread, + devbread, + usbwrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devwd.c 4/sys/src/nix/port/devwd.c --- 0/sys/src/nix/port/devwd.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devwd.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,144 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" + +enum { + Qdir, + Qwdctl, +}; + +static Watchdog *wd; +static Dirtab wddir[] = { + ".", { Qdir, 0, QTDIR }, 0, 0550, + "wdctl", { Qwdctl, 0 }, 0, 0660, +}; + + +void +addwatchdog(Watchdog *watchdog) +{ + if(wd){ + print("addwatchdog: watchdog already installed\n"); + return; + } + wd = watchdog; + if(wd) + wd->disable(); +} + +static Chan* +wdattach(char *spec) +{ + return devattach('w', spec); +} + +static Walkqid* +wdwalk(Chan *c, Chan *nc, char **name, int nname) +{ + return devwalk(c, nc, name, nname, wddir, nelem(wddir), devgen); +} + +static long +wdstat(Chan *c, uchar *dp, long n) +{ + return devstat(c, dp, n, wddir, nelem(wddir), devgen); +} + +static Chan* +wdopen(Chan* c, int omode) +{ + return devopen(c, omode, wddir, nelem(wddir), devgen); +} + +static void +wdclose(Chan*) +{ +} + +static long +wdread(Chan* c, void* a, long n, vlong off) +{ + long offset; + char s[READSTR]; + + offset = off; + switch((ulong)c->qid.path){ + case Qdir: + return devdirread(c, a, n, wddir, nelem(wddir), devgen); + + case Qwdctl: + if(wd == nil || wd->stat == nil) + return 0; + + wd->stat(s, s + READSTR); + return readstr(offset, a, n, s); + + default: + error(Egreg); + break; + } + return 0; +} + +static long +wdwrite(Chan* c, void* a, long n, vlong 
off) +{ + char *p; + + switch((ulong)c->qid.path){ + case Qdir: + error(Eperm); + + case Qwdctl: + if(wd == nil) + return n; + + if(off != 0ll) + error(Ebadarg); + + if(p = strchr(a, '\n')) + *p = 0; + + if(!strncmp(a, "enable", n)) + wd->enable(); + else if(!strncmp(a, "disable", n)) + wd->disable(); + else if(!strncmp(a, "restart", n)) + wd->restart(); + else + error(Ebadarg); + return n; + + default: + error(Egreg); + break; + } + + return 0; +} + +Dev wddevtab = { + 'w', + "watchdog", + + devreset, + devinit, + devshutdown, + wdattach, + wdwalk, + wdstat, + wdopen, + devcreate, + wdclose, + wdread, + devbread, + wdwrite, + devbwrite, + devremove, + devwstat, + devpower, +}; diff -Nru 0/sys/src/nix/port/devws.c 4/sys/src/nix/port/devws.c --- 0/sys/src/nix/port/devws.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devws.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,173 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + + +/* + * reported times can be translated to a more readable format by + * using something like: + * awk '{printf("print(\"%s: %s times; %s us worst; %s ws total\");\nsrc(%s)\n", + * $1, $3, $4, $5, $2); }' | acid ../k10/9k8cpu + * on the wsdata file, after doing a sort +2nr on it. + */ + +enum{ + WSdirqid, + WSdataqid, + WSctlqid, +}; + +Dirtab Wstab[]={ + ".", {WSdirqid, 0, QTDIR},0, DMDIR|0550, + "wsdata", {WSdataqid}, 0, 0600, + "wsctl", {WSctlqid}, 0, 0600, +}; + + +/* + * waitstats functions are in taslock.c, because they use Locks but + * callers in taslock.c must not call them to avoid + * a loop. + * This is only the user interface. 
+ */
+
+/*
+ * collect: format one text line per non-zero waitstats.pcs[] slot
+ * (lock kind, pc, ns, wait, total) into a freshly smalloc'ed,
+ * NUL terminated buffer and return it; the caller frees it
+ * (wsopen keeps it in c->aux, wsclose frees it).
+ * The table is walked with waitstatslk held; the buffer is sized
+ * from waitstats.npcs before the lock is taken.
+ */
+static char*
+collect(void)
+{
+	extern Lock waitstatslk;
+	char *buf, *s;
+	int i, n;
+	static char *wname[] = {
+		[WSlock] "lock",
+		[WSqlock] "qlock",
+		[WSslock] "slock",
+	};
+	enum {
+		/* longest line: name, "0x"+16 hex digits, int, two 20 digit counts, separators */
+		Wsline = 5+1 + 18+1 + 11+1 + 20+1 + 20+1,
+	};
+
+	/*
+	 * +2 is the NUL plus one spare byte: seprint never returns a
+	 * pointer past buf+n-1, so with the spare byte s can reach
+	 * buf+n-1 only when the output really was truncated.
+	 */
+	n = waitstats.npcs * Wsline + 2;
+	buf = smalloc(n);
+	s = buf;
+	lock(&waitstatslk);
+	for(i = 0; i < NWstats; i++)
+		if(waitstats.pcs[i] != 0)
+			s = seprint(s, buf+n, "%s %#llux %d %#llud %#llud\n",
+				wname[waitstats.type[i]],
+				waitstats.pcs[i], waitstats.ns[i], waitstats.wait[i],
+				waitstats.total[i]);
+	unlock(&waitstatslk);
+	if(s >= buf + n - 1)
+		print("collect: fix devws.c, buffer was too short");
+	return buf;
+}
+
+static Chan*
+wsattach(char *spec)
+{
+	return devattach('W', spec);
+}
+
+static Walkqid*
+wswalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, Wstab, nelem(Wstab), devgen);
+}
+
+static long
+wsstat(Chan *c, uchar *db, long n)
+{
+	return devstat(c, db, n, Wstab, nelem(Wstab), devgen);
+}
+
+static Chan*
+wsopen(Chan *c, int omode)
+{
+	if(c->qid.type & QTDIR){
+		if(omode != OREAD)
+			error(Eperm);
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	c->aux = nil;
+	if(c->qid.path == WSdataqid)
+		c->aux = collect();	/* snapshot taken at open time; reads serve this copy */
+	return c;
+}
+
+static void
+wsclose(Chan *c)
+{
+	free(c->aux);
+}
+
+static long
+wsread(Chan *c, void *va, long n, vlong off)
+{
+
+	switch((int)c->qid.path){
+	case WSdirqid:
+		n = devdirread(c, va, n, Wstab, nelem(Wstab), devgen);
+		break;
+	case WSdataqid:
+		n = readstr(off, va, n, c->aux);
+		break;
+	default:
+		n = 0;
+	}
+	return n;
+}
+
+static long
+wswrite(Chan *c, void *a, long n, vlong)
+{
+	char *buf;
+
+	switch((int)(c->qid.path)){
+	case WSctlqid:
+		buf = smalloc(n + 1);
+		memmove(buf, a, n);
+		buf[n] = 0;
+		if(n > 0 && buf[n-1] == '\n')
+			buf[n-1] = 0;
+		if(strcmp(buf, "clear") == 0){
+			lockstats.locks = lockstats.glare = lockstats.inglare = 0;
+			qlockstats.qlock = qlockstats.qlockq = 0;
+			clearwaitstats();
+		}else if(strcmp(buf, "start") == 0)
+ startwaitstats(1); + else if(strcmp(buf, "stop") == 0) + startwaitstats(0); + else{ + free(buf); + error(Ebadctl); + } + free(buf); + break; + default: + error(Ebadusefd); + } + return n; +} + +Dev wsdevtab = { + 'W', + "waitstats", + + devreset, + devinit, + devshutdown, + wsattach, + wswalk, + wsstat, + wsopen, + devcreate, + wsclose, + wsread, + devbread, + wswrite, + devbwrite, + devremove, + devwstat, +}; diff -Nru 0/sys/src/nix/port/devzp.c 4/sys/src/nix/port/devzp.c --- 0/sys/src/nix/port/devzp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/devzp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,594 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +enum +{ + Incr = 16, + Maxatomic = 64*KiB, +}; + +typedef struct ZPipe ZPipe; +typedef struct Zq Zq; + +struct Zq +{ + Lock; /* to protect Zq */ + QLock rlck; /* one reader at a time */ + Kzio* io; /* io[] */ + Kzio* ep; /* end pointer */ + int closed; /* queue is closed */ + int waiting; /* reader is waiting */ + Kzio* rp; /* read pointer */ + Kzio* wp; /* write pointer */ + Rendez rr; /* reader rendez */ +}; + +struct ZPipe +{ + QLock; + ZPipe *next; + int ref; + ulong path; + Zq q[2]; + int qref[2]; +}; + +struct +{ + Lock; + ulong path; +} zpalloc; + +enum +{ + Qdir, + Qdata0, + Qdata1, +}; + +Dirtab zpdir[] = +{ + ".", {Qdir,0,QTDIR}, 0, DMDIR|0500, + "data", {Qdata0}, 0, 0600, + "data1", {Qdata1}, 0, 0600, +}; +#define NZPDIR 3 + +#define ZPTYPE(x) (((unsigned)x)&0x1f) +#define ZPID(x) ((((unsigned)x))>>5) +#define ZPQID(i, t) ((((unsigned)i)<<5)|(t)) +#define ZQLEN(q) ((q)->wp - (q)->rp) + +static int +zqnotempty(void *x) +{ + Zq *q; + + q = x; + return ZQLEN(q) != 0 || q->closed != 0; +} + +static void +zqdump(Zq *q) +{ + Kzio *io; + + if(DBGFLG == 0) + return; + print("zq %#p: io %#p rp %ld wp %ld ep %ld\n", + q, q->io, q->rp - q->io, q->wp - q->io, q->ep - q->io); + for(io = q->rp; io != nil && io < q->wp; io++) + print("\tio[%ld] = 
%Z\n", io - q->io, io); + print("\n"); +} + +/* + * BUG: alloczio in here could be allocating data + * in the kernel that is not needed. In fact, such data + * might be in the kernel already. It's only that we don't + * have a way to reference more than once to the same source + * data (no reference counters). + */ +static int +zqread(Zq *q, Kzio io[], int nio, usize count) +{ + int i; + long tot, nr; + Kzio *qio; + Segment *s; + char *p; + + DBG("zqread %ld\n", count); + qlock(&q->rlck); + lock(q); + if(waserror()){ + unlock(q); + qunlock(&q->rlck); + nexterror(); + } + while(q->closed == 0 && ZQLEN(q) == 0){ + q->waiting++; + unlock(q); + sleep(&q->rr, zqnotempty, q); + lock(q); + } + i = 0; + for(tot = 0; ZQLEN(q) > 0 && i < nio && tot < count; tot += nr){ + qio = q->rp; + nr = qio->size; + if(tot + nr > count){ + if(i > 0) + break; + io[i] = *qio; + nr = count - tot; + io[i].size = nr; + s = getzkseg(); + if(s == nil){ + DBG("zqread: bytes thrown away\n"); + goto Consume; /* we drop bytes! */ + } + qio->size -= nr; + qio->data = alloczio(s, qio->size); + p = io[i].data; + memmove(qio->data, p + io[i].size, qio->size); + DBG("zqread: copy %#Z %#Z\n", qio, io); + qio->seg = s; + }else + io[i] = *qio; + Consume: + i++; + q->rp++; + } + if(q->rp == q->wp) + q->rp = q->wp = q->io; + zqdump(q); + poperror(); + unlock(q); + qunlock(&q->rlck); + return i; +} + +/* + * BUG: no flow control here. + * We queue as many io[]s as we want. + * Perhaps it would be better to do flow control, + * but the process feeding the queue would run out + * of buffering at some point, which also provides + * flow control somehow. 
+ */
+
+/*
+ * zqwrite: append the nio entries of io[] to q and wake a waiting
+ * reader, if any.  Returns nio; raises an error if q is closed and
+ * panics if the array cannot be grown.
+ * Space is made first by sliding the pending entries down to the
+ * base of the array and then, if still needed, by growing the array
+ * in multiples of Incr.
+ */
+static long
+zqwrite(Zq *q, Kzio io[], int nio)
+{
+	int i, ei, ri, wi, awake;
+
+	lock(q);
+	if(waserror()){
+		unlock(q);
+		nexterror();
+	}
+
+	DBG("zqwrite io%#p[%d]\n", io, nio);
+	if(DBGFLG)
+		for(i = 0; i < nio; i++)
+			print("\tio%#p[%d] = %Z\n", io, i, &io[i]);
+	if(q->closed)
+		error("queue is closed");
+	if(q->wp + nio > q->ep){
+		if(q->rp > q->io){
+			/* ZQLEN is wp-rp, so wp must be recomputed before rp is reset */
+			memmove(q->io, q->rp, ZQLEN(q)*sizeof q->io[0]);
+			q->wp = q->io + ZQLEN(q);
+			q->rp = q->io;
+		}
+		if(q->wp + nio > q->ep){
+			ei = q->ep - q->io;
+			ri = q->rp - q->io;
+			wi = q->wp - q->io;
+			/*
+			 * one Incr step is not enough when nio exceeds the
+			 * free space by more than Incr; keep stepping until
+			 * every entry fits so the assert below cannot fire.
+			 */
+			while(ei < wi + nio)
+				ei += Incr;
+			q->io = realloc(q->io, ei*sizeof q->io[0]);
+			if(q->io == nil)
+				panic("zqwrite: no memory");
+			q->ep = q->io + ei;
+			q->rp = q->io + ri;
+			q->wp = q->io + wi;
+			DBG("zqwrite: io %#p rp %#p wp %#p ep %#p\n",
+				q->io, q->rp, q->wp, q->ep);
+		}
+		assert(q->wp + nio <= q->ep);
+	}
+	memmove(q->wp, io, nio*sizeof io[0]);
+	q->wp += nio;
+	awake = q->waiting;
+	if(awake)
+		q->waiting--;
+	zqdump(q);
+	poperror();
+	unlock(q);
+	if(awake)	/* wake the reader only after q is unlocked */
+		wakeup(&q->rr);
+	return nio;
+}
+
+static void
+zqflush(Zq *q)
+{
+	/* release every pending entry and empty the queue */
+	/* NOTE(review): qlock() is called with the Lock on q held - confirm this cannot block */
+	lock(q);
+	for(;q->rp < q->wp; q->rp++){
+		qlock(&q->rp->seg->lk);
+		zputaddr(q->rp->seg, PTR2UINT(q->rp->data));
+		qunlock(&q->rp->seg->lk);
+		putseg(q->rp->seg);
+	}
+	q->rp = q->wp = q->io;
+	unlock(q);
+}
+
+static void
+zqclose(Zq *q)
+{
+	/* mark closed, discard what is queued, wake the reader */
+	q->closed = 1;
+	zqflush(q);
+	wakeup(&q->rr);
+}
+
+static void
+zqhangup(Zq *q)
+{
+	/* mark closed but keep queued entries; wake the reader */
+	q->closed = 1;
+	wakeup(&q->rr);
+}
+
+static void
+zqreopen(Zq *q)
+{
+	q->closed = 0;
+}
+
+/*
+ * create a zp, no streams are created until an open
+ */
+static Chan*
+zpattach(char *spec)
+{
+	ZPipe *p;
+	Chan *c;
+
+	c = devattach(L'∏', spec);
+	p = malloc(sizeof(ZPipe));
+	if(p == 0)
+		exhausted("memory");
+	p->ref = 1;
+
+	lock(&zpalloc);
+	p->path = ++zpalloc.path;
+	unlock(&zpalloc);
+
+	mkqid(&c->qid, ZPQID(2*p->path, Qdir), 0, QTDIR);
+	c->aux = p;
+	c->devno = 0;
+	return c;
+}
+
+static int
+zpgen(Chan *c, char*, Dirtab *tab, int ntab, int i, Dir *dp)
+{
+	Qid q;
+ int len; + ZPipe *p; + + if(i == DEVDOTDOT){ + devdir(c, c->qid, "#∏", 0, eve, DMDIR|0555, dp); + return 1; + } + i++; /* skip . */ + if(tab==0 || i>=ntab) + return -1; + + tab += i; + p = c->aux; + switch((ulong)tab->qid.path){ + case Qdata0: + len = ZQLEN(&p->q[0]); + break; + case Qdata1: + len = ZQLEN(&p->q[1]); + break; + default: + len = tab->length; + break; + } + mkqid(&q, ZPQID(ZPID(c->qid.path), tab->qid.path), 0, QTFILE); + devdir(c, q, tab->name, len, eve, tab->perm, dp); + return 1; +} + + +static Walkqid* +zpwalk(Chan *c, Chan *nc, char **name, int nname) +{ + Walkqid *wq; + ZPipe *p; + + wq = devwalk(c, nc, name, nname, zpdir, NZPDIR, zpgen); + if(wq != nil && wq->clone != nil && wq->clone != c){ + p = c->aux; + qlock(p); + p->ref++; + if(c->flag & COPEN){ + print("channel open in zpwalk\n"); + switch(ZPTYPE(c->qid.path)){ + case Qdata0: + p->qref[0]++; + break; + case Qdata1: + p->qref[1]++; + break; + } + } + qunlock(p); + } + return wq; +} + +static long +zpstat(Chan *c, uchar *db, long n) +{ + ZPipe *p; + Dir dir; + + p = c->aux; + + switch(ZPTYPE(c->qid.path)){ + case Qdir: + devdir(c, c->qid, ".", 0, eve, DMDIR|0555, &dir); + break; + case Qdata0: + devdir(c, c->qid, "data", ZQLEN(&p->q[0]), eve, 0600, &dir); + break; + case Qdata1: + devdir(c, c->qid, "data1", ZQLEN(&p->q[1]), eve, 0600, &dir); + break; + default: + panic("zpstat"); + } + n = convD2M(&dir, db, n); + if(n < BIT16SZ) + error(Eshortstat); + return n; +} + +/* + * if the stream doesn't exist, create it + */ +static Chan* +zpopen(Chan *c, int omode) +{ + ZPipe *p; + + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Ebadarg); + c->mode = omode; + c->flag |= COPEN; + c->offset = 0; + return c; + } + + p = c->aux; + qlock(p); + switch(ZPTYPE(c->qid.path)){ + case Qdata0: + p->qref[0]++; + break; + case Qdata1: + p->qref[1]++; + break; + } + qunlock(p); + + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->iounit = Maxatomic; /* should we care? 
*/ + return c; +} + +static void +zpclose(Chan *c) +{ + ZPipe *p; + + p = c->aux; + qlock(p); + + if(c->flag & COPEN){ + /* + * closing either side hangs up the stream + */ + switch(ZPTYPE(c->qid.path)){ + case Qdata0: + p->qref[0]--; + if(p->qref[0] == 0){ + zqhangup(&p->q[1]); + zqclose(&p->q[0]); + } + break; + case Qdata1: + p->qref[1]--; + if(p->qref[1] == 0){ + zqhangup(&p->q[0]); + zqclose(&p->q[1]); + } + break; + } + } + + /* + * if both sides are closed, they are reusable + */ + if(p->qref[0] == 0 && p->qref[1] == 0){ + zqreopen(&p->q[0]); + zqreopen(&p->q[1]); + } + + /* + * free the structure on last close + */ + p->ref--; + if(p->ref == 0){ + qunlock(p); + free(p); + } else + qunlock(p); +} + +static long +zpread(Chan *c, void *va, long n, vlong) +{ + ZPipe *p; + Kzio io[32]; /* might read less than we could */ + int nio; + + p = c->aux; + + switch(ZPTYPE(c->qid.path)){ + case Qdir: + return devdirread(c, va, n, zpdir, NZPDIR, zpgen); + case Qdata0: + nio = zqread(&p->q[0], io, nelem(io), n); + return readzio(io, nio, va, n); + case Qdata1: + nio = zqread(&p->q[0], io, nelem(io), n); + return readzio(io, nio, va, n); + default: + panic("zpread"); + } + return -1; /* not reached */ +} + +static int +zpzread(Chan *c, Kzio io[], int nio, usize n, vlong offset) +{ + ZPipe *p; + + p = c->aux; + + switch(ZPTYPE(c->qid.path)){ + case Qdir: + return devzread(c, io, nio, n, offset); + case Qdata0: + return zqread(&p->q[0], io, nio, n); + case Qdata1: + return zqread(&p->q[0], io, nio, n); + default: + panic("zpread"); + } + return -1; /* not reached */ +} + + +/* + * a write to a closed zp should cause a note to be sent to + * the process. + * If the data is already in a SG_ZIO segment, we shouldn't + * be copying it again, probably. 
+ */ +static long +zpwrite(Chan *c, void *va, long n, vlong) +{ + ZPipe *p; + Kzio io; /* might write less than we could */ + long tot, nw; + Segment *s; + Zq *q; + char *cp; + + if(n <= 0) + return n; + p = c->aux; + switch(ZPTYPE(c->qid.path)){ + case Qdata0: + q = &p->q[1]; + break; + case Qdata1: + q = &p->q[0]; + break; + default: + q = nil; + panic("zpwrite"); + } + + s = getzkseg(); + if(waserror()){ + putseg(s); + nexterror(); + } + cp = va; + for(tot = 0; tot < n; tot += nw){ + nw = n; + if(nw > Maxatomic) + nw = Maxatomic; + io.data = alloczio(s, nw); + memmove(io.data, cp + tot, nw); + io.seg = s; + incref(s); + io.size = nw; + DBG("zpwrite: copy %Z %#p\n", &io, cp+tot); + zqwrite(q, &io, 1); + } + poperror(); + putseg(s); + return n; +} + +static int +zpzwrite(Chan *c, Kzio io[], int nio, vlong) +{ + ZPipe *p; + + p = c->aux; + + switch(ZPTYPE(c->qid.path)){ + case Qdata0: + zqwrite(&p->q[1], io, nio); + break; + + case Qdata1: + zqwrite(&p->q[0], io, nio); + break; + + default: + panic("zpwrite"); + } + + return nio; +} + + +Dev zpdevtab = { + L'∏', + "zp", + + devreset, + devinit, + devshutdown, + zpattach, + zpwalk, + zpstat, + zpopen, + devcreate, + zpclose, + zpread, + devbread, + zpwrite, + devbwrite, + devremove, + devwstat, + nil, /* power */ + nil, /* config */ + zpzread, + zpzwrite, +}; diff -Nru 0/sys/src/nix/port/edf.c 4/sys/src/nix/port/edf.c --- 0/sys/src/nix/port/edf.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/edf.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,680 @@ +/* EDF scheduling */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include + +/* debugging */ +enum { + Dontprint = 1, +}; + +#define DPRINT if(Dontprint){}else print + +static long now; /* Low order 32 bits of time in µs */ + +/* Statistics stuff */ +ulong nilcount; +ulong scheds; +ulong edfnrun; +int misseddeadlines; + +/* Edfschedlock protects modification of admission 
params */ +int edfinited; +QLock edfschedlock; +static Lock thelock; + +enum{ + Dl, /* Invariant for schedulability test: Dl < Rl */ + Rl, +}; + +static char *testschedulability(Proc*); +static Proc *qschedulability; + +enum { + Onemicrosecond = 1, + Onemillisecond = 1000, + Onesecond = 1000000, + OneRound = Onemillisecond/2, +}; + +static int +timeconv(Fmt *f) +{ + char buf[128], *sign; + vlong t; + + buf[0] = 0; + switch(f->r) { + case 'U': + t = va_arg(f->args, uvlong); + break; + case 't': /* vlong in nanoseconds */ + t = va_arg(f->args, long); + break; + default: + return fmtstrcpy(f, "(timeconv)"); + } + if (t < 0) { + sign = "-"; + t = -t; + } + else + sign = ""; + if (t > Onesecond){ + t += OneRound; + sprint(buf, "%s%d.%.3ds", sign, (int)(t / Onesecond), + (int)(t % Onesecond)/Onemillisecond); + }else if (t > Onemillisecond) + sprint(buf, "%s%d.%.3dms", sign, (int)(t / Onemillisecond), + (int)(t % Onemillisecond)); + else + sprint(buf, "%s%dµs", sign, (int)t); + return fmtstrcpy(f, buf); +} + +long edfcycles; + +Edf* +edflock(Proc *p) +{ + Edf *e; + + if (p->edf == nil) + return nil; + ilock(&thelock); + if((e = p->edf) && (e->flags & Admitted)){ + thelock.pc = getcallerpc(&p); +#ifdef EDFCYCLES + edfcycles -= lcycles(); +#endif + now = µs(); + return e; + } + iunlock(&thelock); + return nil; +} + +void +edfunlock(void) +{ + +#ifdef EDFCYCLES + edfcycles += lcycles(); +#endif + edfnrun++; + iunlock(&thelock); +} + +void +edfinit(Proc*p) +{ + if(!edfinited){ + fmtinstall('t', timeconv); + edfinited++; + } + now = µs(); + DPRINT("%lud edfinit %d[%s]\n", now, p->pid, statename[p->state]); + p->edf = malloc(sizeof(Edf)); + if(p->edf == nil) + error(Enomem); + return; +} + +static void +deadlineintr(Ureg*, Timer *t) +{ + /* Proc reached deadline */ + extern int panicking; + Sched *sch; + Proc *p; + + if(panicking || active.exiting) + return; + + p = t->ta; + now = µs(); + DPRINT("%lud deadlineintr %d[%s]\n", now, p->pid, statename[p->state]); + /* If we're 
interrupting something other than the proc pointed to by t->a, + * we've already achieved recheduling, so we need not do anything + * Otherwise, we must cause a reschedule, but if we call sched() + * here directly, the timer interrupt routine will not finish its business + * Instead, we cause the resched to happen when the interrupted proc + * returns to user space + */ + if(p == up){ + if(up->trace) + proctrace(up, SInts, 0); + up->delaysched++; + sch = procsched(up); + sch->delayedscheds++; + } +} + +static void +release(Proc *p) +{ + /* Called with edflock held */ + Edf *e; + long n; + vlong nowns; + + e = p->edf; + e->flags &= ~Yield; + if(e->d - now < 0){ + e->periods++; + e->r = now; + if((e->flags & Sporadic) == 0){ + /* + * Non sporadic processes stay true to their period; + * calculate next release time. + * Second test limits duration of while loop. + */ + if((n = now - e->t) > 0){ + if(n < e->T) + e->t += e->T; + else + e->t = now + e->T - (n % e->T); + } + }else{ + /* Sporadic processes may not be released earlier than + * one period after this release + */ + e->t = e->r + e->T; + } + e->d = e->r + e->D; + e->S = e->C; + DPRINT("%lud release %d[%s], r=%lud, d=%lud, t=%lud, S=%lud\n", + now, p->pid, statename[p->state], e->r, e->d, e->t, e->S); + if(p->trace){ + nowns = todget(nil); + proctrace(p, SRelease, nowns); + proctrace(p, SDeadline, nowns + 1000LL*e->D); + } + }else{ + DPRINT("%lud release %d[%s], too late t=%lud, called from %#p\n", + now, p->pid, statename[p->state], e->t, getcallerpc(&p)); + } +} + +static void +releaseintr(Ureg*, Timer *t) +{ + Proc *p; + extern int panicking; + Sched *sch; + Schedq *rq; + + if(panicking || active.exiting) + return; + + p = t->ta; + if((edflock(p)) == nil) + return; + sch = procsched(p); + DPRINT("%lud releaseintr %d[%s]\n", now, p->pid, statename[p->state]); + switch(p->state){ + default: + edfunlock(); + return; + case Ready: + /* remove proc from current runq */ + rq = &sch->runq[p->priority]; + 
if(dequeueproc(sch, rq, p) != p){ + DPRINT("releaseintr: can't find proc or lock race\n"); + release(p); /* It'll start best effort */ + edfunlock(); + return; + } + p->state = Waitrelease; + /* fall through */ + case Waitrelease: + release(p); + edfunlock(); + if(p->state == Wakeme){ + iprint("releaseintr: wakeme\n"); + } + ready(p); + if(up){ + up->delaysched++; + sch->delayedscheds++; + } + return; + case Running: + release(p); + edfrun(p, 1); + break; + case Wakeme: + release(p); + edfunlock(); + if(p->trend) + wakeup(p->trend); + p->trend = nil; + if(up){ + up->delaysched++; + sch->delayedscheds++; + } + return; + } + edfunlock(); +} + +void +edfrecord(Proc *p) +{ + long used; + Edf *e; + + if((e = edflock(p)) == nil) + return; + used = now - e->s; + if(e->d - now <= 0) + e->edfused += used; + else + e->extraused += used; + if(e->S > 0){ + if(e->S <= used){ + if(p->trace) + proctrace(p, SSlice, 0); + DPRINT("%lud edfrecord slice used up\n", now); + e->d = now; + e->S = 0; + }else + e->S -= used; + } + e->s = now; + edfunlock(); +} + +void +edfrun(Proc *p, int edfpri) +{ + Edf *e; + long tns; + Sched *sch; + + e = p->edf; + sch = procsched(p); + /* Called with edflock held */ + if(edfpri){ + tns = e->d - now; + if(tns <= 0 || e->S == 0){ + /* Deadline reached or resources exhausted, + * deschedule forthwith + */ + p->delaysched++; + sch->delayedscheds++; + e->s = now; + return; + } + if(e->S < tns) + tns = e->S; + if(tns < 20) + tns = 20; + e->tns = 1000LL * tns; /* µs to ns */ + if(e->tt == nil || e->tf != deadlineintr){ + DPRINT("%lud edfrun, deadline=%lud\n", now, tns); + }else{ + DPRINT("v"); + } + if(p->trace) + proctrace(p, SInte, todget(nil) + e->tns); + e->tmode = Trelative; + e->tf = deadlineintr; + e->ta = p; + timeradd(e); + }else{ + DPRINT("<"); + } + e->s = now; +} + +char * +edfadmit(Proc *p) +{ + char *err; + Edf *e; + int i; + Proc *r; + long tns; + + e = p->edf; + if (e->flags & Admitted) + return "task state"; /* should never happen */ + + /* 
simple sanity checks */ + if (e->T == 0) + return "T not set"; + if (e->C == 0) + return "C not set"; + if (e->D > e->T) + return "D > T"; + if (e->D == 0) /* if D is not set, set it to T */ + e->D = e->T; + if (e->C > e->D) + return "C > D"; + + qlock(&edfschedlock); + if (err = testschedulability(p)){ + qunlock(&edfschedlock); + return err; + } + e->flags |= Admitted; + + edflock(p); + + if(p->trace) + proctrace(p, SAdmit, 0); + + /* Look for another proc with the same period to synchronize to */ + for(i=0; (r = psincref(i)) != nil; i++) { + if(r->state == Dead || r == p){ + psdecref(r); + continue; + } + if (r->edf == nil || (r->edf->flags & Admitted) == 0){ + psdecref(r); + continue; + } + if (r->edf->T == e->T) + break; + } + if (r == nil){ + /* Can't synchronize to another proc, release now */ + e->t = now; + e->d = 0; + release(p); + if (p == up){ + DPRINT("%lud edfadmit self %d[%s], release now: r=%lud d=%lud t=%lud\n", + now, p->pid, statename[p->state], e->r, e->d, e->t); + /* We're already running */ + edfrun(p, 1); + }else{ + /* We're releasing another proc */ + DPRINT("%lud edfadmit other %d[%s], release now: r=%lud d=%lud t=%lud\n", + now, p->pid, statename[p->state], e->r, e->d, e->t); + p->ta = p; + edfunlock(); + qunlock(&edfschedlock); + releaseintr(nil, p); + return nil; + } + }else{ + /* Release in synch to something else */ + e->t = r->edf->t; + psdecref(r); + if (p == up){ + DPRINT("%lud edfadmit self %d[%s], release at %lud\n", + now, p->pid, statename[p->state], e->t); + }else{ + DPRINT("%lud edfadmit other %d[%s], release at %lud\n", + now, p->pid, statename[p->state], e->t); + if(e->tt == nil){ + e->tf = releaseintr; + e->ta = p; + tns = e->t - now; + if(tns < 20) + tns = 20; + e->tns = 1000LL * tns; + e->tmode = Trelative; + timeradd(e); + } + } + } + edfunlock(); + qunlock(&edfschedlock); + return nil; +} + +void +edfstop(Proc *p) +{ + Edf *e; + + if(e = edflock(p)){ + DPRINT("%lud edfstop %d[%s]\n", now, p->pid, statename[p->state]); + 
if(p->trace) + proctrace(p, SExpel, 0); + e->flags &= ~Admitted; + if(e->tt) + timerdel(e); + edfunlock(); + } +} + +static int +yfn(void *) +{ + now = µs(); + return up->trend == nil || now - up->edf->r >= 0; +} + +void +edfyield(void) +{ + /* sleep until next release */ + Edf *e; + long n; + + if((e = edflock(up)) == nil) + return; + if(up->trace) + proctrace(up, SYield, 0); + if((n = now - e->t) > 0){ + if(n < e->T) + e->t += e->T; + else + e->t = now + e->T - (n % e->T); + } + e->r = e->t; + e->flags |= Yield; + e->d = now; + if (up->tt == nil){ + n = e->t - now; + if(n < 20) + n = 20; + up->tns = 1000LL * n; + up->tf = releaseintr; + up->tmode = Trelative; + up->ta = up; + up->trend = &up->sleep; + timeradd(up); + }else if(up->tf != releaseintr) + print("edfyield: surprise! %#p\n", up->tf); + edfunlock(); + sleep(&up->sleep, yfn, nil); +} + +int +edfready(Proc *p) +{ + Edf *e; + Sched *sch; + Schedq *rq; + Proc *l, *pp; + long n; + + if((e = edflock(p)) == nil) + return 0; + + if(p->state == Wakeme && p->r){ + iprint("edfready: wakeme\n"); + } + if(e->d - now <= 0){ + /* past deadline, arrange for next release */ + if((e->flags & Sporadic) == 0){ + /* + * Non sporadic processes stay true to their period; + * calculate next release time. + */ + if((n = now - e->t) > 0){ + if(n < e->T) + e->t += e->T; + else + e->t = now + e->T - (n % e->T); + } + } + if(now - e->t < 0){ + /* Next release is in the future, schedule it */ + if(e->tt == nil || e->tf != releaseintr){ + n = e->t - now; + if(n < 20) + n = 20; + e->tns = 1000LL * n; + e->tmode = Trelative; + e->tf = releaseintr; + e->ta = p; + timeradd(e); + DPRINT("%lud edfready %d[%s], release=%lud\n", + now, p->pid, statename[p->state], e->t); + } + if(p->state == Running && (e->flags & (Yield|Yieldonblock)) == 0 && (e->flags & Extratime)){ + /* If we were running, we've overrun our CPU allocation + * or missed the deadline, continue running best-effort at low priority + * Otherwise we were blocked. 
If we don't yield on block, we continue + * best effort + */ + DPRINT(">"); + p->basepri = PriExtra; + p->fixedpri = 1; + edfunlock(); + return 0; /* Stick on runq[PriExtra] */ + } + DPRINT("%lud edfready %d[%s] wait release at %lud\n", + now, p->pid, statename[p->state], e->t); + p->state = Waitrelease; + edfunlock(); + return 1; /* Make runnable later */ + } + DPRINT("%lud edfready %d %s release now\n", now, p->pid, statename[p->state]); + /* release now */ + release(p); + } + edfunlock(); + DPRINT("^"); + sch = procsched(p); + rq = &sch->runq[PriEdf]; + /* insert in queue in earliest deadline order */ + lock(sch); + l = nil; + for(pp = rq->head; pp; pp = pp->rnext){ + if(pp->edf->d > e->d) + break; + l = pp; + } + p->rnext = pp; + if (l == nil) + rq->head = p; + else + l->rnext = p; + if(pp == nil) + rq->tail = p; + rq->n++; + sch->nrdy++; + sch->runvec |= 1 << PriEdf; + p->priority = PriEdf; + p->readytime = m->ticks; + p->state = Ready; + unlock(sch); + if(p->trace) + proctrace(p, SReady, 0); + return 1; +} + + +static void +testenq(Proc *p) +{ + Proc *xp, **xpp; + Edf *e; + + e = p->edf; + e->testnext = nil; + if (qschedulability == nil) { + qschedulability = p; + return; + } + SET(xp); + for (xpp = &qschedulability; *xpp; xpp = &xp->edf->testnext) { + xp = *xpp; + if (e->testtime - xp->edf->testtime < 0 + || (e->testtime == xp->edf->testtime && e->testtype < xp->edf->testtype)){ + e->testnext = xp; + *xpp = p; + return; + } + } + assert(xp->edf->testnext == nil); + xp->edf->testnext = p; +} + +static char * +testschedulability(Proc *theproc) +{ + Proc *p; + long H, G, Cb, ticks; + int steps, i; + + /* initialize */ + DPRINT("schedulability test %d\n", theproc->pid); + qschedulability = nil; + for(i=0; (p = psincref(i)) != nil; i++) { + if(p->state == Dead){ + psdecref(p); + continue; + } + if ((p->edf == nil || (p->edf->flags & Admitted) == 0) && p != theproc){ + psdecref(p); + continue; + } + p->edf->testtype = Rl; + p->edf->testtime = 0; + DPRINT("\tInit: 
edfenqueue %d\n", p->pid); + testenq(p); + psdecref(p); + } + H=0; + G=0; + for(steps = 0; steps < Maxsteps; steps++){ + p = qschedulability; + qschedulability = p->edf->testnext; + ticks = p->edf->testtime; + switch (p->edf->testtype){ + case Dl: + H += p->edf->C; + Cb = 0; + DPRINT("\tStep %3d, Ticks %lud, pid %d, deadline, H += %lud → %lud, Cb = %lud\n", + steps, ticks, p->pid, p->edf->C, H, Cb); + if (H+Cb>ticks){ + DPRINT("not schedulable\n"); + return "not schedulable"; + } + p->edf->testtime += p->edf->T - p->edf->D; + p->edf->testtype = Rl; + testenq(p); + break; + case Rl: + DPRINT("\tStep %3d, Ticks %lud, pid %d, release, G %lud, C%lud\n", + steps, ticks, p->pid, p->edf->C, G); + if(ticks && G <= ticks){ + DPRINT("schedulable\n"); + return nil; + } + G += p->edf->C; + p->edf->testtime += p->edf->D; + p->edf->testtype = Dl; + testenq(p); + break; + default: + assert(0); + } + } + DPRINT("probably not schedulable\n"); + return "probably not schedulable"; +} diff -Nru 0/sys/src/nix/port/edf.h 4/sys/src/nix/port/edf.h --- 0/sys/src/nix/port/edf.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/edf.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,58 @@ +enum { + Maxsteps = 200 * 100 * 2, /* 100 periods of 200 procs */ + + /* Edf.flags field */ + Admitted = 0x01, + Sporadic = 0x02, + Yieldonblock = 0x04, + Sendnotes = 0x08, + Deadline = 0x10, + Yield = 0x20, + Extratime = 0x40, + + Infinity = ~0ULL, +}; + +typedef struct Edf Edf; + +struct Edf { + /* All times in µs */ + /* time intervals */ + long D; /* Deadline */ + long Delta; /* Inherited deadline */ + long T; /* period */ + long C; /* Cost */ + long S; /* Slice: time remaining in this period */ + /* times (only low-order bits of absolute time) */ + long r; /* (this) release time */ + long d; /* (this) deadline */ + long t; /* Start of next period, t += T at release */ + long s; /* Time at which this proc was last scheduled */ + /* for schedulability testing */ + long testDelta; + int testtype; /* Release or Deadline */ 
+ long testtime; + Proc *testnext; + /* other */ + ushort flags; + Timer; + /* Stats */ + long edfused; + long extraused; + long aged; + ulong periods; + ulong missed; +}; + +extern Lock edftestlock; /* for atomic admitting/expelling */ + +#pragma varargck type "t" long +#pragma varargck type "U" uvlong + +/* Interface: */ +Edf* edflock(Proc*); +void edfunlock(void); + + +/* sched interface, used only by edf */ +Sched* procsched(Proc*); diff -Nru 0/sys/src/nix/port/error.h 4/sys/src/nix/port/error.h --- 0/sys/src/nix/port/error.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/error.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,58 @@ +extern char Enoerror[]; /* no error */ +extern char Emount[]; /* inconsistent mount */ +extern char Eunmount[]; /* not mounted */ +extern char Eismtpt[]; /* is a mount point */ +extern char Eunion[]; /* not in union */ +extern char Emountrpc[]; /* mount rpc error */ +extern char Eshutdown[]; /* device shut down */ +extern char Enocreate[]; /* mounted directory forbids creation */ +extern char Enonexist[]; /* file does not exist */ +extern char Eexist[]; /* file already exists */ +extern char Ebadsharp[]; /* unknown device in # filename */ +extern char Enotdir[]; /* not a directory */ +extern char Eisdir[]; /* file is a directory */ +extern char Ebadchar[]; /* bad character in file name */ +extern char Efilename[]; /* file name syntax */ +extern char Eperm[]; /* permission denied */ +extern char Ebadusefd[]; /* inappropriate use of fd */ +extern char Ebadarg[]; /* bad arg in system call */ +extern char Einuse[]; /* device or object already in use */ +extern char Eio[]; /* i/o error */ +extern char Etoobig[]; /* read or write too large */ +extern char Etoosmall[]; /* read or write too small */ +extern char Enoport[]; /* network port not available */ +extern char Ehungup[]; /* i/o on hungup channel */ +extern char Ebadctl[]; /* bad process or channel control request */ +extern char Enodev[]; /* no free devices */ +extern char Eprocdied[]; /* 
process exited */ +extern char Enochild[]; /* no living children */ +extern char Eioload[]; /* i/o error in demand load */ +extern char Enovmem[]; /* virtual memory allocation failed */ +extern char Ebadfd[]; /* fd out of range or not open */ +extern char Enofd[]; /* no free file descriptors */ +extern char Eisstream[]; /* seek on a stream */ +extern char Ebadexec[]; /* exec header invalid */ +extern char Etimedout[]; /* connection timed out */ +extern char Econrefused[]; /* connection refused */ +extern char Econinuse[]; /* connection in use */ +extern char Eintr[]; /* interrupted */ +extern char Enomem[]; /* kernel allocate failed */ +extern char Enoswap[]; /* swap space full */ +extern char Esoverlap[]; /* segments overlap */ +extern char Eshort[]; /* i/o count too small */ +extern char Egreg[]; /* ken has left the building */ +extern char Ebadspec[]; /* bad attach specifier */ +extern char Enoreg[]; /* process has no saved registers */ +extern char Enoattach[]; /* mount/attach disallowed */ +extern char Eshortstat[]; /* stat buffer too small */ +extern char Ebadstat[]; /* malformed stat buffer */ +extern char Enegoff[]; /* negative i/o offset */ +extern char Ecmdargs[]; /* wrong #args in control message */ +extern char Ebadip[]; /* bad ip address syntax */ +extern char Edirseek[]; /* seek in directory */ +extern char Esemtimeout[]; /* timeout to hold the userspace lock */ +extern char Edownint[]; /* down interrupted */ +extern char Esemaltint[]; /* semalt interrupted */ +extern char Ebadargalt[]; /* duplicated sems in the list */ +extern char Etoomanysems[]; /* too many semaphores for alt */ +extern char Esemdead[]; /* dead sem */ diff -Nru 0/sys/src/nix/port/ethermii.c 4/sys/src/nix/port/ethermii.c --- 0/sys/src/nix/port/ethermii.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/ethermii.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,290 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/netif.h" + 
+#include "ethermii.h" + +static int +miiprobe(Mii* mii, int mask) +{ + MiiPhy *miiphy; + int bit, oui, phyno, r, rmask; + + /* + * Probe through mii for PHYs in mask; + * return the mask of those found in the current probe. + * If the PHY has not already been probed, update + * the Mii information. + */ + rmask = 0; + for(phyno = 0; phyno < NMiiPhy; phyno++){ + bit = 1<mask & bit){ + rmask |= bit; + continue; + } + if(mii->rw(mii, 0, phyno, Bmsr, 0) == -1) + continue; + r = mii->rw(mii, 0, phyno, Phyidr1, 0)<<16; + r |= mii->rw(mii, 0, phyno, Phyidr2, 0); + oui = (r>>10) & 0xffff; + if(oui == 0xffff || oui == 0) + continue; + + if((miiphy = malloc(sizeof(MiiPhy))) == nil) + continue; + + miiphy->mii = mii; + miiphy->phyno = phyno; + miiphy->phyid = r; + miiphy->oui = oui; + + miiphy->anar = ~0; + miiphy->fc = ~0; + miiphy->mscr = ~0; + + mii->phy[phyno] = miiphy; + if(mii->curphy == nil) + mii->curphy = miiphy; + mii->mask |= bit; + mii->nphy++; + + rmask |= bit; + } + return rmask; +} + +int +miimir(Mii* mii, int r) +{ + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + return mii->rw(mii, 0, mii->curphy->phyno, r, 0); +} + +int +miimiw(Mii* mii, int r, int data) +{ + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + return mii->rw(mii, 1, mii->curphy->phyno, r, data); +} + +int +miireset(Mii* mii) +{ + int bmcr, timeo; + + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + bmcr = mii->rw(mii, 0, mii->curphy->phyno, Bmcr, 0); + mii->rw(mii, 1, mii->curphy->phyno, Bmcr, BmcrR|bmcr); + for(timeo = 0; timeo < 1000; timeo++){ + bmcr = mii->rw(mii, 0, mii->curphy->phyno, Bmcr, 0); + if(!(bmcr & BmcrR)) + break; + microdelay(1); + } + if(bmcr & BmcrR) + return -1; + if(bmcr & BmcrI) + mii->rw(mii, 1, mii->curphy->phyno, Bmcr, bmcr & ~BmcrI); + return 0; +} + +int +miiane(Mii* mii, int a, int p, int e) +{ + int anar, bmsr, mscr, r, phyno; + + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + 
return -1; + phyno = mii->curphy->phyno; + + mii->rw(mii, 1, phyno, Bmsr, 0); + bmsr = mii->rw(mii, 0, phyno, Bmsr, 0); + if(!(bmsr & BmsrAna)) + return -1; + + if(a != ~0) + anar = (AnaTXFD|AnaTXHD|Ana10FD|Ana10HD) & a; + else if(mii->curphy->anar != ~0) + anar = mii->curphy->anar; + else{ + anar = mii->rw(mii, 0, phyno, Anar, 0); + anar &= ~(AnaAP|AnaP|AnaT4|AnaTXFD|AnaTXHD|Ana10FD|Ana10HD); + if(bmsr & Bmsr10THD) + anar |= Ana10HD; + if(bmsr & Bmsr10TFD) + anar |= Ana10FD; + if(bmsr & Bmsr100TXHD) + anar |= AnaTXHD; + if(bmsr & Bmsr100TXFD) + anar |= AnaTXFD; + } + mii->curphy->anar = anar; + + if(p != ~0) + anar |= (AnaAP|AnaP) & p; + else if(mii->curphy->fc != ~0) + anar |= mii->curphy->fc; + mii->curphy->fc = (AnaAP|AnaP) & anar; + + if(bmsr & BmsrEs){ + mscr = mii->rw(mii, 0, phyno, Mscr, 0); + mscr &= ~(Mscr1000TFD|Mscr1000THD); + if(e != ~0) + mscr |= (Mscr1000TFD|Mscr1000THD) & e; + else if(mii->curphy->mscr != ~0) + mscr = mii->curphy->mscr; + else{ + r = mii->rw(mii, 0, phyno, Esr, 0); + if(r & Esr1000THD) + mscr |= Mscr1000THD; + if(r & Esr1000TFD) + mscr |= Mscr1000TFD; + } + mii->curphy->mscr = mscr; + mii->rw(mii, 1, phyno, Mscr, mscr); + } + else + mii->curphy->mscr = 0; + mii->rw(mii, 1, phyno, Anar, anar); + + r = mii->rw(mii, 0, phyno, Bmcr, 0); + if(!(r & BmcrR)){ + r |= BmcrAne|BmcrRan; + mii->rw(mii, 1, phyno, Bmcr, r); + } + + return 0; +} + +int +miistatus(Mii* mii) +{ + MiiPhy *phy; + int anlpar, bmsr, p, r, phyno; + + if(mii == nil || mii->ctlr == nil || mii->curphy == nil) + return -1; + phy = mii->curphy; + phyno = phy->phyno; + + /* + * Check Auto-Negotiation is complete and link is up. + * (Read status twice as the Ls bit is sticky). 
+ */ + bmsr = mii->rw(mii, 0, phyno, Bmsr, 0); + if(!(bmsr & (BmsrAnc|BmsrAna))) + return -1; + + bmsr = mii->rw(mii, 0, phyno, Bmsr, 0); + if(!(bmsr & BmsrLs)){ + phy->link = 0; + return -1; + } + + phy->speed = phy->fd = phy->rfc = phy->tfc = 0; + if(phy->mscr){ + r = mii->rw(mii, 0, phyno, Mssr, 0); + if((phy->mscr & Mscr1000TFD) && (r & Mssr1000TFD)){ + phy->speed = 1000; + phy->fd = 1; + } + else if((phy->mscr & Mscr1000THD) && (r & Mssr1000THD)) + phy->speed = 1000; + } + + anlpar = mii->rw(mii, 0, phyno, Anlpar, 0); + if(phy->speed == 0){ + r = phy->anar & anlpar; + if(r & AnaTXFD){ + phy->speed = 100; + phy->fd = 1; + } + else if(r & AnaTXHD) + phy->speed = 100; + else if(r & Ana10FD){ + phy->speed = 10; + phy->fd = 1; + } + else if(r & Ana10HD) + phy->speed = 10; + } + if(phy->speed == 0) + return -1; + + if(phy->fd){ + p = phy->fc; + r = anlpar & (AnaAP|AnaP); + if(p == AnaAP && r == (AnaAP|AnaP)) + phy->tfc = 1; + else if(p == (AnaAP|AnaP) && r == AnaAP) + phy->rfc = 1; + else if((p & AnaP) && (r & AnaP)) + phy->rfc = phy->tfc = 1; + } + + phy->link = 1; + + return 0; +} + +char* +miidumpphy(Mii* mii, char* p, char* e) +{ + int i, r; + + if(mii == nil || mii->curphy == nil) + return p; + + p = seprint(p, e, "phy: "); + for(i = 0; i < NMiiPhyr; i++){ + if(i && ((i & 0x07) == 0)) + p = seprint(p, e, "\n "); + r = mii->rw(mii, 0, mii->curphy->phyno, i, 0); + p = seprint(p, e, " %4.4ux", r); + } + p = seprint(p, e, "\n"); + + return p; +} + +void +miidetach(Mii* mii) +{ + int i; + + for(i = 0; i < NMiiPhy; i++){ + if(mii->phy[i] == nil) + continue; + free(mii); + mii->phy[i] = nil; + } + free(mii); +} + +Mii* +miiattach(void* ctlr, int mask, int (*rw)(Mii*, int, int, int, int)) +{ + Mii* mii; + + if((mii = malloc(sizeof(Mii))) == nil) + return nil; + mii->ctlr = ctlr; + mii->rw = rw; + + if(miiprobe(mii, mask) == 0){ + free(mii); + mii = nil; + } + + return mii; +} diff -Nru 0/sys/src/nix/port/ethermii.h 4/sys/src/nix/port/ethermii.h --- 
0/sys/src/nix/port/ethermii.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/ethermii.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,120 @@ +typedef struct Mii Mii; +typedef struct MiiPhy MiiPhy; + +enum { /* registers */ + Bmcr = 0x00, /* Basic Mode Control */ + Bmsr = 0x01, /* Basic Mode Status */ + Phyidr1 = 0x02, /* PHY Identifier #1 */ + Phyidr2 = 0x03, /* PHY Identifier #2 */ + Anar = 0x04, /* Auto-Negotiation Advertisement */ + Anlpar = 0x05, /* AN Link Partner Ability */ + Aner = 0x06, /* AN Expansion */ + Annptr = 0x07, /* AN Next Page TX */ + Annprr = 0x08, /* AN Next Page RX */ + Mscr = 0x09, /* MASTER-SLAVE Control */ + Mssr = 0x0a, /* MASTER-SLAVE Status */ + Esr = 0x0f, /* Extended Status */ + + NMiiPhyr = 32, + NMiiPhy = 32, +}; + +enum { /* Bmcr */ + BmcrSs1 = 0x0040, /* Speed Select[1] */ + BmcrCte = 0x0080, /* Collision Test Enable */ + BmcrDm = 0x0100, /* Duplex Mode */ + BmcrRan = 0x0200, /* Restart Auto-Negotiation */ + BmcrI = 0x0400, /* Isolate */ + BmcrPd = 0x0800, /* Power Down */ + BmcrAne = 0x1000, /* Auto-Negotiation Enable */ + BmcrSs0 = 0x2000, /* Speed Select[0] */ + BmcrLe = 0x4000, /* Loopback Enable */ + BmcrR = 0x8000, /* Reset */ +}; + +enum { /* Bmsr */ + BmsrEc = 0x0001, /* Extended Capability */ + BmsrJd = 0x0002, /* Jabber Detect */ + BmsrLs = 0x0004, /* Link Status */ + BmsrAna = 0x0008, /* Auto-Negotiation Ability */ + BmsrRf = 0x0010, /* Remote Fault */ + BmsrAnc = 0x0020, /* Auto-Negotiation Complete */ + BmsrPs = 0x0040, /* Preamble Suppression Capable */ + BmsrEs = 0x0100, /* Extended Status */ + Bmsr100T2HD = 0x0200, /* 100BASE-T2 HD Capable */ + Bmsr100T2FD = 0x0400, /* 100BASE-T2 FD Capable */ + Bmsr10THD = 0x0800, /* 10BASE-T HD Capable */ + Bmsr10TFD = 0x1000, /* 10BASE-T FD Capable */ + Bmsr100TXHD = 0x2000, /* 100BASE-TX HD Capable */ + Bmsr100TXFD = 0x4000, /* 100BASE-TX FD Capable */ + Bmsr100T4 = 0x8000, /* 100BASE-T4 Capable */ +}; + +enum { /* Anar/Anlpar */ + Ana10G = 0x0001, + + Ana10HD = 0x0020, /* Advertise 
10BASE-T */ + Ana10FD = 0x0040, /* Advertise 10BASE-T FD */ + AnaTXHD = 0x0080, /* Advertise 100BASE-TX */ + AnaTXFD = 0x0100, /* Advertise 100BASE-TX FD */ + AnaT4 = 0x0200, /* Advertise 100BASE-T4 */ + AnaP = 0x0400, /* Pause */ + AnaAP = 0x0800, /* Asymmetrical Pause */ + AnaRf = 0x2000, /* Remote Fault */ + AnaAck = 0x4000, /* Acknowledge */ + AnaNp = 0x8000, /* Next Page Indication */ +}; + +enum { /* Mscr */ + Mscr1000THD = 0x0100, /* Advertise 1000BASE-T HD */ + Mscr1000TFD = 0x0200, /* Advertise 1000BASE-T FD */ +}; + +enum { /* Mssr */ + Mssr1000THD = 0x0400, /* Link Partner 1000BASE-T HD able */ + Mssr1000TFD = 0x0800, /* Link Partner 1000BASE-T FD able */ +}; + +enum { /* Esr */ + Esr1000THD = 0x1000, /* 1000BASE-T HD Capable */ + Esr1000TFD = 0x2000, /* 1000BASE-T FD Capable */ + Esr1000XHD = 0x4000, /* 1000BASE-X HD Capable */ + Esr1000XFD = 0x8000, /* 1000BASE-X FD Capable */ +}; + +typedef struct Mii { + Lock; + int nphy; + int mask; + MiiPhy* phy[NMiiPhy]; + MiiPhy* curphy; + + void* ctlr; + int (*rw)(Mii*, int, int, int, int); +} Mii; + +typedef struct MiiPhy { + Mii* mii; + int phyno; + int phyid; + int oui; + + int anar; + int fc; + int mscr; + + int link; + int speed; + int fd; + int rfc; + int tfc; +}; + +extern int miiane(Mii*, int, int, int); +extern Mii* miiattach(void*, int, int (*)(Mii*, int, int, int, int)); +extern void miidetach(Mii* mii); +extern char* miidumpphy(Mii*, char*, char*); +extern int miimir(Mii*, int); +extern int miimiw(Mii*, int, int); +extern int miireset(Mii*); +extern int miistatus(Mii*); diff -Nru 0/sys/src/nix/port/fault.c 4/sys/src/nix/port/fault.c --- 0/sys/src/nix/port/fault.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/fault.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,368 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * Fault calls fixfault which ends up calling newpage, which + * might fail to allocate a page for the right color. 
So, we
 * might enter a loop and retry forever.
 * We first try with the desired color, and then with any
 * other one, if we failed for some time.
 */
int
fault(uintptr addr, int read)
{
	Segment *s;
	char *sps;
	int i, color;

if(up->nlocks) print("fault nlocks %d\n", up->nlocks);

	/* remember the status string so it can be restored on every exit */
	sps = up->psstate;
	up->psstate = "Fault";
	spllo();

	m->pfault++;
	for(i = 0;; i++) {
		s = seg(up, addr, 1);	 /* leaves s->lk qlocked if seg != nil */
		if(s == 0) {
			up->psstate = sps;
			return -1;
		}

		/* write fault on a read-only segment */
		if(!read && (s->type&SG_RONLY)) {
			qunlock(&s->lk);
			up->psstate = sps;
			return -1;
		}

		/* after a few failed attempts accept a page of any color */
		color = s->color;
		if(i > 3)
			color = -1;
		if(fixfault(s, addr, read, 1, color) == 0)
			break;

		/*
		 * See the comment in newpage that describes
		 * how to get here.
		 */

		if(i > 0 && (i%1000) == 0)
			print("fault: tried %d times\n", i);
	}

	up->psstate = sps;
	return 0;
}

/*
 * Report a fault that cannot be repaired.  If c has a path the message
 * is extended with it and with up->errstr.  With an error label
 * pending, a debug note is posted and error() is raised; otherwise the
 * process exits through pexit.
 */
static void
faulterror(char *s, Chan *c, int freemem)
{
	char buf[ERRMAX];

	if(c && c->path){
		snprint(buf, sizeof buf, "%s accessing %s: %s", s, c->path->s, up->errstr);
		s = buf;
	}
	if(up->nerrlab) {
		postnote(up, 1, s, NDebug);
		error(s);
	}
	pexit(s, freemem);
}


/*
 * Make the page of segment s that contains addr present, according to
 * the segment type, and install it with mmuput when dommuput is set.
 * Called with s->lk held (see fault).  Returns 0 after releasing
 * s->lk, or -1 when newpage cleared s; the caller then retries.
 * NOTE(review): the -1 paths do not qunlock here, so newpage
 * presumably released s->lk itself - confirm in newpage.
 */
int
fixfault(Segment *s, uintptr addr, int read, int dommuput, int color)
{
	int type;
	int ref;
	Pte **p, *etp;
	uintptr soff;
	uintmem pgsz;
	uint mmuattr;
	Page **pg, *lkp, *new;
	Page *(*fn)(Segment*, uintptr);

	/* round addr down to the segment's page size and find its Pte slot */
	pgsz = m->pgsz[s->pgszi];
	addr &= ~(pgsz-1);
	soff = addr-s->base;
	p = &s->map[soff/PTEMAPMEM];
	if(*p == 0)
		*p = ptealloc(s);

	etp = *p;
	pg = &etp->pages[(soff&(PTEMAPMEM-1))/pgsz];
	type = s->type&SG_TYPE;

	/* widen the [first, last] range of slots in use in this Pte */
	if(pg < etp->first)
		etp->first = pg;
	if(pg > etp->last)
		etp->last = pg;

	mmuattr = 0;
	switch(type) {
	default:
		panic("fault");
		break;

	case SG_TEXT: 			/* Demand load */
		if(pagedout(*pg))
			pio(s, addr, soff, pg, color);

		mmuattr = PTERONLY|PTEVALID;
		(*pg)->modref = PG_REF;
		break;

	case SG_BSS:
	case SG_SHARED:			/* Zero fill on demand */
	case SG_STACK:
		if(*pg == 0) {
			new = newpage(1, &s, addr, pgsz, color);
			if(s == 0)
				return -1;

			*pg = new;
		}
		goto common;

	case SG_DATA:
	common:			/* Demand load/pagein/copy on write */
		if(pagedout(*pg))
			pio(s, addr, soff, pg, color);

		/*
		 * It's only possible to copy on write if
		 * we're the only user of the segment.
		 */
		if(read && conf.copymode == 0 && s->ref == 1) {
			mmuattr = PTERONLY|PTEVALID;
			(*pg)->modref |= PG_REF;
			break;
		}

		lkp = *pg;
		lock(lkp);

		ref = lkp->ref;
		if(ref > 1) {
			/* page is shared: give this segment a private copy */
			unlock(lkp);

			new = newpage(0, &s, addr, pgsz, color);
			if(s == 0)
				return -1;
			*pg = new;
			copypage(lkp, *pg);
			putpage(lkp);
		}
		else {
			/* save a copy of the original for the image cache */
			if(lkp->image != nil)
				duppage(lkp);

			unlock(lkp);
		}
		mmuattr = PTEWRITE|PTEVALID;
		(*pg)->modref = PG_MOD|PG_REF;
		break;

	case SG_PHYSICAL:
		if(*pg == 0) {
			/* use the segment's own allocator if it has one */
			fn = s->pseg->pgalloc;
			if(fn)
				*pg = (*fn)(s, addr);
			else {
				new = smalloc(sizeof(Page));
				new->va = addr;
				new->pa = s->pseg->pa+(addr-s->base);
				new->ref = 1;
				new->pgszi = s->pseg->pgszi;
				*pg = new;
			}
		}

		mmuattr = PTEVALID;
		if((s->pseg->attr & SG_RONLY) == 0)
			mmuattr |= PTEWRITE;
		if((s->pseg->attr & SG_CACHED) == 0)
			mmuattr |= PTEUNCACHED;
		(*pg)->modref = PG_MOD|PG_REF;
		break;
	}
	qunlock(&s->lk);

	if(dommuput){
		assert(segppn(s, (*pg)->pa) == (*pg)->pa);
		mmuput(addr, *pg, mmuattr);
	}
	return 0;
}

/*
 * Demand-load the page for addr (offset soff in segment s) from the
 * segment's image into *p.  Entered and left with s->lk held; the
 * lock is dropped around the device read.  Only demand load is
 * supported: a non-nil *p on entry panics with "no swap".
 */
void
pio(Segment *s, uintptr addr, ulong soff, Page **p, int color)
{
	Page *new;
	KMap *k;
	Chan *c;
	int n, ask;
	uintmem pgsz;
	char *kaddr;
	ulong daddr;
	Page *loadrec;

	loadrec = *p;
	daddr = ask = 0;
	c = nil;
	pgsz = m->pgsz[s->pgszi];
	if(loadrec == nil) {	/* from a text/data image */
		daddr = s->fstart+soff;
		/* already cached for this image: nothing to read */
		new = lookpage(s->image, daddr);
		if(new != nil) {
			*p = new;
			return;
		}

		c = s->image->c;
		/* read at most one page, less at the end of the file part */
		ask = s->flen-soff;
		if(ask > pgsz)
			ask = pgsz;
	}
	else
		panic("no swap");

	qunlock(&s->lk);

	new = newpage(0, 0, addr, pgsz, color);
	k = kmap(new);
	kaddr = (char*)VA(k);

	/* an interrupted read is retried; any other error is fatal */
	while(waserror()) {
		if(strcmp(up->errstr, Eintr) == 0)
			continue;
		kunmap(k);
		putpage(new);
		faulterror(Eioload, c, 0);
	}

	n = c->dev->read(c, kaddr, ask, daddr);
	if(n != ask)
		faulterror(Eioload, c, 0);
	/* zero the rest of the page past the data read */
	if(ask < pgsz)
		memset(kaddr+ask, 0, pgsz-ask);

	poperror();
	kunmap(k);

	qlock(&s->lk);
	if(loadrec == nil) {	/* This is demand load */
		/*
		 * race, another proc may have gotten here first while
		 * s->lk was unlocked
		 */
		if(*p == nil) {
			new->daddr = daddr;
			cachepage(new, s->image);
			*p = new;
		}
		else
			putpage(new);
	}
	else
		panic("no swap");

	if(s->flushme)
		memset((*p)->cachectl, PG_TXTFLUSH, sizeof((*p)->cachectl));
}

/*
 * Called only in a system call
 *
 * Returns 1 if [addr, addr+len) is covered by segments of up that
 * allow the access (writes are refused on SG_RONLY segments), walking
 * from one segment to the next when the range crosses a segment top.
 * Returns 0 otherwise, including for negative len.
 */
int
okaddr(uintptr addr, long len, int write)
{
	Segment *s;

	if(len >= 0) {
		for(;;) {
			s = seg(up, addr, 0);
			if(s == 0 || (write && (s->type&SG_RONLY)))
				break;

			if(addr+len > s->top) {
				/* continue the check at the top of this segment */
				len -= s->top - addr;
				addr = s->top;
				continue;
			}
			return 1;
		}
	}
	return 0;
}

/*
 * Like okaddr, but a bad address is fatal: the process gets a
 * diagnostic and is terminated with pexit("Suicide").
 * Returns addr when the range is valid.
 */
void*
validaddr(void* addr, long len, int write)
{
	if(!okaddr(PTR2UINT(addr), len, write)){
		pprint("suicide: invalid address %#p/%ld in sys call pc=%#p\n",
			addr, len, userpc(nil));
		pexit("Suicide", 0);
	}

	return UINT2PTR(addr);
}

/*
 * &s[0] is known to be a valid address.
 * Assume 2M pages, so it works for both 2M and 1G pages.
 * Note this won't work for 4*KiB pages!
+ */ +void* +vmemchr(void *s, int c, int n) +{ + int m; + uintptr a; + void *t; + + a = PTR2UINT(s); + while(ROUNDUP(a, BIGPGSZ) != ROUNDUP(a+n-1, BIGPGSZ)){ + /* spans pages; handle this page */ + m = BIGPGSZ - (a & (BIGPGSZ-1)); + t = memchr(UINT2PTR(a), c, m); + if(t) + return t; + a += m; + n -= m; + if((a & KZERO) != KZERO) + validaddr(UINT2PTR(a), 1, 0); + } + + /* fits in one page */ + return memchr(UINT2PTR(a), c, n); +} + +Segment* +seg(Proc *p, uintptr addr, int dolock) +{ + Segment **s, **et, *n; + + et = &p->seg[NSEG]; + for(s = p->seg; s < et; s++) { + n = *s; + if(n == 0) + continue; + if(addr >= n->base && addr < n->top) { + if(dolock == 0) + return n; + + qlock(&n->lk); + if(addr >= n->base && addr < n->top) + return n; + qunlock(&n->lk); + } + } + + return 0; +} diff -Nru 0/sys/src/nix/port/image.c 4/sys/src/nix/port/image.c --- 0/sys/src/nix/port/image.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/image.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,320 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#define NFREECHAN 64 +#define IHASHSIZE 64 +#define ihash(s) imagealloc.hash[s%IHASHSIZE] + +static struct Imagealloc +{ + Lock; + Image *mru; /* head of LRU list */ + Image *lru; /* tail of LRU list */ + Image *hash[IHASHSIZE]; + QLock ireclaim; /* mutex on reclaiming free images */ + + Chan **freechan; /* free image channels */ + int nfreechan; /* number of free channels */ + int szfreechan; /* size of freechan array */ + QLock fcreclaim; /* mutex on reclaiming free channels */ +} imagealloc; + +static struct { + int calls; /* times imagereclaim was called */ + int loops; /* times the main loop was run */ + uvlong ticks; /* total time in the main loop */ + uvlong maxt; /* longest time in main loop */ + int noluck; /* # of times we couldn't get one */ + int nolock; /* # of times we couldn't get the lock */ +} irstats; + +static void +dumplru(void) +{ + Image *i; + + 
print("lru:"); + for(i = imagealloc.mru; i != nil; i = i->next) + print(" %p(c%p,r%d)", i, i->c, i->ref); + print("\n"); +} + +/* + * imagealloc and i must be locked. + */ +static void +imageunused(Image *i) +{ + if(i->prev != nil) + i->prev->next = i->next; + else + imagealloc.mru = i->next; + if(i->next != nil) + i->next->prev = i->prev; + else + imagealloc.lru = i->prev; + i->next = i->prev = nil; +} + +/* + * imagealloc and i must be locked. + */ +static void +imageused(Image *i) +{ + imageunused(i); + i->next = imagealloc.mru; + i->next->prev = i; + imagealloc.mru = i; + if(imagealloc.lru == nil) + imagealloc.lru = i; +} + +/* + * imagealloc must be locked. + */ +static Image* +lruimage(void) +{ + Image *i; + + for(i = imagealloc.lru; i != nil; i = i->prev) + if(i->c == nil){ + /* + * i->c will be set before releasing the + * lock on imagealloc, which means it's in use. + */ + return i; + } + return nil; +} + +/* + * On clu, set conf.nimages = 10 to exercise reclaiming. + * It won't be able to get through all of cpurc, but will reclaim. 
+ */ +void +initimage(void) +{ + Image *i, *ie; + + DBG("initimage: %uld images\n", conf.nimage); + imagealloc.mru = malloc(conf.nimage*sizeof(Image)); + if(imagealloc.mru == nil) + panic("imagealloc: no memory"); + ie = &imagealloc.mru[conf.nimage]; + for(i = imagealloc.mru; i < ie; i++){ + i->c = nil; + i->ref = 0; + i->prev = i-1; + i->next = i+1; + } + imagealloc.mru[0].prev = nil; + imagealloc.mru[conf.nimage-1].next = nil; + imagealloc.lru = &imagealloc.mru[conf.nimage-1]; + imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*)); + imagealloc.szfreechan = NFREECHAN; + +} + +static void +imagereclaim(void) +{ + Image *i; + uvlong ticks0, ticks; + + irstats.calls++; + /* Somebody is already cleaning the page cache */ + if(!canqlock(&imagealloc.ireclaim)) + return; + DBG("imagereclaim maxt %ulld noluck %d nolock %d\n", + irstats.maxt, irstats.noluck, irstats.nolock); + ticks0 = fastticks(nil); + if(!canlock(&imagealloc)){ + /* never happen in the experiments I made */ + qunlock(&imagealloc.ireclaim); + return; + } + + for(i = imagealloc.lru; i != nil; i = i->prev){ + if(canlock(i)){ + i->ref++; /* make sure it does not go away */ + unlock(i); + pagereclaim(i); + lock(i); + DBG("imagereclaim: image %p(c%p, r%d)\n", i, i->c, i->ref); + if(i->ref == 1){ /* no pages referring to it, it's ours */ + unlock(i); + unlock(&imagealloc); + putimage(i); + break; + }else + --i->ref; + unlock(i); + } + } + + if(i == nil){ + irstats.noluck++; + unlock(&imagealloc); + } + irstats.loops++; + ticks = fastticks(nil) - ticks0; + irstats.ticks += ticks; + if(ticks > irstats.maxt) + irstats.maxt = ticks; + //print("T%llud+", ticks); + qunlock(&imagealloc.ireclaim); +} + +/* + * since close can block, this has to be called outside of + * spin locks. 
*/
static void
imagechanreclaim(void)
{
	Chan *c;

	/* Somebody is already cleaning the image chans */
	if(!canqlock(&imagealloc.fcreclaim))
		return;

	/*
	 * We don't have to recheck that nfreechan > 0 after we
	 * acquire the lock, because we're the only ones who decrement
	 * it (the other lock contender increments it), and there's only
	 * one of us thanks to the qlock above.
	 */
	while(imagealloc.nfreechan > 0){
		lock(&imagealloc);
		imagealloc.nfreechan--;
		c = imagealloc.freechan[imagealloc.nfreechan];
		unlock(&imagealloc);
		/* close with the spin lock released */
		cclose(c);
	}

	qunlock(&imagealloc.fcreclaim);
}

/*
 * Find the cached image for channel c, or claim a free Image for it,
 * and make sure it has a segment: a new one of the given type, base,
 * len and color, or one more reference to the existing one.
 * On the normal return path i is still locked; there is no unlock(i)
 * here.  NOTE(review): presumably the caller unlocks it - confirm.
 */
Image*
attachimage(int type, Chan *c, int color, uintptr base, usize len)
{
	Image *i, **l;

	/* reclaim any free channels from reclaimed segments */
	if(imagealloc.nfreechan)
		imagechanreclaim();

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->dev->dc == i->dc) {
//subtype
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = lruimage())) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	/* claim the free Image for c and enter it in the hash */
	lock(i);
	incref(c);
	i->c = c;
	i->dc = c->dev->dc;
//subtype
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	i->color = color;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	imageused(i);
	unlock(&imagealloc);

	if(i->s == 0) {
		/* Disaster after commit in exec */
		if(waserror()) {
			unlock(i);
			pexit(Enovmem, 1);
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		i->s->color = color;
		i->ref++;
		poperror();
	}
	else
		incref(i->s);

	return i;
}

/*
 * Drop one reference to image i.  When the last one goes, i is removed
 * from the hash and its channel is queued on imagealloc.freechan to be
 * closed later by imagechanreclaim.  Images marked notext are ignored.
 */
void
putimage(Image *i)
{
	Chan *c, **cp;
	Image *f, **l;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		/* find the bucket before the qid is overwritten */
		l = &ihash(i->qid.path);
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		/* defer freeing channel till we're out of spin lock's */
		if(imagealloc.nfreechan == imagealloc.szfreechan){
			/* array is full: grow it by NFREECHAN entries */
			imagealloc.szfreechan += NFREECHAN;
			cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
			if(cp == nil)
				panic("putimage");
			memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
			free(imagealloc.freechan);
			imagealloc.freechan = cp;
		}
		imagealloc.freechan[imagealloc.nfreechan++] = c;
		i->c = nil;		/* flag as unused in lru list */
		unlock(&imagealloc);

		return;
	}
	unlock(i);
}
diff -Nru 0/sys/src/nix/port/initcode.c 4/sys/src/nix/port/initcode.c --- 0/sys/src/nix/port/initcode.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/initcode.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,42 @@
/*
 * IMPORTANT!  DO NOT ADD LIBRARY CALLS TO THIS FILE.
 * The entire text image must fit on one page
 * (and there's no data segment, so any read/write data must be on the stack).
+ */ + +#include +#include + +char cons[] = "#c/cons"; +char boot[] = "/boot/boot"; +char dev[] = "/dev"; +char c[] = "#c"; +char e[] = "#e"; +char ec[] = "#ec"; +char s[] = "#s"; +char srv[] = "/srv"; +char env[] = "/env"; + +void +startboot(char *argv0, char **argv) +{ + char buf[200]; + + USED(argv0); + /* + * open the console here so that /boot/boot, + * which could be a shell script, can inherit the open fds. + */ + open(cons, OREAD); + open(cons, OWRITE); + open(cons, OWRITE); + bind(c, dev, MAFTER); + bind(ec, env, MAFTER); + bind(e, env, MCREATE|MAFTER); + bind(s, srv, MREPL|MCREATE); + exec(boot, argv); + + rerrstr(buf, sizeof buf); + buf[sizeof buf - 1] = '\0'; + _exits(buf); +} diff -Nru 0/sys/src/nix/port/kexec.c 4/sys/src/nix/port/kexec.c --- 0/sys/src/nix/port/kexec.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/kexec.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,454 @@ +#include "u.h" +#include "tos.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include +#include "kexec.h" + + +/* XXX: MOVE ME TO K10 */ + +enum { + Maxslot = 32, +}; + +static uvlong +vl2be(uvlong v) +{ + uchar *p; + + p = (uchar*)&v; + return ((uvlong)((p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3])<<32) + |((uvlong)(p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7]); +} + +static ulong +l2be(long l) +{ + uchar *cp; + + cp = (uchar*)&l; + return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; +} + +typedef struct { + Exec; + uvlong hdr[1]; +} Khdr; + +enum { + AsmNONE = 0, + AsmMEMORY = 1, + AsmRESERVED = 2, + AsmACPIRECLAIM = 3, + AsmACPINVS = 4, + + AsmDEV = 5, +}; + +Proc* +setupseg(int core) +{ + Segment *s; + uintptr ka; + Proc *p; + static Pgrp *kpgrp; + + // XXX: we're going to need this for locality domains. 
+ USED(core); + + p = newproc(); + p->psstate = 0; + p->procmode = 0640; + p->kp = 1; + p->noswap = 1; + + p->scallnr = up->scallnr; + memmove(p->arg, up->arg, sizeof(up->arg)); + p->nerrlab = 0; + p->slash = up->slash; + p->dot = up->dot; + if(p->dot) + incref(p->dot); + + memmove(p->note, up->note, sizeof(p->note)); + p->nnote = up->nnote; + p->notified = 0; + p->lastnote = up->lastnote; + p->notify = up->notify; + p->ureg = 0; + p->dbgreg = 0; + + kstrdup(&p->user, eve); + if(kpgrp == 0) + kpgrp = newpgrp(); + p->pgrp = kpgrp; + incref(kpgrp); + + memset(p->time, 0, sizeof(p->time)); + p->time[TReal] = sys->ticks; + + procpriority(p, PriKproc, 0); + + + // XXX: kluge 4 pages of address space for this. + // how will it expand up? gives us <50 kprocs as is. + + /* + * we create the color and core at allocation time, not execution. This + * is probably not the best idea but it's a start. + */ + + // XXX: now that we are asmalloc we are no long proc. + + ka = (uintptr)KADDR(asmalloc(0, BIGPGSZ, AsmMEMORY, 1)); + s = newseg(SG_TEXT|SG_RONLY, ka, 1); + p->seg[TSEG] = s; +// s->color = acpicorecolor(core); + + /* Data. Shared. */ + // XXX; Now that the address space is all funky how are we going to handle shared data segments? + ka = (uintptr)KADDR(asmalloc(0, BIGPGSZ, AsmMEMORY, 2)); + s = newseg(SG_DATA, ka, 1); + p->seg[DSEG] = s; + s->color = p->seg[TSEG]->color; + + /* BSS. Uses asm from data map. 
*/ + p->seg[BSEG] = newseg(SG_BSS, ka+BIGPGSZ, 1); + p->seg[BSEG]->color= up->seg[TSEG]->color; + + /* Stack */ + ka = (uintptr)KADDR(asmalloc(0, BIGPGSZ, AsmMEMORY, 1)); + p->seg[SSEG] = newseg(SG_STACK, ka, 1); + nixprepage(-1); + + return p; +} + +void +kforkexecac(Proc *p, int core, char *ufile, char **argv) +{ + Khdr hdr; + Tos *tos; + Chan *chan; + int argc, i, n; + char *a, *elem, *file, *args; + long hdrsz, magic, textsz, datasz, bsssz; + uintptr textlim, datalim, bsslim, entry, tbase, tsize, dbase, dsize, bbase, bsize, sbase, ssize, stack; + Mach *mp; + static Pgrp *kpgrp; + + DBG("kexec on core %d\n", core); + // XXX: since this is kernel code we can't do attachimage, + // we should be reading the file into kernel memory. + // this only matters if we are using ufile. + // YYY: look at dev reboot for help. + + file = nil; + elem = nil; + chan = nil; + mp = nil; + + USED(chan); + + if(waserror()){ + DBG("kforkexecac: failing: %s\n", up->errstr); + if(file) + free(file); + if(elem) + free(elem); + if(chan) + cclose(chan); + if(core > 0 && mp != nil) + mp->proc = nil; + if(core != 0) + p->ac = nil; + nexterror(); + } + + if(core != 0) + p->ac = getac(p, core); + + argc = 0; + if(ufile != nil){ + panic("ufile not implemented yet"); + file = validnamedup(ufile, 1); + DBG("kforkexecac: up %#p file %s\n", up, file); + chan = namec(file, Aopen, OEXEC, 0); + kstrdup(&elem, up->genbuf); + + hdrsz = chan->dev->read(chan, &hdr, sizeof(Khdr), 0); + DBG("wrote ufile\n"); + + if(hdrsz < 2) + error(Ebadexec); + }else{ + /* somebody already wrote in our text segment */ + hdr = *(Khdr*)p->seg[TSEG]->base; + hdrsz = sizeof(Khdr); + } + +// p = (char*)&hdr; + magic = l2be(hdr.magic); + DBG("badexec3\n"); + + if(hdrsz != sizeof(Khdr) || magic != AOUT_MAGIC) + error(Ebadexec); + if(magic & HDR_MAGIC){ + entry = vl2be(hdr.hdr[0]); + hdrsz = sizeof(Khdr); + } + else{ + entry = l2be(hdr.entry); + hdrsz = sizeof(Exec); + } + + textsz = l2be(hdr.text); + datasz = l2be(hdr.data); + 
+	bsssz = l2be(hdr.bss);
+
+	/*
+	 * Extent of each preallocated segment.  The size is top - base;
+	 * computing base - top wraps around and later places the
+	 * initial stack pointer (sbase+ssize-sizeof(Tos)) below the
+	 * stack segment.
+	 */
+	tbase = p->seg[TSEG]->base;
+	tsize = p->seg[TSEG]->top - tbase;
+	dbase = p->seg[DSEG]->base;
+	dsize = p->seg[DSEG]->top - dbase;
+	bbase = p->seg[BSEG]->base;
+	bsize = p->seg[BSEG]->top - bbase;
+	sbase = p->seg[SSEG]->base;
+	ssize = p->seg[SSEG]->top - sbase;
+
+	// XXX: we are no longer contiguous.
+	textlim = ROUNDUP(hdrsz+textsz, BIGPGSZ);
+	// XXX: we are going to be at least two pages here.
+	datalim = BIGPGROUND(datasz);
+	bsslim = BIGPGROUND(datalim+bsssz);
+
+	// XXX: this is pretty fragile
+	// NOTE(review): the data is copied before any of the checks below run.
+	memmove((void*)dbase, (void*)(entry+textsz), datasz);
+	DBG("writing data dbase %#p tbase %#p textsz %ld datasz %ld\n", dbase, tbase, textsz, datasz);
+//	memmove((void*)dbase, (void*)"testing data", 13);
+	/*
+	 * Check the binary header for consistency,
+	 * e.g. the entry point is within the text segment and
+	 * the segments don't overlap each other.
+	 */
+	// XXX: max instruction size on amd64 is 15 bytes provide a check for consistency.
+	DBG("kexec: entry %#p tbase %#p hdrsz %ld textsz %ld\n", entry, tbase, hdrsz, textsz);
+	if(entry < tbase+hdrsz || entry >= tbase+hdrsz+textsz)
+		error(Ebadexec);
+	// XXX: what about the kernel stack we are making here?
+	DBG("kexec: testing if sizes overflow limits\n");
+	if(textsz >= textlim || datasz > datalim || bsssz > bsslim)
+		error(Ebadexec);
+
+	/*
+	 * NOTE(review): textlim, datalim and bsslim are sizes, while
+	 * base+size is the segment's top address; these comparisons
+	 * look like they mix the two -- confirm the intended check.
+	 */
+	DBG("kexec: do the top of the segments overflow limits?\n");
+	if(textlim >= tbase+tsize || datalim >= dbase+dsize || bsslim >= bbase+bsize)
+		error(Ebadexec);
+
+	DBG("kexec: is bss below data?\n");
+	if(bsslim < datalim)
+		error(Ebadexec);
+	/*
+	  Interesting thought, the previously allocated segments for
+	  data and text are shared and constant. The BSS and the stack
+	  are not. What you really want is the ability to make an
+	  executable text and data and then create child executables on
+	  top of that. This will lower external fragmentation and allow
+	  a bunch of communicating shared memory processes (ie. go) in
+	  kernel space.
+ + Fundamentally this means that the allocation of the text and + the data should be separate from the bss and the stack. This + will require that you change the linkers as well to allow the + separation of data and bss sections. + */ + + /* + * Stack is a pointer into the temporary stack + * segment, and will move as items are pushed. + */ + + // need to work something out here with the stack. + stack = sbase+ssize-sizeof(Tos); + + + /* + * XXX: When we are linking this how do we set the tos? We will need to change trap right? + */ + tos = (Tos*)stack; + tos->cyclefreq = m->cyclefreq; + cycles((uvlong*)&tos->pcycles); + tos->pcycles = -tos->pcycles; + tos->kcycles = tos->pcycles; + tos->clock = 0; + + DBG("kexec: argument processing\n"); + if(0) + for(i = 0;; i++, argv++){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + a = validaddr(a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + + if(argc > 0 && i == 0) + continue; + + stack -= n; + if(stack < sbase+ssize-4096) + error(Enovmem); + args = UINT2PTR(stack); + memmove(args, a, n); + args[n-1] = 0; + argc++; + } + // DBG("kexec: ensuring we have argc\n"); + if(0) + if(argc < 1) + error(Ebadexec); + + a = args = UINT2PTR(stack); + stack = sysexecstack(stack, argc); + // XXX: look through math on this. look at ../../9/port/ exec.c + // YYY: this looks like a Jimism for 9k. + // DBG("kexec: ensuring the stack \n"); + if(0) + if(stack-(argc+1)*sizeof(char**)-BIGPGSZ < sbase+ssize-4096) + error(Ebadexec); + + argv = (char**)stack; + *--argv = nil; + // XXX: replace USTKTOP with a new variable representing the top of stack. + if(0) + for(i = 0; i < argc; i++){ + *--argv = args + (USTKTOP-sbase+ssize); + args += strlen(args) + 1; + } + + DBG("argsing\n"); + n = args - a; + if(0) + if(n <= 0) + error(Egreg); + if(n > 128) + n = 128; + DBG("kexec: allocating args\n"); + // XXX: hangs in smalloc, not sure why. 
+// args = smalloc(n); +// if(waserror()){ +// DBG("erroring\n"); +// free(args); +// nexterror(); +// } +// DBG("kexec: moving args\n"); +// memmove(args, a, n); +// if(0) +// while(n > 0 && (args[n-1] & 0xc0) == 0x80) +// n--; +// args[n-1] = '\0'; + + kstrdup(&p->text, "kexecproc"); + p->args = nil; + //elem; +// elem = nil; +// p->args = args; +// p->nargs = n; + poperror(); /* p (up->args) */ + + + + + +/* + qlock(&p->debug); + + sysprocsetup(p); + qunlock(&p->debug); +*/ + + // why is this sched and not ureg? + p->sched.pc = entry; + // the real question here is how do you set up the stack? + p->sched.sp = PTR2UINT(stack-BY2SE); + p->sched.sp = STACKALIGN(p->sched.sp); + + + // XXX: what does it imply if you have a kproc that runs on an ac? + if(core > 0){ + DBG("kexec: coring %d\n", core); + mp = p->ac; + mp->icc->flushtlb = 1; + mp->icc->rc = ICCOK; + + DBG("kexec: exotic proc on cpu%d\n", mp->machno); + qlock(&p->debug); + if(waserror()){ + DBG("kexec: had error"); + qunlock(&p->debug); + nexterror(); + } + p->nicc++; + p->state = Exotic; + p->psstate = 0; + DBG("kexec: unlocking"); + qunlock(&p->debug); + poperror(); + mfence(); + mp->icc->fn = (void*)entry; + sched(); + }else{ + DBG("kexec: readying\n"); + ready(p); + p->newtlb = 1; + mmuflush(); + } + DBG("kforkexecac up %#p done\n" + "textsz %lx datasz %lx bsssz %lx hdrsz %lx\n" + "textlim %ullx datalim %ullx bsslim %ullx\n", up, + textsz, datasz, bsssz, hdrsz, textlim, datalim, bsslim); +} + +void +syskforkexecac(Ar0* ar0, va_list list) +{ +// int core; +// uintptr base, size; +// char *file, **argv; + //XXX: get system call working. 
+ USED(ar0, list); + + // XXX: fix sysexecregs + panic("syskforkexecac: don't call me yet"); + /* + * void* syskforkexecac(uintptr base, size, int core, char *ufile, char **argv) + */ +// base = va_arg(list, uintptr); +// size = va_arg(list, uintptr); +// core = va_arg(list, unsigned int); +// file = va_arg(list, char*); +// file = validaddr(file, 1, 0); +// argv = va_arg(list, char**); +// evenaddr(PTR2UINT(argv)); + // XXX: going to need to setup segs here. + //kforkexecac(p, core, file, argv); + // this is not going to work. I need to think about it. + // ar0->v = sysexecregs(entry, stack - PTR2UINT(argv), argc); + +} + + +void +printhello(void) +{ + print("hello\n"); +} + +void +printargs(char *arg) +{ + print("%#p %s\n", arg, arg); +} diff -Nru 0/sys/src/nix/port/kexec.h 4/sys/src/nix/port/kexec.h --- 0/sys/src/nix/port/kexec.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/kexec.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,28 @@ +typedef struct Kvalue Kvalue; +typedef struct Kexecgrp Kexecgrp; + + +/* Kexec structures */ +struct Kvalue +{ + uintptr addr; + uvlong size; + int len; + int inuse; + Kvalue *link; + Qid qid; +}; + +struct Kexecgrp +{ + Ref; + RWlock; + Kvalue **ent; + int nent; + int ment; + ulong path; /* qid.path of next Kvalue to be allocated */ + ulong vers; /* of Kexecgrp */ +}; + +void kforkexecac(Proc*, int, char*, char**); +Proc* setupseg(int core); diff -Nru 0/sys/src/nix/port/latin1.c 4/sys/src/nix/port/latin1.c --- 0/sys/src/nix/port/latin1.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/latin1.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,76 @@ +#include "u.h" + +/* + * The code makes two assumptions: strlen(ld) is 1 or 2; latintab[i].ld can be a + * prefix of latintab[j].ld only when j=5) + return unicode(k); + else + return -5; + for(l=latintab; l->ld!=0; l++) + if(k[0] == l->ld[0]){ + if(n == 1) + return -2; + if(l->ld[1] == 0) + c = k[1]; + else if(l->ld[1] != k[1]) + continue; + else if(n == 2) + return -3; + else + c = k[2]; + for(p=l->si; *p!=0; 
p++) + if(*p == c) + return l->so[p - l->si]; + return -1; + } + return -1; +} diff -Nru 0/sys/src/nix/port/latin1.h 4/sys/src/nix/port/latin1.h --- 0/sys/src/nix/port/latin1.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/latin1.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,100 @@ + " ", " i", L"␣ı", + "!~", "-=~", L"≄≇≉", + "!", "!<=>?bmp", L"¡≮≠≯‽⊄∉⊅", + "\"*", "IUiu", L"ΪΫϊϋ", + "\"", "\"AEIOUYaeiouy", L"¨ÄËÏÖÜŸäëïöüÿ", + "$*", "fhk", L"ϕϑϰ", + "$", "BEFHILMRVaefglopv", L"ℬℰℱℋℐℒℳℛƲɑℯƒℊℓℴ℘ʋ", + "\'\"", "Uu", L"Ǘǘ", + "\'", "\'ACEILNORSUYZacegilnorsuyz", L"´ÁĆÉÍĹŃÓŔŚÚÝŹáćéģíĺńóŕśúýź", + "*", "*ABCDEFGHIKLMNOPQRSTUWXYZabcdefghiklmnopqrstuwxyz", L"∗ΑΒΞΔΕΦΓΘΙΚΛΜΝΟΠΨΡΣΤΥΩΧΗΖαβξδεφγθικλμνοπψρστυωχηζ", + "+", "-O", L"±⊕", + ",", ",ACEGIKLNORSTUacegiklnorstu", L"¸ĄÇĘĢĮĶĻŅǪŖŞŢŲąçęģįķļņǫŗşţų", + "-*", "l", L"ƛ", + "-", "+-2:>DGHILOTZbdghiltuz~", L"∓­ƻ÷→ÐǤĦƗŁ⊖ŦƵƀðǥℏɨłŧʉƶ≂", + ".", ".CEGILOZceglz", L"·ĊĖĠİĿ⊙Żċėġŀż", + "/", "Oo", L"Øø", + "1", ".234568", L"․½⅓¼⅕⅙⅛", + "2", "-.35", L"ƻ‥⅔⅖", + "3", ".458", L"…¾⅗⅜", + "4", "5", L"⅘", + "5", "68", L"⅚⅝", + "7", "8", L"⅞", + ":", "()-=", L"☹☺÷≔", + "~", L"←«≤≶≲", + "=", ":<=>OV", L"≕⋜≡⋝⊜⇒", + ">!", "=~", L"≩⋧", + ">", "<=>~", L"≷≥»≳", + "?", "!?", L"‽¿", + "@\'", "\'", L"ъ", + "@@", "\'EKSTYZekstyz", L"ьЕКСТЫЗекстыз", + "@C", "Hh", L"ЧЧ", + "@E", "Hh", L"ЭЭ", + "@K", "Hh", L"ХХ", + "@S", "CHch", L"ЩШЩШ", + "@T", "Ss", L"ЦЦ", + "@Y", "AEOUaeou", L"ЯЕЁЮЯЕЁЮ", + "@Z", "Hh", L"ЖЖ", + "@c", "h", L"ч", + "@e", "h", L"э", + "@k", "h", L"х", + "@s", "ch", L"щш", + "@t", "s", L"ц", + "@y", "aeou", L"яеёю", + "@z", "h", L"ж", + "@", "ABDFGIJLMNOPRUVXabdfgijlmnopruvx", L"АБДФГИЙЛМНОПРУВХабдфгийлмнопрувх", + "A", "E", L"Æ", + "C", "ACU", L"⋂ℂ⋃", + "Dv", "Zz", L"DŽDž", + "D", "-e", L"Ð∆", + "G", "-", L"Ǥ", + "H", "-H", L"Ħℍ", + "I", "-J", L"ƗIJ", + "L", "&-Jj|", L"⋀ŁLJLj⋁", + "M", "#48bs", L"♮♩♪♭♯", + "N", "JNj", L"NJℕNj", + "O", "*+-./=EIcoprx", L"⊛⊕⊖⊙⊘⊜ŒƢ©⊚℗®⊗", + "P", "P", L"ℙ", + "Q", "Q", L"ℚ", + "R", "R", L"ℝ", + "S", "123S", L"¹²³§", + "T", 
"-u", L"Ŧ⊨", + "V", "=", L"⇐", + "Y", "R", L"Ʀ", + "Z", "-ACSZ", L"Ƶℤ", + "^", "ACEGHIJOSUWYaceghijosuwy", L"ÂĈÊĜĤÎĴÔŜÛŴŶâĉêĝĥîĵôŝûŵŷ", + "_\"", "AUau", L"ǞǕǟǖ", + "_,", "Oo", L"Ǭǭ", + "_.", "Aa", L"Ǡǡ", + "_", "AEIOU_aeiou", L"ĀĒĪŌŪ¯āēīōū", + "`\"", "Uu", L"Ǜǜ", + "`", "AEIOUaeiou", L"ÀÈÌÒÙàèìòù", + "a", "ben", L"↔æ∠", + "b", "()+-0123456789=bknpqru", L"₍₎₊₋₀₁₂₃₄₅₆₇₈₉₌♝♚♞♟♛♜•", + "c", "$Oagu", L"¢©∩≅∪", + "dv", "z", L"dž", + "d", "-adegz", L"ð↓‡°†ʣ", + "e", "$lmns", L"€⋯—–∅", + "f", "a", L"∀", + "g", "$-r", L"¤ǥ∇", + "h", "-v", L"ℏƕ", + "i", "-bfjps", L"ɨ⊆∞ij⊇∫", + "l", "\"$&\'-jz|", L"“£∧‘łlj⋄∨", + "m", "iou", L"µ∈×", + "n", "jo", L"nj¬", + "o", "AOUaeiu", L"Å⊚Ůåœƣů", + "p", "Odgrt", L"℗∂¶∏∝", + "r", "\"\'O", L"”’®", + "s", "()+-0123456789=abnoprstu", L"⁽⁾⁺⁻⁰ⁱ⁲⁳⁴⁵⁶⁷⁸⁹⁼ª⊂ⁿº⊃√ß∍∑", + "t", "-efmsu", L"ŧ∃∴™ς⊢", + "u", "-AEGIOUaegiou", L"ʉĂĔĞĬŎŬ↑ĕğĭŏŭ", + "v\"", "Uu", L"Ǚǚ", + "v", "ACDEGIKLNORSTUZacdegijklnorstuz", L"ǍČĎĚǦǏǨĽŇǑŘŠŤǓŽǎčďěǧǐǰǩľňǒřšťǔž", + "w", "bknpqr", L"♗♔♘♙♕♖", + "x", "O", L"⊗", + "y", "$", L"¥", + "z", "-", L"ƶ", + "|", "Pp|", L"Þþ¦", + "~!", "=", L"≆", + "~", "-=AINOUainou~", L"≃≅ÃĨÑÕŨãĩñõũ≈", diff -Nru 0/sys/src/nix/port/led.c 4/sys/src/nix/port/led.c --- 0/sys/src/nix/port/led.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/led.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,62 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "../port/error.h" +#include "fns.h" +#include "led.h" + +static char *ibpinames[Ibpilast] = { +[Ibpinone] "none", +[Ibpinormal] "normal", +[Ibpilocate] "locate", +[Ibpifail] "fail", +[Ibpirebuild] "rebuild", +[Ibpipfa] "pfa", +[Ibpispare] "spare", +[Ibpicritarray] "critarray", +[Ibpifailarray] "failarray", +}; + +char* +ledname(int c) +{ + if(c >= 0 && c < Ibpilast) + return ibpinames[c]; + return "bad index"; +} + + int +name2led(char *s) +{ + int i; + + for(i = 0; i < nelem(ibpinames); i++) + if(strcmp(ibpinames[i], s) == 0) + return i; + return -1; +} + +long +ledr(Ledport *p, Chan*, 
void *a, long n, vlong off)
+{
+	char buf[64];
+
+	/* report the current state as its name followed by a newline */
+	snprint(buf, sizeof buf, "%s\n", ledname(p->led));
+	return readstr(off, a, n, buf);
+}
+
+/*
+ * Write handler: the first field of the written text names the new
+ * led state.  Raises Ebadarg when the text is empty or the name is
+ * unknown; otherwise stores the state in p->led and returns n.
+ */
+long
+ledw(Ledport *p, Chan*, void *a, long n, vlong)
+{
+	int i;
+	Cmdbuf *cb;
+
+	cb = parsecmd(a, n);
+	/* an empty or blank write yields no fields; f[0] must not reach strcmp */
+	if(cb->nf > 0)
+		i = name2led(cb->f[0]);
+	else
+		i = -1;
+	free(cb);
+	if(i == -1)
+		error(Ebadarg);
+	p->led = i;
+	return n;
+}
diff -Nru 0/sys/src/nix/port/led.h 4/sys/src/nix/port/led.h
--- 0/sys/src/nix/port/led.h	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/led.h	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,26 @@
+typedef struct Ledport Ledport;
+
+struct Ledport {
+	uchar nled;
+	uchar led;
+	ushort ledbits; /* implementation dependent */
+};
+
+/* http://en.wikipedia.org/wiki/IBPI */
+enum {
+	Ibpinone,
+	Ibpinormal,
+	Ibpilocate,
+	Ibpifail,
+	Ibpirebuild,
+	Ibpipfa,
+	Ibpispare,
+	Ibpicritarray,
+	Ibpifailarray,
+	Ibpilast,
+};
+
+char *ledname(int);
+int name2led(char*);
+long ledr(Ledport*, Chan*, void*, long, vlong);
+long ledw(Ledport*, Chan*, void*, long, vlong);
diff -Nru 0/sys/src/nix/port/lib.h 4/sys/src/nix/port/lib.h
--- 0/sys/src/nix/port/lib.h	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/lib.h	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,321 @@
+/*
+ * functions (possibly) linked in, complete, from libc.
+ */ +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define offsetof(s, m) (ulong)(&(((s*)0)->m)) +#define assert(x) if(x){}else _assert("x") + +/* + * mem routines + */ +extern void* memccpy(void*, void*, int, ulong); +extern void* memset(void*, int, ulong); +extern int memcmp(void*, void*, ulong); +extern void* memmove(void*, void*, ulong); +extern void* memchr(void*, int, ulong); + +/* + * string routines + */ +extern char* strcat(char*, char*); +extern char* strchr(char*, int); +extern int strcmp(char*, char*); +extern char* strcpy(char*, char*); +extern char* strecpy(char*, char*, char*); +extern char* strncat(char*, char*, long); +extern char* strncpy(char*, char*, long); +extern int strncmp(char*, char*, long); +extern char* strrchr(char*, int); +extern long strlen(char*); +extern char* strstr(char*, char*); +extern int cistrncmp(char*, char*, int); +extern int cistrcmp(char*, char*); +extern int tokenize(char*, char**, int); + +enum +{ + UTFmax = 3, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ +}; + +/* + * rune routines + */ +extern int runetochar(char*, Rune*); +extern int chartorune(Rune*, char*); +extern int runelen(long); +extern int fullrune(char*, int); +extern int utflen(char*); +extern int utfnlen(char*, long); +extern char* utfrune(char*, long); + +/* + * malloc + */ +extern void* malloc(ulong); +extern void* mallocz(ulong, int); +extern void free(void*); +extern ulong msize(void*); +extern void* mallocalign(ulong, ulong, long, ulong); +extern void setmalloctag(void*, ulong); +extern void setrealloctag(void*, ulong); +extern ulong getmalloctag(void*); +extern ulong getrealloctag(void*); +extern void* realloc(void *, ulong); + +/* + * print routines + */ +typedef struct Fmt Fmt; +struct Fmt{ + uchar runes; /* output buffer is runes or chars? 
*/ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt *); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + int r; /* % format Rune */ + int width; + int prec; + ulong flags; +}; + +enum { + FmtWidth = 1, + FmtLeft = FmtWidth<<1, + FmtPrec = FmtLeft<<1, + FmtSharp = FmtPrec<<1, + FmtSpace = FmtSharp<<1, + FmtSign = FmtSpace<<1, + FmtZero = FmtSign<<1, + FmtUnsigned = FmtZero<<1, + FmtShort = FmtUnsigned<<1, + FmtLong = FmtShort<<1, + FmtVLong = FmtLong<<1, + FmtComma = FmtVLong<<1, + FmtByte = FmtComma<<1, + + FmtFlag = FmtByte<<1 +}; + +extern int print(char*, ...); +extern char* seprint(char*, char*, char*, ...); +extern char* vseprint(char*, char*, char*, va_list); +extern int snprint(char*, int, char*, ...); +extern int vsnprint(char*, int, char*, va_list); +extern int sprint(char*, char*, ...); + +#pragma varargck argpos fmtprint 2 +#pragma varargck argpos print 1 +#pragma varargck argpos seprint 3 +#pragma varargck argpos snprint 3 +#pragma varargck argpos sprint 2 + +#pragma varargck type "lld" vlong +#pragma varargck type "llx" vlong +#pragma varargck type "llb" vlong +#pragma varargck type "lld" uvlong +#pragma varargck type "llx" uvlong +#pragma varargck type "llb" uvlong +#pragma varargck type "ld" long +#pragma varargck type "lx" long +#pragma varargck type "lb" long +#pragma varargck type "ld" ulong +#pragma varargck type "lx" ulong +#pragma varargck type "lb" ulong +#pragma varargck type "d" int +#pragma varargck type "x" int +#pragma varargck type "c" int +#pragma varargck type "C" int +#pragma varargck type "b" int +#pragma varargck type "d" uint +#pragma varargck type "x" uint +#pragma varargck type "c" uint +#pragma varargck type "C" uint +#pragma varargck type "b" uint +#pragma varargck type "s" char* +#pragma varargck type 
"q" char* +#pragma varargck type "S" Rune* +#pragma varargck type "%" void +#pragma varargck type "p" uintptr +#pragma varargck type "p" void* +#pragma varargck type "H" void* +#pragma varargck flag ',' +#pragma varargck flag ' ' +#pragma varargck flag 'h' + +extern int fmtinstall(int, int (*)(Fmt*)); +extern int fmtprint(Fmt*, char*, ...); +extern int fmtstrcpy(Fmt*, char*); +extern char* fmtstrflush(Fmt*); +extern int fmtstrinit(Fmt*); + +/* + * quoted strings + */ +extern void quotefmtinstall(void); + +/* + * Time-of-day + */ +extern void cycles(uvlong*); /* 64-bit value of the cycle counter if there is one, 0 if there isn't */ + +/* + * NIX core types + */ +enum +{ + NIXTC = 0, /* time shared */ + NIXKC, /* kernel */ + NIXAC, /* application */ + NIXXC, /* exclusive */ + NIXOC, /* offline */ + NIXUC, /* uninitalized */ + NIXSC, /* stopping */ + NIXQC, /* quiescent */ + NIXROLES, +}; + +/* + * one-of-a-kind + */ +extern int abs(int); +extern int atoi(char*); +extern char* cleanname(char*); +extern int dec64(uchar*, int, char*, int); +extern uintptr getcallerpc(void*); +extern int getfields(char*, char**, int, int, char*); +extern int gettokens(char *, char **, int, char *); +extern long strtol(char*, char**, int); +extern ulong strtoul(char*, char**, int); +extern vlong strtoll(char*, char**, int); +extern uvlong strtoull(char*, char**, int); +extern void qsort(void*, long, long, int (*)(void*, void*)); +/* + * Syscall data structures + */ +#define MORDER 0x0003 /* mask for bits defining order of mounting */ +#define MREPL 0x0000 /* mount replaces object */ +#define MBEFORE 0x0001 /* mount goes before others in union directory */ +#define MAFTER 0x0002 /* mount goes after others in union directory */ +#define MCREATE 0x0004 /* permit creation in mounted directory */ +#define MCACHE 0x0010 /* cache some data */ +#define MMASK 0x0017 /* all bits on */ + +#define OREAD 0 /* open for read */ +#define OWRITE 1 /* write */ +#define ORDWR 2 /* read and write */ +#define 
OEXEC 3 /* execute, == read but check execute permission */ +#define OTRUNC 16 /* or'ed in (except for exec), truncate file first */ +#define OCEXEC 32 /* or'ed in, close on exec */ +#define ORCLOSE 64 /* or'ed in, remove on close */ +#define OEXCL 0x1000 /* or'ed in, exclusive create */ + +#define NCONT 0 /* continue after note */ +#define NDFLT 1 /* terminate after note */ +#define NSAVE 2 /* clear note but hold state */ +#define NRSTR 3 /* restore saved state */ + +typedef struct Qid Qid; +typedef struct Dir Dir; +typedef struct OWaitmsg OWaitmsg; +typedef struct Waitmsg Waitmsg; + +#define ERRMAX 128 /* max length of error string */ +#define KNAMELEN 28 /* max length of name held in kernel */ + +/* bits in Qid.type */ +#define QTDIR 0x80 /* type bit for directories */ +#define QTAPPEND 0x40 /* type bit for append only files */ +#define QTEXCL 0x20 /* type bit for exclusive use files */ +#define QTMOUNT 0x10 /* type bit for mounted channel */ +#define QTAUTH 0x08 /* type bit for authentication file */ +#define QTFILE 0x00 /* plain file */ + +/* bits in Dir.mode */ +#define DMDIR 0x80000000 /* mode bit for directories */ +#define DMAPPEND 0x40000000 /* mode bit for append only files */ +#define DMEXCL 0x20000000 /* mode bit for exclusive use files */ +#define DMMOUNT 0x10000000 /* mode bit for mounted channel */ +#define DMREAD 0x4 /* mode bit for read permission */ +#define DMWRITE 0x2 /* mode bit for write permission */ +#define DMEXEC 0x1 /* mode bit for execute permission */ + +struct Qid +{ + uvlong path; + ulong vers; + uchar type; +}; + +struct Dir { + /* system-modified data */ + ushort type; /* server type */ + uint dev; /* server subtype */ + /* file data */ + Qid qid; /* unique id from server */ + ulong mode; /* permissions */ + ulong atime; /* last read time */ + ulong mtime; /* last write time */ + vlong length; /* file length: see */ + char *name; /* last element of path */ + char *uid; /* owner name */ + char *gid; /* group name */ + char *muid; /* 
last modifier name */ +}; + +struct OWaitmsg +{ + char pid[12]; /* of loved one */ + char time[3*12]; /* of loved one and descendants */ + char msg[64]; /* compatibility BUG */ +}; + +struct Waitmsg +{ + int pid; /* of loved one */ + ulong time[3]; /* of loved one and descendants */ + char msg[ERRMAX]; /* actually variable-size in user mode */ +}; + +/* + * Zero-copy I/O + */ +typedef struct Zio Zio; + +struct Zio +{ + void* data; + ulong size; +}; + +extern char etext[]; +extern char edata[]; +extern char end[]; + +/* + * Nix optimistic semaphores + */ + +/* + * Userspace spin lock (libc's Lock). + */ +typedef struct Ulock Ulock; + +struct Ulock{ + int val; +}; + +typedef struct Sem Sem; + +struct Sem +{ + int tickets; + int waiting; /* procs that may be waiting in the sem */ + int going; /* procs calling down, transiting to the kernel */ + Ulock userlock; /* userspace spin lock */ +}; diff -Nru 0/sys/src/nix/port/mul64fract.c 4/sys/src/nix/port/mul64fract.c --- 0/sys/src/nix/port/mul64fract.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/mul64fract.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,39 @@ +#include "u.h" + +/* mul64fract(uvlong*r, uvlong a, uvlong b) + * + * Multiply two 64 numbers and return the middle 64 bits of the 128 bit result. + * + * The assumption is that one of the numbers is a + * fixed point number with the integer portion in the + * high word and the fraction in the low word. + * + * There should be an assembler version of this routine + * for each architecture. This one is intended to + * make ports easier. 
+ * + * ignored r0 = lo(a0*b0) + * lsw of result r1 = hi(a0*b0) +lo(a0*b1) +lo(a1*b0) + * msw of result r2 = hi(a0*b1) +hi(a1*b0) +lo(a1*b1) + * ignored r3 = hi(a1*b1) + */ + +void +mul64fract(uvlong *r, uvlong a, uvlong b) +{ + uvlong bh, bl; + uvlong ah, al; + uvlong res; + + bl = b & 0xffffffffULL; + bh = b >> 32; + al = a & 0xffffffffULL; + ah = a >> 32; + + res = (al*bl)>>32; + res += (al*bh); + res += (ah*bl); + res += (ah*bh)<<32; + + *r = res; +} diff -Nru 0/sys/src/nix/port/netif.c 4/sys/src/nix/port/netif.c --- 0/sys/src/nix/port/netif.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/netif.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,698 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/netif.h" + +static int netown(Netfile*, char*, int); +static int openfile(Netif*, int); +static char* matchtoken(char*, char*); +static char* netmulti(Netif*, Netfile*, uchar*, int); +static int parseaddr(uchar*, char*, int); + +/* + * set up a new network interface + */ +void +netifinit(Netif *nif, char *name, int nfile, ulong limit) +{ + strncpy(nif->name, name, KNAMELEN-1); + nif->name[KNAMELEN-1] = 0; + nif->nfile = nfile; + nif->f = malloc(nfile*sizeof(Netfile*)); + if(nif->f == nil) + panic("netifinit: no memory"); + memset(nif->f, 0, nfile*sizeof(Netfile*)); + nif->limit = limit; +} + +/* + * generate a 3 level directory + */ +static int +netifgen(Chan *c, char*, Dirtab *vp, int, int i, Dir *dp) +{ + Qid q; + Netif *nif = (Netif*)vp; + Netfile *f; + int t; + int perm; + char *o; + + q.type = QTFILE; + q.vers = 0; + + /* top level directory contains the name of the network */ + if(c->qid.path == 0){ + switch(i){ + case DEVDOTDOT: + q.path = 0; + q.type = QTDIR; + devdir(c, q, ".", 0, eve, 0555, dp); + break; + case 0: + q.path = N2ndqid; + q.type = QTDIR; + strcpy(up->genbuf, nif->name); + devdir(c, q, up->genbuf, 0, eve, 0555, dp); + break; + default: + return -1; + } + return 
1; + } + + /* second level contains clone plus all the conversations */ + t = NETTYPE(c->qid.path); + if(t == N2ndqid || t == Ncloneqid || t == Naddrqid + || t == Nstatqid || t == Nifstatqid || t == Nmtuqid){ + switch(i) { + case DEVDOTDOT: + q.type = QTDIR; + q.path = 0; + devdir(c, q, ".", 0, eve, DMDIR|0555, dp); + break; + case 0: + q.path = Ncloneqid; + devdir(c, q, "clone", 0, eve, 0666, dp); + break; + case 1: + q.path = Naddrqid; + devdir(c, q, "addr", 0, eve, 0666, dp); + break; + case 2: + q.path = Nstatqid; + devdir(c, q, "stats", 0, eve, 0444, dp); + break; + case 3: + q.path = Nifstatqid; + devdir(c, q, "ifstats", 0, eve, 0444, dp); + break; + case 4: + q.path = Nmtuqid; + devdir(c, q, "mtu", 0, eve, 0444, dp); + break; + default: + i -= 5; + if(i >= nif->nfile) + return -1; + if(nif->f[i] == 0) + return 0; + q.type = QTDIR; + q.path = NETQID(i, N3rdqid); + snprint(up->genbuf, sizeof up->genbuf, "%d", i); + devdir(c, q, up->genbuf, 0, eve, DMDIR|0555, dp); + break; + } + return 1; + } + + /* third level */ + f = nif->f[NETID(c->qid.path)]; + if(f == 0) + return 0; + if(*f->owner){ + o = f->owner; + perm = f->mode; + } else { + o = eve; + perm = 0666; + } + switch(i){ + case DEVDOTDOT: + q.type = QTDIR; + q.path = N2ndqid; + strcpy(up->genbuf, nif->name); + devdir(c, q, up->genbuf, 0, eve, DMDIR|0555, dp); + break; + case 0: + q.path = NETQID(NETID(c->qid.path), Ndataqid); + devdir(c, q, "data", 0, o, perm, dp); + break; + case 1: + q.path = NETQID(NETID(c->qid.path), Nctlqid); + devdir(c, q, "ctl", 0, o, perm, dp); + break; + case 2: + q.path = NETQID(NETID(c->qid.path), Nstatqid); + devdir(c, q, "stats", 0, eve, 0444, dp); + break; + case 3: + q.path = NETQID(NETID(c->qid.path), Ntypeqid); + devdir(c, q, "type", 0, eve, 0444, dp); + break; + case 4: + q.path = NETQID(NETID(c->qid.path), Nifstatqid); + devdir(c, q, "ifstats", 0, eve, 0444, dp); + break; + default: + return -1; + } + return 1; +} + +Walkqid* +netifwalk(Netif *nif, Chan *c, Chan *nc, 
char **name, int nname) +{ + return devwalk(c, nc, name, nname, (Dirtab *)nif, 0, netifgen); /* netifgen supplies the entries */ +} +/* open a file of the interface: directories are read-only; data and ctl take a reference on their Netfile; clone allocates a free Netfile and turns the channel into its ctl file; everything else is read-only */ +Chan* +netifopen(Netif *nif, Chan *c, int omode) +{ + int id; + Netfile *f; + + id = 0; + if(c->qid.type & QTDIR){ + if(omode != OREAD) + error(Eperm); + } else { + switch(NETTYPE(c->qid.path)){ + case Ndataqid: + case Nctlqid: + id = NETID(c->qid.path); + openfile(nif, id); + break; + case Ncloneqid: + id = openfile(nif, -1); + c->qid.path = NETQID(id, Nctlqid); + break; + default: + if(omode != OREAD) + error(Ebadarg); + } + switch(NETTYPE(c->qid.path)){ /* re-examined because clone has just become ctl */ + case Ndataqid: + case Nctlqid: + f = nif->f[id]; + if(netown(f, up->user, omode&7) < 0){ + netifclose(nif, c); + error(Eperm); + } + break; + } + } + c->mode = openmode(omode); + c->flag |= COPEN; + c->offset = 0; + c->iounit = qiomaxatomic; + return c; +} +/* read a directory, the input queue of a conversation (data), its number (ctl), or one of the text status files */ +long +netifread(Netif *nif, Chan *c, void *a, long n, vlong off) +{ + int i, j; + Netfile *f; + char *p; + long offset; + + if(c->qid.type & QTDIR) + return devdirread(c, a, n, (Dirtab*)nif, 0, netifgen); + + offset = off; + switch(NETTYPE(c->qid.path)){ + case Ndataqid: + f = nif->f[NETID(c->qid.path)]; + return qread(f->iq, a, n); + case Nctlqid: + return readnum(offset, a, n, NETID(c->qid.path), NUMSIZE); + case Nstatqid: + p = malloc(READSTR); + if(p == nil) + error(Enomem); + j = snprint(p, READSTR, "in: %llud\n", nif->inpackets); + j += snprint(p+j, READSTR-j, "link: %d\n", nif->link); + j += snprint(p+j, READSTR-j, "out: %llud\n", nif->outpackets); + j += snprint(p+j, READSTR-j, "crc errs: %llud\n", nif->crcs); + j += snprint(p+j, READSTR-j, "overflows: %llud\n", nif->overflows); + j += snprint(p+j, READSTR-j, "soft overflows: %llud\n", nif->soverflows); + j += snprint(p+j, READSTR-j, "framing errs: %llud\n", nif->frames); + j += snprint(p+j, READSTR-j, "buffer errs: %llud\n", nif->buffs); + j += snprint(p+j, READSTR-j, "output errs: %llud\n", nif->oerrs); + j += snprint(p+j, READSTR-j, "prom: %d\n", nif->prom); + j += snprint(p+j, READSTR-j, "mbps: 
%d\n", nif->mbps); + j += snprint(p+j, READSTR-j, "addr: "); + for(i = 0; i < nif->alen; i++) + j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]); + snprint(p+j, READSTR-j, "\n"); + n = readstr(offset, a, n, p); + free(p); + return n; + case Naddrqid: + p = malloc(READSTR); + j = 0; + for(i = 0; i < nif->alen; i++) + j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]); + n = readstr(offset, a, n, p); + free(p); + return n; + case Ntypeqid: + f = nif->f[NETID(c->qid.path)]; + return readnum(offset, a, n, f->type, NUMSIZE); + case Nifstatqid: + return 0; + case Nmtuqid: + snprint(up->genbuf, sizeof up->genbuf, "%11.ud %11.ud %11.ud\n", nif->minmtu, nif->mtu, nif->maxmtu); + return readstr(offset, a, n, up->genbuf); + } + error(Ebadarg); + return -1; /* not reached */ +} + +Block* +netifbread(Netif *nif, Chan *c, long n, vlong offset) +{ + if((c->qid.type & QTDIR) || NETTYPE(c->qid.path) != Ndataqid) + return devbread(c, n, offset); + + return qbread(nif->f[NETID(c->qid.path)]->iq, n); +} + +/* + * make sure this type isn't already in use on this device + */ +static int +typeinuse(Netif *nif, int type) +{ + Netfile *f, **fp, **efp; + + if(type <= 0) + return 0; + + efp = &nif->f[nif->nfile]; + for(fp = nif->f; fp < efp; fp++){ + f = *fp; + if(f == 0) + continue; + if(f->type == type) + return 1; + } + return 0; +} + +/* + * the devxxx.c that calls us handles writing data, it knows best + */ +long +netifwrite(Netif *nif, Chan *c, void *a, long n) +{ + Netfile *f; + int type, mtu; + char *p, buf[64]; + uchar binaddr[Nmaxaddr]; + + if(NETTYPE(c->qid.path) != Nctlqid) + error(Eperm); + + if(n >= sizeof(buf)) + n = sizeof(buf)-1; + memmove(buf, a, n); + buf[n] = 0; + + if(waserror()){ + qunlock(nif); + nexterror(); + } + + qlock(nif); + f = nif->f[NETID(c->qid.path)]; + if((p = matchtoken(buf, "connect")) != 0){ + qclose(f->iq); + type = atoi(p); + if(typeinuse(nif, type)) + error(Einuse); + f->type = type; + if(f->type < 0) + nif->all++; + qreopen(f->iq); + } else 
if(matchtoken(buf, "promiscuous")){ + if(f->prom == 0){ + if(nif->prom == 0 && nif->promiscuous != nil) + nif->promiscuous(nif->arg, 1); /* first promiscuous opener switches the hardware */ + f->prom = 1; + nif->prom++; + } + } else if((p = matchtoken(buf, "scanbs")) != 0){ + /* scan for base stations */ + if(f->scan == 0){ + type = atoi(p); + if(type < 5) + type = 5; + if(nif->scanbs != nil) + nif->scanbs(nif->arg, type); + f->scan = type; + nif->scan++; + } + } else if((p = matchtoken(buf, "mtu")) != 0){ + /* poor planning. */ + if(!iseve()) + error(Eperm); + mtu = atoi(p); + /* zero resets default. */ + if(mtu != 0) + if(mtu < nif->minmtu || mtu > nif->maxmtu) + error(Ebadarg); + if(nif->hwmtu) + nif->mtu = nif->hwmtu(nif->arg, mtu); + else + nif->mtu = mtu; + } else if(matchtoken(buf, "l2bridge")){ + f->bridge |= 2; + } else if(matchtoken(buf, "bridge")){ + f->bridge |= 1; + } else if(matchtoken(buf, "headersonly")){ + f->headersonly = 1; + } else if((p = matchtoken(buf, "addmulti")) != 0){ + if(parseaddr(binaddr, p, nif->alen) < 0) + error("bad address"); + p = netmulti(nif, f, binaddr, 1); + if(p) + error(p); + } else if((p = matchtoken(buf, "remmulti")) != 0){ + if(parseaddr(binaddr, p, nif->alen) < 0) + error("bad address"); + p = netmulti(nif, f, binaddr, 0); + if(p) + error(p); + } else + n = -1; /* not a message handled here */ + qunlock(nif); + poperror(); + return n; +} +/* change the owner and/or mode of a conversation; the caller must pass netown's write check. returns the length convM2D consumed */ +long +netifwstat(Netif *nif, Chan *c, uchar *db, long n) +{ + Dir *dir; + Netfile *f; + int l; + + f = nif->f[NETID(c->qid.path)]; + if(f == 0) + error(Enonexist); + + if(netown(f, up->user, OWRITE) < 0) + error(Eperm); + + dir = smalloc(sizeof(Dir)+n); + l = convM2D(db, n, &dir[0], (char*)&dir[1]); /* strings are unpacked into the space after the Dir */ + if(l == 0){ + free(dir); + error(Eshortstat); + } + if(!emptystr(dir[0].uid)) + strncpy(f->owner, dir[0].uid, KNAMELEN); /* NOTE(review): leaves owner without a terminating NUL when uid fills all KNAMELEN bytes */ + if(dir[0].mode != ~0UL) + f->mode = dir[0].mode; + free(dir); + return l; +} +/* stat any file of the interface through netifgen */ +long +netifstat(Netif *nif, Chan *c, uchar *db, long n) +{ + return devstat(c, db, n, (Dirtab *)nif, 0, netifgen); +} +/* drop a reference to a data or ctl file; the last close undoes the per-file settings and closes the input queue */ +void +netifclose(Netif *nif, Chan *c) +{ +
Netfile *f; + int t; + Netaddr *ap; + + if((c->flag & COPEN) == 0) + return; + + t = NETTYPE(c->qid.path); + if(t != Ndataqid && t != Nctlqid) + return; + + f = nif->f[NETID(c->qid.path)]; + qlock(f); + if(--(f->inuse) == 0){ + if(f->prom){ + qlock(nif); + if(--(nif->prom) == 0 && nif->promiscuous != nil) + nif->promiscuous(nif->arg, 0); + qunlock(nif); + f->prom = 0; + } + if(f->scan){ + qlock(nif); + if(--(nif->scan) == 0 && nif->scanbs != nil) + nif->scanbs(nif->arg, 0); + qunlock(nif); + f->prom = 0; + f->scan = 0; + } + if(f->nmaddr){ + qlock(nif); + t = 0; /* bit t of f->maddr stands for entry number t of nif->maddr, as assigned by netmulti */ + for(ap = nif->maddr; ap != nil && t < 8*sizeof(f->maddr); ap = ap->next, t++){ /* t used to stay 0, so bit 0 decided for every address */ + if(f->maddr[t/8] & (1<<(t%8))) + netmulti(nif, f, ap->addr, 0); + } + qunlock(nif); + f->nmaddr = 0; + } + if(f->type < 0){ + qlock(nif); + --(nif->all); + qunlock(nif); + } + f->owner[0] = 0; + f->type = 0; + f->bridge = 0; + f->headersonly = 0; + qclose(f->iq); + } + qunlock(f); +} + +Lock netlock; +/* check whether user o may open p with omode (0 if allowed, -1 if not); a file with no owner is claimed by o with mode 0660 */ +static int +netown(Netfile *p, char *o, int omode) +{ + static int access[] = { 0400, 0200, 0600, 0100 }; + int mode; + int t; + + lock(&netlock); + if(*p->owner){ + if(strncmp(o, p->owner, KNAMELEN) == 0) /* User */ + mode = p->mode; + else if(strncmp(o, eve, KNAMELEN) == 0) /* Bootes is group */ + mode = p->mode<<3; + else + mode = p->mode<<6; /* Other */ + + t = access[omode&3]; + if((t & mode) == t){ + unlock(&netlock); + return 0; + } else { + unlock(&netlock); + return -1; + } + } + strncpy(p->owner, o, KNAMELEN); + p->mode = 0660; + unlock(&netlock); + return 0; +} + +/* + * Increment the reference count of a network device. + * If id < 0, return an unused ether device. 
+ */ +static int +openfile(Netif *nif, int id) +{ + Netfile *f, **fp, **efp; + + if(id >= 0){ + f = nif->f[id]; + if(f == 0) + error(Enodev); + qlock(f); + qreopen(f->iq); + f->inuse++; + qunlock(f); + return id; + } + + qlock(nif); + if(waserror()){ + qunlock(nif); + nexterror(); + } + efp = &nif->f[nif->nfile]; + for(fp = nif->f; fp < efp; fp++){ + f = *fp; + if(f == 0){ /* empty slot: allocate a Netfile and its input queue */ + f = malloc(sizeof(Netfile)); + if(f == 0) + exhausted("memory"); + f->iq = qopen(nif->limit, Qmsg, 0, 0); + if(f->iq == nil){ + free(f); + exhausted("memory"); + } + *fp = f; + qlock(f); + } else { /* existing slot: reuse it only if nobody has it open */ + qlock(f); + if(f->inuse){ + qunlock(f); + continue; + } + } + f->inuse = 1; + qreopen(f->iq); + netown(f, up->user, 0); + qunlock(f); + qunlock(nif); + poperror(); + return fp - nif->f; + } + error(Enodev); + return -1; /* not reached */ +} + +/* + * look for a token starting a string, + * return a pointer to first non-space char after it + * (0 if the string does not start with the token followed by white space or the end) + */ +static char* +matchtoken(char *p, char *token) +{ + int n; + + n = strlen(token); + if(strncmp(p, token, n)) + return 0; + p += n; + if(*p == 0) + return p; + if(*p != ' ' && *p != '\t' && *p != '\n') + return 0; + while(*p == ' ' || *p == '\t' || *p == '\n') + p++; + return p; +} +/* shift-and-add hash of the len bytes at a, reduced to a bucket index below Nmhash */ +static ulong +hash(uchar *a, int len) +{ + ulong sum = 0; + + while(len-- > 0) + sum = (sum << 1) + *a++; + return sum%Nmhash; +} +/* returns 1 if addr is in the multicast hash table with a nonzero reference count, 0 otherwise */ +int +activemulti(Netif *nif, uchar *addr, int alen) +{ + Netaddr *hp; + + for(hp = nif->mhash[hash(addr, alen)]; hp; hp = hp->hnext) + if(memcmp(addr, hp->addr, alen) == 0){ + if(hp->ref) + return 1; + else + break; + } + return 0; +} +/* convert alen pairs of hex digits from from, each optionally followed by ':', into bytes at to; returns -1 if the string ends early, 0 otherwise */ +static int +parseaddr(uchar *to, char *from, int alen) +{ + char nip[4]; + char *p; + int i; + + p = from; + for(i = 0; i < alen; i++){ + if(*p == 0) + return -1; + nip[0] = *p++; + if(*p == 0) + return -1; + nip[1] = *p++; + nip[2] = 0; + to[i] = strtoul(nip, 0, 16); + if(*p == ':') + p++; + } + return 0; +} + +/* + * keep track of multicast addresses + */ +static char* +netmulti(Netif *nif, Netfile *f, uchar 
*addr, int add) +{ + Netaddr **l, *ap; + int i; /* position of addr in the nif->maddr chain; also its bit number in f->maddr */ + ulong h; + + if(nif->multicast == nil) + return "interface does not support multicast"; + + l = &nif->maddr; + i = 0; + for(ap = *l; ap; ap = *l){ + if(memcmp(addr, ap->addr, nif->alen) == 0) + break; + i++; + l = &ap->next; + } + + if(add){ + if(ap == 0){ /* unknown address: append to the chain and enter it in the hash table */ + *l = ap = smalloc(sizeof(*ap)); + memmove(ap->addr, addr, nif->alen); + ap->next = 0; + ap->ref = 1; + h = hash(addr, nif->alen); + ap->hnext = nif->mhash[h]; + nif->mhash[h] = ap; + } else { + ap->ref++; + } + if(ap->ref == 1){ /* first user: tell the hardware */ + nif->nmaddr++; + nif->multicast(nif->arg, addr, 1); + } + if(i < 8*sizeof(f->maddr)){ + if((f->maddr[i/8] & (1<<(i%8))) == 0) + f->nmaddr++; + f->maddr[i/8] |= 1<<(i%8); + } + } else { + if(ap == 0 || ap->ref == 0) + return 0; + ap->ref--; + if(ap->ref == 0){ /* last user gone: tell the hardware; the entry itself stays on the chain */ + nif->nmaddr--; + nif->multicast(nif->arg, addr, 0); + } + if(i < 8*sizeof(f->maddr)){ + if((f->maddr[i/8] & (1<<(i%8))) != 0) + f->nmaddr--; + f->maddr[i/8] &= ~(1<<(i%8)); + } + } + return 0; +} diff -Nru 0/sys/src/nix/port/netif.h 4/sys/src/nix/port/netif.h --- 0/sys/src/nix/port/netif.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/netif.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,121 @@ +typedef struct Netaddr Netaddr; +typedef struct Netfile Netfile; +typedef struct Netif Netif; + +enum +{ + Nmaxaddr= 64, /* size in bytes of the address buffers below */ + Nmhash= 31, /* number of buckets in Netif.mhash */ + + Ncloneqid= 1, + Naddrqid, + N2ndqid, + N3rdqid, + Ndataqid, + Nctlqid, + Nstatqid, + Ntypeqid, + Nifstatqid, + Nmtuqid, +}; + +/* + * Macros to manage Qid's used for multiplexed devices: + * the low 5 bits of a path hold the file type, the rest the conversation number + */ +#define NETTYPE(x) (((ulong)x)&0x1f) +#define NETID(x) ((((ulong)x))>>5) +#define NETQID(i,t) ((((ulong)i)<<5)|(t)) + +/* + * one per multiplexed connection + */ +struct Netfile +{ + QLock; + + int inuse; + ulong mode; + char owner[KNAMELEN]; + + int type; /* multiplexor type */ + int prom; /* promiscuous mode */ + int scan; /* base station scanning interval */ + int bridge; /* bridge mode */ + int headersonly; /* headers only - no data */ + uchar maddr[8]; /* bitmask of 
multicast addresses requested */ + int nmaddr; /* number of multicast addresses */ + + Queue* iq; /* input */ +}; + +/* + * a network address + */ +struct Netaddr +{ + Netaddr *next; /* allocation chain */ + Netaddr *hnext; /* next entry in the same Netif.mhash bucket */ + uchar addr[Nmaxaddr]; + int ref; +}; + +/* + * a network interface + */ +struct Netif +{ + QLock; + + /* multiplexing */ + char name[KNAMELEN]; /* for top level directory */ + int nfile; /* max number of Netfiles */ + Netfile **f; + + /* about net */ + int limit; /* flow control */ + int alen; /* address length */ + int mbps; /* megabits per sec */ + int link; /* link status */ + int minmtu; + int maxmtu; + int mtu; + uchar addr[Nmaxaddr]; + uchar bcast[Nmaxaddr]; + Netaddr *maddr; /* known multicast addresses */ + int nmaddr; /* number of known multicast addresses */ + Netaddr *mhash[Nmhash]; /* hash table of multicast addresses */ + int prom; /* number of promiscuous opens */ + int scan; /* number of base station scanners */ + int all; /* number of -1 multiplexors */ + + Queue* oq; /* output */ + + /* statistics */ + uvlong misses; + uvlong inpackets; + uvlong outpackets; + uvlong crcs; /* input crc errors */ + uvlong oerrs; /* output errors */ + uvlong frames; /* framing errors */ + uvlong overflows; /* packet overflows */ + uvlong buffs; /* buffering errors */ + uvlong soverflows; /* software overflow */ + + /* routines for touching the hardware; each is passed arg as its first argument */ + void *arg; + void (*promiscuous)(void*, int); + void (*multicast)(void*, uchar*, int); + int (*hwmtu)(void*, int); /* get/set mtu */ + void (*scanbs)(void*, uint); /* scan for base stations */ +}; + +void netifinit(Netif*, char*, int, ulong); +Walkqid* netifwalk(Netif*, Chan*, Chan*, char **, int); +Chan* netifopen(Netif*, Chan*, int); +void netifclose(Netif*, Chan*); +long netifread(Netif*, Chan*, void*, long, vlong); +Block* netifbread(Netif*, Chan*, long, vlong); +long netifwrite(Netif*, Chan*, void*, long); +long netifwstat(Netif*, Chan*, uchar*, long); +long netifstat(Netif*, Chan*, 
uchar*, long); +int activemulti(Netif*, uchar*, int); diff -Nru 0/sys/src/nix/port/page.c 4/sys/src/nix/port/page.c --- 0/sys/src/nix/port/page.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/page.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,687 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +enum +{ + Nstartpgs = 32, /* pages of each size preallocated by pageinit */ + Nminfree = 3, /* auxpage and duppage give up when fewer pages than this are free */ + Nfreepgs = 512, /* putpage frees a page outright once more than this many are free */ +}; + +typedef struct Pgnd Pgnd; +enum +{ + Punused = 0, + Pused, + Pfreed, +}; + +struct Pgnd +{ + uintmem pa; + int sts; +}; + +#define pghash(daddr) pga.hash[(daddr>>PGSHFT)&(PGHSIZE-1)] +Pgalloc pga; /* new allocator */ +/* append one line per configured page size to s: free pages, allocated pages, and the page size in KiB */ +char* +seprintpagestats(char *s, char *e) +{ + int i; + + lock(&pga); + for(i = 0; i < m->npgsz; i++) + if(m->pgsz[i] != 0) + s = seprint(s, e, "%ud/%d %dK user pages avail\n", + pga.pgsza[i].freecount, + pga.pgsza[i].npages.ref, m->pgsz[i]/KiB); + unlock(&pga); + return s; +} + +/* + * Preallocate some pages: + * some 2M ones will be used by the first process. + * some 1G ones will be allocated for each domain so processes may use them. + */ +void +pageinit(void) +{ + int si, i, color; + Page *pg; + + pga.userinit = 1; + DBG("pageinit: npgsz = %d\n", m->npgsz); + /* + * Don't pre-allocate 4K pages, we are not using them anymore. + */ + for(si = 1; si < m->npgsz; si++){ + for(i = 0; i < Nstartpgs; i++){ + if(si < 2) + color = -1; + else + color = i; + pg = pgalloc(m->pgsz[si], color); + if(pg == nil){ + DBG("pageinit: pgalloc failed. 
breaking.\n"); + break; /* don't consume more memory */ + } + DBG("pageinit: alloced pa %#P sz %#ux color %d\n", + pg->pa, m->pgsz[si], pg->color); + lock(&pga); + pg->ref = 0; + pagechainhead(pg); + unlock(&pga); + } + } + + pga.userinit = 0; +} +/* map a page size in bytes to its index in m->pgsz; prints a complaint and returns -1 if size is not a configured page size */ +int +getpgszi(usize size) +{ + int si; + + for(si = 0; si < m->npgsz; si++) + if(size == m->pgsz[si]) + return si; + print("getpgszi: size %#ulx not found\n", size); + return -1; +} +/* allocate a Page structure plus physical memory of the given size; returns nil if size is not a configured page size or memory is short. the page is not put on any list */ +Page* +pgalloc(usize size, int color) +{ + Page *pg; + int si; + + si = getpgszi(size); + if(si < 0 || (pg = malloc(sizeof(Page))) == nil){ /* a -1 index used to reach pga.pgsza[] below */ + DBG("pgalloc: bad size or malloc failed\n"); + return nil; + } + memset(pg, 0, sizeof *pg); + if((pg->pa = physalloc(size, &color, pg)) == 0){ + DBG("pgalloc: physalloc failed: size %#ulx color %d\n", size, color); + free(pg); + return nil; + } + pg->pgszi = si; /* size index */ + incref(&pga.pgsza[si].npages); + pg->color = color; + return pg; +} +/* release a page obtained from pgalloc: its physical memory and the Page structure itself */ +void +pgfree(Page* pg) +{ + decref(&pga.pgsza[pg->pgszi].npages); + physfree(pg->pa, m->pgsz[pg->pgszi]); + free(pg); +} +/* take p off the free list of its page size; pga must already be locked (panics if the lock could be taken) */ +void +pageunchain(Page *p) +{ + Pgsza *pa; + + if(canlock(&pga)) + panic("pageunchain"); + pa = &pga.pgsza[p->pgszi]; + if(p->prev) + p->prev->next = p->next; + else + pa->head = p->next; + if(p->next) + p->next->prev = p->prev; + else + pa->tail = p->prev; + p->prev = p->next = nil; + pa->freecount--; +} +/* add p at the tail of the free list of its page size; pga must already be locked */ +void +pagechaintail(Page *p) +{ + Pgsza *pa; + + if(canlock(&pga)) + panic("pagechaintail"); + pa = &pga.pgsza[p->pgszi]; + if(pa->tail) { + p->prev = pa->tail; + pa->tail->next = p; + } + else { + pa->head = p; + p->prev = 0; + } + pa->tail = p; + p->next = 0; + pa->freecount++; +} +/* add p at the head of the free list of its page size; pga must already be locked */ +void +pagechainhead(Page *p) +{ + Pgsza *pa; + + if(canlock(&pga)) + panic("pagechainhead"); + pa = &pga.pgsza[p->pgszi]; + if(pa->head) { + p->next = pa->head; + pa->head->prev = p; + } + else { + pa->tail = p; + p->next = 0; + } + pa->head = p; + p->prev = 0; + pa->freecount++; +} +/* first page on the list pl with the given color (any page for NOCOLOR), or nil */ +static Page* +findpg(Page *pl, int color) +{ + Page *p; + + for(p = pl; p != nil; p 
= p->next) + if(color == NOCOLOR || p->color == color) + return p; + return nil; +} +/* + * can be called with up == nil during boot. + * returns a page of the given size with one reference, taken off the free list + * and optionally zeroed; returns 0 if the caller's segment lock (*s) had to be + * released while paging, in which case *s is cleared. + */ +Page* +newpage(int clear, Segment **s, uintptr va, usize size, int color) +{ + Page *p; + KMap *k; + uchar ct; + Pgsza *pa; + int i, dontalloc, si; + static int once; /* NOTE(review): not referenced anywhere in this function */ + + si = getpgszi(size); + pa = &pga.pgsza[si]; + + lock(&pga); + /* + * Beware, new page may enter a loop even if this loop does not + * loop more than once, if the segment is lost and fault calls us + * again. Either way, we accept any color if we failed a couple of times. + */ + for(i = 0;; i++){ + if(i > 3) + color = NOCOLOR; + + /* + * 1. try to reuse a free one. + */ + p = findpg(pa->head, color); + if(p != nil) + break; + + /* + * 2. try to allocate a new one from physical memory + */ + p = pgalloc(size, color); + if(p != nil){ + pagechainhead(p); /* chained so that the pageunchain after the loop applies to both cases */ + break; + } + + /* + * 3. out of memory, try with the pager. + * but release the segment (if any) while in the pager. + */ + unlock(&pga); + + dontalloc = 0; + if(s && *s) { + qunlock(&((*s)->lk)); + *s = 0; + dontalloc = 1; + } + + /* + * Try to get any page of the desired color + * or any color for NOCOLOR. + */ + kickpager(si, color); + + /* + * If called from fault and we lost the segment from + * underneath don't waste time allocating and freeing + * a page. 
Fault will call newpage again when it has + * reacquired the segment locks + */ + if(dontalloc) + return 0; + + lock(&pga); + } + + assert(p != nil); + ct = PG_NEWCOL; + + pageunchain(p); + + lock(p); + if(p->ref != 0) + panic("newpage pa %#ullx", p->pa); + + uncachepage(p); /* the page may still be caching part of an image */ + p->ref++; + p->va = va; + p->modref = 0; + for(i = 0; i < nelem(p->cachectl); i++) + p->cachectl[i] = ct; + unlock(p); + unlock(&pga); + + if(clear) { + k = kmap(p); + memset((void*)VA(k), 0, m->pgsz[p->pgszi]); + kunmap(k); + } + DBG("newpage: va %#p pa %#ullx pgsz %#ux color %d\n", + p->va, p->pa, m->pgsz[p->pgszi], p->color); + + return p; +} +/* drop a reference to p; on the last one the page goes back on the free list (tail if it caches an image, head otherwise) or is freed when the list already holds more than Nfreepgs pages */ +void +putpage(Page *p) +{ + Pgsza *pa; + int rlse; + + lock(&pga); + lock(p); + + if(p->ref == 0) + panic("putpage"); + + if(--p->ref > 0) { + unlock(p); + unlock(&pga); + return; + } + rlse = 0; + if(p->image != nil) + pagechaintail(p); + else{ + /* + * Free pages if we have plenty in the free list. + */ + pa = &pga.pgsza[p->pgszi]; + if(pa->freecount > Nfreepgs) + rlse = 1; + else + pagechainhead(p); + } + if(pga.r.p != nil) + wakeup(&pga.r); + unlock(p); + if(rlse) + pgfree(p); /* done after unlock(p) because pgfree releases the Page itself */ + unlock(&pga); +} + +/* + * Get an auxiliary page. + * Don't do so if less than Nminfree pages. + * Only used by cache. + * The interface must specify page size. 
+ */ +Page* +auxpage(usize size) +{ + Page *p; + Pgsza *pa; + int si; + + si = getpgszi(size); + lock(&pga); + pa = &pga.pgsza[si]; + p = pa->head; + if(pa->freecount < Nminfree){ + unlock(&pga); + return nil; + } + pageunchain(p); + lock(p); + if(p->ref != 0) + panic("auxpage"); + p->ref++; + uncachepage(p); + unlock(p); + unlock(&pga); + + return p; +} + +static int dupretries = 15000; +/* returns 0 when p needs no copy or a cached copy was made on a free page, 1 when p was simply removed from the image cache instead */ +int +duppage(Page *p) /* Always call with p locked */ +{ + Pgsza *pa; + Page *np; + int color; + int retries; + + retries = 0; +retry: + + if(retries++ > dupretries){ + print("duppage %d, up %#p\n", retries, up); + dupretries += 100; + if(dupretries > 100000) + panic("duppage\n"); + uncachepage(p); + return 1; + } + + + /* don't dup pages with no image */ + if(p->ref == 0 || p->image == nil || p->image->notext) + return 0; + + /* + * normal lock ordering is to call + * lock(&pga) before lock(p). + * To avoid deadlock, we have to drop + * our locks and try again. + */ + if(!canlock(&pga)){ + unlock(p); + if(up) + sched(); + lock(p); + goto retry; + } + + pa = &pga.pgsza[p->pgszi]; + /* No freelist cache when memory is very low */ + if(pa->freecount < Nminfree){ + unlock(&pga); + uncachepage(p); + return 1; + } + + color = p->color; + for(np = pa->head; np; np = np->next) + if(np->color == color) + break; + + /* No page of the correct color */ + if(np == 0){ + unlock(&pga); + uncachepage(p); + return 1; + } + + pageunchain(np); + pagechaintail(np); + /* + * XXX - here's a bug? - np is on the freelist but it's not really free. + * when we unlock palloc someone else can come in, decide to + * use np, and then try to lock it. they succeed after we've + * run copypage and cachepage and unlock(np). then what? + * they call pageunchain before locking(np), so it's removed + * from the freelist, but still in the cache because of + * cachepage below. if someone else looks in the cache + * before they remove it, the page will have a nonzero ref + * once they finally lock(np). 
+ * + * What I know is that not doing the pagechaintail, but + * doing it at the end, to prevent the race, leads to a + * deadlock, even following the pga, pg lock ordering. -nemo + */ + lock(np); + unlock(&pga); + + /* Cache the new version */ + uncachepage(np); + np->va = p->va; + np->daddr = p->daddr; + copypage(p, np); + cachepage(np, p->image); + unlock(np); + uncachepage(p); + + return 0; +} +/* copy the contents of page f into page t; panics unless both have the same, valid page size index */ +void +copypage(Page *f, Page *t) +{ + KMap *ks, *kd; + + if(f->pgszi != t->pgszi || t->pgszi < 0) + panic("copypage"); + ks = kmap(f); + kd = kmap(t); + memmove((void*)VA(kd), (void*)VA(ks), m->pgsz[t->pgszi]); + kunmap(ks); + kunmap(kd); +} +/* remove p from the image cache: unlink it from its hash chain, drop its reference to the image and clear image and daddr. does nothing if p caches no image */ +void +uncachepage(Page *p) /* Always called with a locked page */ +{ + Page **l, *f; + + if(p->image == 0) + return; + + lock(&pga.hashlock); + l = &pghash(p->daddr); + for(f = *l; f; f = f->hash){ + if(f == p){ + *l = p->hash; + break; + } + l = &f->hash; + } + unlock(&pga.hashlock); + putimage(p->image); + p->image = 0; + p->daddr = 0; +} +/* enter p in the image cache for image i under the already set p->daddr, taking a reference on i */ +void +cachepage(Page *p, Image *i) +{ + Page **l; + + /* If this ever happens it should be fixed by calling + * uncachepage instead of panic. I think there is a race + * with pio in which this can happen. Calling uncachepage is + * correct - I just wanted to see if we got here. 
+ */ + if(p->image) + panic("cachepage"); + + incref(i); + lock(&pga.hashlock); + p->image = i; + l = &pghash(p->daddr); + p->hash = *l; + *l = p; + unlock(&pga.hashlock); +} +/* remove the page caching (i, daddr), if there is one, from the hash table and drop its reference to the image */ +void +cachedel(Image *i, ulong daddr) +{ + Page *f, **l; + + lock(&pga.hashlock); + l = &pghash(daddr); + for(f = *l; f; f = f->hash){ + if(f->image == i && f->daddr == daddr){ + lock(f); + if(f->image == i && f->daddr == daddr){ /* checked again now that the page is locked */ + *l = f->hash; + putimage(f->image); + f->image = nil; + f->daddr = 0; + } + unlock(f); + break; + } + l = &f->hash; + } + unlock(&pga.hashlock); +} +/* find the page caching (i, daddr) and return it with one more reference, taking it off the free list if it had none; returns nil (0) if there is no such page */ +Page * +lookpage(Image *i, ulong daddr) +{ + Page *f; + + lock(&pga.hashlock); + for(f = pghash(daddr); f; f = f->hash){ + if(f->image == i && f->daddr == daddr){ + unlock(&pga.hashlock); + + lock(&pga); + lock(f); + if(f->image != i || f->daddr != daddr){ /* the page changed identity while no lock was held */ + unlock(f); + unlock(&pga); + return 0; + } + if(++f->ref == 1) + pageunchain(f); + unlock(&pga); + unlock(f); + + return f; + } + } + unlock(&pga.hashlock); + + return nil; +} + +/* + * Called from imagereclaim, to try to release Images. + * The argument shows the preferred image to release pages from. + * All images will be tried, from lru to mru. + */ +uvlong +pagereclaim(Image *i) +{ + Page *p; + uvlong ticks; + + lock(&pga); + ticks = fastticks(nil); + + /* + * All the pages with images backing them are at the + * end of the list (see putpage) so start there and work + * backward. 
+ */ + for(p = pga.pgsza[0].tail; p && p->image == i; p = p->prev){ /* NOTE(review): only the free list of page size index 0 is scanned, and the scan stops at the first page of another image */ + if(p->ref == 0 && canlock(p)){ + if(p->ref == 0) { + uncachepage(p); + } + unlock(p); + } + } + ticks = fastticks(nil) - ticks; + unlock(&pga); + + return ticks; /* time spent here, in fastticks units */ +} +/* make a copy of the Pte old for segment s: the new table points at the same pages, each with its reference count incremented. panics on a page that is on swap */ +Pte* +ptecpy(Segment *s, Pte *old) +{ + Pte *new; + Page **src, **dst; + + new = ptealloc(s); + dst = &new->pages[old->first-old->pages]; + new->first = dst; + for(src = old->first; src <= old->last; src++, dst++) + if(*src){ + if(onswap(*src)) + panic("ptecpy: no swap"); + else{ + lock(*src); + (*src)->ref++; + unlock(*src); + } + new->last = dst; + *dst = *src; + } + + return new; +} +/* allocate an empty Pte with room for s->ptepertab pages; first starts past the end and last at the start, i.e. an empty range */ +Pte* +ptealloc(Segment *s) +{ + Pte *new; + + new = smalloc(sizeof(Pte) + sizeof(Page*)*s->ptepertab); + new->first = &new->pages[s->ptepertab]; + new->last = new->pages; + return new; +} +/* release every page referenced by p and then p itself; physical segments use their own pgfree routine when they have one, other segments use putpage */ +void +freepte(Segment *s, Pte *p) +{ + int ref; + void (*fn)(Page*); + Page *pt, **pg, **ptop; + + switch(s->type&SG_TYPE) { + case SG_PHYSICAL: + fn = s->pseg->pgfree; + ptop = &p->pages[s->ptepertab]; + if(fn) { + for(pg = p->pages; pg < ptop; pg++) { + if(*pg == 0) + continue; + (*fn)(*pg); + *pg = 0; + } + break; + } + for(pg = p->pages; pg < ptop; pg++) { + pt = *pg; + if(pt == 0) + continue; + lock(pt); + ref = --pt->ref; + unlock(pt); + if(ref == 0) + free(pt); /* only the Page structure is freed here */ + } + break; + default: + for(pg = p->first; pg <= p->last; pg++) + if(*pg) { + putpage(*pg); + *pg = 0; + } + } + free(p); +} diff -Nru 0/sys/src/nix/port/pager.c 4/sys/src/nix/port/pager.c --- 0/sys/src/nix/port/pager.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/pager.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,335 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * There's no pager process here. 
+ * One process waiting for memory becomes the pager, + * during the call to kickpager() + */ + +enum +{ + Minpages = 2 /* pageouttext keeps going while fewer pages than this are free */ +}; + +static QLock pagerlck; +static struct +{ + ulong ntext; + ulong nbig; + ulong nall; +} pstats; /* how often kickpager reached each of its fallback stages */ +/* there is no swap in this kernel: the swap entry points below are stubs */ +void +swapinit(void) +{ +} + +void +putswap(Page*) +{ + panic("putswap"); +} + +void +dupswap(Page*) +{ + panic("dupswap"); +} + +int +swapcount(ulong daddr) +{ + USED(daddr); + return 0; +} +/* takes a reference on s in every case; returns nonzero if canpage() succeeds for p (sole user) or for every process that is not Dead and has s among its segments */ +static int +canflush(Proc *p, Segment *s) +{ + int i, x; + + lock(s); + if(s->ref == 1) { /* Easy if we are the only user */ + s->ref++; + unlock(s); + return canpage(p); + } + s->ref++; + unlock(s); + + /* Now we must do hardwork to ensure all processes which have tlb + * entries for this segment will be flushed if we succeed in paging it out + */ + for(x = 0; (p = psincref(x)) != nil; x++){ + if(p->state != Dead) { + for(i = 0; i < NSEG; i++){ + if(p->seg[i] == s && !canpage(p)){ + psdecref(p); + return 0; + } + } + } + psdecref(p); + } + return 1; +} +/* release the pages of text segment s that were not referenced since the last pass; returns the number of resident pages looked at, or 0 if the segment could not be processed */ +static int +pageout(Proc *p, Segment *s) +{ + int i, size, n; + Pte *l; + Page **pg, *entry; + + if((s->type&SG_TYPE) != SG_TEXT) + panic("pageout"); + + if(!canqlock(&s->lk)) /* We cannot afford to wait, we will surely deadlock */ + return 0; + + if(s->steal){ /* Protected by /dev/proc */ + qunlock(&s->lk); + return 0; + } + + if(!canflush(p, s)){ /* Able to invalidate all tlbs with references */ + qunlock(&s->lk); + putseg(s); + return 0; + } + + if(waserror()){ + qunlock(&s->lk); + putseg(s); + return 0; + } + + /* Pass through the pte tables looking for text memory pages to put */ + n = 0; + size = s->mapsize; + for(i = 0; i < size; i++){ + l = s->map[i]; + if(l == 0) + continue; + for(pg = l->first; pg < l->last; pg++){ /* NOTE(review): stops before l->last, while ptecpy and freepte in page.c run through <= last; confirm which is intended */ + entry = *pg; + if(pagedout(entry)) + continue; + n++; + if(entry->modref & PG_REF){ /* recently referenced: clear the bit and keep the page this time */ + entry->modref &= ~PG_REF; + continue; + } + putpage(*pg); + *pg = nil; + } + } + poperror(); + qunlock(&s->lk); + putseg(s); /* drops the reference canflush took */ + return n; +} + +static void +pageouttext(int pgszi, int color) +{ +
+ Proc *p; + Pgsza *pa; + int i, n, np, x; + Segment *s; + int prepaged; + + USED(color); + pa = &pga.pgsza[pgszi]; + n = x = 0; + prepaged = 0; + + /* + * Try first to steal text pages from non-prepaged processes, + * then from anyone. + */ +Again: + do{ + if((p = psincref(x)) == nil) + break; + np = 0; + if(p->prepagemem == 0 || prepaged != 0) + if(p->state != Dead && p->noswap == 0 && canqlock(&p->seglock)){ + for(i = 0; i < NSEG; i++){ + if((s = p->seg[i]) == nil) + continue; + if((s->type&SG_TYPE) == SG_TEXT) + np = pageout(p, s); + } + qunlock(&p->seglock); + } + /* + * else process dead or locked or changing its segments + */ + psdecref(p); + n += np; + if(np > 0) + DBG("pager: %d from proc #%d %#p\n", np, x, p); + x++; + }while(pa->freecount < Minpages); + + if(pa->freecount < Minpages && prepaged++ == 0) /* second pass, this time including prepaged processes; x is not reset */ + goto Again; +} +/* hand one free page back to physical memory for each page size index from si upward that has free pages; with once set, stop after the first page released */ +static void +freepages(int si, int once) +{ + Pgsza *pa; + Page *p; + + for(; si < m->npgsz; si++){ + pa = &pga.pgsza[si]; + if(pa->freecount > 0){ + DBG("kickpager() up %#p: releasing %udK pages\n", + up, m->pgsz[si]/KiB); + lock(&pga); + if(pa->freecount == 0){ /* checked again under the lock */ + unlock(&pga); + continue; + } + p = pa->head; + pageunchain(p); + unlock(&pga); + if(p->ref != 0) + panic("freepages pa %#ullx", p->pa); + pgfree(p); + if(once) + break; + } + } +} +/* allocate one page of the given size index and color from physical memory and put it on the free list; 0 on success, -1 on failure */ +static int +tryalloc(int pgszi, int color) +{ + Page *p; + + p = pgalloc(m->pgsz[pgszi], color); + if(p != nil){ + lock(&pga); + pagechainhead(p); + unlock(&pga); + return 0; + } + return -1; +} +/* nonzero if the list starting at pl holds a page of the given color (any page at all for NOCOLOR). NOTE(review): pl is read by the caller before pga is locked here */ +static int +hascolor(Page *pl, int color) +{ + Page *p; + + lock(&pga); + for(p = pl; p != nil; p = p->next) + if(color == NOCOLOR || p->color == color){ + unlock(&pga); + return 1; + } + unlock(&pga); + return 0; +} + +/* + * Someone couldn't find pages of the given size index and color. + * (color may be NOCOLOR if the caller is trying to get any page + * and is desperate). + * Many processes may be calling this at the same time, + * The first one operates as a pager and does what it can. 
+ */ +void +kickpager(int pgszi, int color) +{ + Pgsza *pa; + + if(DBGFLG>1) + DBG("kickpager() %#p\n", up); + if(waserror()) + panic("error in kickpager"); + qlock(&pagerlck); /* the others wait here and then find out at step 1 whether they still need to do anything */ + pa = &pga.pgsza[pgszi]; + + /* + * 1. did anyone else release one for us in the mean time? + */ + if(hascolor(pa->head, color)) + goto Done; + + /* + * 2. try allocating from physical memory + */ + tryalloc(pgszi, color); + if(hascolor(pa->head, color)) + goto Done; + + /* + * If pgszi is <= text page size, try releasing text pages. + */ + if(m->pgsz[pgszi] <= 2*MiB){ + pstats.ntext++; + DBG("kickpager() up %#p: reclaiming text pages\n", up); + pageouttext(pgszi, color); + tryalloc(pgszi, color); + if(hascolor(pa->head, color)){ + DBG("kickpager() found %ud free\n", pa->freecount); + goto Done; + } + } + + /* + * Try releasing memory from bigger pages. + */ + pstats.nbig++; + freepages(pgszi+1, 1); + tryalloc(pgszi, color); + if(hascolor(pa->head, color)){ + DBG("kickpager() found %ud free\n", pa->freecount); + goto Done; + } + + /* + * Really the last resort. Try releasing memory from all pages. + */ + pstats.nall++; + DBG("kickpager() up %#p: releasing all pages\n", up); + freepages(0, 0); + tryalloc(pgszi, color); + if(pa->freecount > 0){ /* any color will do at this point */ + DBG("kickpager() found %ud free\n", pa->freecount); + goto Done; + } + + /* + * What else can we do? + * But don't panic if we are still trying to get memory of + * a particular color and there's none. We'll retry asking + * for any color. 
+ */ + if(color == NOCOLOR) + panic("kickpager(): no physical memory"); + +Done: + poperror(); + qunlock(&pagerlck); + if(DBGFLG>1) + DBG("kickpager() done %#p\n", up); +} +/* print how often kickpager reached each of its fallback stages */ +void +pagersummary(void) +{ + print("ntext %uld nbig %uld nall %uld\n", + pstats.ntext, pstats.nbig, pstats.nall); + print("no swap\n"); +} diff -Nru 0/sys/src/nix/port/parse.c 4/sys/src/nix/port/parse.c --- 0/sys/src/nix/port/parse.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/parse.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,114 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * Generous estimate of number of fields, including terminal nil pointer + * (counts runs of non-blank characters in the first n bytes of p) + */ +static int +ncmdfield(char *p, int n) +{ + int white, nwhite; + char *ep; + int nf; + + if(p == nil) + return 1; + + nf = 0; + ep = p+n; + white = 1; /* first text will start field */ + while(p < ep){ + nwhite = (strchr(" \t\r\n", *p++ & 0xFF) != 0); /* UTF is irrelevant */ + if(white && !nwhite) /* beginning of field */ + nf++; + white = nwhite; + } + return nf+1; /* +1 for nil */ +} + +/* + * parse a command written to a device + * the result is a single smalloc'd block holding the Cmdbuf, the field + * pointers and a private copy of the text, so one free() releases it all + */ +Cmdbuf* +parsecmd(char *p, int n) +{ + Cmdbuf *volatile cb; + int nf; + char *sp; + + nf = ncmdfield(p, n); + + /* allocate Cmdbuf plus string pointers plus copy of string including \0 */ + sp = smalloc(sizeof(*cb) + nf * sizeof(char*) + n + 1); + cb = (Cmdbuf*)sp; + cb->f = (char**)(&cb[1]); + cb->buf = (char*)(&cb->f[nf]); + + if(up!=nil && waserror()){ /* free cb if the copy below raises an error */ + free(cb); + nexterror(); + } + memmove(cb->buf, p, n); + if(up != nil) + poperror(); + + /* dump new line and null terminate */ + if(n > 0 && cb->buf[n-1] == '\n') + n--; + cb->buf[n] = '\0'; + + cb->nf = tokenize(cb->buf, cb->f, nf-1); + cb->f[cb->nf] = nil; + + return cb; +} + +/* + * Reconstruct original message, for error diagnostic + */ +void +cmderror(Cmdbuf *cb, char *s) +{ + int i; + char *p, *e; + + p = up->genbuf; + e = p+ERRMAX-10; + p = seprint(p, e, "%s \"", s); +
for(i=0; inf; i++){ + if(i > 0) + p = seprint(p, e, " "); + p = seprint(p, e, "%q", cb->f[i]); + } + strcpy(p, "\""); + error(up->genbuf); +} + +/* + * Look up entry in table + */ +Cmdtab* +lookupcmd(Cmdbuf *cb, Cmdtab *ctab, int nctab) +{ + int i; + Cmdtab *ct; + + if(cb->nf == 0) + error("empty control message"); + + for(ct = ctab, i=0; icmd, "*") !=0) /* wildcard always matches */ + if(strcmp(ct->cmd, cb->f[0]) != 0) + continue; + if(ct->narg != 0 && ct->narg != cb->nf) + cmderror(cb, Ecmdargs); + return ct; + } + + cmderror(cb, "unknown control message"); + return nil; +} diff -Nru 0/sys/src/nix/port/pgrp.c 4/sys/src/nix/port/pgrp.c --- 0/sys/src/nix/port/pgrp.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/pgrp.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,311 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +static Ref pgrpid; +static Ref mountid; + +void +pgrpnote(ulong noteid, char *a, long n, int flag) +{ + int i; + Proc *p; + char buf[ERRMAX]; + + if(n >= ERRMAX-1) + error(Etoobig); + + memmove(buf, a, n); + buf[n] = 0; + for(i = 0; (p = psincref(i)) != nil; i++){ + if(p == up || p->state == Dead || p->noteid != noteid || p->kp){ + psdecref(p); + continue; + } + qlock(&p->debug); + if(p->pid == 0 || p->noteid != noteid){ + qunlock(&p->debug); + psdecref(p); + continue; + } + if(!waserror()) { + postnote(p, 0, buf, flag); + poperror(); + } + qunlock(&p->debug); + psdecref(p); + } +} + +Pgrp* +newpgrp(void) +{ + Pgrp *p; + + p = smalloc(sizeof(Pgrp)); + p->ref = 1; + p->pgrpid = incref(&pgrpid); + return p; +} + +Rgrp* +newrgrp(void) +{ + Rgrp *r; + + r = smalloc(sizeof(Rgrp)); + r->ref = 1; + return r; +} + +void +closergrp(Rgrp *r) +{ + if(decref(r) == 0) + free(r); +} + +void +closepgrp(Pgrp *p) +{ + Mhead **h, **e, *f, *next; + + if(decref(p) != 0) + return; + + qlock(&p->debug); + wlock(&p->ns); + p->pgrpid = -1; + + e = &p->mnthash[MNTHASH]; + for(h = p->mnthash; h < e; h++) { + 
for(f = *h; f; f = next) { + wlock(&f->lock); + cclose(f->from); + mountfree(f->mount); + f->mount = nil; + next = f->hash; + wunlock(&f->lock); + putmhead(f); + } + } + wunlock(&p->ns); + qunlock(&p->debug); + free(p); +} + +void +pgrpinsert(Mount **order, Mount *mount) +{ + Mount *f; + + mount->order = 0; + if(*order == 0) { + *order = mount; + return; + } + for(f = *order; f; f = f->order) { + if(mount->mountid < f->mountid) { + mount->order = f; + *order = mount; + return; + } + order = &f->order; + } + *order = mount; +} + +/* + * pgrpcpy MUST preserve the mountid allocation order of the parent group + */ +void +pgrpcpy(Pgrp *to, Pgrp *from) +{ + int i; + Mount *n, *mount, **link, *order; + Mhead *f, **tom, **l, *mh; + + wlock(&from->ns); + order = 0; + tom = to->mnthash; + for(i = 0; i < MNTHASH; i++) { + l = tom++; + for(f = from->mnthash[i]; f; f = f->hash) { + rlock(&f->lock); + mh = newmhead(f->from); + *l = mh; + l = &mh->hash; + link = &mh->mount; + for(mount = f->mount; mount != nil; mount = mount->next) { + n = newmount(mh, mount->to, mount->mflag, mount->spec); + mount->copy = n; + pgrpinsert(&order, mount); + *link = n; + link = &n->next; + } + runlock(&f->lock); + } + } + /* + * Allocate mount ids in the same sequence as the parent group + */ + lock(&mountid); + for(mount = order; mount != nil; mount = mount->order) + mount->copy->mountid = mountid.ref++; + unlock(&mountid); + wunlock(&from->ns); +} + +Fgrp* +dupfgrp(Fgrp *f) +{ + Fgrp *new; + Chan *c; + int i; + + new = smalloc(sizeof(Fgrp)); + if(f == nil){ + new->fd = smalloc(DELTAFD*sizeof(Chan*)); + new->nfd = DELTAFD; + new->ref = 1; + return new; + } + + lock(f); + /* Make new fd list shorter if possible, preserving quantization */ + new->nfd = f->maxfd+1; + i = new->nfd%DELTAFD; + if(i != 0) + new->nfd += DELTAFD - i; + new->fd = malloc(new->nfd*sizeof(Chan*)); + if(new->fd == nil){ + unlock(f); + free(new); + error("no memory for fgrp"); + } + new->ref = 1; + + new->maxfd = f->maxfd; + 
for(i = 0; i <= f->maxfd; i++) { + if(c = f->fd[i]){ + incref(c); + new->fd[i] = c; + } + } + unlock(f); + + return new; +} + +void +closefgrp(Fgrp *f) +{ + int i; + Chan *c; + + if(f == 0) + return; + + if(decref(f) != 0) + return; + + /* + * If we get into trouble, forceclosefgrp + * will bail us out. + */ + up->closingfgrp = f; + for(i = 0; i <= f->maxfd; i++){ + if(c = f->fd[i]){ + f->fd[i] = nil; + cclose(c); + } + } + up->closingfgrp = nil; + + free(f->fd); + free(f); +} + +/* + * Called from sleep because up is in the middle + * of closefgrp and just got a kill ctl message. + * This usually means that up has wedged because + * of some kind of deadly embrace with mntclose + * trying to talk to itself. To break free, hand the + * unclosed channels to the close queue. Once they + * are finished, the blocked cclose that we've + * interrupted will finish by itself. + */ +void +forceclosefgrp(void) +{ + int i; + Chan *c; + Fgrp *f; + + if(up->procctl != Proc_exitme || up->closingfgrp == nil){ + print("bad forceclosefgrp call"); + return; + } + + f = up->closingfgrp; + for(i = 0; i <= f->maxfd; i++){ + if(c = f->fd[i]){ + f->fd[i] = nil; + ccloseq(c); + } + } +} + +Mount* +newmount(Mhead *mh, Chan *to, int flag, char *spec) +{ + Mount *mount; + + mount = smalloc(sizeof(Mount)); + mount->to = to; + mount->head = mh; + incref(to); + mount->mountid = incref(&mountid); + mount->mflag = flag; + if(spec != 0) + kstrdup(&mount->spec, spec); + + return mount; +} + +void +mountfree(Mount *mount) +{ + Mount *f; + + while(mount != nil) { + f = mount->next; + cclose(mount->to); + mount->mountid = 0; + free(mount->spec); + free(mount); + mount = f; + } +} + +void +resrcwait(char *reason) +{ + char *p; + + if(up == nil) + panic("resrcwait"); + + p = up->psstate; + if(reason) { + up->psstate = reason; + print("%s\n", reason); + } + + tsleep(&up->sleep, return0, 0, 300); + up->psstate = p; +} diff -Nru 0/sys/src/nix/port/pmc.h 4/sys/src/nix/port/pmc.h --- 0/sys/src/nix/port/pmc.h 
Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/pmc.h	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,24 @@
+
+enum{
+	PmcCtlNullval = 0xdead,
+};
+
+typedef struct PmcCtlCtrId PmcCtlCtrId;
+
+
+struct PmcCtlCtrId {
+	char portdesc[KNAMELEN];
+	char archdesc[KNAMELEN];
+};
+
+int	pmcnregs(void);
+int	pmcsetctl(u32int coreno, PmcCtl *p, u32int regno);
+int	pmctrans(PmcCtl *p);
+int	pmcgetctl(u32int coreno, PmcCtl *p, u32int regno);
+int	pmcdescstr(char *str, int nstr);
+int	pmcctlstr(char *str, int nstr, PmcCtl *p);
+u64int	pmcgetctr(u32int coreno, u32int regno);
+int	pmcsetctr(u32int coreno, u64int v, u32int regno);
+
+void	pmcupdate(Mach *m);
+extern	void (*_pmcupdate)(Mach *m);
diff -Nru 0/sys/src/nix/port/portclock.c 4/sys/src/nix/port/portclock.c
--- 0/sys/src/nix/port/portclock.c	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/portclock.c	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,326 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "ureg.h"
+
+/* one timer queue per processor, kept sorted by ascending twhen */
+struct Timers
+{
+	Lock;
+	Timer	*head;
+};
+
+static Timers timers[MACHMAX];
+
+/* per-processor counts: timer interrupts taken, timers expired */
+ulong intrcount[MACHMAX];
+ulong fcallcount[MACHMAX];
+
+/*
+ * Compute nt's expiry time and link it into tt in twhen order.
+ * Returns the expiry if nt became the head of the queue, so the
+ * caller can pass it to timerset(); otherwise returns 0.
+ */
+static vlong
+tadd(Timers *tt, Timer *nt)
+{
+	vlong when;
+	Timer *t, **last;
+
+	/* Called with tt locked */
+	assert(nt->tt == nil);
+	switch(nt->tmode){
+	default:
+		panic("timer");
+		break;
+	case Trelative:
+		if(nt->tns <= 0)
+			nt->tns = 1;
+		nt->twhen = fastticks(nil) + ns2fastticks(nt->tns);
+		break;
+	case Tperiodic:
+		/*
+		 * Periodic timers must have a period of at least 100µs.
+		 */
+		assert(nt->tns >= 100000);
+		if(nt->twhen == 0){
+			/*
+			 * Look for another timer at the
+			 * same frequency for combining.
+			 */
+			for(t = tt->head; t; t = t->tnext){
+				if(t->tmode == Tperiodic && t->tns == nt->tns)
+					break;
+			}
+			if(t)
+				nt->twhen = t->twhen;
+			else
+				nt->twhen = fastticks(nil);
+		}
+
+		/*
+		 * The new time must be in the future.
+		 * ns2fastticks() can return 0 if the tod clock
+		 * has been adjusted by, e.g. timesync.
+		 */
+		when = ns2fastticks(nt->tns);
+		if(when == 0)
+			when = 1;
+		nt->twhen += when;
+		break;
+	}
+
+	for(last = &tt->head; t = *last; last = &t->tnext){
+		if(t->twhen > nt->twhen)
+			break;
+	}
+	nt->tnext = *last;
+	*last = nt;
+	nt->tt = tt;
+	if(last == &tt->head)
+		return nt->twhen;
+	return 0;
+}
+
+/*
+ * Unlink dt from the queue it is on, if any.  Returns the expiry of
+ * the new head when dt was the head and timers remain, otherwise 0.
+ */
+static vlong
+tdel(Timer *dt)
+{
+	Timer *t, **last;
+	Timers *tt;
+
+	tt = dt->tt;
+	if(tt == nil)
+		return 0;
+	for(last = &tt->head; t = *last; last = &t->tnext){
+		if(t == dt){
+			assert(dt->tt);
+			dt->tt = nil;
+			*last = t->tnext;
+			break;
+		}
+	}
+	if(last == &tt->head && tt->head)
+		return tt->head->twhen;
+	return 0;
+}
+
+/* add or modify a timer */
+void
+timeradd(Timer *nt)
+{
+	Timers *tt;
+	vlong when;
+
+	/* Must lock Timer struct before Timers struct */
+	ilock(nt);
+	if(tt = nt->tt){
+		ilock(tt);
+		tdel(nt);
+		iunlock(tt);
+	}
+	tt = &timers[m->machno];
+	ilock(tt);
+	when = tadd(tt, nt);
+	if(when)
+		timerset(when);
+	iunlock(tt);
+	iunlock(nt);
+}
+
+
+/* remove a timer from the queue it is on, if any */
+void
+timerdel(Timer *dt)
+{
+	Timers *tt;
+	vlong when;
+
+	ilock(dt);
+	if(tt = dt->tt){
+		ilock(tt);
+		when = tdel(dt);
+		if(when && tt == &timers[m->machno])
+			timerset(tt->head->twhen);
+		iunlock(tt);
+	}
+	iunlock(dt);
+}
+
+/*
+ * Per-tick work for this processor: tick counters, deferred mmu
+ * flush, time accounting, kernel profiling, and (except on cores
+ * in the NIXUC role) exit check, alarms and the scheduler hook.
+ */
+void
+hzclock(Ureg *ur)
+{
+	uintptr pc;
+
+	m->ticks++;
+	if(m->machno == 0)
+		sys->ticks = m->ticks;
+
+	pc = userpc(ur);
+	if(m->proc)
+		m->proc->pc = pc;
+
+	if(m->mmuflush){
+		if(up)
+			mmuflush();
+		m->mmuflush = 0;
+	}
+
+	accounttime();
+	kmapinval();
+
+	if(kproftimer != nil)
+		kproftimer(pc);
+
+	if(m->nixrole == NIXUC)
+		return;
+
+	if(active.exiting) {
+		iprint("someone's exiting\n");
+		exit(0);
+	}
+
+	checkalarms();
+
+	if(up && up->state == Running)
+		hzsched();	/* in proc.c */
+}
+
+/*
+ * Timer interrupt.  Runs each expired timer on this processor's
+ * queue with the queue unlocked and re-adds periodic ones.  On
+ * meeting a timer still in the future it calls timerset(), drops the
+ * lock and, if any expired timer had tf == nil, calls hzclock().
+ */
+void
+timerintr(Ureg *u, vlong)
+{
+	Timer *t;
+	Timers *tt;
+	vlong when, now;
+	int callhzclock;
+
+	intrcount[m->machno]++;
+	callhzclock = 0;
+	tt = &timers[m->machno];
+	now = fastticks(nil);
+	ilock(tt);
+	while(t = tt->head){
+		/*
+		 * No need to ilock t here: any manipulation of t
+		 * requires tdel(t) and this must be done with a
+		 * lock to tt held.  We have tt, so the tdel will
+		 * wait until we're done
+		 */
+		when = t->twhen;
+		if(when > now){
+			timerset(when);
+			iunlock(tt);
+			if(callhzclock)
+				hzclock(u);
+			return;
+		}
+		tt->head = t->tnext;
+		assert(t->tt == tt);
+		t->tt = nil;
+		fcallcount[m->machno]++;
+		iunlock(tt);
+		if(t->tf)
+			(*t->tf)(u, t);
+		else
+			callhzclock++;
+		ilock(tt);
+		if(t->tmode == Tperiodic)
+			tadd(tt, t);
+	}
+	iunlock(tt);
+}
+
+/* create the periodic HZ-clock timer and add it via timeradd() */
+void
+timersinit(void)
+{
+	Timer *t;
+
+	/*
+	 * T->tf == nil means the HZ clock for this processor.
+	 */
+	todinit();
+	t = malloc(sizeof(*t));
+	if(t == nil)
+		panic("timersinit: no memory for Timer");
+	t->tmode = Tperiodic;
+	t->tt = nil;
+	t->tns = 1000000000/HZ;
+	t->tf = nil;
+	timeradd(t);
+}
+
+/*
+ * Arrange for f to be called every ms milliseconds (every clock
+ * tick if ms is 0) from the timer queue of processor 0.
+ * Returns the new Timer.
+ */
+Timer*
+addclock0link(void (*f)(void), int ms)
+{
+	Timer *nt;
+	vlong when;
+
+	/* Synchronize to hztimer if ms is 0 */
+	nt = malloc(sizeof(Timer));
+	if(nt == nil)
+		panic("addclock0link: no memory for Timer");
+	if(ms == 0)
+		ms = 1000/HZ;
+	nt->tns = (vlong)ms*1000000LL;
+	nt->tmode = Tperiodic;
+	nt->tt = nil;
+	nt->tf = (void (*)(Ureg*, Timer*))f;
+
+	ilock(&timers[0]);
+	when = tadd(&timers[0], nt);
+	if(when)
+		timerset(when);
+	iunlock(&timers[0]);
+	return nt;
+}
+
+/*
+ *  This tk2ms avoids overflows that the macro version is prone to.
+ *  It is a LOT slower so shouldn't be used if you're just converting
+ *  a delta.
+ */
+ulong
+tk2ms(ulong ticks)
+{
+	uvlong t, hz;
+
+	t = ticks;
+	hz = HZ;
+	t *= 1000L;
+	t = t/hz;
+	ticks = t;
+	return ticks;
+}
+
+/* convert milliseconds to clock ticks */
+ulong
+ms2tk(ulong ms)
+{
+	/* avoid overflows at the cost of precision */
+	if(ms >= 1000000000/HZ)
+		return (ms/1000)*HZ;
+	return (ms*HZ+500)/1000;
+}
diff -Nru 0/sys/src/nix/port/portdat.h 4/sys/src/nix/port/portdat.h
--- 0/sys/src/nix/port/portdat.h	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/portdat.h	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,1138 @@
+typedef struct Alarms	Alarms;
+typedef struct Block	Block;
+typedef struct Chan	Chan;
+typedef struct Cmdbuf	Cmdbuf;
+typedef struct Cmdtab	Cmdtab;
+typedef struct Confmem	Confmem;
+typedef struct Dev	Dev;
+typedef struct DevConf	DevConf;
+typedef struct Dirtab	Dirtab;
+typedef struct Edf	Edf;
+typedef struct Egrp	Egrp;
+typedef struct Evalue	Evalue;
+typedef struct Fgrp	Fgrp;
+typedef struct Image	Image;
+typedef struct Kzio	Kzio;
+typedef struct Log	Log;
+typedef struct Logflag	Logflag;
+typedef struct Lockstats	Lockstats;
+typedef struct Mhead	Mhead;
+typedef struct Mnt	Mnt;
+typedef struct Mntcache Mntcache;
+typedef struct Mntrpc	Mntrpc;
+typedef struct Mntwalk	Mntwalk;
+typedef struct Mount	Mount;
+typedef struct Note	Note;
+typedef struct Page	Page;
+typedef struct Pallocmem	Pallocmem;
+typedef struct Path	Path;
+typedef struct Perf	Perf;
+typedef struct Pgalloc Pgalloc;
+typedef struct Pgrp	Pgrp;
+typedef struct Pgsza	Pgsza;
+typedef struct Physseg	Physseg;
+typedef struct PhysUart	PhysUart;
+typedef struct Proc	Proc;
+typedef struct Procalloc	Procalloc;
+typedef struct Pte	Pte;
+typedef struct QLock	QLock;
+typedef struct QLockstats	QLockstats;
+typedef struct Queue	Queue;
+typedef struct Ref	Ref;
+typedef struct Rendez	Rendez;
+typedef struct Rgrp	Rgrp;
+typedef struct RWlock	RWlock;
+typedef struct Sched	Sched;
+typedef struct Schedq	Schedq;
+typedef struct Segment	Segment;
+typedef struct Ksem	Ksem;
+typedef struct Sema	Sema;
+typedef struct Ksems	Ksems;
+typedef
struct Timer Timer; +typedef struct Timers Timers; +typedef struct Uart Uart; +typedef struct Waitq Waitq; +typedef struct Waitstats Waitstats; +typedef struct Walkqid Walkqid; +typedef struct Watchdog Watchdog; +typedef struct Zseg Zseg; +typedef int Devgen(Chan*, char*, Dirtab*, int, int, Dir*); + +#pragma incomplete DevConf +#pragma incomplete Edf +#pragma incomplete Mntcache +#pragma incomplete Mntrpc +#pragma incomplete Queue +#pragma incomplete Timers + +#include + +struct Ref +{ + Lock; + int ref; +}; + +struct Rendez +{ + Lock; + Proc *p; +}; + +enum{ + NWstats = 500, + WSlock = 0, + WSqlock, + WSslock, +}; + +/* + * different arrays with stat info, so we can memset any of them + * to 0 to clear stats. + */ +struct Waitstats +{ + int on; + int npcs; + int* type; + uintptr* pcs; + int* ns; + uvlong* wait; + uvlong* total; +}; +extern Waitstats waitstats; + +struct Lockstats +{ + uvlong locks; + uvlong glare; + uvlong inglare; +}; +extern Lockstats lockstats; + +struct QLockstats +{ + uvlong rlock; + uvlong rlockq; + uvlong wlock; + uvlong wlockq; + uvlong qlock; + uvlong qlockq; +}; +extern QLockstats qlockstats; + +struct QLock +{ + Lock use; /* to access Qlock structure */ + Proc *head; /* next process waiting for object */ + Proc *tail; /* last process waiting for object */ + int locked; /* flag */ + uintptr pc; +}; + +struct RWlock +{ + Lock use; + Proc *head; /* list of waiting processes */ + Proc *tail; + uintptr wpc; /* pc of writer */ + Proc *wproc; /* writing proc */ + int readers; /* number of readers */ + int writer; /* number of writers */ +}; + +struct Alarms +{ + QLock; + Proc *head; +}; + +/* + * Access types in namec & channel flags + */ +enum +{ + Aaccess, /* as in stat, wstat */ + Abind, /* for left-hand-side of bind */ + Atodir, /* as in chdir */ + Aopen, /* for i/o */ + Amount, /* to be mounted or mounted upon */ + Acreate, /* is to be created */ + Aremove, /* will be removed by caller */ + + COPEN = 0x0001, /* for i/o */ + CMSG = 0x0002, 
/* the message channel for a mount */ +/*rsc CCREATE = 0x0004, /* permits creation if c->mnt */ + CCEXEC = 0x0008, /* close on exec */ + CFREE = 0x0010, /* not in use */ + CRCLOSE = 0x0020, /* remove on close */ + CCACHE = 0x0080, /* client cache */ +}; + +/* flag values */ +enum +{ + BINTR = (1<<0), + + Bipck = (1<<2), /* ip checksum */ + Budpck = (1<<3), /* udp checksum */ + Btcpck = (1<<4), /* tcp checksum */ + Bpktck = (1<<5), /* packet checksum */ +}; + +struct Block +{ + Block* next; + Block* list; + uchar* rp; /* first unconsumed byte */ + uchar* wp; /* first empty byte */ + uchar* lim; /* 1 past the end of the buffer */ + uchar* base; /* start of the buffer */ + void (*free)(Block*); + ushort flag; + ushort checksum; /* IP checksum of complete packet (minus media header) */ +}; +#define BLEN(s) ((s)->wp - (s)->rp) +#define BALLOC(s) ((s)->lim - (s)->base) + +struct Chan +{ + Ref; /* the Lock in this Ref is also Chan's lock */ + Chan* next; /* allocation */ + Chan* link; + vlong offset; /* in fd */ + vlong devoffset; /* in underlying device; see read */ + Dev* dev; + uint devno; + ushort mode; /* read/write */ + ushort flag; + Qid qid; + int fid; /* for devmnt */ + uint iounit; /* chunk size for i/o; 0==default */ + Mhead* umh; /* mount point that derived Chan; used in unionread */ + Chan* umc; /* channel in union; held for union read */ + QLock umqlock; /* serialize unionreads */ + int uri; /* union read index */ + int dri; /* devdirread index */ + uchar* dirrock; /* directory entry rock for translations */ + int nrock; + int mrock; + QLock rockqlock; + int ismtpt; + Mntcache*mc; /* Mount cache pointer */ + Mnt* mux; /* Mnt for clients using me for messages */ + union { + void* aux; + Qid pgrpid; /* for #p/notepg */ + }; + Chan* mchan; /* channel to mounted server */ + Qid mqid; /* qid of root of mount point */ + Path* path; +}; + +struct Path +{ + Ref; + char* s; + Chan** mtpt; /* mtpt history */ + int len; /* strlen(s) */ + int alen; /* allocated length 
of s */ + int mlen; /* number of path elements */ + int malen; /* allocated length of mtpt */ +}; + +struct Dev +{ + int dc; + char* name; + + void (*reset)(void); + void (*init)(void); + void (*shutdown)(void); + Chan* (*attach)(char*); + Walkqid*(*walk)(Chan*, Chan*, char**, int); + long (*stat)(Chan*, uchar*, long); + Chan* (*open)(Chan*, int); + void (*create)(Chan*, char*, int, int); + void (*close)(Chan*); + long (*read)(Chan*, void*, long, vlong); + Block* (*bread)(Chan*, long, vlong); + long (*write)(Chan*, void*, long, vlong); + long (*bwrite)(Chan*, Block*, vlong); + void (*remove)(Chan*); + long (*wstat)(Chan*, uchar*, long); + void (*power)(int); /* power mgt: power(1) => on, power (0) => off */ + int (*config)(int, char*, DevConf*); /* returns 0 on error */ + int (*zread)(Chan*, Kzio*, int, usize, vlong); + int (*zwrite)(Chan*, Kzio*, int, vlong); +}; + +struct Dirtab +{ + char name[KNAMELEN]; + Qid qid; + vlong length; + long perm; +}; + +struct Walkqid +{ + Chan *clone; + int nqid; + Qid qid[1]; +}; + +struct Mntwalk /* state for /proc/#/ns */ +{ + int cddone; + Mhead* mh; + Mount* cm; +}; + +struct Mount +{ + int mountid; + Mount* next; + Mhead* head; + Mount* copy; + Mount* order; + Chan* to; /* channel replacing channel */ + int mflag; + char *spec; +}; + +struct Mhead +{ + Ref; + RWlock lock; + Chan* from; /* channel mounted upon */ + Mount* mount; /* what's mounted upon it */ + Mhead* hash; /* Hash chain */ +}; + +struct Mnt +{ + Lock; + /* references are counted using c->ref; channels on this mount point incref(c->mchan) == Mnt.c */ + Chan *c; /* Channel to file service */ + Proc *rip; /* Reader in progress */ + Mntrpc *queue; /* Queue of pending requests on this channel */ + uint id; /* Multiplexer id for channel check */ + Mnt *list; /* Free list */ + int flags; /* cache */ + int msize; /* data + IOHDRSZ */ + char *version; /* 9P version */ + Queue *q; /* input queue */ +}; + +enum +{ + NUser, /* note provided externally */ + NExit, /* 
deliver note quietly */ + NDebug, /* print debug message */ +}; + +struct Note +{ + char msg[ERRMAX]; + int flag; /* whether system posted it */ +}; + +enum +{ + PG_NOFLUSH = 0, + PG_TXTFLUSH = 1, /* flush dcache and invalidate icache */ + PG_DATFLUSH = 2, /* flush both i & d caches (UNUSED) */ + PG_NEWCOL = 3, /* page has been recolored */ + + PG_MOD = 0x01, /* software modified bit */ + PG_REF = 0x02, /* software referenced bit */ +}; + +struct Page +{ + Lock; + uintmem pa; /* Physical address in memory */ + uintptr va; /* Virtual address for user */ + ulong daddr; /* Disc address on swap */ + int ref; /* Reference count */ + uchar modref; /* Simulated modify/reference bits */ + int color; /* Cache coloring */ + char cachectl[MACHMAX]; /* Cache flushing control for mmuput */ + Image *image; /* Associated text or swap image */ + Page *next; /* Lru free list */ + Page *prev; + Page *hash; /* Image hash chains */ + int pgszi; /* size index in m->pgsz[] */ +}; + +struct Image +{ + Ref; + Chan *c; /* channel to text file */ + Qid qid; /* Qid for page cache coherence */ + Qid mqid; + Chan *mchan; + int dc; /* Device type of owning channel */ +//subtype + Segment *s; /* TEXT segment for image if running */ + Image *hash; /* Qid hash chains */ + Image *next; /* Free list or lru list */ + Image *prev; /* lru list */ + int notext; /* no file associated */ + int color; +}; + +/* + * virtual MMU + */ +#define PTEMAPMEM (1ULL*GiB) +#define SEGMAPSIZE 1984 +#define SSEGMAPSIZE 16 /* XXX: shouldn't be 32 at least? */ + +/* + * Interface between fixfault and mmuput. 
+ */ +enum { + PTEVALID = 1<<0, + PTEWRITE = 1<<1, + PTERONLY = 0<<1, + PTEUNCACHED = 1<<4, +}; + +struct Pte +{ + Page **first; /* First used entry */ + Page **last; /* Last used entry */ + Page *pages[]; /* Page map for this chunk of pte */ +}; + +/* Segment types */ +enum +{ + SG_TYPE = 07, /* Mask type of segment */ + SG_TEXT = 00, + SG_DATA = 01, + SG_BSS = 02, + SG_STACK = 03, + SG_SHARED = 04, + SG_PHYSICAL = 05, + + SG_CACHED = 0020, /* Physseg can be cached */ + SG_RONLY = 0040, /* Segment is read only */ + SG_CEXEC = 0100, /* Detach at exec */ + SG_ZIO = 0200, /* used for zero copy */ + SG_KZIO = 0400, /* kernel zero copy segment */ +}; + +#define PG_ONSWAP 1 +#define onswap(s) (PTR2UINT(s) & PG_ONSWAP) +#define pagedout(s) (PTR2UINT(s) == 0 || onswap(s)) +#define swapaddr(s) (PTR2UINT(s) & ~PG_ONSWAP) + +#define SEGMAXPG (SEGMAPSIZE) + +struct Physseg +{ + uint attr; /* Segment attributes */ + char *name; /* Attach name */ + uintmem pa; /* Physical address */ + usize size; /* Maximum segment size in pages */ + int pgszi; /* Page size index in Mach */ + Page *(*pgalloc)(Segment*, uintptr); /* Allocation if we need it */ + void (*pgfree)(Page*); + uintptr gva; /* optional global virtual address */ +}; + +struct Sema +{ + Rendez; + int* addr; + int waiting; + Sema* next; + Sema* prev; +}; + +enum{ + Semok, + Semdead, +}; + +/* NIX semaphores */ +struct Ksem +{ + Lock; + Sem * sem; /* user-level semaphore */ + int state; + Proc **q; + int nq; + int nowait; + RWlock *semaltlock; /* located in the Segment */ + Ksem *next; /* in list of semaphores for this Segment */ +}; + +/* NIX semaphores */ +struct Ksems +{ + Ksem** s; +}; + +/* Zero copy per-segment information (locked using Segment.lk) */ +struct Zseg +{ + void* map; /* memory map for buffers within this segment */ + uintptr *addr; /* array of addresses released */ + int naddr; /* size allocated for the array */ + int end; /* 1+ last used index in addr */ + Rendez rr; /* process waiting to read free 
addresses */ +}; + +#define NOCOLOR -1 + +struct Segment +{ + Ref; + QLock lk; + ushort steal; /* Page stealer lock */ + ushort type; /* segment type */ + int pgszi; /* page size index in Mach MMMU */ + uint ptepertab; + int color; + uintptr base; /* virtual base */ + uintptr top; /* virtual top */ + usize size; /* size in pages */ + ulong fstart; /* start address in file for demand load */ + ulong flen; /* length of segment in file */ + int flushme; /* maintain icache for this segment */ + Image *image; /* text in file attached to this segment */ + Physseg *pseg; + ulong* profile; /* Tick profile area */ + Pte **map; + int mapsize; + Pte *ssegmap[SSEGMAPSIZE]; + Lock semalock; + Sema sema; + Ksems sems; + RWlock semaltlock; /* For nix sems, alt */ + Zseg zseg; +}; + +/* + * NIX zero-copy IO structure. + */ +struct Kzio +{ + Zio; + Segment* seg; +}; + +enum +{ + RENDLOG = 5, + RENDHASH = 1<rendhash[(s)&((1<mnthash[(qid).path&((1<npgsz page sizes */ + Page* hash[PGHSIZE]; /* only used for user pages */ + Lock hashlock; + Rendez r; /* sleep for free mem */ + QLock pwait; /* queue of procs waiting for this pgsz */ +}; + +struct Waitq +{ + Waitmsg w; + Waitq *next; +}; + +/* + * fasttick timer interrupts + */ +enum { + /* Mode */ + Trelative, /* timer programmed in ns from now */ + Tperiodic, /* periodic timer, period in ns */ +}; + +struct Timer +{ + /* Public interface */ + int tmode; /* See above */ + vlong tns; /* meaning defined by mode */ + void (*tf)(Ureg*, Timer*); + void *ta; + /* Internal */ + Lock; + Timers *tt; /* Timers queue this timer runs on */ + vlong twhen; /* ns represented in fastticks */ + Timer *tnext; +}; + +enum +{ + RFNAMEG = (1<<0), + RFENVG = (1<<1), + RFFDG = (1<<2), + RFNOTEG = (1<<3), + RFPROC = (1<<4), + RFMEM = (1<<5), + RFNOWAIT = (1<<6), + RFCNAMEG = (1<<10), + RFCENVG = (1<<11), + RFCFDG = (1<<12), + RFREND = (1<<13), + RFNOMNT = (1<<14), + RFPREPAGE = (1<<15), + RFCPREPAGE = (1<<16), + RFCORE = (1<<17), + RFCCORE = (1<<18), +}; + +/* 
execac */ +enum +{ + EXTC = 0, /* exec on time-sharing */ + EXAC, /* want an AC for the exec'd image */ + EXXC, /* want an XC for the exec'd image */ +}; + +/* + * process memory segments - NSEG always last ! + * HSEG is a potentially huge bss segment. + */ +enum +{ + SSEG, TSEG, DSEG, BSEG, HSEG, ESEG, LSEG, SEG1, SEG2, SEG3, SEG4, NSEG +}; + +enum +{ + Dead = 0, /* Process states */ + Moribund, + Ready, + Scheding, + Running, + Queueing, + QueueingR, + QueueingW, + Wakeme, + Broken, + Stopped, + Rendezvous, + Waitrelease, + Exotic, /* NIX */ + Semdown, + Semalt, + + Proc_stopme = 1, /* devproc requests */ + Proc_exitme, + Proc_traceme, + Proc_exitbig, + Proc_tracesyscall, + Proc_toac, + Proc_totc, + + TUser = 0, /* Proc.time */ + TSys, + TReal, + TCUser, + TCSys, + TCReal, + Ntime, + + NERR = 64, + NNOTE = 5, + + Npriq = 20, /* number of scheduler priority levels */ + Nrq = Npriq+2, /* number of priority levels including real time */ + PriRelease = Npriq, /* released edf processes */ + PriEdf = Npriq+1, /* active edf processes */ + PriNormal = 10, /* base priority for normal processes */ + PriExtra = Npriq-1, /* edf processes at high best-effort pri */ + PriKproc = 13, /* base priority for kernel processes */ + PriRoot = 13, /* base priority for root processes */ +}; + +struct Schedq +{ + Proc* head; + Proc* tail; + int n; +}; + +struct Sched +{ + Lock; /* runq */ + int nrdy; + uint delayedscheds; /* statistics */ + int skipscheds; + int preempts; + int schedgain; + int balancetime; + Schedq runq[Nrq]; + uint runvec; + int nmach; /* # of cores with this color */ + uint nrunhz; /* to compute load */ + int nrunning; +}; + +typedef union Ar0 Ar0; +union Ar0 { + int i; + long l; + uintptr p; + usize u; + void* v; +}; + +typedef struct Nixpctl Nixpctl; +#pragma incomplete Nixpctl + +struct Proc +{ + Label sched; /* known to l.s */ + char *kstack; /* known to l.s */ + void *dbgreg; /* known to l.s User registers for devproc */ + Mach *mach; /* machine running this proc 
*/ + char *text; + char *user; + char *args; + int nargs; /* number of bytes of args */ + Proc *rnext; /* next process in run queue */ + Proc *qnext; /* next process on queue for a QLock */ + QLock *qlock; /* addr of qlock being queued for DEBUG */ + int state; + char *psstate; /* What /proc/#/status reports */ + Segment *seg[NSEG]; + QLock seglock; /* locked whenever seg[] changes */ + int pid; + int index; /* index (slot) in proc array */ + int ref; /* indirect reference */ + int noteid; /* Equivalent of note group */ + Proc *pidhash; /* next proc in pid hash */ + + Lock exl; /* Lock count and waitq */ + Waitq *waitq; /* Exited processes wait children */ + int nchild; /* Number of living children */ + int nwait; /* Number of uncollected wait records */ + QLock qwaitr; + Rendez waitr; /* Place to hang out in wait */ + Proc *parent; + + Pgrp *pgrp; /* Process group for namespace */ + Egrp *egrp; /* Environment group */ + Fgrp *fgrp; /* File descriptor group */ + Rgrp *rgrp; /* Rendez group */ + + Fgrp *closingfgrp; /* used during teardown */ + + int parentpid; + ulong time[Ntime]; /* User, Sys, Real; child U, S, R */ + + uvlong kentry; /* Kernel entry time stamp (for profiling) */ + /* + * pcycles: cycles spent in this process (updated on procsave/restore) + * when this is the current proc and we're in the kernel + * (procrestores outnumber procsaves by one) + * the number of cycles spent in the proc is pcycles + cycles() + * when this is not the current process or we're in user mode + * (procrestores and procsaves balance), it is pcycles. 
+ */ + vlong pcycles; + + int insyscall; + + QLock debug; /* to access debugging elements of User */ + Proc *pdbg; /* the debugging process */ + uint procmode; /* proc device file mode */ + uint privatemem; /* proc does not let anyone read mem */ + int hang; /* hang at next exec for debug */ + int procctl; /* Control for /proc debugging */ + uintptr pc; /* DEBUG only */ + + Lock rlock; /* sync sleep/wakeup with postnote */ + Rendez *r; /* rendezvous point slept on */ + Rendez sleep; /* place for syssleep/debug */ + int notepending; /* note issued but not acted on */ + int kp; /* true if a kernel process */ + Proc *palarm; /* Next alarm time */ + ulong alarm; /* Time of call */ + int newtlb; /* Pager has changed my pte's, I must flush */ + int noswap; /* process is not swappable */ + + uintptr rendtag; /* Tag for rendezvous */ + uintptr rendval; /* Value for rendezvous */ + Proc *rendhash; /* Hash list for tag values */ + + Timer; /* For tsleep and real-time */ + Rendez *trend; + int (*tfn)(void*); + void (*kpfun)(void*); + void *kparg; + + int scallnr; /* system call number */ + uchar arg[MAXSYSARG*sizeof(void*)]; /* system call arguments */ + int nerrlab; + Label errlab[NERR]; + char *syserrstr; /* last error from a system call, errbuf0 or 1 */ + char *errstr; /* reason we're unwinding the error stack, errbuf1 or 0 */ + char errbuf0[ERRMAX]; + char errbuf1[ERRMAX]; + char genbuf[128]; /* buffer used e.g. 
for last name element from namec */ + Chan *slash; + Chan *dot; + + Note note[NNOTE]; + short nnote; + short notified; /* sysnoted is due */ + Note lastnote; + void (*notify)(void*, char*); + + Lock *lockwait; + Lock *lastlock; /* debugging */ + Lock *lastilock; /* debugging */ + + Mach *wired; + Mach *mp; /* machine this process last ran on */ + int nlocks; /* number of locks held by proc */ + uint delaysched; + uint priority; /* priority level */ + uint basepri; /* base priority level */ + int fixedpri; /* priority level does not change */ + uint cpu; /* cpu average */ + ulong lastupdate; + ulong readytime; /* time process came ready */ + ulong movetime; /* last time process switched processors */ + int preempted; /* true if this process hasn't finished the interrupt + * that last preempted it + */ + Edf *edf; /* if non-null, real-time proc, edf contains scheduling params */ + int trace; /* process being traced? */ + + uintptr qpc; /* pc calling last blocking qlock */ + + int setargs; + + void *ureg; /* User registers for notes */ + int color; + + int fcount; + char* syscalltrace; + + /* NIX */ + Mach *ac; + Page *acpml4; + Ksem *waitsem; + int prepagemem; + Nixpctl *nixpctl; /* NIX queue based system calls */ + + uint ntrap; /* # of traps while in this process */ + uint nintr; /* # of intrs while in this process */ + uint nsyscall; /* # of syscalls made by the process */ + uint nactrap; /* # of traps in the AC for this process */ + uint nacsyscall; /* # of syscalls in the AC for this process */ + uint nicc; /* # of ICCs for the process */ + uvlong actime1; /* ticks as of last call in AC */ + uvlong actime; /* ∑time from call in AC to ret to AC, and... 
*/ + uvlong tctime; /* ∑time from call received to call handled */ + int nqtrap; /* # of traps in last quantum */ + int nqsyscall; /* # of syscalls in the last quantum */ + int nfullq; + int semawaken; /* nix sems */ + + /* + * machine specific fpu, mmu and notify + */ + PFPU; + PMMU; + PNOTIFY; +}; + +struct Procalloc +{ + Lock; + int nproc; + Proc* ht[128]; + Proc* arena; + Proc* free; +}; + +enum +{ + PRINTSIZE = 256, + NUMSIZE = 12, /* size of formatted number */ + MB = 1024*1024, + READSTR = 4000, /* temporary buffer size for device reads */ +}; + +extern Conf conf; +extern char* conffile; +extern int cpuserver; +extern char* eve; +extern char hostdomain[]; +extern uchar initcode[]; +extern int kbdbuttons; +extern Ref noteidalloc; +extern int nphysseg; +extern int nsyscall; +extern Pgalloc pga; +extern Physseg physseg[]; +extern Procalloc procalloc; +extern uint qiomaxatomic; +extern char* statename[]; +extern char* sysname; +extern struct { + char* n; + void (*f)(Ar0*, va_list); + Ar0 r; +} systab[]; + +enum +{ + LRESPROF = 3, +}; + +/* + * action log + */ +struct Log { + Lock; + int opens; + char* buf; + char *end; + char *rptr; + int len; + int nlog; + int minread; + + int logmask; /* mask of things to debug */ + + QLock readq; + Rendez readr; +}; + +struct Logflag { + char* name; + int mask; +}; + +struct Cmdbuf +{ + char *buf; + char **f; + int nf; +}; + +struct Cmdtab +{ + int index; /* used by client to switch on result */ + char *cmd; /* command name */ + int narg; /* expected #args; 0 ==> variadic */ +}; + +/* + * routines to access UART hardware + */ +struct PhysUart +{ + char* name; + Uart* (*pnp)(void); + void (*enable)(Uart*, int); + void (*disable)(Uart*); + void (*kick)(Uart*); + void (*dobreak)(Uart*, int); + int (*baud)(Uart*, int); + int (*bits)(Uart*, int); + int (*stop)(Uart*, int); + int (*parity)(Uart*, int); + void (*modemctl)(Uart*, int); + void (*rts)(Uart*, int); + void (*dtr)(Uart*, int); + long (*status)(Uart*, void*, long, long); + 
void (*fifo)(Uart*, int); + void (*power)(Uart*, int); + int (*getc)(Uart*); /* polling version for rdb */ + void (*putc)(Uart*, int); /* polling version for iprint */ + void (*poll)(Uart*); /* polled interrupt routine */ +}; + +enum { + Stagesize= 2048 +}; + +/* + * software UART + */ +struct Uart +{ + void* regs; /* hardware stuff */ + char* name; /* internal name */ + uint freq; /* clock frequency */ + int bits; /* bits per character */ + int stop; /* stop bits */ + int parity; /* even, odd or no parity */ + int baud; /* baud rate */ + PhysUart*phys; + int console; /* used as a serial console */ + int special; /* internal kernel device */ + Uart* next; /* list of allocated uarts */ + + QLock; + int type; /* ?? */ + int dev; + int opens; + + int enabled; + Uart *elist; /* next enabled interface */ + + int perr; /* parity errors */ + int ferr; /* framing errors */ + int oerr; /* rcvr overruns */ + int berr; /* no input buffers */ + int serr; /* input queue overflow */ + + /* buffers */ + int (*putc)(Queue*, int); + Queue *iq; + Queue *oq; + + Lock rlock; + uchar istage[Stagesize]; + uchar *iw; + uchar *ir; + uchar *ie; + + Lock tlock; /* transmit */ + uchar ostage[Stagesize]; + uchar *op; + uchar *oe; + int drain; + + int modem; /* hardware flow control on */ + int xonoff; /* software flow control on */ + int blocked; + int cts, dsr, dcd; /* keep track of modem status */ + int ctsbackoff; + int hup_dsr, hup_dcd; /* send hangup upstream? 
*/ + int dohup; + + Rendez r; +}; + +extern Uart* consuart; + +/* + * performance timers, all units in perfticks + */ +struct Perf +{ + uvlong intrts; /* time of last interrupt */ + uvlong inintr; /* time since last clock tick in interrupt handlers */ + uvlong avg_inintr; /* avg time per clock tick in interrupt handlers */ + uvlong inidle; /* time since last clock tick in idle loop */ + uvlong avg_inidle; /* avg time per clock tick in idle loop */ + uvlong last; /* value of perfticks() at last clock tick */ + uvlong period; /* perfticks() per clock tick */ +}; + +struct Watchdog +{ + void (*enable)(void); /* watchdog enable */ + void (*disable)(void); /* watchdog disable */ + void (*restart)(void); /* watchdog restart */ + void (*stat)(char*, char*); /* watchdog statistics */ +}; + +/* queue state bits, Qmsg, Qcoalesce, and Qkick can be set in qopen */ +enum +{ + /* Queue.state */ + Qstarve = (1<<0), /* consumer starved */ + Qmsg = (1<<1), /* message stream */ + Qclosed = (1<<2), /* queue has been closed/hungup */ + Qflow = (1<<3), /* producer flow controlled */ + Qcoalesce = (1<<4), /* coallesce packets on read */ + Qkick = (1<<5), /* always call the kick routine after qwrite */ +}; + +#define DEVDOTDOT -1 + +#pragma varargck type "I" uchar* +#pragma varargck type "V" uchar* +#pragma varargck type "E" uchar* +#pragma varargck type "M" uchar* +#pragma varargck type "Z" Kzio* diff -Nru 0/sys/src/nix/port/portfns.h 4/sys/src/nix/port/portfns.h --- 0/sys/src/nix/port/portfns.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/portfns.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,401 @@ +void _assert(char*); +void accounttime(void); +void acsched(void); +void addbootfile(char*, uchar*, ulong); +Timer* addclock0link(void (*)(void), int); +int addconsdev(Queue*, void (*fn)(char*,int), int, int); +int addkbdq(Queue*, int); +int addphysseg(Physseg*); +void addwaitstat(uintptr pc, uvlong t0, int type); +void addwatchdog(Watchdog*); +int adec(int*); +Block* adjustblock(Block*, int); 
+int ainc(int*); +void alarmkproc(void*); +Block* allocb(int); +void* alloczio(Segment*, long); +int anyhigher(void); +int anyready(void); +Image* attachimage(int, Chan*, int, uintptr, usize); +Page* auxpage(usize); +Block* bl2mem(uchar*, Block*, int); +int blocklen(Block*); +void bootlinks(void); +void cachedel(Image*, ulong); +void cachepage(Page*, Image*); +void callwithureg(void (*)(Ureg*)); +int canlock(Lock*); +int canpage(Proc*); +int canqlock(QLock*); +int canrlock(RWlock*); +Chan* cclone(Chan*); +void cclose(Chan*); +void ccloseq(Chan*); +void chanfree(Chan*); +char* chanpath(Chan*); +void checkalarms(void); +void checkb(Block*, char*); +void clearwaitstats(void); +void closeegrp(Egrp*); +void closefgrp(Fgrp*); +void closepgrp(Pgrp*); +void closergrp(Rgrp*); +void cmderror(Cmdbuf*, char*); +int cmount(Chan**, Chan*, int, char*); +Block* concatblock(Block*); +void confinit(void); +int consactive(void); +void (*consdebug)(void); +void (*consputs)(char*, int); +Block* copyblock(Block*, int); +void copypage(Page*, Page*); +void cunmount(Chan*, Chan*); +Segment* data2txt(Segment*); +uintptr dbgpc(Proc*); +int decrypt(void*, void*, int); +void delay(int); +void delconsdevs(void); +Proc* dequeueproc(Sched*, Schedq*, Proc*); +Chan* devattach(int, char*); +Block* devbread(Chan*, long, vlong); +long devbwrite(Chan*, Block*, vlong); +Chan* devclone(Chan*); +int devconfig(int, char *, DevConf *); +void devcreate(Chan*, char*, int, int); +void devdir(Chan*, Qid, char*, vlong, char*, long, Dir*); +long devdirread(Chan*, char*, long, Dirtab*, int, Devgen*); +Devgen devgen; +void devinit(void); +Chan* devopen(Chan*, int, Dirtab*, int, Devgen*); +void devpermcheck(char*, int, int); +void devpower(int); +void devremove(Chan*); +void devreset(void); +void devshutdown(void); +long devstat(Chan*, uchar*, long, Dirtab*, int, Devgen*); +Dev* devtabget(int, int); +void devtabinit(void); +long devtabread(Chan*, void*, long, vlong); +void devtabreset(void); +void 
devtabshutdown(void); +Walkqid* devwalk(Chan*, Chan*, char**, int, Dirtab*, int, Devgen*); +long devwstat(Chan*, uchar*, long); +int devzread(Chan*, Kzio*, int, usize, vlong); +int devzwrite(Chan*, Kzio*, int, vlong); +void drawactive(int); +void drawcmap(void); +void dumpaproc(Proc*); +void dumpregs(Ureg*); +void dumpstack(void); +void dumpzseg(Segment*); +Fgrp* dupfgrp(Fgrp*); +int duppage(Page*); +Segment* dupseg(Segment**, int, int); +void dupswap(Page*); +char* edfadmit(Proc*); +void edfinit(Proc*); +int edfready(Proc*); +void edfrecord(Proc*); +void edfrun(Proc*, int); +void edfstop(Proc*); +void edfyield(void); +int emptystr(char*); +int encrypt(void*, void*, int); +void envcpy(Egrp*, Egrp*); +int eqchanddq(Chan*, int, uint, Qid, int); +int eqqid(Qid, Qid); +void error(char*); +void exhausted(char*); +void exit(int); +uvlong fastticks(uvlong*); +uvlong fastticks2ns(uvlong); +uvlong fastticks2us(uvlong); +int fault(uintptr, int); +void fdclose(int, int); +Chan* fdtochan(int, int, int, int); +int findmount(Chan**, Mhead**, int, uint, Qid); +int fixfault(Segment*, uintptr, int, int, int); +void fmtinit(void); +void forceclosefgrp(void); +void free(void*); +void freeb(Block*); +void freeblist(Block*); +int freebroken(void); +void freepte(Segment*, Pte*); +void getcolor(ulong, ulong*, ulong*, ulong*); +char* getconfenv(void); +int getpgszi(ulong); +Segment* getzkseg(void); +void gotolabel(Label*); +int haswaitq(void*); +void hnputl(void*, uint); +void hnputs(void*, ushort); +void hnputv(void*, uvlong); +long hostdomainwrite(char*, long); +long hostownerwrite(char*, long); +void hzsched(void); +Block* iallocb(int); +void iallocsummary(void); +void ilock(Lock*); +void initimage(void); +int iprint(char*, ...); +void isdir(Chan*); +int iseve(void); +int islo(void); +Segment* isoverlap(Proc*, uintptr, usize); +int isphysseg(char*); +void iunlock(Lock*); +void ixsummary(void); +int kbdcr2nl(Queue*, int); +int kbdgetmap(uint, int*, int*, Rune*); +int kbdputc(Queue*, 
int); +void kbdputmap(ushort, ushort, Rune); +void kickpager(int, int); +void killbig(char*); +void kproc(char*, void(*)(void*), void*); +void kprocchild(Proc*, void (*)(void*), void*); +void (*kproftimer)(uintptr); +void ksetenv(char*, char*, int); +void kstrcpy(char*, char*, int); +void kstrdup(char**, char*); +long latin1(Rune*, int); +int lock(Lock*); +void log(Log*, int, char*, ...); +void logclose(Log*); +char* logctl(Log*, int, char**, Logflag*); +void logn(Log*, int, void*, int); +void logopen(Log*); +long logread(Log*, void*, ulong, long); +Page* lookpage(Image*, ulong); +Cmdtab* lookupcmd(Cmdbuf*, Cmdtab*, int); +void mallocinit(void); +long mallocreadsummary(Chan*, void*, long, long); +void mallocsummary(void); +Block* mem2bl(uchar*, int); +void (*mfcinit)(void); +void (*mfcopen)(Chan*); +int (*mfcread)(Chan*, uchar*, int, vlong); +void (*mfcupdate)(Chan*, uchar*, int, vlong); +void (*mfcwrite)(Chan*, uchar*, int, vlong); +void mfreeseg(Segment*, uintptr, int); +void microdelay(int); +uvlong mk64fract(uvlong, uvlong); +void mkqid(Qid*, vlong, ulong, int); +void mmuflush(void); +void mmuput(uintptr, Page*, uint); +void mmurelease(Proc*); +void mmuswitch(Proc*); +Chan* mntauth(Chan*, char*); +usize mntversion(Chan*, u32int, char*, usize); +void mountfree(Mount*); +uvlong ms2fastticks(ulong); +#define MS2NS(n) (((vlong)(n))*1000000LL) +ulong ms2tk(ulong); +void mul64fract(uvlong*, uvlong, uvlong); +void muxclose(Mnt*); +void (*waitwhile)(void *, uintptr); +Chan* namec(char*, int, int, int); +void nameerror(char*, char*); +Chan* newchan(void); +int newfd(Chan*); +Mhead* newmhead(Chan*); +Mount* newmount(Mhead*, Chan*, int, char*); +Page* newpage(int, Segment **, uintptr, usize, int); +Path* newpath(char*); +Pgrp* newpgrp(void); +Proc* newproc(void); +Rgrp* newrgrp(void); +Segment* newseg(int, uintptr, u64int); +void newzmap(Segment*); +void nexterror(void); +uint nhgetl(void*); +ushort nhgets(void*); +uvlong nhgetv(void*); +void nixprepage(int); +int 
nrand(int); +uvlong ns2fastticks(uvlong); +int okaddr(uintptr, long, int); +int openmode(int); +Block* packblock(Block*); +Block* padblock(Block*, int); +void pagechainhead(Page*); +void pageinit(void); +ulong pagenumber(Page*); +uvlong pagereclaim(Image*); +void pagersummary(void); +void pageunchain(Page*); +void panic(char*, ...); +Cmdbuf* parsecmd(char *a, int n); +void pathclose(Path*); +ulong perfticks(void); +void pexit(char*, int); +Page* pgalloc(usize, int); +void pgfree(Page*); +void pgrpcpy(Pgrp*, Pgrp*); +void pgrpnote(ulong, char*, long, int); +uintmem physalloc(u64int, int*, void*); +void physdump(void); +void physfree(uintmem, u64int); +void physinit(uintmem, u64int); +void* phystag(uintmem); +void pio(Segment*, uintptr, ulong, Page**, int); +#define poperror() up->nerrlab-- +void portwaitwhile(void*, uintptr); +int postnote(Proc*, int, char*, int); +int pprint(char*, ...); +int preempted(void); +void prflush(void); +void printinit(void); +ulong procalarm(ulong); +void procctl(Proc*); +void procdump(void); +int procfdprint(Chan*, int, int, char*, int); +void procflushseg(Segment*); +void procinit0(void); +void procpriority(Proc*, int, int); +void procrestore(Proc*); +void procsave(Proc*); +void (*proctrace)(Proc*, int, vlong); +void proctracepid(Proc*); +void procwired(Proc*, int); +void psdecref(Proc*); +Proc* psincref(int); +int psindex(int); +void psinit(int); +Pte* ptealloc(Segment*); +Pte* ptecpy(Segment*,Pte*); +int pullblock(Block**, int); +Block* pullupblock(Block*, int); +Block* pullupqueue(Queue*, int); +void putimage(Image*); +void putmhead(Mhead*); +void putpage(Page*); +void putseg(Segment*); +void putstrn(char*, int); +void putswap(Page*); +int pwait(Waitmsg*); +void qaddlist(Queue*, Block*); +Block* qbread(Queue*, int); +long qbwrite(Queue*, Block*); +Queue* qbypass(void (*)(void*, Block*), void*); +int qcanread(Queue*); +void qclose(Queue*); +int qconsume(Queue*, void*, int); +Block* qcopy(Queue*, int, ulong); +int qdiscard(Queue*, 
int); +void qflush(Queue*); +void qfree(Queue*); +int qfull(Queue*); +Block* qget(Queue*); +void qhangup(Queue*, char*); +int qisclosed(Queue*); +int qiwrite(Queue*, void*, int); +int qlen(Queue*); +void qlock(QLock*); +void qnoblock(Queue*, int); +Queue* qopen(int, int, void (*)(void*), void*); +int qpass(Queue*, Block*); +int qpassnolim(Queue*, Block*); +int qproduce(Queue*, void*, int); +void qputback(Queue*, Block*); +long qread(Queue*, void*, int); +Block* qremove(Queue*); +void qreopen(Queue*); +void qsetlimit(Queue*, int); +void qunlock(QLock*); +int qwindow(Queue*); +int qwrite(Queue*, void*, int); +int rand(void); +void randominit(void); +ulong randomread(void*, ulong); +void rdb(void); +int readnum(ulong, char*, ulong, ulong, int); +long readstr(long, char*, long, char*); +void ready(Proc*); +long readzio(Kzio[], int, void*, long); +void reboot(void*, void*, long); +void rebootcmd(int, char**); +void relocateseg(Segment*, uintptr); +void renameuser(char*, char*); +void resched(char*); +void resrcwait(char*); +int return0(void*); +void rlock(RWlock*); +long rtctime(void); +int runac(Mach *m, void(*func)(void), int flushtlb, void *a, long n); +void runlock(RWlock*); +Proc* runproc(void); +void sched(void); +void scheddump(void); +void schedinit(void); +long seconds(void); +Segment* seg(Proc*, uintptr, int); +void segclock(uintptr); +Ksem* segmksem(Segment*, Sem*); +void segpage(Segment*, Page*); +uintmem segppn(Segment*, uintmem); +int semadec(int*); +int semainc(int*); +char* seprintpagestats(char*, char*); +char* seprintphysstats(char*, char*); +int setcolor(ulong, ulong, ulong, ulong); +void setkernur(Ureg*, Proc*); +int setlabel(Label*); +void setregisters(Ureg*, char*, char*, int); +char* skipslash(char*); +void sleep(Rendez*, int (*)(void*), void*); +void* smalloc(ulong); +char* srvname(Chan*); +void startwaitstats(int); +int swapcount(ulong); +void swapinit(void); +void syscallfmt(int, va_list list); +void sysretfmt(int, va_list, Ar0*, uvlong, 
uvlong); +void sysrforkchild(Proc*, Proc*); +void timeradd(Timer*); +void timerdel(Timer*); +void timerintr(Ureg*, vlong); +void timerset(uvlong); +void timersinit(void); +ulong tk2ms(ulong); +#define TK2MS(x) ((x)*(1000/HZ)) +uvlong tod2fastticks(vlong); +vlong todget(vlong*); +void todinit(void); +void todset(vlong, vlong, int); +void todsetfreq(vlong); +Block* trimblock(Block*, int, int); +void tsleep(Rendez*, int (*)(void*), void*, long); +Uart* uartconsole(int, char*); +int uartctl(Uart*, char*); +int uartgetc(void); +void uartkick(void*); +void uartputc(int); +void uartputs(char*, int); +void uartrecv(Uart*, char); +int uartstageoutput(Uart*); +void unbreak(Proc*); +void uncachepage(Page*); +void unlock(Lock*); +void userinit(void); +uintptr userpc(Ureg*); +long userwrite(char*, long); +void* validaddr(void*, long, int); +void validname(char*, int); +char* validnamedup(char*, int); +void validstat(uchar*, usize); +void* vmemchr(void*, int, int); +Proc* wakeup(Rendez*); +int walk(Chan**, char**, int, int, int*); +void wlock(RWlock*); +void wunlock(RWlock*); +void yield(void); +uintptr zgetaddr(Segment*); +void zgrow(Segment*); +int ziofmt(Fmt*); +int zputaddr(Segment*, uintptr); +ulong µs(void); + +#pragma varargck argpos iprint 1 +#pragma varargck argpos panic 1 +#pragma varargck argpos pprint 1 diff -Nru 0/sys/src/nix/port/portusbehci.h 4/sys/src/nix/port/portusbehci.h --- 0/sys/src/nix/port/portusbehci.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/portusbehci.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,144 @@ +/* + * ECHI portable hardware definitions + */ + +typedef struct Ecapio Ecapio; +typedef struct Edbgio Edbgio; + +#pragma incomplete Ecapio; +#pragma incomplete Edbgio; + +/* + * EHCI interface registers and bits + */ +enum +{ + /* Ecapio->parms reg. 
*/ + Cnports = 0xF, /* nport bits */ + Cdbgportshift = 20, /* debug port */ + Cdbgportmask = 0xF, + + /* Ecapio->capparms bits */ + C64 = 1<<0, /* 64-bits */ + Cpfl = 1<<1, /* program'ble frame list: can be <1024 */ + Casp = 1<<2, /* asynch. sched. park */ + Ceecpshift = 8, /* extended capabilities ptr. */ + Ceecpmask = (1<<8) - 1, + + Clegacy = 1, /* legacy support cap. id */ + CLbiossem = 2, /* legacy cap. bios sem. */ + CLossem = 3, /* legacy cap. os sem */ + CLcontrol = 4, /* legacy support control & status */ + + /* typed links */ + Lterm = 1, + Litd = 0<<1, + Lqh = 1<<1, + Lsitd = 2<<1, + Lfstn = 3<<1, /* we don't use these */ + + /* Cmd reg. */ + Cstop = 0x00000, /* stop running */ + Crun = 0x00001, /* start operation */ + Chcreset = 0x00002, /* host controller reset */ + Cflsmask = 0x0000C, /* frame list size bits */ + Cfls1024 = 0x00000, /* frame list size 1024 */ + Cfls512 = 0x00004, /* frame list size 512 frames */ + Cfls256 = 0x00008, /* frame list size 256 frames */ + Cpse = 0x00010, /* periodic sched. enable */ + Case = 0x00020, /* async sched. enable */ + Ciasync = 0x00040, /* interrupt on async advance doorbell */ + /* interrupt threshold ctl. in µframes (1-32 in powers of 2) */ + Citcshift = 16, + Citcmask = 0xff << Citcshift, + + /* Sts reg. */ + Sasyncss = 0x08000, /* async schedule status */ + Speriodss = 0x04000, /* periodic schedule status */ + Srecl = 0x02000, /* reclamation (empty async sched.) */ + Shalted = 0x01000, /* h.c. is halted */ + Sasync = 0x00020, /* interrupt on async advance */ + Sherr = 0x00010, /* host system error */ + Sfrroll = 0x00008, /* frame list roll over */ + Sportchg = 0x00004, /* port change detect */ + Serrintr = 0x00002, /* error interrupt */ + Sintr = 0x00001, /* interrupt */ + Sintrs = 0x0003F, /* interrupts status */ + + /* Intr reg. */ + Iusb = 0x01, /* intr. on usb */ + Ierr = 0x02, /* intr. on usb error */ + Iportchg = 0x04, /* intr. on port change */ + Ifrroll = 0x08, /* intr. 
on frlist roll over */ + Ihcerr = 0x10, /* intr. on host error */ + Iasync = 0x20, /* intr. on async advance enable */ + Iall = 0x3F, /* all interrupts */ + + /* Config reg. */ + Callmine = 1, /* route all ports to us */ + + /* Portsc reg. */ + Pspresent = 0x00000001, /* device present */ + Psstatuschg = 0x00000002, /* Pspresent changed */ + Psenable = 0x00000004, /* device enabled */ + Pschange = 0x00000008, /* Psenable changed */ + Psresume = 0x00000040, /* resume detected */ + Pssuspend = 0x00000080, /* port suspended */ + Psreset = 0x00000100, /* port reset */ + Pspower = 0x00001000, /* port power on */ + Psowner = 0x00002000, /* port owned by companion */ + Pslinemask = 0x00000C00, /* line status bits */ + Pslow = 0x00000400, /* low speed device */ + + /* Debug port csw reg. */ + Cowner = 0x40000000, /* port owned by ehci */ + Cenable = 0x10000000, /* debug port enabled */ + Cdone = 0x00010000, /* request is done */ + Cbusy = 0x00000400, /* port in use by a driver */ + Cerrmask= 0x00000380, /* error code bits */ + Chwerr = 0x00000100, /* hardware error */ + Cterr = 0x00000080, /* transaction error */ + Cfailed = 0x00000040, /* transaction did fail */ + Cgo = 0x00000020, /* execute the transaction */ + Cwrite = 0x00000010, /* request is a write */ + Clen = 0x0000000F, /* data len */ + + /* Debug port pid reg. */ + Prpidshift = 16, /* received pid */ + Prpidmask = 0xFF, + Pspidshift = 8, /* sent pid */ + Pspidmask = 0xFF, + Ptokshift = 0, /* token pid */ + Ptokmask = 0xFF, + + Ptoggle = 0x00008800, /* to update toggles */ + Ptogglemask = 0x0000FF00, + + /* Debug port addr reg. 
*/ + Adevshift = 8, /* device address */ + Adevmask = 0x7F, + Aepshift = 0, /* endpoint number */ + Aepmask = 0xF, +}; + +/* + * Capability registers (hw) + */ +struct Ecapio +{ + u32int cap; /* 00 controller capability register */ + u32int parms; /* 04 structural parameters register */ + u32int capparms; /* 08 capability parameters */ + u32int portroute; /* 0c not on the CS5536 */ +}; + +/* + * Debug port registers (hw) + */ +struct Edbgio +{ + u32int csw; /* control and status */ + u32int pid; /* USB pid */ + uchar data[8]; /* data buffer */ + u32int addr; /* device and endpoint addresses */ +}; diff -Nru 0/sys/src/nix/port/print.c 4/sys/src/nix/port/print.c --- 0/sys/src/nix/port/print.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/print.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,32 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +static Lock fmtl; + +void +_fmtlock(void) +{ + lock(&fmtl); +} + +void +_fmtunlock(void) +{ + unlock(&fmtl); +} + +int +_efgfmt(Fmt*) +{ + return -1; +} + +void +fmtinit(void) +{ + quotefmtinstall(); + archfmtinstall(); +} diff -Nru 0/sys/src/nix/port/proc.c 4/sys/src/nix/port/proc.c --- 0/sys/src/nix/port/proc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/proc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1900 @@ +#include +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include "errstr.h" +#include + +enum +{ + Scaling=2, + + AMPmincores = 5, +}; + +Ref noteidalloc; + +static Ref pidalloc; + +static Sched run; + + +struct Procalloc procalloc; + +extern Proc* psalloc(void); +extern void pshash(Proc*); +extern void psrelease(Proc*); +extern void psunhash(Proc*); + +static int reprioritize(Proc*); +static void updatecpu(Proc*); + +static void rebalance(void); + +char *statename[] = +{ /* BUG: generate automatically */ + "Dead", + "Moribund", + "Ready", + "Scheding", + "Running", + "Queueing", + "QueueingR", 
+ "QueueingW", + "Wakeme", + "Broken", + "Stopped", + "Rendez", + "Waitrelease", + "Exotic", + "Down", +}; + +Sched* +procsched(Proc *) +{ + return &run; +} + +/* + * bad planning, once more. + */ +void +procinit0(void) +{ + run.schedgain = 30; + +} + +/* + * Always splhi()'ed. + */ +void +schedinit(void) /* never returns */ +{ + Edf *e; + + m->inidle = 1; + m->proc = nil; + ainc(&run.nmach); + + setlabel(&m->sched); + if(up) { + if((e = up->edf) && (e->flags & Admitted)) + edfrecord(up); + m->qstart = 0; + m->qexpired = 0; + coherence(); + m->proc = 0; + switch(up->state) { + case Running: + ready(up); + break; + case Moribund: + up->state = Dead; + stopac(); + edfstop(up); + if (up->edf) + free(up->edf); + up->edf = nil; + + /* + * Holding locks from pexit: + * procalloc + * pga + */ + mmurelease(up); + unlock(&pga); + + psrelease(up); + unlock(&procalloc); + break; + } + if(up->state != Exotic) + adec(&run.nrunning); + up->mach = nil; + updatecpu(up); + up = nil; + } + sched(); +} + +/* + * Check if the stack has more than 4*KiB free. + * Do not call panic, the stack is gigantic. + */ +static void +stackok(void) +{ + char dummy; + + if(&dummy < (char*)up->kstack + 4*KiB){ +print("kstack is %p %p\n", up->kstack, &dummy); + print("tc kernel stack overflow, cpu%d stopped\n", m->machno); + DONE(); + } +} + +/* + * If changing this routine, look also at sleep(). It + * contains a copy of the guts of sched(). + */ +void +sched(void) +{ + Proc *p; + + if(m->ilockdepth) + panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p", + m->machno, + m->ilockdepth, + up? up->lastilock: nil, + (up && up->lastilock)? up->lastilock->pc: 0, + getcallerpc(&p+2)); + + if(up){ + /* + * Delay the sched until the process gives up the locks + * it is holding. This avoids dumb lock loops. + * Don't delay if the process is Moribund. + * It called sched to die. + * But do sched eventually. This avoids a missing unlock + * from hanging the entire kernel. 
+ * But don't reschedule procs holding pga or procalloc. + * Those are far too important to be holding while asleep. + * + * This test is not exact. There can still be a few + * instructions in the middle of taslock when a process + * holds a lock but Lock.p has not yet been initialized. + */ + if(up->nlocks) + if(up->state != Moribund) + if(up->delaysched < 20 + || pga.Lock.p == up + || procalloc.Lock.p == up){ + up->delaysched++; + run.delayedscheds++; + return; + } + up->delaysched = 0; + + splhi(); + /* statistics */ + if(up->nqtrap == 0 && up->nqsyscall == 0) + up->nfullq++; + m->cs++; + + stackok(); + + procsave(up); + mmuflushtlb(m->pml4->pa); + if(setlabel(&up->sched)){ + procrestore(up); + spllo(); + return; + } + gotolabel(&m->sched); + } + + m->inidle = 1; + p = runproc(); /* core 0 never returns */ + m->inidle = 0; + + if(!p->edf){ + updatecpu(p); + p->priority = reprioritize(p); + } + up = p; + m->qstart = m->ticks; + up->nqtrap = 0; + up->nqsyscall = 0; + up->state = Running; + up->mach = m; + m->proc = up; + mmuswitch(up); + + assert(!up->wired || up->wired == m); + gotolabel(&up->sched); +} + +int +anyready(void) +{ + return run.runvec; +} + +int +anyhigher(void) +{ + return run.runvec & ~((1<<(up->priority+1))-1); +} + +int +anyactive(void) +{ + return run.runvec || run.nrunning; +} + +/* + * here once per clock tick to see if we should resched + */ + +void +hzsched(void) +{ + /* once a second, rebalance will reprioritize ready procs */ + if(m->machno == 0){ + rebalance(); + return; + } + + /* with <= AMPmincores cores we use SMP and core 0 does not set qexpired for us (NOTE(review): this said "<= 4" but AMPmincores is 5 -- confirm the intended threshold) */ + if(sys->nmach <= AMPmincores) + if(m->ticks - m->qstart >= HZ/10) + m->qexpired = 1; + + /* unless preempted, get to run */ + if(m->qexpired && anyready()) + up->delaysched++; + + /* BUG, not enough if the number of cores can change */ + if(isbooting(m) && sys->nmach > AMPmincores) + sched(); + +} + +/* + * here at the end of non-clock interrupts to see if we should preempt the + * 
current process. Returns 1 if preempted, 0 otherwise. + */ +int +preempted(void) +{ + if(up && up->state == Running) + if(up->preempted == 0) + if(anyhigher()) + if(!active.exiting){ + /* Core 0 is dispatching all interrupts, so no core + * actually running a user process is ever going to call preempted, unless + * we consider IPIs for preemption or we distribute interrupts. + * But we are going to use SMP for machines with few cores. + panic("preemted used"); + */ + + up->preempted = 1; + sched(); + splhi(); + up->preempted = 0; + return 1; + } + return 0; +} + +/* + * Update the cpu time average for this particular process, + * which is about to change from up -> not up or vice versa. + * p->lastupdate is the last time an updatecpu happened. + * + * The cpu time average is a decaying average that lasts + * about D clock ticks. D is chosen to be approximately + * the cpu time of a cpu-intensive "quick job". A job has to run + * for approximately D clock ticks before we home in on its + * actual cpu usage. Thus if you manage to get in and get out + * quickly, you won't be penalized during your burst. Once you + * start using your share of the cpu for more than about D + * clock ticks though, your p->cpu hits 1000 (1.0) and you end up + * below all the other quick jobs. Interactive tasks, because + * they basically always use less than their fair share of cpu, + * will be rewarded. + * + * If the process has not been running, then we want to + * apply the filter + * + * cpu = cpu * (D-1)/D + * + * n times, yielding + * + * cpu = cpu * ((D-1)/D)^n + * + * but D is big enough that this is approximately + * + * cpu = cpu * (D-n)/D + * + * so we use that instead. + * + * If the process has been running, we apply the filter to + * 1 - cpu, yielding a similar equation. Note that cpu is + * stored in fixed point (* 1000). + * + * Updatecpu must be called before changing up, in order + * to maintain accurate cpu usage statistics. 
It can be called + * at any time to bring the stats for a given proc up-to-date. + */ +static void +updatecpu(Proc *p) +{ + int D, n, t, ocpu; + + if(p->edf) + return; + + t = sys->ticks*Scaling + Scaling/2; + n = t - p->lastupdate; + p->lastupdate = t; + + if(n == 0) + return; + D = run.schedgain*HZ*Scaling; + if(n > D) + n = D; + + ocpu = p->cpu; + if(p != up) + p->cpu = (ocpu*(D-n))/D; + else{ + t = 1000 - ocpu; + t = (t*(D-n))/D; + p->cpu = 1000 - t; + } + +//iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu); +} + +/* + * On average, p has used p->cpu of a cpu recently. + * Its fair share is nmach/m->load of a cpu. If it has been getting + * too much, penalize it. If it has been getting not enough, reward it. + * I don't think you can get much more than your fair share that + * often, so most of the queues are for using less. Having a priority + * of 3 means you're just right. Having a higher priority (up to p->basepri) + * means you're not using as much as you could. + */ +static int +reprioritize(Proc *p) +{ + int fairshare, n, load, ratio; + + load = sys->load; + if(load == 0) + return p->basepri; + + /* + * fairshare = 1.000 * conf.nproc * 1.000/load, + * except the decimal point is moved three places + * on both load and fairshare. 
+ */ + fairshare = (sys->nmach*1000*1000)/load; + n = p->cpu; + if(n == 0) + n = 1; + ratio = (fairshare+n/2) / n; + if(ratio > p->basepri) + ratio = p->basepri; + if(ratio < 0) + panic("reprioritize"); +//iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio); + return ratio; +} + +/* + * add a process to a scheduling queue + */ +static void +queueproc(Sched *sch, Schedq *rq, Proc *p, int locked) +{ + int pri; + + pri = rq - sch->runq; + if(!locked) + lock(sch); + else if(canlock(sch)) + panic("queueproc: locked and can lock"); + p->priority = pri; + p->rnext = 0; + if(rq->tail) + rq->tail->rnext = p; + else + rq->head = p; + rq->tail = p; + rq->n++; + sch->nrdy++; + sch->runvec |= 1<head; p; p = p->rnext){ + if(p == tp) + break; + l = p; + } + + /* + * p->mach==0 only when process state is saved + */ + + if(p == 0 || p->mach){ + unlock(sch); + return nil; + } + if(p->rnext == 0) + rq->tail = l; + if(l) + l->rnext = p->rnext; + else + rq->head = p->rnext; + if(rq->head == nil) + sch->runvec &= ~(1<<(rq-sch->runq)); + rq->n--; + sch->nrdy--; + if(p->state != Ready) + print("dequeueproc %s %d %s\n", p->text, p->pid, statename[p->state]); + + unlock(sch); + return p; +} + +static void +schedready(Sched *sch, Proc *p, int locked) +{ + Mpl pl; + int pri; + Schedq *rq; + + pl = splhi(); + if(edfready(p)){ + splx(pl); + return; + } + + updatecpu(p); + pri = reprioritize(p); + p->priority = pri; + rq = &sch->runq[pri]; + p->state = Ready; + queueproc(sch, rq, p, locked); + if(p->trace) + proctrace(p, SReady, 0); + splx(pl); +} + +/* + * ready(p) picks a new priority for a process and sticks it in the + * runq for that priority. 
+ */ +void +ready(Proc *p) +{ + if(p->state == Exotic) + adec(&run.nrunning); + + schedready(procsched(p), p, 0); +} + +/* + * yield the processor and drop our priority + */ +void +yield(void) +{ + if(anyready()){ + /* pretend we just used 1/2 tick */ + up->lastupdate -= Scaling/2; + sched(); + } +} + +/* + * recalculate priorities once a second. We need to do this + * since priorities will otherwise only be recalculated when + * the running process blocks. + */ +static void +rebalance(void) +{ + Mpl pl; + int pri, npri, t; + Schedq *rq; + Proc *p; + + t = m->ticks; + if(t - run.balancetime < HZ) + return; + run.balancetime = t; + + for(pri=0, rq=run.runq; prihead; + if(p == nil) + continue; + if(p->mp != m) + continue; + if(pri == p->basepri) + continue; + updatecpu(p); + npri = reprioritize(p); + if(npri != pri){ + pl = splhi(); + p = dequeueproc(&run, rq, p); + if(p) + queueproc(&run, &run.runq[npri], p, 0); + splx(pl); + goto another; + } + } +} + +/* + * Process p is ready to run, but there's no available core. + * Try to make a core available by + * 1. preempting a process with lower priority, or + * 2. preempting one with the same priority that has had more than HZ/10 ticks, or + * 3. rescheduling one that has run for more than HZ ticks, in the hope that its priority gets lowered. + */ +static void +preemptfor(Proc *p) +{ + ulong delta; + uint i, j, rr; + Proc *mup; + Mach *mp; + + assert(m->machno == 0); + /* + * try to preempt a lower priority process first, default back to + * round robin otherwise (rules 2 and 3 apply only on the second pass, rr == 1). + */ + for(rr = 0; rr < 2; rr++) + for(i = 0; i < MACHMAX; i++){ + j = pickcore(p->color, i); + if((mp = sys->machptr[j]) != nil && mp->nixrole == NIXTC){ + if(mp == m) + continue; + if(isbooting(mp)) + continue; + /* + * Caution here: mp->proc can change, even die. 
+ */ + mup = mp->proc; + if(mup == nil) /* one got idle */ + return; + delta = mp->ticks - mp->qstart; + if(mup->priority < p->priority){ + mp->qexpired = 1; + return; + } + if(rr && mup->priority == p->priority && delta > HZ/10){ + mp->qexpired = 1; + return; + } + if(rr && delta > HZ){ /* logical and; was rr & delta > HZ, same result since rr is 0 or 1 */ + mp->qexpired = 1; + return; + } + } + } +} + +/* + * Scheduling thread run as the main loop of cpu 0 + * Used in AMP sched. + */ +static void +mach0sched(void) +{ + Schedq *rq; + Proc *p; + Mach *mp; + ulong start, now; + int n, i, j; + + assert(m->machno == 0); + acmodeset(NIXKC); /* we don't time share any more */ + n = 0; + start = perfticks(); +loop: + + /* + * find a ready process that we might run. + */ + spllo(); + for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--) + for(p = rq->head; p; p = p->rnext){ + /* + * wired processes may only run when their core is available. + */ + if(p->wired != nil){ + if(p->wired->proc == nil) + goto found; + continue; + } + /* + * find a ready process that did run at an available core + * or one that has not moved for some time. + */ + if(p->mp == nil || p->mp->proc == nil || n>0) + goto found; + } + /* waste time or halt the CPU */ + if(!anyactive()) + idlehands(); + /* remember how much time we're here */ + now = perfticks(); + m->perf.inidle += now-start; + start = now; + n++; + goto loop; + +found: + assert(m->machno == 0); + splhi(); + /* + * find a core for this process, but honor wiring. + */ + mp = p->wired; + if(mp != nil){ + if(mp->proc != nil) + goto loop; + }else{ + for(i = 0; i < MACHMAX; i++){ + j = pickcore(p->color, i); + if((mp = sys->machptr[j]) != nil && mp->nixrole == NIXTC){ + if(isbooting(mp)) + continue; + if(mp != m && mp->proc == nil) + break; + } + } + if(i == MACHMAX){ + preemptfor(p); + goto loop; + } + } + + p = dequeueproc(&run, rq, p); + mp->proc = p; + if(p != nil){ + p->state = Scheding; + p->mp = mp; + } + + n = 0; + goto loop; +} + +/* + * SMP performs better than AMP with few cores. 
+ * So, leave this here for now. We should probably + * write a unified version of runproc good enough for + * both SMP and AMP. + */ +static Proc* +smprunproc(void) +{ + Schedq *rq; + Proc *p; + ulong start, now; + int i; + + start = perfticks(); + run.preempts++; /* bumped once per call, not per retry through loop */ + +loop: + /* + * find a process that last ran on this processor (affinity), + * or one that hasn't moved in a while (load balancing). Every + * time around the loop affinity goes down. + */ + spllo(); + if(isbooting(m)) + tcquiesce(); + for(i = 0;; i++){ /* i counts scans; from the second scan on, unwired procs of other cores qualify */ + /* + * find the highest priority target process that this + * processor can run given affinity constraints. + * + */ + for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){ + for(p = rq->head; p; p = p->rnext){ + if(p->mp == nil || p->mp == sys->machptr[m->machno] + || (!p->wired && i > 0)) + goto found; + } + } + + /* waste time or halt the CPU */ + if(!anyactive()) + idlehands(); + if(isbooting(m)) + tcquiesce(); + /* remember how much time we're here */ + now = perfticks(); + m->perf.inidle += now-start; + start = now; + } + +found: + splhi(); + p = dequeueproc(&run, rq, p); + if(p == nil) /* dequeueproc returned nothing; start the search over */ + goto loop; + + p->state = Scheding; + p->mp = sys->machptr[m->machno]; + + if(edflock(p)){ + edfrun(p, rq == &run.runq[PriEdf]); /* start deadline timer and do admin */ + edfunlock(); + } + if(p->trace) + proctrace(p, SRun, 0); + ainc(&run.nrunning); + return p; +} + +/* + * pick a process to run. + * most of this is used in AMP sched. + * (on a quad core or less, we use SMP). + * In the case of core 0 we always return nil, but + * schedule the picked process at any other available TC. + * In the case of other cores we wait until a process is given + * by core 0. 
+ */ +Proc* +runproc(void) +{ + Schedq *rq; + Proc *p; + ulong start, now; + + if(sys->nmach <= AMPmincores) /* few cores: use the SMP scheduler */ + return smprunproc(); + + start = perfticks(); + run.preempts++; + rq = nil; /* never set on this path, so the edfrun flag below is always 0 */ + if(m->machno != 0){ + do{ + spllo(); + while(m->proc == nil){ + if(isbooting(m)){ + coherence(); + if(m->proc != nil) + break; + tcquiesce(); + } + //idlehands(); + waitwhile(&m->proc, (uintptr)nil); + } + now = perfticks(); + m->perf.inidle += now-start; + start = now; + splhi(); + p = m->proc; /* re-read with interrupts off */ + }while(p == nil); + p->state = Scheding; + p->mp = sys->machptr[m->machno]; + + if(edflock(p)){ + edfrun(p, rq == &run.runq[PriEdf]); /* start deadline timer and do admin */ + edfunlock(); + } + if(p->trace) + proctrace(p, SRun, 0); + ainc(&run.nrunning); + return p; + } + + mach0sched(); + return nil; /* not reached */ +} + +int +canpage(Proc *p) +{ + int ok; + Sched *sch; + + splhi(); + sch = procsched(p); + lock(sch); + /* Only reliable way to see if we are Running */ + if(p->mach == 0) { + p->newtlb = 1; + ok = 1; + } + else + ok = 0; + unlock(sch); + spllo(); + + return ok; /* 1 if p had no mach (and newtlb was set), else 0 */ +} + +Proc* +newproc(void) +{ + Proc *p; + + p = psalloc(); + + p->state = Scheding; + p->psstate = "New"; + p->mach = 0; + p->qnext = 0; + p->nchild = 0; + p->nwait = 0; + p->waitq = 0; + p->parent = 0; + p->pgrp = 0; + p->egrp = 0; + p->fgrp = 0; + p->rgrp = 0; + p->pdbg = 0; + p->kp = 0; + if(up != nil && up->procctl == Proc_tracesyscall) /* syscall tracing is inherited from the creator */ + p->procctl = Proc_tracesyscall; + else + p->procctl = 0; + p->syscalltrace = nil; + p->notepending = 0; + p->ureg = 0; + p->privatemem = 0; + p->noswap = 0; + p->errstr = p->errbuf0; + p->syserrstr = p->errbuf1; + p->errbuf0[0] = '\0'; + p->errbuf1[0] = '\0'; + p->nlocks = 0; + p->delaysched = 0; + p->trace = 0; + kstrdup(&p->user, "*nouser"); + kstrdup(&p->text, "*notext"); + kstrdup(&p->args, ""); + p->nargs = 0; + p->setargs = 0; + memset(p->seg, 0, sizeof p->seg); + p->pid = incref(&pidalloc); + pshash(p); + p->noteid = incref(&noteidalloc); + if(p->pid <= 0 || p->noteid 
<= 0) /* either counter wrapped */ + panic("pidalloc"); + if(p->kstack == 0) /* an existing kernel stack is kept */ + p->kstack = smalloc(KSTACK); + + /* sched params */ + p->mp = 0; + p->wired = 0; + procpriority(p, PriNormal, 0); + p->cpu = 0; + p->lastupdate = sys->ticks*Scaling; + p->edf = nil; + + p->ntrap = 0; + p->nintr = 0; + p->nsyscall = 0; + p->nactrap = 0; + p->nacsyscall = 0; + p->nicc = 0; + p->actime = 0ULL; + p->tctime = 0ULL; + p->ac = nil; + p->nfullq = 0; + memset(&p->PMMU, 0, sizeof p->PMMU); + return p; +} + +/* + * wire this proc to a machine + */ +void +procwired(Proc *p, int bm) +{ + Proc *pp; + int i; + char nwired[MACHMAX]; + Mach *wm; + + if(bm < 0){ + /* pick a machine to wire to */ + memset(nwired, 0, sizeof(nwired)); + p->wired = 0; + for(i=0; (pp = psincref(i)) != nil; i++){ + wm = pp->wired; + if(wm && pp->pid) + nwired[wm->machno]++; + psdecref(pp); + } + bm = 0; + for(i=0; i<sys->nmach; i++) /* lowest-numbered machine with the fewest wired procs */ + if(nwired[i] < nwired[bm]) + bm = i; + } else { + /* use the virtual machine requested */ + bm = bm % sys->nmach; + } + + p->wired = sys->machptr[bm]; + p->mp = p->wired; + + /* + * adjust our color to the new domain. + */ + if(up == nil || p != up) /* only done when a process wires itself */ + return; + up->color = corecolor(up->mp->machno); + qlock(&up->seglock); + for(i = 0; i < NSEG; i++) + if(up->seg[i]) + up->seg[i]->color = up->color; + qunlock(&up->seglock); +} + +void +procpriority(Proc *p, int pri, int fixed) +{ + if(pri >= Npriq) /* clamp pri into 0..Npriq-1 */ + pri = Npriq - 1; + else if(pri < 0) + pri = 0; + p->basepri = pri; + p->priority = pri; + if(fixed){ + p->fixedpri = 1; + } else { + p->fixedpri = 0; + } +} + +/* + * sleep if a condition is not true. Another process will + * awaken us after it sets the condition. When we awaken + * the condition may no longer be true. + * + * we lock both the process and the rendezvous to keep r->p + * and p->r synchronized. 
+ */ +void +sleep(Rendez *r, int (*f)(void*), void *arg) +{ + Mpl pl; + + pl = splhi(); + + if(up->nlocks) /* diagnostic only; the sleep still goes ahead */ + print("process %d sleeps with %d locks held, last lock %#p locked at pc %#p, sleep called from %#p\n", + up->pid, up->nlocks, up->lastlock, up->lastlock->pc, getcallerpc(&r)); + lock(r); /* lock order here: r first, then up->rlock */ + lock(&up->rlock); + if(r->p){ + print("double sleep called from %#p, %d %d\n", + getcallerpc(&r), r->p->pid, up->pid); + dumpstack(); + } + + /* + * Wakeup only knows there may be something to do by testing + * r->p in order to get something to lock on. + * Flush that information out to memory in case the sleep is + * committed. + */ + r->p = up; + + if((*f)(arg) || up->notepending){ + /* + * if condition happened or a note is pending + * never mind + */ + r->p = nil; + unlock(&up->rlock); + unlock(r); + } else { + /* + * now we are committed to + * change state and call scheduler + */ + if(up->trace) + proctrace(up, SSleep, 0); + up->state = Wakeme; + up->r = r; + + /* statistics */ + m->cs++; + + procsave(up); + mmuflushtlb(m->pml4->pa); + if(setlabel(&up->sched)) { + /* + * here when the process is awakened + */ + procrestore(up); + } else { + /* + * here to go to sleep (i.e. 
stop Running) + */ + unlock(&up->rlock); + unlock(r); + gotolabel(&m->sched); + } + } + + if(up->notepending) { /* a pending note turns the sleep into error(Eintr) */ + up->notepending = 0; + splx(pl); + if(up->procctl == Proc_exitme && up->closingfgrp) + forceclosefgrp(); + error(Eintr); + } + + splx(pl); +} + +static int +tfn(void *arg) +{ + return up->trend == nil || up->tfn(arg); /* true once twakeup has cleared trend, or when the caller's fn is true */ +} + +void +twakeup(Ureg*, Timer *t) +{ + Proc *p; + Rendez *trend; + + p = t->ta; /* tsleep stores the sleeping proc in ta */ + trend = p->trend; + p->trend = 0; + if(trend) + wakeup(trend); +} + +void +tsleep(Rendez *r, int (*fn)(void*), void *arg, long ms) +{ + if (up->tt){ + print("tsleep: timer active: mode %d, tf %#p\n", + up->tmode, up->tf); + timerdel(up); + } + up->tns = MS2NS(ms); + up->tf = twakeup; + up->tmode = Trelative; + up->ta = up; + up->trend = r; + up->tfn = fn; + timeradd(up); + + if(waserror()){ /* remove the timer before passing the error on */ + timerdel(up); + nexterror(); + } + sleep(r, tfn, arg); + if (up->tt) + timerdel(up); + up->twhen = 0; + poperror(); +} + +/* + * Expects that only one process can call wakeup for any given Rendez. + * We hold both locks to ensure that r->p and p->r remain consistent. + * Richard Miller has a better solution that doesn't require both to + * be held simultaneously, but I'm a paranoid - presotto. + */ +Proc* +wakeup(Rendez *r) +{ + Mpl pl; + Proc *p; + + pl = splhi(); + + lock(r); + p = r->p; + + if(p != nil){ + lock(&p->rlock); + if(p->state != Wakeme || p->r != r) + panic("wakeup: state"); + r->p = nil; + p->r = nil; + ready(p); + unlock(&p->rlock); + } + unlock(r); + + splx(pl); + + return p; /* the proc that was readied, or nil if none was sleeping on r */ +} + +/* + * if waking a sleeping process, this routine must hold both + * p->rlock and r->lock. However, it can't know them in + * the same order as wakeup causing a possible lock ordering + * deadlock. We break the deadlock by giving up the p->rlock + * lock if we can't get the r->lock and retrying. 
+ */ +int +postnote(Proc *p, int dolock, char *n, int flag) +{ + Mpl pl; + int ret; + Rendez *r; + Proc *d, **l; + + if(dolock) + qlock(&p->debug); + + if(flag != NUser && (p->notify == 0 || p->notified)) /* a non-user note discards whatever is queued in this case */ + p->nnote = 0; + + ret = 0; + if(p->nnote < NNOTE) { + strcpy(p->note[p->nnote].msg, n); /* NOTE(review): unbounded copy; assumes n fits in the note's msg buffer - confirm callers */ + p->note[p->nnote++].flag = flag; + ret = 1; + } + p->notepending = 1; + + /* NIX */ + if(p->state == Exotic){ + /* it could be that the process is not running + * in the AC when we interrupt the AC, but then + * we'd only get an extra interrupt in the AC, and + * nothing should happen. + */ + intrac(p); + } + + if(dolock) + qunlock(&p->debug); + + /* this loop is to avoid lock ordering problems. */ + for(;;){ + pl = splhi(); + lock(&p->rlock); + r = p->r; + + /* waiting for a wakeup? */ + if(r == nil) + break; /* no */ + + /* try for the second lock */ + if(canlock(r)){ + if(p->state != Wakeme || r->p != p) + panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state); + p->r = nil; + r->p = nil; + ready(p); + unlock(r); + break; + } + + /* give other process time to get out of critical section and try again */ + unlock(&p->rlock); + splx(pl); + sched(); + } + unlock(&p->rlock); /* both break paths arrive here still holding p->rlock */ + splx(pl); + + if(p->state != Rendezvous){ + if(p->state == Semdown || p->state == Semalt) + ready(p); + return ret; + } + /* Try and pull out of a rendezvous */ + lock(p->rgrp); + if(p->state == Rendezvous) { /* checked again now that rgrp is locked */ + p->rendval = ~0; + l = &REND(p->rgrp, p->rendtag); + for(d = *l; d; d = d->rendhash) { + if(d == p) { + *l = p->rendhash; + break; + } + l = &d->rendhash; + } + ready(p); + } + unlock(p->rgrp); + return ret; /* 1 if the note was queued, 0 if the note array was full */ +} + +/* + * weird thing: keep at most NBROKEN around + */ +#define NBROKEN 4 +struct +{ + QLock; + int n; + Proc *p[NBROKEN]; +}broken; + +void +addbroken(Proc *p) +{ + qlock(&broken); + if(broken.n == NBROKEN) { /* full: ready the oldest entry and shift the rest down */ + ready(broken.p[0]); + memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1)); + --broken.n; + } + broken.p[broken.n++] = p; + qunlock(&broken); + + stopac(); + 
edfstop(up); + p->state = Broken; + p->psstate = 0; + sched(); +} + +void +unbreak(Proc *p) +{ + int b; + + qlock(&broken); + for(b=0; b < broken.n; b++) + if(broken.p[b] == p) { /* found: close the gap and make p ready */ + broken.n--; + memmove(&broken.p[b], &broken.p[b+1], + sizeof(Proc*)*(NBROKEN-(b+1))); + ready(p); + break; + } + qunlock(&broken); +} + +int +freebroken(void) +{ + int i, n; + + qlock(&broken); + n = broken.n; + for(i=0; infullq > 0) /* NOTE(review): text between "i" and "nfullq" was lost in extraction (rest of freebroken and the start of pexit) - restore from the upstream source */ + iprint(" %s=%d", up->text, up->nfullq); + if(0 && up->nicc > 0) /* disabled debug output */ + iprint(" [%s nicc %ud tctime %ulld actime %ulld]\n", + up->text, up->nicc, up->tctime, up->actime); + if(up->syscalltrace != nil) + free(up->syscalltrace); + up->syscalltrace = nil; + up->alarm = 0; + + if (up->tt) + timerdel(up); + if(up->trace) + proctrace(up, SDead, 0); + + /* nil out all the resources under lock (free later) */ + qlock(&up->debug); + fgrp = up->fgrp; + up->fgrp = nil; + egrp = up->egrp; + up->egrp = nil; + rgrp = up->rgrp; + up->rgrp = nil; + pgrp = up->pgrp; + up->pgrp = nil; + dot = up->dot; + up->dot = nil; + qunlock(&up->debug); + + + if(fgrp) + closefgrp(fgrp); + if(egrp) + closeegrp(egrp); + if(rgrp) + closergrp(rgrp); + if(dot) + cclose(dot); + if(pgrp) + closepgrp(pgrp); + + /* + * if not a kernel process and have a parent, + * do some housekeeping. + */ + if(up->kp == 0) { + p = up->parent; + if(p == 0) { + if(exitstr == 0) + exitstr = "unknown"; + panic("boot process died: %s", exitstr); + } + + while(waserror()) /* retry until smalloc gets through without an error */ + ; + + wq = smalloc(sizeof(Waitq)); + poperror(); + + wq->w.pid = up->pid; + utime = up->time[TUser] + up->time[TCUser]; + stime = up->time[TSys] + up->time[TCSys]; + wq->w.time[TUser] = tk2ms(utime); + wq->w.time[TSys] = tk2ms(stime); + wq->w.time[TReal] = tk2ms(sys->ticks - up->time[TReal]); + if(exitstr && exitstr[0]) + snprint(wq->w.msg, sizeof(wq->w.msg), "%s %d: %s", + up->text, up->pid, exitstr); + else + wq->w.msg[0] = '\0'; + + lock(&p->exl); + /* + * Check that parent is still alive. 
+ */ + if(p->pid == up->parentpid && p->state != Broken) { + p->nchild--; + p->time[TCUser] += utime; + p->time[TCSys] += stime; + /* + * If there would be more than 128 wait records + * for my parent, then don't leave a wait + * record behind. This helps prevent badly written + * daemon processes from accumulating lots of wait + * records. + */ + if(p->nwait < 128) { + wq->next = p->waitq; + p->waitq = wq; + p->nwait++; + wq = nil; /* now owned by the parent's wait queue */ + wakeup(&p->waitr); + } + } + unlock(&p->exl); + if(wq) /* record was not handed to the parent */ + free(wq); + } + + if(!freemem) + addbroken(up); + + qlock(&up->seglock); + es = &up->seg[NSEG]; + for(s = up->seg; s < es; s++) { + if(*s) { + putseg(*s); + *s = 0; + } + } + qunlock(&up->seglock); + + lock(&up->exl); /* Prevent my children from leaving waits */ + psunhash(up); + up->pid = 0; + wakeup(&up->waitr); + unlock(&up->exl); + + for(f = up->waitq; f; f = next) { /* free wait records nobody collected */ + next = f->next; + free(f); + } + + /* release debuggers */ + qlock(&up->debug); + if(up->pdbg) { + wakeup(&up->pdbg->sleep); + up->pdbg = 0; + } + qunlock(&up->debug); + + /* Sched must not loop for these locks */ + lock(&procalloc); + lock(&pga); + + stopac(); + edfstop(up); + up->state = Moribund; + sched(); + panic("pexit"); /* only reached if sched() returns */ +} + +int +haswaitq(void *x) +{ + Proc *p; + + p = (Proc *)x; + return p->waitq != 0; /* sleep condition used by pwait */ +} + +int +pwait(Waitmsg *w) +{ + int cpid; + Waitq *wq; + + if(!canqlock(&up->qwaitr)) /* qwaitr already held: fail with Einuse */ + error(Einuse); + + if(waserror()) { + qunlock(&up->qwaitr); + nexterror(); + } + + lock(&up->exl); + if(up->nchild == 0 && up->waitq == 0) { /* no children and no pending wait records */ + unlock(&up->exl); + error(Enochild); + } + unlock(&up->exl); + + sleep(&up->waitr, haswaitq, up); + + lock(&up->exl); + wq = up->waitq; + up->waitq = wq->next; + up->nwait--; + unlock(&up->exl); + + qunlock(&up->qwaitr); + poperror(); + + if(w) /* w may be nil when the caller only wants the pid */ + memmove(w, &wq->w, sizeof(Waitmsg)); + cpid = wq->w.pid; + free(wq); + + return cpid; +} + +void +dumpaproc(Proc *p) +{ + uintptr bss; + char *s; + + if(p == 0) + return; + + bss = 0; + if(p->seg[HSEG]) + bss = 
p->seg[HSEG]->top; + else if(p->seg[BSEG]) + bss = p->seg[BSEG]->top; + + s = p->psstate; + if(s == 0) /* no psstate string: fall back to the state name */ + s = statename[p->state]; + print("%3d:%10s pc %#p dbgpc %#p %8s (%s) ut %ld st %ld bss %#p qpc %#p nl %d nd %ud lpc %#p pri %ud\n", + p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state], + p->time[0], p->time[1], bss, p->qpc, p->nlocks, + p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority); +} + +void +procdump(void) +{ + int i; + Proc *p; + + if(up) + print("up %d\n", up->pid); + else + print("no current process\n"); + for(i=0; (p = psincref(i)) != nil; i++) { /* every proc slot; Dead ones are not printed */ + if(p->state != Dead) + dumpaproc(p); + psdecref(p); + } +} + +/* + * wait till all processes have flushed their mmu + * state about segment s + */ +void +procflushseg(Segment *s) +{ + int i, ns, nm, nwait; + Proc *p; + Mach *mp; + + /* + * tell all processes with this + * segment to flush their mmu's + */ + nwait = 0; /* number of machs whose mmuflush flag gets set below */ + for(i=0; (p = psincref(i)) != nil; i++) { + if(p->state == Dead){ + psdecref(p); + continue; + } + for(ns = 0; ns < NSEG; ns++){ + if(p->seg[ns] == s){ + p->newtlb = 1; + for(nm = 0; nm < MACHMAX; nm++) + if((mp = sys->machptr[nm]) != nil && mp->nixrole != NIXUC) + if(mp->proc == p){ + mp->mmuflush = 1; + nwait++; + } + break; /* p uses s; no need to look at its other segments */ + } + } + psdecref(p); + } + + if(nwait == 0) + return; + + /* + * wait for all processors to take a clock interrupt + * and flush their mmu's. + * NIX BUG: this won't work if another core is in AC mode. + * In that case we must IPI it, but only if that core is + * using this segment. 
+ */ + for(i = 0; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->nixrole != NIXUC) + if(mp != m) + while(mp->mmuflush) /* yield until that mach clears its flag */ + sched(); +} + +void +scheddump(void) +{ + Proc *p; + Schedq *rq; + + for(rq = &run.runq[Nrq-1]; rq >= run.runq; rq--){ /* last run queue first */ + if(rq->head == 0) + continue; + print("run[%ld]:", rq-run.runq); + for(p = rq->head; p; p = p->rnext) + print(" %d(%lud)", p->pid, m->ticks - p->readytime); + print("\n"); + delay(150); + } + print("nrdy %d\n", run.nrdy); +} + +void +kproc(char *name, void (*func)(void *), void *arg) +{ + Proc *p; + static Pgrp *kpgrp; /* one Pgrp shared by every kproc, created on first use */ + + p = newproc(); + p->psstate = 0; + p->procmode = 0640; + p->kp = 1; + p->noswap = 1; + + p->scallnr = up->scallnr; + memmove(p->arg, up->arg, sizeof(up->arg)); + p->nerrlab = 0; + p->slash = up->slash; + p->dot = up->dot; + if(p->dot) + incref(p->dot); + + memmove(p->note, up->note, sizeof(p->note)); + p->nnote = up->nnote; + p->notified = 0; + p->lastnote = up->lastnote; + p->notify = up->notify; + p->ureg = 0; + p->dbgreg = 0; + + procpriority(p, PriKproc, 0); + + kprocchild(p, func, arg); + + kstrdup(&p->user, eve); + kstrdup(&p->text, name); + if(kpgrp == 0) + kpgrp = newpgrp(); + p->pgrp = kpgrp; + incref(kpgrp); + + memset(p->time, 0, sizeof(p->time)); + p->time[TReal] = sys->ticks; + ready(p); + /* + * since the bss/data segments are now shareable, + * any mmu info about this process is now stale + * and has to be discarded. + */ + p->newtlb = 1; + mmuflush(); +} + +/* + * called splhi() by notify(). See comment in notify for the + * reasoning. 
+ */ +void +procctl(Proc *p) +{ + Mpl pl; + char *state; + + switch(p->procctl) { + case Proc_exitbig: + spllo(); + pexit("Killed: Insufficient physical memory", 1); + + case Proc_exitme: + spllo(); /* pexit has locks in it */ + pexit("Killed", 1); + + case Proc_traceme: + if(p->nnote == 0) + return; + /* No break */ + + case Proc_stopme: + p->procctl = 0; + state = p->psstate; + p->psstate = "Stopped"; + /* free a waiting debugger */ + pl = spllo(); + qlock(&p->debug); + if(p->pdbg) { + wakeup(&p->pdbg->sleep); + p->pdbg = 0; + } + qunlock(&p->debug); + splhi(); + p->state = Stopped; + sched(); + p->psstate = state; + splx(pl); + return; + + case Proc_toac: + p->procctl = 0; + /* + * This pretends to return from the system call, + * by moving to a core, but never returns (unless + * the process gets moved back to a TC.) + */ + spllo(); + if(p->ac == nil) + getac(p, -1); + runacore(); + return; + + case Proc_totc: + p->procctl = 0; + if(p != up) + panic("procctl: stopac: p != up"); + spllo(); + stopac(); + return; + } +} + +void +error(char *err) +{ + spllo(); + + assert(up->nerrlab < NERR); + kstrcpy(up->errstr, err, ERRMAX); + setlabel(&up->errlab[NERR-1]); + nexterror(); +} + +void +nexterror(void) +{ + gotolabel(&up->errlab[--up->nerrlab]); +} + +void +exhausted(char *resource) +{ + char buf[ERRMAX]; + + sprint(buf, "no free %s", resource); + iprint("%s\n", buf); + error(buf); +} + +void +killbig(char *why) +{ + int i, x; + Segment *s; + ulong l, max; + Proc *p, *kp; + + max = 0; + kp = nil; + for(x = 0; (p = psincref(x)) != nil; x++) { + if(p->state == Dead || p->kp){ + psdecref(p); + continue; + } + l = 0; + for(i=1; iseg[i]; + if(s != 0) + l += s->top - s->base; + } + if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) { + if(kp != nil) + psdecref(kp); + kp = p; + max = l; + } + else + psdecref(p); + } + if(kp == nil) + return; + + print("%d: %s killed: %s\n", kp->pid, kp->text, why); + for(x = 0; (p = psincref(x)) != nil; x++) { + if(p->state == 
Dead || p->kp){ + psdecref(p); + continue; + } + if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG]) /* procs sharing kp's BSEG are killed too */ + p->procctl = Proc_exitbig; + psdecref(p); + } + + kp->procctl = Proc_exitbig; + for(i = 0; i < NSEG; i++) { + s = kp->seg[i]; + if(s != 0 && canqlock(&s->lk)) { /* segments whose lock is busy are skipped */ + mfreeseg(s, s->base, (s->top - s->base)/BIGPGSZ); + qunlock(&s->lk); + } + } + psdecref(kp); +} + +/* + * change ownership to 'new' of all processes owned by 'old'. Used when + * eve changes. + */ +void +renameuser(char *old, char *new) +{ + int i; + Proc *p; + + for(i = 0; (p = psincref(i)) != nil; i++){ + if(p->user!=nil && strcmp(old, p->user)==0) + kstrdup(&p->user, new); + psdecref(p); + } +} + +/* + * time accounting called by clock() splhi'd + * only cpu1 computes system load average + * but the system load average is accounted for cpu0. + */ +void +accounttime(void) +{ + Proc *p; + ulong n, per; + + p = m->proc; + if(p) { + if(m->machno == 1) + run.nrunhz++; + p->time[p->insyscall]++; /* insyscall selects which time slot gets the tick */ + } + + /* calculate decaying duty cycles */ + n = perfticks(); + per = n - m->perf.last; + m->perf.last = n; + per = (m->perf.period*(HZ-1) + per)/HZ; + if(per != 0) /* a zero average is never stored */ + m->perf.period = per; + + m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ; + m->perf.inidle = 0; + + m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ; + m->perf.inintr = 0; + + /* only one processor gets to compute system load averages. + * it has to be mach 1 when we use AMP. + */ + if(sys->nmach > 1 && m->machno != 1) + return; + + /* + * calculate decaying load average. + * if we decay by (n-1)/n then it takes + * n clock ticks to go from load L to .36 L once + * things quiet down. it takes about 5 n clock + * ticks to go to zero. so using HZ means this is + * approximately the load over the last second, + * with a tail lasting about 5 seconds. 
+ */ + n = run.nrunhz; + run.nrunhz = 0; + n = (run.nrdy+n)*1000; /* load is kept scaled by 1000 */ + sys->load = (sys->load*(HZ-1)+n)/HZ; +} + +void +halt(void) +{ + if(run.nrdy != 0) /* something is ready to run: do not halt */ + return; + hardhalt(); +} diff -Nru 0/sys/src/nix/port/ps.c 4/sys/src/nix/port/ps.c --- 0/sys/src/nix/port/ps.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/ps.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,127 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +int +nprocs(void) +{ + return procalloc.nproc; +} + +void +pshash(Proc *p) +{ + int h; + + h = p->pid % nelem(procalloc.ht); + lock(&procalloc); + p->pidhash = procalloc.ht[h]; /* push p on the front of its hash chain */ + procalloc.ht[h] = p; + unlock(&procalloc); +} + +void +psunhash(Proc *p) +{ + int h; + Proc **l; + + h = p->pid % nelem(procalloc.ht); /* must run while p->pid still holds the hashed pid */ + lock(&procalloc); + for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash) + if(*l == p){ + *l = p->pidhash; + break; + } + unlock(&procalloc); +} + +int +psindex(int pid) +{ + Proc *p; + int h; + int s; + + s = -1; /* returned when no proc with this pid is hashed */ + h = pid % nelem(procalloc.ht); + lock(&procalloc); + for(p = procalloc.ht[h]; p != nil; p = p->pidhash) + if(p->pid == pid){ + s = p->index; + break; + } + unlock(&procalloc); + return s; +} + +Proc* +psincref(int i) +{ + /* + * Placeholder. + */ + if(i >= conf.nproc) /* past the end of the arena: ends callers' iteration */ + return nil; + return &procalloc.arena[i]; +} + +void +psdecref(Proc *p) +{ + /* + * Placeholder. 
+ */ + USED(p); +} + +void +psrelease(Proc* p) +{ + p->qnext = procalloc.free; + procalloc.free = p; + procalloc.nproc--; +} + +Proc* +psalloc(void) +{ + Proc *p; + + lock(&procalloc); + for(;;) { + if(p = procalloc.free) + break; + + unlock(&procalloc); + resrcwait("no procs"); + lock(&procalloc); + } + procalloc.free = p->qnext; + procalloc.nproc++; + unlock(&procalloc); + + return p; +} + +void +psinit(int nproc) +{ + Proc *p; + int i; + + procalloc.free = malloc(nproc*sizeof(Proc)); + if(procalloc.free == nil) + panic("cannot allocate %ud procs (%udMB)\n", nproc, nproc*sizeof(Proc)/(1024*1024)); + procalloc.arena = procalloc.free; + + p = procalloc.free; + for(i=0; iqnext = p+1; + p->index = i; + } + p->qnext = 0; + p->index = i; +} diff -Nru 0/sys/src/nix/port/qio.c 4/sys/src/nix/port/qio.c --- 0/sys/src/nix/port/qio.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/qio.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1543 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +static ulong padblockcnt; +static ulong concatblockcnt; +static ulong pullupblockcnt; +static ulong copyblockcnt; +static ulong consumecnt; +static ulong producecnt; +static ulong qcopycnt; + +static int debugging; + +#define QDEBUG if(0) + +/* + * IO queues + */ +typedef struct Queue Queue; + +struct Queue +{ + Lock; + + Block* bfirst; /* buffer */ + Block* blast; + + int len; /* bytes allocated to queue */ + int dlen; /* data bytes in queue */ + int limit; /* max bytes in queue */ + int inilim; /* initial limit */ + int state; + int noblock; /* true if writes return immediately when q full */ + int eof; /* number of eofs read by user */ + + void (*kick)(void*); /* restart output */ + void (*bypass)(void*, Block*); /* bypass queue altogether */ + void* arg; /* argument to kick */ + + QLock rlock; /* mutex for reading processes */ + Rendez rr; /* process waiting to read */ + QLock wlock; /* mutex for writing processes */ + 
Rendez wr; /* process waiting to write */ + + char err[ERRMAX]; +}; + +enum +{ + Maxatomic = 64*1024, +}; + +uint qiomaxatomic = Maxatomic; + +void +ixsummary(void) +{ + debugging ^= 1; /* each call also toggles the debugging flag */ + iallocsummary(); + print("pad %lud, concat %lud, pullup %lud, copy %lud\n", + padblockcnt, concatblockcnt, pullupblockcnt, copyblockcnt); + print("consume %lud, produce %lud, qcopy %lud\n", + consumecnt, producecnt, qcopycnt); +} + +/* + * free a list of blocks + */ +void +freeblist(Block *b) +{ + Block *next; + + for(; b != 0; b = next){ + next = b->next; + b->next = 0; /* unlink first so freeb sees a single block */ + freeb(b); + } +} + +/* + * pad a block to the front (or the back if size is negative) + */ +Block* +padblock(Block *bp, int size) +{ + int n; + Block *nbp; + + QDEBUG checkb(bp, "padblock 1"); + if(size >= 0){ + if(bp->rp - bp->base >= size){ /* enough headroom already: just move rp back */ + bp->rp -= size; + return bp; + } + + if(bp->next) + panic("padblock %#p", getcallerpc(&bp)); + n = BLEN(bp); + padblockcnt++; + nbp = allocb(size+n); + nbp->rp += size; + nbp->wp = nbp->rp; + memmove(nbp->wp, bp->rp, n); + nbp->wp += n; + freeb(bp); + nbp->rp -= size; + } else { + size = -size; + + if(bp->next) + panic("padblock %#p", getcallerpc(&bp)); + + if(bp->lim - bp->wp >= size) /* enough tailroom already */ + return bp; + + n = BLEN(bp); + padblockcnt++; + nbp = allocb(size+n); + memmove(nbp->wp, bp->rp, n); + nbp->wp += n; + freeb(bp); + } + QDEBUG checkb(nbp, "padblock 1"); + return nbp; +} + +/* + * return count of bytes in a string of blocks + */ +int +blocklen(Block *bp) +{ + int len; + + len = 0; + while(bp) { + len += BLEN(bp); + bp = bp->next; + } + return len; +} + +/* + * return count of space in blocks + */ +int +blockalloclen(Block *bp) +{ + int len; + + len = 0; + while(bp) { + len += BALLOC(bp); + bp = bp->next; + } + return len; +} + +/* + * copy the string of blocks into + * a single block and free the string + */ +Block* +concatblock(Block *bp) +{ + int len; + Block *nb, *f; + + if(bp->next == 0) /* already a single block */ + return bp; + + nb = allocb(blocklen(bp)); + for(f = bp; f; f = f->next) { + len 
= BLEN(f); + memmove(nb->wp, f->rp, len); + nb->wp += len; + } + concatblockcnt += BLEN(nb); /* this counter accumulates bytes, not calls */ + freeblist(bp); + QDEBUG checkb(nb, "concatblock 1"); + return nb; +} + +/* + * make sure the first block has at least n bytes + */ +Block* +pullupblock(Block *bp, int n) +{ + int i; + Block *nbp; + + /* + * this should almost always be true, it's + * just to avoid every caller checking. + */ + if(BLEN(bp) >= n) + return bp; + + /* + * if not enough room in the first block, + * add another to the front of the list. + */ + if(bp->lim - bp->rp < n){ + nbp = allocb(n); + nbp->next = bp; + bp = nbp; + } + + /* + * copy bytes from the trailing blocks into the first + */ + n -= BLEN(bp); + while(nbp = bp->next){ + i = BLEN(nbp); + if(i > n) { + memmove(bp->wp, nbp->rp, n); + pullupblockcnt++; + bp->wp += n; + nbp->rp += n; + QDEBUG checkb(bp, "pullupblock 1"); + return bp; + } else { + /* shouldn't happen but why crash if it does */ + if(i < 0){ + print("pullupblock -ve length, from %#p\n", + getcallerpc(&bp)); + i = 0; + } + memmove(bp->wp, nbp->rp, i); + pullupblockcnt++; + bp->wp += i; + bp->next = nbp->next; + nbp->next = 0; + freeb(nbp); + n -= i; + if(n == 0){ + QDEBUG checkb(bp, "pullupblock 2"); + return bp; + } + } + } + freeb(bp); /* list ran out before n bytes were gathered */ + return 0; +} + +/* + * make sure the first block has at least n bytes + */ +Block* +pullupqueue(Queue *q, int n) +{ + Block *b; + + if(BLEN(q->bfirst) >= n) + return q->bfirst; + q->bfirst = pullupblock(q->bfirst, n); + for(b = q->bfirst; b != nil && b->next != nil; b = b->next) /* walk to the new last block */ + ; + q->blast = b; + return q->bfirst; +} + +/* + * trim to len bytes starting at offset + */ +Block * +trimblock(Block *bp, int offset, int len) +{ + long l; + Block *nb, *startb; + + QDEBUG checkb(bp, "trimblock 1"); + if(blocklen(bp) < offset+len) { /* not enough data: the whole list is freed */ + freeblist(bp); + return nil; + } + + while((l = BLEN(bp)) < offset) { + offset -= l; + nb = bp->next; + bp->next = nil; + freeb(bp); + bp = nb; + } + + startb = bp; + bp->rp += offset; + + while((l = BLEN(bp)) < 
len) { + len -= l; + bp = bp->next; + } + + bp->wp -= (BLEN(bp) - len); + + if(bp->next) { + freeblist(bp->next); + bp->next = nil; + } + + return startb; +} + +/* + * copy 'count' bytes into a new block + */ +Block* +copyblock(Block *bp, int count) +{ + int l; + Block *nbp; + + QDEBUG checkb(bp, "copyblock 0"); + if(bp->flag & BINTR){ /* BINTR source: use iallocb, which may return nil */ + nbp = iallocb(count); + if(nbp == nil) + return nil; + }else + nbp = allocb(count); + for(; count > 0 && bp != 0; bp = bp->next){ + l = BLEN(bp); + if(l > count) + l = count; + memmove(nbp->wp, bp->rp, l); + nbp->wp += l; + count -= l; + } + if(count > 0){ /* source was shorter than count: zero-fill the rest */ + memset(nbp->wp, 0, count); + nbp->wp += count; + } + copyblockcnt++; + QDEBUG checkb(nbp, "copyblock 1"); + + return nbp; +} + +Block* +adjustblock(Block* bp, int len) +{ + int n; + Block *nbp; + + if(len < 0){ + freeb(bp); + return nil; + } + + if(bp->rp+len > bp->lim){ /* does not fit: copy into a new block of len bytes */ + nbp = copyblock(bp, len); + freeblist(bp); + QDEBUG checkb(nbp, "adjustblock 1"); + + return nbp; + } + + n = BLEN(bp); + if(len > n) /* growing in place: zero the added bytes */ + memset(bp->wp, 0, len-n); + bp->wp = bp->rp+len; + QDEBUG checkb(bp, "adjustblock 2"); + + return bp; +} + + +/* + * throw away up to count bytes from a + * list of blocks. Return count of bytes + * thrown away. 
+ */ +int +pullblock(Block **bph, int count) +{ + Block *bp; + int n, bytes; + + bytes = 0; + if(bph == nil) + return 0; + + while(*bph != nil && count != 0) { + bp = *bph; + n = BLEN(bp); + if(count < n) + n = count; + bytes += n; + count -= n; + bp->rp += n; + QDEBUG checkb(bp, "pullblock "); + if(BLEN(bp) == 0) { + *bph = bp->next; + bp->next = nil; + freeb(bp); + } + } + return bytes; +} + +/* + * get next block from a queue, return null if nothing there + */ +Block* +qget(Queue *q) +{ + int dowakeup; + Block *b; + + /* sync with qwrite */ + ilock(q); + + b = q->bfirst; + if(b == nil){ + q->state |= Qstarve; + iunlock(q); + return nil; + } + q->bfirst = b->next; + b->next = 0; + q->len -= BALLOC(b); + q->dlen -= BLEN(b); + QDEBUG checkb(b, "qget"); + + /* if writer flow controlled, restart */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } else + dowakeup = 0; + + iunlock(q); + + if(dowakeup) + wakeup(&q->wr); + + return b; +} + +/* + * throw away the next 'len' bytes in the queue + */ +int +qdiscard(Queue *q, int len) +{ + Block *b; + int dowakeup, n, sofar; + + ilock(q); + for(sofar = 0; sofar < len; sofar += n){ + b = q->bfirst; + if(b == nil) + break; + QDEBUG checkb(b, "qdiscard"); + n = BLEN(b); + if(n <= len - sofar){ + q->bfirst = b->next; + b->next = 0; + q->len -= BALLOC(b); + q->dlen -= BLEN(b); + freeb(b); + } else { + n = len - sofar; + b->rp += n; + q->dlen -= n; + } + } + + /* + * if writer flow controlled, restart + * + * This used to be + * q->len < q->limit/2 + * but it slows down tcp too much for certain write sizes. + * I really don't understand it completely. It may be + * due to the queue draining so fast that the transmission + * stalls waiting for the app to produce more data. - presotto + * + * changed back from q->len < q->limit for reno tcp. 
- jmk + */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } else + dowakeup = 0; + + iunlock(q); + + if(dowakeup) + wakeup(&q->wr); + + return sofar; +} + +/* + * Interrupt level copy out of a queue, return # bytes copied. + */ +int +qconsume(Queue *q, void *vp, int len) +{ + Block *b; + int n, dowakeup; + uchar *p = vp; + Block *tofree = nil; + + /* sync with qwrite */ + ilock(q); + + for(;;) { + b = q->bfirst; + if(b == 0){ + q->state |= Qstarve; + iunlock(q); + return -1; + } + QDEBUG checkb(b, "qconsume 1"); + + n = BLEN(b); + if(n > 0) + break; + q->bfirst = b->next; + q->len -= BALLOC(b); + + /* remember to free this */ + b->next = tofree; + tofree = b; + }; + + if(n < len) + len = n; + memmove(p, b->rp, len); + consumecnt += n; + b->rp += len; + q->dlen -= len; + + /* discard the block if we're done with it */ + if((q->state & Qmsg) || len == n){ + q->bfirst = b->next; + b->next = 0; + q->len -= BALLOC(b); + q->dlen -= BLEN(b); + + /* remember to free this */ + b->next = tofree; + tofree = b; + } + + /* if writer flow controlled, restart */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } else + dowakeup = 0; + + iunlock(q); + + if(dowakeup) + wakeup(&q->wr); + + if(tofree != nil) + freeblist(tofree); + + return len; +} + +int +qpass(Queue *q, Block *b) +{ + int dlen, len, dowakeup; + + /* sync with qread */ + dowakeup = 0; + ilock(q); + if(q->len >= q->limit){ + freeblist(b); + iunlock(q); + return -1; + } + if(q->state & Qclosed){ + len = BALLOC(b); + freeblist(b); + iunlock(q); + return len; + } + + /* add buffer to queue */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + len = BALLOC(b); + dlen = BLEN(b); + QDEBUG checkb(b, "qpass"); + while(b->next){ + b = b->next; + QDEBUG checkb(b, "qpass"); + len += BALLOC(b); + dlen += BLEN(b); + } + q->blast = b; + q->len += len; + q->dlen += dlen; + + if(q->len >= q->limit/2) + q->state |= Qflow; + + if(q->state & 
Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; /* a reader flagged the queue as starved; wake it below */ + } + iunlock(q); + + if(dowakeup) + wakeup(&q->rr); + + return len; +} + +int +qpassnolim(Queue *q, Block *b) +{ /* same steps as qpass, minus the check of q->len against q->limit */ + int dlen, len, dowakeup; + + /* sync with qread */ + dowakeup = 0; + ilock(q); + + if(q->state & Qclosed){ + len = BALLOC(b); + freeblist(b); + iunlock(q); + return len; + } + + /* add buffer to queue */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + len = BALLOC(b); + dlen = BLEN(b); + QDEBUG checkb(b, "qpass"); + while(b->next){ /* walk to the end of the list, totalling sizes on the way */ + b = b->next; + QDEBUG checkb(b, "qpass"); + len += BALLOC(b); + dlen += BLEN(b); + } + q->blast = b; + q->len += len; + q->dlen += dlen; + + if(q->len >= q->limit/2) + q->state |= Qflow; + + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + iunlock(q); + + if(dowakeup) + wakeup(&q->rr); + + return len; +} + +/* + * if the allocated space is way out of line with the used + * space, reallocate to a smaller block. + * a block is replaced when its data length is less than a + * quarter of its allocated size; the (possibly new) head of + * the list is returned. + */ +Block* +packblock(Block *bp) +{ + Block **l, *nbp; + int n; + + for(l = &bp; *l; l = &(*l)->next){ + nbp = *l; + n = BLEN(nbp); + if((n<<2) < BALLOC(nbp)){ + *l = allocb(n); /* the replacement takes the old block's place in the list */ + memmove((*l)->wp, nbp->rp, n); + (*l)->wp += n; + (*l)->next = nbp->next; + freeb(nbp); + } + } + + return bp; +} + +int +qproduce(Queue *q, void *vp, int len) +{ /* append a copy of the len bytes at vp to q as one new block */ + Block *b; + int dowakeup; + uchar *p = vp; + + /* sync with qread */ + dowakeup = 0; + ilock(q); + + /* no waiting receivers, room in buffer?
*/ + if(q->len >= q->limit){ + q->state |= Qflow; + iunlock(q); + return -1; /* queue full: nothing was queued */ + } + + /* save in buffer */ + b = iallocb(len); + if(b == 0){ + iunlock(q); + return 0; /* no block available: nothing was queued */ + } + memmove(b->wp, p, len); + producecnt += len; + b->wp += len; + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->blast = b; + /* b->next = 0; done by iallocb() */ + q->len += BALLOC(b); + q->dlen += BLEN(b); + QDEBUG checkb(b, "qproduce"); + + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + + if(q->len >= q->limit) + q->state |= Qflow; + iunlock(q); + + if(dowakeup) + wakeup(&q->rr); + + return len; +} + +/* + * copy from offset in the queue. + * returns a newly allocated block holding up to len bytes of + * the queued data, starting offset bytes in; the queue itself + * is left unchanged. the block is empty if offset lies beyond + * the queued data. + */ +Block* +qcopy(Queue *q, int len, ulong offset) +{ + int sofar; + int n; + Block *b, *nb; + uchar *p; + + nb = allocb(len); /* allocated before the queue is locked */ + + ilock(q); + + /* go to offset */ + b = q->bfirst; + for(sofar = 0; ; sofar += n){ + if(b == nil){ + iunlock(q); + return nb; + } + n = BLEN(b); + if(sofar + n > offset){ + p = b->rp + offset - sofar; + n -= offset - sofar; /* bytes left in this block from offset on */ + break; + } + QDEBUG checkb(b, "qcopy"); + b = b->next; + } + + /* copy bytes from there */ + for(sofar = 0; sofar < len;){ + if(n > len - sofar) + n = len - sofar; + memmove(nb->wp, p, n); + qcopycnt += n; + sofar += n; + nb->wp += n; + b = b->next; + if(b == nil) + break; + n = BLEN(b); + p = b->rp; + } + iunlock(q); + + return nb; +} + +/* + * called by non-interrupt code. + * allocates a queue with the given limit; returns 0 if malloc + * fails. msg supplies the initial state bits, to which + * Qstarve is added. + */ +Queue* +qopen(int limit, int msg, void (*kick)(void*), void *arg) +{ + Queue *q; + + q = malloc(sizeof(Queue)); + if(q == 0) + return 0; + + q->limit = q->inilim = limit; /* inilim is what qreopen() restores the limit to */ + q->kick = kick; + q->arg = arg; + q->state = msg; + + q->state |= Qstarve; + q->eof = 0; + q->noblock = 0; + + return q; +} + +/* open a queue to be bypassed */ +Queue* +qbypass(void (*bypass)(void*, Block*), void *arg) +{ + Queue *q; + + q = malloc(sizeof(Queue)); + if(q == 0) + return 0; + + q->limit = 0; + q->arg = arg; + q->bypass = bypass; /* qbwrite() hands blocks straight to this function */ + q->state = 0; + + return q; +} + +static int +notempty(void *a) +{ + Queue *q =
a; + + return (q->state & Qclosed) || q->bfirst != 0; /* sleep condition used by qwait() */ +} + +/* + * wait for the queue to be non-empty or closed. + * called with q ilocked. + * returns 1 when a block is queued, 0 for a closed and empty + * queue, and -1 once a closed queue has been read more than + * 3 times or its error string is something other than Ehungup. + */ +static int +qwait(Queue *q) +{ + /* wait for data */ + for(;;){ + if(q->bfirst != nil) + break; + + if(q->state & Qclosed){ + if(++q->eof > 3) + return -1; + if(*q->err && strcmp(q->err, Ehungup) != 0) + return -1; + return 0; + } + + q->state |= Qstarve; /* flag requesting producer to wake me */ + iunlock(q); /* the lock is dropped for the sleep and retaken before rechecking */ + sleep(&q->rr, notempty, q); + ilock(q); + } + return 1; +} + +/* + * add a block list to a queue + */ +void +qaddlist(Queue *q, Block *b) +{ + /* queue the block */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->len += blockalloclen(b); + q->dlen += blocklen(b); + while(b->next) + b = b->next; + q->blast = b; /* last block of the list just added */ +} + +/* + * called with q ilocked. + * unlinks and returns the first block, or nil if the queue is empty. + */ +Block* +qremove(Queue *q) +{ + Block *b; + + b = q->bfirst; + if(b == nil) + return nil; + q->bfirst = b->next; + b->next = nil; + q->dlen -= BLEN(b); + q->len -= BALLOC(b); + QDEBUG checkb(b, "qremove"); + return b; +} + +/* + * copy the contents of a string of blocks into + * memory. emptied blocks are freed. return + * pointer to first unconsumed block. + */ +Block* +bl2mem(uchar *p, Block *b, int n) +{ + int i; + Block *next; + + for(; b != nil; b = next){ + i = BLEN(b); + if(i > n){ /* block holds more than is still wanted: take n bytes and stop */ + memmove(p, b->rp, n); + b->rp += n; + return b; + } + memmove(p, b->rp, i); + n -= i; + p += i; + b->rp += i; + next = b->next; /* saved because b is freed on the next line */ + freeb(b); + } + return nil; +} + +/* + * copy the contents of memory into a string of blocks. + * return nil on error.
+ */ +Block* +mem2bl(uchar *p, int len) +{ + int n; + Block *b, *first, **l; + + first = nil; + l = &first; /* l always points at the link the next block is stored in */ + if(waserror()){ + freeblist(first); + nexterror(); /* errors are re-raised after freeing the partial list, not reported by returning nil */ + } + do { + n = len; + if(n > Maxatomic) + n = Maxatomic; /* each block carries at most Maxatomic bytes */ + + *l = b = allocb(n); + setmalloctag(b, (up->text[0]<<24)|(up->text[1]<<16)|(up->text[2]<<8)|up->text[3]); + memmove(b->wp, p, n); + b->wp += n; + p += n; + len -= n; + l = &b->next; + } while(len > 0); + poperror(); + + return first; +} + +/* + * put a block back to the front of the queue + * called with q ilocked + */ +void +qputback(Queue *q, Block *b) +{ + b->next = q->bfirst; + if(q->bfirst == nil) + q->blast = b; /* queue was empty, so b is also the last block */ + q->bfirst = b; + q->len += BALLOC(b); + q->dlen += BLEN(b); +} + +/* + * flow control, get producer going again + * called with q ilocked; returns with q unlocked + */ +static void +qwakeup_iunlock(Queue *q) +{ + int dowakeup; + + /* if writer flow controlled, restart */ + if((q->state & Qflow) && q->len < q->limit/2){ + q->state &= ~Qflow; + dowakeup = 1; + } + else + dowakeup = 0; + + iunlock(q); + + /* wakeup flow controlled writers */ + if(dowakeup){ + if(q->kick) + q->kick(q->arg); + wakeup(&q->wr); + } +} + +/* + * get next block from a queue (up to a limit). + * returns nil if the queue is closed. a block holding more + * than len bytes is cut to len; the excess is put back on the + * queue unless this is a message queue, in which case it is lost. + */ +Block* +qbread(Queue *q, int len) +{ + Block *b, *nb; + int n; + + qlock(&q->rlock); + if(waserror()){ + qunlock(&q->rlock); + nexterror(); + } + + ilock(q); + switch(qwait(q)){ + case 0: + /* queue closed */ + iunlock(q); + qunlock(&q->rlock); + poperror(); + return nil; + case -1: + /* multiple reads on a closed queue */ + iunlock(q); + error(q->err); + } + + /* if we get here, there's at least one block in the queue */ + b = qremove(q); + n = BLEN(b); + + /* split block if it's too big and this is not a message queue */ + nb = b; + if(n > len){ + if((q->state&Qmsg) == 0){ + n -= len; + b = allocb(n); /* NOTE(review): called with q ilocked; if allocb can raise an error, the handler above releases only q->rlock - confirm */ + memmove(b->wp, nb->rp+len, n); + b->wp += n; + qputback(q, b); + } + nb->wp = nb->rp + len; /* truncate the returned block to len bytes */ + } + + /* restart producer */ + qwakeup_iunlock(q); + + poperror(); + qunlock(&q->rlock); +
return nb; +} + +/* + * read a queue. if no data is queued, wait in qwait() until + * some arrives or the queue is closed. + * returns the number of bytes copied to vp; 0 means the queue + * is closed. + */ +long +qread(Queue *q, void *vp, int len) +{ + Block *b, *first, **l; + int blen, n; + + qlock(&q->rlock); + if(waserror()){ + qunlock(&q->rlock); + nexterror(); + } + + ilock(q); +again: + switch(qwait(q)){ + case 0: + /* queue closed */ + iunlock(q); + qunlock(&q->rlock); + poperror(); + return 0; + case -1: + /* multiple reads on a closed queue */ + iunlock(q); + error(q->err); + } + + /* if we get here, there's at least one block in the queue */ + if(q->state & Qcoalesce){ + /* when coalescing, 0 length blocks just go away */ + b = q->bfirst; + if(BLEN(b) <= 0){ + freeb(qremove(q)); + goto again; + } + + /* grab the first block plus as many + * following blocks as will completely + * fit in the read. + */ + n = 0; + l = &first; + blen = BLEN(b); + for(;;) { + *l = qremove(q); /* removes b, the current head, and links it onto the private list */ + l = &b->next; + n += blen; + + b = q->bfirst; + if(b == nil) + break; + blen = BLEN(b); + if(n+blen > len) + break; + } + } else { + first = qremove(q); + n = BLEN(first); + } + + /* copy to user space outside of the ilock */ + iunlock(q); + b = bl2mem(vp, first, len); /* b is the first block not fully copied, or nil */ + ilock(q); + + /* take care of any left over partial block */ + if(b != nil){ + n -= BLEN(b); /* uncopied bytes do not count towards the return value */ + if(q->state & Qmsg) + freeb(b); + else + qputback(q, b); + } + + /* restart producer */ + qwakeup_iunlock(q); + + poperror(); + qunlock(&q->rlock); + return n; +} + +static int +qnotfull(void *a) +{ + Queue *q = a; + + return q->len < q->limit || (q->state & Qclosed); /* sleep condition used by qbwrite() */ +} + +ulong noblockcnt; /* bytes dropped by qbwrite() on full non-blocking queues */ + +/* + * add a block to a queue obeying flow control. + * returns the data length of b. + */ +long +qbwrite(Queue *q, Block *b) +{ + int n, dowakeup; + + n = BLEN(b); + + if(q->bypass){ + (*q->bypass)(q->arg, b); /* bypass queues hand the block over without queueing it */ + return n; + } + + dowakeup = 0; + qlock(&q->wlock); + if(waserror()){ + if(b != nil) + freeb(b); + qunlock(&q->wlock); + nexterror(); + } + + ilock(q); + + /* give up if the queue is closed */ + if(q->state & Qclosed){ + iunlock(q); + error(q->err); + } + +
/* if nonblocking, don't queue over the limit */ + if(q->len >= q->limit){ + if(q->noblock){ + iunlock(q); + freeb(b); + noblockcnt += n; /* count the bytes dropped */ + qunlock(&q->wlock); + poperror(); + return n; /* the dropped block is still reported as written */ + } + } + + /* queue the block */ + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->blast = b; + b->next = 0; + q->len += BALLOC(b); + q->dlen += n; + QDEBUG checkb(b, "qbwrite"); + b = nil; /* the queue owns the block now; keeps the error handler from freeing it */ + + /* make sure other end gets awakened */ + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + iunlock(q); + + /* get output going again */ + if(q->kick && (dowakeup || (q->state&Qkick))) + q->kick(q->arg); + + /* wakeup anyone consuming at the other end */ + if(dowakeup) + wakeup(&q->rr); + + /* + * flow control, wait for queue to get below the limit + * before allowing the process to continue and queue + * more. We do this here so that postnote can only + * interrupt us after the data has been queued. This + * means that things like 9p flushes and ssl messages + * will not be disrupted by software interrupts. + * + * Note - this is moderately dangerous since a process + * that keeps getting interrupted and rewriting will + * queue infinite crud. + */ + for(;;){ + if(q->noblock || qnotfull(q)) + break; + + ilock(q); + q->state |= Qflow; /* readers clear this and wake q->wr once the queue has drained */ + iunlock(q); + sleep(&q->wr, qnotfull, q); + } + USED(b); + + qunlock(&q->wlock); + poperror(); + return n; +} + +/* + * write to a queue. only Maxatomic bytes at a time is atomic.
+ */ +int +qwrite(Queue *q, void *vp, int len) +{ + int n, sofar; + Block *b; + uchar *p = vp; + + QDEBUG if(!islo()) + print("qwrite hi %#p\n", getcallerpc(&q)); + + sofar = 0; + do { + n = len-sofar; + if(n > Maxatomic) + n = Maxatomic; + + b = allocb(n); + setmalloctag(b, (up->text[0]<<24)|(up->text[1]<<16)|(up->text[2]<<8)|up->text[3]); + if(waserror()){ + freeb(b); /* the copy below raised an error: release the block */ + nexterror(); + } + memmove(b->wp, p+sofar, n); + poperror(); + b->wp += n; + + qbwrite(q, b); + + sofar += n; + } while(sofar < len && (q->state & Qmsg) == 0); /* a message queue takes one block per call */ + + return len; /* len is returned even if a message queue took only the first block */ +} + +/* + * used by print() to write to a queue. Since we may be splhi or not in + * a process, don't qlock. + * + * NOTE: the code below does not merge adjacent blocks; each chunk + * of at most Maxatomic bytes is queued as a block of its own. + * returns the number of bytes queued, which is short if a block + * cannot be allocated or the queue already holds 16k of data. + */ +int +qiwrite(Queue *q, void *vp, int len) +{ + int n, sofar, dowakeup; + Block *b; + uchar *p = vp; + + dowakeup = 0; /* set once a starved reader is seen and never cleared again in the loop */ + + sofar = 0; + do { + n = len-sofar; + if(n > Maxatomic) + n = Maxatomic; + + b = iallocb(n); + if(b == nil) + break; + memmove(b->wp, p+sofar, n); + b->wp += n; + + ilock(q); + + /* we use an artificially high limit for kernel prints since anything + * over the limit gets dropped + */ + if(q->dlen >= 16*1024){ + iunlock(q); + freeb(b); + break; + } + + QDEBUG checkb(b, "qiwrite"); + if(q->bfirst) + q->blast->next = b; + else + q->bfirst = b; + q->blast = b; + q->len += BALLOC(b); + q->dlen += n; + + if(q->state & Qstarve){ + q->state &= ~Qstarve; + dowakeup = 1; + } + + iunlock(q); + + if(dowakeup){ + if(q->kick) + q->kick(q->arg); + wakeup(&q->rr); + } + + sofar += n; + } while(sofar < len && (q->state & Qmsg) == 0); + + return sofar; +} + +/* + * be extremely careful when calling this, + * as there is no reference accounting + */ +void +qfree(Queue *q) +{ + qclose(q); /* releases queued blocks and wakes sleepers before the queue is freed */ + free(q); +} + +/* + * Mark a queue as closed. No further IO is permitted. + * All blocks are released.
+ */ +void +qclose(Queue *q) +{ + Block *bfirst; + + if(q == nil) + return; + + /* mark it */ + ilock(q); + q->state |= Qclosed; + q->state &= ~(Qflow|Qstarve); + strcpy(q->err, Ehungup); + bfirst = q->bfirst; + q->bfirst = 0; + q->len = 0; + q->dlen = 0; + q->noblock = 0; + iunlock(q); + + /* free queued blocks */ + freeblist(bfirst); + + /* wake up readers/writers */ + wakeup(&q->rr); + wakeup(&q->wr); +} + +/* + * Mark a queue as closed. Wakeup any readers. Don't remove queued + * blocks. + */ +void +qhangup(Queue *q, char *msg) +{ + /* mark it */ + ilock(q); + q->state |= Qclosed; + if(msg == 0 || *msg == 0) + strcpy(q->err, Ehungup); + else + strncpy(q->err, msg, ERRMAX-1); + iunlock(q); + + /* wake up readers/writers */ + wakeup(&q->rr); + wakeup(&q->wr); +} + +/* + * return non-zero if the q is hungup + */ +int +qisclosed(Queue *q) +{ + return q->state & Qclosed; +} + +/* + * mark a queue as no longer hung up + */ +void +qreopen(Queue *q) +{ + ilock(q); + q->state &= ~Qclosed; + q->state |= Qstarve; + q->eof = 0; + q->limit = q->inilim; + iunlock(q); +} + +/* + * return bytes queued + */ +int +qlen(Queue *q) +{ + return q->dlen; +} + +/* + * return space remaining before flow control + */ +int +qwindow(Queue *q) +{ + int l; + + l = q->limit - q->len; + if(l < 0) + l = 0; + return l; +} + +/* + * return true if we can read without blocking + */ +int +qcanread(Queue *q) +{ + return q->bfirst!=0; +} + +/* + * change queue limit + */ +void +qsetlimit(Queue *q, int limit) +{ + q->limit = limit; +} + +/* + * set blocking/nonblocking + */ +void +qnoblock(Queue *q, int onoff) +{ + q->noblock = onoff; +} + +/* + * flush the output queue + */ +void +qflush(Queue *q) +{ + Block *bfirst; + + /* mark it */ + ilock(q); + bfirst = q->bfirst; + q->bfirst = 0; + q->len = 0; + q->dlen = 0; + iunlock(q); + + /* free queued blocks */ + freeblist(bfirst); + + /* wake up readers/writers */ + wakeup(&q->wr); +} + +int +qfull(Queue *q) +{ + return q->state & Qflow; +} + +int 
+qstate(Queue *q) +{ + return q->state; +} diff -Nru 0/sys/src/nix/port/qlock.c 4/sys/src/nix/port/qlock.c --- 0/sys/src/nix/port/qlock.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/qlock.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,277 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include + +QLockstats qlockstats; + +static void +lockstat(uintptr pc, uvlong w) +{ + addwaitstat(pc, w, WSqlock); /* used below after a process has queued and been rescheduled */ +} + +static void +slockstat(uintptr pc, uvlong w) +{ + addwaitstat(pc, w, WSslock); /* used below when the q->use spin lock was contended */ +} + +void +qlock(QLock *q) +{ + Proc *p; + uvlong t0; + + cycles(&t0); + if(m->ilockdepth != 0) + print("qlock: %#p: ilockdepth %d", getcallerpc(&q), m->ilockdepth); + if(up != nil && up->nlocks) + print("qlock: %#p: nlocks %d", getcallerpc(&q), up->nlocks); + + if(!canlock(&q->use)){ + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.qlock++; + if(!q->locked) { /* uncontended: take the lock and return */ + q->locked = 1; + q->pc = getcallerpc(&q); + unlock(&q->use); + return; + } + if(up == nil) + panic("qlock"); /* would have to queue, but there is no current process */ + qlockstats.qlockq++; + p = q->tail; /* append the current process to the list of waiters */ + if(p == 0) + q->head = up; + else + p->qnext = up; + q->tail = up; + up->qnext = 0; + up->state = Queueing; + up->qpc = getcallerpc(&q); + if(up->trace) + proctrace(up, SLock, 0); + unlock(&q->use); + sched(); /* qunlock() readies the first waiter and leaves q->locked set for it */ + lockstat(getcallerpc(&q), t0); +} + +int +canqlock(QLock *q) +{ + if(!canlock(&q->use)) + return 0; /* gives up at once if q->use is busy */ + if(q->locked){ + unlock(&q->use); + return 0; + } + q->locked = 1; + q->pc = getcallerpc(&q); + unlock(&q->use); + + return 1; +} + +void +qunlock(QLock *q) +{ + Proc *p; + uvlong t0; + + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + if (q->locked == 0) + print("qunlock called with qlock not held, from %#p\n", + getcallerpc(&q)); + p = q->head; + if(p){ /* hand the lock straight to the first waiter; q->locked stays set */ + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + unlock(&q->use); + q->pc = p->qpc; + ready(p); + return; + } + q->locked = 0; + q->pc = 0; + unlock(&q->use); +} + +void +rlock(RWlock *q) +{ + Proc *p; +
uvlong t0; + + cycles(&t0); + if(!canlock(&q->use)){ + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.rlock++; + if(q->writer == 0 && q->head == nil){ + /* no writer, go for it */ + q->readers++; + unlock(&q->use); + return; + } + + qlockstats.rlockq++; + p = q->tail; + if(up == nil) + panic("rlock"); + if(p == 0) + q->head = up; + else + p->qnext = up; + q->tail = up; + up->qnext = 0; + up->state = QueueingR; /* wunlock() uses this state to tell queued readers from writers */ + if(up->trace) + proctrace(up, SLock, 0); + unlock(&q->use); + sched(); + lockstat(getcallerpc(&q), t0); +} + +void +runlock(RWlock *q) +{ + Proc *p; + uvlong t0; + + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + p = q->head; + if(--(q->readers) > 0 || p == nil){ /* other readers remain, or nobody is waiting */ + unlock(&q->use); + return; + } + + /* start waiting writer */ + if(p->state != QueueingW) + panic("runlock"); + q->head = p->qnext; + if(q->head == 0) + q->tail = 0; + q->writer = 1; /* set on behalf of the writer being woken */ + unlock(&q->use); + ready(p); +} + +void +wlock(RWlock *q) +{ + Proc *p; + uvlong t0; + + cycles(&t0); + if(!canlock(&q->use)){ + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.wlock++; + if(q->readers == 0 && q->writer == 0){ + /* noone waiting, go for it */ + q->wpc = getcallerpc(&q); + q->wproc = up; + q->writer = 1; + unlock(&q->use); + return; + } + + /* wait */ + qlockstats.wlockq++; + p = q->tail; + if(up == nil) + panic("wlock"); + if(p == nil) + q->head = up; + else + p->qnext = up; + q->tail = up; + up->qnext = 0; + up->state = QueueingW; /* runlock() and wunlock() look for this state */ + if(up->trace) + proctrace(up, SLock, 0); + unlock(&q->use); + sched(); + lockstat(getcallerpc(&q), t0); +} + +void +wunlock(RWlock *q) +{ + Proc *p; + uvlong t0; + + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + p = q->head; + if(p == nil){ /* nobody waiting: just drop the write lock */ + q->writer = 0; + unlock(&q->use); + return; + } + if(p->state == QueueingW){ + /* start waiting writer */ + q->head = p->qnext; + if(q->head == nil) + q->tail = nil; + unlock(&q->use); + ready(p); +
return; /* q->writer stays set: the lock passes directly to the woken writer */ + } + + if(p->state != QueueingR) + panic("wunlock"); + + /* waken waiting readers */ + while(q->head != nil && q->head->state == QueueingR){ + p = q->head; + q->head = p->qnext; + q->readers++; /* counted here on behalf of the reader being woken */ + ready(p); + } + if(q->head == nil) + q->tail = nil; + q->writer = 0; + unlock(&q->use); +} + +/* same as rlock but punts if there are any writers waiting; returns 1 if the read lock was taken, 0 if not */ +int +canrlock(RWlock *q) +{ + uvlong t0; + + if(!canlock(&q->use)){ + cycles(&t0); + lock(&q->use); + slockstat(getcallerpc(&q), t0); + } + qlockstats.rlock++; + if(q->writer == 0 && q->head == nil){ + /* no writer, go for it */ + q->readers++; + unlock(&q->use); + return 1; + } + unlock(&q->use); + return 0; +} diff -Nru 0/sys/src/nix/port/qmalloc.c 4/sys/src/nix/port/qmalloc.c --- 0/sys/src/nix/port/qmalloc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/qmalloc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,670 @@ +/* + * malloc + * + * Uses Quickfit (see SIGPLAN Notices October 1988) + * with allocator from Kernighan & Ritchie + * + * This is a placeholder.
+ */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +typedef double Align; +typedef union Header Header; +typedef struct Qlist Qlist; + +union Header { + struct { + Header* next; /* next free block, or &checkval while the block is allocated */ + uint size; /* size of the block in Header units, header included */ + } s; + Align al; +}; + +struct Qlist { + Lock lk; + Header* first; /* free blocks, all of the size this list is indexed by */ + + uint nalloc; /* incremented by qmalloc for each block taken from this list */ +}; + +enum { + Unitsz = sizeof(Header), /* 16 bytes on amd64 */ +}; + +#define NUNITS(n) (HOWMANY(n, Unitsz) + 1) /* units for n bytes plus one for the header */ +#define NQUICK ((512/Unitsz)+1) /* 33 on amd64 */ + +static Qlist quicklist[NQUICK+1]; /* indexed by block size in units */ +static Header misclist; /* sentinel that starts the circular free list */ +static Header *rover; /* current position in the circular free list; nil until first used */ +static unsigned tailsize; /* units available for allocation at tailptr */ +static unsigned tailnunits; /* units morecore() has not handed out yet */ +static Header *tailbase; +static Header *tailptr; +static Header checkval; /* its address marks the header of every allocated block */ +static int morecore(unsigned); + +enum +{ + QSmalign = 0, + QSmalignquick, + QSmalignrover, + QSmalignfront, + QSmalignback, + QSmaligntail, + QSmalignnottail, + QSmalloc, + QSmallocrover, + QSmalloctail, + QSfree, + QSfreetail, + QSfreequick, + QSfreenext, + QSfreeprev, + QSmax +}; + +static void qfreeinternal(void*); +static int qstats[QSmax]; /* event counters, indexed by the QS* values above */ +static char* qstatstr[QSmax] = { +[QSmalign] "malign", +[QSmalignquick] "malignquick", +[QSmalignrover] "malignrover", +[QSmalignfront] "malignfront", +[QSmalignback] "malignback", +[QSmaligntail] "maligntail", +[QSmalignnottail] "malignnottail", +[QSmalloc] "malloc", +[QSmallocrover] "mallocrover", +[QSmalloctail] "malloctail", +[QSfree] "free", +[QSfreetail] "freetail", +[QSfreequick] "freequick", +[QSfreenext] "freenext", +[QSfreeprev] "freeprev", +}; + +static Lock mainlock; + +#define MLOCK ilock(&mainlock) +#define MUNLOCK iunlock(&mainlock) +#define QLOCK(l) ilock(l) +#define QUNLOCK(l) iunlock(l) + +#define tailalloc(p, n) ((p)=tailptr, tailsize -= (n), tailptr+=(n),\ + (p)->s.size=(n), (p)->s.next = &checkval) /* carve n units off the tail; does not check that tailsize >= n */ + +#define ISPOWEROF2(x) (/*((x) != 0) && */!((x) & ((x)-1))) +#define ALIGNHDR(h, a) (Header*)((((uintptr)(h))+((a)-1)) & ~((a)-1)) /* round h up to a multiple of a */ + + +/* + * Experiment: per-core quick lists.
+ * change quicklist to be + * static Qlist quicklist[MACHMAX][NQUICK+1]; + * and define QLIST to be quicklist[m->machno] + * + * using quicklist[m->machno] runs out of memory soon. + * using quicklist[m->machno%4] yields times worse than using quicklist! + */ +#define QLIST quicklist + +static void* +qmallocalign(usize nbytes, uintptr align, long offset, usize span) +{ + Qlist *qlist; + uintptr aligned; + Header **pp, *p, *q, *r; + uint naligned, nunits, n; + + if(nbytes == 0 || offset != 0 || span != 0) + return nil; /* non-zero offset and span are not supported */ + + if(!ISPOWEROF2(align) || align < sizeof(Header)) + return nil; /* align must be a power of 2 and at least one unit */ + + qstats[QSmalign]++; + nunits = NUNITS(nbytes); + if(nunits <= NQUICK){ + /* + * Look for a conveniently aligned block + * on one of the quicklists. + */ + qlist = &QLIST[nunits]; + QLOCK(&qlist->lk); + pp = &qlist->first; /* pp trails p so the block can be unlinked */ + for(p = *pp; p != nil; p = p->s.next){ + if(ALIGNED(p+1, align)){ + *pp = p->s.next; + p->s.next = &checkval; + QUNLOCK(&qlist->lk); + qstats[QSmalignquick]++; + return p+1; + } + pp = &p->s.next; + } + QUNLOCK(&qlist->lk); + } + + MLOCK; + if(nunits > tailsize) { + /* hard way */ + if((q = rover) != nil){ + do { + p = q->s.next; + if(p->s.size < nunits) + continue; + aligned = ALIGNED(p+1, align); + naligned = NUNITS(align)-1; /* units that cover one alignment interval */ + if(!aligned && p->s.size < nunits+naligned) + continue; + + /* + * This block is big enough, remove it + * from the list. + */ + q->s.next = p->s.next; + rover = q; + qstats[QSmalignrover]++; + + /* + * Free any runt in front of the alignment. + */ + if(!aligned){ + r = p; + p = ALIGNHDR(p+1, align) - 1; /* header goes one unit below the aligned address */ + n = p - r; + p->s.size = r->s.size - n; + + r->s.size = n; + r->s.next = &checkval; /* make the runt look allocated so qfreeinternal accepts it */ + qfreeinternal(r+1); + qstats[QSmalignfront]++; + } + + /* + * Free any residue after the aligned block.
+ */ + if(p->s.size > nunits){ + r = p+nunits; + r->s.size = p->s.size - nunits; + r->s.next = &checkval; + qstats[QSmalignback]++; + qfreeinternal(r+1); + + p->s.size = nunits; + } + + p->s.next = &checkval; + MUNLOCK; + return p+1; + } while((q = p) != rover); + } + if((n = morecore(nunits)) == 0){ + MUNLOCK; + return nil; + } + tailsize += n; + /* + * morecore hands over whatever is left when less than + * the request remains; don't run off the end of the arena. + */ + if(nunits > tailsize){ + MUNLOCK; + return nil; + } + } + + q = ALIGNHDR(tailptr+1, align); + if(q == tailptr+1){ + tailalloc(p, nunits); + qstats[QSmaligntail]++; + } + else{ + naligned = NUNITS(align)-1; + if(tailsize < nunits+naligned){ + /* + * There are at least nunits, + * get enough for alignment. + */ + if((n = morecore(naligned)) == 0){ + MUNLOCK; + return nil; + } + tailsize += n; + } + /* + * Save the residue before the aligned allocation + * and free it after the tail pointer has been bumped + * for the main allocation. + */ + n = q-tailptr - 1; + /* the top-up above may have been short: check the exact need */ + if(tailsize < n+nunits){ + MUNLOCK; + return nil; + } + tailalloc(r, n); + tailalloc(p, nunits); + qstats[QSmalignnottail]++; + qfreeinternal(r+1); + } + MUNLOCK; + + return p+1; +} + +/* + * allocate nbytes, rounded up to whole units plus one header unit. + * tries, in order, the quick list for that size, the tail of the + * arena, the circular free list, and finally morecore(). + * returns nil if nbytes is 0 or no memory is left. + * the memory is not cleared. + */ +static void* +qmalloc(usize nbytes) +{ + Qlist *qlist; + Header *p, *q; + uint nunits, n; + +///* FIXME: (ignore for now) + if(nbytes == 0) + return nil; +//*/ + + qstats[QSmalloc]++; + nunits = NUNITS(nbytes); + if(nunits <= NQUICK){ + qlist = &QLIST[nunits]; + QLOCK(&qlist->lk); + if((p = qlist->first) != nil){ + qlist->first = p->s.next; + qlist->nalloc++; + QUNLOCK(&qlist->lk); + p->s.next = &checkval; + return p+1; + } + QUNLOCK(&qlist->lk); + } + + MLOCK; + if(nunits > tailsize) { + /* hard way */ + if((q = rover) != nil){ + do { + p = q->s.next; + if(p->s.size >= nunits) { + if(p->s.size > nunits) { + /* split: the allocation comes from the end of the free block */ + p->s.size -= nunits; + p += p->s.size; + p->s.size = nunits; + } else + q->s.next = p->s.next; + p->s.next = &checkval; + rover = q; + qstats[QSmallocrover]++; + MUNLOCK; + return p+1; + } + } while((q = p) != rover); + } + if((n = morecore(nunits)) == 0){ + MUNLOCK; + return nil; + } + tailsize += n; + /* + * morecore hands over whatever is left when less than + * the request remains; without this check tailalloc + * would wrap tailsize and allocate past the arena. + */ + if(nunits > tailsize){ + MUNLOCK; + return nil; + } + } + qstats[QSmalloctail]++; + tailalloc(p, nunits); + MUNLOCK; + + return p+1; +} + +/*
+ * TODO: should not gain the MLOCK for + * releasing memory into the QLIST. + */ +static void +qfreeinternal(void* ap) +{ + Qlist *qlist; + Header *p, *q; + uint nunits; + + if(ap == nil) + return; + qstats[QSfree]++; + + p = (Header*)ap - 1; /* the header sits one unit below the user pointer */ + if((nunits = p->s.size) == 0 || p->s.next != &checkval) + panic("malloc: corrupt allocation arena\n"); + if(tailptr != nil && p+nunits == tailptr) { + /* block before tail */ + tailptr = p; + tailsize += nunits; + qstats[QSfreetail]++; + return; + } + if(nunits <= NQUICK) { /* small block: push it on the quick list for its size */ + qlist = &QLIST[nunits]; + QLOCK(&qlist->lk); + p->s.next = qlist->first; + qlist->first = p; + QUNLOCK(&qlist->lk); + qstats[QSfreequick]++; + return; + } + if((q = rover) == nil) { /* first use: start the circular list with the misclist sentinel */ + q = &misclist; + q->s.size = 0; + q->s.next = q; + } + for(; !(p > q && p < q->s.next); q = q->s.next) /* find the free block q that p follows in address order */ + if(q >= q->s.next && (p > q || p < q->s.next)) + break; /* p belongs at the wrap-around point of the list */ + if(p+p->s.size == q->s.next) { /* p ends where the next free block starts: merge them */ + p->s.size += q->s.next->s.size; + p->s.next = q->s.next->s.next; + qstats[QSfreenext]++; + } else + p->s.next = q->s.next; + if(q+q->s.size == p) { /* q ends where p starts: merge them */ + q->s.size += p->s.size; + q->s.next = p->s.next; + qstats[QSfreeprev]++; + } else + q->s.next = p; + rover = q; +} + +ulong +msize(void* ap) +{ + Header *p; + uint nunits; + + if(ap == nil) + return 0; + + p = (Header*)ap - 1; + if((nunits = p->s.size) == 0 || p->s.next != &checkval) + panic("malloc: corrupt allocation arena\n"); + + return (nunits-1) * sizeof(Header); /* usable bytes: every unit except the header */ +} + +static void +mallocreadfmt(char* s, char* e) +{ + char *p; + Header *q; + int i, n, t; + Qlist *qlist; + + p = seprint(s, e, + "%llud memory\n" + "%d pagesize\n" + "%llud kernel\n", + (uvlong)conf.npage*PGSZ, + PGSZ, + (uvlong)conf.npage-conf.upages); + + t = 0; /* bytes held on the quick lists */ + for(i = 0; i <= NQUICK; i++) { + n = 0; /* blocks on quick list i */ + qlist = &QLIST[i]; + QLOCK(&qlist->lk); + for(q = qlist->first; q != nil; q = q->s.next){ +// if(q->s.size != i) +// p = seprint(p, e, "q%d\t%#p\t%ud\n", +// i, q, q->s.size); + n++; + } + QUNLOCK(&qlist->lk); + +// if(n != 0) +// p = seprint(p, e, "q%d %d\n",
i, n); + t += n * i*sizeof(Header); + } + p = seprint(p, e, "quick: %ud bytes total\n", t); + + MLOCK; + if((q = rover) != nil){ + i = t = 0; + do { + t += q->s.size; + i++; +// p = seprint(p, e, "m%d\t%#p\n", q->s.size, q); + } while((q = q->s.next) != rover); + + p = seprint(p, e, "rover: %d blocks %ud bytes total\n", + i, t*sizeof(Header)); + } + p = seprint(p, e, "total allocated %lud, %ud remaining\n", + (tailptr-tailbase)*sizeof(Header), tailnunits*sizeof(Header)); + + for(i = 0; i < nelem(qstats); i++){ + if(qstats[i] == 0) + continue; + p = seprint(p, e, "%s %ud\n", qstatstr[i], qstats[i]); + } + MUNLOCK; +} + +/* + * read handler for the allocator summary: formats the statistics + * into a temporary buffer and returns the part selected by offset + * and n through readstr(). returns 0 if the buffer cannot be + * allocated. + */ +long +mallocreadsummary(Chan*, void *a, long n, long offset) +{ + char *alloc; + + alloc = malloc(16*READSTR); + if(alloc == nil){ + /* mallocreadfmt would otherwise format into a nil buffer */ + print("mallocreadsummary: out of memory\n"); + return 0; + } + mallocreadfmt(alloc, alloc+16*READSTR); + n = readstr(offset, a, n, alloc); + free(alloc); + + return n; +} + +/* + * print the allocator statistics on the console: bytes on the + * quick lists, blocks and bytes on the circular free list, + * arena usage and the non-zero event counters. + */ +void +mallocsummary(void) +{ + Header *q; + int i, n, t; + Qlist *qlist; + + t = 0; + for(i = 0; i <= NQUICK; i++) { + n = 0; + qlist = &QLIST[i]; + QLOCK(&qlist->lk); + for(q = qlist->first; q != nil; q = q->s.next){ + if(q->s.size != i) + DBG("q%d\t%#p\t%ud\n", i, q, q->s.size); + n++; + } + QUNLOCK(&qlist->lk); + + t += n * i*sizeof(Header); + } + print("quick: %ud bytes total\n", t); + + /* + * reset the counters before looking at the free list: i still + * holds NQUICK+1 from the loop above, which used to produce a + * bogus rover line whenever rover was nil. + */ + i = t = 0; + MLOCK; + if((q = rover) != nil){ + do { + t += q->s.size; + i++; + } while((q = q->s.next) != rover); + } + MUNLOCK; + + if(i != 0){ + print("rover: %d blocks %ud bytes total\n", + i, t*sizeof(Header)); + } + print("total allocated %lud, %ud remaining\n", + (tailptr-tailbase)*sizeof(Header), tailnunits*sizeof(Header)); + + for(i = 0; i < nelem(qstats); i++){ + if(qstats[i] == 0) + continue; + print("%s %ud\n", qstatstr[i], qstats[i]); + } +} + +/* + * release a block obtained from this allocator; nil is ignored. + */ +void +free(void* ap) +{ + MLOCK; + qfreeinternal(ap); + MUNLOCK; +} + +/* + * allocate size bytes of zeroed memory; nil on failure. + */ +void* +malloc(ulong size) +{ + void* v; + + if((v = qmalloc(size)) != nil) + memset(v, 0, size); + + return v; +} + +void* +mallocz(ulong size, int clr) +{ + void *v; + + if((v = qmalloc(size)) != nil && clr) + memset(v,
0, size); + + return v; +} + +void* +mallocalign(ulong nbytes, ulong align, long offset, ulong span) +{ + void *v; + + /* + * Should this memset or should it be left to the caller? + */ + if((v = qmallocalign(nbytes, align, offset, span)) != nil) + memset(v, 0, nbytes); + + return v; +} + +void* +smalloc(ulong size) +{ + void *v; + + while((v = malloc(size)) == nil) + tsleep(&up->sleep, return0, 0, 100); /* NOTE(review): qmalloc returns nil for size 0, so smalloc(0) appears to retry here forever - confirm */ + memset(v, 0, size); /* malloc above has already cleared the block */ + + return v; +} + +void* +realloc(void* ap, ulong size) +{ + void *v; + Header *p; + ulong osize; + uint nunits, ounits; + + /* + * Easy stuff: + * free and return nil if size is 0 + * (implementation-defined behaviour); + * behave like malloc if ap is nil; + * check for arena corruption; + * do nothing if units are the same. + */ + if(size == 0){ + MLOCK; + qfreeinternal(ap); + MUNLOCK; + + return nil; + } + if(ap == nil) + return qmalloc(size); /* qmalloc, not malloc: the new memory is not cleared */ + + p = (Header*)ap - 1; + if((ounits = p->s.size) == 0 || p->s.next != &checkval) + panic("realloc: corrupt allocation arena\n"); + + if((nunits = NUNITS(size)) == ounits) + return ap; + + /* + * Slightly harder: + * if this allocation abuts the tail, try to just + * adjust the tailptr. + */ + MLOCK; + if(tailptr != nil && p+ounits == tailptr){ + if(ounits > nunits){ /* shrinking: give the spare units back to the tail */ + p->s.size = nunits; + tailsize += ounits-nunits; + tailptr -= ounits-nunits; + MUNLOCK; + return ap; + } + if(tailsize >= nunits-ounits){ /* growing: take the extra units from the tail if it has them */ + p->s.size = nunits; + tailsize -= nunits-ounits; + tailptr += nunits-ounits; + MUNLOCK; + return ap; + } + } + MUNLOCK; + + /* + * Worth doing if it's a small reduction? + * Do it anyway if <= NQUICK? + if((ounits-nunits) < 2) + return ap; + */ + + /* + * Too hard (or can't be bothered): + * allocate, copy and free. + * What does the standard say for failure here?
+ */ + if((v = qmalloc(size)) != nil){ + osize = (ounits-1)*sizeof(Header); /* usable size of the old block */ + if(size < osize) + osize = size; + memmove(v, ap, osize); + MLOCK; + qfreeinternal(ap); + MUNLOCK; + } + + return v; /* nil on failure, in which case ap has not been freed */ +} + +void +setmalloctag(void*, ulong) +{ /* empty: this implementation records no tag */ +} + +void +mallocinit(void) +{ + if(tailptr != nil) + return; /* already initialised */ + + tailbase = UINT2PTR(sys->vmunused); + tailptr = tailbase; + tailnunits = NUNITS(sys->vmend - sys->vmunused); /* NOTE(review): NUNITS adds one unit for a header, so this may overstate the region by a unit - confirm */ + print("base %#p ptr %#p nunits %ud\n", tailbase, tailptr, tailnunits); +} + +static int +morecore(uint nunits) +{ + /* + * First (simple) cut. + * Pump it up when you don't really need it. + * Pump it up until you can feel it. + * + * Returns the number of units moved out of tailnunits: + * at least NUNITS(128*KiB) when that much is left, otherwise + * whatever remains, which may be fewer than asked for or 0. + * The caller adds the result to tailsize. + */ + if(nunits < NUNITS(128*KiB)) + nunits = NUNITS(128*KiB); + if(nunits > tailnunits) + nunits = tailnunits; + tailnunits -= nunits; + + return nunits; +} diff -Nru 0/sys/src/nix/port/rdb.c 4/sys/src/nix/port/rdb.c --- 0/sys/src/nix/port/rdb.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/rdb.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,108 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "ureg.h" + +#define DBG if(0)scrprint /* debug output is compiled out */ +#pragma varargck argpos scrprint 1 +static Ureg ureg; + +static void +scrprint(char *fmt, ...)
+{ + char buf[128]; + va_list va; + int n; + + va_start(va, fmt); + n = vseprint(buf, buf+sizeof buf, fmt, va)-buf; + va_end(va); + putstrn(buf, n); +} + +static char* +getline(void) +{ + static char buf[128]; + int i, c; + + for(;;){ + for(i=0; i 4){ + mesg(Rerr, Ecount); + break; + } + a = addr(min+0); + scrprint("mput %.8lux\n", a); + memmove(a, min+5, n); + mesg(Rmput, mout); + break; + * + */ + default: + DBG("unknown %c\n", *req); + iprint("Eunknown message\n"); + break; + } + } +} + +void +rdb(void) +{ + splhi(); + iprint("rdb..."); + callwithureg(talkrdb); +} diff -Nru 0/sys/src/nix/port/rebootcmd.c 4/sys/src/nix/port/rebootcmd.c --- 0/sys/src/nix/port/rebootcmd.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/rebootcmd.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,102 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include + +static ulong +l2be(long l) +{ + uchar *cp; + + cp = (uchar*)&l; + return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; +} + + +static void +readn(Chan *c, void *vp, long n) +{ + char *p; + long nn; + + p = vp; + while(n > 0) { + nn = c->dev->read(c, p, n, c->offset); + if(nn == 0) + error(Eshort); + c->offset += nn; + p += nn; + n -= nn; + } +} + +static void +setbootcmd(int argc, char *argv[]) +{ + char *buf, *p, *ep; + int i; + + buf = malloc(1024); + if(buf == nil) + error(Enomem); + p = buf; + ep = buf + 1024; + for(i=0; igenbuf, sizeof up->genbuf, __VA_ARGS__); + +enum { + Nctlr = 32, + Maxpath = 128, +}; + +enum { + /* sync with ahci.h */ + Dllba = 1<<0, + Dsmart = 1<<1, + Dpower = 1<<2, + Dnop = 1<<3, + Datapi = 1<<4, + Datapi16= 1<<5, +}; + +static char *flagname[] = { + "llba", + "smart", + "power", + "nop", + "atapi", + "atapi16", +}; + +typedef struct Ctlr Ctlr; +struct Ctlr{ + QLock; + + Ctlr *next; + SDunit *unit; + + char path[Maxpath]; + Chan *c; + + ulong vers; + uchar mediachange; + uchar flag; + uchar smart; + uchar smartrs; + uchar feat; + + 
uvlong sectors; + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + char ident[0x100]; +}; + +void aoeidmove(char *p, ushort *a, unsigned n); + +static Lock ctlrlock; +static Ctlr *head; +static Ctlr *tail; + +SDifc sdaoeifc; + +static ushort +gbit16(void *a) +{ + uchar *i; + + i = a; + return i[1] << 8 | i[0]; +} + +static u32int +gbit32(void *a) +{ + u32int j; + uchar *i; + + i = a; + j = i[3] << 24; + j |= i[2] << 16; + j |= i[1] << 8; + j |= i[0]; + return j; +} + +static uvlong +gbit64(void *a) +{ + uchar *i; + + i = a; + return (uvlong)gbit32(i+4)<<32 | gbit32(i); +} + +static int +identify(Ctlr *c, ushort *id) +{ + int i; + uchar oserial[21]; + uvlong osectors, s; + + osectors = c->sectors; + memmove(oserial, c->serial, sizeof c->serial); + + c->feat &= ~(Dllba|Dpower|Dsmart|Dnop); + i = gbit16(id+83) | gbit16(id+86); + if(i & (1<<10)){ + c->feat |= Dllba; + s = gbit64(id+100); + }else + s = gbit32(id+60); + + i = gbit16(id+83); + if((i>>14) == 1) { + if(i & (1<<3)) + c->feat |= Dpower; + i = gbit16(id+82); + if(i & 1) + c->feat |= Dsmart; + if(i & (1<<14)) + c->feat |= Dnop; + } + + aoeidmove(c->serial, id+10, 20); + aoeidmove(c->firmware, id+23, 8); + aoeidmove(c->model, id+27, 40); + + if((osectors == 0 || osectors != s) && + memcmp(oserial, c->serial, sizeof oserial) != 0){ + c->sectors = s; + c->mediachange = 1; + c->vers++; + } + return 0; +} + +/* must call with d qlocked */ +static int +aoeidentify(Ctlr *d, SDunit *u) +{ + Chan *c; + + c = nil; + if(waserror()){ + if(c) + cclose(c); + iprint("aoeidentify: %s\n", up->errstr); + nexterror(); + } + + uprint("%s/ident", d->path); + c = namec(up->genbuf, Aopen, OREAD, 0); + c->dev->read(c, d->ident, sizeof d->ident, 0); + + poperror(); + cclose(c); + + d->feat = 0; + d->smart = 0; + identify(d, (ushort*)d->ident); + + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, d->model, 40); + + return 0; 
+} + +static Ctlr* +ctlrlookup(char *path) +{ + Ctlr *c; + + lock(&ctlrlock); + for(c = head; c; c = c->next) + if(strcmp(c->path, path) == 0) + break; + unlock(&ctlrlock); + return c; +} + +static Ctlr* +newctlr(char *path) +{ + Ctlr *c; + + /* race? */ + if(ctlrlookup(path)) + error(Eexist); + + if((c = malloc(sizeof *c)) == nil) + return 0; + kstrcpy(c->path, path, sizeof c->path); + lock(&ctlrlock); + if(head != nil) + tail->next = c; + else + head = c; + tail = c; + unlock(&ctlrlock); + return c; +} + +static void +delctlr(Ctlr *c) +{ + Ctlr *x, *prev; + + lock(&ctlrlock); + + for(prev = 0, x = head; x; prev = x, x = c->next) + if(strcmp(c->path, x->path) == 0) + break; + if(x == 0){ + unlock(&ctlrlock); + error(Enonexist); + } + + if(prev) + prev->next = x->next; + else + head = x->next; + if(x->next == nil) + tail = prev; + unlock(&ctlrlock); + + if(x->c) + cclose(x->c); + free(x); +} + +static SDev* +aoeprobe(char *path, SDev *s) +{ + int n, i; + char *p; + Chan *c; + Ctlr *ctlr; + + if((p = strrchr(path, '/')) == 0) + error(Ebadarg); + *p = 0; + uprint("%s/ctl", path); + *p = '/'; + + c = namec(up->genbuf, Aopen, OWRITE, 0); + if(waserror()) { + cclose(c); + nexterror(); + } + n = uprint("discover %s", p+1); + c->dev->write(c, up->genbuf, n, 0); + poperror(); + cclose(c); + + for(i = 0;; i += 200){ + if(i > 8000 || waserror()) + error(Etimedout); + tsleep(&up->sleep, return0, 0, 200); + poperror(); + + uprint("%s/ident", path); + if(waserror()) + continue; + c = namec(up->genbuf, Aopen, OREAD, 0); + poperror(); + cclose(c); + + ctlr = newctlr(path); + break; + } + + if(s == nil && (s = malloc(sizeof *s)) == nil) + return nil; + s->ctlr = ctlr; + s->ifc = &sdaoeifc; + s->nunit = 1; + return s; +} + +static char *probef[32]; +static int nprobe; + +static int +pnpprobeid(char *s) +{ + int id; + + if(strlen(s) < 2) + return 0; + id = 'e'; + if(s[1] == '!') + id = s[0]; + return id; +} + +static SDev* +aoepnp(void) +{ + int i, id; + char *p; + SDev *h, *t, *s; 
+ + if((p = getconf("aoedev")) == 0) + return 0; + nprobe = tokenize(p, probef, nelem(probef)); + h = t = 0; + for(i = 0; i < nprobe; i++){ + id = pnpprobeid(probef[i]); + if(id == 0) + continue; + s = malloc(sizeof *s); + if(s == nil) + break; + s->ctlr = 0; + s->idno = id; + s->ifc = &sdaoeifc; + s->nunit = 1; + + if(h) + t->next = s; + else + h = s; + t = s; + } + return h; +} + +static Ctlr* +pnpprobe(SDev *sd) +{ + int j; + char *p; + static int i; + /* i is the next unprobed probef[] slot; only slots 0..nprobe-1 were filled by tokenize */ + if(i >= nprobe) + return 0; + p = probef[i++]; + if(strlen(p) < 2) + return 0; + if(p[1] == '!') + p += 2; + + for(j = 0;; j += 200){ + if(j > 8000){ + print("#æ: pnpprobe: %s: %s\n", probef[i-1], up->errstr); + return 0; + } + if(waserror()){ + tsleep(&up->sleep, return0, 0, 200); + continue; + } + sd = aoeprobe(p, sd); + poperror(); + break; + } + print("#æ: pnpprobe establishes %sin %dms\n", probef[i-1], j); + return sd->ctlr; +} + + +static int +aoeverify(SDunit *u) +{ + SDev *s; + Ctlr *c; + + s = u->dev; + c = s->ctlr; + if(c == nil && (s->ctlr = c = pnpprobe(s)) == nil) + return 0; + c->mediachange = 1; + return 1; +} + +static int +aoeconnect(SDunit *u, Ctlr *c) +{ + qlock(c); + if(waserror()){ + qunlock(c); + return -1; + } + + aoeidentify(u->dev->ctlr, u); + if(c->c) + cclose(c->c); + c->c = 0; + uprint("%s/data", c->path); + c->c = namec(up->genbuf, Aopen, ORDWR, 0); + qunlock(c); + poperror(); + + return 0; +} + +static int +aoeonline(SDunit *u) +{ + Ctlr *c; + int r; + + c = u->dev->ctlr; + r = 0; + + if((c->feat&Datapi) && c->mediachange){ + if(aoeconnect(u, c) == 0 && (r = scsionline(u)) > 0) + c->mediachange = 0; + return r; + } + + if(c->mediachange){ + if(aoeconnect(u, c) == -1) + return 0; + r = 2; + c->mediachange = 0; + u->sectors = c->sectors; + u->secsize = Aoesectsz; + } else + r = 1; + + return r; +} + +static int +aoerio(SDreq *r) +{ + int i, count; + uvlong lba; + char *name; + uchar *cmd; + long (*rio)(Chan*, void*, long, vlong); + Ctlr *c; + SDunit *unit; + + unit = r->unit; + c =
unit->dev->ctlr; +// if(c->feat & Datapi) +// return aoeriopkt(r, d); + + cmd = r->cmd; + name = unit->name; + + if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ +// qlock(c); +// i = flushcache(); +// qunlock(c); +// if(i == 0) +// return sdsetsense(r, SDok, 0, 0, 0); + return sdsetsense(r, SDcheck, 3, 0xc, 2); + } + + if((i = sdfakescsi(r, c->ident, sizeof c->ident)) != SDnostatus){ + r->status = i; + return i; + } + + switch(*cmd){ + case 0x88: + case 0x28: + rio = c->c->dev->read; + break; + case 0x8a: + case 0x2a: + rio = c->c->dev->write; + break; + default: + print("%s: bad cmd 0x%.2ux\n", name, cmd[0]); + r->status = SDcheck; + return SDcheck; + } + + if(r->data == nil) + return SDok; + + if(r->clen == 16){ + if(cmd[2] || cmd[3]) + return sdsetsense(r, SDcheck, 3, 0xc, 2); + lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32; + lba |= cmd[6]<<24 | cmd[7]<<16 | cmd[8]<<8 | cmd[9]; + count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13]; + }else{ + lba = cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5]; + count = cmd[7]<<8 | cmd[8]; + } + + count *= Aoesectsz; + + if(r->dlen < count) + count = r->dlen & ~0x1ff; + + if(waserror()){ + if(strcmp(up->errstr, Echange) == 0 || + strcmp(up->errstr, Enotup) == 0) + unit->sectors = 0; + nexterror(); + } + r->rlen = rio(c->c, r->data, count, Aoesectsz * lba); + poperror(); + r->status = SDok; + return SDok; +} + +static char *smarttab[] = { + "unset", + "error", + "threshold exceeded", + "normal" +}; + +static char * +pflag(char *s, char *e, uchar f) +{ + uchar i, m; + + for(i = 0; i < 8; i++){ + m = 1 << i; + if(f & m) + s = seprint(s, e, "%s ", flagname[i]); + } + return seprint(s, e, "\n"); +} + +static int +aoerctl(SDunit *u, char *p, int l) +{ + Ctlr *c; + char *e, *op; + + if((c = u->dev->ctlr) == nil) + return 0; + e = p+l; + op = p; + + p = seprint(p, e, "model\t%s\n", c->model); + p = seprint(p, e, "serial\t%s\n", c->serial); + p = seprint(p, e, "firm %s\n", c->firmware); + if(c->smartrs == 0xff) + p = seprint(p, e, 
"smart\tenable error\n"); + else if(c->smartrs == 0) + p = seprint(p, e, "smart\tdisabled\n"); + else + p = seprint(p, e, "smart\t%s\n", smarttab[c->smart]); + p = seprint(p, e, "flag "); + p = pflag(p, e, c->feat); + p = seprint(p, e, "geometry %llud %d\n", c->sectors, Aoesectsz); + return p-op; +} + +static int +aoewctl(SDunit *, Cmdbuf *cmd) +{ + cmderror(cmd, Ebadarg); + return 0; +} + +static SDev* +aoeprobew(DevConf *c) +{ + char *p; + + p = strchr(c->type, '/'); + if(p == nil || strlen(p) > Maxpath - 11) + error(Ebadarg); + if(p[1] == '#') + p++; /* hack */ + if(ctlrlookup(p)) + error(Einuse); + return aoeprobe(p, 0); +} + +static void +aoeclear(SDev *s) +{ + delctlr((Ctlr *)s->ctlr); +} + +static char* +aoertopctl(SDev *s, char *p, char *e) +{ + Ctlr *c; + + c = s->ctlr; + return seprint(p, e, "%s aoe %s\n", s->name, c->path); +} + +static int +aoewtopctl(SDev *, Cmdbuf *cmd) +{ + switch(cmd->nf){ + default: + cmderror(cmd, Ebadarg); + } + return 0; +} + +SDifc sdaoeifc = { + "aoe", + + aoepnp, + nil, /* legacy */ + nil, /* enable */ + nil, /* disable */ + + aoeverify, + aoeonline, + aoerio, + aoerctl, + aoewctl, + + scsibio, + aoeprobew, /* probe */ + aoeclear, /* clear */ + aoertopctl, + aoewtopctl, +}; diff -Nru 0/sys/src/nix/port/sdatafis.c 4/sys/src/nix/port/sdatafis.c --- 0/sys/src/nix/port/sdatafis.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sdatafis.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,292 @@ +/* + * ata analog to sdscsi + * copyright © 2010 erik quanstrom + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/sd.h" +#include +#include "sdfis.h" + +#define reqio(r) (r)->unit->dev->ifc->ataio(r) +#define dprint(...) 
print(__VA_ARGS__) + +static char* +dnam(SDunit *u) +{ + return u->name; +} + +static int +settxmode(SDunit *u, Sfis *f, uchar x) +{ + int t; + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + if((t = txmodefis(f, r.cmd, x)) == -1) + return 0; + r.clen = 16; + r.ataproto = t; + r.timeout = totk(Ms2tk(1*1000)); + return reqio(&r); +} + +static int +flushcache(SDunit *u, Sfis *f) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.clen = 16; + r.ataproto = flushcachefis(f, r.cmd); + r.timeout = totk(Ms2tk(60*1000)); + return reqio(&r); +} + +static int +setfeatures(SDunit *u, Sfis *f, uchar x, uint w) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.clen = 16; + r.ataproto = featfis(f, r.cmd, x); + r.timeout = totk(w); + return reqio(&r); +} + +static int +identify1(SDunit *u, Sfis *f, void *id) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.clen = 16; + r.ataproto = identifyfis(f, r.cmd); + r.data = id; + r.dlen = 0x200; + r.timeout = totk(Ms2tk(3*1000)); + return reqio(&r); +} + +static int +identify0(SDunit *u, Sfisx *f, ushort *id) +{ + int i, n; + vlong osectors, s; + uchar oserial[21]; + + for(i = 0;; i++){ + if(i > 5 || identify1(u, f, id) != 0) + return -1; + n = idpuis(id); + if(n & Pspinup && setfeatures(u, f, 7, 20*1000) == -1) + dprint("%s: puis spinup fail\n", dnam(u)); + if(n & Pidready) + break; + } + + s = idfeat(f, id); + if(s == -1) + return -1; + if((f->feat&Dlba) == 0){ + dprint("%s: no lba support\n", dnam(u)); + return -1; + } + osectors = u->sectors; + memmove(oserial, f->serial, sizeof f->serial); + + f->sectors = s; + f->secsize = idss(f, id); + + idmove(f->serial, id+10, 20); + idmove(f->firmware, id+23, 8); + idmove(f->model, id+27, 40); + f->wwn = idwwn(f, id); + memset(u->inquiry, 0, sizeof u->inquiry); + u->inquiry[2] = 2; + u->inquiry[3] = 2; + u->inquiry[4] = sizeof u->inquiry - 4; + memmove(u->inquiry+8, f->model, 40); + + if(osectors != s || memcmp(oserial, f->serial, sizeof oserial)){ + 
f->drivechange = 1; + u->sectors = 0; + } + return 0; +} + +static int +identify(SDunit *u, Sfisx *f) +{ + int r; + ushort *id; + + id = malloc(0x200); + if(id == nil) + error(Enomem); + r = identify0(u, f, id); + free(id); + return r; +} + +void +pronline(SDunit *u, Sfisx *f) +{ + char *s, *t; + + if(f->type == Sas) + s = "sas"; + else{ + s = "lba"; + if(f->feat & Dllba) + s = "llba"; + if(f->feat & Datapi) + s = "atapi"; + } + t = ""; + if(f->drivechange) + t = "[newdrive]"; + print("%s: %s %,lld sectors\n", dnam(u), s, f->sectors); + print(" %s %s %s %s\n", f->model, f->firmware, f->serial, t); +} + +int +ataonline0(SDunit *u, Sfisx *f) +{ + if(identify(u, f) != 0){ + dprint("%s: identify failure\n", dnam(u)); + return SDeio; + } + if(f->feat & Dpower && setfeatures(u, f, 0x85, 3*1000) != 0) + f->feat &= ~Dpower; + if(settxmode(u, f, f->udma) != 0){ + dprint("%s: can't set tx mode udma %d\n", dnam(u), f->udma); + return SDeio; + } + return SDok; +} + + +int +ataonline(SDunit *u, Sfisx *f) +{ + int r; + + wlock(f); + if(waserror()) + r = SDeio; + else{ + r = ataonline0(u, f); + poperror(); + } + wunlock(f); + return r; +} + +static int +ereqio(Sfisx *f, SDreq *r) +{ + int rv; + + rv = -1; + rlock(f); + if(!waserror()){ + rv = reqio(r); + poperror(); + } + runlock(f); + return rv; +} + +static int +ataexec(Sfisx *f, SDreq *r) +{ + ulong s, t; + + for(t = r->timeout; setreqto(r, t) != -1; edelay(250, t)){ + if((s = ereqio(f, r)) != SDok) + return s; + switch(r->status){ + default: + return r->status; + case SDtimeout: + case SDretry: + continue; + } + } + return -1; +} + +long +atabio(SDunit* u, Sfisx *f, int lun, int write, void *d0, long count0, uvlong lba) +{ + char *data; + uint llba, n, count; + SDreq r; +// Sfisx *f; + +// f = u->f; + memset(&r, 0, sizeof r); + r.unit = u; + r.lun = lun; + llba = (f->feat & Dlba) != 0; + r.clen = 16; + data = d0; + r.timeout = gettotk(f); + for(count = count0; count > 0; count -= n){ + n = count; + if(llba && n > 65536) + n = 
65536; + else if(!llba && n > 256) + n = 256; + if(n > f->atamaxxfr) + n = f->atamaxxfr; + r.data = data; + r.dlen = n*f->secsize; + r.ataproto = rwfis(f, r.cmd, write, n, lba); + r.write = (r.ataproto & Pout) != 0; + if(ataexec(f, &r) != SDok) + return -1; + data += r.dlen; + lba += n; + } + return count0 * f->secsize; +} + +int +atariosata(SDunit *u, Sfisx *f, SDreq *r) +{ + uchar *cmd; + int i, n, count, rw; + uvlong lba; + + cmd = r->cmd; + if(cmd[0] == 0x35 || cmd[0] == 0x91){ + if(flushcache(u, f) == 0) + return sdsetsense(r, SDok, 0, 0, 0); /* stupid scuzz */ + return sdsetsense(r, SDcheck, 3, 0xc, 2); + } + if((i = sdfakescsi(r)) != SDnostatus){ + r->status = i; + return i; + } + if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus) + return i; + n = atabio(u, f, r->lun, r->write, r->data, count, lba); + if(n == -1) + return SDeio; + r->rlen = n; + return sdsetsense(r, SDok, 0, 0, 0); /* stupid scuzz */ +} diff -Nru 0/sys/src/nix/port/sdfis.h 4/sys/src/nix/port/sdfis.h --- 0/sys/src/nix/port/sdfis.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sdfis.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,67 @@ +#define Ms2tk(t) (((t)*HZ)/1000) + +enum { + Sas = 1, + Sata, + + Spd15 = 8, + Spd30 = 9, + Spd60 = 10, + + SDspinup = SDretry - 1, + SDrate = SDretry - 2, +}; + +/* + * the lock allows ataonline to exclude other commands + * during the online process. we could extend this to allow + * for exclusive access for periods of time. 
+ */ +typedef struct Sfisx Sfisx; +struct Sfisx{ + uchar type; + Sfis; + Cfis; /* sas and media info */ + uint sasspd; /* botch; move to fis.h */ + uchar *oaf; + + RWlock; + int drivechange; + char serial[20+1]; + char firmware[8+1]; + char model[40+1]; + uvlong wwn; + uvlong sectors; + ulong secsize; + uint tler; /* time limit for error recovery */ + uint atamaxxfr; + uint maxspd; +}; + +int tur(SDunit *, int, uint*); + +int ataonline(SDunit*, Sfisx*); +//long atabio(SDunit*, int, int, void*, long, uvlong); +long atabio(SDunit*, Sfisx*, int, int, void*, long, uvlong); + +int scsionlinex(SDunit*, Sfisx*); +//long scsibio(SDunit*, int, int, void*, long, uvlong); +long scsibiox(SDunit*, Sfisx*, int, int, void*, long, uvlong); + +//int atariosata(SDunit*, SDreq*); +int atariosata(SDunit*, Sfisx*, SDreq*); + +char *sfisxrdctl(Sfisx*, char*, char*); +void pronline(SDunit*, Sfisx*); + +ulong totk(ulong); +int setreqto(SDreq*, ulong); +ulong gettotk(Sfisx*); + +uvlong getle(uchar*, int); +void putle(uchar*, uvlong, int); +uvlong getbe(uchar*, int); +void putbe(uchar*, uvlong, int); + +/* junk to make private later */ +void edelay(ulong, ulong); diff -Nru 0/sys/src/nix/port/sdscsi.c 4/sys/src/nix/port/sdscsi.c --- 0/sys/src/nix/port/sdscsi.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sdscsi.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,422 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/sd.h" + +static int +scsitest(SDreq* r) +{ + r->write = 0; + memset(r->cmd, 0, sizeof(r->cmd)); + r->cmd[1] = r->lun<<5; + r->clen = 6; + r->data = nil; + r->dlen = 0; + r->flags = 0; + + r->status = ~0; + + return r->unit->dev->ifc->rio(r); +} + +int +scsiverify(SDunit* unit) +{ + SDreq *r; + int i, status; + uchar *inquiry; + + if((r = malloc(sizeof(SDreq))) == nil) + return 0; + if((inquiry = sdmalloc(sizeof(unit->inquiry))) == nil){ + free(r); + return 0; + } + r->unit = unit; + r->lun = 0; /* 
??? */ + + memset(unit->inquiry, 0, sizeof(unit->inquiry)); + r->write = 0; + r->cmd[0] = 0x12; + r->cmd[1] = r->lun<<5; + r->cmd[4] = sizeof(unit->inquiry)-1; + r->clen = 6; + r->data = inquiry; + r->dlen = sizeof(unit->inquiry)-1; + r->flags = 0; + + r->status = ~0; + if(unit->dev->ifc->rio(r) != SDok){ + free(inquiry); free(r); /* the inquiry bounce buffer must be released on failure too */ + return 0; + } + memmove(unit->inquiry, inquiry, r->dlen); + free(inquiry); + + SET(status); + for(i = 0; i < 3; i++){ + while((status = scsitest(r)) == SDbusy) + ; + if(status == SDok || status != SDcheck) + break; + if(!(r->flags & SDvalidsense)) + break; + if((r->sense[2] & 0x0F) != 0x02) + continue; + + /* + * Unit is 'not ready'. + * If it is in the process of becoming ready or needs + * an initialising command, set status so it will be spun-up + * below. + * If there's no medium, that's OK too, but don't + * try to spin it up. + */ + if(r->sense[12] == 0x04){ + if(r->sense[13] == 0x02 || r->sense[13] == 0x01){ + status = SDok; + break; + } + } + if(r->sense[12] == 0x3A) + break; + } + + if(status == SDok){ + /* + * Try to ensure a direct-access device is spinning. + * Don't wait for completion, ignore the result. + */ + if((unit->inquiry[0] & 0x1F) == 0){ + memset(r->cmd, 0, sizeof(r->cmd)); + r->write = 0; + r->cmd[0] = 0x1B; + r->cmd[1] = (r->lun<<5)|0x01; + r->cmd[4] = 1; + r->clen = 6; + r->data = nil; + r->dlen = 0; + r->flags = 0; + + r->status = ~0; + unit->dev->ifc->rio(r); + } + } + free(r); + + if(status == SDok || status == SDcheck) + return 1; + return 0; +} + +static int +scsirio(SDreq* r) +{ + /* + * Perform an I/O request, returning + * -1 failure + * 0 ok + * 1 no medium present + * 2 retry + * The contents of r may be altered so the + * caller should re-initialise if necessary.
+ */ + r->status = ~0; + switch(r->unit->dev->ifc->rio(r)){ + default: + break; + case SDcheck: + if(!(r->flags & SDvalidsense)) + break; + switch(r->sense[2] & 0x0F){ + case 0x00: /* no sense */ + case 0x01: /* recovered error */ + return 2; + case 0x06: /* check condition */ + /* + * 0x28 - not ready to ready transition, + * medium may have changed. + * 0x29 - power on or some type of reset. + */ + if(r->sense[12] == 0x28 && r->sense[13] == 0) + return 2; + if(r->sense[12] == 0x29) + return 2; + break; + case 0x02: /* not ready */ + /* + * If no medium present, bail out. + * If unit is becoming ready, rather than not + * not ready, wait a little then poke it again. */ + if(r->sense[12] == 0x3A) + break; + if(r->sense[12] != 0x04 || r->sense[13] != 0x01) + break; + + while(waserror()) + ; + tsleep(&up->sleep, return0, 0, 500); + poperror(); + scsitest(r); + return 2; + default: + break; + } + break; + case SDok: + return 0; + } + return -1; +} + +int +scsionline(SDunit* unit) +{ + SDreq *r; + uchar *p; + int ok, retries; + + if((r = malloc(sizeof(SDreq))) == nil) + return 0; + if((p = sdmalloc(8)) == nil){ + free(r); + return 0; + } + + ok = 0; + + r->unit = unit; + r->lun = 0; /* ??? */ + for(retries = 0; retries < 10; retries++){ + /* + * Read-capacity is mandatory for DA, WORM, CD-ROM and + * MO. It may return 'not ready' if type DA is not + * spun up, type MO or type CD-ROM are not loaded or just + * plain slow getting their act together after a reset. + */ + r->write = 0; + memset(r->cmd, 0, sizeof(r->cmd)); + r->cmd[0] = 0x25; + r->cmd[1] = r->lun<<5; + r->clen = 10; + r->data = p; + r->dlen = 8; + r->flags = 0; + + r->status = ~0; + switch(scsirio(r)){ + default: + break; + case 0: + unit->sectors = (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3]; + unit->secsize = (p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7]; + + /* + * Some ATAPI CD readers lie about the block size. + * Since we don't read audio via this interface + * it's okay to always fudge this. 
+ */ + if(unit->secsize == 2352) + unit->secsize = 2048; + /* + * Devices with removable media may return 0 sectors + * when they have empty media (e.g. sata dvd writers); + * if so, keep the count zero. + * + * Read-capacity returns the LBA of the last sector, + * therefore the number of sectors must be incremented. + */ + if(unit->sectors != 0) + unit->sectors++; + ok = 1; + break; + case 1: + ok = 1; + break; + case 2: + continue; + } + break; + } + free(p); + free(r); + + if(ok) + return ok+retries; + else + return 0; +} + +int +scsiexec(SDunit* unit, int write, uchar* cmd, int clen, void* data, int* dlen) +{ + SDreq *r; + int status; + + if((r = malloc(sizeof(SDreq))) == nil) + return SDmalloc; + r->unit = unit; + r->lun = cmd[1]>>5; /* ??? */ + r->write = write; + memmove(r->cmd, cmd, clen); + r->clen = clen; + r->data = data; + if(dlen) + r->dlen = *dlen; + r->flags = 0; + + r->status = ~0; + + /* + * Call the device-specific I/O routine. + * There should be no calls to 'error()' below this + * which percolate back up. + */ + switch(status = unit->dev->ifc->rio(r)){ + case SDok: + if(dlen) + *dlen = r->rlen; + /*FALLTHROUGH*/ + case SDcheck: + /*FALLTHROUGH*/ + default: + /* + * It's more complicated than this. There are conditions + * which are 'ok' but for which the returned status code + * is not 'SDok'. + * Also, not all conditions require a reqsense, might + * need to do a reqsense here and make it available to the + * caller somehow. + * + * Mañana. 
+ */ + break; + } + sdfree(r); + + return status; +} + +static void +scsifmt10(SDreq *r, int write, int lun, long nb, vlong bno) +{ + uchar *c; + + c = r->cmd; + if(write == 0) + c[0] = 0x28; + else + c[0] = 0x2A; + c[1] = lun<<5; + c[2] = bno>>24; + c[3] = bno>>16; + c[4] = bno>>8; + c[5] = bno; + c[6] = 0; + c[7] = nb>>8; + c[8] = nb; + c[9] = 0; + + r->clen = 10; +} + +static void +scsifmt16(SDreq *r, int write, int lun, long nb, vlong bno) +{ + uchar *c; + + c = r->cmd; + if(write == 0) + c[0] = 0x88; + else + c[0] = 0x8A; + c[1] = lun<<5; /* so wrong */ + c[2] = bno>>56; + c[3] = bno>>48; + c[4] = bno>>40; + c[5] = bno>>32; + c[6] = bno>>24; + c[7] = bno>>16; + c[8] = bno>>8; + c[9] = bno; + c[10] = nb>>24; + c[11] = nb>>16; + c[12] = nb>>8; + c[13] = nb; + c[14] = 0; + c[15] = 0; + + r->clen = 16; +} + +long +scsibio(SDunit* unit, int lun, int write, void* data, long nb, vlong bno) +{ + SDreq *r; + long rlen; + + if((r = malloc(sizeof(SDreq))) == nil) + error(Enomem); + r->unit = unit; + r->lun = lun; +again: + r->write = write; + if(bno >= (1ULL<<32)) + scsifmt16(r, write, lun, nb, bno); + else + scsifmt10(r, write, lun, nb, bno); + r->data = data; + r->dlen = nb*unit->secsize; + r->flags = 0; + + r->status = ~0; + switch(scsirio(r)){ + default: + rlen = -1; + break; + case 0: + rlen = r->rlen; + break; + case 2: + rlen = -1; + if(!(r->flags & SDvalidsense)) + break; + switch(r->sense[2] & 0x0F){ + default: + break; + case 0x01: /* recovered error */ + print("%s: recovered error at sector %llud\n", + unit->name, bno); + rlen = r->rlen; + break; + case 0x06: /* check condition */ + /* + * Check for a removeable media change. + * If so, mark it by zapping the geometry info + * to force an online request. + */ + if(r->sense[12] != 0x28 || r->sense[13] != 0) + break; + if(unit->inquiry[1] & 0x80) + unit->sectors = 0; + break; + case 0x02: /* not ready */ + /* + * If unit is becoming ready, + * rather than not not ready, try again. 
+ */ + if(r->sense[12] == 0x04 && r->sense[13] == 0x01) + goto again; + break; + } + break; + } + free(r); + + return rlen; +} + diff -Nru 0/sys/src/nix/port/sdscsifis.c 4/sys/src/nix/port/sdscsifis.c --- 0/sys/src/nix/port/sdscsifis.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sdscsifis.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,662 @@ +/* + * sas-able sdscsi + * copyright © 2010 erik quanstrom + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/sd.h" +#include +#include "sdfis.h" + +#define reqio(r) (r)->unit->dev->ifc->rio(r) +#define dprint(...) print(__VA_ARGS__) +#define Ticks sys->ticks +#define generror(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__) + +enum { + Xtimeout = 5*1000, + Deftimeout = 30*1000, /* default timeout */ + Forever = 600*1000, /* default raw timeout */ + + /* oob signals */ + Oobspinup = 0, + Oobpowerloss = 1, +}; + +static uvlong border = 0x0001020304050607ull; +static uvlong lorder = 0x0706050403020100ull; + +uvlong +getle(uchar *t, int w) +{ + uint i; + uvlong r; + + r = 0; + for(i = w; i != 0; ) + r = r<<8 | t[--i]; + return r; +} + +void +putle(uchar *t, uvlong r, int w) +{ + uchar *o, *f; + uint i; + + f = (uchar*)&r; + o = (uchar*)&lorder; + for(i = 0; i < w; i++) + t[o[i]] = f[i]; +} + +uvlong +getbe(uchar *t, int w) +{ + uint i; + uvlong r; + + r = 0; + for(i = 0; i < w; i++) + r = r<<8 | t[i]; + return r; +} + +void +putbe(uchar *t, uvlong r, int w) +{ + uchar *o, *f; + uint i; + + f = (uchar*)&r; + o = (uchar*)&border + (sizeof border-w); + for(i = 0; i < w; i++) + t[i] = f[o[i]]; +} + +static char* +unam(SDunit *u) +{ + return u->name; +} + +static uint +asckey(SDreq *r) +{ + uint fmt, n; + uchar *s; + + s = r->sense; +// if((s[0] & 0x80) == 0){ +// dprint("%s: non-scsi sense %.2ux\n", unam(r->unit), s[0]); +// return ~0; +// } + fmt = s[0] & 0x7f; + n = 18; /* botch should be r->slen; */ + /* spc3 
§4.5.3; 0x71 is deferred. */ + if(n >= 18 && (fmt == 0x70 || fmt == 0x71)) + return (s[2] & 0xf)<<16 | s[12]<<8 | s[13]; + dprint("%s: cmd %.2ux unknown sense fmt %.2ux\n", unam(r->unit), r->cmd[0], fmt); + return (s[2] & 0xf)<<16 | s[12]<<8 | s[13]; +} + +/* + * other suspects: + * key asc/q + * 02 0401 becoming ready + * 040b target port in standby state + * 0b01 overtemp + * 0b0[345] background * + * 0c01 write error - recovered with auto reallocation + * 0c02 write error - auto reallocation failed + * 0c03 write error - recommend reassignment + * 17* recovered data + * 18* recovered data + * 5d* smart-style reporting (disk/smart handles) + * 5e* power state change + */ +static int +classify(SDunit *u, int key) +{ + switch(key>>16){ + case 0x00: + case 0x01: + return SDretry; + } + + if(key == 0x062902 || key == 0x062901 || key == 0x062900){ + dprint("%s: power on sense\n", unam(u)); + return SDretry; + } + if(key == 0x062800 && u->inquiry[1] & 0x80){ + dprint("%s: media change\n", unam(u)); + u->sectors = 0; + } + if(key == 0x020401){ + dprint("%s: becoming ready\n", unam(u)); + return SDretry; + } + if(key == 0x020411){ + dprint("%s: need notify (enable spinup)\n", unam(u)); + return SDspinup; + } + return SDcheck; +} + +ulong +totk(ulong u) +{ + if(u == 0) + u = Deftimeout; + return Ticks + u; +} + +ulong +gettotk(Sfisx *f) +{ + return totk(f->tler); +} + +int +setreqto(SDreq *r, ulong tk) +{ + long ms; + + ms = TK2MS(tk - Ticks); + if(ms < 2) + return -1; + if(ms > 750) + ms = 750; + r->timeout = Ticks + Ms2tk(ms); + return 0; +} + +static int +ereqio(Sfisx *f, SDreq *r) +{ + int rv; + + if(f == nil) + return reqio(r); + rv = -1; + rlock(f); + if(!waserror()){ + rv = reqio(r); + poperror(); + } + runlock(f); + return rv; +} + +void +oob(Sfisx *f, SDunit *u, int oobmsg) +{ + SDreq *r; + + r = malloc(sizeof r); + if(r == nil) + return; + r->cmd[0] = 0xf0; + r->cmd[1] = 0xca; + r->cmd[2] = 0xfe; + r->cmd[3] = 0xba; + putbe(r->cmd + 4, oobmsg, 4); + r->clen = 
16; + r->unit = u; + r->timeout = totk(Ms2tk(Xtimeout)); + if(!waserror()){ + ereqio(f, r); + poperror(); + } + free(r); +} + +int +scsiriox(Sfisx *f, SDreq *r) +{ + int t, s; + + r->status = ~0; + if(r->timeout == 0) + r->timeout = totk(Ms2tk(Forever)); + for(t = r->timeout; setreqto(r, t) != -1;){ + s = ereqio(f, r); + if(s == SDcheck && r->flags & SDvalidsense) + s = classify(r->unit, asckey(r)); + switch(s){ + default: + return s; + case SDspinup: + print("%s: OOB\n", unam(r->unit)); + /* don't acknowledge oobspinup */ + oob(f, r->unit, Oobspinup); + } + } + sdsetsense(r, SDcheck, 0x02, 0x3e, 0x02); + return SDtimeout; +} + +void +edelay(ulong ms, ulong tk) +{ + int d; + + d = TK2MS(tk - Ticks); + if(d <= 0) + return; + if(d < ms) + ms = d/2; + if(ms <= 0) + return; + if(up){ + while(waserror()) + ; + tsleep(&up->sleep, return0, 0, ms); + poperror(); + }else + delay(ms); +} + +static int +scsiexec(Sfisx *f, SDreq *r) +{ + ulong s, t; + + for(t = r->timeout; setreqto(r, t) != -1; edelay(250, t)){ + if((s = scsiriox(f, r)) != SDok) + return s; + switch(r->status){ + default: + return r->status; + case SDtimeout: + case SDretry: + continue; + } + } + return -1; +} + +/* open address fises */ +enum{ + Initiator = 0x80, + Openaddr = 1, + Awms = 0x8000, + Smp = 0, + Ssp = 1, + Stp = 2, +}; + +static void +oafis(Cfis *f, uchar *c, int type, int spd) +{ + c[0] = Initiator | type<<4 | Openaddr; + c[1] = spd; + if(type == Smp) + memset(c + 2, 0xff, 2); + else + memmove(c + 2, f->ict, 2); + memmove(c + 4, f->tsasaddr, 8); /* dest "port identifier" §4.2.6 */ + memmove(c + 12, f->ssasaddr, 8); +} + +static int +inquiry(SDunit *u) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.cmd[0] = 0x12; + r.cmd[4] = 0xff; + r.clen = 6; + r.unit = u; + r.timeout = totk(Ms2tk(Xtimeout)); + r.data = u->inquiry; + r.dlen = sizeof u->inquiry; + return scsiexec(nil, &r); +} + +int +tur(SDunit *u, int timeout, uint *key) +{ + int rv; + SDreq r; + + memset(&r, 0, sizeof r); + r.clen = 6; + 
r.unit = u; + r.timeout = totk(timeout); + rv = scsiexec(nil, &r); + *key = r.status; + if(r.flags & SDvalidsense) + *key = asckey(&r); + return rv; +} + +static int +sasvpd(SDunit *u, uchar *buf, int l) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.cmd[0] = 0x12; + r.cmd[1] = 1; + r.cmd[2] = 0x80; + r.cmd[4] = l; + r.clen = 6; + r.data = buf; + r.dlen = l; + r.unit = u; + r.timeout = totk(Ms2tk(Xtimeout)); + return scsiexec(nil, &r); +} + +static int +capacity10(SDunit *u, uchar *buf, int l) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.cmd[0] = 0x25; + r.clen = 10; + r.data = buf; + r.dlen = l; + r.unit = u; + r.timeout = totk(Ms2tk(Xtimeout)); + return scsiexec(nil, &r); +} + +static int +capacity16(SDunit *u, uchar *buf, int l) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.cmd[0] = 0x9e; + r.cmd[1] = 0x10; + r.cmd[13] = l; + r.clen = 16; + r.data = buf; + r.dlen = l; + r.unit = u; + r.timeout = totk(Ms2tk(Xtimeout)); + return scsiexec(nil, &r); +} + +startstop(SDunit *u, int code) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.cmd[0] = 0x1b; + r.cmd[1] = 0<<5 | 1; /* lun deprecated */ + r.cmd[4] = code; + r.clen = 6; + r.unit = u; + r.timeout = totk(Ms2tk(Xtimeout)); + return scsiexec(nil, &r); +} + +static void +frmove(char *p, uchar *c, int n) +{ + char *s, *e; + + memmove(p, c, n); + p[n] = 0; + for(e = p + n - 1; e > p && *e == ' '; e--) + *e = 0; + for(s = p; *s == ' '; ) + s++; + memmove(p, s, (e - s) + 2); +} + +static void +chkinquiry(Sfisx *f, uchar *c) +{ + char buf[32], buf2[32], omod[sizeof f->model]; + + memmove(omod, f->model, sizeof f->model); + frmove(buf, c + 8, 8); + frmove(buf2, c + 16, 16); + snprint(f->model, sizeof f->model, "%s %s", buf, buf2); + frmove(f->firmware, c + 32, 4); /* product revision level: bytes 32-35 of standard inquiry data */ + if(memcmp(omod, f->model, sizeof omod) != 0) + f->drivechange = 1; +} + +static void +chkvpd(Sfisx *f, uchar *c, int n) +{ + char buf[sizeof f->serial]; + int l; + + if(n > sizeof buf - 1) + n = sizeof buf - 1; + l = c[3]; + if(l > n) + l = n; + frmove(buf, c 
+ 4, l); + if(strcmp(buf, f->serial) != 0) + f->drivechange = 1; + memmove(f->serial, buf, sizeof buf); +} + +static int +adjcapacity(Sfisx *f, uvlong ns, uint nss) +{ + if(ns != 0) + ns++; + if(nss == 2352) + nss = 2048; + if(f->sectors != ns || f->secsize != nss){ + f->drivechange = 1; + f->sectors = ns; + f->secsize = nss; + } + return 0; +} + +static int +chkcapacity10(uchar *p, uvlong *ns, uint *nss) +{ + *ns = getbe(p, 4); + *nss = getbe(p + 4, 4); + return 0; +} + +static int +chkcapacity16(uchar *p, uvlong *ns, uint *nss) +{ + *ns = getbe(p, 8); + *nss = getbe(p + 8, 4); + return 0; +} + +typedef struct Spdtab Spdtab; +struct Spdtab { + int spd; + char *s; +}; +Spdtab spdtab[] = { +[1] Spd60, "6.0gbps", + Spd30, "3.0gbps", + Spd15, "1.5gbps", +}; + +static int +sasspd(SDunit *u, Sfisx *f) +{ + int i, r; + uint key; + + for(i = 1;; i++){ + if(i == nelem(spdtab)) + return SDrate; + if(spdtab[i].spd > f->maxspd) + continue; + oafis(f, f->oaf, Ssp, spdtab[i].spd); + dprint("%s: rate %s\n", unam(u), spdtab[i].s); + /* this timeout is too long */ + if((r = tur(u, 2*Xtimeout, &key)) == SDok){ + f->sasspd = i; + return SDok; + } + dprint("%s: key is %d / key=%.6ux\n", unam(u), key, key); + if(key != SDrate || ++i == nelem(spdtab)) + return r; + } +} + +static int +scsionline0(SDunit *u, Sfisx *f) +{ + uchar buf[0x40]; + int r; + uint nss; + uvlong ns; + + /* todo: cap the total sasprobe time, not just cmds */ + f->sasspd = 0; + if(f->maxspd != 0 && (r = sasspd(u, f)) != 0) + return r; + if((r = inquiry(u)) != SDok) + return r; + chkinquiry(f, u->inquiry); + /* vpd 0x80 (unit serial) is not mandatory; spc-4 §7.7 */ + memset(buf, 0, sizeof buf); + if(sasvpd(u, buf, sizeof buf) == SDok) + chkvpd(f, buf, sizeof buf); + else{ + if(f->serial[0]) + f->drivechange = 1; + f->serial[0] = 0; + } + if((r = capacity10(u, buf, 8)) != SDok) + return r; + chkcapacity10(buf, &ns, &nss); + if(ns == 0xffffffff){ + if((r = capacity16(u, buf, 16)) != SDok) + return r; + 
chkcapacity16(buf, &ns, &nss); + } + adjcapacity(f, ns, nss); + startstop(u, 0<<4 | 1); + return 0; +} + +int +scsionlinex(SDunit *u, Sfisx *f) +{ + int r; + + wlock(f); + if(waserror()) + r = SDeio; + else{ + r = scsionline0(u, f); + poperror(); + } + wunlock(f); + return r; +} + +static int +rwcdb(Sfis*, uchar *c, int write, ulong count, uvlong lba) +{ + int is16; + static uchar tab[2][2] = {0x28, 0x88, 0x2a, 0x8a}; + + is16 = lba>0xffffffffull; + c[0] = tab[write][is16]; + if(is16){ + putbe(c + 2, lba, 8); + putbe(c + 10, count, 4); + c[14] = 0; + c[15] = 0; + return 16; + }else{ + putbe(c + 2, lba, 4); + c[6] = 0; + putbe(c + 7, count, 2); + return 10; + } +} + +static void +setlun(uchar *c, int lun) +{ + c[1] = lun<<5; /* wrong for anything but ancient h/w */ +} + +long +scsibiox(SDunit *u, Sfisx *f, int lun, int write, void *data, long count, uvlong lba) +{ + SDreq r; + + memset(&r, 0, sizeof r); + r.unit = u; + r.lun = lun; + r.data = data; + r.dlen = count * u->secsize; + r.clen = rwcdb(f, r.cmd, write, count, lba); + setlun(r.cmd, lun); + r.timeout = gettotk(f); + + switch(scsiexec(f, &r)){ + case 0: + if((r.flags & SDvalidsense) == 0 || r.sense[2] == 0) + return r.rlen; + default: + generror("%s cmd %.2ux sense %.6ux", Eio, r.cmd[0], asckey(&r)); + error(up->genbuf); + return -1; + case SDtimeout: + generror("%s cmd %.2ux timeout", Eio, r.cmd[0]); + error(up->genbuf); + return -1; + } +} + +static char* +rctlsata(Sfis *f, char *p, char *e) +{ + p = seprint(p, e, "flag\t"); + p = pflag(p, e, f); + p = seprint(p, e, "udma\t%d\n", f->udma); + return p; +} + +static char* +rctlsas(Sfisx *f, char *p, char *e) +{ + char *s; + + s = "none"; + if(f->sasspd < nelem(spdtab)) + if(spdtab[f->sasspd].s != nil) +// if(f->state == Dnew || f->state == Dready) + s = spdtab[f->sasspd].s; + p = seprint(p, e, "sasspd %s\n", s); + return p; +} + +char* +sfisxrdctl(Sfisx *f, char *p, char *e) +{ + p = seprint(p, e, "model\t%s\n", f->model); + p = seprint(p, e, "serial\t%s\n", 
f->serial); + p = seprint(p, e, "firm\t%s\n", f->firmware); + p = seprint(p, e, "wwn\t%llux\n", f->wwn); + p = seprint(p, e, "tler\t%ud\n", f->tler); + if(f->type == Sata) + p = rctlsata(f, p, e); + if(f->type == Sas) + p = rctlsas(f, p, e); + return p; +} diff -Nru 0/sys/src/nix/port/segment.c 4/sys/src/nix/port/segment.c --- 0/sys/src/nix/port/segment.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/segment.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,413 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +uintmem +segppn(Segment *s, uintmem pa) +{ + uintmem pgsz; + + pgsz = m->pgsz[s->pgszi]; + pa &= ~(pgsz-1); + return pa; +} + +/* + * Sizes are given in multiples of BIGPGSZ. + * The actual page size used is either BIGPGSZ or 1*GiB + * if base is aligned to 1G and size is >= 1G and we support 1G pages. + */ +Segment * +newseg(int type, uintptr base, u64int size) +{ + Segment *s; + int mapsize; + uint pgsz; + + if(size > SEGMAPSIZE*(PTEMAPMEM/BIGPGSZ)) + error(Enovmem); + + pgsz = BIGPGSZ; + if(size*BIGPGSZ >= 1*GiB && getpgszi(1*GiB) >= 0 && + (base&(1ULL*GiB-1)) == 0 && ((size*BIGPGSZ)&(1ULL*GiB-1)) == 0){ + DBG("newseg: using 1G pages\n"); + pgsz = 1*GiB; + } + s = smalloc(sizeof(Segment)); + s->ref = 1; + s->type = type; + s->base = base; + s->ptepertab = PTEMAPMEM/pgsz; + s->top = base+(size*BIGPGSZ); + s->size = size; + s->pgszi = getpgszi(pgsz); + if(s->pgszi < 0) + panic("newseg: getpgszi %d", pgsz); + s->sema.prev = &s->sema; + s->sema.next = &s->sema; + s->color = NOCOLOR; + + mapsize = HOWMANY(size*BIGPGSZ/pgsz, s->ptepertab); + if(mapsize > nelem(s->ssegmap)){ + mapsize *= 2; + if(mapsize > (SEGMAPSIZE*s->ptepertab)) + mapsize = (SEGMAPSIZE*s->ptepertab); + s->map = smalloc(mapsize*sizeof(Pte*)); + s->mapsize = mapsize; + } + else{ + s->map = s->ssegmap; + s->mapsize = nelem(s->ssegmap); + } + + return s; +} + +#define NHASH 101 +#define SHASH(np) (PTR2UINT(np)%NHASH) + +Ksem* 
+segmksem(Segment *sg, Sem *us) +{ + Ksem *s, **l; + + qlock(&sg->lk); + if(sg->sems.s == nil) + sg->sems.s = mallocz(NHASH * sizeof(Ksem*), 1); + for(l = &sg->sems.s[SHASH(us)]; (s = *l) != nil; l = &s->next) + if(s->sem == us){ + qunlock(&sg->lk); + return s; + } + s = mallocz(sizeof *s, 1); + s->sem = us; + s->state = Semok; + s->semaltlock = &sg->semaltlock; + *l = s; + qunlock(&sg->lk); + return s; +} + +static void +freesems(Segment *sg) +{ + int i; + Ksem *s, *aux; + + if(sg->sems.s == nil) + return; + + for(i=0; i<NHASH; i++){ /* walk each hash chain */ + s = sg->sems.s[i]; + while(s != nil){ + aux = s; + s = s->next; + free(aux->q); + free(aux); + } + } + free(sg->sems.s); + sg->sems.s = nil; +} + +void +putseg(Segment *s) +{ + Pte **pp, **emap; + Image *i; + extern void freezseg(Segment*); + + if(s == 0) + return; + + i = s->image; + if(i != 0) { + lock(i); + lock(s); + if(i->s == s && s->ref == 1) + i->s = 0; + unlock(i); + } + else + lock(s); + + s->ref--; + if(s->ref != 0) { + unlock(s); + return; + } + unlock(s); + + qlock(&s->lk); + if(i) + putimage(i); + + emap = &s->map[s->mapsize]; + for(pp = s->map; pp < emap; pp++) + if(*pp) + freepte(s, *pp); + + qunlock(&s->lk); + if(s->map != s->ssegmap) + free(s->map); + if(s->profile != 0) + free(s->profile); + if(s->sems.s != nil) + freesems(s); + if(s->type&SG_ZIO) + freezseg(s); + free(s); +} + +void +relocateseg(Segment *s, uintptr offset) +{ + Page **pg, *x; + Pte *pte, **p, **endpte; + + endpte = &s->map[s->mapsize]; + for(p = s->map; p < endpte; p++) { + if(*p == 0) + continue; + pte = *p; + for(pg = pte->first; pg <= pte->last; pg++) { + if(x = *pg) + x->va += offset; + } + } +} + +Segment* +dupseg(Segment **seg, int segno, int share) +{ + int i, size; + Pte *pte; + Segment *n, *s; + + SET(n); + s = seg[segno]; + + qlock(&s->lk); + if(waserror()){ + qunlock(&s->lk); + nexterror(); + } + switch(s->type&SG_TYPE) { + case SG_TEXT: /* New segment shares pte set */ + case SG_SHARED: + case SG_PHYSICAL: + goto sameseg; + + case SG_STACK: + n = 
newseg(s->type, s->base, s->size); + break; + + case SG_BSS: /* Just copy on write */ + if(share) + goto sameseg; + n = newseg(s->type, s->base, s->size); + break; + + case SG_DATA: /* Copy on write plus demand load info */ + if(segno == TSEG){ + poperror(); + qunlock(&s->lk); + return data2txt(s); + } + + if(share) + goto sameseg; + n = newseg(s->type, s->base, s->size); + + incref(s->image); + n->image = s->image; + n->fstart = s->fstart; + n->flen = s->flen; + n->pgszi = s->pgszi; + n->color = s->color; + n->ptepertab = s->ptepertab; + break; + } + size = s->mapsize; + for(i = 0; i < size; i++) + if(pte = s->map[i]) + n->map[i] = ptecpy(n, pte); + + n->flushme = s->flushme; + if(s->ref > 1) + procflushseg(s); + poperror(); + qunlock(&s->lk); + return n; + +sameseg: + incref(s); + poperror(); + qunlock(&s->lk); + return s; +} + +void +segpage(Segment *s, Page *p) +{ + Pte **pte; + uintptr soff; + uintmem pgsz; + Page **pg; + + if(s->pgszi < 0) + s->pgszi = p->pgszi; + if(s->color == NOCOLOR) + s->color = p->color; + if(s->pgszi != p->pgszi) + panic("segpage: s->pgszi != p->pgszi"); + + if(p->va < s->base || p->va >= s->top) + panic("segpage: p->va < s->base || p->va >= s->top"); + + soff = p->va - s->base; + pte = &s->map[soff/PTEMAPMEM]; + if(*pte == 0) + *pte = ptealloc(s); + pgsz = m->pgsz[s->pgszi]; + pg = &(*pte)->pages[(soff&(PTEMAPMEM-1))/pgsz]; + *pg = p; + if(pg < (*pte)->first) + (*pte)->first = pg; + if(pg > (*pte)->last) + (*pte)->last = pg; +} + +/* + * called with s->lk locked + */ +void +mfreeseg(Segment *s, uintptr start, int pages) +{ + int i, j, size; + uintptr soff; + uintmem pgsz; + Page *pg; + Page *list; + + pgsz = m->pgsz[s->pgszi]; + soff = start-s->base; + j = (soff&(PTEMAPMEM-1))/pgsz; + + size = s->mapsize; + list = nil; + for(i = soff/PTEMAPMEM; i < size; i++) { + if(pages <= 0) + break; + if(s->map[i] == 0) { + pages -= s->ptepertab-j; + j = 0; + continue; + } + while(j < s->ptepertab) { + pg = s->map[i]->pages[j]; + /* + * We want to 
zero s->map[i]->page[j] and putpage(pg), + * but we have to make sure other processors flush the + * entry from their TLBs before the page is freed. + * We construct a list of the pages to be freed, zero + * the entries, then (below) call procflushseg, and call + * putpage on the whole list. + * + * Swapped-out pages don't appear in TLBs, so it's okay + * to putswap those pages before procflushseg. + */ + if(pg){ + if(onswap(pg)) + putswap(pg); + else{ + pg->next = list; + list = pg; + } + s->map[i]->pages[j] = 0; + } + if(--pages == 0) + goto out; + j++; + } + j = 0; + } +out: + /* flush this seg in all other processes */ + if(s->ref > 1) + procflushseg(s); + + /* free the pages */ + for(pg = list; pg != nil; pg = list){ + list = list->next; + putpage(pg); + } +} + +Segment* +isoverlap(Proc* p, uintptr va, usize len) +{ + int i; + Segment *ns; + uintptr newtop; + + newtop = va+len; + for(i = 0; i < NSEG; i++) { + ns = p->seg[i]; + if(ns == 0) + continue; + if((newtop > ns->base && newtop <= ns->top) || + (va >= ns->base && va < ns->top)) + return ns; + } + return nil; +} + +void +segclock(uintptr pc) +{ + Segment *s; + + s = up->seg[TSEG]; + if(s == 0 || s->profile == 0) + return; + + s->profile[0] += TK2MS(1); + if(pc >= s->base && pc < s->top) { + pc -= s->base; + s->profile[pc>>LRESPROF] += TK2MS(1); + } +} + +static void +prepageseg(int i) +{ + Segment *s; + uintptr addr, pgsz; + + s = up->seg[i]; + if(s == nil) + return; + DBG("prepage: base %#p top %#p\n", s->base, s->top); + pgsz = m->pgsz[s->pgszi]; + for(addr = s->base; addr < s->top; addr += pgsz) + fault(addr, i == TSEG); +} + +/* + * BUG: should depend only in segment attributes, not in + * the slot used in up->seg. 
+ */ +void +nixprepage(int i) +{ + if(i >= 0) + prepageseg(i); + else + for(i = 0; i < NSEG; i++) + prepageseg(i); +} + diff -Nru 0/sys/src/nix/port/sysauth.c 4/sys/src/nix/port/sysauth.c --- 0/sys/src/nix/port/sysauth.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sysauth.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,183 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include <authsrv.h> + +char *eve; +char hostdomain[DOMLEN]; + +/* + * return true if current user is eve + */ +int +iseve(void) +{ + return strcmp(eve, up->user) == 0; +} + +void +sysfversion(Ar0* ar0, va_list list) +{ + Chan *c; + char *version; + int fd; + u32int msize; + usize nversion; + + /* + * int fversion(int fd, int bufsize, char *version, int nversion); + * should be + * usize fversion(int fd, u32int msize, char *version, usize nversion); + */ + fd = va_arg(list, int); + msize = va_arg(list, u32int); + version = va_arg(list, char*); + nversion = va_arg(list, usize); + version = validaddr(version, nversion, 1); + /* check there's a NUL in the version string */ + if(nversion == 0 || memchr(version, 0, nversion) == nil) + error(Ebadarg); + + c = fdtochan(fd, ORDWR, 0, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + + ar0->u = mntversion(c, msize, version, nversion); + + cclose(c); + poperror(); +} + +void +sys_fsession(Ar0* ar0, va_list list) +{ + int fd; + char *trbuf; + + /* + * int fsession(int fd, char trbuf[TICKREQLEN]); + * + * Deprecated; backwards compatibility only. 
+ */ + fd = va_arg(list, int); + trbuf = va_arg(list, char*); + + USED(fd); + trbuf = validaddr(trbuf, 1, 1); + *trbuf = '\0'; + + ar0->i = 0; +} + +void +sysfauth(Ar0* ar0, va_list list) +{ + Chan *c, *ac; + char *aname; + int fd; + + /* + * int fauth(int fd, char *aname); + */ + fd = va_arg(list, int); + aname = va_arg(list, char*); + + aname = validaddr(aname, 1, 0); + aname = validnamedup(aname, 1); + if(waserror()){ + free(aname); + nexterror(); + } + c = fdtochan(fd, ORDWR, 0, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + + ac = mntauth(c, aname); + /* at this point ac is responsible for keeping c alive */ + cclose(c); + poperror(); /* c */ + free(aname); + poperror(); /* aname */ + + if(waserror()){ + cclose(ac); + nexterror(); + } + + fd = newfd(ac); + if(fd < 0) + error(Enofd); + poperror(); /* ac */ + + /* always mark it close on exec */ + ac->flag |= CCEXEC; + + ar0->i = fd; +} + +/* + * called by devcons() for user device + * + * anyone can become none + */ +long +userwrite(char* a, long n) +{ + if(n != 4 || strncmp(a, "none", 4) != 0) + error(Eperm); + kstrdup(&up->user, "none"); + up->basepri = PriNormal; + + return n; +} + +/* + * called by devcons() for host owner/domain + * + * writing hostowner also sets user + */ +long +hostownerwrite(char* a, long n) +{ + char buf[128]; + + if(!iseve()) + error(Eperm); + if(n <= 0 || n >= sizeof buf) + error(Ebadarg); + memmove(buf, a, n); + buf[n] = 0; + + renameuser(eve, buf); + kstrdup(&eve, buf); + kstrdup(&up->user, buf); + up->basepri = PriNormal; + + return n; +} + +long +hostdomainwrite(char* a, long n) +{ + char buf[DOMLEN]; + + if(!iseve()) + error(Eperm); + if(n >= DOMLEN) + error(Ebadarg); + memset(buf, 0, DOMLEN); + strncpy(buf, a, n); + if(buf[0] == 0) + error(Ebadarg); + memmove(hostdomain, buf, DOMLEN); + + return n; +} diff -Nru 0/sys/src/nix/port/syscallfmt.c 4/sys/src/nix/port/syscallfmt.c --- 0/sys/src/nix/port/syscallfmt.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/syscallfmt.c 
Wed Feb 6 00:00:00 2013 @@ -0,0 +1,459 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "/sys/src/libc/9syscall/sys.h" + +/* + * Print functions for system call tracing. + */ +static void +fmtrwdata(Fmt* f, char* a, int n, char* suffix) +{ + int i; + char *t; + + if(a == nil){ + fmtprint(f, "0x0%s", suffix); + return; + } + a = validaddr(a, n, 0); + t = smalloc(n+1); + for(i = 0; i < n; i++){ + if(a[i] > 0x20 && a[i] < 0x7f) + t[i] = a[i]; + else + t[i] = '.'; + } + + fmtprint(f, " %#p/\"%s\"%s", a, t, suffix); + free(t); +} + +static void +fmtuserstring(Fmt* f, char* a, char* suffix) +{ + int n; + char *t; + + if(a == nil){ + fmtprint(f, "0/\"\"%s", suffix); + return; + } + a = validaddr(a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + t = smalloc(n+1); /* n bytes of string (NUL included) plus the terminator stored at t[n] */ + memmove(t, a, n); + t[n] = 0; + fmtprint(f, "%#p/\"%s\"%s", a, t, suffix); + free(t); +} + +/* + */ +void +syscallfmt(int syscallno, va_list list) +{ + long l; + ulong ul; + Fmt fmt; + void *v; + vlong vl; + uintptr p; + int i[2], len, **ip; + char *a, **argv; + + fmtstrinit(&fmt); + fmtprint(&fmt, "%d %s ", up->pid, up->text); + + if(syscallno > nsyscall) + fmtprint(&fmt, " %d ", syscallno); + else + fmtprint(&fmt, "%s ", systab[syscallno].n); + + if(up->syscalltrace != nil) + free(up->syscalltrace); + + switch(syscallno){ + case SYSR1: + p = va_arg(list, uintptr); + fmtprint(&fmt, "%#p", p); + break; + case _ERRSTR: /* deprecated */ + case CHDIR: + case EXITS: + case REMOVE: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case BIND: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case CLOSE: + case NOTED: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d", i[0]); + break; + case DUP: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d", i[0], i[1]); + 
break; + case ALARM: + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#lud ", l); + break; + case EXECAC: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + for(;;){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + fmtprint(&fmt, " "); + fmtuserstring(&fmt, a, ""); + argv++; + } + break; + case EXEC: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + for(;;){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + fmtprint(&fmt, " "); + fmtuserstring(&fmt, a, ""); + argv++; + } + break; + case _FSESSION: /* deprecated */ + case _FSTAT: /* deprecated */ + case _FWSTAT: /* obsolete */ + i[0] = va_arg(list, int); + a = va_arg(list, char*); + fmtprint(&fmt, "%d %#p", i[0], a); + break; + case FAUTH: + i[0] = va_arg(list, int); + a = va_arg(list, char*); + fmtprint(&fmt, "%d", i[0]); + fmtuserstring(&fmt, a, ""); + break; + case SEGBRK: + case RENDEZVOUS: + v = va_arg(list, void*); + fmtprint(&fmt, "%#p ", v); + v = va_arg(list, void*); + fmtprint(&fmt, "%#p", v); + break; + case _MOUNT: /* deprecated */ + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case OPEN: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case OSEEK: /* deprecated */ + i[0] = va_arg(list, int); + l = va_arg(list, long); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %ld %d", i[0], l, i[1]); + break; + case SLEEP: + l = va_arg(list, long); + fmtprint(&fmt, "%ld", l); + break; + case _STAT: /* obsolete */ + case _WSTAT: /* obsolete */ + a = va_arg(list, char*); + fmtuserstring(&fmt, 
a, " "); + a = va_arg(list, char*); + fmtprint(&fmt, "%#p", a); + break; + case RFORK: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux", i[0]); + break; + case PIPE: + case BRK_: + v = va_arg(list, int*); + fmtprint(&fmt, "%#p", v); + break; + case CREATE: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%#ux %#ux", i[0], i[1]); + break; + case FD2PATH: + case FSTAT: + case FWSTAT: + i[0] = va_arg(list, int); + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%d %#p %lud", i[0], a, l); + break; + case NOTIFY: + case SEGDETACH: + case _WAIT: /* deprecated */ + v = va_arg(list, void*); + fmtprint(&fmt, "%#p", v); + break; + case SEGATTACH: + i[0] = va_arg(list, int); + fmtprint(&fmt, "%d ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + /*FALLTHROUGH*/ + case SEGFREE: + case SEGFLUSH: + v = va_arg(list, void*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#p %lud", v, l); + break; + case UNMOUNT: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case SEMACQUIRE: + case SEMRELEASE: + v = va_arg(list, int*); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#p %d", v, i[0]); + break; + case TSEMACQUIRE: + v = va_arg(list, int*); + l = va_arg(list, ulong); + fmtprint(&fmt, "%#p %ld", v, l); + break; + case SEMSLEEP: + case SEMWAKEUP: + v = va_arg(list, int*); + fmtprint(&fmt, "%#p", v); + break; + case SEMALT: + ip = va_arg(list, int**); + i[0] = va_arg(list, int); + validaddr(ip, sizeof(int*)*i[0], 0); + fmtprint(&fmt, "%#p %d", ip, i[0]); + break; + case SEEK: + v = va_arg(list, vlong*); + i[0] = va_arg(list, int); + vl = va_arg(list, vlong); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%#p %d %#llux %d", v, i[0], vl, i[1]); + break; + case FVERSION: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d ", i[0], i[1]); + a = 
va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%lud", l); + break; + case WSTAT: + case STAT: + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + /*FALLTHROUGH*/ + case ERRSTR: + case AWAIT: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + fmtprint(&fmt, "%#p %lud", a, l); + break; + case MOUNT: + i[0] = va_arg(list, int); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%d %d ", i[0], i[1]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, " "); + i[0] = va_arg(list, int); + fmtprint(&fmt, "%#ux ", i[0]); + a = va_arg(list, char*); + fmtuserstring(&fmt, a, ""); + break; + case _READ: /* deprecated */ + case PREAD: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + l = va_arg(list, long); + fmtprint(&fmt, "%d %#p %ld", i[0], v, l); + if(syscallno == PREAD){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + break; + case _WRITE: /* deprecated */ + case PWRITE: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + l = va_arg(list, long); + fmtprint(&fmt, "%d ", i[0]); + len = MIN(l, 64); + fmtrwdata(&fmt, v, len, " "); + fmtprint(&fmt, "%ld", l); + if(syscallno == PWRITE){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + break; + case ZIOPREAD: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + i[1] = va_arg(list, int); + ul = va_arg(list, usize); + vl = va_arg(list, vlong); + fmtprint(&fmt, "%d %#p %d %ld %ulld", i[0], v, i[1], ul, vl); + break; + case ZIOPWRITE: + i[0] = va_arg(list, int); + v = va_arg(list, void*); + i[1] = va_arg(list, int); + vl = va_arg(list, vlong); + fmtprint(&fmt, "%d %#p %d %ulld", i[0], v, i[1], vl); + break; + case ZIOFREE: + v = va_arg(list, void*); + i[1] = va_arg(list, int); + fmtprint(&fmt, "%#p %d", v, i[1]); + case NIXSYSCALL: + break; + } + up->syscalltrace = fmtstrflush(&fmt); +} + +void +sysretfmt(int syscallno, va_list list, Ar0* ar0, uvlong start, uvlong stop) +{ + long l; + void* v; + Fmt fmt; + vlong 
vl; + int i, len; + char *a, *errstr; + + fmtstrinit(&fmt); + + if(up->syscalltrace) + free(up->syscalltrace); + + errstr = "\"\""; + switch(syscallno){ + default: + if(ar0->i == -1) + errstr = up->errstr; + fmtprint(&fmt, " = %d", ar0->i); + break; + case ALARM: + case _WRITE: + case PWRITE: + if(ar0->l == -1) + errstr = up->errstr; + fmtprint(&fmt, " = %ld", ar0->l); + break; + case EXEC: + case EXECAC: + case SEGBRK: + case SEGATTACH: + case RENDEZVOUS: + if(ar0->v == (void*)-1) + errstr = up->errstr; + fmtprint(&fmt, " = %#p", ar0->v); + break; + case AWAIT: + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ar0->i > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %d", l, ar0->i); + } + else{ + fmtprint(&fmt, "%#p/\"\" %lud = %d", a, l, ar0->i); + errstr = up->errstr; + } + break; + case _ERRSTR: + case ERRSTR: + a = va_arg(list, char*); + if(syscallno == _ERRSTR) + l = 64; + else + l = va_arg(list, unsigned long); + if(ar0->i > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %d", l, ar0->i); + } + else{ + fmtprint(&fmt, "\"\" %lud = %d", l, ar0->i); + errstr = up->errstr; + } + break; + case FD2PATH: + i = va_arg(list, int); + USED(i); + a = va_arg(list, char*); + l = va_arg(list, unsigned long); + if(ar0->i > 0){ + fmtuserstring(&fmt, a, " "); + fmtprint(&fmt, "%lud = %d", l, ar0->i); + } + else{ + fmtprint(&fmt, "\"\" %lud = %d", l, ar0->i); + errstr = up->errstr; + } + break; + case _READ: + case PREAD: + i = va_arg(list, int); + USED(i); + v = va_arg(list, void*); + l = va_arg(list, long); + if(ar0->l > 0){ + len = MIN(ar0->l, 64); + fmtrwdata(&fmt, v, len, ""); + } + else{ + fmtprint(&fmt, "/\"\""); + errstr = up->errstr; + } + fmtprint(&fmt, " %ld", l); + if(syscallno == PREAD){ + vl = va_arg(list, vlong); + fmtprint(&fmt, " %lld", vl); + } + fmtprint(&fmt, " = %d", ar0->i); + break; + } + fmtprint(&fmt, " %s %#llud %#llud\n", errstr, start, stop); + + up->syscalltrace = fmtstrflush(&fmt); +} diff -Nru 
0/sys/src/nix/port/sysfile.c 4/sys/src/nix/port/sysfile.c --- 0/sys/src/nix/port/sysfile.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sysfile.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1579 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * The sys*() routines needn't poperror() as they return directly to syscall(). + */ + +static void +unlockfgrp(Fgrp *f) +{ + int ex; + + ex = f->exceed; + f->exceed = 0; + unlock(f); + if(ex) + pprint("warning: process exceeds %d file descriptors\n", ex); +} + +static int +growfd(Fgrp *f, int fd) /* fd is always >= 0 */ +{ + Chan **newfd, **oldfd; + + if(fd < f->nfd) + return 0; + if(fd >= f->nfd+DELTAFD) + return -1; /* out of range */ + /* + * Unbounded allocation is unwise; besides, there are only 16 bits + * of fid in 9P + */ + if(f->nfd >= 5000){ + Exhausted: + print("no free file descriptors\n"); + return -1; + } + newfd = malloc((f->nfd+DELTAFD)*sizeof(Chan*)); + if(newfd == 0) + goto Exhausted; + oldfd = f->fd; + memmove(newfd, oldfd, f->nfd*sizeof(Chan*)); + f->fd = newfd; + free(oldfd); + f->nfd += DELTAFD; + if(fd > f->maxfd){ + if(fd/100 > f->maxfd/100) + f->exceed = (fd/100)*100; + f->maxfd = fd; + } + return 1; +} + +/* + * this assumes that the fgrp is locked + */ +static int +findfreefd(Fgrp *f, int start) +{ + int fd; + + for(fd=start; fd<f->nfd; fd++) /* first empty slot at or after start */ + if(f->fd[fd] == 0) + break; + if(fd >= f->nfd && growfd(f, fd) < 0) + return -1; + return fd; +} + +int +newfd(Chan *c) +{ + int fd; + Fgrp *f; + + f = up->fgrp; + lock(f); + fd = findfreefd(f, 0); + if(fd < 0){ + unlockfgrp(f); + return -1; + } + if(fd > f->maxfd) + f->maxfd = fd; + f->fd[fd] = c; + unlockfgrp(f); + return fd; +} + +static int +newfd2(int fd[2], Chan *c[2]) +{ + Fgrp *f; + + f = up->fgrp; + lock(f); + fd[0] = findfreefd(f, 0); + if(fd[0] < 0){ + unlockfgrp(f); + return -1; + } + fd[1] = findfreefd(f, fd[0]+1); + if(fd[1] < 0){ + unlockfgrp(f); + return -1; + } + 
if(fd[1] > f->maxfd) + f->maxfd = fd[1]; + f->fd[fd[0]] = c[0]; + f->fd[fd[1]] = c[1]; + unlockfgrp(f); + + return 0; +} + +Chan* +fdtochan(int fd, int mode, int chkmnt, int iref) +{ + Chan *c; + Fgrp *f; + + c = nil; + f = up->fgrp; + + lock(f); + if(fd<0 || f->nfd<=fd || (c = f->fd[fd])==0) { + unlock(f); + error(Ebadfd); + } + if(iref) + incref(c); + unlock(f); + + if(chkmnt && (c->flag&CMSG)) { + if(iref) + cclose(c); + error(Ebadusefd); + } + + if(mode<0 || c->mode==ORDWR) + return c; + + if((mode&OTRUNC) && c->mode==OREAD) { + if(iref) + cclose(c); + error(Ebadusefd); + } + + if((mode&~OTRUNC) != c->mode) { + if(iref) + cclose(c); + error(Ebadusefd); + } + + return c; +} + +int +openmode(int omode) +{ + omode &= ~(OTRUNC|OCEXEC|ORCLOSE); + if(omode > OEXEC) + error(Ebadarg); + if(omode == OEXEC) + return OREAD; + return omode; +} + +void +sysfd2path(Ar0* ar0, va_list list) +{ + Chan *c; + char *buf; + int fd; + usize nbuf; + + /* + * int fd2path(int fd, char* buf, int nbuf); + * should be + * int fd2path(int fd, char* buf, usize nbuf); + */ + fd = va_arg(list, int); + buf = va_arg(list, char*); + nbuf = va_arg(list, usize); + buf = validaddr(buf, nbuf, 1); + + c = fdtochan(fd, -1, 0, 1); + snprint(buf, nbuf, "%s", chanpath(c)); + cclose(c); + + ar0->i = 0; +} + +void +syspipe(Ar0* ar0, va_list list) +{ + int *a, fd[2]; + Chan *c[2]; + static char *datastr[] = {"data", "data1"}; + + /* + * int pipe(int fd[2]); + */ + a = va_arg(list, int*); + a = validaddr(a, sizeof(fd), 1); + evenaddr(PTR2UINT(a)); + + c[0] = namec("#|", Atodir, 0, 0); + c[1] = nil; + fd[0] = -1; + fd[1] = -1; + + if(waserror()){ + cclose(c[0]); + if(c[1]) + cclose(c[1]); + nexterror(); + } + c[1] = cclone(c[0]); + if(walk(&c[0], datastr+0, 1, 1, nil) < 0) + error(Egreg); + if(walk(&c[1], datastr+1, 1, 1, nil) < 0) + error(Egreg); + c[0] = c[0]->dev->open(c[0], ORDWR); + c[1] = c[1]->dev->open(c[1], ORDWR); + if(newfd2(fd, c) < 0) + error(Enofd); + poperror(); + + a[0] = fd[0]; + a[1] = 
fd[1]; + + ar0->i = 0; +} + +void +sysdup(Ar0* ar0, va_list list) +{ + int nfd, ofd; + Chan *nc, *oc; + Fgrp *f; + + /* + * int dup(int oldfd, int newfd); + * + * Close after dup'ing, so date > #d/1 works + */ + ofd = va_arg(list, int); + oc = fdtochan(ofd, -1, 0, 1); + nfd = va_arg(list, int); + + if(nfd != -1){ + f = up->fgrp; + lock(f); + if(nfd < 0 || growfd(f, nfd) < 0) { + unlockfgrp(f); + cclose(oc); + error(Ebadfd); + } + if(nfd > f->maxfd) + f->maxfd = nfd; + + nc = f->fd[nfd]; + f->fd[nfd] = oc; + unlockfgrp(f); + if(nc != nil) + cclose(nc); + }else{ + if(waserror()) { + cclose(oc); + nexterror(); + } + nfd = newfd(oc); + if(nfd < 0) + error(Enofd); + poperror(); + } + + ar0->i = nfd; +} + +void +sysopen(Ar0* ar0, va_list list) +{ + char *aname; + int fd, omode; + Chan *c; + + /* + * int open(char* file, int omode); + */ + aname = va_arg(list, char*); + omode = va_arg(list, int); + openmode(omode); /* error check only */ + + c = nil; + if(waserror()){ + if(c != nil) + cclose(c); + nexterror(); + } + aname = validaddr(aname, 1, 0); + c = namec(aname, Aopen, omode, 0); + fd = newfd(c); + if(fd < 0) + error(Enofd); + poperror(); + + ar0->i = fd; +} + +void +fdclose(int fd, int flag) +{ + int i; + Chan *c; + Fgrp *f; + + f = up->fgrp; + lock(f); + c = f->fd[fd]; + if(c == nil){ + /* can happen for users with shared fd tables */ + unlock(f); + return; + } + if(flag){ + if(c == nil || !(c->flag&flag)){ + unlock(f); + return; + } + } + f->fd[fd] = nil; + if(fd == f->maxfd) + for(i = fd; --i >= 0 && f->fd[i] == 0; ) + f->maxfd = i; + + unlock(f); + cclose(c); +} + +void +sysclose(Ar0* ar0, va_list list) +{ + int fd; + + /* + * int close(int fd); + */ + fd = va_arg(list, int); + + fdtochan(fd, -1, 0, 0); + fdclose(fd, 0); + + ar0->i = 0; +} + +static long +unionread(Chan *c, void *va, long n) +{ + int i; + long nr; + Mhead *mh; + Mount *mount; + + qlock(&c->umqlock); + mh = c->umh; + rlock(&mh->lock); + mount = mh->mount; + /* bring mount in sync with c->uri and 
c->umc */ + for(i = 0; mount != nil && i < c->uri; i++) + mount = mount->next; + + nr = 0; + while(mount != nil){ + /* Error causes component of union to be skipped */ + if(mount->to && !waserror()){ + if(c->umc == nil){ + c->umc = cclone(mount->to); + c->umc = c->umc->dev->open(c->umc, OREAD); + } + + nr = c->umc->dev->read(c->umc, va, n, c->umc->offset); + c->umc->offset += nr; + poperror(); + } + if(nr > 0) + break; + + /* Advance to next element */ + c->uri++; + if(c->umc){ + cclose(c->umc); + c->umc = nil; + } + mount = mount->next; + } + runlock(&mh->lock); + qunlock(&c->umqlock); + return nr; +} + +static void +unionrewind(Chan *c) +{ + qlock(&c->umqlock); + c->uri = 0; + if(c->umc){ + cclose(c->umc); + c->umc = nil; + } + qunlock(&c->umqlock); +} + +static usize +dirfixed(uchar *p, uchar *e, Dir *d) +{ + int len; + Dev *dev; + + len = GBIT16(p)+BIT16SZ; + if(p + len > e) + return 0; + + p += BIT16SZ; /* ignore size */ + dev = devtabget(GBIT16(p), 1); //XDYNX + if(dev != nil){ + d->type = dev->dc; + //devtabdecr(dev); + } + else + d->type = -1; + p += BIT16SZ; + d->dev = GBIT32(p); + p += BIT32SZ; + d->qid.type = GBIT8(p); + p += BIT8SZ; + d->qid.vers = GBIT32(p); + p += BIT32SZ; + d->qid.path = GBIT64(p); + p += BIT64SZ; + d->mode = GBIT32(p); + p += BIT32SZ; + d->atime = GBIT32(p); + p += BIT32SZ; + d->mtime = GBIT32(p); + p += BIT32SZ; + d->length = GBIT64(p); + + return len; +} + +static char* +dirname(uchar *p, usize *n) +{ + p += BIT16SZ+BIT16SZ+BIT32SZ+BIT8SZ+BIT32SZ+BIT64SZ + + BIT32SZ+BIT32SZ+BIT32SZ+BIT64SZ; + *n = GBIT16(p); + + return (char*)p+BIT16SZ; +} + +static usize +dirsetname(char *name, usize len, uchar *p, usize n, usize maxn) +{ + char *oname; + usize nn, olen; + + if(n == BIT16SZ) + return BIT16SZ; + + oname = dirname(p, &olen); + + nn = n+len-olen; + PBIT16(p, nn-BIT16SZ); + if(nn > maxn) + return BIT16SZ; + + if(len != olen) + memmove(oname+len, oname+olen, p+n-(uchar*)(oname+olen)); + PBIT16((uchar*)(oname-2), len); + memmove(oname, 
name, len); + + return nn; +} + +/* + * Mountfix might have caused the fixed results of the directory read + * to overflow the buffer. Catch the overflow in c->dirrock. + */ +static void +mountrock(Chan *c, uchar *p, uchar **pe) +{ + uchar *e, *r; + int len, n; + + e = *pe; + + /* find last directory entry */ + for(;;){ + len = BIT16SZ+GBIT16(p); + if(p+len >= e) + break; + p += len; + } + + /* save it away */ + qlock(&c->rockqlock); + if(c->nrock+len > c->mrock){ + n = ROUNDUP(c->nrock+len, 1024); + r = smalloc(n); + memmove(r, c->dirrock, c->nrock); + free(c->dirrock); + c->dirrock = r; + c->mrock = n; + } + memmove(c->dirrock+c->nrock, p, len); + c->nrock += len; + qunlock(&c->rockqlock); + + /* drop it */ + *pe = p; +} + +/* + * Satisfy a directory read with the results saved in c->dirrock. + */ +static int +mountrockread(Chan *c, uchar *op, long n, long *nn) +{ + long dirlen; + uchar *rp, *erp, *ep, *p; + + /* common case */ + if(c->nrock == 0) + return 0; + + /* copy out what we can */ + qlock(&c->rockqlock); + rp = c->dirrock; + erp = rp+c->nrock; + p = op; + ep = p+n; + while(rp+BIT16SZ <= erp){ + dirlen = BIT16SZ+GBIT16(rp); + if(p+dirlen > ep) + break; + memmove(p, rp, dirlen); + p += dirlen; + rp += dirlen; + } + + if(p == op){ + qunlock(&c->rockqlock); + return 0; + } + + /* shift the rest */ + if(rp != erp) + memmove(c->dirrock, rp, erp-rp); + c->nrock = erp - rp; + + *nn = p - op; + qunlock(&c->rockqlock); + return 1; +} + +static void +mountrewind(Chan *c) +{ + c->nrock = 0; +} + +/* + * Rewrite the results of a directory read to reflect current + * name space bindings and mounts. Specifically, replace + * directory entries for bind and mount points with the results + * of statting what is mounted there. Except leave the old names. 
+ */ +static long +mountfix(Chan *c, uchar *op, long n, long maxn) +{ + char *name; + int nbuf; + Chan *nc; + Mhead *mh; + Mount *mount; + usize dirlen, nname, r, rest; + long l; + uchar *buf, *e, *p; + Dir d; + + p = op; + buf = nil; + nbuf = 0; + for(e=&p[n]; p+BIT16SZmount; mount; mount=mount->next) + if(eqchanddq(mount->to, d.type, d.dev, d.qid, 1)) + goto Norewrite; + + name = dirname(p, &nname); + /* + * Do the stat but fix the name. If it fails, + * leave old entry. + * BUG: If it fails because there isn't room for + * the entry, what can we do? Nothing, really. + * Might as well skip it. + */ + if(buf == nil){ + buf = smalloc(4096); + nbuf = 4096; + } + if(waserror()) + goto Norewrite; + l = nc->dev->stat(nc, buf, nbuf); + r = dirsetname(name, nname, buf, l, nbuf); + if(r == BIT16SZ) + error("dirsetname"); + poperror(); + + /* + * Shift data in buffer to accomodate new entry, + * possibly overflowing into rock. + */ + rest = e - (p+dirlen); + if(r > dirlen){ + while(p+r+rest > op+maxn){ + mountrock(c, p, &e); + if(e == p){ + dirlen = 0; + goto Norewrite; + } + rest = e - (p+dirlen); + } + } + if(r != dirlen){ + memmove(p+r, p+dirlen, rest); + dirlen = r; + e = p+dirlen+rest; + } + + /* + * Rewrite directory entry. + */ + memmove(p, buf, r); + + Norewrite: + cclose(nc); + putmhead(mh); + } + } + if(buf) + free(buf); + + if(p != e) + error("oops in mountfix"); + + return e-op; +} + +static long +read(va_list list, int ispread) +{ + int fd; + long n, nn, nnn; + void *p; + Chan *c; + vlong off; + + fd = va_arg(list, int); + p = va_arg(list, void*); + n = va_arg(list, long); + p = validaddr(p, n, 1); + + c = fdtochan(fd, OREAD, 1, 1); + + if(waserror()){ + cclose(c); + nexterror(); + } + + /* + * The offset is passed through on directories, normally. + * Sysseek complains, but pread is used by servers like exportfs, + * that shouldn't need to worry about this issue. + * + * Notice that c->devoffset is the offset that c's dev is seeing. 
+ * The number of bytes read on this fd (c->offset) may be different + * due to rewritings in mountfix. + */ + if(ispread){ + off = va_arg(list, vlong); + if(off == ~0LL){ /* use and maintain channel's offset */ + off = c->offset; + ispread = 0; + } + } + else + off = c->offset; + + if(c->qid.type & QTDIR){ + /* + * Directory read: + * rewind to the beginning of the file if necessary; + * try to fill the buffer via mountrockread; + * clear ispread to always maintain the Chan offset. + */ + if(off == 0LL){ + if(!ispread){ + c->offset = 0; + c->devoffset = 0; + } + mountrewind(c); + unionrewind(c); + } + + if(!mountrockread(c, p, n, &nn)){ + if(c->umh) + nn = unionread(c, p, n); + else{ + if(off != c->offset) + error(Edirseek); + nn = c->dev->read(c, p, n, c->devoffset); + } + } + nnn = mountfix(c, p, nn, n); + + ispread = 0; + } + else + nnn = nn = c->dev->read(c, p, n, off); + + if(!ispread){ + lock(c); + c->devoffset += nn; + c->offset += nnn; + unlock(c); + } + + poperror(); + cclose(c); + + return nnn; +} + +void +sys_read(Ar0* ar0, va_list list) +{ + /* + * long read(int fd, void* buf, long nbytes); + */ + ar0->l = read(list, 0); +} + +void +syspread(Ar0* ar0, va_list list) +{ + /* + * long pread(int fd, void* buf, long nbytes, vlong offset); + */ + ar0->l = read(list, 1); +} + +static long +write(va_list list, int ispwrite) +{ + int fd; + long n, r; + void *p; + Chan *c; + vlong off; + + fd = va_arg(list, int); + p = va_arg(list, void*); + r = n = va_arg(list, long); + + p = validaddr(p, n, 0); + n = 0; + c = fdtochan(fd, OWRITE, 1, 1); + if(waserror()) { + if(!ispwrite){ + lock(c); + c->offset -= n; + unlock(c); + } + cclose(c); + nexterror(); + } + + if(c->qid.type & QTDIR) + error(Eisdir); + + n = r; + + off = ~0LL; + if(ispwrite) + off = va_arg(list, vlong); + if(off == ~0LL){ /* use and maintain channel's offset */ + lock(c); + off = c->offset; + c->offset += n; + unlock(c); + } + + r = c->dev->write(c, p, n, off); + + if(!ispwrite && r < n){ + lock(c); + 
c->offset -= n - r; + unlock(c); + } + + poperror(); + cclose(c); + + return r; +} + +void +sys_write(Ar0* ar0, va_list list) +{ + /* + * long write(int fd, void* buf, long nbytes); + */ + ar0->l = write(list, 0); +} + +void +syspwrite(Ar0* ar0, va_list list) +{ + /* + * long pwrite(int fd, void *buf, long nbytes, vlong offset); + */ + ar0->l = write(list, 1); +} + +static vlong +sseek(int fd, vlong offset, int whence) +{ + Chan *c; + uchar buf[sizeof(Dir)+100]; + Dir dir; + int n; + + c = fdtochan(fd, -1, 1, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + if(c->dev->dc == '|') + error(Eisstream); + + switch(whence){ + case 0: + if((c->qid.type & QTDIR) && offset != 0LL) + error(Eisdir); + c->offset = offset; + break; + + case 1: + if(c->qid.type & QTDIR) + error(Eisdir); + lock(c); /* lock for read/write update */ + offset += c->offset; + c->offset = offset; + unlock(c); + break; + + case 2: + if(c->qid.type & QTDIR) + error(Eisdir); + n = c->dev->stat(c, buf, sizeof buf); + if(convM2D(buf, n, &dir, nil) == 0) + error("internal error: stat error in seek"); + offset += dir.length; + c->offset = offset; + break; + + default: + error(Ebadarg); + } + c->uri = 0; + c->dri = 0; + cclose(c); + poperror(); + + return offset; +} + +void +sysseek(Ar0* ar0, va_list list) +{ + int fd, whence; + vlong offset, *rv; + + /* + * vlong seek(int fd, vlong n, int type); + * + * The system call actually has 4 arguments, + * int _seek(vlong*, int, vlong, int); + * and the first argument is where the offset + * is returned. The C library arranges the + * argument/return munging if necessary. + */ + rv = va_arg(list, vlong*); + rv = validaddr(rv, sizeof(vlong), 1); + + fd = va_arg(list, int); + offset = va_arg(list, vlong); + whence = va_arg(list, int); + *rv = sseek(fd, offset, whence); + + ar0->i = 0; +} + +void +sysoseek(Ar0* ar0, va_list list) +{ + long offset; + int fd, whence; + + /* + * long oseek(int fd, long n, int type); + * + * Deprecated; backwards compatibility only. 
+ */ + fd = va_arg(list, int); + offset = va_arg(list, long); + whence = va_arg(list, int); + + ar0->l = sseek(fd, offset, whence); +} + +void +validstat(uchar *s, usize n) +{ + usize m; + char buf[64]; + + if(statcheck(s, n) < 0) + error(Ebadstat); + /* verify that name entry is acceptable */ + s += STATFIXLEN - 4*BIT16SZ; /* location of first string */ + /* + * s now points at count for first string. + * if it's too long, let the server decide; this is + * only for his protection anyway. otherwise + * we'd have to allocate and waserror. + */ + m = GBIT16(s); + s += BIT16SZ; + if(m+1 > sizeof buf) + return; + memmove(buf, s, m); + buf[m] = '\0'; + /* name could be '/' */ + if(strcmp(buf, "/") != 0) + validname(buf, 0); +} + +static char* +pathlast(Path *p) +{ + char *s; + + if(p == nil) + return nil; + if(p->len == 0) + return nil; + s = strrchr(p->s, '/'); + if(s) + return s+1; + return p->s; +} + +void +sysfstat(Ar0* ar0, va_list list) +{ + int fd; + Chan *c; + usize n; + int r; + uchar *p; + + /* + * int fstat(int fd, uchar* edir, int nedir); + * should really be + * usize fstat(int fd, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. + */ + fd = va_arg(list, int); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 1); + c = fdtochan(fd, -1, 0, 1); + if(waserror()) { + cclose(c); + nexterror(); + } + r = c->dev->stat(c, p, n); + poperror(); + cclose(c); + + ar0->i = r; +} + +void +sysstat(Ar0* ar0, va_list list) +{ + char *aname; + Chan *c; + usize n; + int r; + uchar *p; + + /* + * int stat(char* name, uchar* edir, int nedir); + * should really be + * usize stat(char* name, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. 
+ */ + aname = va_arg(list, char*); + aname = validaddr(aname, 1, 0); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 1); + c = namec(aname, Aaccess, 0, 0); + if(waserror()){ + cclose(c); + nexterror(); + } + r = c->dev->stat(c, p, n); + aname = pathlast(c->path); + if(aname) + r = dirsetname(aname, strlen(aname), p, r, n); + + poperror(); + cclose(c); + + ar0->i = r; +} + +void +syschdir(Ar0* ar0, va_list list) +{ + Chan *c; + char *aname; + + /* + * int chdir(char* dirname); + */ + aname = va_arg(list, char*); + aname = validaddr(aname, 1, 0); + + c = namec(aname, Atodir, 0, 0); + cclose(up->dot); + up->dot = c; + + ar0->i = 0; +} + +static int +bindmount(int ismount, int fd, int afd, char* arg0, char* arg1, int flag, char* spec) +{ + int i; + Dev *dev; + Chan *c0, *c1, *ac, *bc; + struct{ + Chan *chan; + Chan *authchan; + char *spec; + int flags; + }bogus; + + if((flag&~MMASK) || (flag&MORDER)==(MBEFORE|MAFTER)) + error(Ebadarg); + + bogus.flags = flag & MCACHE; + + if(ismount){ + if(up->pgrp->noattach) + error(Enoattach); + + ac = nil; + bc = fdtochan(fd, ORDWR, 0, 1); + if(waserror()) { + if(ac) + cclose(ac); + cclose(bc); + nexterror(); + } + + if(afd >= 0) + ac = fdtochan(afd, ORDWR, 0, 1); + + bogus.chan = bc; + bogus.authchan = ac; + + bogus.spec = validaddr(spec, 1, 0); + if(waserror()) + error(Ebadspec); + spec = validnamedup(spec, 1); + poperror(); + + if(waserror()){ + free(spec); + nexterror(); + } + + dev = devtabget('M', 0); //XDYNX + if(waserror()){ + //devtabdecr(dev); + nexterror(); + } + c0 = dev->attach((char*)&bogus); + poperror(); + //devtabdecr(dev); + + poperror(); /* spec */ + free(spec); + poperror(); /* ac bc */ + if(ac) + cclose(ac); + cclose(bc); + }else{ + bogus.spec = nil; + c0 = namec(validaddr(arg0, 1, 0), Abind, 0, 0); + } + + if(waserror()){ + cclose(c0); + nexterror(); + } + + c1 = namec(validaddr(arg1, 1, 0), Amount, 0, 0); + if(waserror()){ + cclose(c1); + nexterror(); + } + + i = cmount(&c0, c1, 
flag, bogus.spec); + + poperror(); + cclose(c1); + poperror(); + cclose(c0); + if(ismount) + fdclose(fd, 0); + + return i; +} + +void +sysbind(Ar0* ar0, va_list list) +{ + int flag; + char *name, *old; + + /* + * int bind(char* name, char* old, int flag); + * should be + * long bind(char* name, char* old, int flag); + */ + name = va_arg(list, char*); + old = va_arg(list, char*); + flag = va_arg(list, int); + + ar0->i = bindmount(0, -1, -1, name, old, flag, nil); +} + +void +sysmount(Ar0* ar0, va_list list) +{ + int afd, fd, flag; + char *aname, *old; + + /* + * int mount(int fd, int afd, char* old, int flag, char* aname); + * should be + * long mount(int fd, int afd, char* old, int flag, char* aname); + */ + fd = va_arg(list, int); + afd = va_arg(list, int); + old = va_arg(list, char*); + flag = va_arg(list, int); + aname = va_arg(list, char*); + + ar0->i = bindmount(1, fd, afd, nil, old, flag, aname); +} + +void +sys_mount(Ar0* ar0, va_list list) +{ + int fd, flag; + char *aname, *old; + + /* + * int mount(int fd, char *old, int flag, char *aname); + * should be + * long mount(int fd, char *old, int flag, char *aname); + * + * Deprecated; backwards compatibility only. + */ + fd = va_arg(list, int); + old = va_arg(list, char*); + flag = va_arg(list, int); + aname = va_arg(list, char*); + + ar0->i = bindmount(1, fd, -1, nil, old, flag, aname); +} + +void +sysunmount(Ar0* ar0, va_list list) +{ + char *name, *old; + Chan *cmount, *cmounted; + + /* + * int unmount(char* name, char* old); + */ + name = va_arg(list, char*); + old = va_arg(list, char*); + cmount = namec(validaddr(old, 1, 0), Amount, 0, 0); + + cmounted = nil; + if(name != nil) { + if(waserror()) { + cclose(cmount); + nexterror(); + } + + /* + * This has to be namec(..., Aopen, ...) because + * if arg[0] is something like /srv/cs or /fd/0, + * opening it is the only way to get at the real + * Chan underneath. 
+ */ + cmounted = namec(validaddr(name, 1, 0), Aopen, OREAD, 0); + poperror(); + } + + if(waserror()) { + cclose(cmount); + if(cmounted != nil) + cclose(cmounted); + nexterror(); + } + + cunmount(cmount, cmounted); + cclose(cmount); + if(cmounted != nil) + cclose(cmounted); + poperror(); + + ar0->i = 0; +} + +void +syscreate(Ar0* ar0, va_list list) +{ + char *aname; + int fd, omode, perm; + Chan *c; + + /* + * int create(char* file, int omode, ulong perm); + * should be + * int create(char* file, int omode, int perm); + */ + aname = va_arg(list, char*); + omode = va_arg(list, int); + perm = va_arg(list, int); + + openmode(omode & ~OEXCL); /* error check only; OEXCL okay here */ + c = nil; + if(waserror()) { + if(c != nil) + cclose(c); + nexterror(); + } + c = namec(validaddr(aname, 1, 0), Acreate, omode, perm); + fd = newfd(c); + if(fd < 0) + error(Enofd); + poperror(); + + ar0->i = fd; +} + +void +sysremove(Ar0* ar0, va_list list) +{ + Chan *c; + char *aname; + + /* + * int remove(char* file); + */ + aname = va_arg(list, char*); + c = namec(validaddr(aname, 1, 0), Aremove, 0, 0); + + /* + * Removing mount points is disallowed to avoid surprises + * (which should be removed: the mount point or the mounted Chan?). + */ + if(c->ismtpt){ + cclose(c); + error(Eismtpt); + } + if(waserror()){ + c->dev = nil; /* see below */ + cclose(c); + nexterror(); + } + c->dev->remove(c); + + /* + * Remove clunks the fid, but we need to recover the Chan + * so fake it up. rootclose() is known to be a nop. +Not sure this dicking around is right for Dev ref counts. + */ + c->dev = nil; + poperror(); + cclose(c); + + ar0->i = 0; +} + +static long +wstat(Chan* c, uchar* p, usize n) +{ + long l; + usize namelen; + + if(waserror()){ + cclose(c); + nexterror(); + } + + /* + * Renaming mount points is disallowed to avoid surprises + * (which should be renamed? the mount point or the mounted Chan?). 
+ */ + if(c->ismtpt){ + dirname(p, &namelen); + if(namelen) + nameerror(chanpath(c), Eismtpt); + } + l = c->dev->wstat(c, p, n); + poperror(); + cclose(c); + + return l; +} + +void +syswstat(Ar0* ar0, va_list list) +{ + Chan *c; + char *aname; + uchar *p; + usize n; + + /* + * int wstat(char* name, uchar* edir, int nedir); + * should really be + * usize wstat(char* name, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. + */ + aname = va_arg(list, char*); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 0); + validstat(p, n); + c = namec(validaddr(aname, 1, 0), Aaccess, 0, 0); + + ar0->l = wstat(c, p, n); +} + +void +sysfwstat(Ar0* ar0, va_list list) +{ + Chan *c; + int fd; + uchar *p; + usize n; + + /* + * int fwstat(int fd, uchar* edir, int nedir); + * should really be + * usize wstat(int fd, uchar* edir, usize nedir); + * but returning an unsigned is probably too + * radical. + */ + fd = va_arg(list, int); + p = va_arg(list, uchar*); + n = va_arg(list, usize); + + p = validaddr(p, n, 0); + validstat(p, n); + c = fdtochan(fd, -1, 1, 1); + + ar0->l = wstat(c, p, n); +} + +static void +packoldstat(uchar *buf, Dir *d) +{ + uchar *p; + ulong q; + + /* lay down old stat buffer - grotty code but it's temporary */ + p = buf; + strncpy((char*)p, d->name, 28); + p += 28; + strncpy((char*)p, d->uid, 28); + p += 28; + strncpy((char*)p, d->gid, 28); + p += 28; + q = d->qid.path & ~DMDIR; /* make sure doesn't accidentally look like directory */ + if(d->qid.type & QTDIR) /* this is the real test of a new directory */ + q |= DMDIR; + PBIT32(p, q); + p += BIT32SZ; + PBIT32(p, d->qid.vers); + p += BIT32SZ; + PBIT32(p, d->mode); + p += BIT32SZ; + PBIT32(p, d->atime); + p += BIT32SZ; + PBIT32(p, d->mtime); + p += BIT32SZ; + PBIT64(p, d->length); + p += BIT64SZ; + PBIT16(p, d->type); + p += BIT16SZ; + PBIT16(p, d->dev); +} + +void +sys_stat(Ar0* ar0, va_list list) +{ + Chan *c; + long l; + uchar buf[128], *p; + char 
*aname, *name, strs[128]; + Dir d; + char old[] = "old stat system call - recompile"; + + /* + * int stat(char* name, char* edir); + * should have been + * usize stat(char* name, uchar* edir)); + * + * Deprecated; backwards compatibility only. + */ + aname = va_arg(list, char*); + p = va_arg(list, uchar*); + + /* + * Old DIRLEN (116) plus a little should be plenty + * for the buffer sizes. + */ + p = validaddr(p, 116, 1); + + c = namec(validaddr(aname, 1, 0), Aaccess, 0, 0); + if(waserror()){ + cclose(c); + nexterror(); + } + l = c->dev->stat(c, buf, sizeof buf); + + /* + * Buf contains a new stat buf; convert to old. + * Yuck. + * If buffer too small, time to face reality. + */ + if(l <= BIT16SZ) + error(old); + name = pathlast(c->path); + if(name) + l = dirsetname(name, strlen(name), buf, l, sizeof buf); + l = convM2D(buf, l, &d, strs); + if(l == 0) + error(old); + packoldstat(p, &d); + + poperror(); + cclose(c); + + ar0->i = 0; +} + +void +sys_fstat(Ar0* ar0, va_list list) +{ + Chan *c; + char *name; + long l; + uchar buf[128], *p; + char strs[128]; + Dir d; + int fd; + char old[] = "old fstat system call - recompile"; + + /* + * int fstat(int fd, char* edir); + * should have been + * usize fstat(int fd, uchar* edir)); + * + * Deprecated; backwards compatibility only. + */ + fd = va_arg(list, int); + p = va_arg(list, uchar*); + + /* + * Old DIRLEN (116) plus a little should be plenty + * for the buffer sizes. + */ + p = validaddr(p, 116, 1); + c = fdtochan(fd, -1, 0, 1); + if(waserror()){ + cclose(c); + nexterror(); + } + l = c->dev->stat(c, buf, sizeof buf); + + /* + * Buf contains a new stat buf; convert to old. + * Yuck. + * If buffer too small, time to face reality. 
+ */ + if(l <= BIT16SZ) + error(old); + name = pathlast(c->path); + if(name) + l = dirsetname(name, strlen(name), buf, l, sizeof buf); + l = convM2D(buf, l, &d, strs); + if(l == 0) + error(old); + packoldstat(p, &d); + + poperror(); + cclose(c); + + ar0->i = 0; +} + +void +sys_wstat(Ar0*, va_list) +{ + error("old wstat system call - recompile"); +} + +void +sys_fwstat(Ar0*, va_list) +{ + error("old fwstat system call - recompile"); +} diff -Nru 0/sys/src/nix/port/sysproc.c 4/sys/src/nix/port/sysproc.c --- 0/sys/src/nix/port/sysproc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sysproc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1305 @@ +#include "u.h" +#include "tos.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "../port/edf.h" +#include +#include + + +void +sysrfork(Ar0* ar0, va_list list) +{ + Proc *p; + int flag, i, n, pid; + Fgrp *ofg; + Pgrp *opg; + Rgrp *org; + Egrp *oeg; + Mach *wm; + + /* + * int rfork(int); + */ + flag = va_arg(list, int); + + /* Check flags before we commit */ + if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) + error(Ebadarg); + if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG)) + error(Ebadarg); + if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG)) + error(Ebadarg); + if((flag & (RFPREPAGE|RFCPREPAGE)) == (RFPREPAGE|RFCPREPAGE)) + error(Ebadarg); + if((flag & (RFCORE|RFCCORE)) == (RFCORE|RFCCORE)) + error(Ebadarg); + if(flag & RFCORE && up->wired != nil) + error("wired proc cannot move to ac"); + + if((flag&RFPROC) == 0) { + if(flag & (RFMEM|RFNOWAIT)) + error(Ebadarg); + if(flag & (RFFDG|RFCFDG)) { + ofg = up->fgrp; + if(flag & RFFDG) + up->fgrp = dupfgrp(ofg); + else + up->fgrp = dupfgrp(nil); + closefgrp(ofg); + } + if(flag & (RFNAMEG|RFCNAMEG)) { + opg = up->pgrp; + up->pgrp = newpgrp(); + if(flag & RFNAMEG) + pgrpcpy(up->pgrp, opg); + /* inherit noattach */ + up->pgrp->noattach = opg->noattach; + closepgrp(opg); + } + if(flag & RFNOMNT) + up->pgrp->noattach = 1; 
+ if(flag & RFREND) { + org = up->rgrp; + up->rgrp = newrgrp(); + closergrp(org); + } + if(flag & (RFENVG|RFCENVG)) { + oeg = up->egrp; + up->egrp = smalloc(sizeof(Egrp)); + up->egrp->ref = 1; + if(flag & RFENVG) + envcpy(up->egrp, oeg); + closeegrp(oeg); + } + if(flag & RFNOTEG) + up->noteid = incref(&noteidalloc); + if(flag & (RFPREPAGE|RFCPREPAGE)){ + up->prepagemem = flag&RFPREPAGE; + nixprepage(-1); + } + if(flag & RFCORE){ + up->ac = getac(up, -1); + up->procctl = Proc_toac; + }else if(flag & RFCCORE){ + if(up->ac != nil) + up->procctl = Proc_totc; + } + + ar0->i = 0; + return; + } + + p = newproc(); + + if(flag & RFCORE){ + if(!waserror()){ + p->ac = getac(p, -1); + p->procctl = Proc_toac; + poperror(); + }else{ + print("warning: rfork: no available ac for the child, it runs in the tc\n"); + p->procctl = 0; + } + } + + if(up->trace) + p->trace = 1; + p->scallnr = up->scallnr; + memmove(p->arg, up->arg, sizeof(up->arg)); + p->nerrlab = 0; + p->slash = up->slash; + p->dot = up->dot; + incref(p->dot); + + memmove(p->note, up->note, sizeof(p->note)); + p->privatemem = up->privatemem; + p->noswap = up->noswap; + p->nnote = up->nnote; + p->notified = 0; + p->lastnote = up->lastnote; + p->notify = up->notify; + p->ureg = up->ureg; + p->prepagemem = up->prepagemem; + p->dbgreg = 0; + + /* Make a new set of memory segments */ + n = flag & RFMEM; + qlock(&p->seglock); + if(waserror()){ + qunlock(&p->seglock); + nexterror(); + } + for(i = 0; i < NSEG; i++) + if(up->seg[i]) + p->seg[i] = dupseg(up->seg, i, n); + qunlock(&p->seglock); + poperror(); + + /* File descriptors */ + if(flag & (RFFDG|RFCFDG)) { + if(flag & RFFDG) + p->fgrp = dupfgrp(up->fgrp); + else + p->fgrp = dupfgrp(nil); + } + else { + p->fgrp = up->fgrp; + incref(p->fgrp); + } + + /* Process groups */ + if(flag & (RFNAMEG|RFCNAMEG)) { + p->pgrp = newpgrp(); + if(flag & RFNAMEG) + pgrpcpy(p->pgrp, up->pgrp); + /* inherit noattach */ + p->pgrp->noattach = up->pgrp->noattach; + } + else { + p->pgrp = up->pgrp; 
+ incref(p->pgrp); + } + if(flag & RFNOMNT) + p->pgrp->noattach = 1; + + if(flag & RFREND) + p->rgrp = newrgrp(); + else { + incref(up->rgrp); + p->rgrp = up->rgrp; + } + + /* Environment group */ + if(flag & (RFENVG|RFCENVG)) { + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + if(flag & RFENVG) + envcpy(p->egrp, up->egrp); + } + else { + p->egrp = up->egrp; + incref(p->egrp); + } + p->hang = up->hang; + p->procmode = up->procmode; + + /* Craft a return frame which will cause the child to pop out of + * the scheduler in user mode with the return register zero + */ + sysrforkchild(p, up); + + p->parent = up; + p->parentpid = up->pid; + if(flag&RFNOWAIT) + p->parentpid = 0; + else { + lock(&up->exl); + up->nchild++; + unlock(&up->exl); + } + if((flag&RFNOTEG) == 0) + p->noteid = up->noteid; + + pid = p->pid; + memset(p->time, 0, sizeof(p->time)); + p->time[TReal] = sys->ticks; + + if(flag & (RFPREPAGE|RFCPREPAGE)){ + p->prepagemem = flag&RFPREPAGE; + /* + * BUG: this is prepaging our memory, not + * that of the child, but at least we + * will do the copy on write. + */ + nixprepage(-1); + } + + kstrdup(&p->text, up->text); + kstrdup(&p->user, up->user); + /* + * since the bss/data segments are now shareable, + * any mmu info about this process is now stale + * (i.e. has bad properties) and has to be discarded. 
+ */ + mmuflush(); + p->basepri = up->basepri; + p->priority = up->basepri; + p->fixedpri = up->fixedpri; + p->mp = up->mp; + + wm = up->wired; + if(wm) + procwired(p, wm->machno); + p->color = up->color; + ready(p); + sched(); + + ar0->i = pid; +} + +static uvlong +vl2be(uvlong v) +{ + uchar *p; + + p = (uchar*)&v; + return ((uvlong)((p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3])<<32) + |((uvlong)(p[4]<<24)|(p[5]<<16)|(p[6]<<8)|p[7]); +} + +static ulong +l2be(long l) +{ + uchar *cp; + + cp = (uchar*)&l; + return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; +} + +typedef struct { + Exec; + uvlong hdr[1]; +} Hdr; + +/* + * flags can ONLY specify that you want an AC for you, or + * that you want an XC for you. + * + */ +static void +execac(Ar0* ar0, int flags, char *ufile, char **argv) +{ + Hdr hdr; + Fgrp *f; + Tos *tos; + Chan *chan, *ichan; + Image *img; + Segment *s; + int argc, i, n; + char *a, *elem, *file, *p; + char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1]; + long hdrsz, magic, textsz, datasz, bsssz; + uintptr textlim, datalim, bsslim, entry, stack; + static int colorgen; + + + file = nil; + elem = nil; + switch(flags){ + case EXTC: + case EXXC: + break; + case EXAC: + up->ac = getac(up, -1); + break; + default: + error("unknown execac flag"); + } + if(waserror()){ + DBG("execac: failing: %s\n", up->errstr); + free(file); + free(elem); + if(flags == EXAC && up->ac != nil) + up->ac->proc = nil; + up->ac = nil; + nexterror(); + } + + /* + * Open the file, remembering the final element and the full name. + */ + argc = 0; + file = validnamedup(ufile, 1); + DBG("execac: up %#p file %s\n", up, file); + if(up->trace) + proctracepid(up); + ichan = namec(file, Aopen, OEXEC, 0); + if(waserror()){ + cclose(ichan); + nexterror(); + } + kstrdup(&elem, up->genbuf); + + /* + * Read the header. + * If it's a #!, fill in progarg[] with info then read a new header + * from the file indicated by the #!. + * The #! 
line must be less than sizeof(Exec) in size, + * including the terminating \n. + */ + hdrsz = ichan->dev->read(ichan, &hdr, sizeof(Hdr), 0); + if(hdrsz < 2) + error(Ebadexec); + p = (char*)&hdr; + if(p[0] == '#' && p[1] == '!'){ + p = memccpy(line, (char*)&hdr, '\n', MIN(sizeof(Exec), hdrsz)); + if(p == nil) + error(Ebadexec); + *(p-1) = '\0'; + argc = tokenize(line+2, progarg, nelem(progarg)); + if(argc == 0) + error(Ebadexec); + + /* The original file becomes an extra arg after #! line */ + progarg[argc++] = file; + + /* + * Take the #! $0 as a file to open, and replace + * $0 with the original path's name. + */ + p = progarg[0]; + progarg[0] = elem; + chan = nil; /* in case namec errors out */ + USED(chan); + chan = namec(p, Aopen, OEXEC, 0); + hdrsz = chan->dev->read(chan, &hdr, sizeof(Hdr), 0); + if(hdrsz < 2) + error(Ebadexec); + }else{ + chan = ichan; + incref(ichan); + } + + /* chan is the chan to use, initial or not. ichan is irrelevant now */ + cclose(ichan); + poperror(); + + + /* + * #! has had its chance, now we need a real binary. + */ + magic = l2be(hdr.magic); + if(hdrsz != sizeof(Hdr) || magic != AOUT_MAGIC) + error(Ebadexec); + if(magic & HDR_MAGIC){ + entry = vl2be(hdr.hdr[0]); + hdrsz = sizeof(Hdr); + } + else{ + entry = l2be(hdr.entry); + hdrsz = sizeof(Exec); + } + + textsz = l2be(hdr.text); + datasz = l2be(hdr.data); + bsssz = l2be(hdr.bss); + + textlim = UTROUND(UTZERO+hdrsz+textsz); + datalim = BIGPGROUND(textlim+datasz); + bsslim = BIGPGROUND(textlim+datasz+bsssz); + + /* + * Check the binary header for consistency, + * e.g. the entry point is within the text segment and + * the segments don't overlap each other. 
+ */ + if(entry < UTZERO+hdrsz || entry >= UTZERO+hdrsz+textsz) + error(Ebadexec); + + if(textsz >= textlim || datasz > datalim || bsssz > bsslim + || textlim >= USTKTOP || datalim >= USTKTOP || bsslim >= USTKTOP + || datalim < textlim || bsslim < datalim) + error(Ebadexec); + + if(up->ac != nil && up->ac != m) + up->color = corecolor(up->ac->machno); + else + up->color = corecolor(m->machno); + + /* + * The new stack is created in ESEG, temporarily mapped elsewhere. + * The stack contains, in descending address order: + * a structure containing housekeeping and profiling data (Tos); + * argument strings; + * array of vectors to the argument strings with a terminating + * nil (argv). + * When the exec is committed, this temporary stack in ESEG will + * become SSEG. + * The architecture-dependent code which jumps to the new image + * will also push a count of the argument array onto the stack (argc). + */ + qlock(&up->seglock); + if(waserror()){ + if(up->seg[ESEG] != nil){ + putseg(up->seg[ESEG]); + up->seg[ESEG] = nil; + } + qunlock(&up->seglock); + nexterror(); + } + up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ); + up->seg[ESEG]->color = up->color; + + /* + * Stack is a pointer into the temporary stack + * segment, and will move as items are pushed. + */ + stack = TSTKTOP-sizeof(Tos); + + /* + * First, the top-of-stack structure. + */ + tos = (Tos*)stack; + tos->cyclefreq = m->cyclefreq; + cycles((uvlong*)&tos->pcycles); + tos->pcycles = -tos->pcycles; + tos->kcycles = tos->pcycles; + tos->clock = 0; + + /* + * Next push any arguments found from a #! header. + */ + for(i = 0; i < argc; i++){ + n = strlen(progarg[i])+1; + stack -= n; + memmove(UINT2PTR(stack), progarg[i], n); + } + + /* + * Copy the strings pointed to by the syscall argument argv into + * the temporary stack segment, being careful to check + * the strings argv points to are valid. 
+ */ + for(i = 0;; i++, argv++){ + a = *(char**)validaddr(argv, sizeof(char**), 0); + if(a == nil) + break; + a = validaddr(a, 1, 0); + n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1; + + /* + * This futzing is so argv[0] gets validated even + * though it will be thrown away if this is a shell + * script. + */ + if(argc > 0 && i == 0) + continue; + /* + * Before copying the string into the temporary stack, + * which might involve a demand-page, check the string + * will not overflow the bottom of the stack. + */ + stack -= n; + if(stack < TSTKTOP-USTKSIZE) + error(Enovmem); + p = UINT2PTR(stack); + memmove(p, a, n); + p[n-1] = 0; + argc++; + } + if(argc < 1) + error(Ebadexec); + + /* + * Before pushing the argument pointers onto the temporary stack, + * which might involve a demand-page, check there is room for the + * terminating nil pointer, plus pointers, plus some slop for however + * argc might be passed on the stack by sysexecregs (give a page + * of slop, it is an overestimate, but why not). + * Sysexecstack does any architecture-dependent stack alignment. + * Keep a copy of the start of the argument strings before alignment + * so up->args can be created later. + * Although the argument vectors are being pushed onto the stack in + * the temporary segment, the values must be adjusted to reflect + * the segment address after it replaces the current SSEG. + */ + a = p = UINT2PTR(stack); + stack = sysexecstack(stack, argc); + if(stack-(argc+1)*sizeof(char**)-BIGPGSZ < TSTKTOP-USTKSIZE) + error(Ebadexec); + + argv = (char**)stack; + *--argv = nil; + for(i = 0; i < argc; i++){ + *--argv = p + (USTKTOP-TSTKTOP); + p += strlen(p) + 1; + } + + /* + * Make a good faith copy of the args in up->args using the strings + * in the temporary stack segment. The length must be > 0 as it + * includes the \0 on the last argument and argc was checked earlier + * to be > 0. After the memmove, compensate for any UTF character + * boundary before placing the terminating \0. 
+ */ + n = p - a; + if(n <= 0) + error(Egreg); + if(n > 128) + n = 128; + + p = smalloc(n); + if(waserror()){ + free(p); + nexterror(); + } + + memmove(p, a, n); + while(n > 0 && (p[n-1] & 0xc0) == 0x80) + n--; + p[n-1] = '\0'; + + /* + * All the argument processing is now done, ready to commit. + */ + free(up->text); + up->text = elem; + elem = nil; + free(up->args); + up->args = p; + up->nargs = n; + poperror(); /* p (up->args) */ + + /* + * Close on exec + */ + f = up->fgrp; + for(i=0; i<=f->maxfd; i++) + fdclose(i, CCEXEC); + + /* + * Free old memory. + * Special segments maintained across exec. + */ + for(i = SSEG; i <= HSEG; i++) { + putseg(up->seg[i]); + up->seg[i] = nil; /* in case of error */ + } + for(i = HSEG+1; i< NSEG; i++) { + s = up->seg[i]; + if(s && (s->type&SG_CEXEC)) { + putseg(s); + up->seg[i] = nil; + } + } + + /* Text. Shared. Attaches to cache image if possible + * but prepaged if EXAC + */ + img = attachimage(SG_TEXT|SG_RONLY, chan, up->color, UTZERO, (textlim-UTZERO)/BIGPGSZ); + s = img->s; + up->seg[TSEG] = s; + s->flushme = 1; + s->fstart = 0; + s->flen = hdrsz+textsz; + if(img->color != up->color){ + up->color = img->color; + } + unlock(img); + + /* Data. Shared. */ + s = newseg(SG_DATA, textlim, (datalim-textlim)/BIGPGSZ); + up->seg[DSEG] = s; + s->color = up->color; + + /* Attached by hand */ + incref(img); + s->image = img; + s->fstart = hdrsz+textsz; + s->flen = datasz; + + /* BSS. Zero fill on demand for TS */ + up->seg[BSEG] = newseg(SG_BSS, datalim, (bsslim-datalim)/BIGPGSZ); + up->seg[BSEG]->color= up->color; + + /* + * Move the stack + */ + s = up->seg[ESEG]; + up->seg[ESEG] = nil; + up->seg[SSEG] = s; + /* the color of the stack was decided when we created it before, + * it may have nothing to do with the color of other segments. + */ + qunlock(&up->seglock); + poperror(); /* seglock */ + + s->base = USTKTOP-USTKSIZE; + s->top = USTKTOP; + relocateseg(s, USTKTOP-TSTKTOP); + + /* + * '/' processes are higher priority. 
+ */ + if(chan->dev->dc == L'/') + up->basepri = PriRoot; + up->priority = up->basepri; + poperror(); /* chan, elem, file */ + cclose(chan); + free(file); + + /* + * At this point, the mmu contains info about the old address + * space and needs to be flushed + */ + mmuflush(); + if(up->prepagemem || flags == EXAC) + nixprepage(-1); + qlock(&up->debug); + up->nnote = 0; + up->notify = 0; + up->notified = 0; + up->privatemem = 0; + sysprocsetup(up); + qunlock(&up->debug); + if(up->hang) + up->procctl = Proc_stopme; + + ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), argc); + + if(flags == EXAC){ + up->procctl = Proc_toac; + up->prepagemem = 1; + } + + DBG("execac up %#p done\n" + "textsz %lx datasz %lx bsssz %lx hdrsz %lx\n" + "textlim %ullx datalim %ullx bsslim %ullx\n", up, + textsz, datasz, bsssz, hdrsz, textlim, datalim, bsslim); +} + +void +sysexecac(Ar0* ar0, va_list list) +{ + int flags; + char *file, **argv; + + /* + * void* execac(int flags, char* name, char* argv[]); + */ + + flags = va_arg(list, unsigned int); + file = va_arg(list, char*); + file = validaddr(file, 1, 0); + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + execac(ar0, flags, file, argv); +} + +void +sysexec(Ar0* ar0, va_list list) +{ + char *file, **argv; + + /* + * void* exec(char* name, char* argv[]); + */ + file = va_arg(list, char*); + file = validaddr(file, 1, 0); + argv = va_arg(list, char**); + evenaddr(PTR2UINT(argv)); + execac(ar0, EXTC, file, argv); +} + +void +sysr1(Ar0* , va_list ) +{ + print("sysr1() called. recompile your binary\n"); +} + +void +sysnixsyscall(Ar0* , va_list ) +{ + print("nixsyscall() called. 
recompile your binary\n"); +} + +int +return0(void*) +{ + return 0; +} + +void +syssleep(Ar0* ar0, va_list list) +{ + long ms; + + /* + * int sleep(long millisecs); + */ + ms = va_arg(list, long); + + ar0->i = 0; + if(ms <= 0) { + if (up->edf && (up->edf->flags & Admitted)) + edfyield(); + else + yield(); + return; + } + if(ms < TK2MS(1)) + ms = TK2MS(1); + tsleep(&up->sleep, return0, 0, ms); +} + +void +sysalarm(Ar0* ar0, va_list list) +{ + unsigned long ms; + + /* + * long alarm(unsigned long millisecs); + * Odd argument type... + */ + ms = va_arg(list, unsigned long); + + ar0->l = procalarm(ms); +} + +void +sysexits(Ar0*, va_list list) +{ + char *status; + char *inval = "invalid exit string"; + char buf[ERRMAX]; + + /* + * void exits(char *msg); + */ + status = va_arg(list, char*); + + if(status){ + if(waserror()) + status = inval; + else{ + status = validaddr(status, 1, 0); + if(vmemchr(status, 0, ERRMAX) == 0){ + memmove(buf, status, ERRMAX); + buf[ERRMAX-1] = 0; + status = buf; + } + poperror(); + } + + } + pexit(status, 1); +} + +void +sys_wait(Ar0* ar0, va_list list) +{ + int pid; + Waitmsg w; + OWaitmsg *ow; + + /* + * int wait(Waitmsg* w); + * + * Deprecated; backwards compatibility only. 
+ */ + ow = va_arg(list, OWaitmsg*); + if(ow == nil){ + ar0->i = pwait(nil); + return; + } + + ow = validaddr(ow, sizeof(OWaitmsg), 1); + evenaddr(PTR2UINT(ow)); + pid = pwait(&w); + if(pid >= 0){ + readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE); + readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE); + readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE); + readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE); + strncpy(ow->msg, w.msg, sizeof(ow->msg)); + ow->msg[sizeof(ow->msg)-1] = '\0'; + } + + ar0->i = pid; +} + +void +sysawait(Ar0* ar0, va_list list) +{ + int i; + int pid; + Waitmsg w; + usize n; + char *p; + + /* + * int await(char* s, int n); + * should really be + * usize await(char* s, usize n); + */ + p = va_arg(list, char*); + n = va_arg(list, long); + p = validaddr(p, n, 1); + + pid = pwait(&w); + if(pid < 0){ + ar0->i = -1; + return; + } + i = snprint(p, n, "%d %lud %lud %lud %q", + w.pid, + w.time[TUser], w.time[TSys], w.time[TReal], + w.msg); + + ar0->i = i; +} + +void +werrstr(char *fmt, ...) +{ + va_list va; + + if(up == nil) + return; + + va_start(va, fmt); + vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va); + va_end(va); +} + +static void +generrstr(char *buf, long n) +{ + char *p, tmp[ERRMAX]; + + if(n <= 0) + error(Ebadarg); + p = validaddr(buf, n, 1); + if(n > sizeof tmp) + n = sizeof tmp; + memmove(tmp, p, n); + + /* make sure it's NUL-terminated */ + tmp[n-1] = '\0'; + memmove(p, up->syserrstr, n); + p[n-1] = '\0'; + memmove(up->syserrstr, tmp, n); +} + +void +syserrstr(Ar0* ar0, va_list list) +{ + char *err; + usize nerr; + + /* + * int errstr(char* err, uint nerr); + * should really be + * usize errstr(char* err, usize nerr); + * but errstr always returns 0. 
+ */ + err = va_arg(list, char*); + nerr = va_arg(list, usize); + generrstr(err, nerr); + + ar0->i = 0; +} + +void +sys_errstr(Ar0* ar0, va_list list) +{ + char *p; + + /* + * int errstr(char* err); + * + * Deprecated; backwards compatibility only. + */ + p = va_arg(list, char*); + generrstr(p, 64); + + ar0->i = 0; +} + +void +sysnotify(Ar0* ar0, va_list list) +{ + void (*f)(void*, char*); + + /* + * int notify(void (*f)(void*, char*)); + */ + f = (void (*)(void*, char*))va_arg(list, void*); + + if(f != nil) + validaddr(f, sizeof(void (*)(void*, char*)), 0); + up->notify = f; + + ar0->i = 0; +} + +void +sysnoted(Ar0* ar0, va_list list) +{ + int v; + + /* + * int noted(int v); + */ + v = va_arg(list, int); + + if(v != NRSTR && !up->notified) + error(Egreg); + + ar0->i = 0; +} + +void +sysrendezvous(Ar0* ar0, va_list list) +{ + Proc *p, **l; + uintptr tag, val; + + /* + * void* rendezvous(void*, void*); + */ + tag = PTR2UINT(va_arg(list, void*)); + + l = &REND(up->rgrp, tag); + up->rendval = ~0; + + lock(up->rgrp); + for(p = *l; p; p = p->rendhash) { + if(p->rendtag == tag) { + *l = p->rendhash; + val = p->rendval; + p->rendval = PTR2UINT(va_arg(list, void*)); + + while(p->mach != 0) + ; + ready(p); + unlock(up->rgrp); + + ar0->v = UINT2PTR(val); + return; + } + l = &p->rendhash; + } + + /* Going to sleep here */ + up->rendtag = tag; + up->rendval = PTR2UINT(va_arg(list, void*)); + up->rendhash = *l; + *l = up; + up->state = Rendezvous; + if(up->trace) + proctrace(up, SLock, 0); + unlock(up->rgrp); + + sched(); + + ar0->v = UINT2PTR(up->rendval); +} + +/* + * The implementation of semaphores is complicated by needing + * to avoid rescheduling in syssemrelease, so that it is safe + * to call from real-time processes. This means syssemrelease + * cannot acquire any qlocks, only spin locks. + * + * Semacquire and semrelease must both manipulate the semaphore + * wait list. 
Lock-free linked lists only exist in theory, not + * in practice, so the wait list is protected by a spin lock. + * + * The semaphore value *addr is stored in user memory, so it + * cannot be read or written while holding spin locks. + * + * Thus, we can access the list only when holding the lock, and + * we can access the semaphore only when not holding the lock. + * This makes things interesting. Note that sleep's condition function + * is called while holding two locks - r and up->rlock - so it cannot + * access the semaphore value either. + * + * An acquirer announces its intention to try for the semaphore + * by putting a Sema structure onto the wait list and then + * setting Sema.waiting. After one last check of semaphore, + * the acquirer sleeps until Sema.waiting==0. A releaser of n + * must wake up n acquirers who have Sema.waiting set. It does + * this by clearing Sema.waiting and then calling wakeup. + * + * There are three interesting races here. + + * The first is that in this particular sleep/wakeup usage, a single + * wakeup can rouse a process from two consecutive sleeps! + * The ordering is: + * + * (a) set Sema.waiting = 1 + * (a) call sleep + * (b) set Sema.waiting = 0 + * (a) check Sema.waiting inside sleep, return w/o sleeping + * (a) try for semaphore, fail + * (a) set Sema.waiting = 1 + * (a) call sleep + * (b) call wakeup(a) + * (a) wake up again + * + * This is okay - semacquire will just go around the loop + * again. It does mean that at the top of the for(;;) loop in + * semacquire, phore.waiting might already be set to 1. + * + * The second is that a releaser might wake an acquirer who is + * interrupted before he can acquire the lock. Since + * release(n) issues only n wakeup calls -- only n can be used + * anyway -- if the interrupted process is not going to use his + * wakeup call he must pass it on to another acquirer. + * + * The third race is similar to the second but more subtle. 
An + * acquirer sets waiting=1 and then does a final canacquire() + * before going to sleep. The opposite order would result in + * missing wakeups that happen between canacquire and + * waiting=1. (In fact, the whole point of Sema.waiting is to + * avoid missing wakeups between canacquire() and sleep().) But + * there can be spurious wakeups between a successful + * canacquire() and the following semdequeue(). This wakeup is + * not useful to the acquirer, since he has already acquired + * the semaphore. Like in the previous case, though, the + * acquirer must pass the wakeup call along. + * + * This is all rather subtle. The code below has been verified + * with the spin model /sys/src/9/port/semaphore.p. The + * original code anticipated the second race but not the first + * or third, which were caught only with spin. The first race + * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it. + * It was lucky that my abstract model of sleep/wakeup still managed + * to preserve that behavior. + * + * I remain slightly concerned about memory coherence + * outside of locks. The spin model does not take + * queued processor writes into account so we have to + * think hard. The only variables accessed outside locks + * are the semaphore value itself and the boolean flag + * Sema.waiting. The value is only accessed with CAS, + * whose job description includes doing the right thing as + * far as memory coherence across processors. That leaves + * Sema.waiting. To handle it, we call coherence() before each + * read and after each write. - rsc + */ + +/* Add semaphore p with addr a to list in seg. */ +static void +semqueue(Segment* s, int* addr, Sema* p) +{ + memset(p, 0, sizeof *p); + p->addr = addr; + + lock(&s->sema); /* uses s->sema.Rendez.Lock, but no one else is */ + p->next = &s->sema; + p->prev = s->sema.prev; + p->next->prev = p; + p->prev->next = p; + unlock(&s->sema); +} + +/* Remove semaphore p from list in seg. 
*/
+static void
+semdequeue(Segment* s, Sema* p)
+{
+	lock(&s->sema);		/* list links are only changed while holding this lock */
+	p->next->prev = p->prev;
+	p->prev->next = p->next;
+	unlock(&s->sema);
+}
+
+/*
+ * Wake up n waiters with addr on list in seg.
+ * The list is walked under the s->sema lock; for at most n entries
+ * that match addr and still have waiting set, waiting is cleared
+ * first and wakeup is called afterwards.
+ */
+static void
+semwakeup(Segment* s, int* addr, int n)
+{
+	Sema *p;
+
+	lock(&s->sema);
+	for(p = s->sema.next; p != &s->sema && n > 0; p = p->next){
+		if(p->addr == addr && p->waiting){
+			p->waiting = 0;
+			coherence();	/* publish the cleared flag before the wakeup */
+			wakeup(p);
+			n--;
+		}
+	}
+	unlock(&s->sema);
+}
+
+/*
+ * Add delta to semaphore and wake up waiters as appropriate.
+ * The add is retried until the CASW succeeds; the return value is
+ * value+delta, i.e. the count this call stored.
+ */
+static int
+semrelease(Segment* s, int* addr, int delta)
+{
+	int value;
+
+	do
+		value = *addr;
+	while(!CASW(addr, value, value+delta));
+	semwakeup(s, addr, delta);	/* at most delta waiters are woken */
+
+	return value+delta;
+}
+
+/*
+ * Try to acquire semaphore using compare-and-swap.
+ * Returns 1 after decrementing a positive count, or 0 as soon as
+ * the count is read as <= 0.  A failed CASW simply re-reads and retries.
+ */
+static int
+canacquire(int* addr)
+{
+	int value;
+
+	while((value = *addr) > 0){
+		if(CASW(addr, value, value-1))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Should we wake up?
+ * True once the waiting flag of the Sema passed in p has been cleared.
+ */
+static int
+semawoke(void* p)
+{
+	coherence();	/* waiting is read outside any lock */
+	return !((Sema*)p)->waiting;
+}
+
+/* Acquire semaphore (subtract 1). 
*/
+/*
+ * Returns 1 once a ticket has been taken, or 0 if none was available
+ * and block is zero.  If the sleep is interrupted, the wait-list entry
+ * is removed, any wakeup already delivered to it is passed on to
+ * another waiter, and the error is re-raised with nexterror().
+ */
+static int
+semacquire(Segment* s, int* addr, int block)
+{
+	int acquired;
+	Sema phore;
+
+	if(canacquire(addr))
+		return 1;
+	if(!block)
+		return 0;
+
+	acquired = 0;
+	semqueue(s, addr, &phore);
+	for(;;){
+		/* announce the intent to sleep before the last check */
+		phore.waiting = 1;
+		coherence();
+		if(canacquire(addr)){
+			acquired = 1;
+			break;
+		}
+		if(waserror())
+			break;
+		sleep(&phore, semawoke, &phore);
+		poperror();
+	}
+	semdequeue(s, &phore);
+	coherence();	/* not strictly necessary due to lock in semdequeue */
+	/* a releaser cleared waiting for us: hand that wakeup on */
+	if(!phore.waiting)
+		semwakeup(s, addr, 1);
+	if(!acquired)
+		nexterror();
+
+	return 1;
+}
+
+/*
+ * Acquire semaphore or time-out.
+ * Returns 1 if a ticket was taken, 0 if the ms millisecond budget ran
+ * out first (or ms is 0 and no ticket was immediately available).
+ * An interrupted sleep is re-raised with nexterror().
+ *
+ * The result is decided by what happened in the loop (acquired,
+ * timedout), not by the residual value of ms.  Testing ms alone made
+ * a call entered with ms < 0 report 0 even after canacquire() had
+ * already taken a ticket, and made it swallow an interrupt.
+ */
+static int
+tsemacquire(Segment* s, int* addr, long ms)
+{
+	int acquired, timedout;
+	ulong t;
+	Sema phore;
+
+	if(canacquire(addr))
+		return 1;
+	if(ms == 0)
+		return 0;
+
+	acquired = 0;
+	timedout = 0;
+	semqueue(s, addr, &phore);
+	for(;;){
+		/* announce the intent to sleep before the last check */
+		phore.waiting = 1;
+		coherence();
+		if(canacquire(addr)){
+			acquired = 1;
+			break;
+		}
+		if(waserror())
+			break;
+		t = sys->ticks;
+		tsleep(&phore, semawoke, &phore, ms);
+		ms -= TK2MS(sys->ticks-t);	/* charge the time slept */
+		poperror();
+		if(ms <= 0){
+			timedout = 1;
+			break;
+		}
+	}
+	semdequeue(s, &phore);
+	coherence();	/* not strictly necessary due to lock in semdequeue */
+	/* a releaser cleared waiting for us: hand that wakeup on */
+	if(!phore.waiting)
+		semwakeup(s, addr, 1);
+	if(timedout)
+		return 0;
+	if(!acquired)
+		nexterror();
+	return 1;
+}
+
+void
+syssemacquire(Ar0* ar0, va_list list)
+{
+	Segment *s;
+	int *addr, block;
+
+	/*
+	 * int semacquire(long* addr, int block);
+	 * should be (and will be implemented below as) perhaps
+	 * int semacquire(int* addr, int block);
+	 */
+	addr = va_arg(list, int*);
+	addr = validaddr(addr, sizeof(int), 1);
+	evenaddr(PTR2UINT(addr));
+	block = va_arg(list, int);
+
+	if((s = seg(up, PTR2UINT(addr), 0)) == nil)
+		error(Ebadarg);
+	if(*addr < 0)
+		error(Ebadarg);
+
+	ar0->i = semacquire(s, addr, block);
+}
+
+void
+systsemacquire(Ar0* ar0, va_list list)
+{
+	Segment *s;
+	int *addr, ms;
+
+	/*
+	 * int tsemacquire(long* addr, ulong ms);
+	 * should be (and will be implemented below as) perhaps
+	 * int 
tsemacquire(int* addr, ulong ms); + */ + addr = va_arg(list, int*); + addr = validaddr(addr, sizeof(int), 1); + evenaddr(PTR2UINT(addr)); + ms = va_arg(list, ulong); + + if((s = seg(up, PTR2UINT(addr), 0)) == nil) + error(Ebadarg); + if(*addr < 0) + error(Ebadarg); + + ar0->i = tsemacquire(s, addr, ms); +} + +void +syssemrelease(Ar0* ar0, va_list list) +{ + Segment *s; + int *addr, delta; + + /* + * long semrelease(long* addr, long count); + * should be (and will be implemented below as) perhaps + * int semrelease(int* addr, int count); + */ + addr = va_arg(list, int*); + addr = validaddr(addr, sizeof(int), 1); + evenaddr(PTR2UINT(addr)); + delta = va_arg(list, int); + + if((s = seg(up, PTR2UINT(addr), 0)) == nil) + error(Ebadarg); + if(delta < 0 || *addr < 0) + error(Ebadarg); + + ar0->i = semrelease(s, addr, delta); +} diff -Nru 0/sys/src/nix/port/sysseg.c 4/sys/src/nix/port/sysseg.c --- 0/sys/src/nix/port/sysseg.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/sysseg.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,483 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +Segment* (*_globalsegattach)(Proc*, char*); + +static Lock physseglock; + +int +addphysseg(Physseg* new) +{ + Physseg *ps; + + /* + * Check not already entered and there is room + * for a new entry and the terminating null entry. 
+ */ + lock(&physseglock); + for(ps = physseg; ps->name; ps++){ + if(strcmp(ps->name, new->name) == 0){ + unlock(&physseglock); + return -1; + } + } + if(ps-physseg >= nphysseg-2){ + unlock(&physseglock); + return -1; + } + + if(new->pgszi < 0) + new->pgszi = getpgszi(2*MiB); /* 2M pages by default */ + if(new->pgszi < 0) + panic("addphysseg"); + *ps = *new; + unlock(&physseglock); + + return 0; +} + +int +isphysseg(char *name) +{ + int rv; + Physseg *ps; + + lock(&physseglock); + rv = 0; + for(ps = physseg; ps->name; ps++){ + if(strcmp(ps->name, name) == 0){ + rv = 1; + break; + } + } + unlock(&physseglock); + return rv; +} + +/* Needs to be non-static for BGP support */ +uintptr +ibrk(uintptr addr, int seg) +{ + Segment *s, *ns; + uintptr newtop, rtop; + long newsize; + int i, mapsize; + Pte **map; + uintmem pgsz; + + s = up->seg[seg]; + if(s == 0) + error(Ebadarg); + + if(addr == 0) + return s->top; + + qlock(&s->lk); + if(waserror()) { + qunlock(&s->lk); + nexterror(); + } + + /* We may start with the bss overlapping the data */ + if(addr < s->base) { + if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) + error(Enovmem); + addr = s->base; + } + + pgsz = m->pgsz[s->pgszi]; + if(seg == BSEG && addr >= ROUNDUP(s->top, 1*GiB) + 1*GiB) + newtop = ROUNDUP(addr, 1*GiB); + else + newtop = ROUNDUP(addr, pgsz); + newsize = (newtop-s->base)/pgsz; + if(newtop < s->top) { + mfreeseg(s, newtop, (s->top-newtop)/pgsz); + s->top = newtop; + s->size = newsize; + poperror(); + qunlock(&s->lk); + mmuflush(); + return newtop; + } + if(newsize > (SEGMAPSIZE*s->ptepertab)) + error(Enovmem); + + for(i = 0; i < NSEG; i++) { + ns = up->seg[i]; + if(ns == 0 || ns == s) + continue; + if(newtop >= ns->base && newtop < ns->top) + error(Esoverlap); + } + + if(seg == BSEG && newtop >= ROUNDUP(s->top, 1*GiB) + 1*GiB){ + DBG("segment using 1G pages\n"); + /* + * brk the bss up to the 1G boundary, and create + * a segment placed at that boundary, using 1G pages if it can. 
+ * This is both back compatible, transparent, + * and permits using 1G pages. + */ + rtop = ROUNDUP(newtop,1*GiB); + newtop = ROUNDUP(s->top, 1*GiB); + newsize -= (rtop-newtop)/BIGPGSZ; +assert(newsize >= 0); + DBG("ibrk: newseg %#ullx %ullx\n", newtop, (rtop-newtop)/BIGPGSZ); + ns = newseg(SG_BSS, newtop, (rtop-newtop)/BIGPGSZ); + ns->color= s->color; + up->seg[HSEG] = ns; + DBG("ibrk: newtop %#ullx newsize %#ulx \n", newtop, newsize); + /* now extend the bss up to newtop */ + }else + rtop = newtop; + + + mapsize = HOWMANY(newsize, s->ptepertab); + if(mapsize > s->mapsize){ + map = smalloc(mapsize*sizeof(Pte*)); + memmove(map, s->map, s->mapsize*sizeof(Pte*)); + if(s->map != s->ssegmap) + free(s->map); + s->map = map; + s->mapsize = mapsize; + } + + s->top = newtop; + s->size = newsize; + poperror(); + qunlock(&s->lk); + + return rtop; +} + +void +syssegbrk(Ar0* ar0, va_list list) +{ + int i; + uintptr addr; + Segment *s; + + /* + * int segbrk(void*, void*); + * should be + * void* segbrk(void* saddr, void* addr); + */ + addr = PTR2UINT(va_arg(list, void*)); + if(addr == 0){ + if(up->seg[HSEG]) + ar0->v = UINT2PTR(up->seg[HSEG]->top); + else + ar0->v = UINT2PTR(up->seg[BSEG]->top); + return; + } + for(i = 0; i < NSEG; i++) { + s = up->seg[i]; + if(s == nil) + continue; + /* Ok to extend an empty segment */ + if(addr < s->base || addr > s->top) + continue; + if(addr == s->top && (s->base < s->top)) + continue; + switch(s->type&SG_TYPE) { + case SG_TEXT: + case SG_DATA: + case SG_STACK: + error(Ebadarg); + default: + addr = PTR2UINT(va_arg(list, void*)); + ar0->v = UINT2PTR(ibrk(addr, i)); + return; + } + } + error(Ebadarg); +} + +void +sysbrk_(Ar0* ar0, va_list list) +{ + uintptr addr; + + /* + * int brk(void*); + * + * Deprecated; should be for backwards compatibility only. 
+ */ + addr = PTR2UINT(va_arg(list, void*)); + + ibrk(addr, BSEG); + + ar0->i = 0; +} + +static uintptr +segattach(Proc* p, int attr, char* name, uintptr va, usize len) +{ + int sno; + Segment *s, *os; + Physseg *ps; + + /* BUG: Only ok for now */ + if((va != 0 && va < UTZERO) || (va & KZERO) == KZERO) + error("virtual address in kernel"); + + vmemchr(name, 0, ~0); + + for(sno = 0; sno < NSEG; sno++) + if(p->seg[sno] == nil && sno != ESEG) + break; + + if(sno == NSEG) + error("too many segments in process"); + + /* + * first look for a global segment with the + * same name + */ + if(_globalsegattach != nil){ + s = (*_globalsegattach)(p, name); + if(s != nil){ + p->seg[sno] = s; + if(p == up && up->prepagemem) + nixprepage(sno); + return s->base; + } + } + + for(ps = physseg; ps->name != nil; ps++) + if(strcmp(name, ps->name) == 0) + break; + if(ps->name == nil) + error("segment not found"); + + if(va == 0 && ps->gva != 0){ + va = ps->gva; + if(len == 0) + len = ps->size*BIGPGSZ; + } + + if(len == 0) + error("zero length"); + + len = BIGPGROUND(len); + if(len == 0) + error("length overflow"); + + /* + * Find a hole in the address space. + * Starting at the lowest possible stack address - len, + * check for an overlapping segment, and repeat at the + * base of that segment - len until either a hole is found + * or the address space is exhausted. 
+ */ + if(va == 0) { + va = p->seg[SSEG]->base - len; + for(;;) { + os = isoverlap(p, va, len); + if(os == nil) + break; + va = os->base; + if(len > va) + error("cannot fit segment at virtual address"); + va -= len; + } + } + + va = va&~(BIGPGSZ-1); + if(isoverlap(p, va, len) != nil) + error(Esoverlap); + + if((len/BIGPGSZ) > ps->size) + error("len > segment size"); + + attr &= ~SG_TYPE; /* Turn off what is not allowed */ + attr |= ps->attr; /* Copy in defaults */ + + s = newseg(attr, va, len/BIGPGSZ); + s->pseg = ps; + p->seg[sno] = s; + + if(p == up && up->prepagemem) + nixprepage(sno); + + return va; +} + +void +syssegattach(Ar0* ar0, va_list list) +{ + int attr; + char *name; + uintptr va; + usize len; + + /* + * long segattach(int, char*, void*, ulong); + * should be + * void* segattach(int, char*, void*, usize); + */ + attr = va_arg(list, int); + name = va_arg(list, char*); + va = PTR2UINT(va_arg(list, void*)); + len = va_arg(list, usize); + + ar0->v = UINT2PTR(segattach(up, attr, validaddr(name, 1, 0), va, len)); +} + +void +syssegdetach(Ar0* ar0, va_list list) +{ + int i; + uintptr addr; + Segment *s; + + /* + * int segdetach(void*); + */ + addr = PTR2UINT(va_arg(list, void*)); + + qlock(&up->seglock); + if(waserror()){ + qunlock(&up->seglock); + nexterror(); + } + + s = 0; + for(i = 0; i < NSEG; i++) + if(s = up->seg[i]) { + qlock(&s->lk); + if((addr >= s->base && addr < s->top) || + (s->top == s->base && addr == s->base)) + goto found; + qunlock(&s->lk); + } + + error(Ebadarg); + +found: + /* + * Can't detach the initial stack segment + * because the clock writes profiling info + * there. 
+ */ + if(s == up->seg[SSEG]){ + qunlock(&s->lk); + error(Ebadarg); + } + up->seg[i] = 0; + qunlock(&s->lk); + putseg(s); + qunlock(&up->seglock); + poperror(); + + /* Ensure we flush any entries from the lost segment */ + mmuflush(); + + ar0->i = 0; +} + +void +syssegfree(Ar0* ar0, va_list list) +{ + Segment *s; + uintptr from, to; + usize len; + + /* + * int segfree(void*, ulong); + * should be + * int segfree(void*, usize); + */ + from = PTR2UINT(va_arg(list, void*)); + s = seg(up, from, 1); + if(s == nil) + error(Ebadarg); + len = va_arg(list, usize); + to = (from + len) & ~(BIGPGSZ-1); + if(to < from || to > s->top){ + qunlock(&s->lk); + error(Ebadarg); + } + from = BIGPGROUND(from); + + mfreeseg(s, from, (to - from) / BIGPGSZ); + qunlock(&s->lk); + mmuflush(); + + ar0->i = 0; +} + +static void +pteflush(Pte *pte, int s, int e) +{ + int i; + Page *p; + + for(i = s; i < e; i++) { + p = pte->pages[i]; + if(pagedout(p) == 0) + memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl)); + } +} + +void +syssegflush(Ar0* ar0, va_list list) +{ + Segment *s; + uintptr addr; + Pte *pte; + usize chunk, l, len, pe, ps; + + /* + * int segflush(void*, ulong); + * should be + * int segflush(void*, usize); + */ + addr = PTR2UINT(va_arg(list, void*)); + len = va_arg(list, usize); + + while(len > 0) { + s = seg(up, addr, 1); + if(s == nil) + error(Ebadarg); + + s->flushme = 1; + more: + l = len; + if(addr+l > s->top) + l = s->top - addr; + + ps = addr-s->base; + pte = s->map[ps/PTEMAPMEM]; + ps &= PTEMAPMEM-1; + pe = PTEMAPMEM; + if(pe-ps > l){ + pe = ps + l; + pe = (pe+BIGPGSZ-1)&~(BIGPGSZ-1); + } + if(pe == ps) { + qunlock(&s->lk); + error(Ebadarg); + } + + if(pte) + pteflush(pte, ps/BIGPGSZ, pe/BIGPGSZ); + + chunk = pe-ps; + len -= chunk; + addr += chunk; + + if(len > 0 && addr < s->top) + goto more; + + qunlock(&s->lk); + } + mmuflush(); + + ar0->i = 0; +} diff -Nru 0/sys/src/nix/port/syssem.c 4/sys/src/nix/port/syssem.c --- 0/sys/src/nix/port/syssem.c Thu Jan 1 00:00:00 1970 
+++ 4/sys/src/nix/port/syssem.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,528 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * Sems are characterized by two structures, Sem (user space) and + * Ksem (kernel space). A Sem includes a spin lock (userlock) to + * protect its three counters: + * + * tickets: natural, number of tokens in the semaphore. + * going: natural, number of processes that are in transit + * from the libc's dowsem to the kernel's semsleep. + * waiting: natural, estimation of the number of processes sleeping + * in the ksem, always >= than the real number of processes + * in the ksem's queue (Ksem.nq). + */ + +enum { + Semtrytimes = 100, + Locktrytimes = 10, + Maxaltsems = 300, + Lockmagic = 0xdeaddead, /* libc and kernel*/ +}; + +/* + * If the userland spinlock is corrupted, + */ +static void +_userlock(Ksem *s) +{ + int n; + int try; + n = 0; + + try = Locktrytimes; + while(TAS(&s->sem->userlock.val)){ + if(s->state == Semdead) + error(Esemdead); + if(++n % 10000 == 0){ + iprint("syssem: warning userlock busy\n"); + if(--try == 0){ + iprint("syssem: the userlock of ksem %p is dead\n", s); + s->state = Semdead; + error(Esemtimeout); + } + } + waitwhile(&s->sem->userlock.val, Lockmagic); + } + return; +} + +static void +_userunlock(Ksem *s) +{ + s->sem->userlock.val = 0; +} + +/* + * called with Ksem's lock held + */ +static Proc* +semdequeue(Ksem *s) +{ + Proc *p; + + assert(s->nq >= 0); + if(s->nq == 0) + return nil; + p = s->q[0]; + s->nq--; + if(s->nq < 0) + panic("semdequeue"); + if(s->nq != 0) + memmove(s->q, s->q + 1, s->nq * sizeof s->q[0]); + return p; +} + +/* + * called with Ksem's lock held + */ +static int +semdequeueme(Ksem *s) +{ + int i; + + assert(s->nq >= 0); + if(s->nq == 0) + return -1; + for(i = 0; i < s->nq; i++) + if(s->q[i] == up) + break; + if(i == s->nq) + return -1; + s->nq--; + if(s->nq < 0) + panic("semdequeueme"); + if(s->nq == 0) /* the 
queue is empty now */ + return 0; + if(s->nq == i) /* it was the last element in the queue */ + return 0; + memmove(s->q + i, s->q + i + 1, (s->nq - i) * sizeof s->q[0]); + return 0; +} + +/* + * called with Ksem's lock held + */ +static void +semqueue(Ksem *s, Proc *p) +{ + assert(s->nq >= 0); + s->q = realloc(s->q, (s->nq+1) * sizeof s->q[0]); + if(s->q == nil) + panic("semqueue: no memory"); + + s->q[s->nq] = p; + s->nq++; +} + +static void +semwakeup(Ksem *s) +{ + Proc* p; + + DBG("semwakeup up %#p sem %#p\n", up, s->sem); + + rlock(s->semaltlock); + lock(s); + + if(waserror()){ + runlock(s->semaltlock); + unlock(s); + pprint("suicide: semwakeup, userlock is dead\n"); + pexit("Suicide", 0); + } + + /* + * There can be awaken procs in the queue: + * procs performing an alt that have been already + * awaken in other sems but they have not been + * dequeued yet (by semalt). We can dequeue them. + * Note that the queue can also be empty. + */ + while(s->nq > 0){ + p = semdequeue(s); + if(p == nil) + panic("semwakeup"); + _userlock(s); + s->sem->waiting--; + _userunlock(s); + if(TAS(&p->semawaken) == 0){ + poperror(); + p->waitsem = s; + unlock(s); + runlock(s->semaltlock); + ready(p); + return; + } + } + + /* + * Is there any proc to be awaken coming from user space? + * This can happen: an upsem reaches here before the + * preceding downsem reaches semsleep. + */ + _userlock(s); + if(s->sem->going > 0){ + _userunlock(s); + poperror(); + s->nowait++; + unlock(s); + runlock(s->semaltlock); + return; + } + + /* + * The queue is empty and no one is coming: this upsem() + * was fooled by a semalt. + * It has to generate a real ticket to compensate. 
+	 */
+	s->sem->tickets++;
+	_userunlock(s);
+	poperror();
+	unlock(s);
+	runlock(s->semaltlock);
+}
+
+/*
+ * Account for up in the user-level counters (waiting++, going--) and,
+ * unless a wakeup is already pending (s->nowait), queue up on s and
+ * sched() away in state Semdown.
+ * If the process resumes with up->waitsem still nil it removes itself
+ * from the queue, undoes the waiting count and raises Edownint.
+ * An error from _userlock is fatal: the process is made to exit.
+ */
+static void
+semsleep(Ksem *s)
+{
+	DBG("semsleep up %#p sem %#p\n", up, s->sem);
+
+	rlock(s->semaltlock);
+	lock(s);
+
+	if(waserror()){
+		/*
+		 * Release both locks taken above before exiting, as the
+		 * handler in semwakeup does.  Leaving the read lock held
+		 * would block every later wlock(semaltlock) in semalt.
+		 */
+		unlock(s);
+		runlock(s->semaltlock);
+		pprint("suicide: semsleep, userlock is dead\n");
+		pexit("Suicide", 0);
+	}
+	_userlock(s);
+	s->sem->waiting++;
+	s->sem->going--;
+	_userunlock(s);
+	poperror();
+	if(s->nowait > 0){
+		/* a wakeup got here first: consume it instead of sleeping */
+		s->nowait--;
+		unlock(s);
+		runlock(s->semaltlock);
+		return;
+	}
+	up->semawaken = 0;
+	up->waitsem = nil;
+	semqueue(s, up);
+	up->state = Semdown;
+	unlock(s);
+	runlock(s->semaltlock);
+	sched();
+
+	lock(s);
+	if(up->waitsem == nil){
+		/*
+		 * Nobody did awake us, we are probably being
+		 * killed; we no longer want a ticket.
+		 */
+		semdequeueme(s);
+		if(waserror()){
+			/* only s is held here; semaltlock was released before sched() */
+			unlock(s);
+			pprint("suicide: semsleep, userlock is dead\n");
+			pexit("Suicide", 0);
+		}
+		_userlock(s);
+		s->sem->waiting--;
+		_userunlock(s);
+		unlock(s);
+		poperror();
+		error(Edownint);
+	}
+	unlock(s);
+}
+
+void
+syssemsleep(Ar0*, va_list list)
+{
+	Ksem *s;
+	Segment *sg;
+	Sem *ns;
+
+	/*
+	 * void semsleep(Sem*, int, int);
+	 */
+	ns = va_arg(list, Sem*);
+	ns = validaddr(ns, sizeof *ns, 1);
+	evenaddr(PTR2UINT(ns));
+	if((sg = seg(up, PTR2UINT(ns), 0)) == nil)
+		error(Ebadarg);
+	s = segmksem(sg, ns);
+	semsleep(s);
+}
+
+void
+syssemwakeup(Ar0*, va_list list)
+{
+	Ksem *s;
+	Segment *sg;
+	Sem *ns;
+
+	/*
+	 * void semwakeup(Sem*, int);
+	 */
+	ns = va_arg(list, Sem*);
+	ns = validaddr(ns, sizeof *ns, 1);
+	evenaddr(PTR2UINT(ns));
+	if((sg = seg(up, PTR2UINT(ns), 0)) == nil)
+		error(Ebadarg);
+	s = segmksem(sg, ns);
+	semwakeup(s);
+}
+
+static int nextindex;
+/*
+ * Alt makes its best effort to get a token from any sem in the array.
+ * It ignores the dead sems and only crashes if all sems are dead.
+ * nextindex is used to prevent starvation. 
+ */ +static int +semalt(Ksem *ss[], int n) +{ + int i, j, r; + Ksem *s; + RWlock *rwl; + int queued; + ulong from; + + if(n < 1) + error(Ebadarg); + + from = (ulong)semainc(&nextindex) % n; + + /* + * While searching an available sem, the proc + * should not be awaken in a previously processed sem. + * The segment's rwlock (semaltlock) prevents this. + */ + rwl = ss[0]->semaltlock; + wlock(rwl); + up->waitsem = nil; + up->semawaken = 0; + queued = 0; + + for(i = 0; i < n; i++){ + s = ss[(from+i)%n]; + /* + * if the sem is dead, ignore it and keep searching + */ + if(waserror()) + continue; + _userlock(s); + if(s->sem->tickets > 0){ + s->sem->tickets--; + _userunlock(s); + poperror(); + up->waitsem = s; + up->semawaken = 1; + wunlock(rwl); + goto Done; + } + s->sem->waiting++; + _userunlock(s); + poperror(); + /* + * Note that other proc could call semdequeme from this + * fuction (see bellow), so holding the rwlock is not + * sufficient to protect the queue: we need to hold the + * sem's lock. + */ + lock(s); + semqueue(s, up); + unlock(s); + queued++; + } + if(queued == 0){ + pprint("suicide: semalt, all the sems are dead\n"); + wunlock(rwl); + pexit("Suicide", 0); + } + up->state = Semalt; + wunlock(rwl); + sched(); + wlock(rwl); + if(up->waitsem == nil){ + /* + * We are probably being killed. + */ + for(i = 0; i < n; i++){ + s = ss[i]; + lock(s); + /* + * up->waitsem was nil, but there can be dead sems, + * so semdequeueme could return -1. + */ + if(semdequeueme(s) < 0){ + unlock(s); + continue; + } + unlock(s); + if(! waserror()){ + _userlock(s); + s->sem->waiting--; + _userunlock(s); + poperror(); + } + } + wunlock(rwl); + error(Esemaltint); + } + wunlock(rwl); +Done: + DBG("semalt up %#p awaken\n", up); + r = -1; + for(j = 0; j < n; j++){ + s = ss[j]; + if(s == up->waitsem) + r = j; + else{ + /* + * Cancel the reservation for the sem. + * Note that the sem could be already + * dequeued by semwakeup or never + * queued in this sem. 
+			 */
+			lock(s);
+			if(semdequeueme(s) == 0){
+				if(! waserror()){
+					_userlock(s);
+					s->sem->waiting--;
+					_userunlock(s);
+					poperror();
+				}
+			}
+			unlock(s);
+		}
+	}
+	if(r == -1)
+		panic("semalt");
+	return r;
+}
+
+/*
+ * System call entry for semalt: wait on any of an array of user
+ * semaphores.
+ *
+ * Arguments (from list): Sem *sl[], int nums.
+ * Each user pointer is validated, checked for alignment, rejected if
+ * it appears twice in the array (Ebadargalt) and mapped to its kernel
+ * Ksem through the segment that holds it.  The result of semalt (the
+ * index of the sem that was acquired) is stored in ar0->i; ar0->i is
+ * preset to -1.
+ */
+void
+syssemalt(Ar0 *ar0, va_list list)
+{
+	Sem **sl;
+	Sem *ns;
+	int i, j, nums;
+	Segment *sg;
+	Ksem *ksl[Maxaltsems];
+
+	/*
+	 * void semalt(Sem*[], int);
+	 */
+	ar0->i = -1;
+	sl = va_arg(list, Sem**);
+	nums = va_arg(list, int);
+	/*
+	 * Range-check the count before it is used to size anything.
+	 * A negative value would otherwise be turned into a huge
+	 * unsigned length; semalt rejects n < 1 with Ebadarg as well.
+	 */
+	if(nums < 1)
+		error(Ebadarg);
+	if(nums > nelem(ksl))
+		error(Etoomanysems);
+	/* sl[] is an array of pointers: validate nums pointers, not nums Sems */
+	sl = validaddr(sl, nums * sizeof sl[0], 1);
+	for(i = 0; i < nums; i++){
+		ns = sl[i];
+		ns = validaddr(ns, sizeof(Sem), 1);
+		evenaddr(PTR2UINT(ns));
+		/*
+		 * Are there duplicated sems in the array?
+		 */
+		for(j = 0; j < nums; j++)
+			if(i != j && ns == sl[j])
+				error(Ebadargalt);
+
+		if((sg = seg(up, PTR2UINT(ns), 0)) == nil)
+			error(Ebadarg);
+		ksl[i] = segmksem(sg, ns);
+	}
+	ar0->i = semalt(ksl, nums);
+}
+
+/*
+ * Kernel version of the libc's upsem.
+ * It must be called in the context of a process
+ */
+void
+upsem(Ksem *s)
+{
+	if(waserror()){
+		pprint("suicide: upsem, userlock is dead\n");
+		pexit("Suicide", 0);
+	}
+	_userlock(s);
+	if(s->sem->tickets == 0 && (s->sem->waiting > 0 || s->sem->going > 0)){
+		_userunlock(s);
+		poperror();
+		semwakeup(s);
+		return;
+	}
+	s->sem->tickets++;
+	_userunlock(s);
+	poperror();
+	return;
+}
+
+/*
+ * Kernel version of the libc's downsem.
+ * It must be called in the context of a process.
+ * Returns 0 if it is non-blocking and there are no tickets.
+ * Returns 1 if it got a ticket.
+ */
+int
+downsem(Ksem *s, int block)
+{
+	if(waserror()){
+		pprint("suicide: downsem, userlock is dead\n");
+		pexit("Suicide", 0);
+	}
+	_userlock(s);
+	if(!
block && s->sem->tickets == 0){
+		_userunlock(s);
+		poperror();
+		return 0;
+	}
+	if(s->sem->tickets == 0){
+		s->sem->going++;
+		_userunlock(s);
+		poperror();
+		semsleep(s);
+		return 1;
+	}
+	s->sem->tickets--;
+	_userunlock(s);
+	poperror();
+	return 1;
+}
+
+/*
+ * Take a ticket from any of the n sems in ss[] and return the index
+ * of the sem it came from.
+ *
+ * The sems are first polled (at most Semtrytimes rounds) for one with
+ * tickets, starting at a position derived from nextindex.  A
+ * non-blocking downsem is then tried on every sem in turn; if none
+ * succeeds the call falls back to semalt.
+ *
+ * n must be at least 1; otherwise Ebadarg is raised, as in semalt.
+ */
+int
+altsems(Ksem *ss[], int n)
+{
+	int i, w;
+	ulong p;
+
+	/* every index below is computed modulo n */
+	if(n < 1)
+		error(Ebadarg);
+
+	p = (ulong) ainc(&nextindex);
+
+	i = 0;
+	/* busy wait */
+	for(w = 0; w < Semtrytimes; w++){
+		for(i = 0; i < n; i++)
+			if(ss[(p+i)%n]->sem->tickets > 0)
+				break;
+		if(i < n)
+			break;
+	}
+	/* start the real attempt at the sem that was seen with tickets */
+	p = (p+i)%n;
+	for(i = 0; i < n; i++)
+		if(downsem(ss[(p+i)%n], 0) == 1)
+			return (p+i)%n;
+
+	return semalt(ss, n);
+}
diff -Nru 0/sys/src/nix/port/systab.c 4/sys/src/nix/port/systab.c
--- 0/sys/src/nix/port/systab.c Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/port/systab.c Wed Feb 6 00:00:00 2013
@@ -0,0 +1,134 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "/sys/src/libc/9syscall/sys.h"
+
+extern void sysr1(Ar0*, va_list);
+extern void sys_errstr(Ar0*, va_list);
+extern void sysbind(Ar0*, va_list);
+extern void syschdir(Ar0*, va_list);
+extern void sysclose(Ar0*, va_list);
+extern void sysdup(Ar0*, va_list);
+extern void sysalarm(Ar0*, va_list);
+extern void sysexec(Ar0*, va_list);
+extern void sysexits(Ar0*, va_list);
+extern void sys_fsession(Ar0*, va_list);
+extern void sysfauth(Ar0*, va_list);
+extern void sys_fstat(Ar0*, va_list);
+extern void syssegbrk(Ar0*, va_list);
+extern void sys_mount(Ar0*, va_list);
+extern void sysopen(Ar0*, va_list);
+extern void sys_read(Ar0*, va_list);
+extern void sysoseek(Ar0*, va_list);
+extern void syssleep(Ar0*, va_list);
+extern void sys_stat(Ar0*, va_list);
+extern void sysrfork(Ar0*, va_list);
+extern void sys_write(Ar0*, va_list);
+extern void syspipe(Ar0*, va_list);
+extern void syscreate(Ar0*, va_list);
+extern void sysfd2path(Ar0*, va_list);
+extern void sysbrk_(Ar0*, va_list);
+extern void sysremove(Ar0*, va_list);
+extern void sys_wstat(Ar0*,
va_list); +extern void sys_fwstat(Ar0*, va_list); +extern void sysnotify(Ar0*, va_list); +extern void sysnoted(Ar0*, va_list); +extern void syssegattach(Ar0*, va_list); +extern void syssegdetach(Ar0*, va_list); +extern void syssegfree(Ar0*, va_list); +extern void syssegflush(Ar0*, va_list); +extern void sysrendezvous(Ar0*, va_list); +extern void sysunmount(Ar0*, va_list); +extern void sys_wait(Ar0*, va_list); +extern void syssemacquire(Ar0*, va_list); +extern void syssemrelease(Ar0*, va_list); +extern void sysseek(Ar0*, va_list); +extern void sysfversion(Ar0*, va_list); +extern void syserrstr(Ar0*, va_list); +extern void sysstat(Ar0*, va_list); +extern void sysfstat(Ar0*, va_list); +extern void syswstat(Ar0*, va_list); +extern void sysfwstat(Ar0*, va_list); +extern void sysmount(Ar0*, va_list); +extern void sysawait(Ar0*, va_list); +extern void syspread(Ar0*, va_list); +extern void syspwrite(Ar0*, va_list); +extern void systsemacquire(Ar0*, va_list); +extern void syssemsleep(Ar0*, va_list); +extern void syssemwakeup(Ar0*, va_list); +extern void syssemalt(Ar0*, va_list); +extern void sysexecac(Ar0*, va_list); +extern void sysnixsyscall(Ar0*, va_list); +extern void sysziopread(Ar0*, va_list); +extern void sysziopwrite(Ar0*, va_list); +extern void sysziofree(Ar0*, va_list); +struct { + char* n; + void (*f)(Ar0*, va_list); + Ar0 r; +} systab[] = { + [SYSR1] { "Sysr1", sysr1, { .i = -1 } }, + [_ERRSTR] { "_errstr", sys_errstr, { .i = -1 } }, + [BIND] { "Bind", sysbind, { .i = -1 } }, + [CHDIR] { "Chdir", syschdir, { .i = -1 } }, + [CLOSE] { "Close", sysclose, { .i = -1 } }, + [DUP] { "Dup", sysdup, { .i = -1 } }, + [ALARM] { "Alarm", sysalarm, { .l = -1 } }, + [EXEC] { "Exec", sysexec, { .v = (void*)-1 } }, + [EXITS] { "Exits", sysexits, { .i = -1 } }, + [_FSESSION] { "_fsession", sys_fsession, { .i = -1 } }, + [FAUTH] { "Fauth", sysfauth, { .i = -1 } }, + [_FSTAT] { "_fstat", sys_fstat, { .i = -1 } }, + [SEGBRK] { "Segbrk", syssegbrk, { .v = (void*)-1 } }, + [_MOUNT] { 
"_mount", sys_mount, { .i = -1 } }, + [OPEN] { "Open", sysopen, { .i = -1 } }, + [_READ] { "_read", sys_read, { .l = -1 } }, + [OSEEK] { "Oseek", sysoseek, { .i = -1 } }, + [SLEEP] { "Sleep", syssleep, { .i = -1 } }, + [_STAT] { "_stat", sys_stat, { .i = -1 } }, + [RFORK] { "Rfork", sysrfork, { .i = -1 } }, + [_WRITE] { "_write", sys_write, { .l = -1 } }, + [PIPE] { "Pipe", syspipe, { .i = -1 } }, + [CREATE] { "Create", syscreate, { .i = -1 } }, + [FD2PATH] { "Fd2path", sysfd2path, { .i = -1 } }, + [BRK_] { "Brk", sysbrk_, { .i = -1 } }, + [REMOVE] { "Remove", sysremove, { .i = -1 } }, + [_WSTAT] { "_wstat", sys_wstat, { .i = -1 } }, + [_FWSTAT] { "_fwstat", sys_fwstat, { .i = -1 } }, + [NOTIFY] { "Notify", sysnotify, { .i = -1 } }, + [NOTED] { "Noted", sysnoted, { .i = -1 } }, + [SEGATTACH] { "Segattach", syssegattach, { .v = (void*)-1 } }, + [SEGDETACH] { "Segdetach", syssegdetach, { .i = -1 } }, + [SEGFREE] { "Segfree", syssegfree, { .i = -1 } }, + [SEGFLUSH] { "Segflush", syssegflush, { .i = -1 } }, + [RENDEZVOUS] { "Rendez", sysrendezvous, { .v = (void*)-1 } }, + [UNMOUNT] { "Unmount", sysunmount, { .i = -1 } }, + [_WAIT] { "_wait", sys_wait, { .i = -1 } }, + [SEMACQUIRE] { "Semacquire", syssemacquire, { .i = -1 } }, + [SEMRELEASE] { "Semrelease", syssemrelease, { .i = -1 } }, + [SEEK] { "Seek", sysseek, { .i = -1 } }, + [FVERSION] { "Fversion", sysfversion, { .i = -1 } }, + [ERRSTR] { "Errstr", syserrstr, { .i = -1 } }, + [STAT] { "Stat", sysstat, { .i = -1 } }, + [FSTAT] { "Fstat", sysfstat, { .i = -1 } }, + [WSTAT] { "Wstat", syswstat, { .i = -1 } }, + [FWSTAT] { "Fwstat", sysfwstat, { .i = -1 } }, + [MOUNT] { "Mount", sysmount, { .i = -1 } }, + [AWAIT] { "Await", sysawait, { .i = -1 } }, + [PREAD] { "Pread", syspread, { .l = -1 } }, + [PWRITE] { "Pwrite", syspwrite, { .l = -1 } }, + [TSEMACQUIRE] { "Tsemacquire", systsemacquire, { .i = -1 } }, + [SEMSLEEP] { "Semsleep", syssemsleep, { .i = -1 } }, + [SEMWAKEUP] { "Semwakeup", syssemwakeup, { .i = -1 } }, + 
[SEMALT] { "Semalt", syssemalt, { .i = -1 } }, + [EXECAC] { "Execac", sysexecac, { .v = (void*)-1 } }, + [NIXSYSCALL] { "Nixsyscall", sysnixsyscall, { .i = -1 } }, + [ZIOPREAD] { "Ziopread", sysziopread, { .l = -1 } }, + [ZIOPWRITE] { "Ziopwrite", sysziopwrite, { .l = -1 } }, + [ZIOFREE] { "Ziofree", sysziofree, { .i = -1 } }, +}; + +int nsyscall = nelem(systab); diff -Nru 0/sys/src/nix/port/syszio.c 4/sys/src/nix/port/syszio.c --- 0/sys/src/nix/port/syszio.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/syszio.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,636 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +/* + * Experiment on zero-copy + * + * Each address in a Zio slot implies a reference + * counter for that buffer. Provided the address, + * we must be able to get to the counter. + * We can use shared segments with fixed message sizes per + * segment, so we can do arithmetic to locate the counter. + * We could also use per-page reference counters, and perhaps + * accept any user pointer. + * If the kernel supplies the buffers, it must allocate them + * from a place available for the user, perhaps a heap segment + * or something like that. + */ + +enum +{ + Maxatomic = 64*KiB +}; + +typedef struct ZMap ZMap; +typedef struct Map Map; + +struct Map { + Map* next; + int free; + uintptr addr; + uvlong size; +}; + +struct ZMap { + Map* map; + Lock; +}; + +static int inited; + +static void zmapfree(ZMap* rmap, uintptr addr); +static uintptr zmapalloc(ZMap* rmap, usize size); + +static void +zioinit(void) +{ + if(inited) + return; + inited++; + fmtinstall('Z', ziofmt); +} + +int +ziofmt(Fmt *f) +{ + Kzio *io; + + io = va_arg(f->args, Kzio*); + return fmtprint(f, "%#p[%#ulx]", io->data, io->size); +} + +static void +dumpzmap(ZMap *map) +{ + Map *mp; + for(mp = map->map; mp != nil; mp = mp->next) + print("\tmap %#ullx[%#ullx] %c\n", mp->addr, mp->size, + mp->free ? 
'f' : 'a'); +} + +/* + * No locks! + */ +void +dumpzseg(Segment *s) +{ + Zseg *zs; + ZMap *map; + int i; + + if(DBGFLG == 0) + return; + + zs = &s->zseg; + print("zseg %#ullx type %#ux map %#p naddr %d end %d\n", + s->base, s->type, zs->map, zs->naddr, zs->end); + if(zs->addr != nil) + for(i = 0; i < zs->end; i++) + print("\taddr %#ullx\n", zs->addr[i]); + map = zs->map; + if(map == nil) + return; + dumpzmap(map); +} + +/* + * Called from putseg, when the segment is being destroyed. + */ +void +freezseg(Segment *s) +{ + Zseg *zs; + ZMap *zp; + Map *mp; + + DBG("freezseg: "); + dumpzseg(s); + zs = &s->zseg; + zp = zs->map; + if(zp == nil) + return; + while(zp->map != nil){ + mp = zp->map; + zp->map = mp->next; + free(mp); + } + free(zp); +} + +/* + * Grow the pool of addresses in s's zseg, s is qlocked + */ +void +zgrow(Segment *s) +{ + enum{Incr = 32}; + Zseg *zs; + + zioinit(); + zs = &s->zseg; + zs->naddr += Incr; + zs->addr = realloc(zs->addr, zs->naddr*sizeof(uintptr)); + if(zs->addr == nil) + panic("zgrow: no memory"); +} + +/* + * Find an address in s's zseg; s is qlocked + */ +uintptr +zgetaddr(Segment *s) +{ + Zseg *zs; + uintptr va; + + zs = &s->zseg; + if(zs->end == 0) + return 0ULL; + va = zs->addr[0]; + zs->end--; + if(zs->end > 0) + zs->addr[0] = zs->addr[zs->end]; + DBG("zgetaddr: %#ullx\n", va); + dumpzseg(s); + return va; +} + +/* + * add an address to s's zseg; s is qlocked. + * wakeup any reader if it's waiting. 
+ */ +int +zputaddr(Segment *s, uintptr va) +{ + Zseg *zs; + + zs = &s->zseg; + if((s->type&SG_ZIO) == 0) + return -1; + if((s->type&SG_KZIO) != 0){ + DBG("zputaddr: zmapfree %#ullx\n", va); + zmapfree(s->zseg.map, va); + dumpzseg(s); + return 0; + } + if(zs->end == zs->naddr) + zgrow(s); + zs->addr[zs->end++] = va; + if(zs->end == 1) + wakeup(&zs->rr); /* in case anyone was waiting */ + DBG("zputaddr %#ullx\n", va); + dumpzseg(s); + return 0; +} + +void* +alloczio(Segment *s, long len) +{ + Zseg *zs; + uintptr va; + + zs = &s->zseg; + va = zmapalloc(zs->map, len); + if(va == 0ULL) + error("kernel zero copy segment exhausted"); + return UINT2PTR(va); +} + +/* + * Locate the kernel segment for zero copy here, + * return it unlocked with a reference added. + */ +Segment* +getzkseg(void) +{ + Segment *s; + int i; + + qlock(&up->seglock); + for(i = 0; i < NSEG; i++){ + s = up->seg[i]; + if(s != nil && (s->type&SG_KZIO) != 0){ + incref(s); + qunlock(&up->seglock); + DBG("getzkseg: %#p\n", s); + return s; + } + } + qunlock(&up->seglock); + DBG("getzkseg: nil\n"); + return nil; +} + +/* + * This is the counterpart of devzread in some sense, + * it reads in the traditional way from io[]. 
+ */
+long
+readzio(Kzio *io, int nio, void *a, long count)
+{
+	long tot, nr;
+	char *p;
+
+	p = a;
+	tot = 0;
+	while(nio-- > 0){
+		if(tot < count){
+			nr = io->size;
+			if(tot + nr > count)
+				nr = count - tot;
+			DBG("readzio: copy %#p %Z\n", p+tot, io);
+			memmove(p+tot, io->data, nr);
+			tot += nr;
+		}
+		qlock(&io->seg->lk);
+		zputaddr(io->seg, PTR2UINT(io->data));
+		qunlock(&io->seg->lk);
+		putseg(io->seg);
+		io->seg = nil;
+		io++;
+	}
+	return tot;
+}
+
+int
+devzread(Chan *c, Kzio io[], int nio, usize tot, vlong offset)
+{
+	Segment *s;
+
+	DBG("devzread %#p[%d]\n", io, nio);
+
+	s = getzkseg();
+	if(s == nil)
+		error("no kernel segment for zero-copy");
+	if(tot > Maxatomic)
+		tot = Maxatomic;
+	io[0].data = alloczio(s, tot);
+	io[0].seg = s;
+	if(waserror()){
+		zputaddr(s, PTR2UINT(io[0].data));
+		putseg(s);
+		nexterror();
+	}
+	io[0].size = c->dev->read(c, io[0].data, tot, offset);
+	poperror();
+	return 1;
+}
+
+/*
+ * Default zwrite for devices that do not supply one.
+ *
+ * A single io entry is written directly with the device's write.
+ * Several entries are first gathered into one Block (returning each
+ * buffer address to its segment with zputaddr as it is copied) and
+ * written with bwrite.
+ *
+ * Afterwards every entry is cleared: its size is adjusted according
+ * to the count returned by the device, its data pointer is reset and
+ * the segment reference carried in io[i].seg is released.
+ * The value returned is the index computed in j by that loop.
+ */
+int
+devzwrite(Chan *c, Kzio io[], int nio, vlong offset)
+{
+	int i, j;
+	long tot;
+	Block *bp;
+
+	DBG("devzwrite %#p[%d]\n", io, nio);
+
+	tot = 0;
+	for(i = 0; i < nio; i++)
+		tot += io[i].size;
+	bp = nil;
+	if(waserror()){
+		if(bp != nil)
+			freeb(bp);
+		nexterror();
+	}
+	if(nio == 1)
+		tot = c->dev->write(c, io[0].data, io[0].size, offset);
+	else{
+		bp = allocb(tot);
+		if(bp == nil)
+			error(Enomem);
+		for(i = 0; i < nio; i++){
+			DBG("devzwrite: copy %#p %Z\n", bp->wp, &io[i]);
+			memmove(bp->wp, io[i].data, io[i].size);
+			bp->wp += io[i].size;
+			qlock(&io[i].seg->lk);
+			if(zputaddr(io[i].seg, PTR2UINT(io[i].data)) < 0)
+				panic("devzwrite: not a shared data segment");
+			qunlock(&io[i].seg->lk);
+		}
+		tot = c->dev->bwrite(c, bp, offset);
+	}
+	j = 0;
+	for(i = 0; i < nio; i++){
+		if(tot > 0)
+			if(tot >= io[i].size)
+				tot -= io[i].size;
+			else
+				io[i].size = tot;
+		else{
+			j = i;
+			io[i].size = 0;
+		}
+		io[i].data = nil; /* safety */
+		/*
+		 * Release the reference first and only then forget the
+		 * pointer; clearing seg before putseg would leak it.
+		 */
+		putseg(io[i].seg);
+		io[i].seg = nil;
+
}
+	nio = j;
+	poperror();
+	return nio;
+}
+
+static void
+kernzio(Kzio *io)
+{
+	Segment *s;
+	void *data;
+	Kzio uio;
+
+	s = getzkseg();
+	if(s == nil)
+		error("can't use zero copy in this segment");
+	uio = *io;
+	data = alloczio(s, io->size);
+	memmove(data, io->data, io->size);
+	io->data = data;
+	DBG("kernzio: copy %Z %Z\n", io, &uio);
+	putseg(io->seg);
+	io->seg = s;
+}
+
+/*
+ * Zero copy I/O.
+ * I/O is performed using an array of Zio structures.
+ * Each one points to a shared buffer address indicating a length.
+ * Each entry indicating a length and using nil as the address
+ * is asking the system to allocate memory as needed (mread only).
+ *
+ * fd is the descriptor to operate on, io[0..nio-1] the user array
+ * (1 to 512 entries), count the byte limit for reads, and iswrite
+ * selects zwrite instead of zread.  An offset of -1 means "use and
+ * advance the channel's own offset"; any other value is passed to
+ * the device as given and the channel offset is left alone.
+ * Returns the value returned by the device's zread/zwrite, after
+ * copying that many entries back to io[].
+ */
+static int
+ziorw(int fd, Zio *io, int nio, usize count, vlong offset, int iswrite)
+{
+	int i, n, isprw;
+	Kzio *kio, skio[16];
+	Chan *c;
+	usize tot;
+
+	if(nio <= 0 || nio > 512)
+		error("wrong io[] size");
+	zioinit();
+	kio = nil;
+
+	io = validaddr(io, sizeof io[0] * nio, 1);
+	DBG("ziorw %d io%#p[%d] %uld %lld\n", fd, io, nio, count, offset);
+	if(DBGFLG)
+		for(i = 0; i < nio; i++)
+			print("\tio%#p[%d] = %Z %s\n",
+				io, i, (Kzio*)&io[i], iswrite?"w":"r");
+
+	if(iswrite)
+		c = fdtochan(fd, OWRITE, 1, 1);
+	else
+		c = fdtochan(fd, OREAD, 1, 1);
+	/*
+	 * isprw is set when the caller supplied an explicit offset.
+	 * Only when it did not do we start from the channel offset,
+	 * which is then advanced at the end of the call.
+	 */
+	isprw = offset != -1LL;
+	if(!isprw)
+		offset = c->offset;
+	if(waserror()){
+		cclose(c);
+		if(kio != nil){
+			for(i = 0; i < nio; i++)
+				if(kio[i].seg != nil)
+					putseg(kio[i].seg);
+			if(kio != skio)
+				free(kio);
+		}
+		nexterror();
+	}
+	if(nio < nelem(skio))
+		kio = skio;
+	else
+		kio = smalloc(sizeof kio[0] * nio);
+	/*
+	 * The error handler above walks every kio[i].seg, so entries
+	 * not filled in yet must read as nil (skio lives on the stack).
+	 */
+	memset(kio, 0, sizeof kio[0] * nio);
+	for(i = 0; i < nio; i++){
+		kio[i].Zio = io[i];
+		if(iswrite){
+			kio[i].seg = seg(up, PTR2UINT(io[i].data), 1);
+			if(kio[i].seg == nil)
+				error("invalid address in zio");
+			incref(kio[i].seg);
+			qunlock(&kio[i].seg->lk);
+			validaddr(kio[i].data, kio[i].size, 1);
+			if((kio[i].seg->type&SG_ZIO) == 0){
+				/*
+				 * It's not a segment where we can report
+				 * addresses to anyone once they are free.
+				 * So, allocate space in the kernel
+				 * and copy the user data there.
+				 */
+				kernzio(&kio[i]);
+			}
+			assert(kio[i].seg->type&SG_ZIO);
+		}else{
+			kio[i].data = nil;
+			kio[i].seg = nil;
+		}
+	}
+
+	if(c->dev->zread == nil){
+		DBG("installing devzread for %s\n", c->dev->name);
+		c->dev->zread = devzread;
+	}
+	if(c->dev->zwrite == nil){
+		DBG("installing devzwrite for %s\n", c->dev->name);
+		c->dev->zwrite = devzwrite;
+	}
+	if(iswrite)
+		n = c->dev->zwrite(c, kio, nio, offset);
+	else
+		n = c->dev->zread(c, kio, nio, count, offset);
+	tot = 0;
+	for(i = 0; i < n; i++){
+		io[i] = kio[i].Zio;
+		tot += kio[i].size;
+	}
+	if(!isprw){
+		/* unlike in syswrite, we update offsets at the end */
+		lock(c);
+		c->devoffset += tot;
+		c->offset += tot;
+		unlock(c);
+	}
+	poperror();
+	cclose(c);
+	if(kio != skio)
+		free(kio);
+	return n;
+}
+
+void
+sysziopread(Ar0 *ar0, va_list list)
+{
+	int fd, nio;
+	long count;
+	vlong offset;
+	Zio *io;
+
+	/*
+	 * int zpread(int fd, Zio *io[], int nio, usize count, vlong offset);
+	 */
+	fd = va_arg(list, int);
+	io = va_arg(list, Zio*);
+	nio = va_arg(list, int);
+	count = va_arg(list, usize);
+	offset = va_arg(list, vlong);
+	ar0->i = ziorw(fd, io, nio, count, offset, 0);
+}
+
+void
+sysziopwrite(Ar0 *ar0, va_list list)
+{
+	int fd, nio;
+	vlong offset;
+	Zio *io;
+
+	/*
+	 * int zpwrite(int fd, Zio *io[], int nio, vlong offset);
+	 */
+	fd = va_arg(list, int);
+	io = va_arg(list, Zio*);
+	nio = va_arg(list, int);
+	offset = va_arg(list, vlong);
+	ar0->i = ziorw(fd, io, nio, 0, offset, 1);
+}
+
+void
+sysziofree(Ar0 *, va_list list)
+{
+	Zio *io;
+	int nio, i;
+	Segment *s;
+
+	/*
+	 * zfree(Zio io[], int nio);
+	 */
+	io = va_arg(list, Zio*);
+	nio = va_arg(list, int);
+	io = validaddr(io, sizeof io[0] * nio, 1);
+	for(i = 0; i < nio; i++){
+		s = seg(up, PTR2UINT(io[i].data), 1);
+		if(s == nil)
+			error("invalid address in zio");
+		if((s->type&SG_ZIO) == 0){
+			qunlock(&s->lk);
+			error("segment is not a zero-copy segment");
+		}
+		zputaddr(s,
PTR2UINT(io[i].data));
+		qunlock(&s->lk);
+		io[i].data = nil;
+		io[i].size = 0;
+	}
+}
+
+/*
+ * This must go, but for now, we use Zmaps
+ * to allocate messages within the shared kernel segment.
+ * This is a simple first fit with a single fragment list.
+ */
+
+void
+newzmap(Segment *s)
+{
+	ZMap *zp;
+	Map *mp;
+
+	zioinit();
+	if((s->type&SG_KZIO) == 0)
+		panic("newzmap but not SG_KZIO");
+	if(s->zseg.map != nil)
+		panic("newzmap: already allocated");
+	zp = smalloc(sizeof(ZMap));
+	s->zseg.map = zp;
+	mp = smalloc(sizeof(Map));
+	mp->free = 1;
+	mp->addr = s->base;
+	mp->size = s->top - s->base;
+	zp->map = mp;
+	if(DBGFLG > 1){
+		DBG("newzmap:\n");
+		dumpzmap(zp);
+	}
+}
+
+/*
+ * Return the fragment that starts at addr to rmap.
+ *
+ * The fragment list is kept in increasing address order (newzmap
+ * creates one fragment and zmapalloc only splits a fragment in two),
+ * so the fragment is located by its exact start address, marked free,
+ * and then coalesced with a free predecessor and/or successor that is
+ * contiguous with it.
+ * Panics if no fragment starts at addr or if it is already free.
+ */
+static void
+zmapfree(ZMap* rmap, uintptr addr)
+{
+	Map *mp, *prev, *next;
+
+	lock(rmap);
+	if(waserror()){
+		unlock(rmap);
+		nexterror();
+	}
+	prev = nil;
+	for(mp = rmap->map; mp != nil; mp = mp->next){
+		/* "<=" would always stop at the head of the list */
+		if(mp->addr == addr)
+			break;
+		prev = mp;
+	}
+	if(mp == nil)
+		panic("zmapfree: no map");
+	if(mp->free == 1)
+		panic("zmapfree: already free");
+	/* the fragment is free whether or not it can be merged */
+	mp->free = 1;
+	if(prev != nil && prev->free && prev->addr + prev->size == addr){
+		prev->size += mp->size;
+		prev->next = mp->next;
+		free(mp);
+		mp = prev;
+	}
+	next = mp->next;
+	if(next != nil && next->free && mp->addr + mp->size == next->addr){
+		mp->size += next->size;
+		mp->next = next->next;
+		free(next);
+	}
+	poperror();
+	unlock(rmap);
+	if(DBGFLG > 1){
+		DBG("zmapfree %#ullx:\n", addr);
+		dumpzmap(rmap);
+	}
+}
+
+/*
+ * Allocate size bytes from rmap, first fit.
+ * The first free fragment that is large enough is used; if it is
+ * larger than needed it is split and the remainder stays free.
+ * Returns the start address of the allocated fragment, or 0 when no
+ * free fragment is large enough.
+ */
+static uintptr
+zmapalloc(ZMap* rmap, usize size)
+{
+	Map *mp, *nmp;
+
+	lock(rmap);
+	if(waserror()){
+		unlock(rmap);
+		nexterror();
+	}
+	/* stop at the end of the list instead of dereferencing nil */
+	for(mp = rmap->map; mp != nil && (mp->free == 0 || mp->size < size); mp = mp->next)
+		;
+	if(mp == nil){
+		poperror();
+		unlock(rmap);
+		return 0ULL;
+	}
+	if(mp->free == 0)
+		panic("zmapalloc: not free");
+	if(mp->size > size){
+		nmp = smalloc(sizeof *nmp);
+		*nmp = *mp;
+		nmp->addr += size;
+		nmp->size -= size;
+		nmp->free = 1;
+		mp->size = size;
+		mp->next = nmp;
+	}
+	mp->free = 0;
+
poperror(); + unlock(rmap); + if(DBGFLG > 1){ + DBG("zmapalloc %#ullx:\n", mp->addr); + dumpzmap(rmap); + } + return mp->addr; +} diff -Nru 0/sys/src/nix/port/taslock.c 4/sys/src/nix/port/taslock.c --- 0/sys/src/nix/port/taslock.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/taslock.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,326 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/edf.h" + +/* + * measure max lock cycles and max lock waiting time. + */ +#define LOCKCYCLES 0 + +uvlong maxlockcycles; +uvlong maxilockcycles; +uintptr maxlockpc; +uintptr maxilockpc; + +Lockstats lockstats; +Waitstats waitstats; +Lock waitstatslk; + +static void +newwaitstats(void) +{ + if(waitstats.pcs != nil) + return; + waitstats.pcs = malloc(NWstats * sizeof waitstats.pcs[0]); + waitstats.ns = malloc(NWstats * sizeof waitstats.ns[0]); + waitstats.wait = malloc(NWstats * sizeof waitstats.wait[0]); + waitstats.total = malloc(NWstats * sizeof waitstats.total[0]); + waitstats.type = malloc(NWstats * sizeof waitstats.type[0]); +} + +void +startwaitstats(int on) +{ + newwaitstats(); + mfence(); + waitstats.on = on; + print("lockstats %s\n", on?"on":"off"); +} + +void +clearwaitstats(void) +{ + newwaitstats(); + memset(waitstats.ns, 0, NWstats * sizeof(int)); + memset(waitstats.wait, 0, NWstats * sizeof(uvlong)); + memset(waitstats.total, 0, NWstats * sizeof(uvlong)); +} + +void +addwaitstat(uintptr pc, uvlong t0, int type) +{ + uint i; + uvlong w; + + if(waitstats.on == 0) + return; + + cycles(&w); + w -= t0; + mfence(); + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == pc){ + ainc(&waitstats.ns[i]); + if(w > waitstats.wait[i]) + waitstats.wait[i] = w; /* race but ok */ + waitstats.total[i] += w; /* race but ok */ + return; + } + if(!canlock(&waitstatslk)) + return; + + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == pc){ + ainc(&waitstats.ns[i]); + if(w > waitstats.wait[i]) + waitstats.wait[i] = w; /* race but ok 
*/ + waitstats.total[i] += w; + unlock(&waitstatslk); + return; + } + + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == 0){ + waitstats.ns[i] = 1; + waitstats.type[i] = type; + waitstats.wait[i] = w; + waitstats.total[i] = w; + mfence(); + waitstats.pcs[i] = pc; + waitstats.npcs++; + break; + } + + unlock(&waitstatslk); +} + +void +lockloop(Lock *l, uintptr pc) +{ + Proc *p; + + p = l->p; + print("lock %#p loop key %#ux pc %#p held by pc %#p proc %d\n", + l, l->key, pc, l->pc, p ? p->pid : 0); + dumpaproc(up); + if(p != nil) + dumpaproc(p); +} + +int +lock(Lock *l) +{ + int i; + uintptr pc; + uvlong t0; + + pc = getcallerpc(&l); + + lockstats.locks++; + if(up) + ainc(&up->nlocks); /* prevent being scheded */ + if(TAS(&l->key) == 0){ + if(up) + up->lastlock = l; + l->pc = pc; + l->p = up; + l->isilock = 0; + if(LOCKCYCLES) + cycles(&l->lockcycles); + + return 0; + } + if(up) + adec(&up->nlocks); + + cycles(&t0); + lockstats.glare++; + for(;;){ + lockstats.inglare++; + i = 0; + while(l->key){ + if(sys->nmach < 2 && up && up->edf && (up->edf->flags & Admitted)){ + /* + * Priority inversion, yield on a uniprocessor; on a + * multiprocessor, the other processor will unlock + */ + print("inversion %#p pc %#p proc %d held by pc %#p proc %d\n", + l, pc, up ? up->pid : 0, l->pc, l->p ? 
l->p->pid : 0); + up->edf->d = todget(nil); /* yield to process with lock */ + } + if(i++ > 100000000){ + i = 0; + lockloop(l, pc); + } + } + if(up) + ainc(&up->nlocks); + if(TAS(&l->key) == 0){ + if(up) + up->lastlock = l; + l->pc = pc; + l->p = up; + l->isilock = 0; + if(LOCKCYCLES) + cycles(&l->lockcycles); + if(l != &waitstatslk) + addwaitstat(pc, t0, WSlock); + return 1; + } + if(up) + adec(&up->nlocks); + } +} + +void +ilock(Lock *l) +{ + Mpl pl; + uintptr pc; + uvlong t0; + + pc = getcallerpc(&l); + lockstats.locks++; + + pl = splhi(); + if(TAS(&l->key) != 0){ + cycles(&t0); + lockstats.glare++; + /* + * Cannot also check l->pc, l->m, or l->isilock here + * because they might just not be set yet, or + * (for pc and m) the lock might have just been unlocked. + */ + for(;;){ + lockstats.inglare++; + splx(pl); + while(l->key) + ; + pl = splhi(); + if(TAS(&l->key) == 0){ + if(l != &waitstatslk) + addwaitstat(pc, t0, WSlock); + goto acquire; + } + } + } +acquire: + m->ilockdepth++; + if(up) + up->lastilock = l; + l->pl = pl; + l->pc = pc; + l->p = up; + l->isilock = 1; + l->m = m; + if(LOCKCYCLES) + cycles(&l->lockcycles); +} + +int +canlock(Lock *l) +{ + if(up) + ainc(&up->nlocks); + if(TAS(&l->key)){ + if(up) + adec(&up->nlocks); + return 0; + } + + if(up) + up->lastlock = l; + l->pc = getcallerpc(&l); + l->p = up; + l->m = m; + l->isilock = 0; + if(LOCKCYCLES) + cycles(&l->lockcycles); + + return 1; +} + +void +unlock(Lock *l) +{ + uvlong x; + + if(LOCKCYCLES){ + cycles(&x); + l->lockcycles = x - l->lockcycles; + if(l->lockcycles > maxlockcycles){ + maxlockcycles = l->lockcycles; + maxlockpc = l->pc; + } + } + + if(l->key == 0) + print("unlock: not locked: pc %#p\n", getcallerpc(&l)); + if(l->isilock) + print("unlock of ilock: pc %#p, held by %#p\n", getcallerpc(&l), l->pc); + if(l->p != up) + print("unlock: up changed: pc %#p, acquired at pc %#p, lock p %#p, unlock up %#p\n", getcallerpc(&l), l->pc, l->p, up); + l->m = nil; + l->key = 0; + coherence(); + + 
if(up && adec(&up->nlocks) == 0 && up->delaysched && islo()){ + /* + * Call sched if the need arose while locks were held + * But, don't do it from interrupt routines, hence the islo() test + */ + sched(); + } +} + +void +iunlock(Lock *l) +{ + Mpl pl; + uvlong x; + + if(LOCKCYCLES){ + cycles(&x); + l->lockcycles = x - l->lockcycles; + if(l->lockcycles > maxilockcycles){ + maxilockcycles = l->lockcycles; + maxilockpc = l->pc; + } + } + + if(l->key == 0) + print("iunlock: not locked: pc %#p\n", getcallerpc(&l)); + if(!l->isilock) + print("iunlock of lock: pc %#p, held by %#p\n", getcallerpc(&l), l->pc); + if(islo()) + print("iunlock while lo: pc %#p, held by %#p\n", getcallerpc(&l), l->pc); + if(l->m != m){ + print("iunlock by cpu%d, locked by cpu%d: pc %#p, held by %#p\n", + m->machno, l->m->machno, getcallerpc(&l), l->pc); + } + + pl = l->pl; + l->m = nil; + l->key = 0; + coherence(); + m->ilockdepth--; + if(up) + up->lastilock = nil; + splx(pl); +} + +void +portwaitwhile(void *value, uintptr val) +{ + int i; + /* it just waits for a little while */ + for(i = 0; i<100; i++) + if(*(uintptr *)value == val) + break; +} + +void (*waitwhile)(void *, uintptr) = portwaitwhile; diff -Nru 0/sys/src/nix/port/tcklock.c 4/sys/src/nix/port/tcklock.c --- 0/sys/src/nix/port/tcklock.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/tcklock.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,330 @@ + +/* + * Ticket locks. + * D. P. Reed and R. K. Kanodia. + * Synchronization with Eventcounts and Sequencers. + * Communications of the ACM, 22(2):115–23, Feb. 1979. + * + * A variant is used in Linux. + * + * These are here to measure them and compare wrt taslocks. + * If there's no difference, these should go + * (because taslocks are known to be robust and don't have bugs). 
+ */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "../port/edf.h" + +Lockstats lockstats; +Waitstats waitstats; +Lock waitstatslk; + +static void +newwaitstats(void) +{ + if(waitstats.pcs != nil) + return; + waitstats.pcs = malloc(NWstats * sizeof waitstats.pcs[0]); + waitstats.ns = malloc(NWstats * sizeof waitstats.ns[0]); + waitstats.wait = malloc(NWstats * sizeof waitstats.wait[0]); + waitstats.total = malloc(NWstats * sizeof waitstats.total[0]); + waitstats.type = malloc(NWstats * sizeof waitstats.type[0]); +} + +void +startwaitstats(int on) +{ + newwaitstats(); + mfence(); + waitstats.on = on; + print("lockstats %s\n", on?"on":"off"); +} + +void +clearwaitstats(void) +{ + newwaitstats(); + memset(waitstats.ns, 0, NWstats * sizeof(int)); + memset(waitstats.wait, 0, NWstats * sizeof(uvlong)); + memset(waitstats.total, 0, NWstats * sizeof(uvlong)); +} + +void +addwaitstat(uintptr pc, uvlong t0, int type) +{ + uint i; + uvlong w; + + if(waitstats.on == 0) + return; + + cycles(&w); + w -= t0; + mfence(); + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == pc){ + ainc(&waitstats.ns[i]); + if(w > waitstats.wait[i]) + waitstats.wait[i] = w; /* race but ok */ + waitstats.total[i] += w; /* race but ok */ + return; + } + if(!canlock(&waitstatslk)) + return; + + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == pc){ + ainc(&waitstats.ns[i]); + if(w > waitstats.wait[i]) + waitstats.wait[i] = w; /* race but ok */ + waitstats.total[i] += w; + unlock(&waitstatslk); + return; + } + + for(i = 0; i < NWstats; i++) + if(waitstats.pcs[i] == 0){ + waitstats.ns[i] = 1; + waitstats.type[i] = type; + waitstats.wait[i] = w; + waitstats.total[i] = w; + mfence(); + waitstats.pcs[i] = pc; + waitstats.npcs++; + break; + } + + unlock(&waitstatslk); +} + +void +lockloop(Lock *l, uintptr pc) +{ + Proc *p; + + p = l->p; + print("lock %#p loop key %#ux pc %#p held by pc %#p proc %d\n", + l, l->key, pc, l->pc, p ? 
p->pid : 0); + dumpaproc(up); + if(p != nil) + dumpaproc(p); +} + +static u32int +getuser(u32int key) +{ + return key & 0xFFFF; +} + +static u32int +getticket(u32int key) +{ + return (key>>16) & 0xFFFF; +} + +static u32int +incuser(u32int *key) +{ + u32int old, new; + + do{ + old = *key; + new = (old&0xFFFF0000) | ((old+1)&0xFFFF); + }while(!cas32(key, old, new)); + return getuser(new); +} + +static u32int +incticket(u32int *key) +{ + u32int old, new; + + do{ + old = *key; + + new = ((old+0x10000)&0xFFFF0000) | (old&0xFFFF); + }while(!cas32(key, old, new)); + return getticket(new); +} + +static u32int +myticket(u32int user) +{ + return (user-1) & 0xFFFF; +} + +int +lock(Lock *l) +{ + int i; + uintptr pc; + u32int user; + uvlong t0; + + pc = getcallerpc(&l); + lockstats.locks++; + if(up) + ainc(&up->nlocks); /* prevent being scheded */ + cycles(&t0); + user = incuser(&l->key); + if(getticket(l->key) != myticket(user)){ + if(up) + adec(&up->nlocks); + lockstats.glare++; + i = 0; + while(getticket(l->key) != myticket(user)){ + if(sys->nmach < 2 && up && up->edf && (up->edf->flags & Admitted)){ + /* + * Priority inversion, yield on a uniprocessor; on a + * multiprocessor, the other processor will unlock + */ + print("inversion %#p pc %#p proc %d held by pc %#p proc %d\n", + l, pc, up ? up->pid : 0, l->pc, l->p ? 
l->p->pid : 0); + up->edf->d = todget(nil); /* yield to process with lock */ + } + if(i++ > 100000000){ + i = 0; + lockloop(l, pc); + } + } + if(up) + ainc(&up->nlocks); + } + l->pc = pc; + l->p = up; + l->m = m; + l->isilock = 0; + if(up) + up->lastlock = l; + if(l != &waitstatslk) + addwaitstat(pc, t0, WSlock); + return 0; +} + +void +ilock(Lock *l) +{ + Mpl pl; + uintptr pc; + uvlong t0; + u32int user; + + pc = getcallerpc(&l); + lockstats.locks++; + + pl = splhi(); + cycles(&t0); + + user = incuser(&l->key); + if(getticket(l->key) != myticket(user)){ + splx(pl); + while(getticket(l->key) != myticket(user)) + ; + pl = splhi(); + } + m->ilockdepth++; + if(up) + up->lastilock = l; + l->pl = pl; + l->pc = pc; + l->p = up; + l->isilock = 1; + l->m = m; + if(l != &waitstatslk) + addwaitstat(pc, t0, WSlock); +} + +int +canlock(Lock *l) +{ + Lock try, new; + uintptr pc; + uvlong t0; + + pc = getcallerpc(&l); + + lockstats.locks++; + if(up) + ainc(&up->nlocks); /* prevent being scheded */ + cycles(&t0); + + try = *l; + if(getuser(try.key) != getticket(try.key)){ + Cant: + if(up) + adec(&up->nlocks); + return 0; + } + new = try; + incuser(&new.key); + if(!cas32(&l->key, try.key, new.key)) + goto Cant; + l->pc = pc; + l->p = up; + l->m = m; + if(up) + up->lastlock = l; + l->isilock = 0; + return 1; +} + +void +unlock(Lock *l) +{ + if(getticket(l->key) == getuser(l->key)) + print("unlock: not locked: pc %#p\n", getcallerpc(&l)); + if(l->isilock) + print("unlock of ilock: pc %#p, held by %#p\n", getcallerpc(&l), l->pc); + if(l->p != up) + print("unlock: up changed: pc %#p, acquired at pc %#p," + " lock p %#p, unlock up %#p\n", getcallerpc(&l), l->pc, l->p, up); + l->m = nil; + incticket(&l->key); + + if(up && adec(&up->nlocks) == 0 && up->delaysched && islo()){ + /* + * Call sched if the need arose while locks were held + * But, don't do it from interrupt routines, hence the islo() test + */ + sched(); + } +} + +void +iunlock(Lock *l) +{ + Mpl pl; + + if(getticket(l->key) 
== getuser(l->key)) + print("iunlock: not locked: pc %#p\n", getcallerpc(&l)); + if(!l->isilock) + print("iunlock of lock: pc %#p, held by %#p\n", getcallerpc(&l), l->pc); + if(islo()) + print("iunlock while lo: pc %#p, held by %#p\n", getcallerpc(&l), l->pc); + if(l->m != m){ /* l->m is nil once the lock was released: guard the dereference */ + print("iunlock by cpu%d, locked by cpu%d: pc %#p, held by %#p\n", + m->machno, l->m ? l->m->machno : -1, getcallerpc(&l), l->pc); + } + + pl = l->pl; + l->m = nil; + incticket(&l->key); + m->ilockdepth--; + if(up) + up->lastilock = nil; + splx(pl); +} + +void +portwaitwhile(void *value, uintptr val) +{ + while (*(uintptr*)value == val) + ; +} + +void (*waitwhile)(void *, uintptr) = portwaitwhile; diff -Nru 0/sys/src/nix/port/tod.c 4/sys/src/nix/port/tod.c --- 0/sys/src/nix/port/tod.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/tod.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,314 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +/* + * Compute nanosecond epoch time from the fastest ticking clock + * on the system. Converting the time to nanoseconds requires + * the following formula + * + * t = (((1000000000<<32)/f)*ticks)>>32 + * + * where + * + * 'f' is the clock frequency + * 'ticks' are clock ticks + * + * to avoid too much calculation in todget(), we calculate + * + * mult = (1000000000<<32)/f + * + * each time f is set. f is normally set by a user level + * program writing to /dev/fastclock. mul64fract will then + * take that fractional multiplier and a 64 bit integer and + * return the resulting integer product. + * + * We assume that the cpu's of a multiprocessor are synchronized. + * This assumption needs to be questioned with each new architecture. 
+ */ + +/* frequency of the tod clock */ +#define TODFREQ 1000000000ULL +#define MicroFREQ 1000000ULL + +struct { + int init; /* true if initialized */ + ulong cnt; + Lock; + uvlong multiplier; /* ns = off + (multiplier*ticks)>>31 */ + uvlong divider; /* ticks = (divider*(ns-off))>>31 */ + uvlong umultiplier; /* µs = (µmultiplier*ticks)>>31 */ + uvlong udivider; /* ticks = (µdivider*µs)>>31 */ + vlong hz; /* frequency of fast clock */ + vlong last; /* last reading of fast clock */ + vlong off; /* offset from epoch to last */ + vlong lasttime; /* last return value from todget */ + vlong delta; /* add 'delta' each slow clock tick from sstart to send */ + ulong sstart; /* ... */ + ulong send; /* ... */ +} tod; + +static void todfix(void); + +void +todinit(void) +{ + if(tod.init) + return; + ilock(&tod); + tod.last = fastticks((uvlong *)&tod.hz); + iunlock(&tod); + todsetfreq(tod.hz); + tod.init = 1; + addclock0link(todfix, 100); +} + +/* + * calculate multiplier + */ +void +todsetfreq(vlong f) +{ + ilock(&tod); + tod.hz = f; + + /* calculate multiplier for time conversion */ + tod.multiplier = mk64fract(TODFREQ, f); + tod.divider = mk64fract(f, TODFREQ) + 1; + tod.umultiplier = mk64fract(MicroFREQ, f); + tod.udivider = mk64fract(f, MicroFREQ) + 1; + iunlock(&tod); +} + +/* + * Set the time of day struct + */ +void +todset(vlong t, vlong delta, int n) +{ + if(!tod.init) + todinit(); + + ilock(&tod); + if(t >= 0){ + tod.off = t; + tod.last = fastticks(nil); + tod.lasttime = 0; + tod.delta = 0; + tod.sstart = tod.send; + } else { + if(n <= 0) + n = 1; + n *= HZ; + if(delta < 0 && n > -delta) + n = -delta; + if(delta > 0 && n > delta) + n = delta; + delta = delta/n; + tod.sstart = sys->ticks; + tod.send = tod.sstart + n; + tod.delta = delta; + } + iunlock(&tod); +} + +/* + * get time of day + */ +vlong +todget(vlong *ticksp) +{ + uvlong x; + vlong ticks, diff; + ulong t; + + if(!tod.init) + todinit(); + + /* + * we don't want time to pass twixt the measuring of fastticks 
+ * and grabbing tod.last. Also none of the vlongs are atomic so + * we have to look at them inside the lock. + */ + ilock(&tod); + tod.cnt++; + ticks = fastticks(nil); + + /* add in correction */ + if(tod.sstart != tod.send){ + t = sys->ticks; + if(t >= tod.send) + t = tod.send; + tod.off = tod.off + tod.delta*(t - tod.sstart); + tod.sstart = t; + } + + /* convert to epoch */ + diff = ticks - tod.last; + if(diff < 0) + diff = 0; + mul64fract(&x, diff, tod.multiplier); + x += tod.off; + + /* time can't go backwards */ + if(x < tod.lasttime) + x = tod.lasttime; + else + tod.lasttime = x; + + iunlock(&tod); + + if(ticksp != nil) + *ticksp = ticks; + + return x; +} + +/* + * convert time of day to ticks + */ +uvlong +tod2fastticks(vlong ns) +{ + uvlong x; + + ilock(&tod); + mul64fract(&x, ns-tod.off, tod.divider); + x += tod.last; + iunlock(&tod); + return x; +} + +/* + * called regularly to avoid calculation overflows + */ +static void +todfix(void) +{ + vlong ticks, diff; + uvlong x; + + ticks = fastticks(nil); + + diff = ticks - tod.last; + if(diff > tod.hz){ + ilock(&tod); + + /* convert to epoch */ + mul64fract(&x, diff, tod.multiplier); +if(x > 30000000000ULL) print("todfix %llud\n", x); + x += tod.off; + + /* protect against overflows */ + tod.last = ticks; + tod.off = x; + + iunlock(&tod); + } +} + +long +seconds(void) +{ + vlong x; + int i; + + x = todget(nil); + x = x/TODFREQ; + i = x; + return i; +} + +uvlong +fastticks2us(uvlong ticks) +{ + uvlong res; + + if(!tod.init) + todinit(); + mul64fract(&res, ticks, tod.umultiplier); + return res; +} + +uvlong +us2fastticks(uvlong us) +{ + uvlong res; + + if(!tod.init) + todinit(); + mul64fract(&res, us, tod.udivider); + return res; +} + +/* + * convert milliseconds to fast ticks + */ +uvlong +ms2fastticks(ulong ms) +{ + if(!tod.init) + todinit(); + return (tod.hz*ms)/1000ULL; +} + +/* + * convert nanoseconds to fast ticks + */ +uvlong +ns2fastticks(uvlong ns) +{ + uvlong res; + + if(!tod.init) + todinit(); + 
mul64fract(&res, ns, tod.divider); + return res; +} + +/* + * convert fast ticks to ns + */ +uvlong +fastticks2ns(uvlong ticks) +{ + uvlong res; + + if(!tod.init) + todinit(); + mul64fract(&res, ticks, tod.multiplier); + return res; +} + +/* + * Make a 64 bit fixed point number that has a decimal point + * to the left of the low order 32 bits. This is used with + * mul64fract for converting twixt nanoseconds and fastticks. + * + * multiplier = (to<<32)/from + */ +uvlong +mk64fract(uvlong to, uvlong from) +{ +/* + int shift; + + if(to == 0ULL) + return 0ULL; + + shift = 0; + while(shift < 32 && to < (1ULL<<(32+24))){ + to <<= 8; + shift += 8; + } + while(shift < 32 && to < (1ULL<<(32+31))){ + to <<= 1; + shift += 1; + } + + return (to/from)<<(32-shift); + */ + return (to<<32) / from; +} diff -Nru 0/sys/src/nix/port/usb.h 4/sys/src/nix/port/usb.h --- 0/sys/src/nix/port/usb.h Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/usb.h Wed Feb 6 00:00:00 2013 @@ -0,0 +1,196 @@ +/* + * common USB definitions. + */ +#define dprint(...) do if(debug)print(__VA_ARGS__); while(0) +#define ddprint(...) do if(debug>1)print(__VA_ARGS__); while(0) +#define deprint(...) do if(debug || ep->debug)print(__VA_ARGS__); while(0) +#define ddeprint(...) do if(debug>1 || ep->debug>1)print(__VA_ARGS__); while(0) + +#define GET2(p) ((((p)[1]&0xFF)<<8)|((p)[0]&0xFF)) +#define PUT2(p,v) {((p)[0] = (v)); ((p)[1] = (v)>>8);} + +typedef struct Udev Udev; /* USB device */ +typedef struct Ep Ep; /* Endpoint */ +typedef struct Hci Hci; /* Host Controller Interface */ +typedef struct Hciimpl Hciimpl; /* Link to the controller impl. */ + +enum +{ + /* fundamental constants */ + Ndeveps = 16, /* max nb. of endpoints per device */ + + /* tunable parameters */ + Nhcis = 16, /* max nb. of HCIs */ + Neps = 64, /* max nb. of endpoints */ + Maxctllen = 32*1024, /* max allowed sized for ctl. xfers; see Maxdevconf */ + Xfertmout = 2000, /* default request time out (ms) */ + + /* transfer types. 
keep this order */ + Tnone = 0, /* no transfer type configured */ + Tctl, /* wr req + rd/wr data + wr/rd sts */ + Tiso, /* stream rd or wr (real time) */ + Tbulk, /* stream rd or wr */ + Tintr, /* msg rd or wr */ + Nttypes, /* number of transfer types */ + + Epmax = 0xF, /* max ep. addr */ + Devmax = 0x7F, /* max dev. addr */ + + /* Speeds */ + Fullspeed = 0, + Lowspeed, + Highspeed, + Nospeed, + + /* request type */ + Rh2d = 0<<7, + Rd2h = 1<<7, + Rstd = 0<<5, + Rclass = 1<<5, + Rdev = 0, + Rep = 2, + Rother = 3, + + /* req offsets */ + Rtype = 0, + Rreq = 1, + Rvalue = 2, + Rindex = 4, + Rcount = 6, + Rsetuplen = 8, + + /* standard requests */ + Rgetstatus = 0, + Rclearfeature = 1, + Rsetfeature = 3, + Rsetaddr = 5, + Rgetdesc = 6, + + /* device states */ + Dconfig = 0, /* configuration in progress */ + Denabled, /* address assigned */ + Ddetach, /* device is detached */ + Dreset, /* its port is being reset */ + + /* (root) Hub reply to port status (reported to usbd) */ + HPpresent = 0x1, + HPenable = 0x2, + HPsuspend = 0x4, + HPovercurrent = 0x8, + HPreset = 0x10, + HPpower = 0x100, + HPslow = 0x200, + HPhigh = 0x400, + HPstatuschg = 0x10000, + HPchange = 0x20000, +}; + +/* + * Services provided by the driver. + * epopen allocates hardware structures to prepare the endpoint + * for I/O. This happens when the user opens the data file. + * epclose releases them. This happens when the data file is closed. + * epwrite tries to write the given bytes, waiting until all of them + * have been written (or failed) before returning; but not for Iso. + * epread does the same for reading. + * It can be assumed that endpoints are DMEXCL but concurrent + * read/writes may be issued and the controller must take care. + * For control endpoints, device-to-host requests must be followed by + * a read of the expected length if needed. + * The port requests are called when usbd issues commands for root + * hubs. Port status must return bits as a hub request would do. 
+ * Toggle handling and other details are left for the controller driver + * to avoid mixing too much the controller and the common device. + * While an endpoint is closed, its toggles are saved in the Ep struct. + */ +struct Hciimpl +{ + void *aux; /* for controller info */ + void (*init)(Hci*); /* init. controller */ + void (*dump)(Hci*); /* debug */ + void (*interrupt)(Ureg*, void*); /* service interrupt */ + void (*epopen)(Ep*); /* prepare ep. for I/O */ + void (*epclose)(Ep*); /* terminate I/O on ep. */ + long (*epread)(Ep*,void*,long); /* read (receive) data from ep */ + long (*epwrite)(Ep*,void*,long); /* write (transmit) data to ep */ + char* (*seprintep)(char*,char*,Ep*); /* debug */ + int (*portenable)(Hci*, int, int); /* enable/disable port */ + int (*portreset)(Hci*, int, int); /* set/clear port reset */ + int (*portstatus)(Hci*, int); /* get port status */ + void (*shutdown)(Hci*); /* shutdown for reboot */ + void (*debug)(Hci*, int); /* set/clear debug flag */ +}; + +struct Hci +{ + ISAConf; /* hardware info */ + uint tbdf; /* BOTCH should be hardware info */ + int ctlrno; /* controller number */ + int nports; /* number of ports in hub */ + int highspeed; + Hciimpl; /* HCI driver */ +}; + +/* + * USB endpoint. + * All endpoints are kept in a global array. The first + * block of fields is constant after endpoint creation. + * The rest is configuration information given to all controllers. + * The first endpoint for a device (known as ep0) represents the + * device and is used to configure it and create other endpoints. + * Its QLock also protects per-device data in dev. + * See Hciimpl for clues regarding how this is used by controllers. + */ +struct Ep +{ + Ref; /* one per fid (and per dev ep for ep0s) */ + + /* const once inited. 
*/ + int idx; /* index in global eps array */ + int nb; /* endpoint number in device */ + Hci* hp; /* HCI it belongs to */ + Udev* dev; /* device for the endpoint */ + Ep* ep0; /* control endpoint for its device */ + + QLock; /* protect fields below */ + char* name; /* for ep file names at #u/ */ + int inuse; /* endpoint is open */ + int mode; /* OREAD, OWRITE, or ORDWR */ + int clrhalt; /* true if halt was cleared on ep. */ + int debug; /* per endpoint debug flag */ + char* info; /* for humans to read */ + long maxpkt; /* maximum packet size */ + int ttype; /* transfer type */ + ulong load; /* in µs, for a transfer of maxpkt bytes */ + void* aux; /* for controller specific info */ + int rhrepl; /* fake root hub replies */ + int toggle[2]; /* saved toggles (while ep is not in use) */ + long pollival; /* poll interval ([µ]frames; intr/iso) */ + long hz; /* poll frequency (iso) */ + long samplesz; /* sample size (iso) */ + int ntds; /* nb. of Tds per µframe */ + int tmout; /* 0 or timeout for transfers (ms) */ +}; + +/* + * Per-device configuration and cached list of endpoints. + * eps[0]->QLock protects it. + */ +struct Udev +{ + int nb; /* USB device number */ + int state; /* state for the device */ + int ishub; /* hubs can allocate devices */ + int isroot; /* is a root hub */ + int speed; /* Full/Low/High/No -speed */ + int hub; /* dev number for the parent hub */ + int port; /* port number in the parent hub */ + Ep* eps[Ndeveps]; /* end points for this device (cached) */ +}; + +void addhcitype(char *type, int (*reset)(Hci*)); + +extern char *usbmodename[]; +extern char Estalled[]; + +extern char *seprintdata(char*,char*,uchar*,int); diff -Nru 0/sys/src/nix/port/usbehci.c 4/sys/src/nix/port/usbehci.c --- 0/sys/src/nix/port/usbehci.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/usbehci.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,3224 @@ +/* + * USB Enhanced Host Controller Interface (EHCI) driver + * High speed USB 2.0. 
+ * + * Note that all of our unlock routines call coherence. + * + * BUGS: + * - Too many delays and ilocks. + * - bandwidth admission control must be done per-frame. + * - requires polling (some controllers miss interrupts). + * - must warn of power overruns. + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/usb.h" +#include "../port/portusbehci.h" +#include "usbehci.h" +#include "uncached.h" + +#define diprint if(ehcidebug || iso->debug)print +#define ddiprint if(ehcidebug>1 || iso->debug>1)print +#define dqprint if(ehcidebug || (qh->io && qh->io->debug))print +#define ddqprint if(ehcidebug>1 || (qh->io && qh->io->debug>1))print + +#define TRUNC(x, sz) ((x) & ((sz)-1)) +#define LPTR(q) ((ulong*)KADDR((q) & ~0x1F)) + +typedef struct Ctlio Ctlio; +typedef union Ed Ed; +typedef struct Edpool Edpool; +typedef struct Itd Itd; +typedef struct Qio Qio; +typedef struct Qtd Qtd; +typedef struct Sitd Sitd; +typedef struct Td Td; + +/* + * EHCI interface registers and bits + */ +enum +{ + /* Queue states (software) */ + Qidle = 0, + Qinstall, + Qrun, + Qdone, + Qclose, + Qfree, + + Enabledelay = 100, /* waiting for a port to enable */ + Abortdelay = 5, /* delay after cancelling Tds (ms) */ + + Incr = 64, /* for pools of Tds, Qhs, etc. */ + Align = 128, /* in bytes for all those descriptors */ + + /* Keep them as a power of 2, lower than ctlr->nframes */ + /* Also, keep Nisoframes >= Nintrleafs */ + Nintrleafs = 32, /* nb. of leaf frames in intr. tree */ + Nisoframes = 64, /* nb. 
of iso frames (in window) */ + + /* + * HW constants + */ + + /* Itd bits (csw[]) */ + Itdactive = 0x80000000, /* execution enabled */ + Itddberr = 0x40000000, /* data buffer error */ + Itdbabble = 0x20000000, /* babble error */ + Itdtrerr = 0x10000000, /* transaction error */ + Itdlenshift = 16, /* transaction length */ + Itdlenmask = 0xFFF, + Itdioc = 0x00008000, /* interrupt on complete */ + Itdpgshift = 12, /* page select field */ + Itdoffshift = 0, /* transaction offset */ + /* Itd bits, buffer[] */ + Itdepshift = 8, /* endpoint address (buffer[0]) */ + Itddevshift = 0, /* device address (buffer[0]) */ + Itdin = 0x800, /* is input (buffer[1]) */ + Itdout = 0, + Itdmaxpktshift = 0, /* max packet (buffer[1]) */ + Itdntdsshift = 0, /* nb. of tds per µframe (buffer[2]) */ + + Itderrors = Itddberr|Itdbabble|Itdtrerr, + + /* Sitd bits (epc) */ + Stdin = 0x80000000, /* input direction */ + Stdportshift = 24, /* hub port number */ + Stdhubshift = 16, /* hub address */ + Stdepshift = 8, /* endpoint address */ + Stddevshift = 0, /* device address */ + /* Sitd bits (mfs) */ + Stdssmshift = 0, /* split start mask */ + Stdscmshift = 8, /* split complete mask */ + /* Sitd bits (csw) */ + Stdioc = 0x80000000, /* interrupt on complete */ + Stdpg = 0x40000000, /* page select */ + Stdlenshift = 16, /* total bytes to transfer */ + Stdlenmask = 0x3FF, + Stdactive = 0x00000080, /* active */ + Stderr = 0x00000040, /* tr. translator error */ + Stddberr = 0x00000020, /* data buffer error */ + Stdbabble = 0x00000010, /* babble error */ + Stdtrerr = 0x00000008, /* transaction error */ + Stdmmf = 0x00000004, /* missed µframe */ + Stddcs = 0x00000002, /* do complete split */ + + Stderrors = Stderr|Stddberr|Stdbabble|Stdtrerr|Stdmmf, + + /* Sitd bits buffer[1] */ + Stdtpall = 0x00000000, /* all payload here (188 bytes) */ + Stdtpbegin = 0x00000008, /* first payload for fs trans. 
*/ + Stdtcntmask = 0x00000007, /* T-count */ + + /* Td bits (csw) */ + Tddata1 = 0x80000000, /* data toggle 1 */ + Tddata0 = 0x00000000, /* data toggle 0 */ + Tdlenshift = 16, /* total bytes to transfer */ + Tdlenmask = 0x7FFF, + Tdmaxpkt = 0x5000, /* max buffer for a Td */ + Tdioc = 0x00008000, /* interrupt on complete */ + Tdpgshift = 12, /* current page */ + Tdpgmask = 7, + Tderr1 = 0x00000400, /* bit 0 of error counter */ + Tderr2 = 0x00000800, /* bit 1 of error counter */ + Tdtokout = 0x00000000, /* direction out */ + Tdtokin = 0x00000100, /* direction in */ + Tdtoksetup = 0x00000200, /* setup packet */ + Tdtok = 0x00000300, /* token bits */ + Tdactive = 0x00000080, /* active */ + Tdhalt = 0x00000040, /* halted */ + Tddberr = 0x00000020, /* data buffer error */ + Tdbabble = 0x00000010, /* babble error */ + Tdtrerr = 0x00000008, /* transaction error */ + Tdmmf = 0x00000004, /* missed µframe */ + Tddcs = 0x00000002, /* do complete split */ + Tdping = 0x00000001, /* do ping */ + + Tderrors = Tdhalt|Tddberr|Tdbabble|Tdtrerr|Tdmmf, + + /* Qh bits (eps0) */ + Qhrlcmask = 0xF, /* nak reload count */ + Qhrlcshift = 28, /* nak reload count */ + Qhnhctl = 0x08000000, /* not-high speed ctl */ + Qhmplmask = 0x7FF, /* max packet */ + Qhmplshift = 16, + Qhhrl = 0x00008000, /* head of reclamation list */ + Qhdtc = 0x00004000, /* data toggle ctl. */ + Qhint = 0x00000080, /* inactivate on next transition */ + Qhspeedmask = 0x00003000, /* speed bits */ + Qhfull = 0x00000000, /* full speed */ + Qhlow = 0x00001000, /* low speed */ + Qhhigh = 0x00002000, /* high speed */ + + /* Qh bits (eps1) */ + Qhmultshift = 30, /* multiple tds per µframe */ + Qhmultmask = 3, + Qhportshift = 23, /* hub port number */ + Qhhubshift = 16, /* hub address */ + Qhscmshift = 8, /* split completion mask bits */ + Qhismshift = 0, /* interrupt sched. 
mask bits */ +}; + +/* + * Endpoint tree (software) + */ +struct Qtree +{ + int nel; + int depth; + ulong* bw; + Qh** root; +}; + +/* + * One per endpoint per direction, to control I/O. + */ +struct Qio +{ + QLock; /* for the entire I/O process */ + Rendez; /* wait for completion */ + Qh* qh; /* Td list (field const after init) */ + int usbid; /* usb address for endpoint/device */ + int toggle; /* Tddata0/Tddata1 */ + int tok; /* Tdtoksetup, Tdtokin, Tdtokout */ + ulong iotime; /* last I/O time; to hold interrupt polls */ + int debug; /* debug flag from the endpoint */ + char* err; /* error string */ + char* tag; /* debug (no room in Qh for this) */ + ulong bw; +}; + +struct Ctlio +{ + Qio; /* a single Qio for each RPC */ + uchar* data; /* read from last ctl req. */ + int ndata; /* number of bytes read */ +}; + +struct Isoio +{ + QLock; + Rendez; /* wait for space/completion/errors */ + int usbid; /* address used for device/endpoint */ + int tok; /* Tdtokin or Tdtokout */ + int state; /* Qrun -> Qdone -> Qrun... -> Qclose */ + int nframes; /* number of frames ([S]Itds) used */ + uchar* data; /* iso data buffers if not embedded */ + char* err; /* error string */ + int nerrs; /* nb of consecutive I/O errors */ + ulong maxsize; /* ntds * ep->maxpkt */ + long nleft; /* number of bytes left from last write */ + int debug; /* debug flag from the endpoint */ + int hs; /* is high speed? */ + Isoio* next; /* in list of active Isoios */ + ulong td0frno; /* first frame used in ctlr */ + union{ + Itd* tdi; /* next td processed by interrupt */ + Sitd* stdi; + }; + union{ + Itd* tdu; /* next td for user I/O in tdps */ + Sitd* stdu; + }; + union{ + Itd** itdps; /* itdps[i]: ptr to Itd for i-th frame or nil */ + Sitd** sitdps; /* sitdps[i]: ptr to Sitd for i-th frame or nil */ + ulong** tdps; /* same thing, as seen by hw */ + }; +}; + +struct Edpool +{ + Lock; + Ed* free; + int nalloc; + int ninuse; + int nfree; +}; + +/* + * We use the 64-bit version for Itd, Sitd, Td, and Qh. 
+ * If the ehci is 64-bit capable it assumes we are using those + * structures even when the system is 32 bits. + */ + +/* + * Iso transfer descriptor. hw: 92 bytes, 108 bytes total + * aligned to 32. + */ +struct Itd +{ + u32int link; /* to next hw struct */ + u32int csw[8]; /* sts/length/pg/off. updated by hw */ + u32int buffer[7]; /* buffer pointers, addrs, maxsz */ + u32int xbuffer[7]; /* high 32 bits of buffer for 64-bits */ + + u32int _pad0; /* pad to next cache line */ + /* cache-line boundary here */ + + /* software */ + Itd* next; + uint ndata; /* number of bytes in data */ + uint mdata; /* max number of bytes in data */ + uchar* data; +}; + +/* + * Split transaction iso transfer descriptor. + * hw: 36 bytes, 52 bytes total. aligned to 32. + */ +struct Sitd +{ + u32int link; /* to next hw struct */ + u32int epc; /* static endpoint state. addrs */ + u32int mfs; /* static endpoint state. µ-frame sched. */ + u32int csw; /* transfer state. updated by hw */ + u32int buffer[2]; /* buf. ptr/offset. offset updated by hw */ + /* buf ptr/TP/Tcnt. TP/Tcnt updated by hw */ + u32int blink; /* back pointer */ + /* cache-line boundary after xbuffer[0] */ + u32int xbuffer[2]; /* high 32 bits of buffer for 64-bits */ + + /* software */ + Sitd* next; + uint ndata; /* number of bytes in data */ + uint mdata; /* max number of bytes in data */ + uchar* data; +}; + +/* + * Queue element transfer descriptor. + * hw: first 52 bytes, total 68+sbuff bytes. aligned to 32 bytes. + */ +struct Td +{ + u32int nlink; /* to next Td */ + u32int alink; /* alternate link to next Td */ + u32int csw; /* cmd/sts. updated by hw */ + u32int buffer[5]; /* buf ptrs. 
offset updated by hw */ + /* cache-line boundary here */ + u32int xbuffer[5]; /* high 32 bits of buffer for 64-bits */ + + /* software */ + Td* next; /* in qh or Isoio or free list */ + uint ndata; /* bytes available/used at data */ + uchar* data; /* pointer to actual data */ + uchar* buff; /* allocated data buffer or nil */ + uchar sbuff[1]; /* first byte of embedded buffer */ +}; + +/* + * Queue head. Aligned to 32 bytes. + * hw: first 68 bytes, 92 total. + */ +struct Qh +{ + u32int link; /* to next Qh in round robin */ + u32int eps0; /* static endpoint state. addrs */ + u32int eps1; /* static endpoint state. µ-frame sched. */ + + /* updated by hw */ + u32int tclink; /* current Td (No Term bit here!) */ + u32int nlink; /* to next Td */ + u32int alink; /* alternate link to next Td */ + u32int csw; /* cmd/sts. updated by hw */ + /* cache-line boundary after buffer[0] */ + u32int buffer[5]; /* buf ptrs. offset updated by hw */ + u32int xbuffer[5]; /* high 32 bits of buffer for 64-bits */ + + /* software */ + Qh* next; /* in controller list/tree of Qhs */ + int state; /* Qidle -> Qinstall -> Qrun -> Qdone | Qclose */ + Qio* io; /* for this queue */ + Td* tds; /* for this queue */ + int sched; /* slot for intr. Qhs; -1 when not in the intr. tree */ + Qh* inext; /* next in list of intr. qhs */ +}; + +/* + * We can avoid frame span traversal nodes if we don't span frames. + * Just schedule transfers that can fit on the current frame and + * wait a little bit otherwise. + */ + +/* + * Software. Ehci descriptors provided by pool. + * There are so few because we avoid using Fstn. 
+ */ +union Ed +{ + Ed* next; /* in free list */ + Qh qh; + Td td; + Itd itd; + Sitd sitd; + uchar align[Align]; +}; + +int ehcidebug = 0; + +static Edpool edpool; +static char Ebug[] = "not yet implemented"; +static char* qhsname[] = { "idle", "install", "run", "done", "close", "FREE" }; + +Ecapio* ehcidebugcapio; +int ehcidebugport; + +void +ehcirun(Ctlr *ctlr, int on) +{ + int i; + Eopio *opio; + + ddprint("ehci %#p %s\n", ctlr->capio, on ? "starting" : "halting"); + opio = ctlr->opio; + if(on) + opio->cmd |= Crun; + else + opio->cmd = Cstop; + coherence(); + for(i = 0; i < 100; i++) + if(on == 0 && (opio->sts & Shalted) != 0) + break; + else if(on != 0 && (opio->sts & Shalted) == 0) + break; + else + delay(1); + if(i == 100) + print("ehci %#p %s cmd timed out\n", + ctlr->capio, on ? "run" : "halt"); + ddprint("ehci %#p cmd %#lux sts %#lux\n", + ctlr->capio, opio->cmd, opio->sts); +} + +static void* +edalloc(void) +{ + Ed *ed, *pool; + int i; + + lock(&edpool); + if(edpool.free == nil){ + pool = mallocalign(Incr*sizeof(Ed), Align, 0, 0); + if(pool == nil) + panic("edalloc"); + for(i=Incr; --i>=0;){ + pool[i].next = edpool.free; + edpool.free = &pool[i]; + } + edpool.nalloc += Incr; + edpool.nfree += Incr; + dprint("ehci: edalloc: %d eds\n", edpool.nalloc); + } + ed = edpool.free; + edpool.free = ed->next; + edpool.ninuse++; + edpool.nfree--; + unlock(&edpool); + + memset(ed, 0, sizeof(Ed)); /* safety */ + assert(((uintptr)ed & 0xF) == 0); + return ed; +} + +static void +edfree(void *a) +{ + Ed *ed; + + ed = a; + lock(&edpool); + ed->next = edpool.free; + edpool.free = ed; + edpool.ninuse--; + edpool.nfree++; + unlock(&edpool); +} + +/* + * Allocate and do some initialization. + * Free after releasing buffers used. 
+ */ + +static Itd* +itdalloc(void) +{ + Itd *td; + + td = edalloc(); + td->link = Lterm; + return td; +} + +static void +itdfree(Itd *td) +{ + edfree(td); +} + +static Sitd* +sitdalloc(void) +{ + Sitd *td; + + td = edalloc(); + td->link = td->blink = Lterm; + return td; +} + +static void +sitdfree(Sitd *td) +{ + edfree(td); +} + +static Td* +tdalloc(void) +{ + Td *td; + + td = edalloc(); + td->nlink = td->alink = Lterm; + return td; +} + +static void +tdfree(Td *td) +{ + if(td == nil) + return; + free(td->buff); + edfree(td); +} + +static void +tdlinktd(Td *td, Td *next) +{ + td->next = next; + td->alink = Lterm; + if(next == nil) + td->nlink = Lterm; + else + td->nlink = PADDR(next); + coherence(); +} + +static Qh* +qhlinkqh(Qh *qh, Qh *next) +{ + qh->next = next; + if(next == nil) + qh->link = Lterm; + else + qh->link = PADDR(next)|Lqh; + coherence(); + return qh; +} + +static void +qhsetaddr(Qh *qh, ulong addr) +{ + ulong eps0; + + eps0 = qh->eps0 & ~((Epmax<<8)|Devmax); + qh->eps0 = eps0 | addr & Devmax | ((addr >> 7) & Epmax) << 8; + coherence(); +} + +/* + * return floor(log2(n)), i.e. the exponent of the largest power of 2 <= n (0 when n < 2) + */ +static int +flog2lower(int n) +{ + int i; + + for(i = 0; (1 << (i + 1)) <= n; i++) + ; + return i; +} + +static int +pickschedq(Qtree *qt, int pollival, ulong bw, ulong limit) +{ + int i, j, d, upperb, q; + ulong best, worst, total; + + d = flog2lower(pollival); + if(d > qt->depth) + d = qt->depth; + q = -1; + worst = 0; + best = ~0; + upperb = (1 << (d+1)) - 1; + for(i = (1 << d) - 1; i < upperb; i++){ + total = qt->bw[0]; + for(j = i; j > 0; j = (j - 1) / 2) + total += qt->bw[j]; + if(total < best){ + best = total; + q = i; + } + if(total > worst) + worst = total; + } + if(worst + bw >= limit) + return -1; + return q; +} + +static int +schedq(Ctlr *ctlr, Qh *qh, int pollival) +{ + int q; + Qh *tqh; + ulong bw; + + bw = qh->io->bw; + q = pickschedq(ctlr->tree, pollival, 0, ~0); + ddqprint("ehci: sched %#p q %d, ival %d, bw %uld\n", + qh->io, q, pollival, bw); + if(q 
< 0){ + print("ehci: no room for ed\n"); + return -1; + } + ctlr->tree->bw[q] += bw; + tqh = ctlr->tree->root[q]; + qh->sched = q; + qhlinkqh(qh, tqh->next); + qhlinkqh(tqh, qh); + coherence(); + qh->inext = ctlr->intrqhs; + ctlr->intrqhs = qh; + coherence(); + return 0; +} + +static void +unschedq(Ctlr *ctlr, Qh *qh) +{ + int q; + Qh *prev, *this, *next; + Qh **l; + ulong bw; + + bw = qh->io->bw; + q = qh->sched; + if(q < 0) + return; + ctlr->tree->bw[q] -= bw; + + prev = ctlr->tree->root[q]; + this = prev->next; + while(this != nil && this != qh){ + prev = this; + this = this->next; + } + if(this == nil) + print("ehci: unschedq %d: not found\n", q); + else{ + next = this->next; + qhlinkqh(prev, next); + } + for(l = &ctlr->intrqhs; *l != nil; l = &(*l)->inext) + if(*l == qh){ + *l = (*l)->inext; + return; + } + print("ehci: unschedq: qh %#p not found\n", qh); +} + +static u32int +qhmaxpkt(Qh *qh) +{ + return (qh->eps0 >> Qhmplshift) & Qhmplmask; +} + +static void +qhsetmaxpkt(Qh *qh, int maxpkt) +{ + ulong eps0; + + eps0 = qh->eps0 & ~(Qhmplmask << Qhmplshift); + qh->eps0 = eps0 | (maxpkt & Qhmplmask) << Qhmplshift; + coherence(); +} + +/* + * Initialize the round-robin circular list of ctl/bulk Qhs + * if ep is nil. Otherwise, allocate and link a new Qh in the ctlr. 
+ */ +static Qh* +qhalloc(Ctlr *ctlr, Ep *ep, Qio *io, char* tag) +{ + Qh *qh; + int ttype; + + qh = edalloc(); + qh->nlink = Lterm; + qh->alink = Lterm; + qh->csw = Tdhalt; + qh->state = Qidle; + qh->sched = -1; + qh->io = io; + if(ep != nil){ + qh->eps0 = 0; + qhsetmaxpkt(qh, ep->maxpkt); + if(ep->dev->speed == Lowspeed) + qh->eps0 |= Qhlow; + if(ep->dev->speed == Highspeed) + qh->eps0 |= Qhhigh; + else if(ep->ttype == Tctl) + qh->eps0 |= Qhnhctl; + qh->eps0 |= Qhdtc | 8 << Qhrlcshift; /* 8 naks max */ + coherence(); + qhsetaddr(qh, io->usbid); + qh->eps1 = (ep->ntds & Qhmultmask) << Qhmultshift; + qh->eps1 |= ep->dev->port << Qhportshift; + qh->eps1 |= ep->dev->hub << Qhhubshift; + qh->eps1 |= 034 << Qhscmshift; + if(ep->ttype == Tintr) + qh->eps1 |= 1 << Qhismshift; /* intr. start µf. */ + coherence(); + if(io != nil) + io->tag = tag; + } + ilock(ctlr); + ttype = Tctl; + if(ep != nil) + ttype = ep->ttype; + switch(ttype){ + case Tctl: + case Tbulk: + if(ctlr->qhs == nil){ + ctlr->qhs = qhlinkqh(qh, qh); + qh->eps0 |= Qhhigh | Qhhrl; + coherence(); + ctlr->opio->link = PADDR(qh)|Lqh; + coherence(); + }else{ + qhlinkqh(qh, ctlr->qhs->next); + qhlinkqh(ctlr->qhs, qh); + } + break; + case Tintr: + schedq(ctlr, qh, ep->pollival); + break; + default: + print("ehci: qhalloc called for ttype != ctl/bulk\n"); + } + iunlock(ctlr); + return qh; +} + +static int +qhadvanced(void *a) +{ + Ctlr *ctlr; + + ctlr = a; + return (ctlr->opio->cmd & Ciasync) == 0; +} + +/* + * called when a qh is removed, to be sure the hw is not + * keeping pointers into it. + */ +static void +qhcoherency(Ctlr *ctlr) +{ + int i; + + qlock(&ctlr->portlck); + ctlr->opio->cmd |= Ciasync; /* ask for intr. 
on async advance */ + coherence(); + for(i = 0; i < 3 && qhadvanced(ctlr) == 0; i++) + if(!waserror()){ + tsleep(ctlr, qhadvanced, ctlr, Abortdelay); + poperror(); + } + dprint("ehci: qhcoherency: doorbell %d\n", qhadvanced(ctlr)); + if(i == 3) + print("ehci: async advance doorbell did not ring\n"); + ctlr->opio->cmd &= ~Ciasync; /* try to clean */ + qunlock(&ctlr->portlck); +} + +static void +qhfree(Ctlr *ctlr, Qh *qh) +{ + Td *td, *ltd; + Qh *q; + + if(qh == nil) + return; + ilock(ctlr); + if(qh->sched < 0){ + for(q = ctlr->qhs; q != nil; q = q->next) + if(q->next == qh) + break; + if(q == nil) + panic("qhfree: nil q"); + q->next = qh->next; + q->link = qh->link; + coherence(); + }else + unschedq(ctlr, qh); + iunlock(ctlr); + + qhcoherency(ctlr); + + for(td = qh->tds; td != nil; td = ltd){ + ltd = td->next; + tdfree(td); + } + + edfree(qh); +} + +static void +qhlinktd(Qh *qh, Td *td) +{ + ulong csw; + int i; + + csw = qh->csw; + qh->tds = td; + if(td == nil) + qh->csw = (csw & ~Tdactive) | Tdhalt; + else{ + csw &= Tddata1 | Tdping; /* save */ + qh->csw = Tdhalt; + coherence(); + qh->tclink = 0; + qh->alink = Lterm; + qh->nlink = PADDR(td); + for(i = 0; i < nelem(qh->buffer); i++) + qh->buffer[i] = 0; + coherence(); + qh->csw = csw & ~(Tdhalt|Tdactive); /* activate next */ + } + coherence(); +} + +static char* +seprintlink(char *s, char *se, char *name, ulong l, int typed) +{ + s = seprint(s, se, "%s %ulx", name, l); + if((l & Lterm) != 0) + return seprint(s, se, "T"); + if(typed == 0) + return s; + switch(l & (3<<1)){ + case Litd: + return seprint(s, se, "I"); + case Lqh: + return seprint(s, se, "Q"); + case Lsitd: + return seprint(s, se, "S"); + default: + return seprint(s, se, "F"); + } +} + +static char* +seprintitd(char *s, char *se, Itd *td) +{ + int i; + u32int b0, b1; + char flags[6]; + char *rw; + + if(td == nil) + return seprint(s, se, "\n"); + b0 = td->buffer[0]; + b1 = td->buffer[1]; + + s = seprint(s, se, "itd %#p", td); + rw = (b1 & Itdin) ? 
"in" : "out"; + s = seprint(s, se, " %s ep %ud dev %ud max %ud mult %ud", + rw, (b0>>8)&Epmax, (b0&Devmax), + td->buffer[1] & 0x7ff, td->buffer[2] & 3); /* mult is kept in buffer[2] (see Itdntdsshift), not buffer[1] */ + s = seprintlink(s, se, " link", td->link, 1); + s = seprint(s, se, "\n"); + for(i = 0; i < nelem(td->csw); i++){ + memset(flags, '-', 5); + if((td->csw[i] & Itdactive) != 0) + flags[0] = 'a'; + if((td->csw[i] & Itdioc) != 0) + flags[1] = 'i'; + if((td->csw[i] & Itddberr) != 0) + flags[2] = 'd'; + if((td->csw[i] & Itdbabble) != 0) + flags[3] = 'b'; + if((td->csw[i] & Itdtrerr) != 0) + flags[4] = 't'; + flags[5] = 0; + s = seprint(s, se, "\ttd%d %s", i, flags); + s = seprint(s, se, " len %ud", (td->csw[i] >> Itdlenshift) & Itdlenmask); /* full 12-bit length field */ + s = seprint(s, se, " pg %ud", (td->csw[i] >> 12) & 0x7); + s = seprint(s, se, " off %ud\n", td->csw[i] & 0xfff); + } + s = seprint(s, se, "\tbuffs:"); + for(i = 0; i < nelem(td->buffer); i++) + s = seprint(s, se, " %#ux", td->buffer[i] >> 12); + return seprint(s, se, "\n"); +} + +static char* +seprintsitd(char *s, char *se, Sitd *td) +{ + char rw, pg, ss; + char flags[8]; + static char pc[4] = { 'a', 'b', 'm', 'e' }; + + if(td == nil) + return seprint(s, se, "\n"); + s = seprint(s, se, "sitd %#p", td); + rw = (td->epc & Stdin) ? 'r' : 'w'; + s = seprint(s, se, " %c ep %ud dev %ud", + rw, (td->epc>>8)&0xf, td->epc&0x7f); + s = seprint(s, se, " max %ud", (td->csw >> 16) & 0x3ff); + s = seprint(s, se, " hub %ud", (td->epc >> 16) & 0x7f); + s = seprint(s, se, " port %ud\n", (td->epc >> 24) & 0x7f); + memset(flags, '-', 7); + if((td->csw & Stdactive) != 0) + flags[0] = 'a'; + if((td->csw & Stdioc) != 0) + flags[1] = 'i'; + if((td->csw & Stderr) != 0) + flags[2] = 'e'; + if((td->csw & Stddberr) != 0) + flags[3] = 'd'; + if((td->csw & Stdbabble) != 0) + flags[4] = 'b'; + if((td->csw & Stdtrerr) != 0) + flags[5] = 't'; + if((td->csw & Stdmmf) != 0) + flags[6] = 'n'; + flags[7] = 0; + ss = (td->csw & Stddcs) ? 'c' : 's'; + pg = (td->csw & Stdpg) ? 
'1' : '0'; + s = seprint(s, se, "\t%s %cs pg%c", flags, ss, pg); + s = seprint(s, se, " b0 %#ux b1 %#ux off %ud\n", + td->buffer[0] >> 12, td->buffer[1] >> 12, td->buffer[0] & 0xfff); + s = seprint(s, se, "\ttpos %c tcnt %ud", + pc[(td->buffer[0]>>3)&3], td->buffer[1] & 7); + s = seprint(s, se, " ssm %#ux csm %#ux cspm %#ux", + td->mfs & 0xff, (td->mfs>>8) & 0xff, (td->csw>>8) & 0xff); + s = seprintlink(s, se, " link", td->link, 1); + s = seprintlink(s, se, " blink", td->blink, 0); + return seprint(s, se, "\n"); +} + +static long +maxtdlen(Td *td) +{ + return (td->csw >> Tdlenshift) & Tdlenmask; +} + +static long +tdlen(Td *td) +{ + if(td->data == nil) + return 0; + return td->ndata - maxtdlen(td); +} + +static char* +seprinttd(char *s, char *se, Td *td, char *tag) +{ + int i; + char t, ss; + char flags[9]; + static char *tok[4] = { "out", "in", "setup", "BUG" }; + + if(td == nil) + return seprint(s, se, "%s \n", tag); + s = seprint(s, se, "%s %#p", tag, td); + s = seprintlink(s, se, " nlink", td->nlink, 0); + s = seprintlink(s, se, " alink", td->alink, 0); + s = seprint(s, se, " %s", tok[(td->csw & Tdtok) >> 8]); + if((td->csw & Tdping) != 0) + s = seprint(s, se, " png"); + memset(flags, '-', 8); + if((td->csw & Tdactive) != 0) + flags[0] = 'a'; + if((td->csw & Tdioc) != 0) + flags[1] = 'i'; + if((td->csw & Tdhalt) != 0) + flags[2] = 'h'; + if((td->csw & Tddberr) != 0) + flags[3] = 'd'; + if((td->csw & Tdbabble) != 0) + flags[4] = 'b'; + if((td->csw & Tdtrerr) != 0) + flags[5] = 't'; + if((td->csw & Tdmmf) != 0) + flags[6] = 'n'; + if((td->csw & (Tderr2|Tderr1)) == 0) + flags[7] = 'z'; + flags[8] = 0; + t = (td->csw & Tddata1) ? '1' : '0'; + ss = (td->csw & Tddcs) ? 
'c' : 's'; + s = seprint(s, se, "\n\td%c %s %cs", t, flags, ss); + s = seprint(s, se, " max %uld", maxtdlen(td)); + s = seprint(s, se, " pg %ud off %#ux\n", + (td->csw >> Tdpgshift) & Tdpgmask, td->buffer[0] & 0xFFF); + s = seprint(s, se, "\tbuffs:"); + for(i = 0; i < nelem(td->buffer); i++) + s = seprint(s, se, " %#ux", td->buffer[i]>>12); + if(td->data != nil) + s = seprintdata(s, se, td->data, td->ndata); + return seprint(s, se, "\n"); +} + +static void +dumptd(Td *td, char *pref) +{ + char buf[256]; + char *se; + int i; + + i = 0; + se = buf+sizeof(buf); + for(; td != nil; td = td->next){ + seprinttd(buf, se, td, pref); + print("%s", buf); + if(i++ > 20){ + print("...more tds...\n"); + break; + } + } +} + +static void +qhdump(Qh *qh) +{ + char buf[256]; + char *s, *se, *tag; + Td td; + static char *speed[] = {"full", "low", "high", "BUG"}; + + if(qh == nil){ + print("\n"); + return; + } + if(qh->io == nil) + tag = "qh"; + else + tag = qh->io->tag; + se = buf+sizeof(buf); + s = seprint(buf, se, "%s %#p", tag, qh); + s = seprint(s, se, " ep %ud dev %ud", + (qh->eps0>>8)&0xf, qh->eps0&0x7f); + s = seprint(s, se, " hub %ud", (qh->eps1 >> 16) & 0x7f); + s = seprint(s, se, " port %ud", (qh->eps1 >> 23) & 0x7f); + s = seprintlink(s, se, " link", qh->link, 1); + seprint(s, se, " clink %#ux", qh->tclink); + print("%s\n", buf); + s = seprint(buf, se, "\tnrld %ud", (qh->eps0 >> Qhrlcshift) & Qhrlcmask); + s = seprint(s, se, " nak %ud", (qh->alink >> 1) & 0xf); + s = seprint(s, se, " max %ud ", qhmaxpkt(qh)); + if((qh->eps0 & Qhnhctl) != 0) + s = seprint(s, se, "c"); + if((qh->eps0 & Qhhrl) != 0) + s = seprint(s, se, "h"); + if((qh->eps0 & Qhdtc) != 0) + s = seprint(s, se, "d"); + if((qh->eps0 & Qhint) != 0) + s = seprint(s, se, "i"); + s = seprint(s, se, " %s", speed[(qh->eps0 >> 12) & 3]); + s = seprint(s, se, " mult %ud", (qh->eps1 >> Qhmultshift) & Qhmultmask); + seprint(s, se, " scm %#ux ism %#ux\n", + (qh->eps1 >> 8 & 0xff), qh->eps1 & 0xff); + print("%s\n", buf); + 
memset(&td, 0, sizeof(td)); + memmove(&td, &qh->nlink, 32); /* overlay area */ + seprinttd(buf, se, &td, "\tovl"); + print("%s", buf); +} + +/* + * Debugging aid: print the state of an iso stream. + * With all == 0 only the tdi and tdu descriptors are shown; + * otherwise every non-nil slot of iso->tdps is printed, prefixed + * with "i->" or "u->" when it is the stream's tdi or tdu. + * A nil iso prints an empty line. + */ +static void +isodump(Isoio* iso, int all) +{ + Itd *td, *tdi, *tdu; + Sitd *std, *stdi, *stdu; + char buf[256]; + int i; + + if(iso == nil){ + print("\n"); + return; + } + print("iso %#p %s %s speed state %d nframes %d maxsz %uld", + iso, iso->tok == Tdtokin ? "in" : "out", + iso->hs ? "high" : "full", + iso->state, iso->nframes, iso->maxsize); + print(" td0 %uld tdi %#p tdu %#p data %#p\n", + iso->td0frno, iso->tdi, iso->tdu, iso->data); + if(iso->err != nil) + print("\terr %s\n", iso->err); + if(all == 0) + if(iso->hs != 0){ + tdi = iso->tdi; + seprintitd(buf, buf+sizeof(buf), tdi); + print("\ttdi %s\n", buf); + tdu = iso->tdu; + seprintitd(buf, buf+sizeof(buf), tdu); + print("\ttdu %s\n", buf); + }else{ + stdi = iso->stdi; + seprintsitd(buf, buf+sizeof(buf), stdi); + print("\tstdi %s\n", buf); + stdu = iso->stdu; + seprintsitd(buf, buf+sizeof(buf), stdu); + print("\tstdu %s\n", buf); + } + else + for(i = 0; i < Nisoframes; i++) + if(iso->tdps[i] != nil) + if(iso->hs != 0){ + td = iso->itdps[i]; + seprintitd(buf, buf+sizeof(buf), td); + if(td == iso->tdi) + print("i->"); + if(td == iso->tdu) /* same marker as the sitd branch below */ + print("u->"); + print("[%d]\t%s", i, buf); + }else{ + std = iso->sitdps[i]; + seprintsitd(buf, buf+sizeof(buf), std); + if(std == iso->stdi) + print("i->"); + if(std == iso->stdu) + print("u->"); + print("[%d]\t%s", i, buf); + } +} + +/* + * Debugging dump of the controller: counters, operational registers, + * port status words, queue heads, the iso list and ed pool counters. + */ +static void +dump(Hci *hp) +{ + int i; + char *s, *se; + char buf[128]; + Ctlr *ctlr; + Eopio *opio; + Isoio *iso; + Qh *qh; + + ctlr = hp->aux; + opio = ctlr->opio; + ilock(ctlr); + print("ehci port %#p frames %#p (%d fr.)
nintr %d ntdintr %d", + ctlr->capio, ctlr->frames, ctlr->nframes, + ctlr->nintr, ctlr->ntdintr); + print(" nqhintr %d nisointr %d\n", ctlr->nqhintr, ctlr->nisointr); + print("\tcmd %#lux sts %#lux intr %#lux frno %uld", + opio->cmd, opio->sts, opio->intr, opio->frno); + print(" base %#lux link %#lux fr0 %#lux\n", + opio->frbase, opio->link, ctlr->frames[0]); + se = buf+sizeof(buf); + s = seprint(buf, se, "\t"); + for(i = 0; i < hp->nports; i++){ + s = seprint(s, se, "p%d %#lux ", i, opio->portsc[i]); + if(hp->nports > 4 && i == hp->nports/2 - 1) + s = seprint(s, se, "\n\t"); + } + print("%s\n", buf); + qh = ctlr->qhs; + i = 0; + do{ + qhdump(qh); + qh = qh->next; + }while(qh != ctlr->qhs && i++ < 100); + if(i > 100) + print("...too many Qhs...\n"); + if(ctlr->intrqhs != nil) + print("intr qhs:\n"); + for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext) + qhdump(qh); + if(ctlr->iso != nil) + print("iso:\n"); + for(iso = ctlr->iso; iso != nil; iso = iso->next) + isodump(ctlr->iso, 0); + print("%d eds in tree\n", ctlr->ntree); + iunlock(ctlr); + lock(&edpool); + print("%d eds allocated = %d in use + %d free\n", + edpool.nalloc, edpool.ninuse, edpool.nfree); + unlock(&edpool); +} + +static char* +errmsg(int err) +{ + if(err == 0) + return "ok"; + if(err & Tddberr) + return "data buffer error"; + if(err & Tdbabble) + return "babble detected"; + if(err & Tdtrerr) + return "transaction error"; + if(err & Tdmmf) + return "missed µframe"; + if(err & Tdhalt) + return Estalled; /* [uo]hci report this error */ + return Eio; +} + +static char* +ierrmsg(int err) +{ + if(err == 0) + return "ok"; + if(err & Itddberr) + return "data buffer error"; + if(err & Itdbabble) + return "babble detected"; + if(err & Itdtrerr) + return "transaction error"; + return Eio; +} + +static char* +serrmsg(int err) +{ + if(err & Stderr) + return "translation translator error"; + /* other errors have same numbers than Td errors */ + return errmsg(err); +} + +static int +isocanread(void *a) +{ + Isoio 
*iso; + + iso = a; + if(iso->state == Qclose) + return 1; + if(iso->state == Qrun && iso->tok == Tdtokin){ + if(iso->hs != 0 && iso->tdi != iso->tdu) + return 1; + if(iso->hs == 0 && iso->stdi != iso->stdu) + return 1; + } + return 0; +} + +static int +isocanwrite(void *a) +{ + Isoio *iso; + + iso = a; + if(iso->state == Qclose) + return 1; + if(iso->state == Qrun && iso->tok == Tdtokout){ + if(iso->hs != 0 && iso->tdu->next != iso->tdi) + return 1; + if(iso->hs == 0 && iso->stdu->next != iso->stdi) + return 1; + } + return 0; +} + +static void +itdinit(Isoio *iso, Itd *td) +{ + int p, t; + ulong pa, tsize, size; + + /* + * BUG: This does not put an integral number of samples + * on each µframe unless samples per packet % 8 == 0 + * Also, all samples are packed early on each frame. + */ + p = 0; + size = td->ndata = td->mdata; + pa = PADDR(td->data); + for(t = 0; size > 0 && t < 8; t++){ + tsize = size; + if(tsize > iso->maxsize) + tsize = iso->maxsize; + size -= tsize; + assert(p < nelem(td->buffer)); + td->csw[t] = tsize << Itdlenshift | p << Itdpgshift | + (pa & 0xFFF) << Itdoffshift | Itdactive | Itdioc; + coherence(); + if(((pa+tsize) & ~0xFFF) != (pa & ~0xFFF)) + p++; + pa += tsize; + } +} + +static void +sitdinit(Isoio *iso, Sitd *td) +{ + td->ndata = td->mdata & Stdlenmask; + td->buffer[0] = PADDR(td->data); + td->buffer[1] = (td->buffer[0] & ~0xFFF) + 0x1000; + if(iso->tok == Tdtokin || td->ndata <= 188) + td->buffer[1] |= Stdtpall; + else + td->buffer[1] |= Stdtpbegin; + if(iso->tok == Tdtokin) + td->buffer[1] |= 1; + else + td->buffer[1] |= ((td->ndata + 187) / 188) & Stdtcntmask; + coherence(); + td->csw = td->ndata << Stdlenshift | Stdactive | Stdioc; + coherence(); +} + +static int +itdactive(Itd *td) +{ + int i; + + for(i = 0; i < nelem(td->csw); i++) + if((td->csw[i] & Itdactive) != 0) + return 1; + return 0; +} + +static int +isohsinterrupt(Ctlr *ctlr, Isoio *iso) +{ + int err, i, nframes, t; + Itd *tdi; + + tdi = iso->tdi; + assert(tdi != nil); + 
if(itdactive(tdi)) /* not all tds are done */ + return 0; + ctlr->nisointr++; + ddiprint("isohsintr: iso %#p: tdi %#p tdu %#p\n", iso, tdi, iso->tdu); + if(iso->state != Qrun && iso->state != Qdone) + panic("isofsintr: iso state"); + if(ehcidebug > 1 || iso->debug > 1) + isodump(iso, 0); + + nframes = iso->nframes / 2; /* limit how many we look */ + if(nframes > Nisoframes) + nframes = Nisoframes; + + if(iso->tok == Tdtokin) + tdi->ndata = 0; + /* else, it has the number of bytes transferred */ + + for(i = 0; i < nframes && itdactive(tdi) == 0; i++){ + if(iso->tok == Tdtokin) + tdi->ndata += (tdi->csw[i] >> Itdlenshift) & Itdlenmask; + err = 0; + coherence(); + for(t = 0; t < nelem(tdi->csw); t++){ + tdi->csw[t] &= ~Itdioc; + coherence(); + err |= tdi->csw[t] & Itderrors; + } + if(err == 0) + iso->nerrs = 0; + else if(iso->nerrs++ > iso->nframes/2){ + if(iso->err == nil){ + iso->err = ierrmsg(err); + diprint("isohsintr: tdi %#p error %#ux %s\n", + tdi, err, iso->err); + diprint("ctlr load %uld\n", ctlr->load); + } + tdi->ndata = 0; + }else + tdi->ndata = 0; + if(tdi->next == iso->tdu || tdi->next->next == iso->tdu){ + memset(iso->tdu->data, 0, iso->tdu->mdata); + itdinit(iso, iso->tdu); + iso->tdu = iso->tdu->next; + iso->nleft = 0; + } + tdi = tdi->next; + coherence(); + } + ddiprint("isohsintr: %d frames processed\n", nframes); + if(i == nframes){ + tdi->csw[0] |= Itdioc; + coherence(); + } + iso->tdi = tdi; + coherence(); + if(isocanwrite(iso) || isocanread(iso)){ + diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso, + iso->tdi, iso->tdu); + wakeup(iso); + } + return 1; +} + +static int +isofsinterrupt(Ctlr *ctlr, Isoio *iso) +{ + int err, i, nframes; + Sitd *stdi; + + stdi = iso->stdi; + assert(stdi != nil); + if((stdi->csw & Stdactive) != 0) /* nothing new done */ + return 0; + ctlr->nisointr++; + ddiprint("isofsintr: iso %#p: tdi %#p tdu %#p\n", iso, stdi, iso->stdu); + if(iso->state != Qrun && iso->state != Qdone) + panic("isofsintr: iso state"); + if(ehcidebug 
> 1 || iso->debug > 1) + isodump(iso, 0); + + nframes = iso->nframes / 2; /* limit how many we look */ + if(nframes > Nisoframes) + nframes = Nisoframes; + + for(i = 0; i < nframes && (stdi->csw & Stdactive) == 0; i++){ + stdi->csw &= ~Stdioc; + /* write back csw and see if it produces errors */ + coherence(); + err = stdi->csw & Stderrors; + if(err == 0){ + iso->nerrs = 0; + if(iso->tok == Tdtokin) + stdi->ndata = (stdi->csw>>Stdlenshift)&Stdlenmask; + /* else len is assumed correct */ + }else if(iso->nerrs++ > iso->nframes/2){ + if(iso->err == nil){ + iso->err = serrmsg(err); + diprint("isofsintr: tdi %#p error %#ux %s\n", + stdi, err, iso->err); + diprint("ctlr load %uld\n", ctlr->load); + } + stdi->ndata = 0; + }else + stdi->ndata = 0; + + if(stdi->next == iso->stdu || stdi->next->next == iso->stdu){ + memset(iso->stdu->data, 0, iso->stdu->mdata); + coherence(); + sitdinit(iso, iso->stdu); + iso->stdu = iso->stdu->next; + iso->nleft = 0; + } + coherence(); + stdi = stdi->next; + } + ddiprint("isofsintr: %d frames processed\n", nframes); + if(i == nframes){ + stdi->csw |= Stdioc; + coherence(); + } + iso->stdi = stdi; + coherence(); + if(isocanwrite(iso) || isocanread(iso)){ + diprint("wakeup iso %#p tdi %#p tdu %#p\n", iso, + iso->stdi, iso->stdu); + wakeup(iso); + } + return 1; +} + +static int +qhinterrupt(Ctlr *ctlr, Qh *qh) +{ + Td *td; + int err; + + if(qh->state != Qrun) + panic("qhinterrupt: qh state"); + td = qh->tds; + if(td == nil) + panic("qhinterrupt: no tds"); + if((td->csw & Tdactive) == 0) + ddqprint("qhinterrupt port %#p qh %#p\n", ctlr->capio, qh); + for(; td != nil; td = td->next){ + if(td->csw & Tdactive) + return 0; + err = td->csw & Tderrors; + if(err != 0){ + if(qh->io->err == nil){ + qh->io->err = errmsg(err); + dqprint("qhintr: td %#p csw %#ux error %#ux %s\n", + td, td->csw, err, qh->io->err); + } + break; + } + td->ndata = tdlen(td); + coherence(); + if(td->ndata < maxtdlen(td)){ /* EOT */ + td = td->next; + break; + } + } + /* + * 
Done. Make void the Tds not used (errors or EOT) and wakeup epio. + */ + for(; td != nil; td = td->next) + td->ndata = 0; + coherence(); + qh->state = Qdone; + coherence(); + wakeup(qh->io); + return 1; +} + +static int +ehciintr(Hci *hp) +{ + Ctlr *ctlr; + Eopio *opio; + Isoio *iso; + ulong sts; + Qh *qh; + int i, some; + + ctlr = hp->aux; + opio = ctlr->opio; + + /* + * Will we know in USB 3.0 who the interrupt was for?. + * Do they still teach indexing in CS? + * This is Intel's doing. + */ + ilock(ctlr); + ctlr->nintr++; + sts = opio->sts & Sintrs; + if(sts == 0){ /* not ours; shared intr. */ + iunlock(ctlr); + return 0; + } + opio->sts = sts; + coherence(); + if((sts & Sherr) != 0) + print("ehci: port %#p fatal host system error\n", ctlr->capio); + if((sts & Shalted) != 0) + print("ehci: port %#p: halted\n", ctlr->capio); + if((sts & Sasync) != 0){ + dprint("ehci: doorbell\n"); + wakeup(ctlr); + } + /* + * We enter always this if, even if it seems the + * interrupt does not report anything done/failed. + * Some controllers don't post interrupts right. 
+ */ + some = 0; + if((sts & (Serrintr|Sintr)) != 0){ + ctlr->ntdintr++; + if(ehcidebug > 1){ + print("ehci port %#p frames %#p nintr %d ntdintr %d", + ctlr->capio, ctlr->frames, + ctlr->nintr, ctlr->ntdintr); + print(" nqhintr %d nisointr %d\n", + ctlr->nqhintr, ctlr->nisointr); + print("\tcmd %#lux sts %#lux intr %#lux frno %uld", + opio->cmd, opio->sts, opio->intr, opio->frno); + } + + /* process the Iso transfers */ + for(iso = ctlr->iso; iso != nil; iso = iso->next) + if(iso->state == Qrun || iso->state == Qdone) + if(iso->hs != 0) + some += isohsinterrupt(ctlr, iso); + else + some += isofsinterrupt(ctlr, iso); + + /* process the qhs in the periodic tree */ + for(qh = ctlr->intrqhs; qh != nil; qh = qh->inext) + if(qh->state == Qrun) + some += qhinterrupt(ctlr, qh); + + /* process the async Qh circular list */ + qh = ctlr->qhs; + i = 0; + do{ + if (qh == nil) + panic("ehciintr: nil qh"); + if(qh->state == Qrun) + some += qhinterrupt(ctlr, qh); + qh = qh->next; + }while(qh != ctlr->qhs && i++ < 100); + if(i > 100) + print("ehci: interrupt: qh loop?\n"); + } +// if (some == 0) +// panic("ehciintr: no work"); + iunlock(ctlr); + return some; +} + +/* + * Interrupt entry point: a is the Hci handed to ehciintr. + */ +static void +interrupt(Ureg*, void* a) +{ + ehciintr(a); +} + +/* + * Set (on != 0) or clear Psenable on the 1-based port. The status + * word read on entry is written back first when it carries change + * bits. Sleeps Enabledelay before returning; always returns 0. + */ +static int +portenable(Hci *hp, int port, int on) +{ + Ctlr *ctlr; + Eopio *opio; + int s; + + ctlr = hp->aux; + opio = ctlr->opio; + s = opio->portsc[port-1]; + qlock(&ctlr->portlck); + if(waserror()){ + qunlock(&ctlr->portlck); + nexterror(); + } + dprint("ehci %#p port %d enable=%d; sts %#x\n", + ctlr->capio, port, on, s); + ilock(ctlr); + if(s & (Psstatuschg | Pschange)) + opio->portsc[port-1] = s; + if(on) + opio->portsc[port-1] |= Psenable; + else + opio->portsc[port-1] &= ~Psenable; + coherence(); + microdelay(64); + iunlock(ctlr); + tsleep(&up->sleep, return0, 0, Enabledelay); + dprint("ehci %#p port %d enable=%d: sts %#lux\n", + ctlr->capio, port, on, opio->portsc[port-1]); + qunlock(&ctlr->portlck); + poperror(); + return 0; +} + +/* + * If we detect
during status that the port is low-speed or + * during reset that it's full-speed, the device is not for + * ourselves. The companion controller will take care. + * Low-speed devices will not be seen by usbd. Full-speed + * ones are seen because it's only after reset that we know what + * they are (usbd may notice a device not enabled in this case). + */ +static void +portlend(Ctlr *ctlr, int port, char *ss) +{ + Eopio *opio; + ulong s; + + opio = ctlr->opio; + + dprint("ehci %#p port %d: %s speed device: no longer owned\n", + ctlr->capio, port, ss); + s = opio->portsc[port-1] & ~(Pschange|Psstatuschg); + opio->portsc[port-1] = s | Psowner; + coherence(); +} + +static int +portreset(Hci *hp, int port, int on) +{ + ulong *portscp; + Eopio *opio; + Ctlr *ctlr; + int i; + + if(on == 0) + return 0; + + ctlr = hp->aux; + opio = ctlr->opio; + qlock(&ctlr->portlck); + if(waserror()){ + iunlock(ctlr); + qunlock(&ctlr->portlck); + nexterror(); + } + portscp = &opio->portsc[port-1]; + dprint("ehci %#p port %d reset; sts %#lux\n", ctlr->capio, port, *portscp); + ilock(ctlr); + /* Shalted must be zero, else Psreset will stay set */ + if (opio->sts & Shalted) + iprint("ehci %#p: halted yet trying to reset port\n", + ctlr->capio); + *portscp = (*portscp & ~Psenable) | Psreset; /* initiate reset */ + coherence(); + + /* + * usb 2 spec: reset must finish within 20 ms. + * linux says spec says it can take 50 ms. for hubs. + */ + for(i = 0; *portscp & Psreset && i < 50; i++) + delay(10); + if (*portscp & Psreset) + iprint("ehci %#p: port %d didn't reset within %d ms; sts %#lux\n", + ctlr->capio, port, i * 10, *portscp); + *portscp &= ~Psreset; /* force appearance of reset done */ + coherence(); + delay(10); /* ehci spec: enable within 2 ms. 
*/ + + if((*portscp & Psenable) == 0) + portlend(ctlr, port, "full"); + + iunlock(ctlr); + dprint("ehci %#p after port %d reset; sts %#lux\n", + ctlr->capio, port, *portscp); + qunlock(&ctlr->portlck); + poperror(); + return 0; +} + +static int +portstatus(Hci *hp, int port) +{ + int s, r; + Eopio *opio; + Ctlr *ctlr; + + ctlr = hp->aux; + opio = ctlr->opio; + qlock(&ctlr->portlck); + if(waserror()){ + iunlock(ctlr); + qunlock(&ctlr->portlck); + nexterror(); + } + ilock(ctlr); + s = opio->portsc[port-1]; + if(s & (Psstatuschg | Pschange)){ + opio->portsc[port-1] = s; + coherence(); + ddprint("ehci %#p port %d status %#x\n", ctlr->capio, port, s); + } + /* + * If the port is a low speed port we yield ownership now + * to the [uo]hci companion controller and pretend it's not here. + */ + if((s & Pspresent) != 0 && (s & Pslinemask) == Pslow){ + portlend(ctlr, port, "low"); + s &= ~Pspresent; /* not for us this time */ + } + iunlock(ctlr); + qunlock(&ctlr->portlck); + poperror(); + + /* + * We must return status bits as a + * get port status hub request would do. 
+ */ + r = 0; + if(s & Pspresent) + r |= HPpresent|HPhigh; + if(s & Psenable) + r |= HPenable; + if(s & Pssuspend) + r |= HPsuspend; + if(s & Psreset) + r |= HPreset; + if(s & Psstatuschg) + r |= HPstatuschg; + if(s & Pschange) + r |= HPchange; + return r; +} + +static char* +seprintio(char *s, char *e, Qio *io, char *pref) +{ + s = seprint(s,e,"%s io %#p qh %#p id %#x", pref, io, io->qh, io->usbid); + s = seprint(s,e," iot %ld", io->iotime); + s = seprint(s,e," tog %#x tok %#x err %s", io->toggle, io->tok, io->err); + return s; +} + +static char* +seprintep(char *s, char *e, Ep *ep) +{ + Qio *io; + Ctlio *cio; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + ilock(ctlr); + if(ep->aux == nil){ + *s = 0; + iunlock(ctlr); + return s; + } + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + s = seprintio(s, e, cio, "c"); + s = seprint(s, e, "\trepl %d ndata %d\n", ep->rhrepl, cio->ndata); + break; + case Tbulk: + case Tintr: + io = ep->aux; + if(ep->mode != OWRITE) + s = seprintio(s, e, &io[OREAD], "r"); + if(ep->mode != OREAD) + s = seprintio(s, e, &io[OWRITE], "w"); + break; + case Tiso: + *s = 0; + break; + } + iunlock(ctlr); + return s; +} + +/* + * halt condition was cleared on the endpoint. update our toggles. 
+ */ +static void +clrhalt(Ep *ep) +{ + Qio *io; + + ep->clrhalt = 0; + coherence(); + switch(ep->ttype){ + case Tintr: + case Tbulk: + io = ep->aux; + if(ep->mode != OREAD){ + qlock(&io[OWRITE]); + io[OWRITE].toggle = Tddata0; + deprint("ep clrhalt for io %#p\n", io+OWRITE); + qunlock(&io[OWRITE]); + } + if(ep->mode != OWRITE){ + qlock(&io[OREAD]); + io[OREAD].toggle = Tddata0; + deprint("ep clrhalt for io %#p\n", io+OREAD); + qunlock(&io[OREAD]); + } + break; + } +} + +static void +xdump(char* pref, void *qh) +{ + int i; + ulong *u; + + u = qh; + print("%s %#p:", pref, u); + for(i = 0; i < 16; i++) + if((i%4) == 0) + print("\n %#8.8ulx", u[i]); + else + print(" %#8.8ulx", u[i]); + print("\n"); +} + +static long +episohscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, uchar *b, long count) +{ + int nr; + long tot; + Itd *tdu; + + for(tot = 0; iso->tdi != iso->tdu && tot < count; tot += nr){ + tdu = iso->tdu; + if(itdactive(tdu)) + break; + nr = tdu->ndata; + if(tot + nr > count) + nr = count - tot; + if(nr == 0) + print("ehci: ep%d.%d: too many polls\n", + ep->dev->nb, ep->nb); + else{ + iunlock(ctlr); /* We could page fault here */ + memmove(b+tot, tdu->data, nr); + ilock(ctlr); + if(nr < tdu->ndata) + memmove(tdu->data, tdu->data+nr, tdu->ndata - nr); + tdu->ndata -= nr; + coherence(); + } + if(tdu->ndata == 0){ + itdinit(iso, tdu); + iso->tdu = tdu->next; + } + } + return tot; +} + +static long +episofscpy(Ctlr *ctlr, Ep *ep, Isoio* iso, uchar *b, long count) +{ + int nr; + long tot; + Sitd *stdu; + + for(tot = 0; iso->stdi != iso->stdu && tot < count; tot += nr){ + stdu = iso->stdu; + if(stdu->csw & Stdactive){ + diprint("ehci: episoread: %#p tdu active\n", iso); + break; + } + nr = stdu->ndata; + if(tot + nr > count) + nr = count - tot; + if(nr == 0) + print("ehci: ep%d.%d: too many polls\n", + ep->dev->nb, ep->nb); + else{ + iunlock(ctlr); /* We could page fault here */ + memmove(b+tot, stdu->data, nr); + ilock(ctlr); + if(nr < stdu->ndata) + memmove(stdu->data, 
stdu->data+nr, + stdu->ndata - nr); + stdu->ndata -= nr; + coherence(); + } + if(stdu->ndata == 0){ + sitdinit(iso, stdu); + iso->stdu = stdu->next; + } + } + return tot; +} + +static long +episoread(Ep *ep, Isoio *iso, void *a, long count) +{ + Ctlr *ctlr; + uchar *b; + long tot; + + iso->debug = ep->debug; + diprint("ehci: episoread: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + + b = a; + ctlr = ep->hp->aux; + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + iso->err = nil; + iso->nerrs = 0; + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + coherence(); + while(isocanread(iso) == 0){ + iunlock(ctlr); + diprint("ehci: episoread: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanread, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qdone; + coherence(); + assert(iso->tdu != iso->tdi); + + if(iso->hs != 0) + tot = episohscpy(ctlr, ep, iso, b, count); + else + tot = episofscpy(ctlr, ep, iso, b, count); + iunlock(ctlr); + qunlock(iso); + poperror(); + diprint("uhci: episoread: %#p %uld bytes err '%s'\n", iso, tot, iso->err); + if(iso->err != nil) + error(iso->err); + return tot; +} + +/* + * iso->tdu is the next place to put data. When it gets full + * it is activated and tdu advanced. 
+ */ +static long +putsamples(Isoio *iso, uchar *b, long count) +{ + long tot, n; + + for(tot = 0; isocanwrite(iso) && tot < count; tot += n){ + n = count-tot; + if(iso->hs != 0){ + if(n > iso->tdu->mdata - iso->nleft) + n = iso->tdu->mdata - iso->nleft; + memmove(iso->tdu->data + iso->nleft, b + tot, n); + coherence(); + iso->nleft += n; + if(iso->nleft == iso->tdu->mdata){ + itdinit(iso, iso->tdu); + iso->nleft = 0; + iso->tdu = iso->tdu->next; + } + }else{ + if(n > iso->stdu->mdata - iso->nleft) + n = iso->stdu->mdata - iso->nleft; + memmove(iso->stdu->data + iso->nleft, b + tot, n); + coherence(); + iso->nleft += n; + if(iso->nleft == iso->stdu->mdata){ + sitdinit(iso, iso->stdu); + iso->nleft = 0; + iso->stdu = iso->stdu->next; + } + } + } + return tot; +} + +/* + * Queue data for writing and return error status from + * last writes done, to maintain buffered data. + */ +static long +episowrite(Ep *ep, Isoio *iso, void *a, long count) +{ + Ctlr *ctlr; + uchar *b; + int tot, nw; + char *err; + + iso->debug = ep->debug; + diprint("ehci: episowrite: %#p ep%d.%d\n", iso, ep->dev->nb, ep->nb); + + ctlr = ep->hp->aux; + qlock(iso); + if(waserror()){ + qunlock(iso); + nexterror(); + } + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + error(iso->err ? iso->err : Eio); + } + iso->state = Qrun; + coherence(); + b = a; + for(tot = 0; tot < count; tot += nw){ + while(isocanwrite(iso) == 0){ + iunlock(ctlr); + diprint("ehci: episowrite: %#p sleep\n", iso); + if(waserror()){ + if(iso->err == nil) + iso->err = "I/O timed out"; + ilock(ctlr); + break; + } + tsleep(iso, isocanwrite, iso, ep->tmout); + poperror(); + ilock(ctlr); + } + err = iso->err; + iso->err = nil; + if(iso->state == Qclose || err != nil){ + iunlock(ctlr); + error(err ? 
err : Eio); + } + if(iso->state != Qrun) + panic("episowrite: iso not running"); + iunlock(ctlr); /* We could page fault here */ + nw = putsamples(iso, b+tot, count-tot); + ilock(ctlr); + } + if(iso->state != Qclose) + iso->state = Qdone; + iunlock(ctlr); + err = iso->err; /* in case it failed early */ + iso->err = nil; + qunlock(iso); + poperror(); + if(err != nil) + error(err); + diprint("ehci: episowrite: %#p %d bytes\n", iso, tot); + return tot; +} + +static int +nexttoggle(int toggle, int count, int maxpkt) +{ + int np; + + np = count / maxpkt; + if(np == 0) + np = 1; + if((np % 2) == 0) + return toggle; + if(toggle == Tddata1) + return Tddata0; + else + return Tddata1; +} + +static Td* +epgettd(Qio *io, int flags, void *a, int count, int maxpkt) +{ + Td *td; + ulong pa; + int i; + + if(count > Tdmaxpkt) + panic("ehci: epgettd: too many bytes"); + td = tdalloc(); + td->csw = flags | io->toggle | io->tok | count << Tdlenshift | + Tderr2 | Tderr1; + + /* + * use the space wasted by alignment as an + * embedded buffer if count bytes fit in there. 
+ */ + assert(Align > sizeof(Td)); + if(count <= Align - sizeof(Td)){ + td->data = td->sbuff; + td->buff = nil; + }else + td->data = td->buff = smalloc(Tdmaxpkt); + + pa = PADDR(td->data); + for(i = 0; i < nelem(td->buffer); i++){ + td->buffer[i] = pa; + if(i > 0) + td->buffer[i] &= ~0xFFF; + pa += 0x1000; + } + td->ndata = count; + if(a != nil && count > 0) + memmove(td->data, a, count); + coherence(); + io->toggle = nexttoggle(io->toggle, count, maxpkt); + coherence(); + return td; +} + +/* + * Try to get them idle + */ +static void +aborttds(Qh *qh) +{ + Td *td; + + qh->state = Qdone; + coherence(); + if(qh->sched >= 0 && (qh->eps0 & Qhspeedmask) != Qhhigh) + qh->eps0 |= Qhint; /* inactivate on next pass */ + coherence(); + for(td = qh->tds; td != nil; td = td->next){ + if(td->csw & Tdactive) + td->ndata = 0; + td->csw |= Tdhalt; + coherence(); + } +} + +/* + * Some controllers do not post the usb/error interrupt after + * the work has been done. It seems that we must poll for them. + */ +static int +workpending(void *a) +{ + Ctlr *ctlr; + + ctlr = a; + return ctlr->nreqs > 0; +} + +static void +ehcipoll(void* a) +{ + Hci *hp; + Ctlr *ctlr; + Poll *poll; + int i; + + hp = a; + ctlr = hp->aux; + poll = &ctlr->poll; + for(;;){ + if(ctlr->nreqs == 0){ + if(0)ddprint("ehcipoll %#p sleep\n", ctlr->capio); + sleep(poll, workpending, ctlr); + if(0)ddprint("ehcipoll %#p awaken\n", ctlr->capio); + } + for(i = 0; i < 16 && ctlr->nreqs > 0; i++) + if(ehciintr(hp) == 0) + break; + do{ + tsleep(&up->sleep, return0, 0, 1); + ehciintr(hp); + }while(ctlr->nreqs > 0); + } +} + +static void +pollcheck(Hci *hp) +{ + Ctlr *ctlr; + Poll *poll; + + ctlr = hp->aux; + poll = &ctlr->poll; + + if(poll->must != 0 && poll->does == 0){ + lock(poll); + if(poll->must != 0 && poll->does == 0){ + poll->does++; + print("ehci %#p: polling\n", ctlr->capio); + kproc("ehcipoll", ehcipoll, hp); + } + unlock(poll); + } +} + +static int +epiodone(void *a) +{ + Qh *qh; + + qh = a; + return qh->state != 
Qrun; +} + +static void +epiowait(Hci *hp, Qio *io, int tmout, ulong load) +{ + Qh *qh; + int timedout; + Ctlr *ctlr; + + ctlr = hp->aux; + qh = io->qh; + ddqprint("ehci %#p: io %#p sleep on qh %#p state %s\n", + ctlr->capio, io, qh, qhsname[qh->state]); + timedout = 0; + if(waserror()){ + dqprint("ehci %#p: io %#p qh %#p timed out\n", + ctlr->capio, io, qh); + timedout++; + }else{ + if(tmout == 0) + sleep(io, epiodone, qh); + else + tsleep(io, epiodone, qh, tmout); + poperror(); + } + + ilock(ctlr); + /* Are we missing interrupts? */ + if(qh->state == Qrun){ + iunlock(ctlr); + ehciintr(hp); + ilock(ctlr); + if(qh->state == Qdone){ + dqprint("ehci %#p: polling required\n", ctlr->capio); + ctlr->poll.must = 1; + pollcheck(hp); + } + } + + if(qh->state == Qrun){ +// dqprint("ehci %#p: io %#p qh %#p timed out (no intr?)\n", + iprint("ehci %#p: io %#p qh %#p timed out (no intr?)\n", + ctlr->capio, io, qh); + timedout = 1; + }else if(qh->state != Qdone && qh->state != Qclose) + panic("ehci: epio: queue state %d", qh->state); + if(timedout){ + aborttds(io->qh); + io->err = "request timed out"; + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + ilock(ctlr); + } + if(qh->state != Qclose) + qh->state = Qidle; + coherence(); + qhlinktd(qh, nil); + ctlr->load -= load; + ctlr->nreqs--; + iunlock(ctlr); +} + +/* + * Non iso I/O. + * To make it work for control transfers, the caller may + * lock the Qio for the entire control transfer. + */ +static long +epio(Ep *ep, Qio *io, void *a, long count, int mustlock) +{ + int saved, ntds, tmout; + long n, tot; + ulong load; + char *err; + char buf[128]; + uchar *c; + Ctlr *ctlr; + Qh* qh; + Td *td, *ltd, *td0, *ntd; + + qh = io->qh; + ctlr = ep->hp->aux; + io->debug = ep->debug; + tmout = ep->tmout; + ddeprint("epio: %s ep%d.%d io %#p count %ld load %uld\n", + io->tok == Tdtokin ? 
"in" : "out", + ep->dev->nb, ep->nb, io, count, ctlr->load); + if((ehcidebug > 1 || ep->debug > 1) && io->tok != Tdtokin){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("echi epio: user data: %s\n", buf); + } + if(mustlock){ + qlock(io); + if(waserror()){ + qunlock(io); + nexterror(); + } + } + io->err = nil; + ilock(ctlr); + if(qh->state == Qclose){ /* Tds released by cancelio */ + iunlock(ctlr); + error(io->err ? io->err : Eio); + } + if(qh->state != Qidle) + panic("epio: qh not idle"); + qh->state = Qinstall; + iunlock(ctlr); + + c = a; + td0 = ltd = nil; + load = tot = 0; + do{ + n = (Tdmaxpkt / ep->maxpkt) * ep->maxpkt; + if(count-tot < n) + n = count-tot; + if(c != nil && io->tok != Tdtokin) + td = epgettd(io, Tdactive, c+tot, n, ep->maxpkt); + else + td = epgettd(io, Tdactive, nil, n, ep->maxpkt); + if(td0 == nil) + td0 = td; + else + tdlinktd(ltd, td); + ltd = td; + tot += n; + load += ep->load; + }while(tot < count); + if(td0 == nil || ltd == nil) + panic("epio: no td"); + + ltd->csw |= Tdioc; /* the last one interrupts */ + coherence(); + + ddeprint("ehci: load %uld ctlr load %uld\n", load, ctlr->load); + if(ehcidebug > 1 || ep->debug > 1) + dumptd(td0, "epio: put: "); + + ilock(ctlr); + if(qh->state != Qclose){ + io->iotime = TK2MS(sys->ticks); + qh->state = Qrun; + coherence(); + qhlinktd(qh, td0); + ctlr->nreqs++; + ctlr->load += load; + } + iunlock(ctlr); + + if(ctlr->poll.does) + wakeup(&ctlr->poll); + + epiowait(ep->hp, io, tmout, load); + if(ehcidebug > 1 || ep->debug > 1){ + dumptd(td0, "epio: got: "); + qhdump(qh); + } + + tot = 0; + c = a; + saved = 0; + ntds = 0; + for(td = td0; td != nil; td = ntd){ + ntds++; + /* + * Use td tok, not io tok, because of setup packets. + * Also, we must save the next toggle value from the + * last completed Td (in case of a short packet, or + * fewer than the requested number of packets in the + * Td being transferred). 
+ */ + if(td->csw & (Tdhalt|Tdactive)) + saved++; + else{ + if(!saved){ + io->toggle = td->csw & Tddata1; + coherence(); + } + tot += td->ndata; + if(c != nil && (td->csw & Tdtok) == Tdtokin && td->ndata > 0){ + memmove(c, td->data, td->ndata); + c += td->ndata; + } + } + ntd = td->next; + tdfree(td); + } + err = io->err; + if(mustlock){ + qunlock(io); + poperror(); + } + ddeprint("epio: io %#p: %d tds: return %ld err '%s'\n", + io, ntds, tot, err); + if(err == Estalled) + return 0; /* that's our convention */ + if(err != nil) + error(err); + if(tot < 0) + error(Eio); + return tot; +} + +static long +epread(Ep *ep, void *a, long count) +{ + Ctlio *cio; + Qio *io; + Isoio *iso; + char buf[160]; + ulong delta; + + ddeprint("ehci: epread\n"); + if(ep->aux == nil) + panic("epread: not open"); + + pollcheck(ep->hp); + + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + qlock(cio); + if(waserror()){ + qunlock(cio); + nexterror(); + } + ddeprint("epread ctl ndata %d\n", cio->ndata); + if(cio->ndata < 0) + error("request expected"); + else if(cio->ndata == 0){ + cio->ndata = -1; + count = 0; + }else{ + if(count > cio->ndata) + count = cio->ndata; + if(count > 0) + memmove(a, cio->data, count); + /* BUG for big transfers */ + free(cio->data); + cio->data = nil; + cio->ndata = 0; /* signal EOF next time */ + } + qunlock(cio); + poperror(); + if(ehcidebug>1 || ep->debug){ + seprintdata(buf, buf+sizeof(buf), a, count); + print("epread: %s\n", buf); + } + return count; + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OREAD], a, count, 1); + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OREAD].iotime + 1; + if(delta < ep->pollival / 2) + tsleep(&up->sleep, return0, 0, ep->pollival/2 - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OREAD], a, count, 1); + case Tiso: + iso = ep->aux; + return episoread(ep, iso, a, count); + } + return -1; +} + +/* + * Control transfers are one setup write (data0) + * plus zero or 
more reads/writes (data1, data0, ...) + * plus a final write/read with data1 to ack. + * For both host to device and device to host we perform + * the entire transfer when the user writes the request, + * and keep any data read from the device for a later read. + * We call epio three times instead of placing all Tds at + * the same time because doing so leads to crc/tmout errors + * for some devices. + * Upon errors on the data phase we must still run the status + * phase or the device may cease responding in the future. + */ +static long +epctlio(Ep *ep, Ctlio *cio, void *a, long count) +{ + uchar *c; + long len; + + ddeprint("epctlio: cio %#p ep%d.%d count %ld\n", + cio, ep->dev->nb, ep->nb, count); + if(count < Rsetuplen) + error("short usb comand"); + qlock(cio); + free(cio->data); + cio->data = nil; + cio->ndata = 0; + if(waserror()){ + free(cio->data); + cio->data = nil; + cio->ndata = 0; + qunlock(cio); + nexterror(); + } + + /* set the address if unset and out of configuration state */ + if(ep->dev->state != Dconfig && ep->dev->state != Dreset) + if(cio->usbid == 0){ + cio->usbid = (ep->nb&Epmax) << 7 | ep->dev->nb&Devmax; + coherence(); + qhsetaddr(cio->qh, cio->usbid); + } + /* adjust maxpkt if the user has learned a different one */ + if(qhmaxpkt(cio->qh) != ep->maxpkt) + qhsetmaxpkt(cio->qh, ep->maxpkt); + c = a; + cio->tok = Tdtoksetup; + cio->toggle = Tddata0; + coherence(); + if(epio(ep, cio, a, Rsetuplen, 0) < Rsetuplen) + error(Eio); + a = c + Rsetuplen; + count -= Rsetuplen; + + cio->toggle = Tddata1; + if(c[Rtype] & Rd2h){ + cio->tok = Tdtokin; + len = GET2(c+Rcount); + if(len <= 0) + error("bad length in d2h request"); + if(len > Maxctllen) + error("d2h data too large to fit in ehci"); + a = cio->data = smalloc(len+1); + }else{ + cio->tok = Tdtokout; + len = count; + } + coherence(); + if(len > 0) + if(waserror()) + len = -1; + else{ + len = epio(ep, cio, a, len, 0); + poperror(); + } + if(c[Rtype] & Rd2h){ + count = Rsetuplen; + cio->ndata = 
len; + cio->tok = Tdtokout; + }else{ + if(len < 0) + count = -1; + else + count = Rsetuplen + len; + cio->tok = Tdtokin; + } + cio->toggle = Tddata1; + coherence(); + epio(ep, cio, nil, 0, 0); + qunlock(cio); + poperror(); + ddeprint("epctlio cio %#p return %ld\n", cio, count); + return count; +} + +static long +epwrite(Ep *ep, void *a, long count) +{ + Qio *io; + Ctlio *cio; + Isoio *iso; + ulong delta; + + pollcheck(ep->hp); + + ddeprint("ehci: epwrite ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux == nil) + panic("ehci: epwrite: not open"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + return epctlio(ep, cio, a, count); + case Tbulk: + io = ep->aux; + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tintr: + io = ep->aux; + delta = TK2MS(sys->ticks) - io[OWRITE].iotime + 1; + if(delta < ep->pollival) + tsleep(&up->sleep, return0, 0, ep->pollival - delta); + if(ep->clrhalt) + clrhalt(ep); + return epio(ep, &io[OWRITE], a, count, 1); + case Tiso: + iso = ep->aux; + return episowrite(ep, iso, a, count); + } + return -1; +} + +static void +isofsinit(Ep *ep, Isoio *iso) +{ + long left; + Sitd *td, *ltd; + int i; + ulong frno; + + left = 0; + ltd = nil; + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + td = sitdalloc(); + td->data = iso->data + i * ep->maxpkt; + td->epc = ep->dev->port << Stdportshift; + td->epc |= ep->dev->hub << Stdhubshift; + td->epc |= ep->nb << Stdepshift; + td->epc |= ep->dev->nb << Stddevshift; + td->mfs = 034 << Stdscmshift | 1 << Stdssmshift; + if(ep->mode == OREAD){ + td->epc |= Stdin; + td->mdata = ep->maxpkt; + }else{ + td->mdata = (ep->hz+left) * ep->pollival / 1000; + td->mdata *= ep->samplesz; + left = (ep->hz+left) * ep->pollival % 1000; + if(td->mdata > ep->maxpkt){ + print("ehci: ep%d.%d: size > maxpkt\n", + ep->dev->nb, ep->nb); + print("size = %d max = %ld\n", + td->mdata, ep->maxpkt); + td->mdata = ep->maxpkt; + } + } + coherence(); + + iso->sitdps[frno] = td; + coherence(); + 
sitdinit(iso, td); + if(ltd != nil) + ltd->next = td; + ltd = td; + frno = TRUNC(frno+ep->pollival, Nisoframes); + } + ltd->next = iso->sitdps[iso->td0frno]; + coherence(); +} + +static void +isohsinit(Ep *ep, Isoio *iso) +{ + int ival, p; + long left; + ulong frno, i, pa; + Itd *ltd, *td; + + iso->hs = 1; + ival = 1; + if(ep->pollival > 8) + ival = ep->pollival/8; + left = 0; + ltd = nil; + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + td = itdalloc(); + td->data = iso->data + i * 8 * iso->maxsize; + pa = PADDR(td->data) & ~0xFFF; + for(p = 0; p < 8; p++) + td->buffer[i] = pa + p * 0x1000; + td->buffer[0] = PADDR(iso->data) & ~0xFFF | + ep->nb << Itdepshift | ep->dev->nb << Itddevshift; + if(ep->mode == OREAD) + td->buffer[1] |= Itdin; + else + td->buffer[1] |= Itdout; + td->buffer[1] |= ep->maxpkt << Itdmaxpktshift; + td->buffer[2] |= ep->ntds << Itdntdsshift; + + if(ep->mode == OREAD) + td->mdata = 8 * iso->maxsize; + else{ + td->mdata = (ep->hz + left) * ep->pollival / 1000; + td->mdata *= ep->samplesz; + left = (ep->hz + left) * ep->pollival % 1000; + } + coherence(); + iso->itdps[frno] = td; + coherence(); + itdinit(iso, td); + if(ltd != nil) + ltd->next = td; + ltd = td; + frno = TRUNC(frno + ival, Nisoframes); + } +} + +static void +isoopen(Ctlr *ctlr, Ep *ep) +{ + int ival; /* pollival in ms */ + int tpf; /* tds per frame */ + int i, n, w, woff; + ulong frno; + Isoio *iso; + + iso = ep->aux; + switch(ep->mode){ + case OREAD: + iso->tok = Tdtokin; + break; + case OWRITE: + iso->tok = Tdtokout; + break; + default: + error("iso i/o is half-duplex"); + } + iso->usbid = ep->nb << 7 | ep->dev->nb & Devmax; + iso->state = Qidle; + coherence(); + iso->debug = ep->debug; + ival = ep->pollival; + tpf = 1; + if(ep->dev->speed == Highspeed){ + tpf = 8; + if(ival <= 8) + ival = 1; + else + ival /= 8; + } + assert(ival != 0); + iso->nframes = Nisoframes / ival; + if(iso->nframes < 3) + error("uhci isoopen bug"); /* we need at least 3 tds */ + iso->maxsize 
= ep->ntds * ep->maxpkt; + if(ctlr->load + ep->load > 800) + print("usb: ehci: bandwidth may be exceeded\n"); + ilock(ctlr); + ctlr->load += ep->load; + ctlr->isoload += ep->load; + ctlr->nreqs++; + dprint("ehci: load %uld isoload %uld\n", ctlr->load, ctlr->isoload); + diprint("iso nframes %d pollival %uld ival %d maxpkt %uld ntds %d\n", + iso->nframes, ep->pollival, ival, ep->maxpkt, ep->ntds); + iunlock(ctlr); + if(ctlr->poll.does) + wakeup(&ctlr->poll); + + /* + * From here on this cannot raise errors + * unless we catch them and release here all memory allocated. + */ + assert(ep->maxpkt > 0 && ep->ntds > 0 && ep->ntds < 4); + assert(ep->maxpkt <= 1024); + iso->tdps = smalloc(sizeof(uintptr) * Nisoframes); + iso->data = smalloc(iso->nframes * tpf * ep->ntds * ep->maxpkt); + iso->td0frno = TRUNC(ctlr->opio->frno + 10, Nisoframes); + /* read: now; write: 1s ahead */ + + if(ep->dev->speed == Highspeed) + isohsinit(ep, iso); + else + isofsinit(ep, iso); + iso->tdu = iso->tdi = iso->itdps[iso->td0frno]; + iso->stdu = iso->stdi = iso->sitdps[iso->td0frno]; + coherence(); + + ilock(ctlr); + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + *iso->tdps[frno] = ctlr->frames[frno]; + frno = TRUNC(frno+ival, Nisoframes); + } + + /* + * Iso uses a virtual frame window of Nisoframes, and we must + * fill the actual ctlr frame array by placing ctlr->nframes/Nisoframes + * copies of the window in the frame array. 
+ */ + assert(ctlr->nframes >= Nisoframes && Nisoframes >= iso->nframes); + assert(Nisoframes >= Nintrleafs); + n = ctlr->nframes / Nisoframes; + for(w = 0; w < n; w++){ + frno = iso->td0frno; + woff = w * Nisoframes; + for(i = 0; i < iso->nframes ; i++){ + assert(woff+frno < ctlr->nframes); + assert(iso->tdps[frno] != nil); + if(ep->dev->speed == Highspeed) + ctlr->frames[woff+frno] = PADDR(iso->tdps[frno]) + |Litd; + else + ctlr->frames[woff+frno] = PADDR(iso->tdps[frno]) + |Lsitd; + coherence(); + frno = TRUNC(frno+ep->pollival, Nisoframes); + } + } + coherence(); + iso->next = ctlr->iso; + ctlr->iso = iso; + coherence(); + iso->state = Qdone; + iunlock(ctlr); + if(ehcidebug > 1 || iso->debug >1) + isodump(iso, 0); +} + +/* + * Allocate the endpoint and set it up for I/O + * in the controller. This must follow what's said + * in Ep regarding configuration, including perhaps + * the saved toggles (saved on a previous close of + * the endpoint data file by epclose). + */ +static void +epopen(Ep *ep) +{ + Ctlr *ctlr; + Ctlio *cio; + Qio *io; + int usbid; + + ctlr = ep->hp->aux; + deprint("ehci: epopen ep%d.%d\n", ep->dev->nb, ep->nb); + if(ep->aux != nil) + panic("ehci: epopen called with open ep"); + if(waserror()){ + free(ep->aux); + ep->aux = nil; + nexterror(); + } + switch(ep->ttype){ + case Tnone: + error("endpoint not configured"); + case Tiso: + ep->aux = smalloc(sizeof(Isoio)); + isoopen(ctlr, ep); + break; + case Tctl: + cio = ep->aux = smalloc(sizeof(Ctlio)); + cio->debug = ep->debug; + cio->ndata = -1; + cio->data = nil; + if(ep->dev->isroot != 0 && ep->nb == 0) /* root hub */ + break; + cio->qh = qhalloc(ctlr, ep, cio, "epc"); + break; + case Tbulk: + ep->pollival = 1; /* assume this; doesn't really matter */ + /* and fall... 
*/ + case Tintr: + io = ep->aux = smalloc(sizeof(Qio)*2); + io[OREAD].debug = io[OWRITE].debug = ep->debug; + usbid = (ep->nb&Epmax) << 7 | ep->dev->nb &Devmax; + assert(ep->pollival != 0); + if(ep->mode != OREAD){ + if(ep->toggle[OWRITE] != 0) + io[OWRITE].toggle = Tddata1; + else + io[OWRITE].toggle = Tddata0; + io[OWRITE].tok = Tdtokout; + io[OWRITE].usbid = usbid; + io[OWRITE].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + io[OWRITE].qh = qhalloc(ctlr, ep, io+OWRITE, "epw"); + } + if(ep->mode != OWRITE){ + if(ep->toggle[OREAD] != 0) + io[OREAD].toggle = Tddata1; + else + io[OREAD].toggle = Tddata0; + io[OREAD].tok = Tdtokin; + io[OREAD].usbid = usbid; + io[OREAD].bw = ep->maxpkt*1000/ep->pollival; /* bytes/s */ + io[OREAD].qh = qhalloc(ctlr, ep, io+OREAD, "epr"); + } + break; + } + coherence(); + if(ehcidebug>1 || ep->debug) + dump(ep->hp); + deprint("ehci: epopen done\n"); + poperror(); +} + +static void +cancelio(Ctlr *ctlr, Qio *io) /* abort the Tds on io's queue head, mark it Qclose and free it */ +{ + Qh *qh; + + ilock(ctlr); + if(io == nil || io->qh == nil || io->qh->state == Qclose){ /* nothing to cancel */ + iunlock(ctlr); + return; + } + qh = io->qh; /* loaded only after io is known to be non-nil */ + dqprint("ehci: cancelio for qh %#p state %s\n", + qh, qhsname[qh->state]); + aborttds(qh); + qh->state = Qclose; + iunlock(ctlr); + if(!waserror()){ + tsleep(&up->sleep, return0, 0, Abortdelay); + poperror(); + } + wakeup(io); + qlock(io); + /* wait for epio if running */ + qunlock(io); + + qhfree(ctlr, qh); + io->qh = nil; +} + +static void +cancelisoio(Ctlr *ctlr, Isoio *iso, int pollival, ulong load) +{ + int frno, i, n, t, w, woff; + ulong *lp, *tp; + Isoio **il; + Itd *td; + Sitd *std; + + ilock(ctlr); + if(iso->state == Qclose){ + iunlock(ctlr); + return; + } + ctlr->nreqs--; + if(iso->state != Qrun && iso->state != Qdone) + panic("bad iso state"); + iso->state = Qclose; + coherence(); + if(ctlr->isoload < load) + panic("ehci: low isoload"); + ctlr->isoload -= load; + ctlr->load -= load; + for(il = &ctlr->iso; *il != nil; il = &(*il)->next) + if(*il == iso) + break; + if(*il == nil) 
+ panic("cancleiso: not found"); + *il = iso->next; + + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + tp = iso->tdps[frno]; + if(iso->hs != 0){ + td = iso->itdps[frno]; + for(t = 0; t < nelem(td->csw); t++) + td->csw[t] &= ~(Itdioc|Itdactive); + }else{ + std = iso->sitdps[frno]; + std->csw &= ~(Stdioc|Stdactive); + } + coherence(); + for(lp = &ctlr->frames[frno]; !(*lp & Lterm); + lp = &LPTR(*lp)[0]) + if(LPTR(*lp) == tp) + break; + if(*lp & Lterm) + panic("cancelisoio: td not found"); + *lp = tp[0]; + /* + * Iso uses a virtual frame window of Nisoframes, and we must + * restore pointers in copies of the window kept at ctlr->frames. + */ + if(lp == &ctlr->frames[frno]){ + n = ctlr->nframes / Nisoframes; + for(w = 1; w < n; w++){ + woff = w * Nisoframes; + ctlr->frames[woff+frno] = *lp; + } + } + coherence(); + frno = TRUNC(frno+pollival, Nisoframes); + } + iunlock(ctlr); + + /* + * wakeup anyone waiting for I/O and + * wait to be sure no I/O is in progress in the controller. + * and then wait to be sure episo* is no longer running. 
*/ + wakeup(iso); + diprint("cancelisoio iso %#p waiting for I/O to cease\n", iso); + tsleep(&up->sleep, return0, 0, 5); + qlock(iso); + qunlock(iso); + diprint("cancelisoio iso %#p releasing iso\n", iso); + + frno = iso->td0frno; + for(i = 0; i < iso->nframes; i++){ + if(iso->hs != 0) + itdfree(iso->itdps[frno]); + else + sitdfree(iso->sitdps[frno]); + iso->tdps[frno] = nil; + frno = TRUNC(frno+pollival, Nisoframes); + } + free(iso->tdps); + iso->tdps = nil; + free(iso->data); + iso->data = nil; + coherence(); +} + +static void +epclose(Ep *ep) +{ + Qio *io; + Ctlio *cio; + Isoio *iso; + Ctlr *ctlr; + + ctlr = ep->hp->aux; + deprint("ehci: epclose ep%d.%d\n", ep->dev->nb, ep->nb); + + if(ep->aux == nil) + panic("ehci: epclose called with closed ep"); + switch(ep->ttype){ + case Tctl: + cio = ep->aux; + cancelio(ctlr, cio); + free(cio->data); + cio->data = nil; + break; + case Tintr: + case Tbulk: + io = ep->aux; + ep->toggle[OREAD] = ep->toggle[OWRITE] = 0; + if(ep->mode != OWRITE){ + cancelio(ctlr, &io[OREAD]); + if(io[OREAD].toggle == Tddata1) + ep->toggle[OREAD] = 1; + } + if(ep->mode != OREAD){ + cancelio(ctlr, &io[OWRITE]); + if(io[OWRITE].toggle == Tddata1) + ep->toggle[OWRITE] = 1; + } + coherence(); + break; + case Tiso: + iso = ep->aux; + cancelisoio(ctlr, iso, ep->pollival, ep->load); + break; + default: + panic("epclose: bad ttype"); + } + free(ep->aux); + ep->aux = nil; +} + +/* + * return the smallest i such that (1 << i) >= n, i.e. the exponent of the smallest power of 2 >= n (0 when n <= 1) + */ +static int +flog2(int n) +{ + int i; + + for(i = 0; (1 << i) < n; i++) + ; + return i; +} + +/* + * build the periodic scheduling tree: + * framesize must be a multiple of the tree size + */ +static void +mkqhtree(Ctlr *ctlr) +{ + int i, n, d, o, leaf0, depth; + ulong leafs[Nintrleafs]; + Qh *qh; + Qh **tree; + Qtree *qt; + + depth = flog2(Nintrleafs); + n = (1 << (depth+1)) - 1; + qt = mallocz(sizeof(*qt), 1); + if(qt == nil) + panic("ehci: mkqhtree: no memory"); + qt->nel = n; + qt->depth = depth; + qt->bw = mallocz(n * 
sizeof(qt->bw), 1); + qt->root = tree = mallocz(n * sizeof(Qh *), 1); + if(qt->bw == nil || tree == nil) + panic("ehci: mkqhtree: no memory"); + for(i = 0; i < n; i++){ + tree[i] = qh = edalloc(); + if(qh == nil) + panic("ehci: mkqhtree: no memory"); + qh->nlink = qh->alink = qh->link = Lterm; + qh->csw = Tdhalt; + qh->state = Qidle; + coherence(); + if(i > 0) + qhlinkqh(tree[i], tree[(i-1)/2]); + } + ctlr->ntree = i; + dprint("ehci: tree: %d endpoints allocated\n", i); + + /* distribute leaves evenly round the frame list */ + leaf0 = n / 2; + for(i = 0; i < Nintrleafs; i++){ + o = 0; + for(d = 0; d < depth; d++){ + o <<= 1; + if(i & (1 << d)) + o |= 1; + } + if(leaf0 + o >= n){ + print("leaf0=%d o=%d i=%d n=%d\n", leaf0, o, i, n); + break; + } + leafs[i] = PADDR(tree[leaf0 + o]) | Lqh; + } + assert((ctlr->nframes % Nintrleafs) == 0); + for(i = 0; i < ctlr->nframes; i += Nintrleafs){ + memmove(ctlr->frames + i, leafs, sizeof leafs); + coherence(); + } + ctlr->tree = qt; + coherence(); +} + +void +ehcimeminit(Ctlr *ctlr) +{ + int i, frsize; + Eopio *opio; + + opio = ctlr->opio; + frsize = ctlr->nframes * sizeof(ulong); + assert((frsize & 0xFFF) == 0); /* must be 4k aligned */ + ctlr->frames = mallocalign(frsize, frsize, 0, 0); + if(ctlr->frames == nil) + panic("ehci reset: no memory"); + + for (i = 0; i < ctlr->nframes; i++) + ctlr->frames[i] = Lterm; + opio->frbase = PADDR(ctlr->frames); + opio->frno = 0; + coherence(); + + qhalloc(ctlr, nil, nil, nil); /* init async list */ + mkqhtree(ctlr); /* init sync list */ + edfree(edalloc()); /* try to get some ones pre-allocated */ + + dprint("ehci %#p flb %#lux frno %#lux\n", + ctlr->capio, opio->frbase, opio->frno); +} + +static void +init(Hci *hp) +{ + Ctlr *ctlr; + Eopio *opio; + int i; + static int ctlrno; + + hp->highspeed = 1; + ctlr = hp->aux; + opio = ctlr->opio; + dprint("ehci %#p init\n", ctlr->capio); + + ilock(ctlr); + /* + * Unless we activate frroll interrupt + * some machines won't post other interrupts. 
+ */ + opio->intr = Iusb|Ierr|Iportchg|Ihcerr|Iasync; + coherence(); + opio->cmd |= Cpse; + coherence(); + opio->cmd |= Case; + coherence(); + ehcirun(ctlr, 1); + /* + * route all ports by default to only one ehci (the first). + * it's not obvious how multiple ehcis could work and on some + * machines, setting Callmine on all ehcis makes the machine seize up. + */ + opio->config = (ctlrno == 0? Callmine: 0); + coherence(); + + for (i = 0; i < hp->nports; i++) + opio->portsc[i] = Pspower; + iunlock(ctlr); + if(ehcidebug > 1) + dump(hp); + ctlrno++; +} + +void +ehcilinkage(Hci *hp) +{ + hp->init = init; + hp->dump = dump; + hp->interrupt = interrupt; + hp->epopen = epopen; + hp->epclose = epclose; + hp->epread = epread; + hp->epwrite = epwrite; + hp->seprintep = seprintep; + hp->portenable = portenable; + hp->portreset = portreset; + hp->portstatus = portstatus; +// hp->shutdown = shutdown; +// hp->debug = setdebug; + hp->type = "ehci"; +} diff -Nru 0/sys/src/nix/port/xalloc.c 4/sys/src/nix/port/xalloc.c --- 0/sys/src/nix/port/xalloc.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/port/xalloc.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,269 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +THIS FILE IS NOT USED FOR NIX. +I'm leaving it here, and making sure it does not compile. 
-nemo + +enum +{ + Nhole = 128, + Magichole = 0x484F4C45, /* HOLE */ +}; + +typedef struct Hole Hole; +typedef struct Xalloc Xalloc; +typedef struct Xhdr Xhdr; + +struct Hole +{ + uintptr addr; + ulong size; + uintptr top; + Hole* link; +}; + +struct Xhdr +{ + ulong size; + ulong magix; + char data[]; +}; + +struct Xalloc +{ + Lock; + Hole hole[Nhole]; + Hole* flist; + Hole* table; +}; + +static Xalloc xlists; + +extern void* xalloc(ulong); +extern int xmerge(void*, void*); +extern void xhole(uintptr, ulong); +extern void xsummary(void); + +void +xinit(void) +{ + int i, n, upages, kpages; + ulong maxkpa; + Confmem *m; + Pallocmem *pm; + Hole *h, *eh; + + eh = &xlists.hole[Nhole-1]; + for(h = xlists.hole; h < eh; h++) + h->link = h+1; + + xlists.flist = xlists.hole; + + upages = conf.upages; + kpages = conf.npage - upages; + pm = palloc.mem; + maxkpa = -KZERO; + for(i=0; inpage; + if(n > kpages) + n = kpages; + if(m->base >= maxkpa) + n = 0; + else if(n > 0 && m->base+n*PGSZ >= maxkpa) + n = (maxkpa - m->base)/PGSZ; + /* first give to kernel */ + if(n > 0){ + m->kbase = PTR2UINT(KADDR(m->base)); + m->klimit = PTR2UINT(KADDR(m->base+n*PGSZ)); + xhole(m->base, n*PGSZ); + kpages -= n; + } + /* if anything left over, give to user */ + if(n < m->npage){ + if(pm >= palloc.mem+nelem(palloc.mem)){ + print("xinit: losing %lud pages\n", m->npage-n); + continue; + } + pm->base = m->base+n*PGSZ; + pm->npage = m->npage - n; + pm++; + } + } + xsummary(); +} + +void* +xspanalloc(ulong size, int align, ulong span) +{ + uintptr a, v, t; + + a = PTR2UINT(xalloc(size+align+span)); + if(a == 0) + panic("xspanalloc: %lud %d %lux\n", size, align, span); + + if(span > 2) { + v = (a + span) & ~(span-1); + t = v - a; + if(t > 0) + xhole(PADDR(UINT2PTR(a)), t); + t = a + span - v; + if(t > 0) + xhole(PADDR(UINT2PTR(v+size+align)), t); + } + else + v = a; + + if(align > 1) + v = (v + align) & ~(align-1); + + return (void*)v; +} + +void* +xallocz(ulong size, int zero) +{ + Xhdr *p; + Hole *h, 
**l; + + size += BY2V + offsetof(Xhdr, data[0]); + size &= ~(BY2V-1); + + ilock(&xlists); + l = &xlists.table; + for(h = *l; h; h = h->link) { + if(h->size >= size) { + p = (Xhdr*)KADDR(h->addr); + h->addr += size; + h->size -= size; + if(h->size == 0) { + *l = h->link; + h->link = xlists.flist; + xlists.flist = h; + } + iunlock(&xlists); + if(zero) + memset(p->data, 0, size); + p->magix = Magichole; + p->size = size; + return p->data; + } + l = &h->link; + } + iunlock(&xlists); + return nil; +} + +void* +xalloc(ulong size) +{ + return xallocz(size, 1); +} + +void +xfree(void *p) +{ + Xhdr *x; + + x = UINT2PTR((PTR2UINT(p) - offsetof(Xhdr, data[0]))); + if(x->magix != Magichole) { + xsummary(); + panic("xfree(%#p) %#ux != %#lux", p, Magichole, x->magix); + } + xhole(PADDR(x), x->size); +} + +int +xmerge(void *vp, void *vq) +{ + Xhdr *p, *q; + + p = UINT2PTR((PTR2UINT(vp) - offsetof(Xhdr, data[0]))); + q = UINT2PTR((PTR2UINT(vq) - offsetof(Xhdr, data[0]))); + if(p->magix != Magichole || q->magix != Magichole) { + xsummary(); + panic("xmerge(%#p, %#p) bad magic %#lux, %#lux\n", + vp, vq, p->magix, q->magix); + } + if((uchar*)p+p->size == (uchar*)q) { + p->size += q->size; + return 1; + } + return 0; +} + +void +xhole(uintptr addr, ulong size) +{ + uintptr top; + Hole *h, *c, **l; + + if(size == 0) + return; + + top = addr + size; + ilock(&xlists); + l = &xlists.table; + for(h = *l; h; h = h->link) { + if(h->top == addr) { + h->size += size; + h->top = h->addr+h->size; + c = h->link; + if(c && h->top == c->addr) { + h->top += c->size; + h->size += c->size; + h->link = c->link; + c->link = xlists.flist; + xlists.flist = c; + } + iunlock(&xlists); + return; + } + if(h->addr > addr) + break; + l = &h->link; + } + if(h && top == h->addr) { + h->addr -= size; + h->size += size; + iunlock(&xlists); + return; + } + + if(xlists.flist == nil) { + iunlock(&xlists); + print("xfree: no free holes, leaked %lud bytes\n", size); + return; + } + + h = xlists.flist; + xlists.flist = 
h->link; + h->addr = addr; + h->top = top; + h->size = size; + h->link = *l; + *l = h; + iunlock(&xlists); +} + +void +xsummary(void) +{ + int i; + Hole *h; + + i = 0; + for(h = xlists.flist; h; h = h->link) + i++; + + print("%d holes free\n", i); + i = 0; + for(h = xlists.table; h; h = h->link) { + print("%.8p %.8p %lud\n", h->addr, h->top, h->size); + i += h->size; + } + print("%d bytes free\n", i); +} diff -Nru 0/sys/src/nix/root/big.c 4/sys/src/nix/root/big.c --- 0/sys/src/nix/root/big.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/big.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,20 @@ +#include +#include + +void +main(int argc, char *argv[]) +{ + ulong num = 1; + uvlong size; + u8int *c; + + if (argc > 1) + num = strtoul(argv[1], 0, 0); + size = num * 0x200000ULL; + print("Try to malloc %ulld bytes\n", size); + c = mallocz(size, 1); + print("Did it\n"); + while(1); +} + +/* 6c big.c; 6l -o big big.6 */ diff -Nru 0/sys/src/nix/root/bigloop.c 4/sys/src/nix/root/bigloop.c --- 0/sys/src/nix/root/bigloop.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/bigloop.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,22 @@ +#include +#include + +void +main(int argc, char *argv[]) +{ + ulong num = 1; + int i; + + if (argc > 1) + num = strtoul(argv[1], 0, 0); + print("Try to malloc %ulld bytes in %ld loops\n", num*0x200000ULL, num); + for(i = 0; i < num; i++) + if (sbrk(0x200000) == nil){ + print("%d sbrk failed\n", i); + break; + } + print("Did it\n"); + while(1); +} + +/* 6c bigloop.c; 6l -o bigloop bigloop.6 */ diff -Nru 0/sys/src/nix/root/blow.c 4/sys/src/nix/root/blow.c --- 0/sys/src/nix/root/blow.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/blow.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,239 @@ +#include +#include + +#define ESTR 256 + +static void +error(char* fmt, ...) 
+{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p-estr); +} + +static void +fatal(char* fmt, ...) +{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p - estr); + exits("fatal"); +} + +static void +usage(void) +{ + char *e, estr[ESTR], *p; + + e = estr + ESTR; + p = seprint(estr, e, "usage: %s" + " [whatever]" + "\n", + argv0); + write(2, estr, p-estr); + exits("usage"); +} + +#define F(v, o, w) (((v) & ((1<<(w))-1))<<(o)) + +enum { + X = 0, /* dimension */ + Y = 1, + Z = 2, + N = 3, + + Chunk = 32, /* granularity of FIFO */ + Pchunk = 8, /* Chunks in a packet */ + + Quad = 16, +}; + +/* + * Packet header. The hardware requires an 8-byte header + * of which the last two are reserved (they contain a sequence + * number and a header checksum inserted by the hardware). + * The hardware also requires the packet to be aligned on a + * 128-bit boundary for loading into the HUMMER. + */ +typedef struct Tpkt Tpkt; +struct Tpkt { + u8int sk; /* Skip Checksum Control */ + u8int hint; /* Hint|Dp|Pid0 */ + u8int size; /* Size|Pid1|Dm|Dy|VC */ + u8int dst[N]; /* Destination Coordinates */ + u8int _6_[2]; /* reserved */ + u8int _8_[8]; /* protocol header */ + u8int payload[]; +}; + +/* + * SKIP is a field in .sk giving the number of 2-bytes + * to skip from the top of the packet before including + * the packet bytes into the running checksum. + * SIZE is a field in .size giving the size of the + * packet in 32-byte 'chunks'. 
+ */ +#define SKIP(n) F(n, 1, 7) +#define SIZE(n) F(n, 5, 3) + +enum { + Sk = 0x01, /* Skip Checksum */ + + Pid0 = 0x01, /* Destination Group FIFO MSb */ + Dp = 0x02, /* Multicast Deposit */ + Hzm = 0x04, /* Z- Hint */ + Hzp = 0x08, /* Z+ Hint */ + Hym = 0x10, /* Y- Hint */ + Hyp = 0x20, /* Y+ Hint */ + Hxm = 0x40, /* X- Hint */ + Hxp = 0x80, /* X+ Hint */ + + Vcd0 = 0x00, /* Dynamic 0 VC */ + Vcd1 = 0x01, /* Dynamic 1 VC */ + Vcbn = 0x02, /* Deterministic Bubble VC */ + Vcbp = 0x03, /* Deterministic Priority VC */ + Dy = 0x04, /* Dynamic Routing */ + Dm = 0x08, /* DMA Mode */ + Pid1 = 0x10, /* Destination Group FIFO LSb */ +}; + +static int +torusparse(u8int d[3], char* item, char* buf) +{ + int n; + char *p; + + if((p = strstr(buf, item)) == nil || (p != buf && *(p-1) != '\n')) + return -1; + n = strlen(item); + if(strlen(p) < n+sizeof(": x 0 y 0 z 0")) + return -1; + p += n+sizeof(": x ")-1; + if(strncmp(p-4, ": x ", 4) != 0) + return -1; + if((n = strtol(p, &p, 0)) > 255 || *p != ' ' || *(p+1) != 'y') + return -1; + d[0] = n; + if((n = strtol(p+2, &p, 0)) > 255 || *p != ' ' || *(p+1) != 'z') + return -1; + d[1] = n; + if((n = strtol(p+2, &p, 0)) > 255 || (*p != '\n' && *p != '\0')) + return -1; + d[2] = n; + + return 0; +} + +void +main(int argc, char* argv[]) +{ + Tpkt *tpkt; + u8int d[N]; + char buf[512], *p; + uvlong r, start, stop; + int count, fd, i, length, mhz, n, x, y, z; + + count = 1; + length = Pchunk*Chunk; + mhz = 700; + + ARGBEGIN{ + default: + usage(); + break; + case 'l': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + if(n % Chunk) + usage(); + length = n; + break; + case 'm': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + mhz = n; + break; + case 'n': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + count = n; + break; + }ARGEND; + + if(argc != 3) + usage(); + if((x = strtol(argv[0], 
&p, 0)) < 0 || *p != 0) + fatal("x invalid: %s\n", argv[0]); /* argv[] are strings: %s, not %d */ + if((y = strtol(argv[1], &p, 0)) < 0 || *p != 0) + fatal("y invalid: %s\n", argv[1]); + if((z = strtol(argv[2], &p, 0)) <= 0 || *p != 0) + fatal("z invalid: %s\n", argv[2]); + z -= 1; + + if((fd = open("/dev/torusstatus", OREAD)) < 0) + fatal("open /dev/torusstatus: %r\n"); + if((n = read(fd, buf, sizeof(buf)-1)) < 0) /* leave room for the terminating NUL stored below */ + fatal("read /dev/torusstatus: %r\n"); + close(fd); + buf[n] = 0; + + if(torusparse(d, "size", buf) < 0) + fatal("parse /dev/torusstatus: <%s>\n", buf); + if(x >= d[X] || y >= d[Y] || z >= d[Z]) + fatal("destination out of range: %d.%d.%d >= %d.%d.%d", + x, y, z, d[X], d[Y], d[Z]); + + if((tpkt = mallocalign(length, Chunk, 0, 0)) == nil) + fatal("mallocalign tpkt\n"); + memset(tpkt, 0, length); + + tpkt->sk = SKIP(4); + tpkt->hint = 0; + tpkt->size = SIZE(Pchunk-1)|Dy|Vcd0; + tpkt->dst[X] = x; + tpkt->dst[Y] = y; + tpkt->dst[Z] = z; + + if((fd = open("/dev/torus", ORDWR)) < 0) + fatal("open /dev/torus: %r\n"); + + cycles(&start); + for(i = 0; i < count; i++){ + n = pwrite(fd, tpkt, length, 0); + if(n < 0) + fatal("write /dev/torus: %r\n", n); + else if(n < length) + fatal("write /dev/torus: short write %d\n", n); + } + cycles(&stop); + + close(fd); + + r = (count*length); + r *= mhz; + r /= stop - start; + + print("%d writes of %d in %llud cycles @ %dMHz = %llud MB/s\n", + count, length, stop - start, mhz, r); + + exits(0); +} diff -Nru 0/sys/src/nix/root/common 4/sys/src/nix/root/common --- 0/sys/src/nix/root/common Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/common Wed Feb 6 00:00:00 2013 @@ -0,0 +1,303 @@ +# +# The master for this file is /n/fs/lib/ndb/common +# + +# +# real dns root server ips +# +dom=A.ROOT-SERVERS.NET ip=198.41.0.4 +dom=B.ROOT-SERVERS.NET ip=128.9.0.107 +dom=C.ROOT-SERVERS.NET ip=192.33.4.12 +dom=D.ROOT-SERVERS.NET ip=128.8.10.90 +dom=E.ROOT-SERVERS.NET ip=192.203.230.10 +dom=F.ROOT-SERVERS.NET ip=192.5.5.241 +dom=G.ROOT-SERVERS.NET ip=192.112.36.4 
+dom=H.ROOT-SERVERS.NET ip=128.63.2.53 +dom=I.ROOT-SERVERS.NET ip=192.36.148.17 +dom=J.ROOT-SERVERS.NET ip=198.41.0.10 +dom=K.ROOT-SERVERS.NET ip=193.0.14.129 +dom=L.ROOT-SERVERS.NET ip=198.32.64.12 # old + ip=199.7.83.42 # new, nov 2007 +dom=M.ROOT-SERVERS.NET ip=202.12.27.33 + +dom=a.gtld-servers.net ip=192.5.6.30 +dom=b.gtld-servers.net ip=192.33.14.30 +dom=c.gtld-servers.net ip=192.26.92.30 +dom=d.gtld-servers.net ip=192.31.80.30 +dom=e.gtld-servers.net ip=192.12.94.30 +dom=f.gtld-servers.net ip=192.35.51.30 +dom=g.gtld-servers.net ip=192.42.93.30 +dom=h.gtld-servers.net ip=192.54.112.30 +dom=i.gtld-servers.net ip=192.43.172.30 +dom=j.gtld-servers.net ip=192.48.79.30 +dom=k.gtld-servers.net ip=192.52.178.30 +dom=l.gtld-servers.net ip=192.41.162.30 +dom=m.gtld-servers.net ip=192.55.83.30 + +# +# spam defense. unfortunately, arin doesn't give negative +# rcodes for these non-routable addresses. we'll do it for them. +# +dom=10.in-addr.arpa soa= # rfc1918 zones + dom=16.172.in-addr.arpa soa= + dom=17.172.in-addr.arpa soa= + dom=18.172.in-addr.arpa soa= + dom=19.172.in-addr.arpa soa= + dom=20.172.in-addr.arpa soa= + dom=21.172.in-addr.arpa soa= + dom=22.172.in-addr.arpa soa= + dom=23.172.in-addr.arpa soa= + dom=24.172.in-addr.arpa soa= + dom=25.172.in-addr.arpa soa= + dom=26.172.in-addr.arpa soa= + dom=27.172.in-addr.arpa soa= + dom=28.172.in-addr.arpa soa= + dom=29.172.in-addr.arpa soa= + dom=30.172.in-addr.arpa soa= + dom=31.172.in-addr.arpa soa= + dom=168.192.in-addr.arpa soa= + dom=0.in-addr.arpa soa= # rfc3330 zones + dom=127.in-addr.arpa soa= + dom=254.169.in-addr.arpa soa= + dom=2.0.192.in-addr.arpa soa= + dom=255.255.255.255.in-addr.arpa soa= + dom=d.f.ip6.arpa soa= # rfc4193 recommendation + dom=0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa soa= # rfc4291 zones + dom=1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.ip6.arpa soa= + dom=8.e.f.ip6.arpa soa= + dom=9.e.f.ip6.arpa soa= + dom=a.e.f.ip6.arpa soa= + 
dom=b.e.f.ip6.arpa soa= + refresh=3600 ttl=3600 + ns=ns1.cs.bell-labs.com + ns=ns2.cs.bell-labs.com + +# +# ip protocol numbers +# +protocol=reserved ipv4proto=0 +protocol=icmp ipv4proto=1 +protocol=igmp ipv4proto=2 +protocol=ggp ipv4proto=3 +protocol=ip ipv4proto=4 +protocol=st ipv4proto=5 +protocol=tcp ipv4proto=6 +protocol=ucl ipv4proto=7 +protocol=egp ipv4proto=8 +protocol=igp ipv4proto=9 +protocol=bbn-rcc-mon ipv4proto=10 +protocol=nvp-ii ipv4proto=11 +protocol=pup ipv4proto=12 +protocol=argus ipv4proto=13 +protocol=emcon ipv4proto=14 +protocol=xnet ipv4proto=15 +protocol=chaos ipv4proto=16 +protocol=udp ipv4proto=17 +protocol=mux ipv4proto=18 +protocol=dcn-meas ipv4proto=19 +protocol=hmp ipv4proto=20 +protocol=prm ipv4proto=21 +protocol=xns-idp ipv4proto=22 +protocol=trunk-1 ipv4proto=23 +protocol=trunk-2 ipv4proto=24 +protocol=leaf-1 ipv4proto=25 +protocol=leaf-2 ipv4proto=26 +protocol=rdp ipv4proto=27 +protocol=irtp ipv4proto=28 +protocol=iso-tp4 ipv4proto=29 +protocol=netblt ipv4proto=30 +protocol=mfe-nsp ipv4proto=31 +protocol=merit-inp ipv4proto=32 +protocol=sep ipv4proto=33 +protocol=3pc ipv4proto=34 +protocol=idpr ipv4proto=35 +protocol=xtp ipv4proto=36 +protocol=ddp ipv4proto=37 +protocol=idpr-cmtp ipv4proto=38 +protocol=tp++ ipv4proto=39 +protocol=il ipv4proto=40 +protocol=sip ipv4proto=41 +protocol=sdrp ipv4proto=42 +protocol=sip-sr ipv4proto=43 +protocol=sip-frag ipv4proto=44 +protocol=idrp ipv4proto=45 +protocol=rsvp ipv4proto=46 +protocol=gre ipv4proto=47 +protocol=mhrp ipv4proto=48 +protocol=bna ipv4proto=49 +protocol=sipp-esp ipv4proto=50 +protocol=sipp-ah ipv4proto=51 +protocol=i-nlsp ipv4proto=52 +protocol=swipe ipv4proto=53 +protocol=nhrp ipv4proto=54 +protocol=any ipv4proto=61 +protocol=cftp ipv4proto=62 +protocol=any ipv4proto=63 +protocol=sat-expak ipv4proto=64 +protocol=kryptolan ipv4proto=65 +protocol=rvd ipv4proto=66 +protocol=ippc ipv4proto=67 +protocol=any ipv4proto=68 +protocol=sat-mon ipv4proto=69 +protocol=visa ipv4proto=70 
+protocol=ipcv ipv4proto=71 +protocol=cpnx ipv4proto=72 +protocol=cphb ipv4proto=73 +protocol=wsn ipv4proto=74 +protocol=pvp ipv4proto=75 +protocol=br-sat-mon ipv4proto=76 +protocol=sun-nd ipv4proto=77 +protocol=wb-mon ipv4proto=78 +protocol=wb-expak ipv4proto=79 +protocol=iso-ip ipv4proto=80 +protocol=vmtp ipv4proto=81 +protocol=secure-vmtp ipv4proto=82 +protocol=vines ipv4proto=83 +protocol=ttp ipv4proto=84 +protocol=nsfnet-igp ipv4proto=85 +protocol=dgp ipv4proto=86 +protocol=tcf ipv4proto=87 +protocol=igrp ipv4proto=88 +protocol=ospfigp ipv4proto=89 protocol=ospf +protocol=sprite-rpc ipv4proto=90 +protocol=larp ipv4proto=91 +protocol=mtp ipv4proto=92 +protocol=ax.25 ipv4proto=93 +protocol=ipip ipv4proto=94 +protocol=micp ipv4proto=95 +protocol=scc-sp ipv4proto=96 +protocol=etherip ipv4proto=97 +protocol=encap ipv4proto=98 +protocol=any ipv4proto=99 +protocol=gmtp ipv4proto=100 +protocol=rudp ipv4proto=254 # unofficial + +# +# services +# +tcp=cs port=1 +tcp=echo port=7 +tcp=discard port=9 +tcp=systat port=11 +tcp=daytime port=13 +tcp=netstat port=15 +tcp=chargen port=19 +tcp=ftp-data port=20 +tcp=ftp port=21 +tcp=ssh port=22 +tcp=telnet port=23 +tcp=smtp port=25 +tcp=time port=37 +tcp=whois port=43 +tcp=dns port=53 +tcp=domain port=53 +tcp=uucp port=64 +tcp=gopher port=70 +tcp=rje port=77 +tcp=finger port=79 +tcp=http port=80 +tcp=link port=87 +tcp=supdup port=95 +tcp=hostnames port=101 +tcp=iso-tsap port=102 +tcp=x400 port=103 +tcp=x400-snd port=104 +tcp=csnet-ns port=105 +tcp=pop-2 port=109 +tcp=pop3 port=110 +tcp=portmap port=111 +tcp=uucp-path port=117 +tcp=nntp port=119 +tcp=netbios port=139 +tcp=imap4 port=143 +tcp=imap port=143 +tcp=NeWS port=144 +tcp=print-srv port=170 +tcp=z39.50 port=210 +tcp=fsb port=400 +tcp=sysmon port=401 +tcp=proxy port=402 +tcp=proxyd port=404 +tcp=https port=443 +tcp=cifs port=445 +tcp=ssmtp port=465 +tcp=rexec port=512 restricted= +tcp=login port=513 restricted= +tcp=shell port=514 restricted= +tcp=printer port=515 +tcp=ncp 
port=524 +tcp=courier port=530 +tcp=cscan port=531 +tcp=uucp port=540 +tcp=snntp port=563 +tcp=9fs port=564 +tcp=whoami port=565 +tcp=guard port=566 +tcp=ticket port=567 +tcp=fmclient port=729 +tcp=imaps port=993 +tcp=pop3s port=995 +tcp=ingreslock port=1524 +tcp=pptp port=1723 +tcp=nfs port=2049 +tcp=webster port=2627 +tcp=weather port=3000 +tcp=sip port=5060 +tcp=sips port=5061 +tcp=secstore port=5356 +tcp=vnc-http port=5800 +tcp=vnc port=5900 +tcp=Xdisplay port=6000 +tcp=styx port=6666 +tcp=mpeg port=6667 +tcp=rstyx port=6668 +tcp=infdb port=6669 +tcp=infsigner port=6671 +tcp=infcsigner port=6672 +tcp=inflogin port=6673 +tcp=bandt port=7330 +tcp=face port=32000 +tcp=dhashgate port=11978 +tcp=exportfs port=17007 +tcp=rexexec port=17009 +tcp=ncpu port=17010 +tcp=cpu port=17013 +tcp=venti port=17034 +tcp=wiki port=17035 +tcp=vica port=17036 + +udp=echo port=7 +udp=tacacs port=49 +udp=tftp port=69 +udp=bootpc port=68 +udp=bootp port=67 +udp=domain port=53 +udp=dns port=53 +udp=portmap port=111 +udp=ntp port=123 +udp=netbios-ns port=137 +udp=snmp port=161 +udp=ikev2 port=500 +udp=syslog port=514 +udp=rip port=520 +udp=dhcp6c port=546 +udp=dhcp6s port=547 +udp=nfs port=2049 +udp=bfs port=2201 +udp=virgil port=2202 +udp=sip port=5060 +udp=bandt2 port=7331 +udp=oradius port=1812 +udp=radius port=1812 +udp=dhash port=11977 +udp=ulctl port=12666 +udp=uldata port=12667 +udp=dldata port=12668 + +gre=ppp port=34827 + +# +# authdom declarations need to be visible on the inside network, +# even for outside machines. putting them here ensures +# their visibility everywhere. 
+# diff -Nru 0/sys/src/nix/root/cpu.c 4/sys/src/nix/root/cpu.c --- 0/sys/src/nix/root/cpu.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/cpu.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,1153 @@ +/* + * cpu.c - Make a connection to a cpu server + * + * Invoked by listen as 'cpu -R | -N service net netdir' + * by users as 'cpu [-h system] [-c cmd args ...]' + */ + +#include +#include +#include +#include +#include +#include + +#define Maxfdata 8192 +#define MaxStr 128 + +void remoteside(int); +void fatal(int, char*, ...); +void lclnoteproc(int); +void rmtnoteproc(void); +void catcher(void*, char*); +void usage(void); +void writestr(int, char*, char*, int); +int readstr(int, char*, int); +char *rexcall(int*, char*, char*); +int setamalg(char*); +char *keyspec = ""; + +int notechan; +int exportpid; +char *system; +int cflag; +int dbg; +char *user; +char *patternfile; + +char *srvname = "ncpu"; +char *exportfs = "/bin/exportfs"; +char *ealgs = "rc4_256 sha1"; + +/* message size for exportfs; may be larger so we can do big graphics in CPU window */ +int msgsize = Maxfdata+IOHDRSZ; + +/* authentication mechanisms */ +static int netkeyauth(int); +static int netkeysrvauth(int, char*); +static int p9auth(int); +static int srvp9auth(int, char*); +static int noauth(int); +static int srvnoauth(int, char*); + +typedef struct AuthMethod AuthMethod; +struct AuthMethod { + char *name; /* name of method */ + int (*cf)(int); /* client side authentication */ + int (*sf)(int, char*); /* server side authentication */ +} authmethod[] = +{ + { "p9", p9auth, srvp9auth,}, + { "netkey", netkeyauth, netkeysrvauth,}, + { "none", noauth, srvnoauth,}, + { nil, nil} +}; +AuthMethod *am = authmethod; /* default is p9 */ + +char *p9authproto = "p9any"; + +int setam(char*); + +void +usage(void) +{ + fprint(2, "usage: cpu [-h system] [-u user] [-a authmethod] [-e 'crypt hash'] [-k keypattern] [-P patternfile] [-c cmd args ...]\n"); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + char 
dat[MaxStr], buf[MaxStr], cmd[MaxStr], *p, *err; + int ac, fd, ms, data; + char *av[10]; + + /* see if we should use a larger message size */ + fd = open("/dev/draw", OREAD); + if(fd > 0){ + ms = iounit(fd); + if(msgsize < ms+IOHDRSZ) + msgsize = ms+IOHDRSZ; + close(fd); + } + + user = getuser(); + if(user == nil) + fatal(1, "can't read user name"); + ARGBEGIN{ + case 'a': + p = EARGF(usage()); + if(setam(p) < 0) + fatal(0, "unknown auth method %s", p); + break; + case 'e': + ealgs = EARGF(usage()); + if(*ealgs == 0 || strcmp(ealgs, "clear") == 0) + ealgs = nil; + break; + case 'd': + dbg++; + break; + case 'f': + /* ignored but accepted for compatibility */ + break; + case 'O': + p9authproto = "p9sk2"; + remoteside(1); /* From listen */ + break; + case 'R': /* From listen */ + remoteside(0); + break; + case 'h': + system = EARGF(usage()); + break; + case 'c': + cflag++; + cmd[0] = '!'; + cmd[1] = '\0'; + while(p = ARGF()) { + strcat(cmd, " "); + strcat(cmd, p); + } + break; + case 'k': + keyspec = smprint("%s %s", keyspec, EARGF(usage())); + break; + case 'P': + patternfile = EARGF(usage()); + break; + case 'u': + user = EARGF(usage()); + keyspec = smprint("%s user=%s", keyspec, user); + break; + default: + usage(); + }ARGEND; + + + if(argc != 0) + usage(); + + if(system == nil) { + p = getenv("cpu"); + if(p == 0) + fatal(0, "set $cpu"); + system = p; + } + + if(err = rexcall(&data, system, srvname)) + fatal(1, "%s: %s", err, system); + + /* Tell the remote side the command to execute and where our working directory is */ + if(cflag) + writestr(data, cmd, "command", 0); + if(getwd(dat, sizeof(dat)) == 0) + writestr(data, "NO", "dir", 0); + else + writestr(data, dat, "dir", 0); + + /* start up a process to pass along notes */ + lclnoteproc(data); + + /* + * Wait for the other end to execute and start our file service + * of /mnt/term + */ + if(readstr(data, buf, sizeof(buf)) < 0) + fatal(1, "waiting for FS: %r"); + if(strncmp("FS", buf, 2) != 0) { + print("remote 
cpu: %s", buf); + exits(buf); + } + + /* Begin serving the gnot namespace */ + close(0); + dup(data, 0); + close(data); + + sprint(buf, "%d", msgsize); + ac = 0; + av[ac++] = exportfs; + av[ac++] = "-m"; + av[ac++] = buf; + if(dbg) + av[ac++] = "-d"; + if(patternfile != nil){ + av[ac++] = "-P"; + av[ac++] = patternfile; + } + av[ac] = nil; + exec(exportfs, av); + fatal(1, "starting exportfs"); +} + +void +fatal(int syserr, char *fmt, ...) +{ + Fmt f; + char *str; + va_list arg; + + fmtstrinit(&f); + fmtprint(&f, "cpu: "); + va_start(arg, fmt); + fmtvprint(&f, fmt, arg); + va_end(arg); + if(syserr) + fmtprint(&f, ": %r"); + fmtprint(&f, "\n"); + str = fmtstrflush(&f); + write(2, str, strlen(str)); + exits(str); +} + +char *negstr = "negotiating authentication method"; + +char bug[256]; + +int +old9p(int fd) +{ + int p[2]; + + if(pipe(p) < 0) + fatal(1, "pipe"); + + switch(rfork(RFPROC|RFFDG|RFNAMEG)) { + case -1: + fatal(1, "rfork srvold9p"); + case 0: + if(fd != 1){ + dup(fd, 1); + close(fd); + } + if(p[0] != 0){ + dup(p[0], 0); + close(p[0]); + } + close(p[1]); + if(0){ + fd = open("/sys/log/cpu", OWRITE); + if(fd != 2){ + dup(fd, 2); + close(fd); + } + execl("/bin/srvold9p", "srvold9p", "-ds", nil); + } else + execl("/bin/srvold9p", "srvold9p", "-s", nil); + fatal(1, "exec srvold9p"); + default: + close(fd); + close(p[0]); + } + return p[1]; +} + +/* Invoked with stdin, stdout and stderr connected to the network connection */ +void +remoteside(int old) +{ + char user[MaxStr], home[MaxStr], buf[MaxStr], xdir[MaxStr], cmd[MaxStr]; + int i, n, fd, badchdir, gotcmd; + + rfork(RFENVG); + putenv("service", "cpu"); + fd = 0; + + /* negotiate authentication mechanism */ + n = readstr(fd, cmd, sizeof(cmd)); + if(n < 0) + fatal(1, "authenticating"); + if(setamalg(cmd) < 0){ + writestr(fd, "unsupported auth method", nil, 0); + fatal(1, "bad auth method %s", cmd); + } else + writestr(fd, "", "", 1); + + fd = (*am->sf)(fd, user); + if(fd < 0) + fatal(1, "srvauth"); + + /* Set 
environment values for the user */ + putenv("user", user); + sprint(home, "/usr/%s", user); + putenv("home", home); + + /* Now collect invoking cpu's current directory or possibly a command */ + gotcmd = 0; + if(readstr(fd, xdir, sizeof(xdir)) < 0) + fatal(1, "dir/cmd"); + if(xdir[0] == '!') { + strcpy(cmd, &xdir[1]); + gotcmd = 1; + if(readstr(fd, xdir, sizeof(xdir)) < 0) + fatal(1, "dir"); + } + + /* Establish the new process at the current working directory of the + * gnot */ + badchdir = 0; + if(strcmp(xdir, "NO") == 0) + chdir(home); + else if(chdir(xdir) < 0) { + badchdir = 1; + chdir(home); + } + + /* Start the gnot serving its namespace */ + writestr(fd, "FS", "FS", 0); + writestr(fd, "/", "exportfs dir", 0); + + n = read(fd, buf, sizeof(buf)); + if(n != 2 || buf[0] != 'O' || buf[1] != 'K') + exits("remote tree"); + + if(old) + fd = old9p(fd); + + /* make sure buffers are big by doing fversion explicitly; pick a huge number; other side will trim */ + strcpy(buf, VERSION9P); + if(fversion(fd, 64*1024, buf, sizeof buf) < 0) + exits("fversion failed"); + if(mount(fd, -1, "/mnt/term", MCREATE|MREPL, "") < 0) + exits("mount failed"); + + close(fd); + + /* the remote noteproc uses the mount so it must follow it */ + rmtnoteproc(); + + for(i = 0; i < 3; i++) + close(i); + + if(open("/mnt/term/dev/cons", OREAD) != 0) + exits("open stdin"); + if(open("/mnt/term/dev/cons", OWRITE) != 1) + exits("open stdout"); + dup(1, 2); + + if(badchdir) + print("cpu: failed to chdir to '%s'\n", xdir); + + if(gotcmd) + execl("/bin/rc", "rc", "-lc", cmd, nil); + else + execl("/bin/rc", "rc", "-li", nil); + fatal(1, "exec shell"); +} + +char* +rexcall(int *fd, char *host, char *service) +{ + char *na; + char dir[MaxStr]; + char err[ERRMAX]; + char msg[MaxStr]; + int n; + + na = netmkaddr(host, 0, service); + if((*fd = dial(na, 0, dir, 0)) < 0) + return "can't dial"; + + /* negotiate authentication mechanism */ + if(ealgs != nil) + snprint(msg, sizeof(msg), "%s %s", am->name, ealgs); 
+ else + snprint(msg, sizeof(msg), "%s", am->name); + writestr(*fd, msg, negstr, 0); + n = readstr(*fd, err, sizeof err); + if(n < 0) + return negstr; + if(*err){ + werrstr(err); + return negstr; + } + + /* authenticate */ + *fd = (*am->cf)(*fd); + if(*fd < 0) + return "can't authenticate"; + return 0; +} + +void +writestr(int fd, char *str, char *thing, int ignore) +{ + int l, n; + + l = strlen(str); + n = write(fd, str, l+1); + if(!ignore && n < 0) + fatal(1, "writing network: %s", thing); +} + +int +readstr(int fd, char *str, int len) +{ + int n; + + while(len) { + n = read(fd, str, 1); + if(n < 0) + return -1; + if(*str == '\0') + return 0; + str++; + len--; + } + return -1; +} + +static int +readln(char *buf, int n) +{ + int i; + char *p; + + n--; /* room for \0 */ + p = buf; + for(i=0; ichal, "challenge", 1); + if(readstr(fd, response, sizeof response) < 0) + return -1; + ch->resp = response; + ch->nresp = strlen(response); + if((ai = auth_response(ch)) != nil) + break; + } + auth_freechal(ch); + if(ai == nil) + return -1; + writestr(fd, "", "challenge", 1); + if(auth_chuid(ai, 0) < 0) + fatal(1, "newns"); + auth_freeAI(ai); + return fd; +} + +static void +mksecret(char *t, uchar *f) +{ + sprint(t, "%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux", + f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7], f[8], f[9]); +} + +/* + * plan9 authentication followed by rc4 encryption + */ +static int +p9auth(int fd) +{ + uchar key[16]; + uchar digest[SHA1dlen]; + char fromclientsecret[21]; + char fromserversecret[21]; + int i; + AuthInfo *ai; + + ai = auth_proxy(fd, auth_getkey, "proto=%q role=client %s", p9authproto, keyspec); + if(ai == nil) + return -1; + memmove(key+4, ai->secret, ai->nsecret); + if(ealgs == nil) + return fd; + + /* exchange random numbers */ + srand(truerand()); + for(i = 0; i < 4; i++) + key[i] = rand(); + if(write(fd, key, 4) != 4) + return -1; + if(readn(fd, key+12, 4) != 4) + return -1; + + /* scramble into two secrets */ + sha1(key, 
sizeof(key), digest, nil); + mksecret(fromclientsecret, digest); + mksecret(fromserversecret, digest+10); + + /* set up encryption */ + i = pushssl(fd, ealgs, fromclientsecret, fromserversecret, nil); + if(i < 0) + werrstr("can't establish ssl connection: %r"); + return i; +} + +static int +noauth(int fd) +{ + ealgs = nil; + return fd; +} + +static int +srvnoauth(int fd, char *user) +{ + strecpy(user, user+MaxStr, getuser()); + ealgs = nil; + return fd; +} + +void +loghex(uchar *p, int n) +{ + char buf[100]; + int i; + + for(i = 0; i < n; i++) + sprint(buf+2*i, "%2.2ux", p[i]); + syslog(0, "cpu", buf); +} + +static int +srvp9auth(int fd, char *user) +{ + uchar key[16]; + uchar digest[SHA1dlen]; + char fromclientsecret[21]; + char fromserversecret[21]; + int i; + AuthInfo *ai; + + ai = auth_proxy(0, nil, "proto=%q role=server %s", p9authproto, keyspec); + if(ai == nil) + return -1; + if(auth_chuid(ai, nil) < 0) + return -1; + strecpy(user, user+MaxStr, ai->cuid); + memmove(key+4, ai->secret, ai->nsecret); + + if(ealgs == nil) + return fd; + + /* exchange random numbers */ + srand(truerand()); + for(i = 0; i < 4; i++) + key[i+12] = rand(); + if(readn(fd, key, 4) != 4) + return -1; + if(write(fd, key+12, 4) != 4) + return -1; + + /* scramble into two secrets */ + sha1(key, sizeof(key), digest, nil); + mksecret(fromclientsecret, digest); + mksecret(fromserversecret, digest+10); + + /* set up encryption */ + i = pushssl(fd, ealgs, fromserversecret, fromclientsecret, nil); + if(i < 0) + werrstr("can't establish ssl connection: %r"); + return i; +} + +/* + * set authentication mechanism + */ +int +setam(char *name) +{ + for(am = authmethod; am->name != nil; am++) + if(strcmp(am->name, name) == 0) + return 0; + am = authmethod; + return -1; +} + +/* + * set authentication mechanism and encryption/hash algs + */ +int +setamalg(char *s) +{ + ealgs = strchr(s, ' '); + if(ealgs != nil) + *ealgs++ = 0; + return setam(s); +} + +char *rmtnotefile = "/mnt/term/dev/cpunote"; + +/* 
+ * loop reading /mnt/term/dev/note looking for notes. + * The child returns to start the shell. + */ +void +rmtnoteproc(void) +{ + int n, fd, pid, notepid; + char buf[256]; + + /* new proc returns to start shell */ + pid = rfork(RFPROC|RFFDG|RFNOTEG|RFNAMEG|RFMEM); + switch(pid){ + case -1: + syslog(0, "cpu", "cpu -R: can't start noteproc: %r"); + return; + case 0: + return; + } + + /* new proc reads notes from other side and posts them to shell */ + switch(notepid = rfork(RFPROC|RFFDG|RFMEM)){ + case -1: + syslog(0, "cpu", "cpu -R: can't start wait proc: %r"); + _exits(0); + case 0: + fd = open(rmtnotefile, OREAD); + if(fd < 0){ + syslog(0, "cpu", "cpu -R: can't open %s", rmtnotefile); + _exits(0); + } + + for(;;){ + n = read(fd, buf, sizeof(buf)-1); + if(n <= 0){ + postnote(PNGROUP, pid, "hangup"); + _exits(0); + } + buf[n] = 0; + postnote(PNGROUP, pid, buf); + } + } + + /* original proc waits for shell proc to die and kills note proc */ + for(;;){ + n = waitpid(); + if(n < 0 || n == pid) + break; + } + postnote(PNPROC, notepid, "kill"); + _exits(0); +} + +enum +{ + Qdir, + Qcpunote, + + Nfid = 32, +}; + +struct { + char *name; + Qid qid; + ulong perm; +} fstab[] = +{ + [Qdir] { ".", {Qdir, 0, QTDIR}, DMDIR|0555 }, + [Qcpunote] { "cpunote", {Qcpunote, 0}, 0444 }, +}; + +typedef struct Note Note; +struct Note +{ + Note *next; + char msg[ERRMAX]; +}; + +typedef struct Request Request; +struct Request +{ + Request *next; + Fcall f; +}; + +typedef struct Fid Fid; +struct Fid +{ + int fid; + int file; + int omode; +}; +Fid fids[Nfid]; + +struct { + Lock; + Note *nfirst, *nlast; + Request *rfirst, *rlast; +} nfs; + +int +fsreply(int fd, Fcall *f) +{ + uchar buf[IOHDRSZ+Maxfdata]; + int n; + + if(dbg) + fprint(2, "<-%F\n", f); + n = convS2M(f, buf, sizeof buf); + if(n > 0){ + if(write(fd, buf, n) != n){ + close(fd); + return -1; + } + } + return 0; +} + +/* match a note read request with a note, reply to the request */ +int +kick(int fd) +{ + Request *rp; + Note *np; + 
int rv; + + for(;;){ + lock(&nfs); + rp = nfs.rfirst; + np = nfs.nfirst; + if(rp == nil || np == nil){ + unlock(&nfs); + break; + } + nfs.rfirst = rp->next; + nfs.nfirst = np->next; + unlock(&nfs); + + rp->f.type = Rread; + rp->f.count = strlen(np->msg); + rp->f.data = np->msg; + rv = fsreply(fd, &rp->f); + free(rp); + free(np); + if(rv < 0) + return -1; + } + return 0; +} + +void +flushreq(int tag) +{ + Request **l, *rp; + + lock(&nfs); + for(l = &nfs.rfirst; *l != nil; l = &(*l)->next){ + rp = *l; + if(rp->f.tag == tag){ + *l = rp->next; + unlock(&nfs); + free(rp); + return; + } + } + unlock(&nfs); +} + +Fid* +getfid(int fid) +{ + int i, freefid; + + freefid = -1; + for(i = 0; i < Nfid; i++){ + if(freefid < 0 && fids[i].file < 0) + freefid = i; + if(fids[i].fid == fid) + return &fids[i]; + } + if(freefid >= 0){ + fids[freefid].fid = fid; + return &fids[freefid]; + } + return nil; +} + +int +fsstat(int fd, Fid *fid, Fcall *f) +{ + Dir d; + uchar statbuf[256]; + + memset(&d, 0, sizeof(d)); + d.name = fstab[fid->file].name; + d.uid = user; + d.gid = user; + d.muid = user; + d.qid = fstab[fid->file].qid; + d.mode = fstab[fid->file].perm; + d.atime = d.mtime = time(0); + f->stat = statbuf; + f->nstat = convD2M(&d, statbuf, sizeof statbuf); + return fsreply(fd, f); +} + +int +fsread(int fd, Fid *fid, Fcall *f) +{ + Dir d; + uchar buf[256]; + Request *rp; + + switch(fid->file){ + default: + return -1; + case Qdir: + if(f->offset == 0 && f->count >0){ + memset(&d, 0, sizeof(d)); + d.name = fstab[Qcpunote].name; + d.uid = user; + d.gid = user; + d.muid = user; + d.qid = fstab[Qcpunote].qid; + d.mode = fstab[Qcpunote].perm; + d.atime = d.mtime = time(0); + f->count = convD2M(&d, buf, sizeof buf); + f->data = (char*)buf; + } else + f->count = 0; + return fsreply(fd, f); + case Qcpunote: + rp = mallocz(sizeof(*rp), 1); + if(rp == nil) + return -1; + rp->f = *f; + lock(&nfs); + if(nfs.rfirst == nil) + nfs.rfirst = rp; + else + nfs.rlast->next = rp; + nfs.rlast = rp; + 
unlock(&nfs); + return kick(fd);; + } +} + +char Eperm[] = "permission denied"; +char Enofile[] = "out of files"; +char Enotdir[] = "not a directory"; + +void +notefs(int fd) +{ + uchar buf[IOHDRSZ+Maxfdata]; + int i, j, n, ncpunote; + char err[ERRMAX]; + Fcall f; + Fid *fid, *nfid; + int doreply; + + rfork(RFNOTEG); + fmtinstall('F', fcallfmt); + + for(n = 0; n < Nfid; n++){ + fids[n].file = -1; + fids[n].omode = -1; + } + + ncpunote = 0; + for(;;){ + n = read9pmsg(fd, buf, sizeof(buf)); + if(n <= 0){ + if(dbg) + fprint(2, "read9pmsg(%d) returns %d: %r\n", fd, n); + break; + } + if(convM2S(buf, n, &f) <= BIT16SZ) + break; + if(dbg) + fprint(2, "->%F\n", &f); + doreply = 1; + fid = getfid(f.fid); + if(fid == nil){ +nofids: + f.type = Rerror; + f.ename = Enofile; + fsreply(fd, &f); + continue; + } + switch(f.type++){ + default: + f.type = Rerror; + f.ename = "unknown type"; + break; + case Tflush: + flushreq(f.oldtag); + break; + case Tversion: + if(f.msize > IOHDRSZ+Maxfdata) + f.msize = IOHDRSZ+Maxfdata; + break; + case Tauth: + f.type = Rerror; + f.ename = "cpu: authentication not required"; + break; + case Tattach: + f.qid = fstab[Qdir].qid; + fid->file = Qdir; + break; + case Twalk: + nfid = nil; + if(f.newfid != f.fid){ + nfid = getfid(f.newfid); + if(nfid == nil) + goto nofids; + nfid->file = fid->file; + fid = nfid; + } + + f.ename = nil; + for(i=0; i MAXWELEM){ + f.type = Rerror; + f.ename = "too many name elements"; + break; + } + if(fid->file != Qdir){ + f.type = Rerror; + f.ename = Enotdir; + break; + } + if(strcmp(f.wname[i], "cpunote") == 0){ + fid->file = Qcpunote; + f.wqid[i] = fstab[Qcpunote].qid; + continue; + } + f.type = Rerror; + f.ename = err; + strcpy(err, "cpu: file \""); + for(j=0; j<=i; j++){ + if(strlen(err)+1+strlen(f.wname[j])+32 > sizeof err) + break; + if(j != 0) + strcat(err, "/"); + strcat(err, f.wname[j]); + } + strcat(err, "\" does not exist"); + break; + } + if(nfid != nil && (f.ename != nil || i < f.nwname)) + nfid ->file = -1; + 
if(f.type != Rerror) + f.nwqid = i; + break; + case Topen: + if(f.mode != OREAD){ + f.type = Rerror; + f.ename = Eperm; + } + fid->omode = f.mode; + if(fid->file == Qcpunote) + ncpunote++; + f.qid = fstab[fid->file].qid; + break; + case Tcreate: + f.type = Rerror; + f.ename = Eperm; + break; + case Tread: + if(fsread(fd, fid, &f) < 0) + goto err; + doreply = 0; + break; + case Twrite: + f.type = Rerror; + f.ename = Eperm; + break; + case Tclunk: + if(fid->omode != -1 && fid->file == Qcpunote){ + ncpunote--; + if(ncpunote == 0) /* remote side is done */ + goto err; + } + fid->file = -1; + fid->omode = -1; + break; + case Tremove: + f.type = Rerror; + f.ename = Eperm; + break; + case Tstat: + if(fsstat(fd, fid, &f) < 0) + goto err; + doreply = 0; + break; + case Twstat: + f.type = Rerror; + f.ename = Eperm; + break; + } + if(doreply) + if(fsreply(fd, &f) < 0) + break; + } +err: + if(dbg) + fprint(2, "notefs exiting: %r\n"); + werrstr("success"); + postnote(PNGROUP, exportpid, "kill"); + if(dbg) + fprint(2, "postnote PNGROUP %d: %r\n", exportpid); + close(fd); +} + +char notebuf[ERRMAX]; + +void +catcher(void*, char *text) +{ + int n; + + n = strlen(text); + if(n >= sizeof(notebuf)) + n = sizeof(notebuf)-1; + memmove(notebuf, text, n); + notebuf[n] = '\0'; + noted(NCONT); +} + +/* + * mount in /dev a note file for the remote side to read. 
+ */ +void +lclnoteproc(int netfd) +{ + Waitmsg *w; + Note *np; + int pfd[2]; + int pid; + + if(pipe(pfd) < 0){ + fprint(2, "cpu: can't start note proc: pipe: %r\n"); + return; + } + + /* new proc mounts and returns to start exportfs */ + switch(pid = rfork(RFPROC|RFNAMEG|RFFDG|RFMEM)){ + default: + exportpid = pid; + break; + case -1: + fprint(2, "cpu: can't start note proc: rfork: %r\n"); + return; + case 0: + close(pfd[0]); + if(mount(pfd[1], -1, "/dev", MBEFORE, "") < 0) + fprint(2, "cpu: can't mount note proc: %r\n"); + close(pfd[1]); + return; + } + + close(netfd); + close(pfd[1]); + + /* new proc listens for note file system rpc's */ + switch(rfork(RFPROC|RFNAMEG|RFMEM)){ + case -1: + fprint(2, "cpu: can't start note proc: rfork1: %r\n"); + _exits(0); + case 0: + notefs(pfd[0]); + _exits(0); + } + + /* original proc waits for notes */ + notify(catcher); + w = nil; + for(;;) { + *notebuf = 0; + free(w); + w = wait(); + if(w == nil) { + if(*notebuf == 0) + break; + np = mallocz(sizeof(Note), 1); + if(np != nil){ + strcpy(np->msg, notebuf); + lock(&nfs); + if(nfs.nfirst == nil) + nfs.nfirst = np; + else + nfs.nlast->next = np; + nfs.nlast = np; + unlock(&nfs); + kick(pfd[0]); + } + unlock(&nfs); + } else if(w->pid == exportpid) + break; + } + + if(w == nil) + exits(nil); + exits(0); +/* exits(w->msg); */ +} diff -Nru 0/sys/src/nix/root/mkfile 4/sys/src/nix/root/mkfile --- 0/sys/src/nix/root/mkfile Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/mkfile Wed Feb 6 00:00:00 2013 @@ -0,0 +1,10 @@ +[2]/dev/null sed '' $f + }|sed -e '$!N;s/([^ ])$/\1/;ta' -e 'P;D;b' -e ':a;s/\n//' \ + |sed 's/ +/ /g;s/^([^ ]+) +([^ ]+) +([^ ]+) +([^ ]+ +[^ ]+ +[^ ]+ +[^ ]+ +[^ ]+ +[^ ]+) +([^ ]+)+ ([^ ]+ +[^ ]+)(.*)/\2 '^$i^' \5K \3 \1 \7/' +}} +fn netstat {@{ + for(p in tcp udp){ + cd /net/$p; + for(i in `{echo [0-9] [1-9][0-9] [1-9][0-9][0-9] [1-9][0-9][0-9]*|sed 's/\[.*\][ \*]//'}){ + echo -n $p' '$i' *owner* '; + cat $i/status $i/local $i/remote \ + | sed -n -e :a -e '$!N; s/ .*//; 
s/!/ /; s/\n/ /; ta; + s/([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)/\1 \3 \5 \4/p' + } + } +}} + +status='' +if(! ~ $#* 0) . $* +. -i '#d/0' +exit $status diff -Nru 0/sys/src/nix/root/suck.c 4/sys/src/nix/root/suck.c --- 0/sys/src/nix/root/suck.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/suck.c Wed Feb 6 00:00:00 2013 @@ -0,0 +1,286 @@ +#include +#include + +#define ESTR 256 + +static void +error(char* fmt, ...) +{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p-estr); +} + +static void +fatal(char* fmt, ...) +{ + va_list v; + char *e, estr[ESTR], *p; + + va_start(v, fmt); + e = estr + ESTR; + p = seprint(estr, e, "%s: ", argv0); + p = vseprint(p, e, fmt, v); + p = seprint(p, e, "\n"); + va_end(v); + + write(2, estr, p - estr); + exits("fatal"); +} + +static void +usage(void) +{ + char *e, estr[ESTR], *p; + + e = estr + ESTR; + p = seprint(estr, e, "usage: %s" + " [whatever]" + "\n", + argv0); + write(2, estr, p-estr); + exits("usage"); +} + +#define F(v, o, w) (((v) & ((1<<(w))-1))<<(o)) + +enum { + X = 0, /* dimension */ + Y = 1, + Z = 2, + N = 3, + + Chunk = 32, /* granularity of FIFO */ + Pchunk = 8, /* Chunks in a packet */ + + Quad = 16, +}; + +/* + * Packet header. The hardware requires an 8-byte header + * of which the last two are reserved (they contain a sequence + * number and a header checksum inserted by the hardware). + * The hardware also requires the packet to be aligned on a + * 128-bit boundary for loading into the HUMMER. 
+ */
+typedef struct Tpkt Tpkt;
+struct Tpkt {
+	u8int	sk;			/* Skip Checksum Control */
+	u8int	hint;			/* Hint|Dp|Pid0 */
+	u8int	size;			/* Size|Pid1|Dm|Dy|VC */
+	u8int	dst[N];			/* Destination Coordinates */
+	u8int	_6_[2];			/* reserved */
+	u8int	_8_[8];			/* protocol header */
+	u8int	payload[];
+};
+
+/*
+ * SKIP is a field in .sk giving the number of 2-bytes
+ * to skip from the top of the packet before including
+ * the packet bytes into the running checksum.
+ * SIZE is a field in .size giving the size of the
+ * packet in 32-byte 'chunks'.
+ */
+#define SKIP(n)		F(n, 1, 7)
+#define SIZE(n)		F(n, 5, 3)
+
+enum {
+	Sk		= 0x01,		/* Skip Checksum */
+
+	Pid0		= 0x01,		/* Destination Group FIFO MSb */
+	Dp		= 0x02,		/* Multicast Deposit */
+	Hzm		= 0x04,		/* Z- Hint */
+	Hzp		= 0x08,		/* Z+ Hint */
+	Hym		= 0x10,		/* Y- Hint */
+	Hyp		= 0x20,		/* Y+ Hint */
+	Hxm		= 0x40,		/* X- Hint */
+	Hxp		= 0x80,		/* X+ Hint */
+
+	Vcd0		= 0x00,		/* Dynamic 0 VC */
+	Vcd1		= 0x01,		/* Dynamic 1 VC */
+	Vcbn		= 0x02,		/* Deterministic Bubble VC */
+	Vcbp		= 0x03,		/* Deterministic Priority VC */
+	Dy		= 0x04,		/* Dynamic Routing */
+	Dm		= 0x08,		/* DMA Mode */
+	Pid1		= 0x10,		/* Destination Group FIFO LSb */
+};
+
+/*
+ * Find a line of the form "item: x N y N z N" in buf and store the
+ * three numbers in d[0..2].  The item must start buf or follow a
+ * newline.  Returns 0 on success, -1 if the line is missing or
+ * malformed or a value exceeds 255.
+ * Note: negative values are not rejected; they are truncated on
+ * assignment to the u8int.
+ */
+static int
+torusparse(u8int d[3], char* item, char* buf)
+{
+	int n;
+	char *p;
+
+	if((p = strstr(buf, item)) == nil || (p != buf && *(p-1) != '\n'))
+		return -1;
+	n = strlen(item);
+	if(strlen(p) < n+sizeof(": x 0 y 0 z 0"))
+		return -1;
+	/* step over the item name and ": x ", then verify that is what was skipped */
+	p += n+sizeof(": x ")-1;
+	if(strncmp(p-4, ": x ", 4) != 0)
+		return -1;
+	if((n = strtol(p, &p, 0)) > 255 || *p != ' ' || *(p+1) != 'y')
+		return -1;
+	d[0] = n;
+	/* p+2 skips the " y" (then " z") separator before the next number */
+	if((n = strtol(p+2, &p, 0)) > 255 || *p != ' ' || *(p+1) != 'z')
+		return -1;
+	d[1] = n;
+	if((n = strtol(p+2, &p, 0)) > 255 || (*p != '\n' && *p != '\0'))
+		return -1;
+	d[2] = n;
+
+	return 0;
+}
+
+/*
+ * Print a packet in hex.
+ * With hflag, print the first 8 bytes of the packet ("Hw:") and the
+ * 8 bytes of the protocol header _8_ ("Sw:").
+ * With dflag, print the payload 16 bytes per row, each row prefixed
+ * with its offset.  The payload length is derived from the chunk
+ * count held in the top three bits of tpkt->size.
+ */
+static void
+dumptpkt(Tpkt* tpkt, int hflag, int dflag)
+{
+	uchar *t;
+	int i, j, n;
+	char buf[512], *e, *p;
+
+	/* total packet length in bytes: (size field + 1) chunks */
+	n = ((tpkt->size>>5)+1) * Chunk;
+
+	p = buf;
+	e = buf + sizeof(buf);
+	if(hflag){
+		p = seprint(p, e, "Hw:");
+#ifdef notdef
+		/* SKIP() places the count in bits 1-7; Sk is bit 0 */
+		p = seprint(p, e, " sk %#2.2ux (Skip %d Sk %d)",
+			tpkt->sk, tpkt->sk>>1, tpkt->sk & Sk);
+		p = seprint(p, e, " hint %#2.2ux", tpkt->hint);
+		p = seprint(p, e, " size %#2.2ux", tpkt->size);
+		p = seprint(p, e, " dst [%d, %d, %d]",
+			tpkt->dst[X], tpkt->dst[Y], tpkt->dst[Z]);
+		p = seprint(p, e, " _6_[0] %#2.2ux (seqno %d)",
+			tpkt->_6_[0], tpkt->_6_[0]);
+		p = seprint(p, e, " _6_[1] %#2.2ux (crc)\n", tpkt->_6_[1]);
+#else
+		t = (uchar*)tpkt;
+		for(i = 0; i < 8; i++)
+			p = seprint(p, e, " %2.2ux", t[i]);
+		p = seprint(p, e, "\n");
+#endif /* notdef */
+
+		p = seprint(p, e, "Sw:");
+		t = (uchar*)tpkt->_8_;
+		for(i = 0; i < 8; i++)
+			p = seprint(p, e, " %#2.2ux", t[i]);
+		print("%s\n", buf);
+
+	}
+
+	if(!dflag)
+		return;
+
+	n -= sizeof(Tpkt);
+	for(i = 0; i < n; i += 16){
+		p = seprint(buf, e, "%4.4ux:", i);
+		/* advance p after each byte so the row accumulates all 16 */
+		for(j = 0; j < 16; j++)
+			p = seprint(p, e, " %2.2ux", tpkt->payload[i+j]);
+		print("%s\n", buf);
+	}
+}
+
+void +main(int argc, char* argv[]) +{ + Tpkt *tpkt; + u8int d[N]; + char buf[512], *p; + uvlong r, start, stop; + int count, dflag, fd, i, hflag, length, mhz, n; + + count = 1; + dflag = hflag = 0; + length = Pchunk*Chunk; + mhz = 700; + + ARGBEGIN{ + default: + usage(); + break; + case 'd': + dflag = 1; + break; + case 'h': + hflag = 1; + break; + case 'l': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + if(n % Chunk) + usage(); + length = n; + if(length > Pchunk*Chunk){ + n = (n + (Pchunk*Chunk)-1)/(Pchunk*Chunk); + length += (n-1) * sizeof(Tpkt); + } + break; + case 'm': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + mhz = n; + break; + case 'n': + p = EARGF(usage()); + if((n = strtol(argv[0], &p, 0)) <= 0 || p == argv[0] || *p != 0) + usage(); + count = n; + break; + }ARGEND; + + if((fd = open("/dev/torusstatus", OREAD)) < 0) + fatal("open /dev/torusstatus: %r\n"); + if((n = read(fd, buf, sizeof(buf))) < 0)
+ fatal("read /dev/torusstatus: %r\n"); + close(fd); + buf[n] = 0; + + if(torusparse(d, "addr", buf) < 0) + fatal("parse /dev/torusstatus: <%s>\n", buf); + print("addr: %d.%d.%d\n", d[X], d[Y], d[Z]); + if(torusparse(d, "size", buf) < 0) + fatal("parse /dev/torusstatus: <%s>\n", buf); + print("size: %d.%d.%d\n", d[X], d[Y], d[Z]); + + if((tpkt = mallocalign(length, Chunk, 0, 0)) == nil) + fatal("mallocalign tpkt\n"); + + if((fd = open("/dev/torus", ORDWR)) < 0) + fatal("open /dev/torus: %r\n"); + + print("starting %d reads of %d\n", count, length); + + r = count*length; + + cycles(&start); + for(i = 0; i < r; i += n){ + if((n = pread(fd, tpkt, length, 0)) < 0) + fatal("read /dev/torus: %r\n", n); + if(hflag || dflag) + dumptpkt(tpkt, hflag, dflag); + } + cycles(&stop); + + close(fd); + + r = (count*length); + r *= mhz; + r /= stop - start; + + print("%d reads in %llud cycles @ %dMHz = %llud MB/s\n", + i, stop - start, mhz, r); + + exits(0); +} diff -Nru 0/sys/src/nix/root/tcp23 4/sys/src/nix/root/tcp23 --- 0/sys/src/nix/root/tcp23 Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/tcp23 Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2 @@ +#!/bin/rc +exec /bin/ip/telnetd -at $* diff -Nru 0/sys/src/nix/root/tcp564 4/sys/src/nix/root/tcp564 --- 0/sys/src/nix/root/tcp564 Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/root/tcp564 Wed Feb 6 00:00:00 2013 @@ -0,0 +1,2 @@ +#!/bin/rc +exec exportfs -r / diff -Nru 0/sys/src/nix/test/1/kern 4/sys/src/nix/test/1/kern --- 0/sys/src/nix/test/1/kern Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/test/1/kern Wed Feb 6 00:00:00 2013 @@ -0,0 +1,19 @@ +#!/bin/rc + +rfork ne + +# import rc functions popular among scripts, e.g. fail +# +. 
../tools + +# we might have a main.c file here and do something like: +# bind main.c ../../k10/main.c +# to override the source used for this test + +# we might change the std source like this: +# sed 's/initialTCs = [0-9]+/initialTCs = 16/' < ../../k10/main.c >main.c +# bind main.c ../../k10/main.c + +cd /sys/src/nix/k10 +mk clean +mk install diff -Nru 0/sys/src/nix/test/1/koutput 4/sys/src/nix/test/1/koutput --- 0/sys/src/nix/test/1/koutput Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/test/1/koutput Wed Feb 6 00:00:00 2013 @@ -0,0 +1,193 @@ +rm -f *.[5678qv] *.root.[cs] *.out *.m *.acid errstr.h init.h amd64^l.h +for(i in k8cpu) + mk $i.clean +@{cd ../root; mk clean} +if(test -d ./root) @{cd ./root; mk clean}; status='' +rm -f k8cpu.c [9bz]k8cpu [9bz]k8cpu.gz bootk8cpu.* +rm -f *.[5678qv] *.a[5678qv] y.tab.? lex.yy.c y.debug y.output [5678qv].??* big bigloop blow cpu suck $CLEANFILES +rm -f *.[5678qv] *.a[5678qv] y.tab.? lex.yy.c y.debug y.output [5678qv].??* cpu $CLEANFILES +awk -f ../mk/parse -- -mkdevc k8cpu > k8cpu.c +rc ../mk/mkenum amd64.h > amd64l.h +6a $AFLAGS l64cpuid.s +6a $AFLAGS l64fpu.s +6c -FTVw -I. -D'_DBGC_=''c''' ./acore.c +6c -FTVw -I. -D'_DBGC_=''V''' ./arch.c +6c -FTVw archk10.c +6c -FTVw -I. -D'_DBGC_=''m''' ./asm.c +6c -FTVw cga.c +6c -FTVw crap.c +6c -FTVw fpu.c +6c -FTVw i8254.c +6c -FTVw i8259.c +6c -FTVw -I. ../386/kbd.c +6c -FTVw init9.c +6c -FTVw -I. ../port/initcode.c +6c -FTVw map.c +6c -FTVw -I. -D'_DBGC_=''m''' ./memory.c +6c -FTVw -I. -D'_DBGC_=''v''' ./mmu.c +6c -FTVw multiboot.c +6c -FTVw qmalloc.c +6c -FTVw -I. ../386/random.c +6c -FTVw syscall.c +6c -FTVw -I. -D'_DBGC_=''c''' ./tcore.c +6c -FTVw trap.c +6c -FTVw vsvm.c +6c -FTVw -I. -D'_DBGC_=''m''' ./physalloc.c +6c -FTVw -I. 
../port/print.c +6c -I../boot -FTVw ../boot/bootauth.c +6c -I../boot -FTVw ../boot/aux.c +6c -I../boot -FTVw ../boot/boot.c +6c -I../boot -FTVw ../boot/bootcache.c +6c -I../boot -FTVw ../boot/bootip.c +6c -I../boot -FTVw ../boot/local.c +6c -I../boot -FTVw ../boot/embed.c +6c -I../boot -FTVw ../boot/settime.c +6c -I../boot -FTVw ../boot/sac.c +6c -I../boot -FTVw ../boot/paq.c +6c -I../boot -FTVw ../boot/printstub.c +6c -FTVw -I. ../386/ether8169.c +6c -FTVw -I. ../386/devrtc.c +6c -FTVw -I. ../port/netif.c +6c -FTVw -I. ../port/devssl.c +6c -FTVw -I. ../ip/ip.c +6c -FTVw -I. ../ip/ipv6.c +6c -FTVw -I. ../386/pci.c +6c -FTVw ether82563.c +6c -FTVw -I. ../ip/devip.c +6c -FTVw -I. ../port/devuart.c +6c -FTVw -I. ../port/ethermii.c +6c -FTVw -I. ../386/etherigbe.c +6c -FTVw -I. ../port/devproc.c +6c -FTVw -I. ../port/devkprof.c +6c -FTVw -I. ../ip/netlog.c +6c -FTVw -I. ../ip/nullmedium.c +6c -FTVw -I. ../386/ether82557.c +6c -FTVw -I. ../ip/tcp.c +6c -FTVw -I. -D'_DBGC_=''I''' ./ioapic.c +6c -FTVw -I. ../port/devsrv.c +6c -FTVw -I. ../ip/ipaux.c +6c -FTVw -I. ../port/devenv.c +6c -FTVw -I. ../port/devdup.c +6c -FTVw -I. ../port/devpipe.c +6c -FTVw -I. ../ip/arp.c +6c -FTVw -I. ../386/uartpci.c +6c -FTVw -I. -D'_DBGC_=''M''' ./mp.c +6c -FTVw -I. -D'_DBGC_=''T''' ../port/devtube.c +6c -FTVw pmcio.c +6c -FTVw -I. -D'_DBGC_=''z''' ../port/devsegment.c +6c -FTVw -I. ../ip/icmp.c +6c -FTVw -I. ../ip/icmp6.c +6c -FTVw -I. ../ip/ptclbsum.c +6c -FTVw -I. ../ip/ethermedium.c +6c -FTVw -I. ../ip/ipifc.c +6c -FTVw -I. ../386/devether.c +6c -FTVw -I. ../386/uarti8250.c +6c -FTVw -I. ../ip/iproute.c +6c -FTVw devarch.c +6c -FTVw -I. ../ip/inferno.c +6c -FTVw -I. ../port/devmnt.c +6c -FTVw -I. -D'_DBGC_=''C''' ./devacpi.c +6c -FTVw -I. ../ip/chandial.c +6c -FTVw -I. ../ip/netdevmedium.c +6c -FTVw -I. -D'_DBGC_=''A''' ./apic.c +warning: ../port/devtube.c:135 empty if body +warning: ../port/devtube.c:137 empty if body +6c -FTVw -I. ../port/devcap.c +6c -FTVw -I. 
../port/devws.c +6c -FTVw -I. ../ip/loopbackmedium.c +6c -FTVw -I. ../port/devcons.c +6c -FTVw -I. ../ip/pktmedium.c +6c -FTVw -I. ../ip/udp.c +6c -FTVw -I. -D'_DBGC_=''z''' ../port/devzp.c +6c -FTVw -I. ../port/devroot.c +6c -FTVw -I. ../port/devpmc.c +6c -FTVw -I. ../port/tod.c +6c -FTVw -I. ../port/sysauth.c +6c -FTVw -I. -D'_DBGC_=''p''' ../port/pager.c +6c -FTVw -I. ../port/edf.c +6c -FTVw -I. ../port/latin1.c +6c -FTVw -I. -D'_DBGC_=''z''' ../port/syszio.c +6c -FTVw -I. ../port/segment.c +6c -FTVw -I. ../port/allocb.c +6c -FTVw -I. -D'_DBGC_=''n''' ../port/nixcall.c +6c -FTVw -I. ../port/systab.c +6c -FTVw -I. ../port/qio.c +awk -f ../mk/parse -- -mkerrstr > errstr.h +6c -FTVw -I. ../port/ps.c +6c -FTVw -I. ../port/fault.c +6c -FTVw -I. -D'_DBGC_=''p''' ../port/image.c +6c -FTVw -I. -D'_DBGC_=''p''' ../port/page.c +6c -FTVw -I. -D'_DBGC_=''p''' ../port/sysseg.c +6c -FTVw -I. ../port/parse.c +6c -FTVw -I. ../port/devtab.c +6c -FTVw -I. -D'_DBGC_=''S''' ../port/syssem.c +6c -FTVw -I. ../port/dev.c +6c -FTVw -I. ../port/rebootcmd.c +6c -FTVw -I. -D'_DBGC_=''E''' ../port/sysproc.c +6c -FTVw -I. ../port/portclock.c +6c -FTVw -I. ../port/chan.c +6c -FTVw -I. ../port/syscallfmt.c +6c -FTVw -I. ../port/pgrp.c +6c -FTVw -I. ../port/qlock.c +6c -FTVw -I. ../port/alarm.c +6c -FTVw -I. ../port/sysfile.c +6c -FTVw -I. ../port/taslock.c +6a $AFLAGS l32p.s +6a $AFLAGS l64v.s +6a $AFLAGS l64idt.s +6a $AFLAGS l64acidt.s +6a $AFLAGS l64syscall.s +6a $AFLAGS l64acsyscall.s +6a $AFLAGS l64sipi.s +6l -l -R1 -s -o init.out init9.6 initcode.6 -lc +6c -FTVw -I. ../port/proc.c +6l -o l64sipi.out -T0xfffffffff0003000 -R4 -l -s l64sipi.6 +{echo 'uchar sipihandler[]={' + xd -1x l64sipi.out | + sed -e 's/^[0-9a-f]+ //' \ + -e '1,2d' -e '3s/^ .. .. .. .. .. .. .. 
..//' \ + -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > sipi.h +{echo 'uchar initcode[]={' + xd -1x init.out | sed -e 's/^[0-9a-f]+ //' -e 's/ ([0-9a-f][0-9a-f])/0x\1,/g' + echo '};'} > init.h +names=`{membername $newprereq} +ar vu ../boot/libboot.a6 $names +rm $names +6c -FTVw sipi.c +6c -FTVw -I. -D'_DBGC_=''x''' ./main.c +r - bootauth.6 +r - aux.6 +r - boot.6 +r - bootcache.6 +r - bootip.6 +r - local.6 +r - embed.6 +r - settime.6 +r - sac.6 +r - paq.6 +r - printstub.6 +awk -f ../mk/parse -- -mkbootconf k8cpu > bootk8cpu.c +6c -FTVw bootk8cpu.c +6c -FTVw ../boot/printstub.c +6l -o bootk8cpu.out bootk8cpu.6 ../boot/libboot.a6 printstub.6 +../mk/mkrootall\ + boot bootk8cpu_out bootk8cpu.out\ + factotum _amd64_bin_auth_factotum /amd64/bin/auth/factotum\ + ipconfig _amd64_bin_ip_ipconfig /amd64/bin/ip/ipconfig\ + nvram ___root_nvram ../root/nvram\ +>k8cpu.root.s +6a $AFLAGS k8cpu.root.s +6c -FTVw '-DKERNDATE='`{date -n} k8cpu.c +6l -o 9k8cpu -T0xfffffffff0110000 -R4096 -l l32p.6 l64v.6 l64idt.6 l64acidt.6 l64cpuid.6 l64syscall.6 l64acsyscall.6 l64fpu.6 acore.6 arch.6 archk10.6 asm.6 cga.6 crap.6 fpu.6 i8254.6 i8259.6 kbd.6 main.6 map.6 memory.6 mmu.6 multiboot.6 qmalloc.6 random.6 syscall.6 tcore.6 trap.6 vsvm.6 physalloc.6 k8cpu.root.6 ether8169.6 devrtc.6 netif.6 devssl.6 ip.6 ipv6.6 pci.6 ether82563.6 devip.6 devuart.6 ethermii.6 etherigbe.6 devproc.6 devkprof.6 netlog.6 nullmedium.6 ether82557.6 tcp.6 ioapic.6 devsrv.6 ipaux.6 devenv.6 devdup.6 devpipe.6 arp.6 uartpci.6 mp.6 devtube.6 pmcio.6 devsegment.6 icmp.6 sipi.6 icmp6.6 ptclbsum.6 ethermedium.6 ipifc.6 devether.6 uarti8250.6 iproute.6 devarch.6 inferno.6 devmnt.6 devacpi.6 chandial.6 netdevmedium.6 apic.6 devcap.6 devws.6 loopbackmedium.6 devcons.6 pktmedium.6 udp.6 devzp.6 devroot.6 devpmc.6 tod.6 sysauth.6 pager.6 edf.6 latin1.6 syszio.6 segment.6 allocb.6 nixcall.6 systab.6 qio.6 proc.6 ps.6 fault.6 image.6 page.6 sysseg.6 parse.6 devtab.6 syssem.6 dev.6 rebootcmd.6 sysproc.6 portclock.6 chan.6 
syscallfmt.6 pgrp.6 qlock.6 alarm.6 print.6 sysfile.6 taslock.6 k8cpu.6 /amd64/lib/libip.a /amd64/lib/libc.a /amd64/lib/libsec.a +6l -o 9k8cpu.elf -H5 -8 -T0xfffffffff0110000 -l l32p.6 l64v.6 l64idt.6 l64acidt.6 l64cpuid.6 l64syscall.6 l64acsyscall.6 l64fpu.6 acore.6 arch.6 archk10.6 asm.6 cga.6 crap.6 fpu.6 i8254.6 i8259.6 kbd.6 main.6 map.6 memory.6 mmu.6 multiboot.6 qmalloc.6 random.6 syscall.6 tcore.6 trap.6 vsvm.6 physalloc.6 k8cpu.root.6 ether8169.6 devrtc.6 netif.6 devssl.6 ip.6 ipv6.6 pci.6 ether82563.6 devip.6 devuart.6 ethermii.6 etherigbe.6 devproc.6 devkprof.6 netlog.6 nullmedium.6 ether82557.6 tcp.6 ioapic.6 devsrv.6 ipaux.6 devenv.6 devdup.6 devpipe.6 arp.6 uartpci.6 mp.6 devtube.6 pmcio.6 devsegment.6 icmp.6 sipi.6 icmp6.6 ptclbsum.6 ethermedium.6 ipifc.6 devether.6 uarti8250.6 iproute.6 devarch.6 inferno.6 devmnt.6 devacpi.6 chandial.6 netdevmedium.6 apic.6 devcap.6 devws.6 loopbackmedium.6 devcons.6 pktmedium.6 udp.6 devzp.6 devroot.6 devpmc.6 tod.6 sysauth.6 pager.6 edf.6 latin1.6 syszio.6 segment.6 allocb.6 nixcall.6 systab.6 qio.6 proc.6 ps.6 fault.6 image.6 page.6 sysseg.6 parse.6 devtab.6 syssem.6 dev.6 rebootcmd.6 sysproc.6 portclock.6 chan.6 syscallfmt.6 pgrp.6 qlock.6 alarm.6 print.6 sysfile.6 taslock.6 k8cpu.6 /amd64/lib/libip.a /amd64/lib/libc.a /amd64/lib/libsec.a +size 9k8cpu +560858t + 560160d + 192416b = 1313434 9k8cpu +strip -o /fd/1 9k8cpu | gzip -9 > 9k8cpu.gz +--r-xrwxr-x M 9 nemo sys 1410811 Jan 18 11:26 /amd64/9k8cpu +--r--rw-r-- M 9 nemo sys 461939 Jan 18 11:26 /amd64/9k8cpu.gz +--rwxrwxr-x M 13 nemo sys 1410811 Jan 18 11:26 /n/9/amd64/9k8cpu +--rw-rw-r-- M 13 nemo sys 461939 Jan 18 11:26 /n/9/amd64/9k8cpu.gz +--r-xrwxr-x M 15 esoriano sys 1410811 Jan 18 11:26 /n/planb/amd64/9k8cpu +--r--rw-r-- M 15 rminnich sys 461939 Jan 18 11:26 /n/planb/amd64/9k8cpu.gz +done diff -Nru 0/sys/src/nix/test/1/main.c 4/sys/src/nix/test/1/main.c --- 0/sys/src/nix/test/1/main.c Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/test/1/main.c Wed Feb 6 
00:00:00 2013 @@ -0,0 +1,571 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" + +#include "init.h" +#include "io.h" + +static int initialTCs = 16;; /* default # of TCs */ + +Conf conf; /* XXX - must go - gag */ + +extern void crapoptions(void); /* XXX - must go */ +extern void confsetenv(void); /* XXX - must go */ + +static uintptr sp; /* XXX - must go - user stack of init proc */ + +uintptr kseg0 = KZERO; +Sys* sys = nil; +usize sizeofSys = sizeof(Sys); + +/* + * Option arguments from the command line. + * oargv[0] is the boot file. + * Optionsinit() is called from multiboot() to + * set it all up. + */ +static int oargc; +static char* oargv[20]; +static char oargb[128]; +static int oargblen; + +static int maxcores = 1024; /* max # of cores given as an argument */ + +char dbgflg[256]; +static int vflag = 0; + +void +optionsinit(char* s) +{ + oargblen = strecpy(oargb, oargb+sizeof(oargb), s) - oargb; + oargc = tokenize(oargb, oargv, nelem(oargv)-1); + oargv[oargc] = nil; +} + +static void +options(int argc, char* argv[]) +{ + char *p; + int n, o; + + /* + * Process flags. + * Flags [A-Za-z] may be optionally followed by + * an integer level between 1 and 127 inclusive + * (no space between flag and level). + * '--' ends flag processing. 
+ */ + while(--argc > 0 && (*++argv)[0] == '-' && (*argv)[1] != '-'){ + while(o = *++argv[0]){ + if(!(o >= 'A' && o <= 'Z') && !(o >= 'a' && o <= 'z')) + continue; + n = strtol(argv[0]+1, &p, 0); + if(p == argv[0]+1 || n < 1 || n > 127) + n = 1; + argv[0] = p-1; + dbgflg[o] = n; + } + } + vflag = dbgflg['v']; + if(argc > 0){ + maxcores = strtol(argv[0], 0, 0); + argc--; + argv++; + } + if(argc > 0){ + initialTCs = strtol(argv[0], 0, 0); + //argc--; + //argv++; + } +} + +extern void setmachsched(Mach*); + +void +squidboy(int apicno) +{ + char *n[] = { + [NIXAC] "AC", + [NIXTC] "TC", + [NIXKC] "KC" + }; + vlong hz; + + sys->machptr[m->machno] = m; + setmachsched(m); + /* + * Need something for initial delays + * until a timebase is worked out. + */ + m->cpuhz = 2000000000ll; + m->cpumhz = 2000; + m->perf.period = 1; + + m->nixtype = NIXAC; + + DBG("Hello Squidboy %d %d\n", apicno, m->machno); + + vsvminit(MACHSTKSZ, m->nixtype); + + /* + * Beware the Curse of The Non-Interruptable Were-Temporary. + */ + hz = archhz(); + if(hz == 0) + ndnr(); + m->cpuhz = hz; + m->cyclefreq = hz; + m->cpumhz = hz/1000000ll; + + mmuinit(); + if(!apiconline()) + ndnr(); + fpuinit(); + + acmodeset(m->nixtype); + m->splpc = 0; + m->online = 1; + + /* + * CAUTION: no time sync done, etc. + */ + DBG("Wait for the thunderbirds!\n"); + while(!active.thunderbirdsarego) + ; + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + + print("cpu%d color %d role %s tsc %lld\n", + m->machno, corecolor(m->machno), n[m->nixtype], m->rdtsc); + switch(m->nixtype){ + case NIXAC: + acmmuswitch(); + acinit(); + adec(&active.nbooting); + ainc(&active.nonline); /* this was commented out */ + acsched(); + panic("squidboy"); + break; + case NIXTC: + /* + * We only need the idt and syscall entry point actually. + * At boot time the boot processor might set our role after + * we have decided to become an AC. + */ + vsvminit(MACHSTKSZ, NIXTC); + + /* + * Enable the timer interrupt. 
+ */ + apicpri(0); + + timersinit(); + adec(&active.nbooting); + ainc(&active.nonline); /* this was commented out */ + + schedinit(); + break; + } + panic("squidboy returns (type %d)", m->nixtype); +} + +static void +testiccs(void) +{ + int i; + Mach *mp; + extern void testicc(int); + + /* setup arguments for all */ + for(i = 1; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online != 0) + if(mp->nixtype == NIXAC) + testicc(i); + print("bootcore: all cores done\n"); +} + +/* + * Rendezvous with other cores. Set roles for those that came + * up online, and wait until they are initialized. + * Sync TSC with them. + * We assume other processors that could boot had time to + * set online to 1 by now. + */ +static void +nixsquids(void) +{ + Mach *mp; + int i; + uvlong now, start; + + for(i = 1; i < MACHMAX; i++) + if((mp = sys->machptr[i]) != nil && mp->online != 0){ + /* + * Inter-core calls. A ensure *mp->iccall and mp->icargs + * go into different cache lines. + */ + mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0); + mp->icc->fn = nil; + if(i < initialTCs){ + conf.nmach++; + mp->nixtype = NIXTC; + } + ainc(&active.nbooting); + } + sys->epoch = rdtsc(); + mfence(); + wrmsr(0x10, sys->epoch); + m->rdtsc = rdtsc(); + active.thunderbirdsarego = 1; + start = fastticks2us(fastticks(nil)); + do{ + now = fastticks2us(fastticks(nil)); + }while(active.nbooting > 0 && now - start < 1000000) + ; + if(active.nbooting > 0) + print("cpu0: %d cores couldn't start\n", active.nbooting); + active.nbooting = 0; +} + +void +DONE(void) +{ + print("DONE\n"); + prflush(); + delay(10000); + ndnr(); +} + +void +HERE(void) +{ + print("here\n"); + prflush(); + delay(5000); +} + +void +main(u32int ax, u32int bx) +{ + vlong hz; + + memset(edata, 0, end - edata); + + /* + * ilock via i8250enable via i8250console + * needs m->machno, sys->machptr[] set, and + * also 'up' set to nil. 
+ */ + cgapost(sizeof(uintptr)*8); + memset(m, 0, sizeof(Mach)); + m->machno = 0; + m->online = 1; + m->nixtype = NIXTC; + sys->machptr[m->machno] = &sys->mach; + m->stack = PTR2UINT(sys->machstk); + m->vsvm = sys->vsvmpage; + up = nil; + active.nonline = 1; + active.exiting = 0; + active.nbooting = 0; + asminit(); + multiboot(ax, bx, 0); + options(oargc, oargv); + crapoptions(); + + /* + * Need something for initial delays + * until a timebase is worked out. + */ + m->cpuhz = 2000000000ll; + m->cpumhz = 2000; + + cgainit(); + i8250console("0"); + consputs = cgaconsputs; + + vsvminit(MACHSTKSZ, NIXTC); + + conf.nmach = 1; + + fmtinit(); + print("\nNIX\n"); + if(vflag){ + print("&ax = %#p, ax = %#ux, bx = %#ux\n", &ax, ax, bx); + multiboot(ax, bx, vflag); + } + + m->perf.period = 1; + if((hz = archhz()) != 0ll){ + m->cpuhz = hz; + m->cyclefreq = hz; + m->cpumhz = hz/1000000ll; + } + + /* + * Mmuinit before meminit because it + * flushes the TLB via m->pml4->pa. + */ + mmuinit(); + + ioinit(); + kbdinit(); + meminit(); + confinit(); + archinit(); + mallocinit(); + + /* + * Acpiinit will cause the first malloc + * call to happen. + * If the system dies here it's probably due + * to malloc not being initialised + * correctly, or the data segment is misaligned + * (it's amazing how far you can get with + * things like that completely broken). + */ + acpiinit(); + + umeminit(); + trapinit(); + printinit(); + + /* + * This is necessary with GRUB and QEMU. + * Without it an interrupt can occur at a weird vector, + * because the vector base is likely different, causing + * havoc. Do it before any APIC initialisation. 
+ */ + i8259init(32); + + + procinit0(); + mpsinit(maxcores); + apiconline(); + sipi(); + + timersinit(); + kbdenable(); + fpuinit(); + psinit(conf.nproc); + initimage(); + links(); + devtabreset(); + pageinit(); + swapinit(); + userinit(); + nixsquids(); +testiccs(); +print("schedinit...\n"); + schedinit(); +} + +void +init0(void) +{ + char buf[2*KNAMELEN]; + + up->nerrlab = 0; + +// if(consuart == nil) +// i8250console("0"); + spllo(); + + /* + * These are o.k. because rootinit is null. + * Then early kproc's will have a root and dot. + */ + up->slash = namec("#/", Atodir, 0, 0); + pathclose(up->slash->path); + up->slash->path = newpath("/"); + up->dot = cclone(up->slash); + + devtabinit(); + + if(!waserror()){ + snprint(buf, sizeof(buf), "%s %s", "AMD64", conffile); + ksetenv("terminal", buf, 0); + ksetenv("cputype", "amd64", 0); + if(cpuserver) + ksetenv("service", "cpu", 0); + else + ksetenv("service", "terminal", 0); + ksetenv("pgsz", "2097152", 0); + confsetenv(); + poperror(); + } + kproc("alarm", alarmkproc, 0); + touser(sp); +} + +void +bootargs(uintptr base) +{ + int i; + ulong ssize; + char **av, *p; + + /* + * Push the boot args onto the stack. + * Make sure the validaddr check in syscall won't fail + * because there are fewer than the maximum number of + * args by subtracting sizeof(up->arg). + */ + i = oargblen+1; + p = UINT2PTR(STACKALIGN(base + BIGPGSZ - sizeof(up->arg) - i)); + memmove(p, oargb, i); + + /* + * Now push argc and the argv pointers. + * This isn't strictly correct as the code jumped to by + * touser in init9.[cs] calls startboot (port/initcode.c) which + * expects arguments + * startboot(char* argv0, char* argv[]) + * not the usual (int argc, char* argv[]), but argv0 is + * unused so it doesn't matter (at the moment...). 
+ */ + av = (char**)(p - (oargc+2)*sizeof(char*)); + ssize = base + BIGPGSZ - PTR2UINT(av); + *av++ = (char*)oargc; + for(i = 0; i < oargc; i++) + *av++ = (oargv[i] - oargb) + (p - base) + (USTKTOP - BIGPGSZ); + *av = nil; + + sp = USTKTOP - ssize; +} + +void +userinit(void) +{ + Proc *p; + Segment *s; + KMap *k; + Page *pg; + + p = newproc(); + p->pgrp = newpgrp(); + p->egrp = smalloc(sizeof(Egrp)); + p->egrp->ref = 1; + p->fgrp = dupfgrp(nil); + p->rgrp = newrgrp(); + p->procmode = 0640; + + kstrdup(&eve, ""); + kstrdup(&p->text, "*init*"); + kstrdup(&p->user, eve); + + /* + * Kernel Stack + * + * N.B. make sure there's enough space for syscall to check + * for valid args and + * space for gotolabel's return PC + * AMD64 stack must be quad-aligned. + */ + p->sched.pc = PTR2UINT(init0); + p->sched.sp = PTR2UINT(p->kstack+KSTACK-sizeof(up->arg)-sizeof(uintptr)); + p->sched.sp = STACKALIGN(p->sched.sp); + + /* + * User Stack + * + * Technically, newpage can't be called here because it + * should only be called when in a user context as it may + * try to sleep if there are no pages available, but that + * shouldn't be the case here. 
+ */ + s = newseg(SG_STACK, USTKTOP-USTKSIZE, USTKSIZE/BIGPGSZ); + p->seg[SSEG] = s; + + pg = newpage(1, 0, USTKTOP-BIGPGSZ, BIGPGSZ, -1); + segpage(s, pg); + k = kmap(pg); + bootargs(VA(k)); + kunmap(k); + + /* + * Text + */ + s = newseg(SG_TEXT, UTZERO, 1); + s->flushme++; + p->seg[TSEG] = s; + pg = newpage(1, 0, UTZERO, BIGPGSZ, -1); + memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl)); + segpage(s, pg); + k = kmap(s->map[0]->pages[0]); + memmove(UINT2PTR(VA(k)), initcode, sizeof initcode); + kunmap(k); + + ready(p); +} + +void +confinit(void) +{ + int i; + + conf.npage = 0; + for(i=0; i 1000) + conf.nproc = 1000; + conf.nimage = 200; + conf.nswap = conf.nproc*80; + conf.nswppo = 4096; +} + +static void +shutdown(int ispanic) +{ + int ms, once; + + lock(&active); + if(ispanic) + active.ispanic = ispanic; + else if(m->machno == 0 && m->online == 0) + active.ispanic = 0; + once = m->online; + m->online = 0; + adec(&active.nonline); + active.exiting = 1; + unlock(&active); + + if(once) + iprint("cpu%d: exiting\n", m->machno); + + spllo(); + for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){ + delay(TK2MS(2)); + if(active.nonline == 0 && consactive() == 0) + break; + } + + if(active.ispanic && m->machno == 0){ + if(cpuserver) + delay(30000); + else + for(;;) + halt(); + } + else + delay(1000); +} + +void +reboot(void*, void*, long) +{ + panic("reboot\n"); +} + +void +exit(int ispanic) +{ + shutdown(ispanic); + archreset(); +} diff -Nru 0/sys/src/nix/test/1/runtest 4/sys/src/nix/test/1/runtest --- 0/sys/src/nix/test/1/runtest Thu Jan 1 00:00:00 1970 +++ 4/sys/src/nix/test/1/runtest Wed Feb 6 00:00:00 2013 @@ -0,0 +1,12 @@ +#!/bin/rc + +rfork ne + +# import rc functions popular among scripts, e.g. fail +# +. 
../tools
+
+# make sure we have 32 cores running
+ncores=`{wc -l /dev/sysstat | sed 's,/.*,,'}
+~ $ncores 32 || fail does not have 32 cores
+
diff -Nru 0/sys/src/nix/test/README 4/sys/src/nix/test/README
--- 0/sys/src/nix/test/README	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/test/README	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,45 @@
+This is regression testing for nix.
+Each directory named with a number represents a single test.
+(e.g., 1/ 2/ ...)
+A test directory may also be named k1, k2, k....
+All k tests run after all std. tests. They are meant to install ad-hoc
+kernels for testing.
+
+The script actually running the tests is runtests, which should be run from
+cpustart during the machine boot process.
+That is, include
+	test -x /cfg/$sysname/runtests && /cfg/$sysname/runtests
+in your cpustart file.
+runtests is not meant to be run by hand to start the testing sequence.
+To (re)run all the tests, you should run ./Tests instead.
+
+See 1/ for a template. Copy it to your own directory and tweak it at will.
+
+To start testing, run the script ./Tests in clu, which changes
+the boot sequence such that the machine starts to run tests
+(perhaps installing different kernels and rebooting) until all
+tests have been run or one has failed.
+
+Each test directory contains:
+
+- kern: a script used to compile and install a kernel used for testing.
+  If no such file is found, no new kernel is installed and the current one
+  is used. Otherwise, the indicated kernel is installed and the machine
+  reboots using this kernel.
+
+- runtest: a script used to run a test. This is mandatory.
+
+- whichever other files must be available for the tests to run.
+
+Tests generate within each test directory:
+
+- koutput: a file keeping the output for a kern that did run
+- output: a file keeping the output for a test that did run
+- FAIL: an empty file, reporting that a test did fail.
+
+
+BEWARE that if you install a kernel for a test then that kernel is used
+for all following tests.
+As a convention, tests installing a kernel should be named k0, k1, ...
+test 1 installs the std kernel, so that all tests use the regular kernel.
+
diff -Nru 0/sys/src/nix/test/Tests 4/sys/src/nix/test/Tests
--- 0/sys/src/nix/test/Tests	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/test/Tests	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,15 @@
+#!/bin/rc
+
+# change the boot sequence to run tests.
+
+# remove stale state from tests:
+rm -f [0-9]*/^(koutput output FAIL) k[0-9]*/^(koutput output FAIL)
+
+# arrange for them to run after rebooting
+if(test -e /cfg/$sysname/_runtests)
+	mv /cfg/$sysname/_runtests /cfg/$sysname/runtests
+if(! test -x /cfg/$sysname/runtests){
+	echo there is no /cfg/$sysname/runtests
+	exit no
+}
+reboot
diff -Nru 0/sys/src/nix/test/runtests 4/sys/src/nix/test/runtests
--- 0/sys/src/nix/test/runtests	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/test/runtests	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,43 @@
+#!/bin/rc
+
+# run every pending test; meant to be started from cpustart at boot
+echo tests...
+cd /sys/src/nix/test
+. tools
+failed=()
+tests=[0-9]*
+# add the k* tests one by one: test -e breaks when the pattern
+# matches more than one directory, and stays literal when none does
+for(k in k[0-9]*)
+	if(test -d $k)
+		tests=($tests $k)
+for(t in $tests){
+	cd $t || fail cannot cd into test $t
+	if(test -e FAIL)
+		failed=($failed $t)
+	if not{
+		# a test that already has an output file is not run again
+		if(! test -e output && ! test -e FAIL) {
+			echo running test $t
+			if(test -x kern && ! test -e koutput){
+				log running kern for test $t
+				if(! kern >koutput >[2=1]){
+					touch FAIL
+					fail test $t failed
+				}
+				# koutput now exists, so kern is skipped after the reboot
+				reboot
+			}
+			if(! runtest>output >[2=1]){
+				touch FAIL
+				fail test $t failed
+			}
+			echo test $t ok
+		}
+	}
+	cd ..
+}
+if(! ~ $#failed 0)
+	echo tests $failed failed
+if not
+	echo all tests passed
diff -Nru 0/sys/src/nix/test/tools 4/sys/src/nix/test/tools
--- 0/sys/src/nix/test/tools	Thu Jan 1 00:00:00 1970
+++ 4/sys/src/nix/test/tools	Wed Feb 6 00:00:00 2013
@@ -0,0 +1,9 @@
+fn fail {
+	echo $* >[1=2]
+	exit fail
+}
+
+fn log {
+	echo $*
+	echo $* >/dev/cons
+}