--- /sys/src/9/port/devpmc.c Thu Jul 11 00:00:00 2013 +++ /sys/src/9/port/devpmc.c Thu Jul 11 00:00:00 2013 @@ -0,0 +1,379 @@ +/* + * Performance counters + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "pmc.h" + + +enum{ + Qdir = 0, + Qdesc, + Qcore, + PmcCtlRdStr = 4*1024, +}; + +#define PMCTYPE(x) (((unsigned)x)&0xffful) +#define PMCID(x) (((unsigned)x)>>12) +#define PMCQID(i, t) ((((unsigned)i)<<12)|(t)) + +static Dirtab *toptab; +static Lock toptablck; +static int ntoptab; +int pmcdebug; +/* allocate toptab (ctrdesc plus one slot per possible core) and fill in the ctrdesc entry */ +static void +topdirinit(void) +{ + Dirtab *d; + int nent; + + nent = 1 + MAXMACH; + toptab = mallocz(nent * sizeof(Dirtab), 1); + if (toptab == nil) + panic("pmc: no memory for toptab"); /* corefilesinit indexes toptab unconditionally */ + d = toptab; + strncpy(d->name, "ctrdesc", KNAMELEN); + mkqid(&d->qid, Qdesc, 0, 0); + d->perm = 0440; + +} +/* sync the coreNNNN entries of toptab with the cores present in MACHP(); returns the entries in use, ctrdesc included */ +static int +corefilesinit(void) +{ + int i, nc, newn; + Dirtab *d; + + nc = 0; + lock(&toptablck); + for(i = 0; i < MAXMACH; i++) { + if(MACHP(i) != nil){ + d = &toptab[nc + 1]; + /* if you take them out, be careful in pmcgen too */ + if(d->name[0] != '\0'){ + if(PMCQID(i, Qcore) == d->qid.path){ + nc++; + continue; + }else{ + /* a new one appeared, make space, should almost never happen; at most MAXMACH-i-1 later cores can follow, moving more runs off the end of toptab */ + memmove(d + 1, d, (MAXMACH - i - 1)*sizeof(*d)); + memset(d, 0, sizeof(*d)); + } + } + snprint(d->name, KNAMELEN, "core%4.4ud", i); + mkqid(&d->qid, PMCQID(i, Qcore), 0, 0); + d->perm = 0660; + nc++; + } + } + newn = 1 + nc; + ntoptab = newn; + unlock(&toptablck); + return newn; + +} + +/* configure the counters, then build the directory table */ +static void +pmcinit(void) +{ + pmcconfigure(); + topdirinit(); + corefilesinit(); +} +/* refresh the core entries before attaching */ +static Chan * +pmcattach(char *spec) +{ + corefilesinit(); + return devattach(L'ε', spec); +} +/* directory generator: entry s of toptab, or the "#ε" directory itself for DEVDOTDOT */ +int +pmcgen(Chan *c, char *, Dirtab*, int, int s, Dir *dp) +{ + int ntab; + Dirtab *d; + + ntab = corefilesinit(); + if(s == DEVDOTDOT){ + devdir(c, (Qid){Qdir, 0, QTDIR}, "#ε", 0, eve, 0555, dp); + c->aux = nil; + return 1; + } + /* first, for directories, 
generate children; NOTE(review): Qdesc takes the default branch, so generating from a ctrdesc chan yields nothing -- confirm intended */ + switch((int)PMCTYPE(c->qid.path)){ + case Qdir: + case Qcore: + if(s >= ntab) + return -1; + d = &toptab[s]; + devdir(c, d->qid, d->name, d->length, eve, d->perm, dp); + return 1; + default: + return -1; + } +} +/* walk via devwalk/pmcgen; on a core file, stash the core number in c->aux first */ +static Walkqid* +pmcwalk(Chan *c, Chan *nc, char **name, int nname) +{ + if(PMCTYPE(c->qid.path) == Qcore) + c->aux = (void *)PMCID(c->qid.path); /* core no */ + return devwalk(c, nc, name, nname, nil, 0, pmcgen); +} +/* stat via devstat, entries come from pmcgen */ +static int +pmcstat(Chan *c, uchar *dp, int n) +{ + return devstat(c, dp, n, nil, 0, pmcgen); +} +/* only eve may open; the rest is devopen */ +static Chan* +pmcopen(Chan *c, int omode) +{ + if (!iseve()) + error(Eperm); + return devopen(c, omode, nil, 0, pmcgen); +} +/* nothing to release */ +static void +pmcclose(Chan *) +{ +} +/* format "value on|off user|nouser os|noos desc\n" for one counter into str; returns the chars written */ +static int +pmcctlstr(char *str, int nstr, PmcCtl *p, vlong v) +{ + int ns; + + ns = 0; + ns += snprint(str + ns, nstr - ns, "%#ullx ", v); + if (p->enab && p->enab != PmcCtlNullval) + ns += snprint(str + ns, nstr - ns, "on "); + else + ns += snprint(str + ns, nstr - ns, "off "); + + if (p->user && p->user != PmcCtlNullval) + ns += snprint(str + ns, nstr - ns, "user "); + else + ns += snprint(str + ns, nstr - ns, "nouser "); + + if (p->os && p->os != PmcCtlNullval) /* was testing p->user against Nullval */ + ns += snprint(str + ns, nstr - ns, "os "); + else + ns += snprint(str + ns, nstr - ns, "noos "); + + /* TODO, inverse pmctrans? 
*/ + if(!p->nodesc) + ns += snprint(str + ns, nstr - ns, "%s", p->descstr); + else + ns += snprint(str + ns, nstr - ns, "no desc"); + ns += snprint(str + ns, nstr - ns, "\n"); + return ns; +} + +/* read the directory, ctrdesc (event names) or coreNNNN (one line per enabled counter) */ +/* this should be safe to use even if there is no core anymore */ +static long +pmcread(Chan *c, void *a, long n, vlong offset) +{ + ulong type; + PmcCtl p; + char *s; + u64int v; + u64int coreno; + int nr, i, ns, nn; + + type = PMCTYPE(c->qid.path); + coreno = PMCID(c->qid.path); + + if(type == Qdir) + return devdirread(c, a, n, nil, 0, pmcgen); + s = smalloc(PmcCtlRdStr); /* waits for memory instead of returning nil */ + if(waserror()){ + free(s); + nexterror(); + } + + p.coreno = coreno; + nr = pmcnregs(); + switch(type){ + case Qcore: + ns = 0; + for(i = 0; i < nr; i ++){ + if (pmcgetctl(coreno, &p, i) < 0) + error("bad ctr"); + if(! p.enab) + continue; + v = pmcgetctr(coreno, i); + ns += snprint(s + ns, PmcCtlRdStr - ns, "%2.2ud ", i); + nn = pmcctlstr(s + ns, PmcCtlRdStr - ns, &p, v); + if (nn < 0) /* check the formatter's result, not the caller's byte count */ + error("bad pmc"); + ns += nn; + } + break; + case Qdesc: + if (pmcdescstr(s, PmcCtlRdStr) < 0) + error("bad pmc"); + break; + default: + error(Eperm); + } + n = readstr(offset, a, n, s); + free(s); + poperror(); + return n; +} +/* true unless the field is the placeholder "-" */ +static int +isset(char *str) +{ + return strncmp(str, "-", 2) != 0; +} +/* first counter on coreno that reads back as not enabled, or -1 if there is none */ +static int +pickregno(int coreno) +{ + PmcCtl p; + int nr, i; + + nr = pmcnregs(); + for(i = 0; i < nr; i++){ + if (pmcgetctl(coreno, &p, i) || p.enab) + continue; + return i; + } + + return -1; +} +/* apply the on/off, user/nouser, os/noos words in cb->f[start..end] to p; "-" is skipped, any other word is an error */ +static int +fillctl(PmcCtl *p, Cmdbuf *cb, int start, int end) +{ + int i; + + if(end > cb->nf -1) + end = cb->nf -1; + for(i = start; i <= end; i++){ + if(pmcdebug != 0) + print("setting field %d to %s\n", i, cb->f[i]); + if(!isset(cb->f[i])) + continue; + else if(strcmp("on", cb->f[i]) == 0) + p->enab = 1; + else if(strcmp("off", cb->f[i]) == 0) + p->enab = 0; + else if(strcmp("user", cb->f[i]) == 0) + p->user = 1; + else if(strcmp("os", cb->f[i]) == 0) + p->os = 1; + else if(strcmp("nouser", cb->f[i]) == 0) + p->user = 
0; + else if(strcmp("noos", cb->f[i]) == 0) + p->os = 0; + else + error("bad ctl"); + } + return 0; +} +/* ctl write: "debug" toggles tracing; otherwise "regno|- value|- [flags ...] [desc ...]" programs one counter of this core */ +/* this should be safe to use even if there is no core anymore */ +static long +pmcwrite(Chan *c, void *a, long n, vlong) +{ + Cmdbuf *cb; + u64int coreno; + int regno, i, ns; + PmcCtl p; + char *s; + + if (c->qid.type == QTDIR) + error(Eperm); + if (c->qid.path == Qdesc) + error(Eperm); + + coreno = PMCID(c->qid.path); + p.coreno = coreno; + + /* TODO, multiple lines? */ + cb = parsecmd(a, n); + if(waserror()){ + free(cb); + nexterror(); + } + if(cb->nf < 1) + error("short ctl"); + if(strcmp("debug", cb->f[0]) == 0) + pmcdebug = ~pmcdebug; + else{ + if(cb->nf < 2) + error("short ctl"); + if(!isset(cb->f[0])){ + /* racy, it does not reserve the core */ + regno = pickregno(coreno); + if(regno < 0) + error("no free regno"); + if(pmcdebug != 0) + print("picked regno %d\n", regno); + }else{ + regno = strtoull(cb->f[0], 0, 0); + if(regno < 0 || regno >= pmcnregs()) /* valid counters are 0..nregs-1 */ + error("ctr number too big"); + if(pmcdebug != 0) + print("setting regno %d\n", regno); + } + if(isset(cb->f[1])) + pmcsetctr(coreno, strtoull(cb->f[1], 0, 0), regno); + + pmcinitctl(&p); + fillctl(&p, cb, 2, 4); + ns = 0; + s = p.descstr; + s[0] = '\0'; + for(i = 5; i < cb->nf; i++){ + if(!isset(cb->f[i])) + continue; + ns += snprint(s + ns, KNAMELEN - ns, "%s ", cb->f[i]); + p.nodesc = 0; + } + if(pmcdebug != 0) + print("setting desc to %s\n", p.descstr); + pmcsetctl(coreno, &p, regno); + } + free(cb); + poperror(); + + + return n; +} + + +Dev pmcdevtab = { + L'ε', + "pmc", + + pmcinit, + devinit, + devshutdown, + pmcattach, + pmcwalk, + pmcstat, + pmcopen, + devcreate, + pmcclose, + pmcread, + devbread, + pmcwrite, + devbwrite, + devremove, + devwstat, +}; --- /sys/man/3/pmc Thu Jul 11 00:00:00 2013 +++ /sys/man/3/pmc Thu Jul 11 00:00:00 2013 @@ -0,0 +1,118 @@ +.TH PMC 3 +.SH NAME +pmc \- performance monitoring counters +.SH SYNOPSIS +.nf +.B bind '#ε' /dev + +.BI #ε/ctrdesc +.BI #ε/core0000 +.BI #ε/core0001 +.BI 
#ε/core0002 + ... +.fi +.SH DESCRIPTION +.PP +The +.I pmc +device serves a one-level directory, giving +access to the hardware counters on the +different cores. These counters can count events, +cycles or any unit of measure imagined by the designer +of the architecture. +There is a file per core, +.B coreNNNN, +containing the state of the active counters on +that core. +.PP +This file provides access to the value of the counters. +.PP +Any change to or from the counters or the configurations is seen +as soon as possible. In the worst case, it is guaranteed that the values +read or set are at least as fresh as the last time a process went in or out +of the kernel on that core. +.PP +The file contains a line per active counter. Each line has 7 fields separated by spaces. +The fields are: +.IP \- +The counter number (each core may have a number of counters). +.IP \- +The value of the counter. +.IP \- +The string +.B on. +.IP \- +A string describing whether it counts in user space, either +.B user +or +.B nouser. +.IP \- +A string describing whether it counts in kernel space, either +.B os +or +.B noos. +.IP \- +A string describing the configuration in a format recognized by this +driver (two fields, the event and the unit mask). Readable versions of this string can be found in the file +.B ctrdesc. +.PP +A line similar to those obtained when reading can be written to the file to configure +the counters. The first two fields are mandatory, the rest are optional and may or may +not be present. If the number of the counter is -, the driver picks +an appropriate one or returns an error if none is free. +Any other field with the value +.B - +is simply ignored. 
+.PP +.SH "EXAMPLES +.EX + % bind '#ε' /dev + % cat /dev/ctrdesc + locked instr + locked cycles nonspecul + SMI intr + DC access + DC miss + DC refills + DC evicted + L1 DTLB miss + L2 DTLB miss + L1 DTLB hit + global TLB flush + L2 hit + L2 miss + IC miss + IC refill from L2 + IC refill from system + L1 ITLB miss + L2 ITLB miss + DRAM access + L3 miss + + % echo 00 - on > core0000 + % cat core0000 + 00 0x23423 on user noos 0xe0 0x3f + % echo '00 0x0 on user noos L1 DTLB miss' > core0000 + % cat core0000 + 00 0x2 on user noos 0xe0 0x3f + % echo '- 0xdead on user noos DRAM access' > core0000 + % cat core0000 + 00 0x78 on user noos 0xe0 0x3f + 01 0xdefd on user noos 0xe0 0x3f + % echo 00 - off > core0000 + % cat core0000 + 01 0xfabc on user noos 0xe0 0x3f +.EE +.SH SOURCE +.PP +.B /sys/src/9/pc/pmc.h +.B /sys/src/9/port/devpmc.c +.B /sys/src/9/pc/pmcio.c +.SH "SEE ALSO +AMD +``BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors'', +.I +section 3.14, Performance Counter Events . +Intel Software Developer's Manual, chapters 18 and 19. +.SH BUGS +More portable descriptions for different families of Intel should be added. 
--- /sys/src/9/pc/pmc.h Thu Jul 11 00:00:00 2013 +++ /sys/src/9/pc/pmc.h Thu Jul 11 00:00:00 2013 @@ -0,0 +1,64 @@ +typedef struct PmcCtl PmcCtl; +typedef struct PmcCtr PmcCtr; +typedef struct PmcCtlCtrId PmcCtlCtrId; + +enum { + Pce = 0x00000100, /* Performance Monitoring Counter Enable */ +}; + +/* + * HW performance counters + */ +struct PmcCtl { + u32int coreno; + int enab; + int user; + int os; + int nodesc; + char descstr[KNAMELEN]; +}; +/* per-counter state kept for each core: pending writes and the last values read */ +struct PmcCtr{ + int stale; + Rendez r; + u64int ctr; + int ctrset; + PmcCtl; + int ctlset; +}; + +enum { + PmcMaxCtrs = 4, +}; + +struct PmcCore{ + Lock; + PmcCtr ctr[PmcMaxCtrs]; +}; +/* one event: portable name and the arch "event unitmask" string it translates to */ +struct PmcCtlCtrId { + char portdesc[KNAMELEN]; + char archdesc[KNAMELEN]; +}; + +enum { + PmcIgn = 0, + PmcGet = 1, + PmcSet = 2, +}; + +enum { + PmcCtlNullval = 0xdead, /* field not specified: keep the current setting */ +}; + +extern int pmcnregs(void); +extern void pmcinitctl(PmcCtl*); +extern int pmcsetctl(u32int, PmcCtl*, u32int); +extern int pmctrans(PmcCtl*); +extern int pmcgetctl(u32int, PmcCtl*, u32int); +extern int pmcdescstr(char*, int); +extern u64int pmcgetctr(u32int, u32int); +extern int pmcsetctr(u32int, u64int, u32int); + +extern void (*pmcupdate)(void); /* defined in trap.c, set by pmcconfigure */ +extern void pmcconfigure(void); --- /sys/src/9/pc/pmcio.c Thu Jul 11 00:00:00 2013 +++ /sys/src/9/pc/pmcio.c Thu Jul 11 00:00:00 2013 @@ -0,0 +1,589 @@ +/* + * Performance counters non portable part + */ + +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "../port/error.h" + +#include "pmc.h" + +typedef struct PmcCfg PmcCfg; +typedef struct PmcCore PmcCore; + +enum { + PeUnk, + PeAmd, + /* + * See Vol 3B Intel + * 64 Architecture's Software Developer's manual + */ + PeIntel, +}; + +enum { + _PeUnk, + /* Non architectural */ + PeIntelSandy, + PeIntelNehalem, + PeIntelWestmere, + /* + * See BKDG for AMD cfg.family 10 Processors + * section 2.16 and 3.14 + */ + PeK10, + +}; + +enum { + PeNregAmd = 4, /* Number of Pe/Pct regs for K10 */ +}; + +enum { /* MSRs */ + 
PerfCtrbaseIntel= 0x000000c1, /* Performance Counters */ + PerfEvtbaseIntel= 0x00000186, /* Performance Event Select */ + PerfGlobalCtr = 0x0000038f, /* Performance Event Global Ctrl, intel */ + + PerfEvtbaseAmd = 0xc0010000, /* Performance Event Select */ + PerfCtrbaseAmd = 0xc0010004, /* Performance Counters */ +}; + +enum { /* HW Performance Counters Event Selector */ + + PeHo = 0x0000020000000000ull,/* Host only */ + PeGo = 0x0000010000000000ull,/* Guest only */ + PeEvMskH = 0x0000000f00000000ull,/* Event mask H */ + PeCtMsk = 0x00000000ff000000ull,/* Counter mask */ + PeInMsk = 0x0000000000800000ull,/* Invert mask */ + PeCtEna = 0x0000000000400000ull,/* Counter enable */ + PeInEna = 0x0000000000100000ull,/* Interrupt enable */ + PePnCtl = 0x0000000000080000ull,/* Pin control */ + PeEdg = 0x0000000000040000ull,/* Edge detect */ + PeOS = 0x0000000000020000ull,/* OS mode */ + PeUsr = 0x0000000000010000ull,/* User mode */ + PeUnMsk = 0x000000000000ff00ull,/* Unit Mask */ + PeEvMskL = 0x00000000000000ffull,/* Event Mask L */ + + PeEvMsksh = 32ull, /* Event mask shift */ +}; + +struct PmcCfg { + int nregs; + u32int ctrbase; + u32int evtbase; + int vendor; + int family; + PmcCtlCtrId *pmcidsarch; + PmcCtlCtrId *pmcids; +}; + +extern int pmcdebug; + +static PmcCfg cfg; +static PmcCore pmccore[MAXMACH]; + +static void pmcmachupdate(void); +/* number of usable counters (cached in cfg.nregs once non-zero), capped at PmcMaxCtrs */ +int +pmcnregs(void) +{ + ulong info[4]; + int nregs; + + if(cfg.nregs != 0) + return cfg.nregs; /* don't call cpuid more than necessary */ + switch(cfg.vendor){ + case PeAmd: + nregs = PeNregAmd; + break; + case PeIntel: + cpuid(0xa, info); + nregs = (info[0]>>8)&0xff; + break; + default: + nregs = 0; + } + if(nregs > PmcMaxCtrs) + nregs = PmcMaxCtrs; + return nregs; +} +/* value written to PerfGlobalCtr by pmcenab: one enable bit per counter on Intel, all ones on AMD (where pmcenab does not use it) */ +static u64int +pmcmsk(void) +{ + ulong info[4]; + u64int msk; + + msk = 0; + switch(cfg.vendor){ + case PeAmd: + msk = ~0ULL; + break; + case PeIntel: + cpuid(0xa, info); + msk = (1ULL<<((info[0]>>8)&0xff)) - 1; /* bits 15:8 = number of counters (as in pmcnregs); 23:16 is their bit width */ + break; + } + return msk; +} + +PmcCtlCtrId 
pmcidsk10[] = { + {"locked instr", "0x024 0x1"}, + {"locked cycles nonspecul", "0x024 0x4"}, /* in cycles */ + {"SMI intr", "0x02b 0x0"}, + {"DC access", "0x040 0x0"}, + {"DC miss", "0x041 0x0"}, + {"DC refills", "0x042 0x1f"}, + {"DC evicted", "0x042 0x3f"}, + {"L1 DTLB miss", "0x045 0x7"}, /* DTLB L2 hits */ + {"L2 DTLB miss", "0x046 0x7"}, + {"L1 DTLB hit", "0x04d 0x3"}, + {"global TLB flush", "0x054 0x0"}, + {"L2 hit", "0x07d 0x3f"}, + {"L2 miss", "0x07e 0xf"}, + {"IC miss", "0x081 0x0"}, + {"IC refill from L2", "0x082 0x0"}, + {"IC refill from system", "0x083 0x0"}, + {"L1 ITLB miss", "0x084 0x0"}, /* L2 ITLB hits */ + {"L2 ITLB miss", "0x085 0x3"}, + {"DRAM access", "0x0e0 0x3f"}, + //{"L3 miss core 0", "0x4e1 0x13"}, + //{"L3 miss core 1", "0x4e1 0x23"}, + //{"L3 miss core 2", "0x4e1 0x43"}, + //{"L3 miss core 3", "0x4e1 0x83"}, + {"L3 miss", "0x4e1 0xf3"}, /* all cores in the socket */ + {"", ""}, +}; + +/* 18.2.3 Intel Software Developer's Manual */ +PmcCtlCtrId pmcidsintel[] = { + {"unhalted cycles", "0x3c 0x0"}, + {"instr", "0xc0 0x0"}, + {"Llast misses", "0x2e 0x41"}, + {"branch instr", "0xc4 0x0"}, + {"branch misses", "0xc5 0x0 "}, + {"", ""}, +}; + +/* Table 19.7 Intel Software Developer's Manual */ +PmcCtlCtrId pmcidsandy[] = { + {"DTLB walk cycles", "0x49 0x4"}, /* all levels */ + {"DTLB miss", "0x8 0x2"}, + {"DTLB hit", "0x8 0x4"}, + {"L2 hit", "0x24 0x4"}, + {"L2 miss", "0x24 0x8"}, + {"IL2 hit", "0x24 0x10"}, + {"IL2 miss", "0x24 0x20"}, + {"ITLB miss", "0x85 0x2"}, + {"ITLB walk cycles", "0x85 0x4"}, + {"ITLB flush", "0xae 0x1"}, + {"mem loads", "0xd0 0xf1"}, /* counts μops */ + {"mem stores", "0xd0 0xf2"}, + {"mem ops", "0xd0 0xf3"}, + {"", ""}, +}; +/* model/family fields of a cpuid ax value, extended bits shifted in above the base nibble */ +#define X86MODEL(x) ((((x)>>4) & 0x0F) | (((x)>>16) & 0x0F)<<4) +#define X86FAMILY(x) ((((x)>>8) & 0x0F) | (((x)>>20) & 0xFF)<<4) +/* map the model in m->cpuidax to one of the PeIntel* families, PeUnk if not recognized */ +static int +pmcintelfamily(void) +{ + u32int info, fam, mod; + + info = m->cpuidax; + + fam = X86FAMILY(info); + mod = X86MODEL(info); + if(fam != 0x6 && fam != 
0xf) + return PeUnk; + switch(mod){ + case 0x2a: + return PeIntelSandy; + case 0x1a: + case 0x1e: + case 0x1f: + return PeIntelNehalem; + case 0x25: + case 0x2c: + return PeIntelWestmere; + } + return PeUnk; +} +/* mark enab/user/os as "not specified" and nodesc as set; NOTE(review): descstr is left as 0xff bytes with no NUL */ +void +pmcinitctl(PmcCtl *p) +{ + memset(p, 0xff, sizeof(PmcCtl)); + p->enab = PmcCtlNullval; + p->user = PmcCtlNullval; + p->os = PmcCtlNullval; + p->nodesc = 1; +} +/* identify the cpu vendor, pick MSR bases and event tables, hook pmcupdate and init the state of the cores present */ +void +pmcconfigure(void) +{ + int i, j, isrecog; + + isrecog = 0; + + if(memcmp(m->cpuidid, "AuthenticAMD", 12) == 0){ + isrecog++; + cfg.ctrbase = PerfCtrbaseAmd; + cfg.evtbase = PerfEvtbaseAmd; + cfg.vendor = PeAmd; + cfg.family = PeUnk; + cfg.pmcidsarch = pmcidsk10; + }else if(memcmp(m->cpuidid, "GenuineIntel", 12) == 0){ + isrecog++; + cfg.ctrbase = PerfCtrbaseIntel; + cfg.evtbase = PerfEvtbaseIntel; + cfg.vendor = PeIntel; + cfg.family = pmcintelfamily(); + cfg.pmcidsarch = pmcidsintel; + switch(cfg.family){ + case PeIntelSandy: + cfg.pmcids = pmcidsandy; + break; + case PeIntelNehalem: + case PeIntelWestmere: + break; + } + }else + cfg.vendor = PeUnk; + + cfg.nregs = pmcnregs(); + if(isrecog) + pmcupdate = pmcmachupdate; + + for(i = 0; i < MAXMACH; i++) { + if(MACHP(i) != nil){ + for(j = 0; j < cfg.nregs; j++) + pmcinitctl(&pmccore[i].ctr[j]); + } + } +} +/* write pmcmsk() to the global control MSR on Intel; nothing to do on AMD */ +static void +pmcenab(void) +{ + switch(cfg.vendor){ + case PeAmd: + return; + case PeIntel: + wrmsr(PerfGlobalCtr, pmcmsk()); + break; + } +} + +/* set or clear Pce in cr4 so they can be read from user space; returns the resulting Pce bit */ +static int +pmcuserenab(int enable) +{ + u64int cr4; + + cr4 = getcr4(); + if (enable){ + cr4 |= Pce; + } else + cr4 &= ~Pce; + putcr4(cr4); + return cr4&Pce; +} +/* replace a portable event name at the start of p->descstr by its arch string; returns 1 if translated, else 0 */ +int +pmctrans(PmcCtl *p) +{ + PmcCtlCtrId *pi; + int n; + + n = 0; + if(cfg.pmcidsarch != nil) + for (pi = &cfg.pmcidsarch[0]; pi->portdesc[0] != '\0'; pi++){ + if (strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){ + strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1); + n = 1; + break; + } + } + /* these ones supersede the other ones */ + if(cfg.pmcids != nil) + 
for (pi = &cfg.pmcids[0]; pi->portdesc[0] != '\0'; pi++){ + if (strncmp(p->descstr, pi->portdesc, strlen(pi->portdesc)) == 0){ + strncpy(p->descstr, pi->archdesc, strlen(pi->archdesc) + 1); + n = 1; + break; + } + } + if(pmcdebug != 0) + print("really setting %s\n", p->descstr); + return n; +} + +//PeHo|PeGo +#define PeAll (PeOS|PeUsr) +#define SetEvMsk(v, e) ((v)|(((e)&PeEvMskL)|(((e)<<(PeEvMsksh-8))&PeEvMskH))) +#define SetUMsk(v, u) ((v)|(((u)<<8ull)&PeUnMsk)) + +#define GetEvMsk(e) (((e)&PeEvMskL)|(((e)&PeEvMskH)>>(PeEvMsksh-8))) +#define GetUMsk(u) (((u)&PeUnMsk)>>8ull) +/* read event-select register regno with rdmsr and decode it into p; descstr becomes "event unitmask" in hex */ +static int +getctl(PmcCtl *p, u32int regno) +{ + u64int e, u; + vlong r; + + rdmsr(regno + cfg.evtbase, &r); + p->enab = (r&PeCtEna) != 0; + p->user = (r&PeUsr) != 0; + p->os = (r&PeOS) != 0; + e = GetEvMsk(r); + u = GetUMsk(r); + /* TODO inverse translation */ + snprint(p->descstr, KNAMELEN, "%#ullx %#ullx", e, u); + p->nodesc = 0; + return 0; +} +/* 1 if any of the cfg.nregs event-select registers has its enable bit set, else 0 */ +static int +pmcanyenab(void) +{ + int i; + PmcCtl p; + + for (i = 0; i < cfg.nregs; i++) { + if (getctl(&p, i) < 0) + return -1; + if (p.enab) + return 1; + } + + return 0; +} + +/* program event-select register regno from p; fields equal to PmcCtlNullval keep their current value */ +static int +setctl(PmcCtl *p, int regno) +{ + u64int e, u; + vlong v; + char *toks[2]; + char str[KNAMELEN]; + + rdmsr(regno + cfg.evtbase, &v); + v &= PeEvMskH|PeEvMskL|PeCtEna|PeOS|PeUsr|PeUnMsk; + if (p->enab != PmcCtlNullval) + if (p->enab) + v |= PeCtEna; + else + v &= ~PeCtEna; + + if (p->user != PmcCtlNullval) + if (p->user) + v |= PeUsr; + else + v &= ~PeUsr; + + if (p->os != PmcCtlNullval) + if (p->os) + v |= PeOS; + else + v &= ~PeOS; + + if (pmctrans(p) < 0) /* NOTE(review): pmctrans only returns 0 or 1 */ + return -1; + + if (p->nodesc == 0) { + memmove(str, p->descstr, KNAMELEN); + if (tokenize(str, toks, 2) != 2) + return -1; + e = atoi(toks[0]); + u = atoi(toks[1]); + v &= ~(PeEvMskL|PeEvMskH|PeUnMsk); + v |= SetEvMsk(v, e); + v |= SetUMsk(v, u); + } + wrmsr(regno+ cfg.evtbase, v); + pmcuserenab(pmcanyenab()); + if (pmcdebug) { + rdmsr(regno+ cfg.evtbase, &v); + print("conf pmc[%#ux]: %#llux\n", 
regno, v); + } + return 0; +} +/* write the portable event names of the configured tables, one per line, into str; returns the length written */ +int +pmcdescstr(char *str, int nstr) +{ + PmcCtlCtrId *pi; + int ns; + + ns = 0; + + if(pmcdebug != 0) + print("vendor %x family %x nregs %d pmcnregs %d\n", cfg.vendor, cfg.family, cfg.nregs, pmcnregs()); + if(cfg.pmcidsarch == nil && cfg.pmcids == nil){ + *str = 0; + return ns; + } + + if(cfg.pmcidsarch != nil) + for (pi = &cfg.pmcidsarch[0]; pi->portdesc[0] != '\0'; pi++) + ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc); + if(cfg.pmcids != nil) + for (pi = &cfg.pmcids[0]; pi->portdesc[0] != '\0'; pi++) + ns += snprint(str + ns, nstr - ns, "%s\n",pi->portdesc); + return ns; +} +/* read counter MSR regno with rdmsr */ +static u64int +getctr(u32int regno) +{ + vlong r; + + rdmsr(regno + cfg.ctrbase, &r); + return r; +} +/* write counter MSR regno with wrmsr */ +static int +setctr(u64int v, u32int regno) +{ + wrmsr(regno + cfg.ctrbase, v); + return 0; +} +/* counter value: read from the MSR when coreno is the current mach, else the copy kept in pmccore */ +u64int +pmcgetctr(u32int coreno, u32int regno) +{ + PmcCtr *p; + u64int ctr; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + ctr = getctr(regno); + else + ctr = p->ctr; + iunlock(&pmccore[coreno]); + + return ctr; +} +/* set a counter: written to the MSR when coreno is the current mach, else stored in pmccore and flagged PmcSet */ +int +pmcsetctr(u32int coreno, u64int v, u32int regno) +{ + PmcCtr *p; + int n; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + n = setctr(v, regno); + else{ + p->ctr = v; + p->ctrset |= PmcSet; + p->stale = 1; + n = 0; + } + iunlock(&pmccore[coreno]); + + return n; +} +/* copy the specified (non-Nullval, nodesc == 0) fields of sctl into dctl */ +static void +ctl2ctl(PmcCtl *dctl, PmcCtl *sctl) +{ + if(sctl->enab != PmcCtlNullval) + dctl->enab = sctl->enab; + if(sctl->user != PmcCtlNullval) + dctl->user = sctl->user; + if(sctl->os != PmcCtlNullval) + dctl->os = sctl->os; + if(sctl->nodesc == 0) { + memmove(dctl->descstr, sctl->descstr, KNAMELEN); + dctl->nodesc = 0; + } +} +/* set the control fields: applied with setctl when coreno is the current mach, else merged into pmccore and flagged PmcSet */ +int +pmcsetctl(u32int coreno, PmcCtl *pctl, u32int regno) +{ + PmcCtr *p; + int n; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = 
&pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + n = setctl(pctl, regno); + else{ + ctl2ctl(&p->PmcCtl, pctl); + p->ctlset |= PmcSet; + p->stale = 1; + n = 0; + } + iunlock(&pmccore[coreno]); + + return n; +} +/* control fields: read with getctl when coreno is the current mach, else the copy kept in pmccore */ +int +pmcgetctl(u32int coreno, PmcCtl *pctl, u32int regno) +{ + PmcCtr *p; + int n; + + if (regno >= cfg.nregs) + error("invalid reg"); + p = &pmccore[coreno].ctr[regno]; + + ilock(&pmccore[coreno]); + if(coreno == m->machno) + n = getctl(pctl, regno); + else{ + memmove(pctl, &p->PmcCtl, sizeof(PmcCtl)); + n = 0; + } + iunlock(&pmccore[coreno]); + + return n; +} +/* for the current mach: apply the writes flagged PmcSet, then refresh the copies in pmccore and clear the flags */ +static void +pmcmachupdate(void) +{ + PmcCtr *p; + int coreno, i, maxct; + + if((maxct = cfg.nregs) <= 0) + return; + coreno = m->machno; + + ilock(&pmccore[coreno]); + for (i = 0; i < maxct; i++) { + p = &pmccore[coreno].ctr[i]; + if(p->ctrset & PmcSet) + setctr(p->ctr, i); + if(p->ctlset & PmcSet) + setctl(p, i); + p->ctr = getctr(i); + getctl(p, i); + p->ctrset = PmcIgn; + p->ctlset = PmcIgn; + p->stale = 0; + } + iunlock(&pmccore[coreno]); +} --- /n/sources/plan9/sys/src/9/pc/trap.c Thu Jul 11 23:35:25 2013 +++ /sys/src/9/pc/trap.c Thu Jul 11 00:00:00 2013 @@ -287,6 +288,8 @@ intrtimes[vno][diff]++; } +void (*pmcupdate)(void); + /* go to user space */ void kexit(Ureg*) @@ -294,6 +297,10 @@ uvlong t; Tos *tos; + /* performance counters */ + if(pmcupdate != nil) + pmcupdate(); + /* precise time accounting, kernel exit */ tos = (Tos*)(USTKTOP-sizeof(Tos)); cycles(&t); @@ -330,6 +337,9 @@ up->dbgreg = ureg; cycles(&up->kentry); } + /* performance counters */ + if(pmcupdate != nil) + pmcupdate(); clockintr = 0; --- /n/sources/plan9/sys/src/9/pc/fns.h Thu Jun 13 21:01:35 2013 +++ /sys/src/9/pc/fns.h Thu Jul 11 00:00:00 2013 @@ -142,6 +142,7 @@ void pcisetioe(Pcidev*); void pcisetmwi(Pcidev*); int pcisetpms(Pcidev*, int); +void (*pmcupdate)(void); void pcmcisread(PCMslot*); int pcmcistuple(int, int, int, void*, int); PCMmap* pcmmap(int, ulong, int, int); --- 
/n/sources/plan9/sys/src/9/pc/pcf Fri Aug 17 21:44:56 2012 +++ /sys/src/9/pc/pcf Sun Jul 7 00:00:00 2013 @@ -36,6 +36,7 @@ i82365 cis uart usb + pmc pmcio link realmode