--- /sys/man/8/smart Tue Jan 1 13:46:13 2013 +++ /sys/man/8/smart Tue Jan 1 00:00:00 2013 @@ -0,0 +1,52 @@ +.TH SMART 8 +.SH NAME +disk/smart, hdtemp \- hard drive error monitoring +.SH SYNOPSIS +.B disk/smart +[ +.B -aptv +] +.IR drive ... +.br +.B hdtemp +.IR drive ... +.SH DESCRIPTION +The +.B disk/smart +command uses the +.IR sd (3) +raw interface for continuous disk health logging to +.B /sys/log/smart +and, with the +.B -v +flag, the console for ATA and SCSI disks supporting +SMART-style reporting. The +.BR -t " flag causes" +.B disk/smart +to exit after a single probe and implies +.BR -v . +With the +.B -p +flag or no arguments, +.B disk/smart +probes for all SMART-capable drives. With +.BR -a , +SMART-capable drives are announced. Logging +behaves as if level triggered. If a smart +condition is set it will be logged every 6 hours. +Condition reset also resets the log timer. +.PP +.I Hdtemp +prints the current drive temperature in Celsius +if the drive and transport support it. +.SH FILES +.BR /lib/scsicodes , +.BR /dev/sdXX/raw +.SH "SEE ALSO" +.IR atazz (8), +.IR scuzz (8), +.IR sd (3). +.SH SOURCE +/sys/src/cmd/disk/smart +.SH BUGS +Past failures are no indication of future performance. 
ape/diff -Nru /n/sources/plan9/sys/src/cmd/disk/mkfile /sys/src/cmd/disk/mkfile --- /n/sources/plan9/sys/src/cmd/disk/mkfile Wed Oct 24 22:29:51 2007 +++ /sys/src/cmd/disk/mkfile Tue Jan 1 00:00:00 2013 @@ -13,6 +13,7 @@ 9660\ kfs\ prep\ + smart\ OFILES= diff -Nru /sys/src/cmd/disk/smart/ata.c /sys/src/cmd/disk/smart/ata.c --- /sys/src/cmd/disk/smart/ata.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/disk/smart/ata.c Tue Jan 1 00:00:00 2013 @@ -0,0 +1,192 @@ +#include +#include +#include +#include "smart.h" + +enum{ + Nop, + Idall, + Idpkt, + Smart, + Id, + Sig, + + Cmdsz = 18, + Replysz = 18, + +}; + +typedef struct Atatab Atatab; +struct Atatab { + ushort cc; + uchar protocol; + char *name; +}; + +Atatab atatab[] = { +[Nop] 0x00, Pnd|P28, "nop", +[Idall] 0xff, Pin|Ppio|P28, "identify * device", +[Idpkt] 0xa1, Pin|Ppio|P28, "identify packet device", +[Smart] 0xb0, Pnd|P28, "smart", +[Id] 0xec, Pin|Ppio|P28, "identify device", +[Sig] 0xf000, Pnd|P28, "signature", +}; + +typedef struct Rcmd Rcmd; +struct Rcmd{ + uchar sdcmd; /* sd command; 0xff means ata passthrough */ + uchar ataproto; /* ata protocol. non-data, pio, reset, dd, etc. 
*/ + uchar fis[Fissize]; +}; + +typedef struct Req Req; +struct Req { + char haverfis; + Rcmd cmd; + Rcmd reply; + uchar data[0x200]; + uint count; +}; + +static int +issueata(Req *r, Sdisk *d, int errok) +{ + char buf[ERRMAX]; + int ok, rv; + + if((rv = write(d->fd, &r->cmd, Cmdsz)) != Cmdsz){ + /* handle non-atazz compatible kernels; NOTE(review): the strstr test below looks inverted (it reports only the "bad arg" case) -- confirm */ + rerrstr(buf, sizeof buf); + if(rv != -1 || strstr(buf, "bad arg in system call") != 0) + eprint(d, "fis write error: %r\n"); + return -1; + } + + werrstr(""); + switch(r->cmd.ataproto & Pdatam){ + default: + ok = read(d->fd, "", 0) == 0; + break; + case Pin: + ok = read(d->fd, r->data, r->count) == r->count; + break; + case Pout: + ok = write(d->fd, r->data, r->count) == r->count; + break; + } + rv = 0; + if(ok == 0){ + rerrstr(buf, sizeof buf); + if(!errok && strstr(buf, "not sata") == 0) + eprint(d, "xfer error: %.2ux%.2ux: %r\n", r->cmd.fis[0], r->cmd.fis[2]); + rv = -1; + } + if(read(d->fd, &r->reply, Replysz) != Replysz){ + if(!errok) + eprint(d, "status fis read error: %r\n"); + return -1; + } + r->haverfis = 1; + return rv; +} + +int +issueatat(Req *r, int i, Sdisk *d, int e) +{ + uchar *fis; + Atatab *a; + + a = atatab + i; + r->haverfis = 0; + r->cmd.sdcmd = 0xff; + r->cmd.ataproto = a->protocol; + fis = r->cmd.fis; + fis[0] = H2dev; + if(a->cc & 0xff00) + fis[0] = a->cc >> 8; + fis[1] = Fiscmd; + if(a->cc != 0xff) /* 0xff: caller already filled in the command byte */ + fis[2] = a->cc; + return issueata(r, d, e); +} + +int +ataprobe(Sdisk *d) +{ + int rv; + Req r; + + memset(&r, 0, sizeof r); + if(issueatat(&r, Sig, d, 1) == -1) + return -1; + setfissig(d, fistosig(r.reply.fis)); + memset(&r, 0, sizeof r); + r.count = 0x200; + identifyfis(d, r.cmd.fis); + if((rv = issueatat(&r, Idall, d, 1)) != -1){ + idfeat(d, (ushort*)r.data); + if((d->feat & Dsmart) == 0) + rv = -1; + } + return rv; +} + +int +smartfis(Sfis *f, uchar *c, int n) +{ + if((f->feat & Dsmart) == 0) + return -1; + skelfis(c); + c[2] = 0xb0; + c[3] = 0xd8 + n; /* n=0: enable smart (0xd8); n=1: disable (0xd9) */ + c[5] = 0x4f; + c[6] = 0xc2; + 
return 0; +} + +int +ataenable(Sdisk *d) +{ + int rv; + Req r; + + memset(&r, 0, sizeof r); + smartfis(d, r.cmd.fis, 0); + rv = issueatat(&r, Smart, d, 0); + return rv; +} + +void +smartrsfis(Sfis*, uchar *c) +{ + skelfis(c); + c[2] = 0xb0; + c[3] = 0xda; /* return smart status */ + c[5] = 0x4f; + c[6] = 0xc2; +} + +int +atastatus(Sdisk *d, char *s, int l) +{ + uchar *fis; + int rv; + Req r; + + memset(&r, 0, sizeof r); + smartrsfis(d, r.cmd.fis); + rv = issueatat(&r, Smart, d, 0); + *s = 0; + if(rv != -1){ + fis = r.reply.fis; + if(fis[5] == 0x4f && + fis[6] == 0xc2) + snprint(s, l, "normal"); + else{ + snprint(s, l, "threshold exceeded"); + rv = -1; + } + } else + snprint(s, l, "smart error"); + return rv; +} diff -Nru /sys/src/cmd/disk/smart/mkfile /sys/src/cmd/disk/smart/mkfile --- /sys/src/cmd/disk/smart/mkfile Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/disk/smart/mkfile Tue Jan 1 00:00:00 2013 @@ -0,0 +1,15 @@ + +#include +#include +#include +#include +#include "smart.h" + +enum{ + Replysz = 16, +}; + +typedef struct Rcmd Rcmd; +struct Rcmd{ + uchar proto; + uchar cdbsz; + uchar cdb[16]; +}; + +typedef struct Req Req; +struct Req { + char haverfis; + Rcmd cmd; + char sdstat[16]; + uchar sense[0x100]; + uchar data[0x200]; + uint count; +}; + +void +turcdb(Req *r) +{ + uchar *cmd; + + cmd = r->cmd.cdb; + r->cmd.cdbsz = 6; + r->cmd.proto = Pin; + memset(cmd, 0, 6); + r->count = 0; +} + +void +reqsensecdb(Req *r) +{ + uchar *cmd; + + cmd = r->cmd.cdb; + r->cmd.cdbsz = 6; + r->cmd.proto = Pin; + memset(cmd, 0, 6); + cmd[0] = ScmdRsense; + cmd[4] = 128; + r->count = 128; +} + +static void +sensetrace(uchar *cdb, uchar *u) +{ + char *e; + + USED(cdb); + if(1) + return; + e = scsierror(u[12], u[13]); + fprint(2, "sense %.2ux: %.2ux%.2ux%.2ux %s\n", cdb[0], u[2], u[12], u[13], e); +} + +static int +issuescsi(Req *r, Sdisk *d) +{ + uchar *u; + int ok, rv, n; + Req sense; + + if(write(d->fd, r->cmd.cdb, r->cmd.cdbsz) != r->cmd.cdbsz){ + eprint(d, "cdb write error: %r\n"); 
+ return -1; + } + werrstr(""); + switch(r->cmd.proto){ + default: + case Pin: + n = read(d->fd, r->data, r->count); + ok = n >= 0; + r->count = 0; + if(ok) + r->count = n; + break; + case Pout: + n = write(d->fd, r->data, r->count); + ok = n == r->count; + break; + } + rv = 0; + memset(r->sdstat, 0, sizeof r->sdstat); + if(read(d->fd, r->sdstat, Replysz) < 1){ + eprint(d, "status reply read error: %r\n"); + return -1; + } + if(n == -1) + rv = -1; /* scsi not supported; don't whine */ + else if(rv == 0 && (rv = atoi(r->sdstat)) != 0){ + memset(&sense, 0, sizeof sense); + reqsensecdb(&sense); + if(issuescsi(&sense, d) == 0){ + memmove(r->sense, sense.data, sense.count); + u = r->sense; + rv = u[2]; + sensetrace(r->cmd.cdb, u); + }else + rv = -1; + } + return ok? rv: -1; +} + +void +modesensecdb(Req *r, uchar page, uint n) +{ + uchar *cmd; + + cmd = r->cmd.cdb; + r->cmd.cdbsz = 10; + r->cmd.proto = Pin; + memset(cmd, 0, 10); + cmd[0] = ScmdMsense10; + cmd[2] = page; + cmd[7] = n>>8; + cmd[8] = n; + r->count = n; +} + +void +modeselectcdb(Req *r, uint n) +{ + uchar *cmd; + + cmd = r->cmd.cdb; + r->cmd.proto = Pout; + r->cmd.cdbsz = 10; + memset(cmd, 0, 10); + cmd[0] = ScmdMselect10; + cmd[1] = 0x10; /* assume scsi2 ! 
*/ + cmd[7] = n>>8; + cmd[8] = n; + r->count = n; +} + +int +scsiprobe(Sdisk *d) +{ + Req r; + + memset(&r, 0, sizeof r); + turcdb(&r); + if(issuescsi(&r, d) == -1) + return -1; + memset(&r, 0, sizeof r); + modesensecdb(&r, 0x1c, sizeof r.data); + if(issuescsi(&r, d) != 0 || r.count < 8) + return -1; + return 0; +} + +enum{ + /* mrie bits */ + Mnone = 0, + Masync = 1, /* obs */ + Mattn = 2, /* generate unit attention */ + Mcrerror = 3, /* conditionally generate recovered error */ + Mrerror = 4, /* unconditionally " */ + Mnosense = 5, /* generate no sense */ + Mreqonly = 6, /* report only in response to req sense */ + + /* byte 2 bits */ + Perf = 1<<7, /* smart may not cause delays */ + Ebf = 1<<5, /* enable background functions */ + Ewasc = 1<<4, /* enable warnings */ + Dexcpt = 1<<3, /* disable smart */ + Smarttst = 1<<2, /* generate spurious smart error 5dff */ + Logerr = 1<<0, /* enable reporting */ +}; + +int +scsienable(Sdisk *d) +{ + Req r; + + memset(&r, 0, sizeof r); + r.data[8 + 0] = 0x1c; + r.data[8 + 1] = 0xa; + r.data[8 + 2] = Ebf | Ewasc | Logerr; + r.data[8 + 3] = Mreqonly; + r.data[8 +11] = 1; + modeselectcdb(&r, 12 + 8); + if(issuescsi(&r, d) != 0) + return -1; + return 0; +} + +int +scsistatus(Sdisk *d, char *s, int l) +{ + char *err; + uchar *u; + int rv; + Req r; + + memset(&r, 0, sizeof r); + reqsensecdb(&r); + rv = issuescsi(&r, d); + if(rv == 0 && r.count > 13){ /* sense bytes 12 and 13 are both examined */ + u = r.data; + if(u[12] + u[13] == 0) + err = "normal"; + else{ + err = scsierror(u[12], u[13]); + rv = -1; + } + if(err == nil) + err = "unknown"; + snprint(s, l, "%s", err); + }else + snprint(s, l, "smart error"); + return rv; +} diff -Nru /sys/src/cmd/disk/smart/smart.c /sys/src/cmd/disk/smart/smart.c --- /sys/src/cmd/disk/smart/smart.c Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/disk/smart/smart.c Tue Jan 1 00:00:00 2013 @@ -0,0 +1,236 @@ +/* + * smart monitoring for scsi and ata + * copyright © 2009 erik quanstrom + */ +#include +#include +#include +#include "smart.h" + +enum{ + 
Checksec = 600, /* minimum seconds between status checks of a disk */ + Opensec = 60 * 60, /* complain once a disk has been unopenable this long */ + Relogsec = 86400 / 4, /* relog a standing condition every 6 hours */ +}; + +static Sdisk *disks; +static Dtype dtab[] = { + Tata, "ata", ataprobe, ataenable, atastatus, + Tscsi, "scsi", scsiprobe, scsienable, scsistatus, +}; +static char *logfile = "smart"; +static int aflag; +static int tflag; +static int vflag; + +void +eprint(Sdisk *d, char *s, ...) +{ + char buf[256]; + va_list arg; + + va_start(arg, s); + vseprint(buf, buf + sizeof buf, s, arg); + va_end(arg); +// syslog(0, logfile, "%s: %s", d->name, buf); + if(vflag) + fprint(2, "%s: %s", d->name, buf); +} + +void +smartlog(Sdisk *d, char *s, ...) +{ + char buf[256]; + va_list arg; + + va_start(arg, s); + vseprint(buf, buf + sizeof buf, s, arg); + va_end(arg); + if(!tflag) + syslog(0, logfile, "%s: %s", d->name, buf); + if(tflag || vflag) + fprint(2, "%s: %s\n", d->name, buf); +} + +static void +diskclose(Sdisk *d) +{ + close(d->fd); + d->fd = -1; +} + +static int +diskopen(Sdisk *d) +{ + char buf[Pathlen + 8]; /* room for d->path plus "/raw" */ + + snprint(buf, sizeof buf, "%s/raw", d->path); + werrstr(""); + return d->fd = open(buf, ORDWR); +} + +static int +noexist(void) +{ + char buf[ERRMAX]; + + rerrstr(buf, sizeof buf); /* read only; errstr would swap in our uninitialized buf */ + if(strstr(buf, "exist")) + return -1; + return 0; +} + +static void +lognew(Sdisk *d) +{ + if(aflag && !tflag) + smartlog(d, "%s", d->t->tname); +} + +static int +newdisk(char *s) +{ + char *p; + int i; + Sdisk d; + + memset(&d, 0, sizeof d); + snprint(d.path, sizeof d.path, "%s", s); + if(p = strrchr(s, '/')) + p++; + else + p = s; + snprint(d.name, sizeof d.name, "%s", p); + if(diskopen(&d) == -1) + return noexist(); + for(i = 0; i < nelem(dtab); i++) + if(dtab[i].probe(&d) == 0) + if(dtab[i].enable(&d) == 0){ + d.t = dtab + i; + lognew(&d); + break; + } + diskclose(&d); + if(d.t != 0){ + d.next = disks; + if((disks = malloc(sizeof d)) == nil) + sysfatal("malloc: %r"); + memmove(disks, &d, sizeof d); + } + return 0; +} + +static int +probe0(char *s, int l) +{ + char *p, *f[3], buf[16]; + int i; + + s[l] = 0; + for(; p = 
strchr(s, '\n'); s = p + 1){ + if(tokenize(s, f, nelem(f)) < 1) + continue; + for(i = 0; i < 0x10; i++){ + snprint(buf, sizeof buf, "/dev/%s%ux", f[0], i); + if(newdisk(buf) == -1 && i > 2) + break; + } + } + return -1; +} + +int +probe(void) +{ + char *s; + int fd, l, r; + + fd = open("/dev/sdctl", OREAD); + if(fd == -1) + return -1; + r = -1; + l = 1024; /* #S/sdctl has 0 size; guess */ + if(s = malloc(l + 1)) + if((l = read(fd, s, l)) > 0) + r = probe0(s, l); + free(s); + close(fd); + return r; +} + +void +run(void) +{ + char buf[1024]; + int e, s0; + uvlong t, t0; + Sdisk *d; + + e = 0; + t = time(0); + for(d = disks; d; d = d->next){ + t0 = d->lastcheck; + if(t0 != 0 && t - t0 < Checksec) + continue; + if(diskopen(d) == -1){ + if(t - t0 > Opensec) + smartlog(d, "can't open in %ullds", t - t0); + continue; + } + s0 = d->status; + d->status = d->t->status(d, buf, sizeof buf); + diskclose(d); + if(d->status == -1) + e++; + if((aflag || d->status != s0 || d->status != 0) && !d->silent){ + t0 = d->lastlog; + if(t0 == 0 || t - t0 >= Relogsec){ + smartlog(d, "%s", buf); + d->lastlog = t; + } + }else + d->lastlog = 0; + d->lastcheck = t; + } + if(tflag) + exits(e? 
"smart errors": ""); +} + +void +usage(void) +{ + fprint(2, "usage: disk/smart [-aptv] [/dev/sdXX] ...\n"); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + int pflag; + + pflag = 0; + ARGBEGIN{ + case 'a': + aflag = 1; + break; + case 'p': + pflag = 1; + break; + case 't': + tflag = 1; /* fall through: -t implies -v */ + case 'v': + vflag = 1; + break; + default: + usage(); + }ARGEND + + for(; *argv; argv++) + newdisk(*argv); + if(argc == 0 || pflag) + probe(); + if(disks == nil) + sysfatal("no disks"); + for(;; sleep(30*1000)) + run(); +} diff -Nru /sys/src/cmd/disk/smart/smart.h /sys/src/cmd/disk/smart/smart.h --- /sys/src/cmd/disk/smart/smart.h Thu Jan 1 00:00:00 1970 +++ /sys/src/cmd/disk/smart/smart.h Tue Jan 1 00:00:00 2013 @@ -0,0 +1,44 @@ +enum { + Tscsi = 1, + Tata = 2, + + Sok = 0, + Ssoon = 1, + Sfail = 2, + + Nrb = 32, + Pathlen = 256, +}; + +typedef struct Dtype Dtype; +typedef struct Sdisk Sdisk; + +struct Dtype { + int type; + char *tname; + int (*probe)(Sdisk*); + int (*enable)(Sdisk*); + int (*status)(Sdisk*, char*, int); +}; + +struct Sdisk { + Sdisk *next; + Dtype *t; + int fd; + Sfis; + char path[Pathlen]; + char name[28]; + char status; + uchar silent; + uvlong lastcheck; + uvlong lastlog; +}; + +int scsiprobe(Sdisk*); +int scsienable(Sdisk*); +int scsistatus(Sdisk*, char*, int); +int ataprobe(Sdisk*); +int ataenable(Sdisk*); +int atastatus(Sdisk*, char*, int); + +void eprint(Sdisk*, char *, ...);