diff -Nru /amd64/include/ape/float.h /amd64/include/ape/float.h
--- /amd64/include/ape/float.h	Thu Jan 1 00:00:00 1970
+++ /amd64/include/ape/float.h	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,80 @@
+#ifndef __FLOAT
+#define __FLOAT
+/* IEEE, default rounding */
+
+#define FLT_ROUNDS	1
+#define FLT_RADIX	2
+
+#define FLT_DIG		6
+#define FLT_EPSILON	1.19209290e-07
+#define FLT_MANT_DIG	24
+#define FLT_MAX		3.40282347e+38
+#define FLT_MAX_10_EXP	38
+#define FLT_MAX_EXP	128
+#define FLT_MIN		1.17549435e-38
+#define FLT_MIN_10_EXP	-37
+#define FLT_MIN_EXP	-125
+
+#define DBL_DIG		15
+#define DBL_EPSILON	2.2204460492503131e-16
+#define DBL_MANT_DIG	53
+#define DBL_MAX		1.797693134862315708145e+308
+#define DBL_MAX_10_EXP	308
+#define DBL_MAX_EXP	1024
+#define DBL_MIN		2.225073858507201383090233e-308
+#define DBL_MIN_10_EXP	-307
+#define DBL_MIN_EXP	-1021
+#define LDBL_MANT_DIG	DBL_MANT_DIG
+#define LDBL_EPSILON	DBL_EPSILON
+#define LDBL_DIG	DBL_DIG
+#define LDBL_MIN_EXP	DBL_MIN_EXP
+#define LDBL_MIN	DBL_MIN
+#define LDBL_MIN_10_EXP	DBL_MIN_10_EXP
+#define LDBL_MAX_EXP	DBL_MAX_EXP
+#define LDBL_MAX	DBL_MAX
+#define LDBL_MAX_10_EXP	DBL_MAX_10_EXP
+
+typedef union FPdbleword FPdbleword;
+union FPdbleword
+{
+	double x;
+	struct {	/* little endian */
+		long lo;
+		long hi;
+	};
+};
+
+#ifdef _RESEARCH_SOURCE
+/* define stuff needed for floating conversion */
+#define IEEE_8087	1
+#define Sudden_Underflow 1
+#endif
+#ifdef _PLAN9_SOURCE
+/* MXCSR */
+/* fcr */
+#define FPFTZ	(1<<15)	/* amd64 */
+#define FPINEX	(1<<12)
+#define FPUNFL	(1<<11)
+#define FPOVFL	(1<<10)
+#define FPZDIV	(1<<9)
+#define FPDNRM	(1<<8)	/* amd64 */
+#define FPINVAL	(1<<7)
+#define FPDAZ	(1<<6)	/* amd64 */
+#define FPRNR	(0<<13)
+#define FPRZ	(3<<13)
+#define FPRPINF	(2<<13)
+#define FPRNINF	(1<<13)
+#define FPRMASK	(3<<13)
+#define FPPEXT	0
+#define FPPSGL	0
+#define FPPDBL	0
+#define FPPMASK	0
+/* fsr */
+#define FPAINEX	(1<<5)
+#define FPAUNFL	(1<<4)
+#define FPAOVFL	(1<<3)
+#define FPAZDIV	(1<<2)
+#define FPADNRM	(1<<1)	/* not in plan 9 */
+#define FPAINVAL	(1<<0)
+#endif
+#endif /* __FLOAT */
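The FPdbleword union is the usual Plan 9 way to get at the two 32-bit halves of an IEEE double; note the members are plain long, which the Plan 9 compilers keep at 32 bits even on amd64. A minimal sketch of the intended use, assuming the little-endian layout declared above (dblexp is a hypothetical helper, not part of the patch):

#include <float.h>

/* sketch: pull the biased exponent out of a double;
 * hi holds sign(1) | exponent(11) | fraction(20) */
int
dblexp(double d)
{
	FPdbleword w;

	w.x = d;
	return (w.hi >> 20) & 0x7FF;
}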
diff -Nru /amd64/include/ape/math.h /amd64/include/ape/math.h
--- /amd64/include/ape/math.h	Thu Jan 1 00:00:00 1970
+++ /amd64/include/ape/math.h	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,74 @@
+#ifndef __MATH
+#define __MATH
+#pragma lib "/$M/lib/ape/libap.a"
+
+/* a HUGE_VAL appropriate for IEEE double-precision */
+/* the correct value, 1.797693134862316e+308, causes a ken overflow */
+#define HUGE_VAL 1.79769313486231e+308
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern double acos(double);
+extern double asin(double);
+extern double atan(double);
+extern double atan2(double, double);
+extern double cos(double);
+extern double sin(double);
+extern double tan(double);
+extern double cosh(double);
+extern double sinh(double);
+extern double tanh(double);
+extern double exp(double);
+extern double frexp(double, int *);
+extern double ldexp(double, int);
+extern double log(double);
+extern double log10(double);
+extern double modf(double, double *);
+extern double pow(double, double);
+extern double sqrt(double);
+extern double ceil(double);
+extern double fabs(double);
+extern double floor(double);
+extern double fmod(double, double);
+extern double NaN(void);
+extern int isNaN(double);
+extern double Inf(int);
+extern int isInf(double, int);
+
+#ifdef _RESEARCH_SOURCE
+/* does >> treat left operand as unsigned ? */
+#define Unsigned_Shifts 1
+#define M_E		2.7182818284590452354	/* e */
+#define M_LOG2E		1.4426950408889634074	/* log 2e */
+#define M_LOG10E	0.43429448190325182765	/* log 10e */
+#define M_LN2		0.69314718055994530942	/* log e2 */
+#define M_LN10		2.30258509299404568402	/* log e10 */
+#define M_PI		3.14159265358979323846	/* pi */
+#define M_PI_2		1.57079632679489661923	/* pi/2 */
+#define M_PI_4		0.78539816339744830962	/* pi/4 */
+#define M_1_PI		0.31830988618379067154	/* 1/pi */
+#define M_2_PI		0.63661977236758134308	/* 2/pi */
+#define M_2_SQRTPI	1.12837916709551257390	/* 2/sqrt(pi) */
+#define M_SQRT2		1.41421356237309504880	/* sqrt(2) */
+#define M_SQRT1_2	0.70710678118654752440	/* 1/sqrt(2) */
+
+extern double hypot(double, double);
+extern double erf(double);
+extern double erfc(double);
+extern double j0(double);
+extern double y0(double);
+extern double j1(double);
+extern double y1(double);
+extern double jn(int, double);
+extern double yn(int, double);
+
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MATH */
diff -Nru /amd64/include/ape/stdarg.h /amd64/include/ape/stdarg.h
--- /amd64/include/ape/stdarg.h	Thu Jan 1 00:00:00 1970
+++ /amd64/include/ape/stdarg.h	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,18 @@
+#ifndef __STDARG
+#define __STDARG
+
+typedef char *va_list;
+
+#define va_start(list, start) list = (sizeof(start)<8 ? (char *)((long long *)&(start)+1) : \
+(char *)(&(start)+1))
+#define va_end(list)
+#define va_arg(list, mode)\
+	((sizeof(mode) == 1)?\
+		((mode*)(list += 8))[-8]:\
+	(sizeof(mode) == 2)?\
+		((mode*)(list += 8))[-4]:\
+	(sizeof(mode) == 4)?\
+		((mode*)(list += 8))[-2]:\
+		((mode*)(list += sizeof(mode)))[-1])
+
+#endif /* __STDARG */
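Every argument slot in this va_list scheme is 8 bytes wide: va_start rounds the address of the last fixed parameter up to the next 8-byte boundary, and va_arg bumps the pointer by a full slot and then indexes backwards in units of the requested type, hence [-8] for 1-byte types, [-4] for 2-byte, [-2] for 4-byte. A sketch of what va_arg(list, int) reduces to (nextint is a hypothetical helper, for illustration only):

/* sketch: the effect of va_arg(list, int) under this header;
 * advance past one 8-byte slot, then read the int at its base */
int
nextint(char **list)
{
	*list += 8;
	return ((int*)*list)[-2];	/* 2 ints == 8 bytes back */
}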
diff -Nru /amd64/include/ape/ureg.h /amd64/include/ape/ureg.h
--- /amd64/include/ape/ureg.h	Thu Jan 1 00:00:00 1970
+++ /amd64/include/ape/ureg.h	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,38 @@
+#ifndef __UREG_H
+#define __UREG_H
+#if !defined(_PLAN9_SOURCE)
+	This header file is an extension to ANSI/POSIX
+#endif
+
+struct Ureg {
+	unsigned long long ax;
+	unsigned long long bx;
+	unsigned long long cx;
+	unsigned long long dx;
+	unsigned long long si;
+	unsigned long long di;
+	unsigned long long bp;
+	unsigned long long r8;
+	unsigned long long r9;
+	unsigned long long r10;
+	unsigned long long r11;
+	unsigned long long r12;
+	unsigned long long r13;
+	unsigned long long r14;
+	unsigned long long r15;
+
+	unsigned short ds;
+	unsigned short es;
+	unsigned short fs;
+	unsigned short gs;
+
+	unsigned long long type;
+	unsigned long long error;	/* error code (or zero) */
+	unsigned long long ip;		/* pc */
+	unsigned long long cs;		/* old context */
+	unsigned long long flags;	/* old flags */
+	unsigned long long sp;		/* sp */
+	unsigned long long ss;		/* old stack segment */
+};
+
+#endif
diff -Nru /sys/src/ape/lib/9/amd64/getcallerpc.s /sys/src/ape/lib/9/amd64/getcallerpc.s
--- /sys/src/ape/lib/9/amd64/getcallerpc.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/9/amd64/getcallerpc.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,3 @@
+TEXT getcallerpc(SB), $0
+	MOVQ -8(RARG), AX
+	RET
diff -Nru /sys/src/ape/lib/9/amd64/getfcr.s /sys/src/ape/lib/9/amd64/getfcr.s
--- /sys/src/ape/lib/9/amd64/getfcr.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/9/amd64/getfcr.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,38 @@
+
+TEXT setfcr(SB), $4
+	XORL $(0x3F<<7),RARG	/* bits are cleared in csr to enable them */
+	ANDL $0xFFC0, RARG	/* just the fcr bits */
+	WAIT	/* is this needed? */
+	STMXCSR 0(SP)
+	MOVL 0(SP), AX
+	ANDL $~0x3F, AX
+	ORL RARG, AX
+	MOVL AX, 0(SP)
+	LDMXCSR 0(SP)
+	RET
+
+TEXT getfcr(SB), $4
+	WAIT
+	STMXCSR 0(SP)
+	MOVWLZX 0(SP), AX
+	ANDL $0xFFC0, AX
+	XORL $(0x3F<<7),AX
+	RET
+
+TEXT getfsr(SB), $4
+	WAIT
+	STMXCSR 0(SP)
+	MOVL 0(SP), AX
+	ANDL $0x3F, AX
+	RET
+
+TEXT setfsr(SB), $4
+	ANDL $0x3F, RARG
+	WAIT
+	STMXCSR 0(SP)
+	MOVL 0(SP), AX
+	ANDL $~0x3F, AX
+	ORL RARG, AX
+	MOVL AX, 0(SP)
+	LDMXCSR 0(SP)
+	RET
diff -Nru /sys/src/ape/lib/ap/amd64/_seek.c /sys/src/ape/lib/ap/amd64/_seek.c
--- /sys/src/ape/lib/ap/amd64/_seek.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/_seek.c	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,11 @@
+extern long __SEEK(long long*, int, long long, int);
+
+long long
+_SEEK(int fd, long long o, int p)
+{
+	long long l;
+
+	if(__SEEK(&l, fd, o, p) < 0)
+		l = -1;
+	return l;
+}
diff -Nru /sys/src/ape/lib/ap/amd64/cycles.s /sys/src/ape/lib/ap/amd64/cycles.s
--- /sys/src/ape/lib/ap/amd64/cycles.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/cycles.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,5 @@
+TEXT _cycles(SB),1,$0	/* time stamp counter; cycles since power up */
+	RDTSC
+	MOVL AX, 0(RARG)	/* lo */
+	MOVL DX, 4(RARG)	/* hi */
+	RET
diff -Nru /sys/src/ape/lib/ap/amd64/lock.c /sys/src/ape/lib/ap/amd64/lock.c
--- /sys/src/ape/lib/ap/amd64/lock.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/lock.c	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,26 @@
+#define _LOCK_EXTENSION
+#include "../plan9/sys9.h"
+#include <lock.h>
+
+int tas(int*);
+
+void
+lock(Lock *lk)
+{
+	while(tas(&lk->val))
+		_SLEEP(0);
+}
+
+int
+canlock(Lock *lk)
+{
+	if(tas(&lk->val))
+		return 0;
+	return 1;
+}
+
+void
+unlock(Lock *lk)
+{
+	lk->val = 0;
+}
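lock() here is a bare spin lock: it retries tas() and calls _SLEEP(0) between attempts to give up the processor rather than burn it. A minimal sketch of use from APE code, assuming _LOCK_EXTENSION is defined before lock.h is pulled in, as the source above does:

#define _LOCK_EXTENSION
#include <lock.h>

Lock nlock;
int n;

void
bump(void)
{
	lock(&nlock);	/* spins on tas(), sleeping between tries */
	n++;
	unlock(&nlock);
}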
diff -Nru /sys/src/ape/lib/ap/amd64/main9.s /sys/src/ape/lib/ap/amd64/main9.s
--- /sys/src/ape/lib/ap/amd64/main9.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/main9.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,12 @@
+	TEXT _main(SB), 1, $(3*8)
+
+	CALL _envsetup(SB)
+	MOVL inargc-8(FP), RARG
+	LEAQ inargv+0(FP), AX
+	MOVQ AX, 8(SP)
+	MOVQ environ(SB), AX
+	MOVQ AX, 16(SP)
+	CALL main(SB)
+	MOVQ AX, RARG
+	CALL exit(SB)
+	RET
diff -Nru /sys/src/ape/lib/ap/amd64/main9p.s /sys/src/ape/lib/ap/amd64/main9p.s
--- /sys/src/ape/lib/ap/amd64/main9p.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/main9p.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,45 @@
+#define NPRIVATES 16
+
+GLOBL _tos(SB), $8
+GLOBL _privates(SB), $8
+GLOBL _nprivates(SB), $8
+
+TEXT _mainp(SB), 1, $(3*8+NPRIVATES*8)
+
+	/* _tos = arg */
+	MOVQ AX, _tos(SB)
+	LEAQ 8(SP), AX
+	MOVQ AX, _privates(SB)
+	MOVQ $NPRIVATES, _nprivates(SB)
+
+	/* _profmain(); */
+	CALL _profmain(SB)
+
+	/* _tos->prof.pp = _tos->prof.next; */
+	MOVQ _tos+0(SB),DX
+	MOVQ 8(DX),CX
+	MOVQ CX,(DX)
+
+	CALL _envsetup(SB)
+
+	/* main(argc, argv, environ); */
+	MOVL inargc-8(FP), RARG
+	LEAQ inargv+0(FP), AX
+	MOVQ AX, 8(SP)
+	MOVQ environ(SB), AX
+	MOVQ AX, 16(SP)
+	CALL main(SB)
+
+loop:
+	MOVL AX, RARG
+	CALL exit(SB)
+	MOVQ $_profin(SB), AX	/* force loading of profile */
+	MOVL $0, AX
+	JMP loop
+
+TEXT _savearg(SB), 1, $0
+	RET
+
+TEXT _callpc(SB), 1, $0
+	MOVQ 8(RARG), AX
+	RET
diff -Nru /sys/src/ape/lib/ap/amd64/mkfile /sys/src/ape/lib/ap/amd64/mkfile
--- /sys/src/ape/lib/ap/amd64/mkfile	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/mkfile	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,20 @@
+APE=/sys/src/ape
+objtype=amd64
+<$APE/config
+LIB=/$objtype/lib/ape/libap.a
+OFILES=\
+	_seek.$O\
+	cycles.$O\
+	lock.$O\
+	main9.$O\
+	main9p.$O\
+	notetramp.$O\
+	setjmp.$O\
+	strchr.$O\
+	strlen.$O\
+	tas.$O\
+
+
+#include
+
+/* A stack to hold pcs when signals nest */
+#define MAXSIGSTACK 20
+typedef struct Pcstack Pcstack;
+static struct Pcstack {
+	int sig;
+	void (*hdlr)(int, char*, Ureg*);
+	unsigned long long restorepc;
+	Ureg *u;
+} pcstack[MAXSIGSTACK];
+static int nstack = 0;
+
+static void notecont(Ureg*, char*);
+
+void
+_notetramp(int sig, void (*hdlr)(int, char*, Ureg*), Ureg *u)
+{
+	Pcstack *p;
+
+	if(nstack >= MAXSIGSTACK)
+		_NOTED(1);	/* nesting too deep; just do system default */
+	p = &pcstack[nstack];
+	p->restorepc = u->ip;
+	p->sig = sig;
+	p->hdlr = hdlr;
+	p->u = u;
+	nstack++;
+	u->ip = (unsigned long long) notecont;
+	_NOTED(2);	/* NSAVE: clear note but hold state */
+}
+
+static void
+notecont(Ureg *u, char *s)
+{
+	Pcstack *p;
+	void(*f)(int, char*, Ureg*);
+
+	p = &pcstack[nstack-1];
+	f = p->hdlr;
+	u->ip = p->restorepc;
+	nstack--;
+	(*f)(p->sig, s, u);
+	_NOTED(3);	/* NRSTR */
+}
+
+#define JMPBUFPC 1
+#define JMPBUFSP 0
+
+extern sigset_t _psigblocked;
+
+typedef struct {
+	sigset_t set;
+	sigset_t blocked;
+	unsigned long long jmpbuf[2];
+} sigjmp_buf_amd64;
+
+void
+siglongjmp(sigjmp_buf j, int ret)
+{
+	struct Ureg *u;
+	sigjmp_buf_amd64 *jb;
+
+	jb = (sigjmp_buf_amd64*)j;
+
+	if(jb->set)
+		_psigblocked = jb->blocked;
+	if(nstack == 0 || pcstack[nstack-1].u->sp > jb->jmpbuf[JMPBUFSP])
+		longjmp((void*)jb->jmpbuf, ret);
+	u = pcstack[nstack-1].u;
+	nstack--;
+	u->ax = ret;
+	if(ret == 0)
+		u->ax = 1;
+	u->ip = jb->jmpbuf[JMPBUFPC];
+	u->sp = jb->jmpbuf[JMPBUFSP] + 8;
+	_NOTED(3);	/* NRSTR */
+}
diff -Nru /sys/src/ape/lib/ap/amd64/setjmp.s /sys/src/ape/lib/ap/amd64/setjmp.s
--- /sys/src/ape/lib/ap/amd64/setjmp.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/setjmp.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,27 @@
+TEXT longjmp(SB), $0
+	MOVL r+8(FP), AX
+	CMPL AX, $0
+	JNE ok		/* ansi: "longjmp(0) => longjmp(1)" */
+	MOVL $1, AX	/* bless their pointed heads */
+ok:
+	MOVQ 0(RARG), SP	/* restore sp */
+	MOVQ 8(RARG), BX	/* put return pc on the stack */
+	MOVQ BX, 0(SP)
+	RET
+
+TEXT setjmp(SB), $0
+	MOVQ SP, 0(RARG)	/* store sp */
+	MOVQ 0(SP), BX		/* store return pc */
+	MOVQ BX, 8(RARG)
+	MOVL $0, AX		/* return 0 */
+	RET
+
+TEXT sigsetjmp(SB), $0
+	MOVL savemask+8(FP), BX
+	MOVL BX, 0(RARG)
+	MOVL $_psigblocked(SB), 4(RARG)
+	MOVQ SP, 8(RARG)	/* store sp */
+	MOVQ 0(SP), BX		/* store return pc */
+	MOVQ BX, 16(RARG)
+	MOVL $0, AX		/* return 0 */
+	RET
diff -Nru /sys/src/ape/lib/ap/amd64/strchr.s /sys/src/ape/lib/ap/amd64/strchr.s
--- /sys/src/ape/lib/ap/amd64/strchr.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/strchr.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,38 @@
+	TEXT strchr(SB), $0
+
+	MOVQ RARG, DI
+	MOVB c+8(FP), AX
+	CMPB AX, $0
+	JEQ l2	/**/
+
+/*
+ * char is not null
+ */
+l1:
+	MOVB (DI), BX
+	CMPB BX, $0
+	JEQ ret0
+	ADDQ $1, DI
+	CMPB AX, BX
+	JNE l1
+
+	MOVQ DI, AX
+	SUBQ $1, AX
+	RET
+
+/*
+ * char is null
+ */
+l2:
+	MOVQ $-1, CX
+	CLD
+
+	REPN;	SCASB
+
+	MOVQ DI, AX
+	SUBQ $1, AX
+	RET
+
+ret0:
+	MOVQ $0, AX
+	RET
diff -Nru /sys/src/ape/lib/ap/amd64/strlen.s /sys/src/ape/lib/ap/amd64/strlen.s
--- /sys/src/ape/lib/ap/amd64/strlen.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/strlen.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,16 @@
+	TEXT strlen(SB),$0
+
+	MOVL $0, AX
+	MOVQ $-1, CX
+	CLD
+/*
+ * look for end of string
+ */
+
+	MOVQ RARG, DI
+	REPN;	SCASB
+
+	MOVQ DI, AX
+	SUBQ RARG, AX
+	SUBQ $1, AX
+	RET
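sigsetjmp above lays the buffer out as {set, blocked, sp, pc}, which is exactly what siglongjmp's sigjmp_buf_amd64 struct in notetramp.c decodes. A sketch of the usual pairing, jumping out of a signal handler; this is standard POSIX usage under APE, not anything specific to the patch:

#include <signal.h>
#include <setjmp.h>
#include <unistd.h>

static sigjmp_buf env;

static void
onintr(int sig)
{
	siglongjmp(env, 1);	/* unwinds through the note machinery */
}

int
waitforintr(void)
{
	signal(SIGINT, onintr);
	if(sigsetjmp(env, 1) == 0)	/* 1: save the blocked-signal set */
		for(;;)
			pause();
	return 0;	/* reached after the handler's siglongjmp */
}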
diff -Nru /sys/src/ape/lib/ap/amd64/tas.s /sys/src/ape/lib/ap/amd64/tas.s
--- /sys/src/ape/lib/ap/amd64/tas.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/ape/lib/ap/amd64/tas.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,5 @@
+TEXT tas(SB),$0
+
+	MOVL $0xdeaddead,AX
+	XCHGL AX,(RARG)
+	RET
diff -Nru /sys/src/libc/amd64/_seek.c /sys/src/libc/amd64/_seek.c
--- /sys/src/libc/amd64/_seek.c	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/_seek.c	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,14 @@
+#include <u.h>
+#include <libc.h>
+
+extern int _seek(vlong*, int, vlong, int);
+
+vlong
+seek(int fd, vlong o, int p)
+{
+	vlong l;
+
+	if(_seek(&l, fd, o, p) < 0)
+		l = -1LL;
+	return l;
+}
diff -Nru /sys/src/libc/amd64/argv0.s /sys/src/libc/amd64/argv0.s
--- /sys/src/libc/amd64/argv0.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/argv0.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,4 @@
+GLOBL argv0(SB), $8
+GLOBL _tos(SB), $8
+GLOBL _privates(SB), $8
+GLOBL _nprivates(SB), $4
diff -Nru /sys/src/libc/amd64/atom.s /sys/src/libc/amd64/atom.s
--- /sys/src/libc/amd64/atom.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/atom.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,66 @@
+TEXT ainc(SB), 1, $0	/* long ainc(long *); */
+ainclp:
+	MOVL (RARG), AX	/* exp */
+	MOVL AX, BX
+	INCL BX		/* new */
+	LOCK; CMPXCHGL BX, (RARG)
+	JNZ ainclp
+	MOVL BX, AX
+	RET
+
+TEXT adec(SB), 1, $0	/* long adec(long*); */
+adeclp:
+	MOVL (RARG), AX
+	MOVL AX, BX
+	DECL BX
+	LOCK; CMPXCHGL BX, (RARG)
+	JNZ adeclp
+	MOVL BX, AX
+	RET
+
+/*
+ * int cas32(u32int *p, u32int ov, u32int nv);
+ * int cas(uint *p, int ov, int nv);
+ * int casul(ulong *p, ulong ov, ulong nv);
+ */
+
+TEXT cas32(SB), 1, $0
+TEXT cas(SB), 1, $0
+TEXT casul(SB), 1, $0
+TEXT casl(SB), 1, $0	/* back compat */
+	MOVL exp+8(FP), AX
+	MOVL new+16(FP), BX
+	LOCK; CMPXCHGL BX, (RARG)
+	MOVL $1, AX	/* use CMOVLEQ etc. here? */
+	JNZ _cas32r0
+_cas32r1:
+	RET
+_cas32r0:
+	DECL AX
+	RET
+
+/*
+ * int cas64(u64int *p, u64int ov, u64int nv);
+ * int casp(void **p, void *ov, void *nv);
+ */
+
+TEXT cas64(SB), 1, $0
+TEXT casp(SB), 1, $0
+	MOVQ exp+8(FP), AX
+	MOVQ new+16(FP), BX
+	LOCK; CMPXCHGQ BX, (RARG)
+	MOVL $1, AX	/* use CMOVLEQ etc. here? */
+	JNZ _cas64r0
+_cas64r1:
+	RET
+_cas64r0:
+	DECL AX
+	RET
+
+/*
+ * void mfence(void);
+ */
+TEXT mfence(SB),0,$0
+	MFENCE
+	RET
+
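cas32/cas64 return 1 on success and 0 on failure, so compound read-modify-write operations are built as retry loops around them, the same shape as the CMPXCHGL loops in ainc/adec above. A sketch, with the cas32 declaration written out explicitly since the prototypes are not part of this patch (atomor32 is a hypothetical helper):

#include <u.h>
#include <libc.h>

int	cas32(u32int*, u32int, u32int);	/* assumed declaration */

/* sketch: atomic bitwise-or, retried until the CAS wins */
void
atomor32(u32int *p, u32int bits)
{
	u32int o;

	do
		o = *p;
	while(!cas32(p, o, o|bits));
}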
diff -Nru /sys/src/libc/amd64/cycles.s /sys/src/libc/amd64/cycles.s
--- /sys/src/libc/amd64/cycles.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/cycles.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,5 @@
+TEXT cycles(SB),1,$0	/* time stamp counter; cycles since power up */
+	RDTSC
+	MOVL AX, 0(RARG)	/* lo */
+	MOVL DX, 4(RARG)	/* hi */
+	RET
diff -Nru /sys/src/libc/amd64/getcallerpc.s /sys/src/libc/amd64/getcallerpc.s
--- /sys/src/libc/amd64/getcallerpc.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/getcallerpc.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,3 @@
+TEXT getcallerpc(SB), $0
+	MOVQ -8(RARG), AX
+	RET
diff -Nru /sys/src/libc/amd64/getfcr.s /sys/src/libc/amd64/getfcr.s
--- /sys/src/libc/amd64/getfcr.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/getfcr.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,38 @@
+
+TEXT setfcr(SB), $4
+	XORL $(0x3F<<7),RARG	/* bits are cleared in csr to enable them */
+	ANDL $0xFFC0, RARG	/* just the fcr bits */
+	WAIT	/* is this needed? */
+	STMXCSR 0(SP)
+	MOVL 0(SP), AX
+	ANDL $~0x3F, AX
+	ORL RARG, AX
+	MOVL AX, 0(SP)
+	LDMXCSR 0(SP)
+	RET
+
+TEXT getfcr(SB), $4
+	WAIT
+	STMXCSR 0(SP)
+	MOVWLZX 0(SP), AX
+	ANDL $0xFFC0, AX
+	XORL $(0x3F<<7),AX
+	RET
+
+TEXT getfsr(SB), $4
+	WAIT
+	STMXCSR 0(SP)
+	MOVL 0(SP), AX
+	ANDL $0x3F, AX
+	RET
+
+TEXT setfsr(SB), $4
+	ANDL $0x3F, RARG
+	WAIT
+	STMXCSR 0(SP)
+	MOVL 0(SP), AX
+	ANDL $~0x3F, AX
+	ORL RARG, AX
+	MOVL AX, 0(SP)
+	LDMXCSR 0(SP)
+	RET
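setfcr/getfcr map Plan 9's portable floating-point control bits onto the MXCSR: the XOR against 0x3F<<7 flips the sense, since MXCSR clears a mask bit to enable a trap while the Plan 9 interface sets a bit to enable it. A sketch of the usual idiom, masking underflow traps around a computation; quietly is a hypothetical helper, and the FP* constants come from u.h in native code (from the float.h above under APE):

#include <u.h>
#include <libc.h>

/* sketch: run f(x) with underflow faults disabled, then restore */
double
quietly(double (*f)(double), double x)
{
	ulong fcr;
	double r;

	fcr = getfcr();
	setfcr(fcr & ~FPUNFL);	/* bit clear = don't trap underflow */
	r = f(x);
	setfcr(fcr);
	return r;
}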
diff -Nru /sys/src/libc/amd64/main9.s /sys/src/libc/amd64/main9.s
--- /sys/src/libc/amd64/main9.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/main9.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,19 @@
+#define NPRIVATES 16
+
+TEXT _main(SB), 1, $(2*8+NPRIVATES*8)
+	MOVQ AX, _tos(SB)
+	LEAQ 16(SP), AX
+	MOVQ AX, _privates(SB)
+	MOVL $NPRIVATES, _nprivates(SB)
+	MOVL inargc-8(FP), RARG
+	LEAQ inargv+0(FP), AX
+	MOVQ AX, 8(SP)
+	CALL main(SB)
+
+loop:
+	MOVQ $_exits<>(SB), RARG
+	CALL exits(SB)
+	JMP loop
+
+DATA _exits<>+0(SB)/4, $"main"
+GLOBL _exits<>+0(SB), $5
diff -Nru /sys/src/libc/amd64/main9p.s /sys/src/libc/amd64/main9p.s
--- /sys/src/libc/amd64/main9p.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/main9p.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,41 @@
+#define NPRIVATES 16
+
+TEXT _mainp(SB), 1, $(2*8+NPRIVATES*8)
+	MOVQ AX, _tos(SB)	/* _tos = arg */
+	LEAQ 16(SP), AX
+	MOVQ AX, _privates(SB)
+	MOVL $NPRIVATES, _nprivates(SB)
+
+	CALL _profmain(SB)	/* _profmain(); */
+
+	MOVQ _tos+0(SB), DX	/* _tos->prof.pp = _tos->prof.next; */
+	MOVQ 8(DX), CX
+	MOVQ CX, (DX)
+
+	MOVL inargc-8(FP), RARG	/* main(argc, argv); */
+	LEAQ inargv+0(FP), AX
+	MOVQ AX, 8(SP)
+	CALL main(SB)
+
+loop:
+	MOVQ $_exits<>(SB), RARG
+	CALL exits(SB)
+	MOVQ $_profin(SB), AX	/* force loading of profile */
+	JMP loop
+
+TEXT _savearg(SB), 1, $0
+	MOVQ RARG, AX
+	RET
+
+TEXT _saveret(SB), 1, $0
+	RET
+
+TEXT _restorearg(SB), 1, $0
+	RET	/* we want RARG in RARG */
+
+TEXT _callpc(SB), 1, $0
+	MOVQ 8(RARG), AX
+	RET
+
+DATA _exits<>+0(SB)/4, $"main"
+GLOBL _exits<>+0(SB), $5
diff -Nru /sys/src/libc/amd64/memccpy.s /sys/src/libc/amd64/memccpy.s
--- /sys/src/libc/amd64/memccpy.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/memccpy.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,58 @@
+	TEXT memccpy(SB),$0
+
+	MOVL n+24(FP), CX
+	CMPL CX, $0
+	JEQ none
+	MOVQ p2+8(FP), DI
+	MOVBLZX c+16(FP), AX
+	CLD
+/*
+ * find the character in the second string
+ */
+
+	REPN;	SCASB
+	JEQ found
+
+/*
+ * if not found, set count to 'n'
+ */
+none:
+	MOVL $0, AX
+	MOVL n+24(FP), BX
+	JMP memcpy
+
+/*
+ * if found, set count to bytes thru character
+ */
+found:
+	MOVQ DI, AX
+	SUBQ p2+8(FP), AX
+	MOVQ AX, BX
+	ADDQ RARG, AX
+
+/*
+ * copy the memory
+ */
+
+memcpy:
+	MOVQ RARG, DI
+	MOVQ p2+8(FP), SI
+/*
+ * copy whole longs, if aligned
+ */
+	MOVQ DI, DX
+	ORQ SI, DX
+	ANDL $3, DX
+	JNE c3
+	MOVL BX, CX
+	SHRQ $2, CX
+	REP;	MOVSL
+/*
+ * copy the rest, by bytes
+ */
+	ANDL $3, BX
+c3:
+	MOVL BX, CX
+	REP;	MOVSB
+
+	RET
diff -Nru /sys/src/libc/amd64/memchr.s /sys/src/libc/amd64/memchr.s
--- /sys/src/libc/amd64/memchr.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/memchr.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,23 @@
+	TEXT memchr(SB),$0
+
+	MOVL n+16(FP), CX
+	CMPL CX, $0
+	JEQ none
+	MOVQ RARG, DI
+	MOVBLZX c+8(FP), AX
+	CLD
+/*
+ * SCASB is memchr instruction
+ */
+
+	REPN;	SCASB
+	JEQ found
+
+none:
+	MOVL $0, AX
+	RET
+
+found:
+	MOVQ DI, AX
+	SUBQ $1, AX
+	RET
diff -Nru /sys/src/libc/amd64/memcmp.s /sys/src/libc/amd64/memcmp.s
--- /sys/src/libc/amd64/memcmp.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/memcmp.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,52 @@
+	TEXT memcmp(SB),$0
+
+	MOVL n+16(FP), BX
+	CMPL BX, $0
+	JEQ none
+	MOVQ RARG, DI
+	MOVQ p2+8(FP), SI
+	CLD
+	MOVQ DI, CX
+	ORQ SI, CX
+	ANDL $3, CX
+	JNE c3
+/*
+ * first by longs
+ */
+
+	MOVL BX, CX
+	SHRQ $2, CX
+
+	REP;	CMPSL
+	JNE found
+
+/*
+ * then by bytes
+ */
+	ANDL $3, BX
+c3:
+	MOVL BX, CX
+	REP;	CMPSB
+	JNE found1
+
+none:
+	MOVQ $0, AX
+	RET
+
+/*
+ * if long found,
+ * back up and look by bytes
+ */
+found:
+	MOVL $4, CX
+	SUBQ CX, DI
+	SUBQ CX, SI
+	REP;	CMPSB
+
+found1:
+	JLS lt
+	MOVQ $-1, AX
+	RET
+lt:
+	MOVQ $1, AX
+	RET
diff -Nru /sys/src/libc/amd64/memcpy.s /sys/src/libc/amd64/memcpy.s
--- /sys/src/libc/amd64/memcpy.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/memcpy.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,81 @@
+TEXT memcpy(SB), $0
+	MOVQ RARG, DI
+	MOVQ DI, AX		/* return value */
+	MOVQ p2+8(FP), SI
+	MOVL n+16(FP), BX
+	CMPL BX, $0
+	JGT _ok
+	JEQ _return	/* nothing to do if n == 0 */
+	MOVL $0, SI	/* fault if n < 0 */
+
+/*
+ * check and set for backwards:
+ *	(p2 < p1) && ((p2+n) > p1)
+ */
+_ok:
+	CMPQ SI, DI
+	JGT _forward
+	JEQ _return	/* nothing to do if p2 == p1 */
+	MOVQ SI, DX
+	ADDQ BX, DX
+	CMPQ DX, DI
+	JGT _back
+
+/*
+ * copy whole longs if aligned
+ */
+_forward:
+	CLD
+	MOVQ SI, DX
+	ORQ DI, DX
+	ANDL $3, DX
+	JNE c3f
+	MOVQ BX, CX
+	SHRQ $2, CX
+	ANDL $3, BX
+	REP;	MOVSL
+
+/*
+ * copy the rest, by bytes
+ */
+	JEQ _return	/* flags set by above ANDL */
+c3f:
+	MOVL BX, CX
+	REP;	MOVSB
+
+	RET
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+_back:
+	ADDQ BX, DI
+	ADDQ BX, SI
+	STD
+	SUBQ $4, DI
+	SUBQ $4, SI
+/*
+ * copy whole longs, if aligned
+ */
+	MOVQ DI, DX
+	ORQ SI, DX
+	ANDL $3, DX
+	JNE c3b
+	MOVL BX, CX
+	SHRQ $2, CX
+	ANDL $3, BX
+	REP;	MOVSL
+/*
+ * copy the rest, by bytes
+ */
+	JEQ _return	/* flags set by above ANDL */
+
+c3b:
+	ADDQ $3, DI
+	ADDQ $3, SI
+	MOVL BX, CX
+	REP;	MOVSB
+
+_return:
+	RET
diff -Nru /sys/src/libc/amd64/memmove.s /sys/src/libc/amd64/memmove.s
--- /sys/src/libc/amd64/memmove.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/memmove.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,81 @@
+TEXT memmove(SB), $0
+	MOVQ RARG, DI
+	MOVQ DI, AX		/* return value */
+	MOVQ p2+8(FP), SI
+	MOVL n+16(FP), BX
+	CMPL BX, $0
+	JGT _ok
+	JEQ _return	/* nothing to do if n == 0 */
+	MOVL $0, SI	/* fault if n < 0 */
+
+/*
+ * check and set for backwards:
+ *	(p2 < p1) && ((p2+n) > p1)
+ */
+_ok:
+	CMPQ SI, DI
+	JGT _forward
+	JEQ _return	/* nothing to do if p2 == p1 */
+	MOVQ SI, DX
+	ADDQ BX, DX
+	CMPQ DX, DI
+	JGT _back
+
+/*
+ * copy whole longs if aligned
+ */
+_forward:
+	CLD
+	MOVQ SI, DX
+	ORQ DI, DX
+	ANDL $3, DX
+	JNE c3f
+	MOVQ BX, CX
+	SHRQ $2, CX
+	ANDL $3, BX
+	REP;	MOVSL
+
+/*
+ * copy the rest, by bytes
+ */
+	JEQ _return	/* flags set by above ANDL */
+c3f:
+	MOVL BX, CX
+	REP;	MOVSB
+
+	RET
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+_back:
+	ADDQ BX, DI
+	ADDQ BX, SI
+	STD
+	SUBQ $4, DI
+	SUBQ $4, SI
+/*
+ * copy whole longs, if aligned
+ */
+	MOVQ DI, DX
+	ORQ SI, DX
+	ANDL $3, DX
+	JNE c3b
+	MOVL BX, CX
+	SHRQ $2, CX
+	ANDL $3, BX
+	REP;	MOVSL
+/*
+ * copy the rest, by bytes
+ */
+	JEQ _return	/* flags set by above ANDL */
+
+c3b:
+	ADDQ $3, DI
+	ADDQ $3, SI
+	MOVL BX, CX
+	REP;	MOVSB
+
+_return:
+	RET
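memcpy and memmove in this patch share the same body, including the backwards (STD) path, so even memcpy tolerates overlap here; portable code should still reserve overlapping copies for memmove. The case the backwards loop exists for, as a sketch (insertgap is hypothetical):

#include <u.h>
#include <libc.h>

/* sketch: overlapping shift-right by one byte; dst > src, so the
 * copy must run high-to-low or it would smear buf[0] everywhere */
void
insertgap(char *buf, int n)
{
	memmove(buf+1, buf, n-1);
	buf[0] = ' ';
}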
diff -Nru /sys/src/libc/amd64/memset.s /sys/src/libc/amd64/memset.s
--- /sys/src/libc/amd64/memset.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/memset.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,41 @@
+	TEXT memset(SB),$0
+
+	CLD
+	MOVQ RARG, DI
+	MOVBLZX c+8(FP), AX
+	MOVL n+16(FP), BX
+/*
+ * if not enough bytes, just set bytes
+ */
+	CMPL BX, $9
+	JLS c3
+/*
+ * if not aligned, just set bytes
+ */
+	MOVQ RARG, CX
+	ANDL $3,CX
+	JNE c3
+/*
+ * build word in AX
+ */
+	MOVB AL, AH
+	MOVL AX, CX
+	SHLL $16, CX
+	ORL CX, AX
+/*
+ * set whole longs
+ */
+c1:
+	MOVQ BX, CX
+	SHRQ $2, CX
+	ANDL $3, BX
+	REP;	STOSL
+/*
+ * set the rest, by bytes
+ */
+c3:
+	MOVL BX, CX
+	REP;	STOSB
+ret:
+	MOVQ RARG,AX
+	RET
diff -Nru /sys/src/libc/amd64/mkfile /sys/src/libc/amd64/mkfile
--- /sys/src/libc/amd64/mkfile	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/mkfile	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,42 @@
+objtype=amd64
+
+#include <u.h>
+#include <libc.h>
+#include <ureg.h>
+
+void
+notejmp(void *vr, jmp_buf j, int ret)
+{
+	struct Ureg *r = vr;
+
+	r->ax = ret;
+	if(ret == 0)
+		r->ax = 1;
+	r->ip = j[JMPBUFPC];
+	r->sp = j[JMPBUFSP] + 8;
+	noted(NCONT);
+}
diff -Nru /sys/src/libc/amd64/rdpmc.s /sys/src/libc/amd64/rdpmc.s
--- /sys/src/libc/amd64/rdpmc.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/rdpmc.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,9 @@
+MODE $64
+
+TEXT rdpmc(SB), 1, $-4	/* performance monitor counter */
+	MOVL RARG, CX
+	RDPMC		/* read CX performance counter */
+	XCHGL DX, AX	/* swap lo/hi, zero-extend */
+	SHLQ $32, AX	/* hi<<32 */
+	ORQ DX, AX	/* (hi<<32)|lo */
+	RET
diff -Nru /sys/src/libc/amd64/setjmp.s /sys/src/libc/amd64/setjmp.s
--- /sys/src/libc/amd64/setjmp.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/setjmp.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,17 @@
+TEXT longjmp(SB), $0
+	MOVL r+8(FP), AX
+	CMPL AX, $0
+	JNE ok		/* ansi: "longjmp(0) => longjmp(1)" */
+	MOVL $1, AX	/* bless their pointed heads */
+ok:
+	MOVQ 0(RARG), SP	/* restore sp */
+	MOVQ 8(RARG), BX	/* put return pc on the stack */
+	MOVQ BX, 0(SP)
+	RET
+
+TEXT setjmp(SB), $0
+	MOVQ SP, 0(RARG)	/* store sp */
+	MOVQ 0(SP), BX		/* store return pc */
+	MOVQ BX, 8(RARG)
+	MOVL $0, AX		/* return 0 */
+	RET
diff -Nru /sys/src/libc/amd64/sqrt.s /sys/src/libc/amd64/sqrt.s
--- /sys/src/libc/amd64/sqrt.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/sqrt.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,4 @@
+TEXT sqrt(SB), $0
+	MOVSD a+0(FP), X0
+	SQRTSD X0, X0
+	RET
diff -Nru /sys/src/libc/amd64/strcat.s /sys/src/libc/amd64/strcat.s
--- /sys/src/libc/amd64/strcat.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/strcat.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,48 @@
+	TEXT strcat(SB),$0
+
+	MOVL $0, AX
+	MOVQ $-1, CX
+	CLD
+
+/*
+ * find length of second string
+ */
+
+	MOVQ p2+8(FP), DI
+	REPN;	SCASB
+
+	MOVQ DI, BX
+	SUBQ p2+8(FP), BX
+
+/*
+ * find end of first string
+ */
+
+	MOVQ RARG, DI
+	REPN;	SCASB
+
+/*
+ * copy the memory
+ */
+	SUBQ $1, DI
+	MOVQ p2+8(FP), SI
+/*
+ * copy whole longs, if aligned
+ */
+	MOVQ DI, CX
+	ORQ SI, CX
+	ANDL $3, CX
+	JNE c3
+	MOVQ BX, CX
+	SHRQ $2, CX
+	REP;	MOVSL
+/*
+ * copy the rest, by bytes
+ */
+	ANDL $3, BX
+c3:
+	MOVQ BX, CX
+	REP;	MOVSB
+
+	MOVQ RARG, AX
+	RET
diff -Nru /sys/src/libc/amd64/strchr.s /sys/src/libc/amd64/strchr.s
--- /sys/src/libc/amd64/strchr.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/strchr.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,38 @@
+	TEXT strchr(SB), $0
+
+	MOVQ RARG, DI
+	MOVB c+8(FP), AX
+	CMPB AX, $0
+	JEQ l2	/**/
+
+/*
+ * char is not null
+ */
+l1:
+	MOVB (DI), BX
+	CMPB BX, $0
+	JEQ ret0
+	ADDQ $1, DI
+	CMPB AX, BX
+	JNE l1
+
+	MOVQ DI, AX
+	SUBQ $1, AX
+	RET
+
+/*
+ * char is null
+ */
+l2:
+	MOVQ $-1, CX
+	CLD
+
+	REPN;	SCASB
+
+	MOVQ DI, AX
+	SUBQ $1, AX
+	RET
+
+ret0:
+	MOVQ $0, AX
+	RET
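The jmp_buf that setjmp fills here is just two quadwords, {sp, pc}, and longjmp rewrites a zero return value to 1, as ANSI requires. A sketch showing the convention (standard usage, nothing specific to this patch):

#include <u.h>
#include <libc.h>

static jmp_buf env;

void
run(void)
{
	if(setjmp(env) == 0)
		longjmp(env, 0);	/* comes back as 1, per the JNE/MOVL above */
	/* reached with setjmp returning 1 */
}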
diff -Nru /sys/src/libc/amd64/strcpy.s /sys/src/libc/amd64/strcpy.s
--- /sys/src/libc/amd64/strcpy.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/strcpy.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,40 @@
+	TEXT strcpy(SB),$0
+
+	MOVL $0, AX
+	MOVQ $-1, CX
+	CLD
+/*
+ * find end of second string
+ */
+
+	MOVQ p2+8(FP), DI
+	REPN;	SCASB
+
+	MOVQ DI, BX
+	SUBQ p2+8(FP), BX
+
+/*
+ * copy the memory
+ */
+	MOVQ RARG, DI
+	MOVQ p2+8(FP), SI
+/*
+ * copy whole longs, if aligned
+ */
+	MOVQ DI, CX
+	ORQ SI, CX
+	ANDL $3, CX
+	JNE c3
+	MOVQ BX, CX
+	SHRQ $2, CX
+	REP;	MOVSL
+/*
+ * copy the rest, by bytes
+ */
+	ANDL $3, BX
+c3:
+	MOVL BX, CX
+	REP;	MOVSB
+
+	MOVQ RARG, AX
+	RET
diff -Nru /sys/src/libc/amd64/strlen.s /sys/src/libc/amd64/strlen.s
--- /sys/src/libc/amd64/strlen.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/strlen.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,16 @@
+	TEXT strlen(SB),$0
+
+	MOVL $0, AX
+	MOVQ $-1, CX
+	CLD
+/*
+ * look for end of string
+ */
+
+	MOVQ RARG, DI
+	REPN;	SCASB
+
+	MOVQ DI, AX
+	SUBQ RARG, AX
+	SUBQ $1, AX
+	RET
diff -Nru /sys/src/libc/amd64/tas.s /sys/src/libc/amd64/tas.s
--- /sys/src/libc/amd64/tas.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libc/amd64/tas.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,8 @@
+/*
+ * The kernel and the libc use the same constant for TAS
+ */
+TEXT _tas(SB),$0
+
+	MOVL $0xdeaddead,AX
+	XCHGL AX,(RARG)
+	RET
diff -Nru /sys/src/libmp/amd64/mkfile /sys/src/libmp/amd64/mkfile
--- /sys/src/libmp/amd64/mkfile	Thu Jan 1 00:00:00 1970
+++ /sys/src/libmp/amd64/mkfile	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,20 @@
+objtype=amd64
+= 2^32 * divisor */
+	JHS _divovfl
+	CMPL BX,CX	/* divisor == 0 */
+	JE _divovfl
+	DIVL BX		/* AX = DX:AX/BX */
+	MOVL AX,0(DI)
+	RET
+
+	/* return all 1's */
+_divovfl:
+	NOTL CX
+	MOVL CX,0(DI)
+	RET
diff -Nru /sys/src/libmp/amd64/mpvecadd.s /sys/src/libmp/amd64/mpvecadd.s
--- /sys/src/libmp/amd64/mpvecadd.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libmp/amd64/mpvecadd.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,54 @@
+/*
+ *	mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum)
+ *
+ *	sum[0:alen] = a[0:alen-1] + b[0:blen-1]
+ *
+ *	prereq: alen >= blen, sum has room for alen+1 digits
+ */
+TEXT mpvecadd(SB),$0
+
+	MOVL alen+8(FP),DX
+	MOVL blen+24(FP),CX
+/*	MOVL a+0(FP),SI */
+	MOVQ RARG, SI
+	MOVQ b+16(FP),BX
+	SUBL CX,DX
+	MOVQ sum+32(FP),DI
+	XORL BP,BP	/* this also sets carry to 0 */
+
+	/* skip addition if b is zero */
+	TESTL CX,CX
+	JZ _add1
+
+	/* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */
+_addloop1:
+	MOVL (SI)(BP*4), AX
+	ADCL (BX)(BP*4), AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP
+	LOOP _addloop1
+
+_add1:
+	/* jump if alen > blen */
+	INCL DX
+	MOVL DX,CX
+	LOOP _addloop2
+
+	/* sum[alen] = carry */
+_addend:
+	JC _addcarry
+	MOVL $0,(DI)(BP*4)
+	RET
+_addcarry:
+	MOVL $1,(DI)(BP*4)
+	RET
+
+	/* sum[blen:alen-1],carry = a[blen:alen-1] + 0 */
+_addloop2:
+	MOVL (SI)(BP*4),AX
+	ADCL $0,AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP
+	LOOP _addloop2
+	JMP _addend
+
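For reference, mpvecadd's contract in portable C, assuming the 32-bit mpdigit that the 4-byte indexing in the assembly implies; the assembly keeps the running carry in the flags across ADCL instead of in a variable (refvecadd is hypothetical, for comparison only):

#include <u.h>
#include <libc.h>
#include <mp.h>

/* sketch: sum[0:alen] = a[0:alen-1] + b[0:blen-1], alen >= blen */
void
refvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum)
{
	int i;
	uvlong c;

	c = 0;
	for(i = 0; i < alen; i++){
		c += (uvlong)a[i] + (i < blen? b[i]: 0);
		sum[i] = c;	/* low 32 bits */
		c >>= 32;	/* carry */
	}
	sum[alen] = c;
}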
diff -Nru /sys/src/libmp/amd64/mpvecdigmuladd.s /sys/src/libmp/amd64/mpvecdigmuladd.s
--- /sys/src/libmp/amd64/mpvecdigmuladd.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libmp/amd64/mpvecdigmuladd.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,53 @@
+/*
+ *	mpvecdigmul(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ *	p += b*m
+ *
+ *	each step looks like:
+ *		hi,lo = m*b[i]
+ *		lo += oldhi + carry
+ *		hi += carry
+ *		p[i] += lo
+ *		oldhi = hi
+ *
+ *	the registers are:
+ *		hi = DX		- constrained by hardware
+ *		lo = AX		- constrained by hardware
+ *		b+n = SI	- can't be BP
+ *		p+n = DI	- can't be BP
+ *		i-n = BP
+ *		m = BX
+ *		oldhi = CX
+ *
+ */
+TEXT mpvecdigmuladd(SB),$0
+
+/*	MOVQ b+0(FP),SI */
+	MOVQ RARG,SI
+	MOVL n+8(FP),CX
+	MOVL m+16(FP),BX
+	MOVQ p+24(FP),DI
+	MOVL CX,BP
+	NEGQ BP			/* BP = -n */
+	SHLL $2,CX
+	ADDQ CX,SI		/* SI = b + n */
+	ADDQ CX,DI		/* DI = p + n */
+	XORL CX,CX
+_muladdloop:
+	MOVL (SI)(BP*4),AX	/* lo = b[i] */
+	MULL BX			/* hi, lo = b[i] * m */
+	ADDL CX,AX		/* lo += oldhi */
+	JCC _muladdnocarry1
+	INCL DX			/* hi += carry */
+_muladdnocarry1:
+	ADDL AX,(DI)(BP*4)	/* p[i] += lo */
+	JCC _muladdnocarry2
+	INCL DX			/* hi += carry */
+_muladdnocarry2:
+	MOVL DX,CX		/* oldhi = hi */
+	INCQ BP			/* i++ */
+	JNZ _muladdloop
+	XORL AX,AX
+	ADDL CX,(DI)(BP*4)	/* p[n] += oldhi */
+	ADCL AX,AX		/* return carry out of p[n] */
+	RET
diff -Nru /sys/src/libmp/amd64/mpvecdigmulsub.s /sys/src/libmp/amd64/mpvecdigmulsub.s
--- /sys/src/libmp/amd64/mpvecdigmulsub.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libmp/amd64/mpvecdigmulsub.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,53 @@
+/*
+ *	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p)
+ *
+ *	p -= b*m
+ *
+ *	each step looks like:
+ *		hi,lo = m*b[i]
+ *		lo += oldhi + carry
+ *		hi += carry
+ *		p[i] -= lo
+ *		oldhi = hi
+ *
+ *	the registers are:
+ *		hi = DX		- constrained by hardware
+ *		lo = AX		- constrained by hardware
+ *		b = SI		- can't be BP
+ *		p = DI		- can't be BP
+ *		i = BP
+ *		n = CX		- constrained by LOOP instr
+ *		m = BX
+ *		oldhi = R8
+ *
+ */
+TEXT mpvecdigmulsub(SB),$0
+
+/*	MOVL b+0(FP),SI */
+	MOVQ RARG,SI
+	MOVL n+8(FP),CX
+	MOVL m+16(FP),BX
+	MOVQ p+24(FP),DI
+	XORL BP,BP
+	MOVL BP,R8
+_mulsubloop:
+	MOVL (SI)(BP*4),AX	/* lo = b[i] */
+	MULL BX			/* hi, lo = b[i] * m */
+	ADDL R8,AX		/* lo += oldhi */
+	JCC _mulsubnocarry1
+	INCL DX			/* hi += carry */
+_mulsubnocarry1:
+	SUBL AX,(DI)(BP*4)
+	JCC _mulsubnocarry2
+	INCL DX			/* hi += carry */
+_mulsubnocarry2:
+	MOVL DX,R8
+	INCL BP
+	LOOP _mulsubloop
+	SUBL R8,(DI)(BP*4)
+	JCC _mulsubnocarry3
+	MOVQ $-1,AX
+	RET
+_mulsubnocarry3:
+	MOVQ $1,AX
+	RET
diff -Nru /sys/src/libmp/amd64/mpvecsub.s /sys/src/libmp/amd64/mpvecsub.s
--- /sys/src/libmp/amd64/mpvecsub.s	Thu Jan 1 00:00:00 1970
+++ /sys/src/libmp/amd64/mpvecsub.s	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,45 @@
+/*
+ *	mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff)
+ *
+ *	diff[0:alen-1] = a[0:alen-1] - b[0:blen-1]
+ *
+ *	prereq: alen >= blen, diff has room for alen digits
+ */
+TEXT mpvecsub(SB),$0
+
+/*	MOVQ a+0(FP),SI */
+	MOVQ RARG, SI
+	MOVQ b+16(FP),BX
+	MOVL alen+8(FP),DX
+	MOVL blen+24(FP),CX
+	MOVQ diff+32(FP),DI
+	SUBL CX,DX
+	XORL BP,BP	/* this also sets carry to 0 */
+
+	/* skip subtraction if b is zero */
+	TESTL CX,CX
+	JZ _sub1
+
+	/* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */
+_subloop1:
+	MOVL (SI)(BP*4),AX
+	SBBL (BX)(BP*4),AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP
+	LOOP _subloop1
+
+_sub1:
+	INCL DX
+	MOVL DX,CX
+	LOOP _subloop2
+	RET
+
+	/* diff[blen:alen-1] = a[blen:alen-1] - 0 */
+_subloop2:
+	MOVL (SI)(BP*4),AX
+	SBBL $0,AX
+	MOVL AX,(DI)(BP*4)
+	INCL BP
+	LOOP _subloop2
+	RET
+
diff -Nru /sys/src/libsec/amd64/mkfile /sys/src/libsec/amd64/mkfile
--- /sys/src/libsec/amd64/mkfile	Thu Jan 1 00:00:00 1970
+++ /sys/src/libsec/amd64/mkfile	Thu Apr 25 00:00:00 2013
@@ -0,0 +1,11 @@
+objtype=amd64
+
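And the corresponding C reference for mpvecdigmuladd, again assuming 32-bit digits: a single 64-bit intermediate holds hi:lo of b[i]*m plus both additions without overflowing, which is what the JCC/INCL carry juggling above implements (refvecdigmuladd is hypothetical, for comparison only):

#include <u.h>
#include <libc.h>
#include <mp.h>

/* sketch: p[0:n] += b[0:n-1]*m; returns the carry out of p[n] */
int
refvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p)
{
	int i;
	uvlong t, c;

	c = 0;
	for(i = 0; i < n; i++){
		t = (uvlong)b[i]*m + c + p[i];	/* cannot overflow 64 bits */
		p[i] = t;	/* low 32 bits */
		c = t >> 32;
	}
	c += p[n];
	p[n] = c;
	return c >> 32;
}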