diff -Nru /n/sources/plan9/sys/src/cmd/6a/a.h /sys/src/cmd/6a/a.h --- /n/sources/plan9/sys/src/cmd/6a/a.h Mon May 23 18:57:41 2005 +++ /sys/src/cmd/6a/a.h Wed Nov 11 00:00:00 2015 @@ -156,6 +156,7 @@ void ieeedtod(Ieee*, double); int filbuf(void); Sym* getsym(void); +int isxyreg(int); void domacro(void); void macund(void); void macdef(void); diff -Nru /n/sources/plan9/sys/src/cmd/6a/a.y /sys/src/cmd/6a/a.y --- /n/sources/plan9/sys/src/cmd/6a/a.y Mon May 23 18:57:41 2005 +++ /sys/src/cmd/6a/a.y Wed Nov 11 00:00:00 2015 @@ -16,16 +16,16 @@ %left '+' '-' %left '*' '/' '%' %token LTYPE0 LTYPE1 LTYPE2 LTYPE3 LTYPE4 -%token LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEXC LTYPEX LTYPERT +%token LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEG LTYPEXC LTYPEX LTYPEY LTYPERT %token LCONST LFP LPC LSB -%token LBREG LLREG LSREG LFREG LMREG LXREG +%token LBREG LLREG LSREG LFREG LMREG LXREG LYREG %token LFCONST %token LSCONST LSP %token LNAME LLAB LVAR %type con expr pointer offset %type mem imm reg nam rel rem rim rom omem nmem -%type nonnon nonrel nonrem rimnon rimrem remrim spec10 -%type spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8 spec9 +%type nonnon nonrel nonrem rimnon rimrem remrim +%type spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8 spec9 spec10 spec11 spec12 %% prog: | prog line @@ -75,7 +75,9 @@ | LTYPEI spec7 { outcode($1, &$2); } | LTYPEXC spec8 { outcode($1, &$2); } | LTYPEX spec9 { outcode($1, &$2); } -| LTYPERT spec10 { outcode($1, &$2); } +| LTYPEG spec10 { outcode($1, &$2); } +| LTYPEY spec11 { outcode($1, &$2); } +| LTYPERT spec12 { outcode($1, &$2); } nonnon: { @@ -229,8 +231,17 @@ $$.to = $3; $$.from.offset = $5; } +| reg ',' reg ',' rem ',' con /* VCMPPS/VCMPPD */ + { + $$.from = $1; + if(!isxyreg($3.type)) + yyerror("second source operand must be X/Y register"); + $$.from.index = $3.type; + $$.to = $5; + $$.from.offset = $7; + } -spec9: /* shufl */ +spec9: /* SHUFL */ imm ',' rem ',' reg { $$.from = $3; @@ -239,8 +250,47 @@ yyerror("illegal constant"); $$.to.offset = $1.offset; } +| imm ',' rem ',' reg ',' reg + { + $$.from = $3; + $$.to = $7; + if($1.type != D_CONST) + yyerror("illegal constant"); + $$.to.offset = $1.offset; + if(!isxyreg($5.type)) + yyerror("second source operand must be X/Y register"); + $$.to.index = $5.type; + } -spec10: /* RET/RETF */ +spec10: /* GLOBL */ + mem ',' imm + { + $$.from = $1; + $$.to = $3; + } +| mem ',' con ',' imm + { + $$.from = $1; + $$.from.scale = $3; + $$.to = $5; + } + +spec11: + rimrem +| rim ',' reg ',' rem + { + $$.from = $1; + $$.to = $5; + if(isxyreg($3.type)) { + if(isxyreg($1.type)) + $$.from.index = $3.type; + else if(isxyreg($5.type)) + $$.to.index = $3.type; + } else + yyerror("second source operand must be X or Y register"); + } + +spec12: /* RET/RETF */ { $$.from = nullgen; $$.to = nullgen; @@ -268,6 +318,7 @@ } | reg | omem +| imm rim: rem @@ -333,6 +384,11 @@ $$ = nullgen; $$.type = $1; } +| LYREG + { + $$ = nullgen; + $$.type = $1; + } imm: '$' con @@ -427,6 +483,12 @@ { $$ = nullgen; $$.type = D_INDIR+D_SP; + } +| con '(' LSREG ')' + { + $$ = nullgen; + $$.type = D_INDIR+$3; + $$.offset = $1; } | '(' LLREG '*' con ')' { diff -Nru /n/sources/plan9/sys/src/cmd/6a/lex.c /sys/src/cmd/6a/lex.c --- /n/sources/plan9/sys/src/cmd/6a/lex.c Tue Dec 9 14:57:57 2008 +++ /sys/src/cmd/6a/lex.c Wed Nov 11 00:00:00 2015 @@ -121,7 +121,10 @@ setinclude(p); } else { if(systemtype(Plan9)) { - sprint(incfile,"/%s/include", thestring); + p = getenv("ccroot"); + if(p == nil) + p = ""; + snprint(incfile, sizeof(incfile), "%s/%s/include", p, thestring); setinclude(strdup(incfile)); } } @@ -206,6 +209,7 @@ "RARG", LLREG, REGARG, +/* "F0", LFREG, D_F0+0, "F1", LFREG, D_F0+1, "F2", LFREG, D_F0+2, @@ -214,6 +218,7 @@ "F5", LFREG, D_F0+5, "F6", LFREG, D_F0+6, "F7", LFREG, D_F0+7, +*/ "M0", LMREG, D_M0+0, "M1", LMREG, D_M0+1, @@ -241,6 +246,23 @@ "X14", LXREG, D_X0+14, "X15", LXREG, D_X0+15, + "Y0", LYREG, D_Y0+0, + "Y1", LYREG, D_Y0+1, + "Y2", LYREG, D_Y0+2, + "Y3", LYREG, D_Y0+3, + "Y4", LYREG, D_Y0+4, + "Y5", LYREG, D_Y0+5, + "Y6", LYREG, D_Y0+6, + "Y7", LYREG, D_Y0+7, + "Y8", LYREG, D_Y0+8, + "Y9", LYREG, D_Y0+9, + "Y10", LYREG, D_Y0+10, + "Y11", LYREG, D_Y0+11, + "Y12", LYREG, D_Y0+12, + "Y13", LYREG, D_Y0+13, + "Y14", LYREG, D_Y0+14, + "Y15", LYREG, D_Y0+15, + "CS", LSREG, D_CS, "SS", LSREG, D_SS, "DS", LSREG, D_DS, @@ -315,6 +337,8 @@ "BSRL", LTYPE3, ABSRL, "BSRQ", LTYPE3, ABSRQ, "BSRW", LTYPE3, ABSRW, + "BSWAPL", LTYPE1, ABSWAPL, + "BSWAPQ", LTYPE1, ABSWAPQ, "BTCL", LTYPE3, ABTCL, "BTCQ", LTYPE3, ABTCQ, "BTCW", LTYPE3, ABTCW, @@ -343,6 +367,7 @@ "CMPSQ", LTYPE0, ACMPSQ, "CMPSW", LTYPE0, ACMPSW, "CMPXCHG8B", LTYPE1, ACMPXCHG8B, + "CMPXCHG16B", LTYPE1, ACMPXCHG16B, "CMPXCHGB", LTYPE3, ACMPXCHGB, /* LTYPE3? */ "CMPXCHGL", LTYPE3, ACMPXCHGL, "CMPXCHGQ", LTYPE3, ACMPXCHGQ, @@ -475,6 +500,7 @@ "MOVLQZX", LTYPE3, AMOVLQZX, "MOVNTIL", LTYPE3, AMOVNTIL, "MOVNTIQ", LTYPE3, AMOVNTIQ, + "MOVQL", LTYPE3, AMOVQL, "MOVWLSX", LTYPE3, AMOVWLSX, "MOVWLZX", LTYPE3, AMOVWLZX, "MOVWQSX", LTYPE3, AMOVWQSX, @@ -506,8 +532,12 @@ "OUTSB", LTYPE0, AOUTSB, "OUTSL", LTYPE0, AOUTSL, "OUTSW", LTYPE0, AOUTSW, + "PAUSE", LTYPE0, APAUSE, "POPAL", LTYPE0, APOPAL, "POPAW", LTYPE0, APOPAW, + "POPCNTW", LTYPE3, APOPCNTW, + "POPCNTL", LTYPE3, APOPCNTL, + "POPCNTQ", LTYPE3, APOPCNTQ, "POPFL", LTYPE0, APOPFL, "POPFQ", LTYPE0, APOPFQ, "POPFW", LTYPE0, APOPFW, @@ -780,75 +810,77 @@ "FYL2X", LTYPE0, AFYL2X, "FYL2XP1", LTYPE0, AFYL2XP1, - "ADDPD", LTYPE3, AADDPD, - "ADDPS", LTYPE3, AADDPS, - "ADDSD", LTYPE3, AADDSD, - "ADDSS", LTYPE3, AADDSS, - "ANDNPD", LTYPE3, AANDNPD, - "ANDNPS", LTYPE3, AANDNPS, - "ANDPD", LTYPE3, AANDPD, - "ANDPS", LTYPE3, AANDPS, + "ADDPD", LTYPEY, AADDPD, + "ADDPS", LTYPEY, AADDPS, + "ADDSD", LTYPEY, AADDSD, + "ADDSS", LTYPEY, AADDSS, + "ANDNPD", LTYPEY, AANDNPD, + "ANDNPS", LTYPEY, AANDNPS, + "ANDPD", LTYPEY, AANDPD, + "ANDPS", LTYPEY, AANDPS, "CMPPD", LTYPEXC,ACMPPD, "CMPPS", LTYPEXC,ACMPPS, "CMPSD", LTYPEXC,ACMPSD, "CMPSS", LTYPEXC,ACMPSS, - "COMISD", LTYPE3, ACOMISD, - "COMISS", LTYPE3, ACOMISS, - "CVTPL2PD", LTYPE3, ACVTPL2PD, - "CVTPL2PS", LTYPE3, ACVTPL2PS, - "CVTPD2PL", LTYPE3, ACVTPD2PL, - "CVTPD2PS", LTYPE3, ACVTPD2PS, - "CVTPS2PL", LTYPE3, ACVTPS2PL, - "PF2IW", LTYPE3, APF2IW, - "PF2IL", LTYPE3, APF2IL, - "PF2ID", LTYPE3, APF2IL, /* syn */ - "PI2FL", LTYPE3, API2FL, - "PI2FD", LTYPE3, API2FL, /* syn */ - "PI2FW", LTYPE3, API2FW, - "CVTPS2PD", LTYPE3, ACVTPS2PD, - "CVTSD2SL", LTYPE3, ACVTSD2SL, - "CVTSD2SQ", LTYPE3, ACVTSD2SQ, - "CVTSD2SS", LTYPE3, ACVTSD2SS, - "CVTSL2SD", LTYPE3, ACVTSL2SD, - "CVTSQ2SD", LTYPE3, ACVTSQ2SD, - "CVTSL2SS", LTYPE3, ACVTSL2SS, - "CVTSQ2SS", LTYPE3, ACVTSQ2SS, - "CVTSS2SD", LTYPE3, ACVTSS2SD, - "CVTSS2SL", LTYPE3, ACVTSS2SL, - "CVTSS2SQ", LTYPE3, ACVTSS2SQ, - "CVTTPD2PL", LTYPE3, ACVTTPD2PL, - "CVTTPS2PL", LTYPE3, ACVTTPS2PL, - "CVTTSD2SL", LTYPE3, ACVTTSD2SL, - "CVTTSD2SQ", LTYPE3, ACVTTSD2SQ, - "CVTTSS2SL", LTYPE3, ACVTTSS2SL, - "CVTTSS2SQ", LTYPE3, ACVTTSS2SQ, - "DIVPD", LTYPE3, ADIVPD, - "DIVPS", LTYPE3, ADIVPS, - "DIVSD", LTYPE3, ADIVSD, - "DIVSS", LTYPE3, ADIVSS, + "COMISD", LTYPEY, ACOMISD, + "COMISS", LTYPEY, ACOMISS, + "CVTPL2PD", LTYPEY, ACVTPL2PD, + "CVTPL2PS", LTYPEY, ACVTPL2PS, + "CVTPD2PL", LTYPEY, ACVTPD2PL, + "CVTPD2PS", LTYPEY, ACVTPD2PS, + "CVTPS2PL", LTYPEY, ACVTPS2PL, + "PF2IW", LTYPEY, APF2IW, + "PF2IL", LTYPEY, APF2IL, + "PF2ID", LTYPEY, APF2IL, /* syn */ + "PI2FL", LTYPEY, API2FL, + "PI2FD", LTYPEY, API2FL, /* syn */ + "PI2FW", LTYPEY, API2FW, + "CVTPS2PD", LTYPEY, ACVTPS2PD, + "CVTSD2SL", LTYPEY, ACVTSD2SL, + "CVTSD2SQ", LTYPEY, ACVTSD2SQ, + "CVTSD2SS", LTYPEY, ACVTSD2SS, + "CVTSL2SD", LTYPEY, ACVTSL2SD, + "CVTSQ2SD", LTYPEY, ACVTSQ2SD, + "CVTSL2SS", LTYPEY, ACVTSL2SS, + "CVTSQ2SS", LTYPEY, ACVTSQ2SS, + "CVTSS2SD", LTYPEY, ACVTSS2SD, + "CVTSS2SL", LTYPEY, ACVTSS2SL, + "CVTSS2SQ", LTYPEY, ACVTSS2SQ, + "CVTTPD2PL", LTYPEY, ACVTTPD2PL, + "CVTTPS2PL", LTYPEY, ACVTTPS2PL, + "CVTTSD2SL", LTYPEY, ACVTTSD2SL, + "CVTTSD2SQ", LTYPEY, ACVTTSD2SQ, + "CVTTSS2SL", LTYPEY, ACVTTSS2SL, + "CVTTSS2SQ", LTYPEY, ACVTTSS2SQ, + "DIVPD", LTYPEY, ADIVPD, + "DIVPS", LTYPEY, ADIVPS, + "DIVSD", LTYPEY, ADIVSD, + "DIVSS", LTYPEY, ADIVSS, "FXRSTOR", LTYPE2, AFXRSTOR, "FXRSTOR64", LTYPE2, AFXRSTOR64, "FXSAVE", LTYPE1, AFXSAVE, "FXSAVE64", LTYPE1, AFXSAVE64, "LDMXCSR", LTYPE2, ALDMXCSR, - "MASKMOVOU", LTYPE3, AMASKMOVOU, - "MASKMOVDQU", LTYPE3, AMASKMOVOU, /* syn */ + "MASKMOVDQU", LTYPE3, AMASKMOVDQU, "MASKMOVQ", LTYPE3, AMASKMOVQ, - "MAXPD", LTYPE3, AMAXPD, - "MAXPS", LTYPE3, AMAXPS, - "MAXSD", LTYPE3, AMAXSD, - "MAXSS", LTYPE3, AMAXSS, - "MINPD", LTYPE3, AMINPD, - "MINPS", LTYPE3, AMINPS, - "MINSD", LTYPE3, AMINSD, - "MINSS", LTYPE3, AMINSS, + "MAXPD", LTYPEY, AMAXPD, + "MAXPS", LTYPEY, AMAXPS, + "MAXSD", LTYPEY, AMAXSD, + "MAXSS", LTYPEY, AMAXSS, + "MINPD", LTYPEY, AMINPD, + "MINPS", LTYPEY, AMINPS, + "MINSD", LTYPEY, AMINSD, + "MINSS", LTYPEY, AMINSS, "MOVAPD", LTYPE3, AMOVAPD, "MOVAPS", LTYPE3, AMOVAPS, "MOVD", LTYPE3, AMOVQ, /* syn */ "MOVDQ2Q", LTYPE3, AMOVQ, /* syn */ "MOVO", LTYPE3, AMOVO, - "MOVOA", LTYPE3, AMOVO, /* syn */ - "MOVOU", LTYPE3, AMOVOU, + "MOVOA", LTYPE3, AMOVDQA, /* syn */ + "MOVDQA", LTYPE3, AMOVDQA, + "MOVDQU", LTYPE3, AMOVDQU, + "MOVQQA", LTYPE3, AMOVQQA, + "MOVQQU", LTYPE3, AMOVQQU, "MOVHLPS", LTYPE3, AMOVHLPS, "MOVHPD", LTYPE3, AMOVHPD, "MOVHPS", LTYPE3, AMOVHPS, @@ -857,8 +889,7 @@ "MOVLPS", LTYPE3, AMOVLPS, "MOVMSKPD", LTYPE3, AMOVMSKPD, "MOVMSKPS", LTYPE3, AMOVMSKPS, - "MOVNTO", LTYPE3, AMOVNTO, - "MOVNTDQ", LTYPE3, AMOVNTO, /* syn */ + "MOVNTDQ", LTYPE3, AMOVNTDQ, "MOVNTPD", LTYPE3, AMOVNTPD, "MOVNTPS", LTYPE3, AMOVNTPS, "MOVNTQ", LTYPE3, AMOVNTQ, @@ -867,115 +898,472 @@ "MOVSS", LTYPE3, AMOVSS, "MOVUPD", LTYPE3, AMOVUPD, "MOVUPS", LTYPE3, AMOVUPS, - "MULPD", LTYPE3, AMULPD, - "MULPS", LTYPE3, AMULPS, - "MULSD", LTYPE3, AMULSD, - "MULSS", LTYPE3, AMULSS, - "ORPD", LTYPE3, AORPD, - "ORPS", LTYPE3, AORPS, - "PACKSSLW", LTYPE3, APACKSSLW, - "PACKSSWB", LTYPE3, APACKSSWB, - "PACKUSWB", LTYPE3, APACKUSWB, - "PADDB", LTYPE3, APADDB, - "PADDL", LTYPE3, APADDL, - "PADDQ", LTYPE3, APADDQ, - "PADDSB", LTYPE3, APADDSB, - "PADDSW", LTYPE3, APADDSW, - "PADDUSB", LTYPE3, APADDUSB, - "PADDUSW", LTYPE3, APADDUSW, - "PADDW", LTYPE3, APADDW, - "PAND", LTYPE3, APAND, - "PANDB", LTYPE3, APANDB, - "PANDL", LTYPE3, APANDL, - "PANDSB", LTYPE3, APANDSB, - "PANDSW", LTYPE3, APANDSW, - "PANDUSB", LTYPE3, APANDUSB, - "PANDUSW", LTYPE3, APANDUSW, - "PANDW", LTYPE3, APANDW, - "PANDN", LTYPE3, APANDN, - "PAVGB", LTYPE3, APAVGB, - "PAVGW", LTYPE3, APAVGW, - "PCMPEQB", LTYPE3, APCMPEQB, - "PCMPEQL", LTYPE3, APCMPEQL, - "PCMPEQW", LTYPE3, APCMPEQW, - "PCMPGTB", LTYPE3, APCMPGTB, - "PCMPGTL", LTYPE3, APCMPGTL, - "PCMPGTW", LTYPE3, APCMPGTW, + "MULPD", LTYPEY, AMULPD, + "MULPS", LTYPEY, AMULPS, + "MULSD", LTYPEY, AMULSD, + "MULSS", LTYPEY, AMULSS, + "ORPD", LTYPEY, AORPD, + "ORPS", LTYPEY, AORPS, + "PACKSSLW", LTYPEY, APACKSSLW, + "PACKSSWB", LTYPEY, APACKSSWB, + "PACKUSWB", LTYPEY, APACKUSWB, + "PADDB", LTYPEY, APADDB, + "PADDL", LTYPEY, APADDL, + "PADDQ", LTYPEY, APADDQ, + "PADDSB", LTYPEY, APADDSB, + "PADDSW", LTYPEY, APADDSW, + "PADDUSB", LTYPEY, APADDUSB, + "PADDUSW", LTYPEY, APADDUSW, + "PADDW", LTYPEY, APADDW, + "PAND", LTYPEY, APAND, + "PANDB", LTYPEY, APANDB, + "PANDL", LTYPEY, APANDL, + "PANDSB", LTYPEY, APANDSB, + "PANDSW", LTYPEY, APANDSW, + "PANDUSB", LTYPEY, APANDUSB, + "PANDUSW", LTYPEY, APANDUSW, + "PANDW", LTYPEY, APANDW, + "PANDN", LTYPEY, APANDN, + "PAVGB", LTYPEY, APAVGB, + "PAVGW", LTYPEY, APAVGW, + "PCMPEQB", LTYPEY, APCMPEQB, + "PCMPEQL", LTYPEY, APCMPEQL, + "PCMPEQW", LTYPEY, APCMPEQW, + "PCMPGTB", LTYPEY, APCMPGTB, + "PCMPGTL", LTYPEY, APCMPGTL, + "PCMPGTW", LTYPEY, APCMPGTW, "PEXTRW", LTYPEX, APEXTRW, "PINSRW", LTYPEX, APINSRW, - "PMADDWL", LTYPE3, APMADDWL, - "PMAXSW", LTYPE3, APMAXSW, - "PMAXUB", LTYPE3, APMAXUB, - "PMINSW", LTYPE3, APMINSW, - "PMINUB", LTYPE3, APMINUB, + "PMADDWL", LTYPEY, APMADDWL, + "PMAXSW", LTYPEY, APMAXSW, + "PMAXUB", LTYPEY, APMAXUB, + "PMINSW", LTYPEY, APMINSW, + "PMINUB", LTYPEY, APMINUB, "PMOVMSKB", LTYPE3, APMOVMSKB, - "PMULHRW", LTYPE3, APMULHRW, - "PMULHUW", LTYPE3, APMULHUW, - "PMULHW", LTYPE3, APMULHW, - "PMULLW", LTYPE3, APMULLW, - "PMULULQ", LTYPE3, APMULULQ, - "POR", LTYPE3, APOR, - "PSADBW", LTYPE3, APSADBW, + "PMULHRW", LTYPEY, APMULHRW, + "PMULHUW", LTYPEY, APMULHUW, + "PMULHW", LTYPEY, APMULHW, + "PMULLW", LTYPEY, APMULLW, + "PMULULQ", LTYPEY, APMULULQ, + "POR", LTYPEY, APOR, + "PSADBW", LTYPEY, APSADBW, "PSHUFHW", LTYPEX, APSHUFHW, "PSHUFL", LTYPEX, APSHUFL, "PSHUFLW", LTYPEX, APSHUFLW, "PSHUFW", LTYPEX, APSHUFW, - "PSLLO", LTYPE3, APSLLO, - "PSLLDQ", LTYPE3, APSLLO, /* syn */ - "PSLLL", LTYPE3, APSLLL, - "PSLLQ", LTYPE3, APSLLQ, - "PSLLW", LTYPE3, APSLLW, - "PSRAL", LTYPE3, APSRAL, - "PSRAW", LTYPE3, APSRAW, - "PSRLO", LTYPE3, APSRLO, - "PSRLDQ", LTYPE3, APSRLO, /* syn */ - "PSRLL", LTYPE3, APSRLL, - "PSRLQ", LTYPE3, APSRLQ, - "PSRLW", LTYPE3, APSRLW, - "PSUBB", LTYPE3, APSUBB, - "PSUBL", LTYPE3, APSUBL, - "PSUBQ", LTYPE3, APSUBQ, - "PSUBSB", LTYPE3, APSUBSB, - "PSUBSW", LTYPE3, APSUBSW, - "PSUBUSB", LTYPE3, APSUBUSB, - "PSUBUSW", LTYPE3, APSUBUSW, - "PSUBW", LTYPE3, APSUBW, - "PUNPCKHBW", LTYPE3, APUNPCKHBW, - "PUNPCKHLQ", LTYPE3, APUNPCKHLQ, - "PUNPCKHQDQ", LTYPE3, APUNPCKHQDQ, - "PUNPCKHWL", LTYPE3, APUNPCKHWL, - "PUNPCKLBW", LTYPE3, APUNPCKLBW, - "PUNPCKLLQ", LTYPE3, APUNPCKLLQ, - "PUNPCKLQDQ", LTYPE3, APUNPCKLQDQ, - "PUNPCKLWL", LTYPE3, APUNPCKLWL, - "PXOR", LTYPE3, APXOR, - "RCPPS", LTYPE3, ARCPPS, - "RCPSS", LTYPE3, ARCPSS, - "RSQRTPS", LTYPE3, ARSQRTPS, - "RSQRTSS", LTYPE3, ARSQRTSS, + "PSLLDQ", LTYPEY, APSLLDQ, + "PSLLL", LTYPEY, APSLLL, + "PSLLQ", LTYPEY, APSLLQ, + "PSLLW", LTYPEY, APSLLW, + "PSRAL", LTYPEY, APSRAL, + "PSRAW", LTYPEY, APSRAW, + "PSRLDQ", LTYPEY, APSRLDQ, + "PSRLL", LTYPEY, APSRLL, + "PSRLQ", LTYPEY, APSRLQ, + "PSRLW", LTYPEY, APSRLW, + "PSUBB", LTYPEY, APSUBB, + "PSUBL", LTYPEY, APSUBL, + "PSUBQ", LTYPEY, APSUBQ, + "PSUBSB", LTYPEY, APSUBSB, + "PSUBSW", LTYPEY, APSUBSW, + "PSUBUSB", LTYPEY, APSUBUSB, + "PSUBUSW", LTYPEY, APSUBUSW, + "PSUBW", LTYPEY, APSUBW, + "PUNPCKHBW", LTYPEY, APUNPCKHBW, + "PUNPCKHLQ", LTYPEY, APUNPCKHLQ, + "PUNPCKHQDQ", LTYPEY, APUNPCKHQDQ, + "PUNPCKHWL", LTYPEY, APUNPCKHWL, + "PUNPCKLBW", LTYPEY, APUNPCKLBW, + "PUNPCKLLQ", LTYPEY, APUNPCKLLQ, + "PUNPCKLQDQ", LTYPEY, APUNPCKLQDQ, + "PUNPCKLWL", LTYPEY, APUNPCKLWL, + "PXOR", LTYPEY, APXOR, + "RCPPS", LTYPEY, ARCPPS, + "RCPSS", LTYPEY, ARCPSS, + "RSQRTPS", LTYPEY, ARSQRTPS, + "RSQRTSS", LTYPEY, ARSQRTSS, "SHUFPD", LTYPEX, ASHUFPD, "SHUFPS", LTYPEX, ASHUFPS, - "SQRTPD", LTYPE3, ASQRTPD, - "SQRTPS", LTYPE3, ASQRTPS, - "SQRTSD", LTYPE3, ASQRTSD, - "SQRTSS", LTYPE3, ASQRTSS, + "SQRTPD", LTYPEY, ASQRTPD, + "SQRTPS", LTYPEY, ASQRTPS, + "SQRTSD", LTYPEY, ASQRTSD, + "SQRTSS", LTYPEY, ASQRTSS, "STMXCSR", LTYPE1, ASTMXCSR, - "SUBPD", LTYPE3, ASUBPD, - "SUBPS", LTYPE3, ASUBPS, - "SUBSD", LTYPE3, ASUBSD, - "SUBSS", LTYPE3, ASUBSS, - "UCOMISD", LTYPE3, AUCOMISD, - "UCOMISS", LTYPE3, AUCOMISS, - "UNPCKHPD", LTYPE3, AUNPCKHPD, - "UNPCKHPS", LTYPE3, AUNPCKHPS, - "UNPCKLPD", LTYPE3, AUNPCKLPD, - "UNPCKLPS", LTYPE3, AUNPCKLPS, - "XORPD", LTYPE3, AXORPD, - "XORPS", LTYPE3, AXORPS, - + "SUBPD", LTYPEY, ASUBPD, + "SUBPS", LTYPEY, ASUBPS, + "SUBSD", LTYPEY, ASUBSD, + "SUBSS", LTYPEY, ASUBSS, + "UCOMISD", LTYPEY, AUCOMISD, + "UCOMISS", LTYPEY, AUCOMISS, + "UNPCKHPD", LTYPEY, AUNPCKHPD, + "UNPCKHPS", LTYPEY, AUNPCKHPS, + "UNPCKLPD", LTYPEY, AUNPCKLPD, + "UNPCKLPS", LTYPEY, AUNPCKLPS, + "XORPD", LTYPEY, AXORPD, + "XORPS", LTYPEY, AXORPS, + "XSAVE", LTYPE1, AXSAVE, + "XSAVEOPT", LTYPE1, AXSAVEOPT, + "XRSTOR", LTYPE2, AXRSTOR, + + /* sse/vex */ + "AESDEC", LTYPEY, AAESDEC, + "AESDECLAST", LTYPEY, AAESDECLAST, + "AESENC", LTYPEY, AAESENC, + "AESENCLAST", LTYPEY, AAESENCLAST, + "AESIMC", LTYPEY, AAESIMC, + "AESKEYGENASSIST", LTYPEY, AAESKEYGENASSIST, + "BLENDPD", LTYPEY, ABLENDPD, + "BLENDPS", LTYPEY, ABLENDPS, + "BLENDVPD", LTYPEY, ABLENDVPD, + "BLENDVPS", LTYPEY, ABLENDVPS, + "CRC32B", LTYPE3, ACRC32B, + "CRC32L", LTYPE3, ACRC32L, + "CRC32Q", LTYPE3, ACRC32Q, + "CRC32W", LTYPE3, ACRC32W, + "DPPD", LTYPEY, ADPPD, + "DPPS", LTYPEY, ADPPS, + "EXTRACTPS", LTYPEY, AEXTRACTPS, + "INSERTPS", LTYPEY, AINSERTPS, + "MOVNTDQA", LTYPEY, AMOVNTDQA, + "MPSADBW", LTYPEY, AMPSADBW, + "PABSB", LTYPEY, APABSB, + "PABSL", LTYPEY, APABSL, + "PABSD", LTYPEY, APABSL, /* syn */ + "PABSW", LTYPEY, APABSW, + "PACKUSDW", LTYPEY, APACKUSDW, + "PALIGNR", LTYPEX, APALIGNR, + "PBLENDVB", LTYPEY, APBLENDVB, + "PBLENDW", LTYPEY, APBLENDW, + "PCLMULHQHQDQ", LTYPEY, APCLMULHQHQDQ, + "PCLMULHQLQDQ", LTYPEY, APCLMULHQLQDQ, + "PCLMULLQHQDQ", LTYPEY, APCLMULLQHQDQ, + "PCLMULLQLQDQ", LTYPEY, APCLMULLQLQDQ, + "PCLMULQDQ", LTYPEY, APCLMULQDQ, + "PCMPEQQ", LTYPEY, APCMPEQQ, + "PCMPESTRI", LTYPEY, APCMPESTRI, + "PCMPESTRM", LTYPEY, APCMPESTRM, + "PCMPGTQ", LTYPEY, APCMPGTQ, + "PCMPISTRI", LTYPEY, APCMPISTRI, + "PCMPISTRM", LTYPEY, APCMPISTRM, + "PEXTRB", LTYPEY, APEXTRB, + "PEXTRL", LTYPEY, APEXTRL, + "PHADDL", LTYPEY, APHADDL, + "PHADDSW", LTYPEY, APHADDSW, + "PHADDW", LTYPEY, APHADDW, + "PHMINPOSUW", LTYPEY, APHMINPOSUW, + "PHSUBL", LTYPEY, APHSUBL, + "PHSUBSW", LTYPEY, APHSUBSW, + "PHSUBW", LTYPEY, APHSUBW, + "PINSRB", LTYPEY, APINSRB, + "PINSRL", LTYPEY, APINSRL, + "PMADDUBSW", LTYPEY, APMADDUBSW, + "PMAXSB", LTYPEY, APMAXSB, + "PMAXSL", LTYPEY, APMAXSL, + "PMAXUL", LTYPEY, APMAXUL, + "PMAXUW", LTYPEY, APMAXUW, + "PMINSB", LTYPEY, APMINSB, + "PMINSL", LTYPEY, APMINSL, + "PMINUL", LTYPEY, APMINUL, + "PMINUW", LTYPEY, APMINUW, + "PMOVSXBL", LTYPEY, APMOVSXBL, + "PMOVSXBQ", LTYPEY, APMOVSXBQ, + "PMOVSXBW", LTYPEY, APMOVSXBW, + "PMOVSXLQ", LTYPEY, APMOVSXLQ, + "PMOVSXWL", LTYPEY, APMOVSXWL, + "PMOVSXWQ", LTYPEY, APMOVSXWQ, + "PMOVZXBL", LTYPEY, APMOVZXBL, + "PMOVZXBQ", LTYPEY, APMOVZXBQ, + "PMOVZXBW", LTYPEY, APMOVZXBW, + "PMOVZXLQ", LTYPEY, APMOVZXLQ, + "PMOVZXWL", LTYPEY, APMOVZXWL, + "PMOVZXWQ", LTYPEY, APMOVZXWQ, + "PMULHRSW", LTYPEY, APMULHRSW, + "PMULLL", LTYPEY, APMULLL, + "PMULLQ", LTYPEY, APMULLQ, + "PSHUFB", LTYPEY, APSHUFB, + "PSIGNB", LTYPEY, APSIGNB, + "PSIGNL", LTYPEY, APSIGNL, + "PSIGNW", LTYPEY, APSIGNW, + "PTEST", LTYPEY, APTEST, + "ROUNDPD", LTYPEY, AROUNDPD, + "ROUNDPS", LTYPEY, AROUNDPS, + "ROUNDSD", LTYPEY, AROUNDSD, + "ROUNDSS", LTYPEY, AROUNDSS, + + /* vex only */ + "VBROADCASTF128", LTYPEY, AVBROADCASTF128, + "VBROADCASTSL", LTYPEY, AVBROADCASTSL, + "VBROADCASTSS", LTYPEY, AVBROADCASTSS, + "VMASKMOVPD", LTYPEY, AVMASKMOVPD, + "VMASKMOVPS", LTYPEY, AVMASKMOVPS, + "VPERMILPD", LTYPEY, AVPERMILPD, + "VPERMILPS", LTYPEY, AVPERMILPS, + "VTESTPD", LTYPEY, AVTESTPD, + "VTESTPS", LTYPEY, AVTESTPS, + + /* V* synonyms */ + "VADDPD", LTYPEY, AADDPD, + "VADDPS", LTYPEY, AADDPS, + "VADDSD", LTYPEY, AADDSD, + "VADDSS", LTYPEY, AADDSS, + "VANDNPD", LTYPEY, AANDNPD, + "VANDNPS", LTYPEY, AANDNPS, + "VANDPD", LTYPEY, AANDPD, + "VANDPS", LTYPEY, AANDPS, + "VCMPPD", LTYPEXC,ACMPPD, + "VCMPPS", LTYPEXC,ACMPPS, + "VCMPSD", LTYPEXC,ACMPSD, + "VCMPSS", LTYPEXC,ACMPSS, + "VCOMISD", LTYPEY, ACOMISD, + "VCOMISS", LTYPEY, ACOMISS, + "VCVTPL2PD", LTYPEY, ACVTPL2PD, + "VCVTPL2PS", LTYPEY, ACVTPL2PS, + "VCVTPD2PL", LTYPEY, ACVTPD2PL, + "VCVTPD2PS", LTYPEY, ACVTPD2PS, + "VCVTPS2PL", LTYPEY, ACVTPS2PL, + "VCVTPS2PD", LTYPEY, ACVTPS2PD, + "VCVTSD2SL", LTYPEY, ACVTSD2SL, + "VCVTSD2SQ", LTYPEY, ACVTSD2SQ, + "VCVTSD2SS", LTYPEY, ACVTSD2SS, + "VCVTSL2SD", LTYPEY, ACVTSL2SD, + "VCVTSQ2SD", LTYPEY, ACVTSQ2SD, + "VCVTSL2SS", LTYPEY, ACVTSL2SS, + "VCVTSQ2SS", LTYPEY, ACVTSQ2SS, + "VCVTSS2SD", LTYPEY, ACVTSS2SD, + "VCVTSS2SL", LTYPEY, ACVTSS2SL, + "VCVTSS2SQ", LTYPEY, ACVTSS2SQ, + "VCVTTPD2PL", LTYPEY, ACVTTPD2PL, + "VCVTTPS2PL", LTYPEY, ACVTTPS2PL, + "VCVTTSD2SL", LTYPEY, ACVTTSD2SL, + "VCVTTSS2SL", LTYPEY, ACVTTSS2SL, + "VDIVPD", LTYPEY, ADIVPD, + "VDIVPS", LTYPEY, ADIVPS, + "VDIVSD", LTYPEY, ADIVSD, + "VDIVSS", LTYPEY, ADIVSS, + "VLDMXCSR", LTYPE2, ALDMXCSR, + "VMASKMOVDQU", LTYPE3, AMASKMOVDQU, + "VMASKMOVQ", LTYPE3, AMASKMOVQ, + "VMAXPD", LTYPEY, AMAXPD, + "VMAXPS", LTYPEY, AMAXPS, + "VMAXSD", LTYPEY, AMAXSD, + "VMAXSS", LTYPEY, AMAXSS, + "VMINPD", LTYPEY, AMINPD, + "VMINPS", LTYPEY, AMINPS, + "VMINSD", LTYPEY, AMINSD, + "VMINSS", LTYPEY, AMINSS, + "VMOVAPD", LTYPE3, AMOVAPD, + "VMOVAPS", LTYPE3, AMOVAPS, + "VMOVD", LTYPE3, AMOVL, /* syn */ + "VMOVDQA", LTYPE3, AMOVDQA, + "VMOVDQU", LTYPE3, AMOVDQU, + "VMOVQ", LTYPE3, AMOVQ, + "VMOVQQA", LTYPE3, AMOVQQA, + "VMOVQQU", LTYPE3, AMOVQQU, + "VMOVHLPS", LTYPE3, AMOVHLPS, + "VMOVHPD", LTYPE3, AMOVHPD, + "VMOVHPS", LTYPE3, AMOVHPS, + "VMOVLHPS", LTYPE3, AMOVLHPS, + "VMOVLPD", LTYPE3, AMOVLPD, + "VMOVLPS", LTYPE3, AMOVLPS, + "VMOVMSKPD", LTYPE3, AMOVMSKPD, + "VMOVMSKPS", LTYPE3, AMOVMSKPS, + "VMOVNTDQ", LTYPE3, AMOVNTDQ, + "VMOVNTPD", LTYPE3, AMOVNTPD, + "VMOVNTPS", LTYPE3, AMOVNTPS, + "VMOVNTQ", LTYPE3, AMOVNTQ, + "VMOVSD", LTYPE3, AMOVSD, + "VMOVSS", LTYPE3, AMOVSS, + "VMOVUPD", LTYPE3, AMOVUPD, + "VMOVUPS", LTYPE3, AMOVUPS, + "VMULPD", LTYPEY, AMULPD, + "VMULPS", LTYPEY, AMULPS, + "VMULSD", LTYPEY, AMULSD, + "VMULSS", LTYPEY, AMULSS, + "VORPD", LTYPEY, AORPD, + "VORPS", LTYPEY, AORPS, + "VPACKSSLW", LTYPEY, APACKSSLW, + "VPACKSSWB", LTYPEY, APACKSSWB, + "VPACKUSWB", LTYPEY, APACKUSWB, + "VPADDB", LTYPEY, APADDB, + "VPADDL", LTYPEY, APADDL, + "VPADDQ", LTYPEY, APADDQ, + "VPADDSB", LTYPEY, APADDSB, + "VPADDSW", LTYPEY, APADDSW, + "VPADDUSB", LTYPEY, APADDUSB, + "VPADDUSW", LTYPEY, APADDUSW, + "VPADDW", LTYPEY, APADDW, + "VPAND", LTYPEY, APAND, + "VPANDB", LTYPEY, APANDB, + "VPANDL", LTYPEY, APANDL, + "VPANDSB", LTYPEY, APANDSB, + "VPANDSW", LTYPEY, APANDSW, + "VPANDUSB", LTYPEY, APANDUSB, + "VPANDUSW", LTYPEY, APANDUSW, + "VPANDW", LTYPEY, APANDW, + "VPANDN", LTYPEY, APANDN, + "VPAVGB", LTYPEY, APAVGB, + "VPAVGW", LTYPEY, APAVGW, + "VPCMPEQB", LTYPEY, APCMPEQB, + "VPCMPEQL", LTYPEY, APCMPEQL, + "VPCMPEQW", LTYPEY, APCMPEQW, + "VPCMPGTB", LTYPEY, APCMPGTB, + "VPCMPGTL", LTYPEY, APCMPGTL, + "VPCMPGTW", LTYPEY, APCMPGTW, + "VPEXTRW", LTYPEX, APEXTRW, + "VPINSRW", LTYPEX, APINSRW, + "VPMADDWL", LTYPEY, APMADDWL, + "VPMAXSW", LTYPEY, APMAXSW, + "VPMAXUB", LTYPEY, APMAXUB, + "VPMINSW", LTYPEY, APMINSW, + "VPMINUB", LTYPEY, APMINUB, + "VPMOVMSKB", LTYPE3, APMOVMSKB, + "VPMULHUW", LTYPEY, APMULHUW, + "VPMULHW", LTYPEY, APMULHW, + "VPMULLW", LTYPEY, APMULLW, + "VPMULULQ", LTYPEY, APMULULQ, + "VPOR", LTYPEY, APOR, + "VPSADBW", LTYPEY, APSADBW, + "VPSHUFHW", LTYPEX, APSHUFHW, + "VPSHUFL", LTYPEX, APSHUFL, + "VPSHUFLW", LTYPEX, APSHUFLW, + "VPSLLDQ", LTYPEY, APSLLDQ, + "VPSLLL", LTYPEY, APSLLL, + "VPSLLQ", LTYPEY, APSLLQ, + "VPSLLW", LTYPEY, APSLLW, + "VPSRAL", LTYPEY, APSRAL, + "VPSRAW", LTYPEY, APSRAW, + "VPSRLDQ", LTYPEY, APSRLDQ, + "VPSRLL", LTYPEY, APSRLL, + "VPSRLQ", LTYPEY, APSRLQ, + "VPSRLW", LTYPEY, APSRLW, + "VPSUBB", LTYPEY, APSUBB, + "VPSUBL", LTYPEY, APSUBL, + "VPSUBQ", LTYPEY, APSUBQ, + "VPSUBSB", LTYPEY, APSUBSB, + "VPSUBSW", LTYPEY, APSUBSW, + "VPSUBUSB", LTYPEY, APSUBUSB, + "VPSUBUSW", LTYPEY, APSUBUSW, + "VPSUBW", LTYPEY, APSUBW, + "VPUNPCKHBW", LTYPEY, APUNPCKHBW, + "VPUNPCKHLQ", LTYPEY, APUNPCKHLQ, + "VPUNPCKHQDQ", LTYPEY, APUNPCKHQDQ, + "VPUNPCKHWL", LTYPEY, APUNPCKHWL, + "VPUNPCKLBW", LTYPEY, APUNPCKLBW, + "VPUNPCKLLQ", LTYPEY, APUNPCKLLQ, + "VPUNPCKLQDQ", LTYPEY, APUNPCKLQDQ, + "VPUNPCKLWL", LTYPEY, APUNPCKLWL, + "VPXOR", LTYPEY, APXOR, + "VRCPPS", LTYPEY, ARCPPS, + "VRCPSS", LTYPEY, ARCPSS, + "VRSQRTPS", LTYPEY, ARSQRTPS, + "VRSQRTSS", LTYPEY, ARSQRTSS, + "VSHUFPD", LTYPEX, ASHUFPD, + "VSHUFPS", LTYPEX, ASHUFPS, + "VSQRTPD", LTYPEY, ASQRTPD, + "VSQRTPS", LTYPEY, ASQRTPS, + "VSQRTSD", LTYPEY, ASQRTSD, + "VSQRTSS", LTYPEY, ASQRTSS, + "VSTMXCSR", LTYPE1, ASTMXCSR, + "VSUBPD", LTYPEY, ASUBPD, + "VSUBPS", LTYPEY, ASUBPS, + "VSUBSD", LTYPEY, ASUBSD, + "VSUBSS", LTYPEY, ASUBSS, + "VUCOMISD", LTYPEY, AUCOMISD, + "VUCOMISS", LTYPEY, AUCOMISS, + "VUNPCKHPD", LTYPEY, AUNPCKHPD, + "VUNPCKHPS", LTYPEY, AUNPCKHPS, + "VUNPCKLPD", LTYPEY, AUNPCKLPD, + "VUNPCKLPS", LTYPEY, AUNPCKLPS, + "VXORPD", LTYPEY, AXORPD, + "VXORPS", LTYPEY, AXORPS, + "VAESDEC", LTYPEY, AAESDEC, + "VAESDECLAST", LTYPEY, AAESDECLAST, + "VAESENC", LTYPEY, AAESENC, + "VAESENCLAST", LTYPEY, AAESENCLAST, + "VAESIMC", LTYPEY, AAESIMC, + "VAESKEYGENASSIST", LTYPEY, AAESKEYGENASSIST, + "VBLENDPD", LTYPEY, ABLENDPD, + "VBLENDPS", LTYPEY, ABLENDPS, + "VBLENDVPD", LTYPEY, ABLENDVPD, + "VBLENDVPS", LTYPEY, ABLENDVPS, + "VDPPD", LTYPEY, ADPPD, + "VDPPS", LTYPEY, ADPPS, + "VEXTRACTPS", LTYPEY, AEXTRACTPS, + "VINSERTPS", LTYPEY, AINSERTPS, + "VMOVNTDQA", LTYPEY, AMOVNTDQA, + "VMPSADBW", LTYPEY, AMPSADBW, + "VPABSB", LTYPEY, APABSB, + "VPABSD", LTYPEY, APABSL, /* syn */ + "VPABSL", LTYPEY, APABSL, + "VPABSW", LTYPEY, APABSW, + "VPACKUSDW", LTYPEY, APACKUSDW, + "VPALIGNR", LTYPEX, APALIGNR, + "VPBLENDVB", LTYPEY, APBLENDVB, + "VPBLENDW", LTYPEY, APBLENDW, + "VPCLMULHQHQDQ", LTYPEY, APCLMULHQHQDQ, + "VPCLMULHQLQDQ", LTYPEY, APCLMULHQLQDQ, + "VPCLMULLQHQDQ", LTYPEY, APCLMULLQHQDQ, + "VPCLMULLQLQDQ", LTYPEY, APCLMULLQLQDQ, + "VPCLMULQDQ", LTYPEY, APCLMULQDQ, + "VPCMPEQQ", LTYPEY, APCMPEQQ, + "VPCMPESTRI", LTYPEY, APCMPESTRI, + "VPCMPESTRM", LTYPEY, APCMPESTRM, + "VPCMPGTQ", LTYPEY, APCMPGTQ, + "VPCMPISTRI", LTYPEY, APCMPISTRI, + "VPCMPISTRM", LTYPEY, APCMPISTRM, + "VPEXTRB", LTYPEY, APEXTRB, + "VPEXTRL", LTYPEY, APEXTRL, + "VPHADDL", LTYPEY, APHADDL, + "VPHADDSW", LTYPEY, APHADDSW, + "VPHADDW", LTYPEY, APHADDW, + "VPHMINPOSUW", LTYPEY, APHMINPOSUW, + "VPHSUBL", LTYPEY, APHSUBL, + "VPHSUBSW", LTYPEY, APHSUBSW, + "VPHSUBW", LTYPEY, APHSUBW, + "VPINSRB", LTYPEY, APINSRB, + "VPINSRL", LTYPEY, APINSRL, + "VPMADDUBSW", LTYPEY, APMADDUBSW, + "VPMAXSB", LTYPEY, APMAXSB, + "VPMAXSL", LTYPEY, APMAXSL, + "VPMAXUL", LTYPEY, APMAXUL, + "VPMAXUW", LTYPEY, APMAXUW, + "VPMINSB", LTYPEY, APMINSB, + "VPMINSL", LTYPEY, APMINSL, + "VPMINUL", LTYPEY, APMINUL, + "VPMINUW", LTYPEY, APMINUW, + "VPMOVSXBL", LTYPEY, APMOVSXBL, + "VPMOVSXBQ", LTYPEY, APMOVSXBQ, + "VPMOVSXBW", LTYPEY, APMOVSXBW, + "VPMOVSXLQ", LTYPEY, APMOVSXLQ, + "VPMOVSXWL", LTYPEY, APMOVSXWL, + "VPMOVSXWQ", LTYPEY, APMOVSXWQ, + "VPMOVZXBL", LTYPEY, APMOVZXBL, + "VPMOVZXBQ", LTYPEY, APMOVZXBQ, + "VPMOVZXBW", LTYPEY, APMOVZXBW, + "VPMOVZXLQ", LTYPEY, APMOVZXLQ, + "VPMOVZXWL", LTYPEY, APMOVZXWL, + "VPMOVZXWQ", LTYPEY, APMOVZXWQ, + "VPMULHRSW", LTYPEY, APMULHRSW, + "VPMULLL", LTYPEY, APMULLL, + "VPMULLQ", LTYPEY, APMULLQ, + "VPSHUFB", LTYPEY, APSHUFB, + "VPSIGNB", LTYPEY, APSIGNB, + "VPSIGNL", LTYPEY, APSIGNL, + "VPSIGNW", LTYPEY, APSIGNW, + "VPTEST", LTYPEY, APTEST, + "VROUNDPD", LTYPEY, AROUNDPD, + "VROUNDPS", LTYPEY, AROUNDPS, + "VROUNDSD", LTYPEY, AROUNDSD, + "VROUNDSS", LTYPEY, AROUNDSS, 0 }; +int +isxyreg(int t) +{ + return t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15; +} + void cinit(void) { @@ -1066,8 +1454,7 @@ void zaddr(Gen *a, int s) { - long l; - int i, t; + int i, l, t; char *n; Ieee e; diff -Nru /n/sources/plan9/sys/src/cmd/6c/6.out.h /sys/src/cmd/6c/6.out.h --- /n/sources/plan9/sys/src/cmd/6c/6.out.h Mon May 23 18:57:57 2005 +++ /sys/src/cmd/6c/6.out.h Wed Nov 11 00:00:00 2015 @@ -535,7 +535,7 @@ AFXSAVE, AFXSAVE64, ALDMXCSR, - AMASKMOVOU, + AMASKMOVDQU, AMASKMOVQ, AMAXPD, AMAXPS, @@ -547,7 +547,7 @@ AMINSS, AMOVAPD, AMOVAPS, - AMOVOU, + AMOVDQU, AMOVHLPS, AMOVHPD, AMOVHPS, @@ -556,7 +556,7 @@ AMOVLPS, AMOVMSKPD, AMOVMSKPS, - AMOVNTO, + AMOVNTDQ, AMOVNTPD, AMOVNTPS, AMOVNTQ, @@ -636,13 +636,13 @@ APSHUFL, APSHUFLW, APSHUFW, - APSLLO, + APSLLDQ, APSLLL, APSLLQ, APSLLW, APSRAL, APSRAW, - APSRLO, + APSRLDQ, APSRLL, APSRLQ, APSRLW, @@ -699,6 +699,119 @@ AMODE, + AMOVQQA, + AMOVQQU, + ABSWAPL, + ABSWAPQ, + + /* more sse/vex */ + AAESDEC, + AAESDECLAST, + AAESENC, + AAESENCLAST, + AAESIMC, + AAESKEYGENASSIST, + ABLENDPD, + ABLENDPS, + ABLENDVPD, + ABLENDVPS, + ACRC32L, + ADPPD, + ADPPS, + AEXTRACTPS, + AINSERTPS, + AMOVNTDQA, + AMPSADBW, + APABSB, + APABSL, + APABSW, + APACKUSDW, + APALIGNR, + APBLENDVB, + APBLENDW, + APCLMULHQHQDQ, + APCLMULHQLQDQ, + APCLMULLQHQDQ, + APCLMULLQLQDQ, + APCLMULQDQ, + APCMPEQQ, + APCMPESTRI, + APCMPESTRM, + APCMPGTQ, + APCMPISTRI, + APCMPISTRM, + APEXTRB, + APEXTRL, + APHADDL, + APHADDSW, + APHADDW, + APHMINPOSUW, + APHSUBL, + APHSUBSW, + APHSUBW, + APINSRB, + APINSRL, + APMADDUBSW, + APMAXSB, + APMAXSL, + APMAXUL, + APMAXUW, + APMINSB, + APMINSL, + APMINUL, + APMINUW, + APMOVSXBL, + APMOVSXBQ, + APMOVSXBW, + APMOVSXLQ, + APMOVSXWL, + APMOVSXWQ, + APMOVZXBL, + APMOVZXBQ, + APMOVZXBW, + APMOVZXLQ, + APMOVZXWL, + APMOVZXWQ, + APMULHRSW, + APMULLL, + APMULLQ, + APSHUFB, + APSIGNB, + APSIGNL, + APSIGNW, + APTEST, + AROUNDPD, + AROUNDPS, + AROUNDSD, + AROUNDSS, + AVBROADCASTF128, + AVBROADCASTSL, + AVBROADCASTSS, + AVMASKMOVPD, + AVMASKMOVPS, + AVPERMILPD, + AVPERMILPS, + AVTESTPD, + AVTESTPS, + AXSAVE, + AXSAVEOPT, + AXRSTOR, + ACRC32B, + ACRC32W, + ARDRANDW, + ARDRANDL, + ACRC32Q, + APEXTRQ, + APINSRQ, + ARDRANDQ, + AMOVDQA, + APOPCNTW, + APOPCNTL, + APOPCNTQ, + AMOVQL, + APAUSE, + ACMPXCHG16B, + ALAST }; @@ -744,11 +857,11 @@ D_DH, D_BH, - D_F0 = 36, - - D_M0 = 44, + D_Y0 = 36, + D_Y15 = D_Y0+15, D_X0 = 52, + D_X15 = D_X0+15, D_CS = 68, D_SS, @@ -783,6 +896,14 @@ D_FILE1, D_INDIR, /* additive */ + D_CONST2 = D_INDIR+D_INDIR, + D_SIZE, + + D_M0, + D_M7 = D_M0+7, + D_F0 = D_M0, + D_F7 = D_F0+7, + D_XREG, T_TYPE = 1<<0, T_INDEX = 1<<1, diff -Nru /n/sources/plan9/sys/src/cmd/6c/cgen.c /sys/src/cmd/6c/cgen.c --- /n/sources/plan9/sys/src/cmd/6c/cgen.c Thu Feb 28 19:10:56 2013 +++ /sys/src/cmd/6c/cgen.c Wed Nov 11 00:00:00 2015 @@ -2,7 +2,6 @@ /* ,x/^(print|prtree)\(/i/\/\/ */ int castup(Type*, Type*); -void checkmask(Node*, Node*); void cgen(Node *n, Node *nn) @@ -258,8 +257,6 @@ break; } } - if(n->op == OAND) - checkmask(n, r); if(r->addable >= INDEXED && !hardconst(r)) { regalloc(&nod, l, nn); cgen(l, &nod); @@ -475,8 +472,6 @@ goto asand; if(l->op == OBIT) goto asbitop; - if(typefd[n->type->etype]) - goto asand; /* can this happen? */ /* * get nod to be D_CX @@ -524,8 +519,6 @@ goto asbitop; if(typefd[l->type->etype] || typefd[r->type->etype]) goto asfop; - if(o == OASAND) - checkmask(n, r); if(l->complex >= r->complex) { if(hardleft) reglcgen(&nod, l, Z); @@ -1866,22 +1859,6 @@ return ft == TULONG || ft == TUINT || ft == TUSHORT; } return 0; -} - -/* - * vl &= ~ul or vl & ~ul - * create a ul mask with top bits zero, which is usually wrong - */ -void -checkmask(Node *n, Node *r) -{ - Node *rl; - - if((n->op == OAND || n->op == OASAND) && - r->op == OCAST && - (rl = r->left)->op == OCOM && - typesuv[n->type->etype] && typeu[rl->type->etype] && typechl[rl->type->etype]) - warn(n, "32-bit mask zero-extended to 64 bits"); } void diff -Nru /n/sources/plan9/sys/src/cmd/6c/enam.c /sys/src/cmd/6c/enam.c --- /n/sources/plan9/sys/src/cmd/6c/enam.c Mon May 23 18:57:57 2005 +++ /sys/src/cmd/6c/enam.c Wed Nov 11 00:00:00 2015 @@ -504,7 +504,7 @@ "FXSAVE", "FXSAVE64", "LDMXCSR", - "MASKMOVOU", + "MASKMOVDQU", "MASKMOVQ", "MAXPD", "MAXPS", @@ -516,7 +516,7 @@ "MINSS", "MOVAPD", "MOVAPS", - "MOVOU", + "MOVDQU", "MOVHLPS", "MOVHPD", "MOVHPS", @@ -525,7 +525,7 @@ "MOVLPS", "MOVMSKPD", "MOVMSKPS", - "MOVNTO", + "MOVNTDQ", "MOVNTPD", "MOVNTPS", "MOVNTQ", @@ -605,13 +605,13 @@ "PSHUFL", "PSHUFLW", "PSHUFW", - "PSLLO", + "PSLLDQ", "PSLLL", "PSLLQ", "PSLLW", "PSRAL", "PSRAW", - "PSRLO", + "PSRLDQ", "PSRLL", "PSRLQ", "PSRLW", @@ -665,5 +665,115 @@ "RETFQ", "SWAPGS", "MODE", + "MOVQQA", + "MOVQQU", + "BSWAPL", + "BSWAPQ", + "AESDEC", + "AESDECLAST", + "AESENC", + "AESENCLAST", + "AESIMC", + "AESKEYGENASSIST", + "BLENDPD", + "BLENDPS", + "BLENDVPD", + "BLENDVPS", + "CRC32L", + "DPPD", + "DPPS", + "EXTRACTPS", + "INSERTPS", + "MOVNTDQA", + "MPSADBW", + "PABSB", + "PABSL", + "PABSW", + "PACKUSDW", + "PALIGNR", + "PBLENDVB", + "PBLENDW", + "PCLMULHQHQDQ", + "PCLMULHQLQDQ", + "PCLMULLQHQDQ", + "PCLMULLQLQDQ", + "PCLMULQDQ", + "PCMPEQQ", + "PCMPESTRI", + "PCMPESTRM", + "PCMPGTQ", + "PCMPISTRI", + "PCMPISTRM", + "PEXTRB", + "PEXTRL", + "PHADDL", + "PHADDSW", + "PHADDW", + "PHMINPOSUW", + "PHSUBL", + "PHSUBSW", + "PHSUBW", + "PINSRB", + "PINSRL", + "PMADDUBSW", + "PMAXSB", + "PMAXSL", + "PMAXUL", + "PMAXUW", + "PMINSB", + "PMINSL", + "PMINUL", + "PMINUW", + "PMOVSXBL", + "PMOVSXBQ", + "PMOVSXBW", + "PMOVSXLQ", + "PMOVSXWL", + "PMOVSXWQ", + "PMOVZXBL", + "PMOVZXBQ", + "PMOVZXBW", + "PMOVZXLQ", + "PMOVZXWL", + "PMOVZXWQ", + "PMULHRSW", + "PMULLL", + "PMULLQ", + "PSHUFB", + "PSIGNB", + "PSIGNL", + "PSIGNW", + "PTEST", + "ROUNDPD", + "ROUNDPS", + "ROUNDSD", + "ROUNDSS", + "VBROADCASTF128", + "VBROADCASTSL", + "VBROADCASTSS", + "VMASKMOVPD", + "VMASKMOVPS", + "VPERMILPD", + "VPERMILPS", + "VTESTPD", + "VTESTPS", + "XSAVE", + "XSAVEOPT", + "XRSTOR", + "CRC32B", + "CRC32W", + "RDRANDW", + "RDRANDL", + "CRC32Q", + "PEXTRQ", + "PINSRQ", + "RDRANDQ", + "MOVDQA", + "POPCNTW", + "POPCNTL", + "POPCNTQ", + "MOVQL", + "PAUSE", + "CMPXCHG16B", "LAST", }; diff -Nru /n/sources/plan9/sys/src/cmd/6c/gc.h /sys/src/cmd/6c/gc.h --- /n/sources/plan9/sys/src/cmd/6c/gc.h Mon Mar 4 21:15:20 2013 +++ /sys/src/cmd/6c/gc.h Wed Nov 11 00:00:00 2015 @@ -122,7 +122,7 @@ Node* scope; }; -#define NRGN 600 +#define NRGN 1000 struct Rgn { Reg* enter; @@ -157,7 +157,7 @@ EXTERN Sym* symrathole; EXTERN Node znode; EXTERN Prog zprog; -EXTERN int reg[D_NONE]; +EXTERN int reg[D_XREG]; EXTERN long exregoffset; EXTERN long exfregoffset; EXTERN uchar typechlpv[NTYPE]; @@ -334,9 +334,6 @@ long FtoB(int); int BtoR(long); int BtoF(long); - -#define D_HI D_NONE -#define D_LO D_NONE #define isregtype(t) ((t)>= D_AX && (t)<=D_R15) diff -Nru /n/sources/plan9/sys/src/cmd/6c/list.c /sys/src/cmd/6c/list.c --- /n/sources/plan9/sys/src/cmd/6c/list.c Mon May 23 18:57:57 2005 +++ /sys/src/cmd/6c/list.c Wed Nov 11 00:00:00 2015 @@ -47,13 +47,13 @@ p = va_arg(fp->args, Prog*); if(p->as == ADATA) - sprint(str, " %A %D/%d,%D", + snprint(str, sizeof(str), " %A %D/%d,%D", p->as, &p->from, p->from.scale, &p->to); else if(p->as == ATEXT) - sprint(str, " %A %D,%d,%D", + snprint(str, sizeof(str), " %A %D,%d,%D", p->as, &p->from, p->from.scale, &p->to); else - sprint(str, " %A %D,%D", + snprint(str, sizeof(str), " %A %D,%D", p->as, &p->from, &p->to); return fmtstrcpy(fp, str); } @@ -76,20 +76,20 @@ a = va_arg(fp->args, Adr*); i = a->type; - if(i >= D_INDIR) { + if(i >= D_INDIR && i < D_CONST2) { if(a->offset) - sprint(str, "%lld(%R)", a->offset, i-D_INDIR); + snprint(str, sizeof(str), "%lld(%R)", a->offset, i-D_INDIR); else - sprint(str, "(%R)", i-D_INDIR); + snprint(str, sizeof(str), "(%R)", i-D_INDIR); goto brk; } switch(i) { default: if(a->offset) - sprint(str, "$%lld,%R", a->offset, i); + snprint(str, sizeof(str), "$%lld,%R", a->offset, i); else - sprint(str, "%R", i); + snprint(str, sizeof(str), "%R", i); break; case D_NONE: @@ -97,53 +97,54 @@ break; case D_BRANCH: - sprint(str, "%lld(PC)", a->offset-pc); + snprint(str, sizeof(str), "%lld(PC)", a->offset-pc); break; case D_EXTERN: - sprint(str, "%s+%lld(SB)", a->sym->name, a->offset); + snprint(str, sizeof(str), "%s+%lld(SB)", a->sym->name, a->offset); break; case D_STATIC: - sprint(str, "%s<>+%lld(SB)", a->sym->name, + snprint(str, sizeof(str), "%s<>+%lld(SB)", a->sym->name, a->offset); break; case D_AUTO: - sprint(str, "%s+%lld(SP)", a->sym->name, a->offset); + snprint(str, sizeof(str), "%s+%lld(SP)", a->sym->name, a->offset); break; case D_PARAM: if(a->sym) - sprint(str, "%s+%lld(FP)", a->sym->name, a->offset); + snprint(str, sizeof(str), "%s+%lld(FP)", a->sym->name, a->offset); else - sprint(str, "%lld(FP)", a->offset); + snprint(str, sizeof(str), "%lld(FP)", a->offset); break; case D_CONST: - sprint(str, "$%lld", a->offset); + snprint(str, sizeof(str), "$%lld", a->offset); break; case D_FCONST: - sprint(str, "$(%.17e)", a->dval); + snprint(str, sizeof(str), "$(%.17e)", a->dval); break; case D_SCONST: - sprint(str, "$\"%S\"", a->sval); + snprint(str, sizeof(str), "$\"%S\"", a->sval); break; case D_ADDR: a->type = a->index; a->index = D_NONE; - sprint(str, "$%D", a); + snprint(str, sizeof(str), "$%D", a); a->index = a->type; a->type = D_ADDR; goto conv; } brk: if(a->index != D_NONE) { - sprint(s, "(%R*%d)", (int)a->index, (int)a->scale); - strcat(str, s); + fmtstrcpy(fp, str); + snprint(s, sizeof(s), "(%R*%d)", (int)a->index, (int)a->scale); + return fmtstrcpy(fp, s); } conv: return fmtstrcpy(fp, str); @@ -190,23 +191,23 @@ "DH", "BH", - "F0", /* [D_F0] */ - "F1", - "F2", - "F3", - "F4", - "F5", - "F6", - "F7", - - "M0", - "M1", - "M2", - "M3", - "M4", - "M5", - "M6", - "M7", + "Y0", /* [D_Y0] */ + "Y1", + "Y2", + "Y3", + "Y4", + "Y5", + "Y6", + "Y7", + + "Y8", + "Y9", + "Y10", + "Y11", + "Y12", + "Y13", + "Y14", + "Y15", "X0", "X1", @@ -284,9 +285,13 @@ r = va_arg(fp->args, int); if(r >= D_AL && r <= D_NONE) - sprint(str, "%s", regstr[r-D_AL]); + snprint(str, sizeof(str), "%s", regstr[r-D_AL]); + else if(r >= D_Y0 && r <= D_Y15) + snprint(str, sizeof(str), "Y%d", r-D_Y0); + else if(r >= D_M0 && r <= D_M7) + snprint(str, sizeof(str), "M%d", r-D_M0); else - sprint(str, "gok(%d)", r); + snprint(str, sizeof(str), "gok(%d)", r); return fmtstrcpy(fp, str); } diff -Nru /n/sources/plan9/sys/src/cmd/6c/mul.c /sys/src/cmd/6c/mul.c --- /n/sources/plan9/sys/src/cmd/6c/mul.c Mon May 23 18:57:57 2005 +++ /sys/src/cmd/6c/mul.c Wed Nov 11 00:00:00 2015 @@ -312,6 +312,11 @@ Mparam *p; Node nod, nods; + if(v == 0){ + zeroregm(n); + return 1; + } + for(i = 0; i < nelem(multab); i++) { p = &multab[i]; if(p->value == v) diff -Nru /n/sources/plan9/sys/src/cmd/6c/peep.c /sys/src/cmd/6c/peep.c --- /n/sources/plan9/sys/src/cmd/6c/peep.c Wed Dec 28 20:52:06 2005 +++ /sys/src/cmd/6c/peep.c Wed Nov 11 00:00:00 2015 @@ -117,6 +117,7 @@ case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: + case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { @@ -347,8 +348,13 @@ case ACDQ: case ACQO: + case ASTOSB: + case ASTOSL: + case ASTOSQ: + case AMOVSB: case AMOVSL: case AMOVSQ: + case AMOVQL: return 0; case AMOVL: @@ -547,6 +553,7 @@ case AMOVWLZX: case AMOVWQSX: case AMOVWQZX: + case AMOVQL: case AMOVSS: case AMOVSD: @@ -725,11 +732,23 @@ return 2; goto caseread; - case AMOVSL: - case AMOVSQ: case AREP: case AREPN: - if(v->type == D_CX || v->type == D_DI || v->type == D_SI) + if(v->type == D_CX) + return 2; + goto caseread; + + case AMOVSB: + case AMOVSL: + case AMOVSQ: + if(v->type == D_DI || v->type == D_SI) + return 2; + goto caseread; + + case ASTOSB: + case ASTOSL: + case ASTOSQ: + if(v->type == D_AX || v->type == D_DI) return 2; goto caseread; diff -Nru /n/sources/plan9/sys/src/cmd/6c/reg.c /sys/src/cmd/6c/reg.c --- /n/sources/plan9/sys/src/cmd/6c/reg.c Mon Sep 17 18:58:11 2012 +++ /sys/src/cmd/6c/reg.c Wed Nov 11 00:00:00 2015 @@ -183,6 +183,7 @@ case AMOVWLZX: case AMOVWQSX: case AMOVWQZX: + case AMOVQL: case AMOVSS: case AMOVSD: @@ -731,6 +732,7 @@ r->regu |= doregbits(t); r->regu |= doregbits(a->index); + s = a->sym; switch(t) { default: goto none; @@ -743,12 +745,13 @@ goto none; case D_EXTERN: case D_STATIC: + if(s->type != nil && s->type->garb & GVOLATILE) + goto none; case D_PARAM: case D_AUTO: n = t; break; } - s = a->sym; if(s == S) goto none; if(s->name[0] == '.') diff -Nru /n/sources/plan9/sys/src/cmd/6c/txt.c /sys/src/cmd/6c/txt.c --- /n/sources/plan9/sys/src/cmd/6c/txt.c Mon Sep 17 18:58:11 2012 +++ /sys/src/cmd/6c/txt.c Wed Nov 11 00:00:00 2015 @@ -93,9 +93,6 @@ nodret = new(OIND, nodret, Z); complex(nodret); - if(0) - com64init(); - memset(reg, 0, sizeof(reg)); for(i=0; iop == OCONST) + if(f->vconst > 0x7fffffffLL || f->vconst < -0x7fffffffLL) + if(t->op != OREGISTER) { + regalloc(&nod, f, Z); + gmove(f, &nod); + gmove(&nod, t); + regfree(&nod); + return; + } + if(f->op == ONAME || f->op == OINDREG || f->op == OIND || f->op == OINDEX) switch(ft) { @@ -665,6 +673,7 @@ a = AMOVLQZX; /* could probably use plain MOVL */ goto ld; case TVLONG: + case TUVLONG: if(typefd[tt]) { regalloc(&nod, t, t); if(tt == TDOUBLE) @@ -676,7 +685,6 @@ regfree(&nod); return; } - case TUVLONG: a = AMOVQ; goto ld; case TIND: @@ -767,7 +775,6 @@ case CASE( TUINT, TCHAR): case CASE( TLONG, TCHAR): case CASE( TULONG, TCHAR): - case CASE( TIND, TCHAR): case CASE( TCHAR, TUCHAR): case CASE( TUCHAR, TUCHAR): @@ -777,7 +784,6 @@ case CASE( TUINT, TUCHAR): case CASE( TLONG, TUCHAR): case CASE( TULONG, TUCHAR): - case CASE( TIND, TUCHAR): case CASE( TSHORT, TSHORT): case CASE( TUSHORT,TSHORT): @@ -785,7 +791,6 @@ case CASE( TUINT, TSHORT): case CASE( TLONG, TSHORT): case CASE( TULONG, TSHORT): - case CASE( TIND, TSHORT): case CASE( TSHORT, TUSHORT): case CASE( TUSHORT,TUSHORT): @@ -793,42 +798,26 @@ case CASE( TUINT, TUSHORT): case CASE( TLONG, TUSHORT): case CASE( TULONG, TUSHORT): - case CASE( TIND, TUSHORT): case CASE( TINT, TINT): case CASE( TUINT, TINT): case CASE( TLONG, TINT): case CASE( TULONG, TINT): - case CASE( TIND, TINT): case CASE( TINT, TUINT): case CASE( TUINT, TUINT): case CASE( TLONG, TUINT): case CASE( TULONG, TUINT): - case CASE( TIND, TUINT): - - case CASE( TUINT, TIND): - case CASE( TVLONG, TUINT): - case CASE( TVLONG, TULONG): - case CASE( TUVLONG, TUINT): - case CASE( TUVLONG, TULONG): *****/ a = AMOVL; break; - case CASE( TVLONG, TCHAR): - case CASE( TVLONG, TSHORT): - case CASE( TVLONG, TINT): - case CASE( TVLONG, TLONG): - case CASE( TUVLONG, TCHAR): - case CASE( TUVLONG, TSHORT): - case CASE( TUVLONG, TINT): - case CASE( TUVLONG, TLONG): + case CASE( TINT, TIND): case CASE( TINT, TVLONG): case CASE( TINT, TUVLONG): - case CASE( TLONG, TVLONG): - case CASE( TINT, TIND): case CASE( TLONG, TIND): + case CASE( TLONG, TVLONG): + case CASE( TLONG, TUVLONG): a = AMOVLQSX; if(f->op == OCONST) { f->vconst &= (uvlong)0xffffffffU; @@ -844,22 +833,53 @@ case CASE( TULONG, TVLONG): case CASE( TULONG, TUVLONG): case CASE( TULONG, TIND): - a = AMOVL; /* same effect as AMOVLQZX */ + a = AMOVLQZX; if(f->op == OCONST) { f->vconst &= (uvlong)0xffffffffU; a = AMOVQ; } break; + + case CASE( TIND, TCHAR): + case CASE( TIND, TUCHAR): + case CASE( TIND, TSHORT): + case CASE( TIND, TUSHORT): + case CASE( TIND, TINT): + case CASE( TIND, TUINT): + case CASE( TIND, TLONG): + case CASE( TIND, TULONG): + case CASE( TVLONG, TCHAR): + case CASE( TVLONG, TUCHAR): + case CASE( TVLONG, TSHORT): + case CASE( TVLONG, TUSHORT): + case CASE( TVLONG, TINT): + case CASE( TVLONG, TUINT): + case CASE( TVLONG, TLONG): + case CASE( TVLONG, TULONG): + case CASE( TUVLONG, TCHAR): + case CASE( TUVLONG, TUCHAR): + case CASE( TUVLONG, TSHORT): + case CASE( TUVLONG, TUSHORT): + case CASE( TUVLONG, TINT): + case CASE( TUVLONG, TUINT): + case CASE( TUVLONG, TLONG): + case CASE( TUVLONG, TULONG): + a = AMOVQL; + if(f->op == OCONST) { + f->vconst &= 0xffffffffU; + a = AMOVL; + } + break; + case CASE( TIND, TIND): case CASE( TIND, TVLONG): - case CASE( TVLONG, TVLONG): - case CASE( TUVLONG, TVLONG): - case CASE( TVLONG, TUVLONG): - case CASE( TUVLONG, TUVLONG): case CASE( TIND, TUVLONG): case CASE( TVLONG, TIND): + case CASE( TVLONG, TVLONG): + case CASE( TVLONG, TUVLONG): case CASE( TUVLONG, TIND): - case CASE( TIND, TIND): + case CASE( TUVLONG, TVLONG): + case CASE( TUVLONG, TUVLONG): a = AMOVQ; break; @@ -1007,7 +1027,7 @@ return; /* - * ulong to float + * uvlong to float */ case CASE( TUVLONG, TDOUBLE): case CASE( TUVLONG, TFLOAT): diff -Nru /n/sources/plan9/sys/src/cmd/6l/asm.c /sys/src/cmd/6l/asm.c --- /n/sources/plan9/sys/src/cmd/6l/asm.c Tue Mar 26 23:17:32 2013 +++ /sys/src/cmd/6l/asm.c Wed Nov 11 00:00:00 2015 @@ -28,7 +28,8 @@ return s->value; } -/* these need to take long arguments to be compatible with elf.c */void +/* these need to take long arguments to be compatible with elf.c */ +void wputl(long w) { cput(w); diff -Nru /n/sources/plan9/sys/src/cmd/6l/compat.c /sys/src/cmd/6l/compat.c --- /n/sources/plan9/sys/src/cmd/6l/compat.c Mon Dec 17 19:33:08 2012 +++ /sys/src/cmd/6l/compat.c Wed Nov 11 00:00:00 2015 @@ -50,8 +50,10 @@ } void -setmalloctag(void*, ulong) +setmalloctag(void *v, ulong pc) { + USED(v); + USED(pc); } int diff -Nru /n/sources/plan9/sys/src/cmd/6l/l.h /sys/src/cmd/6l/l.h --- /n/sources/plan9/sys/src/cmd/6l/l.h Tue Mar 26 23:18:09 2013 +++ /sys/src/cmd/6l/l.h Wed Nov 11 00:00:00 2015 @@ -17,7 +17,6 @@ cflush(); } #define LIBNAMELEN 300 - typedef struct Adr Adr; typedef struct Prog Prog; typedef struct Sym Sym; @@ -92,7 +91,7 @@ { short as; uchar* ytab; - uchar prefix; + ushort prefix; uchar op[20]; }; struct Movtab @@ -155,8 +154,8 @@ Ycr0, Ycr1, Ycr2, Ycr3, Ycr4, Ycr5, Ycr6, Ycr7, Ycr8, Ydr0, Ydr1, Ydr2, Ydr3, Ydr4, Ydr5, Ydr6, Ydr7, Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7, Yrl32, Yrl64, - Ymr, Ymm, - Yxr, Yxm, + Ymr, Ymm, + Yxr, Yxm, Yyr, Yxyr, Ymax, Zxxx = 0, @@ -212,9 +211,13 @@ Pb = 0xfe, /* byte operands */ Pf2 = 0xf2, /* xmm escape 1 */ Pf3 = 0xf3, /* xmm escape 2 */ + Pm38 = 0x38, /* 0f.38 opcode */ + Pm3a = 0x3a, /* 0f.3a opcode */ Pw = 0x48, /* Rex.w */ Py = 0x80, /* defaults to 64-bit mode */ + P2 = 1<<9, /* flag: two operand (avx only) */ + Rxf = 1<<9, /* internal flag for Rxr on from */ Rxt = 1<<8, /* internal flag for Rxr on to */ Rxw = 1<<3, /* =1, 64-bit operand size */ @@ -222,6 +225,26 @@ Rxx = 1<<1, /* extend sib index */ Rxb = 1<<0, /* extend modrm r/m, sib base, or opcode reg */ + Vex2 = 0xc5, /* 2-byte vex prefix */ + Vex3 = 0xc4, /* 3-byte vex prefix */ + + /* vex flags */ + Vexr = 1<<7, /* byte 1, both */ + Vexx = 1<<6, /* byte 1, 3-byte */ + Vexb = 1<<5, /* byte 1, 3-byte */ + Vexw = 1<<7, /* byte 2, 3-byte */ + Vexl = 1<<2, /* 256-bit vector */ + + Vexnr = 0xF<<3, /* no reg */ + Vexp0 = 0, /* no SIMD prefix */ + Vexp66 = 1, /* prefix 66 */ + Vexpf3 = 2, /* prefix f3 */ + Vexpf2 = 3, /* prefix f2 */ + + Vex0f = 1, /* 0F opcode byte */ + Vex0f38 = 2, /* 0F 38 opcode bytes */ + Vex0f3a = 3, /* 0F 3A opcode bytes */ + Roffset = 22, /* no. bits for offset in relocation address */ Rindex = 10, /* no. bits for index in relocation address */ }; @@ -244,6 +267,7 @@ #pragma varargck type "D" Adr* #pragma varargck type "P" Prog* #pragma varargck type "R" int +#pragma varargck type "R" uint #pragma varargck type "S" char* #pragma varargck argpos diag 1 @@ -291,8 +315,8 @@ EXTERN uchar* andptr; EXTERN uchar* rexptr; EXTERN uchar and[30]; -EXTERN int reg[D_NONE]; -EXTERN int regrex[D_NONE+1]; +EXTERN int reg[D_XREG]; +EXTERN int regrex[D_XREG+1]; EXTERN Prog* lastp; EXTERN long lcsize; EXTERN int nerrors; @@ -308,6 +332,7 @@ EXTERN vlong textsize; EXTERN long thunk; EXTERN int version; +EXTERN int vexed; EXTERN Prog zprg; EXTERN int dtype; EXTERN char* paramspace; @@ -315,7 +340,7 @@ EXTERN Adr* reloca; EXTERN int doexp, dlm; EXTERN int imports, nimports; -EXTERN int exports, nexports; +EXTERN int exports, nexports, allexport; EXTERN char* EXPTAB; EXTERN Prog undefp; @@ -368,12 +393,13 @@ double ieeedtod(Ieee*); long ieeedtof(Ieee*); void import(void); +int isxyreg(int); void ldobj(int, long, char*); void loadlib(void); void listinit(void); -Sym* lookup(char*, int); void llput(vlong v); void llputl(vlong v); +Sym* lookup(char*, int); void lput(long); void lputl(long); void main(int, char*[]); diff -Nru /n/sources/plan9/sys/src/cmd/6l/list.c /sys/src/cmd/6l/list.c --- /n/sources/plan9/sys/src/cmd/6l/list.c Mon May 23 18:57:48 2005 +++ /sys/src/cmd/6l/list.c Wed Nov 11 00:00:00 2015 @@ -24,18 +24,25 @@ switch(p->as) { case ATEXT: if(p->from.scale) { - sprint(str, "(%ld) %A %D,%d,%D", + snprint(str, sizeof(str), "(%ld) %A %D,%d,%D", p->line, p->as, &p->from, p->from.scale, &p->to); break; } default: - sprint(str, "(%ld) %A %D,%D", - p->line, p->as, &p->from, &p->to); + if(isxyreg(p->to.type) && p->to.index != D_NONE) + snprint(str, sizeof(str), "(%ld) V%A %D,%R,%R", + p->line, p->as, &p->from, p->to.index, p->to.type); + else if(isxyreg(p->from.type) && p->from.index != D_NONE) + snprint(str, sizeof(str), "(%ld) V%A %R,%R,%D", + p->line, p->as, p->from.type, p->from.index, &p->to); + else + snprint(str, sizeof(str), "(%ld) %A %D,%D", + p->line, p->as, &p->from, &p->to); break; case ADATA: case AINIT: case ADYNT: - sprint(str, "(%ld) %A %D/%d,%D", + snprint(str, sizeof(str), "(%ld) %A %D/%d,%D", p->line, p->as, &p->from, p->from.scale, &p->to); break; } @@ -55,26 +62,26 @@ int Dconv(Fmt *fp) { - char str[40], s[20]; + char str[STRINGSZ+40], s[20]; Adr *a; int i; a = va_arg(fp->args, Adr*); i = a->type; - if(i >= D_INDIR) { + if(i >= D_INDIR && i < D_CONST2) { if(a->offset) - sprint(str, "%lld(%R)", a->offset, i-D_INDIR); + snprint(str, sizeof(str), "%lld(%R)", a->offset, i-D_INDIR); else - sprint(str, "(%R)", i-D_INDIR); + snprint(str, sizeof(str), "(%R)", i-D_INDIR); goto brk; } switch(i) { default: if(a->offset) - sprint(str, "$%lld,%R", a->offset, i); + snprint(str, sizeof(str), "$%lld,%R", a->offset, i); else - sprint(str, "%R", i); + snprint(str, sizeof(str), "%R", i); break; case D_NONE: @@ -84,57 +91,57 @@ case D_BRANCH: if(bigP != P && bigP->pcond != P) if(a->sym != S) - sprint(str, "%llux+%s", bigP->pcond->pc, + snprint(str, sizeof(str), "%llux+%s", bigP->pcond->pc, a->sym->name); else - sprint(str, "%llux", bigP->pcond->pc); + snprint(str, sizeof(str), "%llux", bigP->pcond->pc); else - sprint(str, "%lld(PC)", a->offset); + snprint(str, sizeof(str), "%lld(PC)", a->offset); break; case D_EXTERN: - sprint(str, "%s+%lld(SB)", a->sym->name, a->offset); + snprint(str, sizeof(str), "%s+%lld(SB)", a->sym->name, a->offset); break; case D_STATIC: - sprint(str, "%s<%d>+%lld(SB)", a->sym->name, + snprint(str, sizeof(str), "%s<%d>+%lld(SB)", a->sym->name, a->sym->version, a->offset); break; case D_AUTO: - sprint(str, "%s+%lld(SP)", a->sym->name, a->offset); + snprint(str, sizeof(str), "%s+%lld(SP)", a->sym->name, a->offset); break; case D_PARAM: if(a->sym) - sprint(str, "%s+%lld(%s)", a->sym->name, a->offset, paramspace); + snprint(str, sizeof(str), "%s+%lld(%s)", a->sym->name, a->offset, paramspace); else - sprint(str, "%lld(%s)", a->offset, paramspace); + snprint(str, sizeof(str), "%lld(%s)", a->offset, paramspace); break; case D_CONST: - sprint(str, "$%lld", a->offset); + snprint(str, sizeof(str), "$%lld", a->offset); break; case D_FCONST: - sprint(str, "$(%.8lux,%.8lux)", a->ieee.h, a->ieee.l); + snprint(str, sizeof(str), "$(%.8lux,%.8lux)", a->ieee.h, a->ieee.l); break; case D_SCONST: - sprint(str, "$\"%S\"", a->scon); + snprint(str, sizeof(str), "$\"%S\"", a->scon); break; case D_ADDR: a->type = a->index; a->index = D_NONE; - sprint(str, "$%D", a); + snprint(str, sizeof(str), "$%D", a); a->index = a->type; a->type = D_ADDR; goto conv; } brk: - if(a->index != D_NONE) { - sprint(s, "(%R*%d)", a->index, a->scale); + if(a->index != D_NONE && !isxyreg(a->type)) { + snprint(s, sizeof(s), "(%R*%d)", a->index, a->scale); strcat(str, s); } conv: @@ -182,23 +189,23 @@ "DH", "BH", - "F0", /* [D_F0] */ - "F1", - "F2", - "F3", - "F4", - "F5", - "F6", - "F7", - - "M0", - "M1", - "M2", - "M3", - "M4", - "M5", - "M6", - "M7", + "Y0", /* [D_Y0] */ + "Y1", + "Y2", + "Y3", + "Y4", + "Y5", + "Y6", + "Y7", + + "Y8", + "Y9", + "Y10", + "Y11", + "Y12", + "Y13", + "Y14", + "Y15", "X0", "X1", @@ -276,9 +283,13 @@ r = va_arg(fp->args, int); if(r >= D_AL && r <= D_NONE) - sprint(str, "%s", regstr[r-D_AL]); + snprint(str, sizeof(str), "%s", regstr[r-D_AL]); + else if(r >= D_Y0 && r <= D_Y15) + snprint(str, sizeof(str), "Y%d", r-D_Y0); + else if(r >= D_M0 && r <= D_M7) + snprint(str, sizeof(str), "Y%d", r-D_M0); else - sprint(str, "gok(%d)", r); + snprint(str, sizeof(str), "gok(%d)", r); return fmtstrcpy(fp, str); } @@ -343,7 +354,7 @@ print("%s: %s\n", tn, buf); nerrors++; - if(nerrors > 20) { + if(nerrors > 20 && !debug['A']) { print("too many errors\n"); errorexit(); } diff -Nru /n/sources/plan9/sys/src/cmd/6l/obj.c /sys/src/cmd/6l/obj.c --- /n/sources/plan9/sys/src/cmd/6l/obj.c Tue Mar 26 23:16:29 2013 +++ /sys/src/cmd/6l/obj.c Wed Nov 11 00:00:00 2015 @@ -127,7 +127,7 @@ break; } ARGEND USED(argc); - if(*argv == 0) + if(*argv == nil) usage(); if(!debug['9'] && !debug['U'] && !debug['B']) debug[DEFAULT] = 1; @@ -223,9 +223,8 @@ ycover[Yax*Ymax + Yrb] = 1; ycover[Ycx*Ymax + Yrb] = 1; ycover[Yrx*Ymax + Yrb] = 1; - ycover[Yrl*Ymax + Yrb] = 1; - - ycover[Ycl*Ymax + Ycx] = 1; + ycover[Yrl*Ymax + Yrb] = 1; // 8l disables this + ycover[Ycl*Ymax + Ycx] = 1; // 8l disables this ycover[Yax*Ymax + Yrx] = 1; ycover[Ycx*Ymax + Yrx] = 1; @@ -242,7 +241,7 @@ ycover[Ycx*Ymax + Ymb] = 1; ycover[Yrx*Ymax + Ymb] = 1; ycover[Yrb*Ymax + Ymb] = 1; - ycover[Yrl*Ymax + Ymb] = 1; + ycover[Yrl*Ymax + Ymb] = 1; // 8l disables this ycover[Ym*Ymax + Ymb] = 1; ycover[Yax*Ymax + Yml] = 1; @@ -264,6 +263,10 @@ ycover[Yrl*Ymax + Yxm] = 1; ycover[Ym*Ymax + Yxm] = 1; ycover[Yxr*Ymax + Yxm] = 1; + ycover[Yxr*Ymax + Yxyr] = 1; + + ycover[Yyr*Ymax + Yxm] = 1; + ycover[Yyr*Ymax + Yxyr] = 1; for(i=0; i= D_R8) regrex[i] = Rxr | Rxx | Rxb; } +/* if(i >= D_F0 && i <= D_F0+7) reg[i] = (i-D_F0) & 7; +*/ if(i >= D_M0 && i <= D_M0+7) reg[i] = (i-D_M0) & 7; if(i >= D_X0 && i <= D_X0+15) { @@ -290,6 +295,11 @@ if(i >= D_X0+8) regrex[i] = Rxr | Rxx | Rxb; } + if(i >= D_Y0 && i <= D_Y0+15) { + reg[i] = (i-D_Y0) & 7; + if(i >= D_Y0+8) + regrex[i] = Rxr | Rxx | Rxb; + } if(i >= D_CR+8 && i <= D_CR+15) regrex[i] = Rxr; } @@ -535,8 +545,7 @@ l |= (e[3] & 0xff) << 16; l |= (e[4] & 0xff) << 24; seek(f, l, 0); - /* need readn to read the dumps (at least) */ - l = readn(f, &arhdr, SAR_HDR); + l = read(f, &arhdr, SAR_HDR); if(l != SAR_HDR) goto bad; if(strncmp(arhdr.fmag, ARFMAG, sizeof(arhdr.fmag))) @@ -579,10 +588,6 @@ } a->offset = 0; if(t & T_OFFSET) { - /* - * Hack until Charles fixes the compiler. - a->offset = (long)(p[c] | (p[c+1]<<8) | (p[c+2]<<16) | (p[c+3]<<24)); - */ l = p[c] | (p[c+1]<<8) | (p[c+2]<<16) | (p[c+3]<<24); a->offset = l; c += 4; diff -Nru /n/sources/plan9/sys/src/cmd/6l/optab.c /sys/src/cmd/6l/optab.c --- /n/sources/plan9/sys/src/cmd/6l/optab.c Thu Jan 31 20:14:09 2013 +++ /sys/src/cmd/6l/optab.c Wed Nov 11 00:00:00 2015 @@ -15,10 +15,10 @@ Ynone, Ynone, Zpseudo,1, Ynone, Yml, Zpseudo,1, Ynone, Yrf, Zpseudo,1, - Ynone, Yxr, Zpseudo,1, + Ynone, Yxyr, Zpseudo,1, Yml, Ynone, Zpseudo,1, Yrf, Ynone, Zpseudo,1, - Yxr, Ynone, Zpseudo,1, + Yxyr, Ynone, Zpseudo,1, 0 }; uchar yxorb[] = @@ -59,6 +59,7 @@ }; uchar yincl[] = { + /* 32-bit one-byte version is rex prefix (48) in 64-bit mode */ Ynone, Yml, Zo_m, 2, 0 }; @@ -178,6 +179,12 @@ Yiauto, Yrl, Zaut_r, 2, // built-in LEAQ 0 }; +uchar ymovqq[] = +{ + Yxm, Yyr, Zm_r_xm, 2, // MOVQQx ymm load + Yyr, Ym, Zr_m_xm_nr, 2, // MOVQQx ymm store + 0 +}; uchar ym_rl[] = { Ym, Yrl, Zm_r, 1, @@ -198,6 +205,16 @@ Yml, Yrl, Zm_r, 1, 0 }; +uchar ycrc[] = +{ + Yml, Yrl, Zm_r_xm, 3, + 0 +}; +uchar ycrcb[] = +{ + Ymb, Yrb, Zm_r_xm, 3, + 0 +}; uchar yrl_ml[] = { Yrl, Yml, Zr_m, 1, @@ -268,6 +285,11 @@ Ynone, Ym, Zo_m, 2, 0 }; +uchar ybswap[] = +{ + Ynone, Yrl, Z_rp, 2, + 0, +}; uchar yscond[] = { Ynone, Ymb, Zo_m, 2, @@ -370,7 +392,7 @@ uchar ymm[] = { Ymm, Ymr, Zm_r_xm, 1, - Yxm, Yxr, Zm_r_xm, 2, + Yxm, Yxyr, Zm_r_xm, 2, 0 }; uchar yxm[] = @@ -471,8 +493,13 @@ }; uchar ymrxr[] = { + Yxm, Yxyr, Zm_r_xm, 2, + 0 +}; +uchar ymrxrmv[] = +{ Ymr, Yxr, Zm_r, 1, - Yxm, Yxr, Zm_r_xm, 1, + Yxm, Yxyr, Zm_r_xm, 1, 0 }; uchar ymshuf[] = @@ -482,7 +509,12 @@ }; uchar yxshuf[] = { - Yxm, Yxr, Zibm_r, 1, + Yxm, Yxyr, Zibm_r, 1, + 0 +}; +uchar yxshuf2[] = +{ + Yxm, Yxyr, Zm_r_i_xm, 2, 0 }; uchar yextrw[] = @@ -492,15 +524,19 @@ }; uchar ypsdq[] = { - Yi8, Yxr, Zibo_m, 2, + Yi8, Yxyr, Zibo_m, 2, 0 }; uchar ymskb[] = { - Yxr, Yrl, Zm_r_xm, 2, + Yxyr, Yrl, Zm_r_xm, 2, Ymr, Yrl, Zm_r_xm, 1, 0 }; +uchar yaes[] = { + Yi8, Yxyr, Zr_m_i_xm, 3, + 0 +}; Optab optab[] = /* as, ytab, andproto, opcode */ @@ -540,6 +576,8 @@ { ABSRL, yml_rl, Pm, 0xbd }, { ABSRQ, yml_rl, Pw, 0x0f,0xbd }, { ABSRW, yml_rl, Pq, 0xbd }, + { ABSWAPL, ybswap, Px, 0x0f,0xc8 }, + { ABSWAPQ, ybswap, Pw, 0x0f,0xc8 }, { ABTCL, ybtl, Pm, 0xba,(07),0xbb }, { ABTCQ, ybtl, Pw, 0x0f,0xba,(07),0x0f,0xbb }, { ABTCW, ybtl, Pq, 0xba,(07),0xbb }, @@ -735,7 +773,7 @@ { ALOOPNE, yloop, Px, 0xe0 }, { ALSLL, yml_rl, Pm, 0x03 }, { ALSLW, yml_rl, Pq, 0x03 }, - { AMASKMOVOU, yxr, Pe, 0xf7 }, + { AMASKMOVDQU, yxr, Pe, 0xf7 }, { AMASKMOVQ, ymr, Pm, 0xf7 }, { AMAXPD, yxm, Pe, 0x5f }, { AMAXPS, yxm, Pm, 0x5f }, @@ -745,8 +783,8 @@ { AMINPS, yxm, Pm, 0x5d }, { AMINSD, yxm, Pf2, 0x5d }, { AMINSS, yxm, Pf3, 0x5d }, - { AMOVAPD, yxmov, Pe, 0x28,0x29 }, - { AMOVAPS, yxmov, Pm, 0x28,0x29 }, + { AMOVAPD, yxmov, Pe|P2, 0x28,0x29 }, + { AMOVAPS, yxmov, Pm|P2, 0x28,0x29 }, { AMOVB, ymovb, Pb, 0x88,0x8a,0xb0,0xc6,(00) }, { AMOVBLSX, ymb_rl, Pm, 0xbe }, { AMOVBLZX, ymb_rl, Pm, 0xb6 }, @@ -755,32 +793,36 @@ { AMOVBWSX, ymb_rl, Pq, 0xbe }, { AMOVBWZX, ymb_rl, Pq, 0xb6 }, { AMOVO, yxmov, Pe, 0x6f,0x7f }, - { AMOVOU, yxmov, Pf3, 0x6f,0x7f }, + { AMOVDQA, yxmov, Pe|P2, 0x6f,0x7f }, + { AMOVDQU, yxmov, Pf3|P2, 0x6f,0x7f }, { AMOVHLPS, yxr, Pm, 0x12 }, - { AMOVHPD, yxmov, Pe, 0x16,0x17 }, - { AMOVHPS, yxmov, Pm, 0x16,0x17 }, + { AMOVHPD, yxmov, Pe|P2, 0x16,0x17 }, + { AMOVHPS, yxmov, Pm|P2, 0x16,0x17 }, { AMOVL, ymovl, Px, 0x89,0x8b,0x31,0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e }, { AMOVLHPS, yxr, Pm, 0x16 }, - { AMOVLPD, yxmov, Pe, 0x12,0x13 }, - { AMOVLPS, yxmov, Pm, 0x12,0x13 }, + { AMOVLPD, yxmov, Pe|P2, 0x12,0x13 }, + { AMOVLPS, yxmov, Pm|P2, 0x12,0x13 }, { AMOVLQSX, yml_rl, Pw, 0x63 }, { AMOVLQZX, yml_rl, Px, 0x8b }, { AMOVMSKPD, yxrrl, Pq, 0x50 }, { AMOVMSKPS, yxrrl, Pm, 0x50 }, - { AMOVNTO, yxr_ml, Pe, 0xe7 }, + { AMOVNTDQ, yxr_ml, Pe, 0xe7 }, { AMOVNTPD, yxr_ml, Pe, 0x2b }, { AMOVNTPS, yxr_ml, Pm, 0x2b }, { AMOVNTQ, ymr_ml, Pm, 0xe7 }, - { AMOVQ, ymovq, Pw, 0x89,0x8b,0x31,0xc7,(00),0xb8,0xc7,(00),0x6f,0x7f,0x6e,0x7e,Pf2,0xd6,Pe,0xd6,Pe,0x6e,Pe,0x7e }, - { AMOVQOZX, ymrxr, Pf3, 0xd6,0x7e }, + { AMOVQ, ymovq, Pw|P2, 0x89,0x8b,0x31,0xc7,(00),0xb8,0xc7,(00),0x6f,0x7f,0x6e,0x7e,Pf2,0xd6,Pe,0xd6,Pe,0x6e,Pe,0x7e }, + { AMOVQL, yrl_ml, Px, 0x89 }, + { AMOVQQA, ymovqq, P2, Pe,0x6f,Pe,0x7f }, + { AMOVQQU, ymovqq, P2, Pf3,0x6f,Pf3,0x7f }, + { AMOVQOZX, ymrxrmv, Pf3, 0xd6,0x7e }, { AMOVSB, ynone, Pb, 0xa4 }, - { AMOVSD, yxmov, Pf2, 0x10,0x11 }, + { AMOVSD, yxmov, Pf2|P2, 0x10,0x11 }, { AMOVSL, ynone, Px, 0xa5 }, { AMOVSQ, ynone, Pw, 0xa5 }, - { AMOVSS, yxmov, Pf3, 0x10,0x11 }, + { AMOVSS, yxmov, Pf3|P2, 0x10,0x11 }, { AMOVSW, ynone, Pe, 0xa5 }, - { AMOVUPD, yxmov, Pe, 0x10,0x11 }, - { AMOVUPS, yxmov, Pm, 0x10,0x11 }, + { AMOVUPD, yxmov, Pe|P2, 0x10,0x11 }, + { AMOVUPS, yxmov, Pm|P2, 0x10,0x11 }, { AMOVW, ymovw, Pe, 0x89,0x8b,0x31,0xb8,0xc7,(00) }, { AMOVWLSX, yml_rl, Pm, 0xbf }, { AMOVWLZX, yml_rl, Pm, 0xb7 }, @@ -816,8 +858,12 @@ { AOUTSL, ynone, Px, 0x6f }, { AOUTSW, ynone, Pe, 0x6f }, { AOUTW, yin, Pe, 0xe7,0xef }, + { APABSB, ymrxr, Pe, Pm38,0x1c }, + { APABSL, ymrxr, Pe, Pm38,0x1e }, + { APABSW, ymrxr, Pe, Pm38,0x1d }, { APACKSSLW, ymm, Py, 0x6b,Pe,0x6b }, { APACKSSWB, ymm, Py, 0x63,Pe,0x63 }, + { APACKUSDW, ymrxr, Pe, Pm38,0x2b }, { APACKUSWB, ymm, Py, 0x67,Pe,0x67 }, { APADDB, ymm, Py, 0xfc,Pe,0xfc }, { APADDL, ymm, Py, 0xfe,Pe,0xfe }, @@ -833,6 +879,8 @@ { APAVGW, ymm, Py, 0xe3,Pe,0xe3 }, { APCMPEQB, ymm, Py, 0x74,Pe,0x74 }, { APCMPEQL, ymm, Py, 0x76,Pe,0x76 }, + { APCMPEQQ, ymrxr, Pe, Pm38,0x29 }, + { APCMPGTQ, ymrxr, Pe, Pm38,0x37 }, { APCMPEQW, ymm, Py, 0x75,Pe,0x75 }, { APCMPGTB, ymm, Py, 0x64,Pe,0x64 }, { APCMPGTL, ymm, Py, 0x66,Pe,0x66 }, @@ -858,13 +906,44 @@ { APFRSQRT, ymfp, Px, 0x97 }, { APFSUB, ymfp, Px, 0x9a }, { APFSUBR, ymfp, Px, 0xaa }, + { APHADDL, ymrxr, Pe, Pm38,0x02 }, + { APHADDSW, ymrxr, Pe, Pm38,0x03 }, + { APHADDW, ymrxr, Pe, Pm38,0x01 }, + { APHMINPOSUW, ymrxr, Pe, Pm38,0x41 }, + { APHSUBL, ymrxr, Pe, Pm38,0x06 }, + { APHSUBSW, ymrxr, Pe, Pm38,0x07 }, + { APHSUBW, ymrxr, Pe, Pm38,0x05 }, { APINSRW, yextrw, Pq, 0xc4 }, + { APMADDUBSW, ymrxr, Pe, Pm38,0x04 }, { APMADDWL, ymm, Py, 0xf5,Pe,0xf5 }, + { APMAXSB, ymrxr, Pe, Pm38,0x3c }, + { APMAXSL, ymrxr, Pe, Pm38,0x3d }, { APMAXSW, yxm, Pe, 0xee }, { APMAXUB, yxm, Pe, 0xde }, + { APMAXUL, ymrxr, Pe, Pm38,0x3f }, + { APMAXUW, ymrxr, Pe, Pm38,0x3e }, + { APMINSB, ymrxr, Pe, Pm38,0x38 }, + { APMINSL, ymrxr, Pe, Pm38,0x39 }, + { APMINUL, ymrxr, Pe, Pm38,0x3b }, + { APMINUW, ymrxr, Pe, Pm38,0x3a }, { APMINSW, yxm, Pe, 0xea }, { APMINUB, yxm, Pe, 0xda }, - { APMOVMSKB, ymskb, Px, Pe,0xd7,0xd7 }, + { APMOVMSKB, ymskb, Px|P2, Pe,0xd7,0xd7 }, + { APMOVSXBL, ymrxr, Pe, Pm38,0x21 }, + { APMOVSXBQ, ymrxr, Pe, Pm38,0x22 }, + { APMOVSXBW, ymrxr, Pe, Pm38,0x20 }, + { APMOVSXLQ, ymrxr, Pe, Pm38,0x25 }, + { APMOVSXWL, ymrxr, Pe, Pm38,0x23 }, + { APMOVSXWQ, ymrxr, Pe, Pm38,0x24 }, + { APMOVZXBL, ymrxr, Pe, Pm38,0x31 }, + { APMOVZXBQ, ymrxr, Pe, Pm38,0x32 }, + { APMOVZXBW, ymrxr, Pe, Pm38,0x30 }, + { APMOVZXLQ, ymrxr, Pe, Pm38,0x35 }, + { APMOVZXWL, ymrxr, Pe, Pm38,0x33 }, + { APMOVZXWQ, ymrxr, Pe, Pm38,0x34 }, + { APMULLL, ymrxr, Pe, Pm38,0x40 }, + { APMULLQ, ymrxr, Pe, Pm38,0x28 }, + { APMULHRSW, ymrxr, Pe, Pm38,0x0b }, { APMULHRW, ymfp, Px, 0xb7 }, { APMULHUW, ymm, Py, 0xe4,Pe,0xe4 }, { APMULHW, ymm, Py, 0xe5,Pe,0xe5 }, @@ -880,17 +959,21 @@ { APOPW, ypopl, Pe, 0x58,0x8f,(00) }, { APOR, ymm, Py, 0xeb,Pe,0xeb }, { APSADBW, yxm, Pq, 0xf6 }, + { APSHUFB, ymrxr, Pe, Pm38,0x00 }, { APSHUFHW, yxshuf, Pf3, 0x70 }, { APSHUFL, yxshuf, Pq, 0x70 }, { APSHUFLW, yxshuf, Pf2, 0x70 }, { APSHUFW, ymshuf, Pm, 0x70 }, - { APSLLO, ypsdq, Pq, 0x73,(07) }, + { APSIGNB, ymrxr, Pe, Pm38,0x08 }, + { APSIGNL, ymrxr, Pe, Pm38,0x0a }, + { APSIGNW, ymrxr, Pe, Pm38,0x09 }, + { APSLLDQ, ypsdq, Pq, 0x73,(07) }, { APSLLL, yps, Py, 0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06) }, { APSLLQ, yps, Py, 0xf3, 0x73,(06), Pe,0xf3, Pe,0x7e,(06) }, { APSLLW, yps, Py, 0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06) }, { APSRAL, yps, Py, 0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04) }, { APSRAW, yps, Py, 0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04) }, - { APSRLO, ypsdq, Pq, 0x73,(03) }, + { APSRLDQ, ypsdq, Pq, 0x73,(03) }, { APSRLL, yps, Py, 0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02) }, { APSRLQ, yps, Py, 0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02) }, { APSRLW, yps, Py, 0xd1, 0x71,(02), Pe,0xe1, Pe,0x71,(02) }, @@ -903,6 +986,7 @@ { APSUBUSW, yxm, Pe, 0xd9 }, { APSUBW, yxm, Pe, 0xf9 }, { APSWAPL, ymfp, Px, 0xbb }, + { APTEST, ymrxr, Pe, Pm38,0x17 }, { APUNPCKHBW, ymm, Py, 0x68,Pe,0x68 }, { APUNPCKHLQ, ymm, Py, 0x6a,Pe,0x6a }, { APUNPCKHQDQ, yxm, Pe, 0x6d }, @@ -925,8 +1009,8 @@ { ARCLL, yshl, Px, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) }, { ARCLQ, yshl, Pw, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) }, { ARCLW, yshl, Pe, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) }, - { ARCPPS, yxm, Pm, 0x53 }, - { ARCPSS, yxm, Pf3, 0x53 }, + { ARCPPS, yxm, Pm|P2, 0x53 }, + { ARCPSS, yxm, Pf3|P2, 0x53 }, { ARCRB, yshb, Pb, 0xd0,(03),0xc0,(03),0xd2,(03) }, { ARCRL, yshl, Px, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) }, { ARCRQ, yshl, Pw, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) }, @@ -945,8 +1029,8 @@ { ARORL, yshl, Px, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, { ARORQ, yshl, Pw, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, { ARORW, yshl, Pe, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, - { ARSQRTPS, yxm, Pm, 0x52 }, - { ARSQRTSS, yxm, Pf3, 0x52 }, + { ARSQRTPS, yxm, Pm|P2, 0x52 }, + { ARSQRTSS, yxm, Pf3|P2, 0x52 }, { ASAHF, ynone, Px, 0x86,0xe0,0x50,0x9d }, /* XCHGB AH,AL; PUSH AX; POPFL */ { ASALB, yshb, Pb, 0xd0,(04),0xc0,(04),0xd2,(04) }, { ASALL, yshl, Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) }, @@ -990,10 +1074,10 @@ { ASHRW, yshl, Pe, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) }, { ASHUFPD, yxshuf, Pq, 0xc6 }, { ASHUFPS, yxshuf, Pm, 0xc6 }, - { ASQRTPD, yxm, Pe, 0x51 }, - { ASQRTPS, yxm, Pm, 0x51 }, - { ASQRTSD, yxm, Pf2, 0x51 }, - { ASQRTSS, yxm, Pf3, 0x51 }, + { ASQRTPD, yxm, Pe|P2, 0x51 }, + { ASQRTPS, yxm, Pm|P2, 0x51 }, + { ASQRTSD, yxm, Pf2|P2, 0x51 }, + { ASQRTSS, yxm, Pf3|P2, 0x51 }, { ASTC, ynone, Px, 0xf9 }, { ASTD, ynone, Px, 0xfd }, { ASTI, ynone, Px, 0xfb }, @@ -1150,6 +1234,7 @@ { ACMPXCHGL, yrl_ml, Px, 0x0f,0xb1 }, { ACMPXCHGW, yrl_ml, Pe, 0x0f,0xb1 }, { ACMPXCHGQ, yrl_ml, Pw, 0x0f,0xb1 }, + { ACMPXCHG16B, yscond, Pw, 0x0f,0xc7,(01) }, { ACMPXCHG8B, yscond, Pm, 0xc7,(01) }, { AINVD, ynone, Pm, 0x08 }, { AINVLPG, ymbs, Pm, 0x01,(07) }, @@ -1171,15 +1256,31 @@ { AXADDQ, yrl_ml, Pw, 0x0f,0xc1 }, { AXADDW, yrl_ml, Pe, 0x0f,0xc1 }, + { APALIGNR, yxshuf2, Pe, Pm3a,0x0f }, + + { AXSAVE, ysvrs, Pm, 0xae,(04),0xae,(04) }, + { AXSAVEOPT, ysvrs, Pm, 0xae,(06),0xae,(06) }, + { AXRSTOR, ysvrs, Pm, 0xae,(05),0xae,(05) }, + + { ACRC32L, ycrc, Px, Pf2,0x38,0xf1 }, + { ACRC32W, ycrc, Pe, Pf2,0x38,0xf1 }, + { ACRC32B, ycrcb, Pb, Pf2,0x38,0xf0 }, + { ACRC32Q, ycrc, Pw, Pf2,0x38,0xf1 }, + { APAUSE, ynone, Px, 0xf3,0x90 }, + + { AAESIMC, yxm, Pe, Pm38, 0xdb }, + { AAESENC, yxm, Pe, Pm38, 0xdc }, + { AAESENCLAST, yxm, Pe, Pm38, 0xdd }, + { AAESDEC, yxm, Pe, Pm38, 0xde }, + { AAESDECLAST, yxm, Pe, Pm38, 0xdf }, + { AAESKEYGENASSIST, yaes, Pe, Pm3a, 0xdf }, + { APCLMULQDQ, yaes, Pe, Pm3a, 0x44 }, + + { ABLENDPS, yaes, Pe, Pm3a, 0xdc }, + { ABLENDPD, yaes, Pe, Pm3a, 0x0d }, + { AEND }, 0 }; Optab* opindex[ALAST+1]; - -/* -AMOVD 0f 6e/r mmx,reg/mem32[mem64-rex?] -AMOVD 0f 7e/r reg/mem32[64],mmx STORE -AMOVQ 0f 6f/r mmx1,mmx2/mem64 -AMOVQ 0f 7f/r mmx1/mem64,mmx2 -*/ diff -Nru /n/sources/plan9/sys/src/cmd/6l/pass.c /sys/src/cmd/6l/pass.c --- /n/sources/plan9/sys/src/cmd/6l/pass.c Mon Dec 8 12:52:09 2008 +++ /sys/src/cmd/6l/pass.c Wed Nov 11 00:00:00 2015 @@ -99,8 +99,10 @@ s->value = bsssize + datsize; bsssize += t; } + xdefine("bdata", SDATA, 0L); xdefine("edata", SBSS, datsize); xdefine("end", SBSS, bsssize + datsize); + /* etext is defined in span.c */ } Prog* @@ -157,7 +159,7 @@ if(q == lastp) break; a = q->as; - if(a == ANOP) { + if(a == ANOP || a == ATEXT) { i--; continue; } @@ -229,11 +231,19 @@ if(a == AJMP || a == ARET || a == AIRETL || a == AIRETQ || a == AIRETW || a == ARETFL || a == ARETFQ || a == ARETFW) return; + if(a == ATEXT) { + xfol(p->link); + q = p->pcond; + if(q == P || q->mark) + return; + p = q; + goto loop; + } if(p->pcond != P) if(a != ACALL) { q = brchain(p->link); if(q != P && q->mark) - if(a != ALOOP) { + if(a != ALOOP && a != ATEXT) { p->as = relinv(a); p->link = p->pcond; p->pcond = q; @@ -326,7 +336,8 @@ Bprint(&bso, "%s calls %s\n", TNAME, s->name); switch(s->type) { default: - diag("undefined: %s in %s", s->name, TNAME); + /* diag prints TNAME first */ + diag("undefined: %s", s->name); s->type = STEXT; s->value = vexit; break; /* or fall through to set offset? */ @@ -656,7 +667,8 @@ if(s->value != 0) diag("value != 0 on SXREF"); undefsym(s); - Bprint(&bso, "IMPORT: %s sig=%lux v=%lld\n", s->name, s->sig, s->value); + if(debug['X']) + Bprint(&bso, "IMPORT: %s sig=%lux v=%lld\n", s->name, s->sig, s->value); if(debug['S']) s->sig = 0; } @@ -701,14 +713,14 @@ n = 0; for(i = 0; i < NHASH; i++) for(s = hash[i]; s != S; s = s->link) - if(s->sig != 0 && s->type != SXREF && s->type != SUNDEF && (nexports == 0 || s->subtype == SEXPORT)) + if(s->type != SXREF && s->type != SUNDEF && (nexports == 0 && s->sig != 0 || s->subtype == SEXPORT || allexport)) n++; esyms = malloc(n*sizeof(Sym*)); ne = n; n = 0; for(i = 0; i < NHASH; i++) for(s = hash[i]; s != S; s = s->link) - if(s->sig != 0 && s->type != SXREF && s->type != SUNDEF && (nexports == 0 || s->subtype == SEXPORT)) + if(s->type != SXREF && s->type != SUNDEF && (nexports == 0 && s->sig != 0 || s->subtype == SEXPORT || allexport)) esyms[n++] = s; for(i = 0; i < ne-1; i++) for(j = i+1; j < ne; j++) diff -Nru /n/sources/plan9/sys/src/cmd/6l/span.c /sys/src/cmd/6l/span.c --- /n/sources/plan9/sys/src/cmd/6l/span.c Thu Mar 28 17:51:09 2013 +++ /sys/src/cmd/6l/span.c Wed Nov 11 00:00:00 2015 @@ -1,6 +1,7 @@ #include "l.h" static int rexflag; +static int vexbytes; static int asmode; void @@ -321,12 +322,30 @@ } int +prefixof(Adr *a) +{ + switch(a->type) { + case D_INDIR+D_CS: + return 0x2e; + case D_INDIR+D_DS: + return 0x3e; + case D_INDIR+D_ES: + return 0x26; + case D_INDIR+D_FS: + return 0x64; + case D_INDIR+D_GS: + return 0x65; + } + return 0; +} + +int oclass(Adr *a) { vlong v; long l; - if(a->type >= D_INDIR || a->index != D_NONE) { + if(a->type < D_CONST2 && (a->type >= D_INDIR || !isxyreg(a->type) && a->index != D_NONE)) { if(a->index != D_NONE && a->scale == 0) { if(a->type == D_ADDR) { switch(a->index) { @@ -401,6 +420,7 @@ case D_DI: return Yrl; +/* case D_F0+0: return Yf0; @@ -412,6 +432,7 @@ case D_F0+6: case D_F0+7: return Yrf; +*/ case D_M0+0: case D_M0+1: @@ -441,6 +462,24 @@ case D_X0+15: return Yxr; + case D_Y0+0: + case D_Y0+1: + case D_Y0+2: + case D_Y0+3: + case D_Y0+4: + case D_Y0+5: + case D_Y0+6: + case D_Y0+7: + case D_Y0+8: + case D_Y0+9: + case D_Y0+10: + case D_Y0+11: + case D_Y0+12: + case D_Y0+13: + case D_Y0+14: + case D_Y0+15: + return Yyr; + case D_NONE: return Ynone; @@ -674,11 +713,14 @@ int t; Adr aa; + if(r == -1) + diag("asmandsz: immedate instead of register"); + rex &= (0x40 | Rxr); v = a->offset; t = a->type; - if(a->index != D_NONE) { - if(t >= D_INDIR) { + if(a->index != D_NONE && !isxyreg(t)) { + if(t >= D_INDIR && t < D_CONST2) { t -= D_INDIR; rexflag |= (regrex[a->index] & Rxx) | (regrex[t] & Rxb) | rex; if(t == D_NONE) { @@ -721,17 +763,26 @@ asmandsz(&aa, r, rex, m64); return; } - if(t >= D_AL && t <= D_X0+15) { + if(t >= D_AL && t <= D_BH) { if(v) goto bad; *andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3); rexflag |= (regrex[t] & (0x40 | Rxb)) | rex; return; } - if(t >= D_INDIR) { + if(t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15) { + if(v) + goto bad; + *andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3); + if(t >= D_Y0 && t <= D_Y15) + vexbytes |= Vexl; + rexflag |= (regrex[t] & (0x40 | Rxb)) | rex; + return; + } + if(t >= D_INDIR && t < D_CONST2) { t -= D_INDIR; rexflag |= (regrex[t] & Rxb) | rex; - if(t == D_NONE) { + if(t == D_NONE || D_CS <= t && t <= D_GS) { if(asmode != 64){ *andptr++ = (0 << 6) | (5 << 0) | (r << 3); put4(v); @@ -799,6 +850,29 @@ return; } +int +isxyreg(int t) +{ + return t >= D_X0 && t <= D_X15 || t >= D_Y0 && t <= D_Y15; +} + +static void +vexreg(Adr *a) +{ + int t; + + t = a->type; + if(t >= D_Y0 && t <= D_Y15) { + vexbytes |= Vexl; + } else if(t >= D_X0 && t <= D_X15) { + if(vexed) + vexbytes |= Vexr; /* force vex prefix */ + } else + return; + if(a->index != D_NONE) + vexbytes |= a->index << 8; +} + void asmand(Adr *a, Adr *ra) { @@ -806,9 +880,57 @@ } void +asmandg(Adr *a, Adr *r, int o, int rdest, int prefix) +{ + Adr aa, rr; + + if(isxyreg(a->type)) { + if(isxyreg(a->index) && r->type == D_CONST) { + /* + * convert sse instructions with immediate like + * AESKEYGENASSIST $32, X1, X2 from + * a=X1(X2*0); r=$32 to a=X1, r=X2. the + * caller adds the immediate byte. vex is not required + */ + rr.offset = 0; + rr.sym = a->sym; + rr.type = a->index; + rr.index = D_NONE; + rr.scale = 0; + r = &rr; + + aa = *a; + aa.index = D_NONE; + a = &aa; + } + } + vexreg(a); + if(isxyreg(a->type)) { + if(a->index != D_NONE) { + aa = *a; + aa.index = D_NONE; + a = &aa; + } + } + if(r == nil) { + asmandsz(a, o, 0, 0); + return; + } + vexreg(r); + if(rdest && (prefix&P2) == 0 && vexbytes != 0 && (vexbytes>>8) == 0) { + /* copy destination register as second source register */ + if(isxyreg(r->type)) { + vexbytes |= r->type << 8; + rexflag |= regrex[r->type] & Rxx; + } + } + asmand(a, r); +} + +void asmando(Adr *a, int o) { - asmandsz(a, o, 0, 0); + asmandg(a, nil, o, 0, 0); } static void @@ -996,6 +1118,13 @@ mediaop(Optab *o, int op, int osize, int z) { switch(op){ + case Pm38: + case Pm3a: + *andptr++ = Pm; /* 0f */ + *andptr++ = op; /* 38 | 3a */ + op = o->op[++z]; + break; + case Pm: case Pe: case Pf2: @@ -1005,6 +1134,10 @@ *andptr++ = op; *andptr++ = Pm; op = o->op[++z]; + if(op == Pm38 || op == Pm3a) { + *andptr++ = op; + op = o->op[++z]; + } break; } default: @@ -1023,8 +1156,16 @@ Prog *q, pp; uchar *t; Movtab *mo; - int z, op, ft, tt, xo, l; + int z, op, ft, tt, xo, l, pre; vlong v; + Adr vmi; + + pre = prefixof(&p->from); + if(pre) + *andptr++ = pre; + pre = prefixof(&p->to); + if(pre) + *andptr++ = pre; o = opindex[p->as]; if(o == nil) { @@ -1038,7 +1179,7 @@ diag("asmins: noproto %P", p); return; } - xo = o->op[0] == 0x0f; + xo = o->op[0] == Pm; for(z=0; *t; z+=t[3]+xo,t+=4) if(ycover[ft+t[0]]) if(ycover[tt+t[1]]) @@ -1046,7 +1187,7 @@ goto domov; found: - switch(o->prefix) { + switch(o->prefix & 0xFF) { case Pq: /* 16 bit escape and opcode escape */ *andptr++ = Pe; *andptr++ = Pm; @@ -1054,7 +1195,7 @@ case Pf2: /* xmm opcode escape */ case Pf3: - *andptr++ = o->prefix; + *andptr++ = o->prefix & 0xFF; *andptr++ = Pm; break; @@ -1111,36 +1252,36 @@ /* fall through */ case Zm_r: *andptr++ = op; - asmand(&p->from, &p->to); + asmandg(&p->from, &p->to, 0, 1, o->prefix); break; case Zm_r_xm: mediaop(o, op, t[3], z); - asmand(&p->from, &p->to); + asmandg(&p->from, &p->to, 0, 1, o->prefix); break; case Zm_r_xm_nr: rexflag = 0; mediaop(o, op, t[3], z); - asmand(&p->from, &p->to); + asmandg(&p->from, &p->to, 0, 1, o->prefix); break; case Zm_r_i_xm: mediaop(o, op, t[3], z); - asmand(&p->from, &p->to); + asmandg(&p->from, &p->to, 0, 1, o->prefix); *andptr++ = p->to.offset; break; case Zm_r_3d: *andptr++ = 0x0f; *andptr++ = 0x0f; - asmand(&p->from, &p->to); + asmandg(&p->from, &p->to, 0, 1, o->prefix); *andptr++ = op; break; case Zibm_r: *andptr++ = op; - asmand(&p->from, &p->to); + asmandg(&p->from, &p->to, 0, 1, o->prefix); *andptr++ = p->to.offset; break; @@ -1167,18 +1308,18 @@ case Zr_m_xm: mediaop(o, op, t[3], z); - asmand(&p->to, &p->from); + asmandg(&p->to, &p->from, 0, 0, o->prefix); break; case Zr_m_xm_nr: rexflag = 0; mediaop(o, op, t[3], z); - asmand(&p->to, &p->from); + asmandg(&p->to, &p->from, 0, 0, o->prefix); break; case Zr_m_i_xm: mediaop(o, op, t[3], z); - asmand(&p->to, &p->from); + asmandg(&p->to, &p->from, 0, 0, o->prefix); *andptr++ = p->from.offset; break; @@ -1206,8 +1347,13 @@ break; case Zibo_m_xm: + vmi = p->to; + if(p->to.index != D_NONE) { /* VMI has "non-destructive dest" with dest in Vex.vvvv */ + vmi.type = p->to.index; + vmi.index = p->to.type; + } z = mediaop(o, op, t[3], z); - asmando(&p->to, o->op[z+1]); + asmando(&vmi, o->op[z+1]); *andptr++ = v; break; @@ -1472,6 +1618,10 @@ return; } } + if(0) { + int ft = oclass(&p->from), tt = oclass(&p->to); extern char* yclname[]; + fprint(2, "ft=%d [%s] tt=%d [%s]\n", ft, yclname[ft], tt, yclname[tt]); + } diag("doasm: notfound from=%ux to=%ux %P", p->from.type, p->to.type, p); return; @@ -1566,37 +1716,123 @@ break; } break; + + case 7: /* imul rm,r */ + *andptr++ = t[4]; + *andptr++ = t[5]; + asmand(&p->from, &p->to); + break; } } void asmins(Prog *p) { - int n, np, c; + int n, np, o, c, t, v1, v2, vexlen; + vexbytes = 0; rexflag = 0; andptr = and; asmode = p->mode; doasm(p); - if(rexflag){ - /* - * as befits the whole approach of the architecture, - * the rex prefix must appear before the first opcode byte - * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but - * before the 0f opcode escape!), or it might be ignored. - * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. - */ - if(p->mode != 64) - diag("asmins: illegal in mode %d: %P", p->mode, p); - n = andptr - and; - for(np = 0; np < n; np++) { - c = and[np]; - if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26) + if(vexbytes == 0) { + if(rexflag) { + if(0) fprint(2, "rexflag=%#ux %P\n", rexflag, p); + /* + * the rex prefix must appear before the first opcode byte + * and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but + * before the 0f opcode escape. + * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'. + */ + if(p->mode != 64) + diag("asmins: illegal in mode %d: %P", p->mode, p); + n = andptr - and; + for(np = 0; np < n; np++) { + c = and[np]; + if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26) + break; + } + memmove(and+np+1, and+np, n-np); + and[np] = 0x40 | rexflag; + andptr++; + } + return; + } + if(0) if(rexflag||vexbytes)fprint(2, "rexflag=%#ux vexbytes=%#ux %P\n", rexflag, vexbytes, p); + n = andptr - and; +//vex if need vvvv register or W or L. never need R X B (must be 1 in 32-bit) +//note: 4th register encoding in immediate byte + /* media/sse/vex: seg* (66|F3|F2)? 0F (38|3A)? op -> seg* vex2|vex3 op */ + for(np = 0; np < n; np++) { /* seg* */ + c = and[np]; + if(c != 0x2e && c != 0x3e && c != 0x26 && c != 0x64 && c != 0x65) + break; + } + o = np; + if(np+1 < n) { + v1 = 0; + v2 = (vexbytes & Vexl) | Vexp0; + switch(and[np]) { + case 0x66: + v2 |= Vexp66; + np++; + break; + case 0xF3: + v2 |= Vexpf3; + np++; + break; + case 0xF2: + v2 |= Vexpf2; + np++; + break; + } + c = and[np]; + if(c == Vex2 || c == Vex3) + return; /* already vexed */ + if(and[np] != 0x0F) { + diag("internal: inconsistent vex state: %P", p); + return; + } + np++; + if(np < n) { + switch(and[np]) { + case 0x38: + v1 = Vex0f38; + np++; + break; + case 0x3a: + v1 = Vex0f3a; + np++; + break; + default: + if(rexflag & (Rxw|Rxx|Rxb)) + v1 = Vex0f; /* force 3-byte vex */ break; + } + } + t = vexbytes >> 8; + if(t >= D_Y0 && t <= D_Y15) + t -= D_Y0; + else if(t >= D_X0 && t <= D_X15) + t -= D_X0; + v2 |= (~t & 0xF) << 3; + vexlen = 2; + if(v1 != 0) + vexlen = 3; + if(o+vexlen != np) { + memmove(and+o+vexlen, and+np, n-np); + andptr = and+(o+vexlen)+(n-np); + } + if(vexlen == 2) { + and[o] = Vex2; + and[o+1] = v2 | ((~rexflag<<5) & Vexr); + } else { + and[o] = Vex3; + and[o+1] = v1 | ((~rexflag<<5) & (Vexr | Vexx | Vexb)); + if(rexflag & Rxw) + v2 |= Vexw; + and[o+2] = v2; } - memmove(and+np+1, and+np, n-np); - and[np] = 0x40 | rexflag; - andptr++; } } @@ -1752,3 +1988,74 @@ Bprint(&bso, "export table entries = %d\n", exports); } } + +char* yclname[] ={ + [Yxxx] "Yxxx", + [Ynone] "Ynone", + [Yi0] "Yi0", + [Yi1] "Yi1", + [Yi8] "Yi8", + [Ys32] "Ys32", + [Yi32] "Yi32", + [Yi64] "Yi64", + [Yiauto] "Yiauto", + [Yal] "Yal", + [Ycl] "Ycl", + [Yax] "Yax", + [Ycx] "Ycx", + [Yrb] "Yrb", + [Yrl] "Yrl", + [Yrf] "Yrf", + [Yf0] "Yf0", + [Yrx] "Yrx", + [Ymb] "Ymb", + [Yml] "Yml", + [Ym] "Ym", + [Ybr] "Ybr", + [Ycol] "Ycol", + [Ycs] "Ycs", + [Yss] "Yss", + [Yds] "Yds", + [Yes] "Yes", + [Yfs] "Yfs", + [Ygs] "Ygs", + [Ygdtr] "Ygdtr", + [Yidtr] "Yidtr", + [Yldtr] "Yldtr", + [Ymsw] "Ymsw", + [Ytask] "Ytask", + [Ycr0] "Ycr0", + [Ycr1] "Ycr1", + [Ycr2] "Ycr2", + [Ycr3] "Ycr3", + [Ycr4] "Ycr4", + [Ycr5] "Ycr5", + [Ycr6] "Ycr6", + [Ycr7] "Ycr7", + [Ycr8] "Ycr8", + [Ydr0] "Ydr0", + [Ydr1] "Ydr1", + [Ydr2] "Ydr2", + [Ydr3] "Ydr3", + [Ydr4] "Ydr4", + [Ydr5] "Ydr5", + [Ydr6] "Ydr6", + [Ydr7] "Ydr7", + [Ytr0] "Ytr0", + [Ytr1] "Ytr1", + [Ytr2] "Ytr2", + [Ytr3] "Ytr3", + [Ytr4] "Ytr4", + [Ytr5] "Ytr5", + [Ytr6] "Ytr6", + [Ytr7] "Ytr7", + [Yrl32] "Yrl32", + [Yrl64] "Yrl64", + [Ymr] "Ymr", + [Ymm] "Ymm", + [Yxr] "Yxr", + [Yxm] "Yxm", + [Yyr] "Yyr", + [Yxyr] "Yxyr", + [Ymax] "Ymax", +};