x86: MONITOR/MWAIT are not SSE3 insns

MONITOR and MWAIT have their own CPUID feature bit, separate from SSE3's,
and hence they should also have their own separate control.
This commit is contained in:
Jan Beulich
2023-02-24 13:59:35 +01:00
parent c3bb24f566
commit cafa5ef72e
22 changed files with 7480 additions and 7355 deletions

View File

@ -1027,6 +1027,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
SUBARCH (monitor, MONITOR, MONITOR, false),
SUBARCH (vmx, VMX, ANY_VMX, false),
SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
SUBARCH (smx, SMX, SMX, false),

View File

@ -152,6 +152,7 @@ accept various extension mnemonics. For example,
@code{avx},
@code{avx2},
@code{lahf_sahf},
@code{monitor},
@code{adx},
@code{rdseed},
@code{prfchw},
@ -1487,7 +1488,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
@item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
@item @samp{.lzcnt} @tab @samp{.popcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc}
@item @samp{.hle} @tab @samp{.rtm} @tab @samp{.tsx}
@item @samp{.monitor} @tab @samp{.hle} @tab @samp{.rtm} @tab @samp{.tsx}
@item @samp{.lahf_sahf} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
@item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
@item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}

View File

@ -30,6 +30,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
.*:66: Error: .*
GAS LISTING .*
@ -101,3 +102,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
[ ]*65[ ]+\# MONITOR
[ ]*66[ ]+monitor

View File

@ -29,6 +29,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
.*:66: Error: .*
GAS LISTING .*
@ -100,3 +101,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
[ ]*65[ ]+\# MONITOR
[ ]*66[ ]+monitor

View File

@ -22,6 +22,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
.*:66: Error: .*
GAS LISTING .*
@ -96,3 +97,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
[ ]*65[ ]+\# MONITOR
[ ]*66[ ]+monitor

View File

@ -20,6 +20,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
.*:66: Error: .*
GAS LISTING .*
@ -94,3 +95,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
[ ]*65[ ]+\# MONITOR
[ ]*66[ ]+monitor

View File

@ -0,0 +1,99 @@
.*: Assembler messages:
.*:4: Error: .*
.*:6: Error: .*
.*:8: Error: .*
.*:10: Error: .*
.*:12: Error: .*
.*:14: Error: .*
.*:16: Error: .*
.*:18: Error: .*
.*:20: Error: .*
.*:22: Error: .*
.*:24: Error: .*
.*:26: Error: .*
.*:28: Error: .*
.*:30: Error: .*
.*:32: Error: .*
.*:34: Error: .*
.*:36: Error: .*
.*:38: Error: .*
.*:40: Error: .*
.*:42: Error: .*
.*:44: Error: .*
.*:46: Error: .*
.*:48: Error: .*
.*:50: Error: .*
.*:52: Error: .*
.*:54: Error: .*
.*:56: Error: .*
.*:58: Error: .*
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
[ ]*1[ ]+\.include "arch-10\.s"
[ ]*1[ ]+\# Test -march=
[ ]*2[ ]+\.text
[ ]*3[ ]+\# cmov feature *
[ ]*4[ ]+cmove %eax,%ebx
[ ]*5[ ]+\# clflush
[ ]*6[ ]+clflush \(%eax\)
[ ]*7[ ]+\# SYSCALL
[ ]*8[ ]+syscall
[ ]*9[ ]+\# MMX
[ ]*10[ ]+paddb %mm4,%mm3
[ ]*11[ ]+\# SSE
[ ]*12[ ]+addss %xmm4,%xmm3
[ ]*13[ ]+\# SSE2
[ ]*14[ ]+addsd %xmm4,%xmm3
[ ]*15[ ]+\# SSE3
[ ]*16[ ]+addsubpd %xmm4,%xmm3
[ ]*17[ ]+\# SSSE3
[ ]*18[ ]+phaddw %xmm4,%xmm3
[ ]*19[ ]+\# SSE4\.1
[ ]*20[ ]+phminposuw %xmm1,%xmm3
[ ]*21[ ]+\# SSE4\.2
[ ]*22[ ]+crc32 %ecx,%ebx
[ ]*23[ ]+\# AVX
[ ]*24[ ]+vzeroall
[ ]*25[ ]+\# VMX
[ ]*26[ ]+vmxoff
[ ]*27[ ]+\# SMX
[ ]*28[ ]+getsec
[ ]*29[ ]+\# Xsave
[ ]*30[ ]+xgetbv
[ ]*31[ ]+\# Xsaveopt
[ ]*32[ ]+xsaveopt \(%ecx\)
[ ]*33[ ]+\# AES
[ ]*34[ ]+aesenc \(%ecx\),%xmm0
[ ]*35[ ]+\# PCLMUL
[ ]*36[ ]+pclmulqdq \$8,%xmm1,%xmm0
[ ]*37[ ]+\# AES \+ AVX
[ ]*38[ ]+vaesenc \(%ecx\),%xmm0,%xmm2
[ ]*39[ ]+\# PCLMUL \+ AVX
[ ]*40[ ]+vpclmulqdq \$8,%xmm4,%xmm6,%xmm2
[ ]*41[ ]+\# FMA
[ ]*42[ ]+vfmadd132pd %xmm4,%xmm6,%xmm2
[ ]*43[ ]+\# MOVBE
[ ]*44[ ]+movbe \(%ecx\),%ebx
[ ]*45[ ]+\# EPT
[ ]*46[ ]+invept \(%ecx\),%ebx
[ ]*47[ ]+\# RDTSCP
[ ]*48[ ]+rdtscp
[ ]*49[ ]+\# 3DNow or PRFCHW
[ ]*50[ ]+prefetchw 0x1000\(,%esi,2\)
[ ]*51[ ]+\# SSE4a
[ ]*52[ ]+insertq %xmm2,%xmm1
[ ]*53[ ]+\# SVME
[ ]*54[ ]+vmload
[ ]*55[ ]+\# ABM/LZCNT
[ ]*56[ ]+lzcnt %ecx,%ebx
[ ]*57[ ]+\# PadLock
[ ]*58[ ]+xstorerng
[ ]*59[ ]+\# nop
[ ]*60[ ]+nopl \(%eax\)
[ ]*61[ ]+\# BMI
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
[ ]*65[ ]+\# MONITOR
[ ]*66[ ]+\?\?\?\? 0F01C8 monitor

View File

@ -0,0 +1 @@
.include "arch-10.s"

View File

@ -1,5 +1,5 @@
#source: arch-10.s
#as: -march=i686+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+rdtscp+3dnowa+sse4a+svme+lzcnt+padlock+bmi+tbm
#as: -march=i686+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+monitor+rdtscp+3dnowa+sse4a+svme+lzcnt+padlock+bmi+tbm
#objdump: -dw
#name: i386 arch 10 (lzcnt)
#dump: arch-10.d

View File

@ -1,5 +1,5 @@
#source: arch-10.s
#as: -march=i686+mmx+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+rdtscp+sse4a+svme+lzcnt+padlock+bmi+tbm+prfchw
#as: -march=i686+mmx+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+monitor+rdtscp+sse4a+svme+lzcnt+padlock+bmi+tbm+prfchw
#objdump: -dw
#name: i386 arch 10 (prefetchw)
#dump: arch-10.d

View File

@ -1,4 +1,4 @@
#as: -march=i686+mmx+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+nop+syscall+rdtscp+3dnowa+sse4a+svme+abm+padlock+bmi+tbm
#as: -march=i686+mmx+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+nop+syscall+monitor+rdtscp+3dnowa+sse4a+svme+abm+padlock+bmi+tbm
#objdump: -dw
#name: i386 arch 10
@ -38,4 +38,5 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 0f 1f 00 nopl \(%eax\)
[ ]*[a-f0-9]+: c4 e2 60 f3 c9 blsr %ecx,%ebx
[ ]*[a-f0-9]+: 8f e9 60 01 c9 blcfill %ecx,%ebx
[ ]*[a-f0-9]+: 0f 01 c8 monitor( .*)
#pass

View File

@ -62,3 +62,5 @@ nopl (%eax)
blsr %ecx,%ebx
# TBM
blcfill %ecx,%ebx
# MONITOR
monitor

View File

@ -206,6 +206,7 @@ if [gas_32_check] then {
run_list_test "arch-10-3" "-march=i686+mmx+sse4.2 -I${srcdir}/$subdir -al"
run_list_test "arch-10-4" "-march=i686+mmx+sse4+vmx+smx -I${srcdir}/$subdir -al"
run_list_test "arch-10-5" "-march=generic32+i686 -al"
run_list_test "arch-10-6" "-march=generic32+monitor -I${srcdir}/$subdir -aln"
run_dump_test "arch-11"
run_dump_test "arch-12"
run_dump_test "arch-13"

View File

@ -5,3 +5,4 @@ GAS LISTING .*
[ ]*1[ ]+\# Test -march=\+nosse
[ ]*2[ ]+\.text
[ ]*3[ ]+lfence
[ ]*4[ ]+\?\?\?\? 0F01C8 monitor

View File

@ -1,3 +1,4 @@
# Test -march=+nosse
.text
lfence
monitor

View File

@ -2,6 +2,7 @@
.*:6: Error: .*generic.*
.*:9: Error: .*\.sse.*
.*:12: Error: .*\.sse2.*
.*:14: Error: .*\.sse3.*
.*:15: Error: .*\.sse3.*
.*:18: Error: .*\.ssse3.*
.*:21: Error: .*\.sse4\.1.*
@ -9,10 +10,9 @@
.*:32: Error: .*\.nosse4\.2.*
.*:35: Error: .*\.nosse4\.1.*
.*:38: Error: .*\.nossse3.*
.*:43: Error: .*\.nosse3.*
.*:45: Error: .*\.nommx.*
.*:47: Error: .*\.nosse2.*
.*:50: Error: .*\.nosse.*
.*:43: Error: .*\.nommx.*
.*:45: Error: .*\.nosse2.*
.*:48: Error: .*\.nosse.*
GAS LISTING .*
#...
[ ]*1[ ]+\# Test \.arch \[\.sseX|\.nosseX\]
@ -28,7 +28,7 @@ GAS LISTING .*
[ ]*11[ ]+\?\?\?\? 0FAEE8 lfence
[ ]*12[ ]+mwait
[ ]*13[ ]+\.arch \.sse3
[ ]*14[ ]+\?\?\?\? 0F01C9 mwait
[ ]*14[ ]+mwait
[ ]*15[ ]+pabsd %xmm0, %xmm0
[ ]*16[ ]+\.arch \.ssse3
[ ]*17[ ]+\?\?\?\? 660F381E pabsd %xmm0, %xmm0
@ -60,21 +60,15 @@ GAS LISTING .*
[ ]*36[ ]+C0
[ ]*37[ ]+\.arch \.nossse3
[ ]*38[ ]+pabsd %xmm0, %xmm0
[ ]*39[ ]+\?\?\?\? 0F01C9 mwait
[ ]*40[ ]+\?\?\?\? 0F77 emms
[ ]*41[ ]+\.arch \.nommx
[ ]*42[ ]+\.arch \.nosse3
[ ]*43[ ]+mwait
[ ]*44[ ]+\?\?\?\? 0FAEE8 lfence
[ ]*45[ ]+emms
[ ]*46[ ]+\.arch \.nosse2
[ ]*47[ ]+lfence
[ ]*48[ ]+\?\?\?\? 0F58C0 addps %xmm0, %xmm0
[ ]*49[ ]+\.arch \.nosse
[ ]*50[ ]+addps %xmm0, %xmm0
GAS LISTING .*
[ ]*51[ ]+\?\?\?\? 8DB42600 \.p2align 4
[ ]*51[ ]+000000
[ ]*39[ ]+\?\?\?\? 0F77 emms
[ ]*40[ ]+\.arch \.nommx
[ ]*41[ ]+\.arch \.nosse3
[ ]*42[ ]+\?\?\?\? 0FAEE8 lfence
[ ]*43[ ]+emms
[ ]*44[ ]+\.arch \.nosse2
[ ]*45[ ]+lfence
[ ]*46[ ]+\?\?\?\? 0F58C0 addps %xmm0, %xmm0
[ ]*47[ ]+\.arch \.nosse
[ ]*48[ ]+addps %xmm0, %xmm0
[ ]*49[ ]+\?\?\?\? .* \.p2align 4
#pass

View File

@ -36,11 +36,9 @@
pabsd %xmm0, %xmm0
.arch .nossse3
pabsd %xmm0, %xmm0
mwait
emms
.arch .nommx
.arch .nosse3
mwait
lfence
emms
.arch .nosse2

View File

@ -61,9 +61,9 @@ static const dependency isa_dependencies[] =
{ "P4",
"P3|Clflush|SSE2" },
{ "NOCONA",
"GENERIC64|FISTTP|SSE3|CX16" },
"GENERIC64|FISTTP|SSE3|MONITOR|CX16" },
{ "CORE",
"P4|FISTTP|SSE3|CX16" },
"P4|FISTTP|SSE3|MONITOR|CX16" },
{ "CORE2",
"NOCONA|SSSE3" },
{ "COREI7",
@ -77,9 +77,9 @@ static const dependency isa_dependencies[] =
{ "K8",
"ATHLON|Rdtscp|SSE2|LM" },
{ "AMDFAM10",
"K8|FISTTP|SSE4A|ABM" },
"K8|FISTTP|SSE4A|ABM|MONITOR" },
{ "BDVER1",
"GENERIC64|FISTTP|Rdtscp|CX16|LAHF_SAHF|XOP|ABM|LWP|SVME|AES|PCLMUL|PRFCHW" },
"GENERIC64|FISTTP|Rdtscp|MONITOR|CX16|LAHF_SAHF|XOP|ABM|LWP|SVME|AES|PCLMUL|PRFCHW" },
{ "BDVER2",
"BDVER1|FMA|BMI|TBM|F16C" },
{ "BDVER3",
@ -87,7 +87,7 @@ static const dependency isa_dependencies[] =
{ "BDVER4",
"BDVER3|AVX2|Movbe|BMI2|RdRnd|MWAITX" },
{ "ZNVER1",
"GENERIC64|FISTTP|Rdtscp|CX16|LAHF_SAHF|AVX2|SSE4A|ABM|SVME|AES|PCLMUL|PRFCHW|FMA|BMI|F16C|Xsaveopt|FSGSBase|Movbe|BMI2|RdRnd|ADX|RdSeed|SMAP|SHA|XSAVEC|XSAVES|ClflushOpt|CLZERO|MWAITX" },
"GENERIC64|FISTTP|Rdtscp|MONITOR|CX16|LAHF_SAHF|AVX2|SSE4A|ABM|SVME|AES|PCLMUL|PRFCHW|FMA|BMI|F16C|Xsaveopt|FSGSBase|Movbe|BMI2|RdRnd|ADX|RdSeed|SMAP|SHA|XSAVEC|XSAVES|ClflushOpt|CLZERO|MWAITX" },
{ "ZNVER2",
"ZNVER1|CLWB|RDPID|RDPRU|MCOMMIT|WBNOINVD" },
{ "ZNVER3",
@ -95,7 +95,7 @@ static const dependency isa_dependencies[] =
{ "ZNVER4",
"ZNVER3|AVX512F|AVX512DQ|AVX512IFMA|AVX512CD|AVX512BW|AVX512VL|AVX512_BF16|AVX512VBMI|AVX512_VBMI2|AVX512_VNNI|AVX512_BITALG|AVX512_VPOPCNTDQ|GFNI|RMPQUERY" },
{ "BTVER1",
"GENERIC64|FISTTP|CX16|LAHF_SAHF|Rdtscp|SSSE3|SSE4A|ABM|PRFCHW|Clflush|FISTTP|SVME" },
"GENERIC64|FISTTP|MONITOR|CX16|LAHF_SAHF|Rdtscp|SSSE3|SSE4A|ABM|PRFCHW|Clflush|FISTTP|SVME" },
{ "BTVER2",
"BTVER1|AVX|BMI|F16C|AES|PCLMUL|Movbe|Xsaveopt|PRFCHW" },
{ "286",
@ -322,6 +322,7 @@ static bitfield cpu_flags[] =
BITFIELD (BMI2),
BITFIELD (LZCNT),
BITFIELD (POPCNT),
BITFIELD (MONITOR),
BITFIELD (HLE),
BITFIELD (RTM),
BITFIELD (INVPCID),

File diff suppressed because it is too large Load Diff

View File

@ -88,6 +88,8 @@ enum
CpuLZCNT,
/* POPCNT support required */
CpuPOPCNT,
/* MONITOR support required */
CpuMONITOR,
/* SSE4.1 support required */
CpuSSE4_1,
/* SSE4.2 support required */
@ -350,6 +352,7 @@ typedef union i386_cpu_flags
unsigned int cpusse4a:1;
unsigned int cpulzcnt:1;
unsigned int cpupopcnt:1;
unsigned int cpumonitor:1;
unsigned int cpusse4_1:1;
unsigned int cpusse4_2:1;
unsigned int cpuavx:1;

View File

@ -1270,17 +1270,17 @@ cmpxchg16b, 0xfc7/1, CX16|x64, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecif
// MONITOR instructions.
monitor, 0xf01c8, SSE3, NoSuf, {}
monitor, 0xf01c8, MONITOR, NoSuf, {}
// monitor is very special. CX and DX are always 32 bits. The
// address size override prefix can be used to override the AX size in
// all modes.
monitor, 0xf01c8, SSE3, AddrPrefixOpReg|NoSuf, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
monitor, 0xf01c8, MONITOR, AddrPrefixOpReg|NoSuf, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
// The 64-bit form exists only for compatibility with older gas.
monitor, 0xf01c8, SSE3|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
mwait, 0xf01c9, SSE3, NoSuf, {}
monitor, 0xf01c8, MONITOR|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
mwait, 0xf01c9, MONITOR, NoSuf, {}
// mwait is very special. AX and CX are always 32 bits.
// The 64-bit form exists only for compatibility with older gas.
mwait, 0xf01c9, SSE3, CheckOperandSize|IgnoreSize|NoSuf|NoRex64, { Acc|Dword|Qword, RegC|Dword|Qword }
mwait, 0xf01c9, MONITOR, CheckOperandSize|IgnoreSize|NoSuf|NoRex64, { Acc|Dword|Qword, RegC|Dword|Qword }
// VMX instructions.

File diff suppressed because it is too large Load Diff