x86-64: honor REX prefixes for SSE2AVX

Legacy encoded insns do so, and their automatic conversion to AVX ones
ought to produce functionally identical code. Therefore explicit REX
prefixes cannot simply be ignored. This is in particular relevant
because at least PCMPESTR{I,M}'s 64-bit forms couldn't be expressed in
older gas other than by using a REX64 prefix.
This commit is contained in:
Jan Beulich
2020-06-25 09:25:52 +02:00
parent 40d231b4fb
commit a5aeccd9d3
4 changed files with 112 additions and 29 deletions

View File

@ -1,3 +1,13 @@
2020-06-25 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (process_operands): Translate explicit REX
prefix into i.rex for SSE2AVX templates.
(set_rex_vrex): New helper.
(build_modrm_byte): Use it.
* testsuite/gas/i386/x86-64-sse2avx.s: Add cases with explicit
REX prefixes.
* testsuite/gas/i386/x86-64-sse2avx.d: Adjust expectations.
2020-06-25 Jan Beulich <jbeulich@suse.com>
* config/tc-i386.c (cpu_flags_match): Only match SSE2AVX

View File

@ -7408,6 +7408,15 @@ process_operands (void)
unnecessary segment overrides. */
const seg_entry *default_seg = 0;
if (i.tm.opcode_modifier.sse2avx)
{
/* Legacy encoded insns allow explicit REX prefixes, so these prefixes
need converting. */
i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
i.prefix[REX_PREFIX] = 0;
i.rex_encoding = 0;
}
if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
{
unsigned int dupl = i.operands;
@ -7645,6 +7654,25 @@ process_operands (void)
return 1;
}
/* Record in the global insn state `i' the REX / VREX bit REX_BIT needed
   to encode register R.

   If R carries RegRex, REX_BIT is merged into i.rex; finding the bit
   already set there is diagnosed as a duplicate prefix.  Otherwise, when
   DO_SSE2AVX is set, REX_BIT is already present in i.rex and a VEX
   register specifier has been recorded, that specifier is asserted to be
   R and is then advanced by 8 entries.
   NOTE(review): presumably entry +8 is the matching upper-bank register
   in the register table -- confirm against the table layout.

   Independently of the above, REX_BIT is merged into i.vrex when R
   carries RegVRex.  */
static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
				 bfd_boolean do_sse2avx)
{
  /* Sample the current state of the bit once; both branches below test
     it before anything modifies i.rex.  */
  const unsigned int already_set = i.rex & rex_bit;

  if ((r->reg_flags & RegRex) != 0)
    {
      if (already_set != 0)
	as_bad (_("same type of prefix used twice"));

      i.rex |= rex_bit;
    }
  else if (do_sse2avx
	   && already_set != 0
	   && i.vex.register_specifier != NULL)
    {
      gas_assert (i.vex.register_specifier == r);
      i.vex.register_specifier += 8;
    }

  if ((r->reg_flags & RegVRex) != 0)
    i.vrex |= rex_bit;
}
static const seg_entry *
build_modrm_byte (void)
{
@ -7875,27 +7903,15 @@ build_modrm_byte (void)
else
i.has_regxmm = TRUE;
}
if ((i.op[dest].regs->reg_flags & RegRex) != 0)
i.rex |= REX_R;
if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
i.vrex |= REX_R;
if ((i.op[source].regs->reg_flags & RegRex) != 0)
i.rex |= REX_B;
if ((i.op[source].regs->reg_flags & RegVRex) != 0)
i.vrex |= REX_B;
set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
set_rex_vrex (i.op[source].regs, REX_B, FALSE);
}
else
{
i.rm.reg = i.op[source].regs->reg_num;
i.rm.regmem = i.op[dest].regs->reg_num;
if ((i.op[dest].regs->reg_flags & RegRex) != 0)
i.rex |= REX_B;
if ((i.op[dest].regs->reg_flags & RegVRex) != 0)
i.vrex |= REX_B;
if ((i.op[source].regs->reg_flags & RegRex) != 0)
i.rex |= REX_R;
if ((i.op[source].regs->reg_flags & RegVRex) != 0)
i.vrex |= REX_R;
set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
set_rex_vrex (i.op[source].regs, REX_R, FALSE);
}
if (flag_code != CODE_64BIT && (i.rex & REX_R))
{
@ -7945,10 +7961,7 @@ build_modrm_byte (void)
}
}
i.sib.index = i.index_reg->reg_num;
if ((i.index_reg->reg_flags & RegRex) != 0)
i.rex |= REX_X;
if ((i.index_reg->reg_flags & RegVRex) != 0)
i.vrex |= REX_X;
set_rex_vrex (i.index_reg, REX_X, FALSE);
}
default_seg = &ds;
@ -8314,18 +8327,14 @@ build_modrm_byte (void)
if (i.tm.extension_opcode != None)
{
i.rm.regmem = i.op[op].regs->reg_num;
if ((i.op[op].regs->reg_flags & RegRex) != 0)
i.rex |= REX_B;
if ((i.op[op].regs->reg_flags & RegVRex) != 0)
i.vrex |= REX_B;
set_rex_vrex (i.op[op].regs, REX_B,
i.tm.opcode_modifier.sse2avx);
}
else
{
i.rm.reg = i.op[op].regs->reg_num;
if ((i.op[op].regs->reg_flags & RegRex) != 0)
i.rex |= REX_R;
if ((i.op[op].regs->reg_flags & RegVRex) != 0)
i.vrex |= REX_R;
set_rex_vrex (i.op[op].regs, REX_R,
i.tm.opcode_modifier.sse2avx);
}
}

View File

@ -711,6 +711,33 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: c5 d9 73 d4 64 vpsrlq \$0x64,%xmm4,%xmm4
[ ]*[a-f0-9]+: c5 d9 71 d4 64 vpsrlw \$0x64,%xmm4,%xmm4
[ ]*[a-f0-9]+: c5 f9 c5 cc 64 vpextrw \$0x64,%xmm4,%ecx
[ ]*[a-f0-9]+: c5 f8 58 c0 vaddps %xmm0,%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 f8 58 04 00 vaddps \(%rax,%rax(,1)?\),%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 f8 58 c0 vaddps %xmm0,%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 f8 58 04 00 vaddps \(%rax,%rax(,1)?\),%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 38 58 c0 vaddps %xmm0,%xmm8,%xmm8
[ ]*[a-f0-9]+: c5 38 58 04 00 vaddps \(%rax,%rax(,1)?\),%xmm8,%xmm8
[ ]*[a-f0-9]+: c4 a1 78 58 c0 vaddps %xmm0,%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 a1 78 58 04 00 vaddps \(%rax,%r8(,1)?\),%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 c1 78 58 c0 vaddps %xmm8,%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 c1 78 58 04 00 vaddps \(%r8,%rax(,1)?\),%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 3a 10 c0 vmovss %xmm0,%xmm8,%xmm8
[ ]*[a-f0-9]+: c4 c1 7a 10 c0 vmovss %xmm8,%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 7a 11 c0 vmovss %xmm8,%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 c1 3a 11 c0 vmovss %xmm0,%xmm8,%xmm8
[ ]*[a-f0-9]+: c4 c1 39 71 f0 00 vpsllw \$(0x)?0,%xmm8,%xmm8
[ ]*[a-f0-9]+: c5 79 c5 c0 00 vpextrw \$(0x)?0,%xmm0,%r8d
[ ]*[a-f0-9]+: c4 c1 79 c5 c0 00 vpextrw \$(0x)?0,%xmm8,%eax
[ ]*[a-f0-9]+: c4 63 79 14 c0 00 vpextrb \$(0x)?0,%xmm8,%eax
[ ]*[a-f0-9]+: c4 c3 79 14 c0 00 vpextrb \$(0x)?0,%xmm0,%r8d
[ ]*[a-f0-9]+: c4 63 39 4a c0 00 vblendvps %xmm0,%xmm0,%xmm8,%xmm8
[ ]*[a-f0-9]+: c4 c3 79 4a c0 00 vblendvps %xmm0,%xmm8,%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 63 39 4a c0 00 vblendvps %xmm0,%xmm0,%xmm8,%xmm8
[ ]*[a-f0-9]+: c4 c3 79 4a c0 00 vblendvps %xmm0,%xmm8,%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 e1 fb 2a 00 vcvtsi2sdq \(%rax\),%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 e1 fa 2a 00 vcvtsi2ssq \(%rax\),%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 e3 f9 61 c0 00 vpcmpestriq \$(0x)?0,%xmm0,%xmm0
[ ]*[a-f0-9]+: c4 e3 f9 60 c0 00 vpcmpestrmq \$(0x)?0,%xmm0,%xmm0
[ ]*[a-f0-9]+: c5 f8 ae 11 vldmxcsr \(%rcx\)
[ ]*[a-f0-9]+: c5 f8 ae 19 vstmxcsr \(%rcx\)
[ ]*[a-f0-9]+: c5 f8 5b f4 vcvtdq2ps %xmm4,%xmm6

View File

@ -802,6 +802,44 @@ _start:
# Tests for op imm8, xmm, regl
pextrw $100,%xmm4,%ecx
# Tests for REX prefix conversion
{rex} addps %xmm0, %xmm0
{rex} addps (%rax,%rax), %xmm0
rex addps %xmm0, %xmm0
rex addps (%rax,%rax), %xmm0
rexx addps %xmm0, %xmm0
rexx addps (%rax,%rax), %xmm0
rexy addps %xmm0, %xmm0
rexy addps (%rax,%rax), %xmm0
rexz addps %xmm0, %xmm0
rexz addps (%rax,%rax), %xmm0
{load} rexx movss %xmm0, %xmm0
{load} rexz movss %xmm0, %xmm0
{store} rexx movss %xmm0, %xmm0
{store} rexz movss %xmm0, %xmm0
rexz psllw $0, %xmm0
rexx pextrw $0, %xmm0, %eax
rexz pextrw $0, %xmm0, %eax
rexx pextrb $0, %xmm0, %eax
rexz pextrb $0, %xmm0, %eax
rexx blendvps %xmm0, %xmm0, %xmm0
rexz blendvps %xmm0, %xmm0, %xmm0
rexx blendvps %xmm0, %xmm0
rexz blendvps %xmm0, %xmm0
rex64 cvtsi2sd (%rax), %xmm0
rex64 cvtsi2ss (%rax), %xmm0
rex64 pcmpestri $0, %xmm0, %xmm0
rex64 pcmpestrm $0, %xmm0, %xmm0
.intel_syntax noprefix
# Tests for op mem64
@ -1505,4 +1543,3 @@ _start:
# Tests for op imm8, xmm, regl
pextrw ecx,xmm4,100