x86: Add -muse-unaligned-vector-move to assembler

Unaligned load/store instructions on aligned memory or registers are as
fast as aligned load/store instructions on modern Intel processors.  Add
a command-line option, -muse-unaligned-vector-move, to the x86 assembler to
encode aligned vector load/store instructions as unaligned vector
load/store instructions.
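For reference, a minimal standalone C sketch (not part of the commit) of the
mnemonic rewrites the option performs; the pairs are taken from this commit's
test files, and the little table-printing program is purely illustrative:

#include <stdio.h>

/* Aligned-to-unaligned mnemonic pairs rewritten by
   -muse-unaligned-vector-move, per the tests below.  */
static const struct
{
  const char *aligned;
  const char *unaligned;
} pairs[] = {
  { "movaps",    "movups"    },	/* SSE */
  { "movapd",    "movupd"    },	/* SSE2 */
  { "movdqa",    "movdqu"    },	/* SSE2 */
  { "vmovaps",   "vmovups"   },	/* VEX/EVEX */
  { "vmovapd",   "vmovupd"   },
  { "vmovdqa",   "vmovdqu"   },
  { "vmovdqa32", "vmovdqu32" },	/* EVEX only */
  { "vmovdqa64", "vmovdqu64" },
};

int
main (void)
{
  for (size_t i = 0; i < sizeof pairs / sizeof pairs[0]; i++)
    printf ("%-10s -> %s\n", pairs[i].aligned, pairs[i].unaligned);
  return 0;
}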

	* NEWS: Mention -muse-unaligned-vector-move.
	* config/tc-i386.c (use_unaligned_vector_move): New.
	(encode_with_unaligned_vector_move): Likewise.
	(md_assemble): Call encode_with_unaligned_vector_move for
	-muse-unaligned-vector-move.
	(OPTION_MUSE_UNALIGNED_VECTOR_MOVE): New.
	(md_longopts): Add -muse-unaligned-vector-move.
	(md_parse_option): Handle -muse-unaligned-vector-move.
	(md_show_usage): Add -muse-unaligned-vector-move.
	* doc/c-i386.texi: Document -muse-unaligned-vector-move.
	* testsuite/gas/i386/i386.exp: Run unaligned-vector-move and
	x86-64-unaligned-vector-move.
	* testsuite/gas/i386/unaligned-vector-move.d: New file.
	* testsuite/gas/i386/unaligned-vector-move.s: Likewise.
	* testsuite/gas/i386/x86-64-unaligned-vector-move.d: Likewise.
commit c8480b58e1 (parent aab00c2dff)
Author: H.J. Lu
Date: 2021-10-21 06:15:31 -07:00
7 changed files with 110 additions and 0 deletions

gas/NEWS

@@ -1,5 +1,8 @@
-*- text -*-
* Add a command-line option, -muse-unaligned-vector-move, for x86 target
to encode aligned vector move as unaligned vector move.
* Add support for Cortex-R52+ for Arm.
* Add support for Cortex-A510, Cortex-A710, Cortex-X2 for AArch64.

gas/config/tc-i386.c

@@ -800,6 +800,9 @@ static unsigned int no_cond_jump_promotion = 0;
/* Encode SSE instructions with VEX prefix. */
static unsigned int sse2avx;
/* Encode aligned vector move as unaligned vector move. */
static unsigned int use_unaligned_vector_move;
/* Encode scalar AVX instructions with specific vector length. */
static enum
{
@@ -4073,6 +4076,30 @@ check_hle (void)
}
}
/* Encode aligned vector move as unaligned vector move. */
static void
encode_with_unaligned_vector_move (void)
{
switch (i.tm.base_opcode)
{
case 0x28:
/* movaps/movapd/vmovaps/vmovapd. */
if (i.tm.opcode_modifier.opcodespace == SPACE_0F
&& i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
i.tm.base_opcode = 0x10;
break;
case 0x6f:
/* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
if (i.tm.opcode_modifier.opcodespace == SPACE_0F
&& i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
break;
default:
break;
}
}
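The rewrite is cheap because of how these encodings pair up: movaps and movapd
share opcode 0x28 in 0F space and are distinguished only by the mandatory
prefix, so switching the base opcode to 0x10 yields movups/movupd in one
stroke, while movdqa and movdqu share opcode 0x6F and differ only in mandatory
prefix (66 versus F3).  The same fields carry over into the VEX/EVEX pp bits,
which is why this one function covers the SSE, AVX, and AVX-512 forms alike.
Below is a standalone C check, not part of the commit, showing that each
legacy-SSE register-to-register pair differs in exactly one byte; the
unaligned encodings are taken from this commit's dump tests, and the aligned
counterparts are the standard SSE/SSE2 encodings:

#include <stdio.h>

struct enc
{
  const char *insn;
  unsigned char bytes[4];
  int len;
};

int
main (void)
{
  /* Each row holds an aligned move and its unaligned replacement.  */
  const struct enc pairs[][2] = {
    { { "movaps %xmm1,%xmm2", { 0x0f, 0x28, 0xd1 }, 3 },
      { "movups %xmm1,%xmm2", { 0x0f, 0x10, 0xd1 }, 3 } },
    { { "movapd %xmm1,%xmm2", { 0x66, 0x0f, 0x28, 0xd1 }, 4 },
      { "movupd %xmm1,%xmm2", { 0x66, 0x0f, 0x10, 0xd1 }, 4 } },
    { { "movdqa %xmm1,%xmm2", { 0x66, 0x0f, 0x6f, 0xd1 }, 4 },
      { "movdqu %xmm1,%xmm2", { 0xf3, 0x0f, 0x6f, 0xd1 }, 4 } },
  };

  for (int i = 0; i < 3; i++)
    {
      int diff = 0;
      for (int j = 0; j < pairs[i][0].len; j++)
	diff += pairs[i][0].bytes[j] != pairs[i][1].bytes[j];
      printf ("%s -> %s: %d byte(s) differ\n",
	      pairs[i][0].insn, pairs[i][1].insn, diff);
    }
  return 0;
}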
/* Try the shortest encoding by shortening operand size. */
static void
@@ -5056,6 +5083,9 @@ md_assemble (char *line)
if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
optimize_encoding ();
if (use_unaligned_vector_move)
encode_with_unaligned_vector_move ();
if (!process_suffix ())
return;
@@ -13060,6 +13090,7 @@ const char *md_shortopts = "qnO::";
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
struct option md_longopts[] =
{
@@ -13081,6 +13112,7 @@ struct option md_longopts[] =
{"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
{"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
{"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
{"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
{"msse-check", required_argument, NULL, OPTION_MSSE_CHECK}, {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
{"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK}, {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
{"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR}, {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
@ -13381,6 +13413,10 @@ md_parse_option (int c, const char *arg)
sse2avx = 1; sse2avx = 1;
break; break;
case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
use_unaligned_vector_move = 1;
break;
case OPTION_MSSE_CHECK:
if (strcasecmp (arg, "error") == 0)
sse_check = check_error;
@@ -13796,6 +13832,9 @@ md_show_usage (FILE *stream)
fprintf (stream, _("\
-msse2avx encode SSE instructions with VEX prefix\n"));
fprintf (stream, _("\
-muse-unaligned-vector-move\n\
encode aligned vector move as unaligned vector move\n"));
fprintf (stream, _("\
-msse-check=[none|error|warning] (default: warning)\n\
check SSE instructions\n"));
fprintf (stream, _("\
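Plumbing-wise the option follows the standard gas pattern shown in the three
hunks above: a no_argument entry in md_longopts, a case in md_parse_option
that sets a flag, and a usage string.  A generic standalone sketch of that
pattern follows; it is not part of the commit, the driver is an illustrative
assumption, and only the option name and flag variable mirror the commit
(getopt_long_only accepts the single-dash spelling gas options use):

#include <getopt.h>
#include <stdio.h>

static unsigned int use_unaligned_vector_move;

int
main (int argc, char **argv)
{
  static const struct option longopts[] = {
    { "muse-unaligned-vector-move", no_argument, NULL, 1 },
    { NULL, 0, NULL, 0 }
  };
  int c;

  /* A no-argument long option that merely sets a flag, as in
     md_parse_option above.  */
  while ((c = getopt_long_only (argc, argv, "", longopts, NULL)) != -1)
    if (c == 1)
      use_unaligned_vector_move = 1;

  printf ("use_unaligned_vector_move = %u\n", use_unaligned_vector_move);
  return 0;
}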

gas/doc/c-i386.texi

@@ -316,6 +316,12 @@ Valid @var{CPU} values are identical to the processor list of
This option specifies that the assembler should encode SSE instructions
with VEX prefix.
@cindex @samp{-muse-unaligned-vector-move} option, i386
@cindex @samp{-muse-unaligned-vector-move} option, x86-64
@item -muse-unaligned-vector-move
This option specifies that the assembler should encode aligned vector
move as unaligned vector move.
@cindex @samp{-msse-check=} option, i386
@cindex @samp{-msse-check=} option, x86-64
@item -msse-check=@var{none}

gas/testsuite/gas/i386/i386.exp

@@ -272,6 +272,7 @@ if [gas_32_check] then {
run_dump_test "evex-wig1-intel"
run_dump_test "evex-no-scale-32"
run_dump_test "sse2avx"
run_dump_test "unaligned-vector-move"
run_list_test "inval-avx" "-al" run_list_test "inval-avx" "-al"
run_list_test "inval-avx512f" "-al" run_list_test "inval-avx512f" "-al"
run_list_test "inval-avx512vl" "-al" run_list_test "inval-avx512vl" "-al"
@ -948,6 +949,7 @@ if [gas_64_check] then {
run_dump_test "x86-64-evex-wig2" run_dump_test "x86-64-evex-wig2"
run_dump_test "evex-no-scale-64" run_dump_test "evex-no-scale-64"
run_dump_test "x86-64-sse2avx" run_dump_test "x86-64-sse2avx"
run_dump_test "x86-64-unaligned-vector-move"
run_list_test "x86-64-inval-avx" "-al" run_list_test "x86-64-inval-avx" "-al"
run_list_test "x86-64-inval-avx512f" "-al" run_list_test "x86-64-inval-avx512f" "-al"
run_list_test "x86-64-inval-avx512vl" "-al" run_list_test "x86-64-inval-avx512vl" "-al"

gas/testsuite/gas/i386/unaligned-vector-move.d

@@ -0,0 +1,22 @@
#as: -muse-unaligned-vector-move
#objdump: -dw
#name: i386 (Encode aligned vector move as unaligned vector move)
.*: +file format .*
Disassembly of section .text:
0+ <_start>:
+[a-f0-9]+: 0f 10 d1 movups %xmm1,%xmm2
+[a-f0-9]+: 66 0f 10 d1 movupd %xmm1,%xmm2
+[a-f0-9]+: f3 0f 6f d1 movdqu %xmm1,%xmm2
+[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2
+[a-f0-9]+: c5 f9 10 d1 vmovupd %xmm1,%xmm2
+[a-f0-9]+: c5 fa 6f d1 vmovdqu %xmm1,%xmm2
+[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2
+[a-f0-9]+: 62 f1 fd 09 10 d1 vmovupd %xmm1,%xmm2\{%k1\}
+[a-f0-9]+: 62 f1 7c 09 10 d1 vmovups %xmm1,%xmm2\{%k1\}
+[a-f0-9]+: 62 f1 7e 09 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\}
+[a-f0-9]+: 62 f1 fe 09 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\}
#pass

gas/testsuite/gas/i386/unaligned-vector-move.s

@@ -0,0 +1,15 @@
# Encode aligned vector move as unaligned vector move.
.text
_start:
movaps %xmm1, %xmm2
movapd %xmm1, %xmm2
movdqa %xmm1, %xmm2
vmovaps %xmm1, %xmm2
vmovapd %xmm1, %xmm2
vmovdqa %xmm1, %xmm2
vmovaps %xmm1, %xmm2
vmovapd %xmm1, %xmm2{%k1}
vmovaps %xmm1, %xmm2{%k1}
vmovdqa32 %xmm1, %xmm2{%k1}
vmovdqa64 %xmm1, %xmm2{%k1}

gas/testsuite/gas/i386/x86-64-unaligned-vector-move.d

@@ -0,0 +1,23 @@
#source: unaligned-vector-move.s
#as: -muse-unaligned-vector-move
#objdump: -dw
#name: x86-64 (Encode aligned vector move as unaligned vector move)
.*: +file format .*
Disassembly of section .text:
0+ <_start>:
+[a-f0-9]+: 0f 10 d1 movups %xmm1,%xmm2
+[a-f0-9]+: 66 0f 10 d1 movupd %xmm1,%xmm2
+[a-f0-9]+: f3 0f 6f d1 movdqu %xmm1,%xmm2
+[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2
+[a-f0-9]+: c5 f9 10 d1 vmovupd %xmm1,%xmm2
+[a-f0-9]+: c5 fa 6f d1 vmovdqu %xmm1,%xmm2
+[a-f0-9]+: c5 f8 10 d1 vmovups %xmm1,%xmm2
+[a-f0-9]+: 62 f1 fd 09 10 d1 vmovupd %xmm1,%xmm2\{%k1\}
+[a-f0-9]+: 62 f1 7c 09 10 d1 vmovups %xmm1,%xmm2\{%k1\}
+[a-f0-9]+: 62 f1 7e 09 6f d1 vmovdqu32 %xmm1,%xmm2\{%k1\}
+[a-f0-9]+: 62 f1 fe 09 6f d1 vmovdqu64 %xmm1,%xmm2\{%k1\}
#pass