[Patch][binutils][arm] Armv8.6-A Matrix Multiply extension [9/10]

Hi,

This patch is part of a series that adds support for Armv8.6-A
(Matrix Multiply and BFloat16 extensions) to binutils.

This patch introduces the Matrix Multiply (Int8, F32, F64) extensions
to the arm backend.

The following Matrix Multiply instructions are added: vummla, vsmmla,
vusmmla, vusdot, vsudot[1].

[1]https://developer.arm.com/docs/ddi0597/latest/simd-and-floating-point-instructions-alphabetic-order

Committed on behalf of Mihail Ionescu.

gas/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

	* config/tc-arm.c (arm_ext_i8mm): New feature set.
	(do_vusdot): New.
	(do_vsudot): New.
	(do_vsmmla): New.
	(do_vummla): New.
	(insns): Add vsmmla, vummla, vusmmla, vusdot, vsudot mnemonics.
	(armv86a_ext_table): Add i8mm extension.
	(arm_extensions): Move bf16 extension to context sensitive table.
	(armv82a_ext_table, armv84a_ext_table, armv85a_ext_table):
	Move bf16 extension to context sensitive table.
	(armv86a_ext_table): Add i8mm extension.
	* doc/c-arm.texi: Document i8mm extension.
	* testsuite/gas/arm/i8mm.s: New test.
	* testsuite/gas/arm/i8mm.d: New test.
	* testsuite/gas/arm/bfloat16-cmdline-bad-3.d: Update test.

include/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

	* opcode/arm.h (ARM_EXT2_I8MM): New feature macro.

opcodes/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

	* arm-dis.c (neon_opcodes): Add i8mm SIMD instructions.

Regression tested on arm-none-eabi.
Is this ok for trunk?

Regards,
Mihail
This commit is contained in:
Matthew Malcomson
2019-11-07 17:20:08 +00:00
parent 8382113fdb
commit 616ce08e1c
10 changed files with 195 additions and 5 deletions

View File

@ -1,3 +1,21 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* config/tc-arm.c (arm_ext_i8mm): New feature set.
(do_vusdot): New.
(do_vsudot): New.
(do_vsmmla): New.
(do_vummla): New.
(insns): Add vsmmla, vummla, vusmmla, vusdot, vsudot mnemonics.
(armv86a_ext_table): Add i8mm extension.
(arm_extensions): Move bf16 extension to context sensitive table.
(armv82a_ext_table, armv84a_ext_table, armv85a_ext_table):
Move bf16 extension to context sensitive table.
(armv86a_ext_table): Add i8mm extension.
* doc/c-arm.texi: Document i8mm extension.
* testsuite/gas/arm/i8mm.s: New test.
* testsuite/gas/arm/i8mm.d: New test.
* testsuite/gas/arm/bfloat16-cmdline-bad-3.d: Update test.
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* config/tc-aarch64.c: Add new arch features to support the mm extension.

View File

@ -277,6 +277,8 @@ static const arm_feature_set arm_ext_predres =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_PREDRES);
static const arm_feature_set arm_ext_bf16 =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16);
static const arm_feature_set arm_ext_i8mm =
ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM);
static const arm_feature_set arm_arch_any = ARM_ANY;
#ifdef OBJ_ELF
@ -21483,6 +21485,79 @@ do_neon_dotproduct_u (void)
return do_neon_dotproduct (1);
}
/* Encode the VUSDOT instruction.  Operand 2 selects the form: a scalar
operand gives the indexed encoding, anything else gives the plain
three-register vector encoding.  */
static void
do_vusdot (void)
{
enum neon_shape rs;
set_pred_insn_type (OUTSIDE_PRED_INSN);
if (inst.operands[2].isscalar)
{
/* Indexed form: accept D,D,scalar or Q,Q,scalar, keyed on an S8 type.  */
rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
/* Bit 25 is set only on this scalar path.  */
inst.instruction |= (1 << 25);
/* The scalar operand carries the element index in the low four bits of
.reg and the register number in the bits above them.  */
int index = inst.operands[2].reg & 0xf;
constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
/* Strip the index so that .reg holds just the register number.  */
inst.operands[2].reg >>= 4;
constraint (!(inst.operands[2].reg < 16),
_("indexed register must be less than 16"));
/* NOTE(review): neon_three_args is defined elsewhere; presumably it fills
in the three register fields and its argument flags the Q form -- confirm.  */
neon_three_args (rs == NS_QQS);
/* The validated index (0 or 1) lands in bit 5 of the encoding.  */
inst.instruction |= (index << 5);
}
else
{
/* Vector form: bit 21 is set and the shape is D,D,D or Q,Q,Q.  */
inst.instruction |= (1 << 21);
rs = neon_select_shape (NS_DDD, NS_QQQ, NS_NULL);
neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
neon_three_args (rs == NS_QQQ);
}
}
/* Encode the VSUDOT instruction.  Only the indexed (scalar operand 2)
form is handled here; when operand 2 is not a scalar nothing beyond the
set_pred_insn_type call is done.  NOTE(review): presumably operand
parsing already guarantees a scalar for this mnemonic -- confirm against
its insns[] entry.  */
static void
do_vsudot (void)
{
enum neon_shape rs;
set_pred_insn_type (OUTSIDE_PRED_INSN);
if (inst.operands[2].isscalar)
{
/* Accept D,D,scalar or Q,Q,scalar, keyed on a U8 type.  */
rs = neon_select_shape (NS_DDS, NS_QQS, NS_NULL);
neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY);
inst.instruction |= (1 << 25);
/* The scalar operand carries the element index in the low four bits of
.reg and the register number in the bits above them.  */
int index = inst.operands[2].reg & 0xf;
constraint ((index != 1 && index != 0), _("index must be 0 or 1"));
/* Strip the index so that .reg holds just the register number.  */
inst.operands[2].reg >>= 4;
constraint (!(inst.operands[2].reg < 16),
_("indexed register must be less than 16"));
/* NOTE(review): neon_three_args is defined elsewhere; presumably it fills
in the three register fields and its argument flags the Q form -- confirm.  */
neon_three_args (rs == NS_QQS);
/* The validated index (0 or 1) lands in bit 5 of the encoding.  */
inst.instruction |= (index << 5);
}
}
/* Encode a matrix multiply-accumulate that takes three Q registers and
is keyed on an S8 type.  Only the Q,Q,Q shape is offered to
neon_select_shape.  */
static void
do_vsmmla (void)
{
enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
neon_check_type (3, rs, N_EQK, N_EQK, N_S8 | N_KEY);
set_pred_insn_type (OUTSIDE_PRED_INSN);
/* The argument is the constant 1 because Q,Q,Q is the only shape
accepted above.  NOTE(review): presumably this selects the Q-register
encoding in neon_three_args -- confirm.  */
neon_three_args (1);
}
/* Encode a matrix multiply-accumulate that takes three Q registers and
is keyed on a U8 type.  Only the Q,Q,Q shape is offered to
neon_select_shape.  */
static void
do_vummla (void)
{
enum neon_shape rs = neon_select_shape (NS_QQQ, NS_NULL);
neon_check_type (3, rs, N_EQK, N_EQK, N_U8 | N_KEY);
set_pred_insn_type (OUTSIDE_PRED_INSN);
/* The argument is the constant 1 because Q,Q,Q is the only shape
accepted above.  NOTE(review): presumably this selects the Q-register
encoding in neon_three_args -- confirm.  */
neon_three_args (1);
}
/* Crypto v1 instructions. */
static void
do_crypto_2op_1 (unsigned elttype, int op)
@ -26000,7 +26075,7 @@ static const struct asm_opcode insns[] =
#define THUMB_VARIANT &arm_ext_i8mm
TUF ("vsmmla", c200c40, fc200c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
TUF ("vummla", c200c50, fc200c50, 3, (RNQ, RNQ, RNQ), vummla, vummla),
TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vummla, vummla),
TUF ("vusmmla", ca00c40, fca00c40, 3, (RNQ, RNQ, RNQ), vsmmla, vsmmla),
TUF ("vusdot", c800d00, fc800d00, 3, (RNDQ, RNDQ, RNDQ_RNSC), vusdot, vusdot),
TUF ("vsudot", c800d10, fc800d10, 3, (RNDQ, RNDQ, RNSC), vsudot, vsudot),
};
@ -31127,6 +31202,8 @@ static const struct arm_ext_table armv82a_ext_table[] =
ARM_ADD ("simd", FPU_ARCH_NEON_VFP_ARMV8_1),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_2_FP16),
ARM_ADD ("fp16fml", FPU_ARCH_NEON_VFP_ARMV8_2_FP16FML),
ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_1,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
ARM_ADD ("dotprod", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
@ -31143,6 +31220,8 @@ static const struct arm_ext_table armv84a_ext_table[] =
{
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
@ -31158,6 +31237,8 @@ static const struct arm_ext_table armv85a_ext_table[] =
{
ARM_ADD ("simd", FPU_ARCH_DOTPROD_NEON_VFP_ARMV8),
ARM_ADD ("fp16", FPU_ARCH_NEON_VFP_ARMV8_4_FP16FML),
ARM_ADD ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16)),
ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
ARM_EXT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8_4,
ARM_FEATURE_COPROC (FPU_CRYPTO_ARMV8)),
@ -31169,6 +31250,7 @@ static const struct arm_ext_table armv85a_ext_table[] =
/* Extension table for Armv8.6-A.  It lists the "i8mm" extension, which
adds the ARM_EXT2_I8MM feature bit, followed by an entry with a NULL
name.  NOTE(review): the NULL entry presumably acts as the end-of-table
sentinel -- confirm against the table walker.  */
static const struct arm_ext_table armv86a_ext_table[] =
{
ARM_ADD ("i8mm", ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM)),
{ NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
};
@ -31308,9 +31390,6 @@ struct arm_option_extension_value_table
use the context sensitive approach using arm_ext_table's. */
static const struct arm_option_extension_value_table arm_extensions[] =
{
ARM_EXT_OPT ("bf16", ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
ARM_ARCH_V8_2A),
ARM_EXT_OPT ("crc", ARCH_CRC_ARMV8, ARM_FEATURE_COPROC (CRC_EXT_ARMV8),
ARM_FEATURE_CORE_LOW (ARM_EXT_V8)),
ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,

View File

@ -181,6 +181,7 @@ been added, again in ascending alphabetical order. For example,
The following extensions are currently supported:
@code{bf16} (BFloat16 extensions for v8.6-A architecture),
@code{i8mm} (Int8 Matrix Multiply extensions for v8.6-A architecture),
@code{crc},
@code{crypto} (Cryptography Extensions for v8-A architecture, implies @code{fp+simd}),
@code{dotprod} (Dot Product Extensions for v8.2-A architecture, implies @code{fp+simd}),

View File

@ -1,4 +1,4 @@
#name: Bfloat 16 bad extension
#source: bfloat16-non-neon.s
#as: -mno-warn-deprecated -march=armv8.1-a+bf16
#error: .*Error: extension does not apply to the base architecture.*
#error: .*Error: unknown architectural extension `bf16'*

View File

@ -0,0 +1,36 @@
#name: Int8 Matrix Multiply extension
#source: i8mm.s
#as: -mno-warn-deprecated -march=armv8.6-a+i8mm+simd -I$srcdir/$subdir
#objdump: -dr --show-raw-insn
.*: +file format .*arm.*
Disassembly of section \.text:
00000000 <\.text>:
*[0-9a-f]+: fcea4c40 vusmmla\.s8 q10, q5, q0
*[0-9a-f]+: fc6a4c50 vummla\.u8 q10, q5, q0
*[0-9a-f]+: fc6a4c40 vsmmla\.s8 q10, q5, q0
*[0-9a-f]+: fcea4d40 vusdot\.s8 q10, q5, q0
*[0-9a-f]+: feca4d50 vsudot\.u8 q10, q5, d0\[0\]
*[0-9a-f]+: feca4d70 vsudot\.u8 q10, q5, d0\[1\]
*[0-9a-f]+: feca4d40 vusdot\.s8 q10, q5, d0\[0\]
*[0-9a-f]+: feca4d60 vusdot\.s8 q10, q5, d0\[1\]
*[0-9a-f]+: fca5ad00 vusdot\.s8 d10, d5, d0
*[0-9a-f]+: fe85ad00 vusdot\.s8 d10, d5, d0\[0\]
*[0-9a-f]+: fe85ad20 vusdot\.s8 d10, d5, d0\[1\]
*[0-9a-f]+: fe85ad10 vsudot\.u8 d10, d5, d0\[0\]
*[0-9a-f]+: fe85ad30 vsudot\.u8 d10, d5, d0\[1\]
*[0-9a-f]+: fcea4c40 vusmmla\.s8 q10, q5, q0
*[0-9a-f]+: fc6a4c50 vummla\.u8 q10, q5, q0
*[0-9a-f]+: fc6a4c40 vsmmla\.s8 q10, q5, q0
*[0-9a-f]+: fcea4d40 vusdot\.s8 q10, q5, q0
*[0-9a-f]+: feca4d50 vsudot\.u8 q10, q5, d0\[0\]
*[0-9a-f]+: feca4d70 vsudot\.u8 q10, q5, d0\[1\]
*[0-9a-f]+: feca4d40 vusdot\.s8 q10, q5, d0\[0\]
*[0-9a-f]+: feca4d60 vusdot\.s8 q10, q5, d0\[1\]
*[0-9a-f]+: fca5ad00 vusdot\.s8 d10, d5, d0
*[0-9a-f]+: fe85ad00 vusdot\.s8 d10, d5, d0\[0\]
*[0-9a-f]+: fe85ad20 vusdot\.s8 d10, d5, d0\[1\]
*[0-9a-f]+: fe85ad10 vsudot\.u8 d10, d5, d0\[0\]
*[0-9a-f]+: fe85ad30 vsudot\.u8 d10, d5, d0\[1\]

View File

@ -0,0 +1,32 @@
vusmmla.s8 q10, q5, q0
vummla.u8 q10, q5, q0
vsmmla.s8 q10, q5, q0
vusdot.s8 q10, q5, q0
vsudot.u8 q10, q5, d0[0]
vsudot.u8 q10, q5, d0[1]
vusdot.s8 q10, q5, d0[0]
vusdot.s8 q10, q5, d0[1]
vusdot.s8 d10, d5, d0
vusdot.s8 d10, d5, d0[0]
vusdot.s8 d10, d5, d0[1]
vsudot.u8 d10, d5, d0[0]
vsudot.u8 d10, d5, d0[1]
vusmmla q10.s8, q5.s8, q0.s8
vummla q10.u8, q5.u8, q0.u8
vsmmla q10.s8, q5.s8, q0.s8
vusdot q10.s8, q5.s8, q0.s8
vsudot q10.u8, q5.u8, d0.u8[0]
vsudot q10.u8, q5.u8, d0.u8[1]
vusdot q10.s8, q5.s8, d0.s8[0]
vusdot q10.s8, q5.s8, d0.s8[1]
vusdot d10.s8, d5.s8, d0.s8
vusdot d10.s8, d5.s8, d0.s8[0]
vusdot d10.s8, d5.s8, d0.s8[1]
vsudot d10.u8, d5.u8, d0.u8[0]
vsudot d10.u8, d5.u8, d0.u8[1]

View File

@ -1,3 +1,7 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* opcode/arm.h (ARM_EXT2_I8MM): New feature macro.
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* opcode/aarch64.h (AARCH64_FEATURE_I8MM): New.

View File

@ -75,6 +75,7 @@
#define ARM_EXT2_V8_1M_MAIN 0x00008000 /* ARMv8.1-M Mainline. */
#define ARM_EXT2_V8_6A 0x00010000 /* ARM V8.6A. */
#define ARM_EXT2_BF16 0x00020000 /* ARMv8 bfloat16. */
#define ARM_EXT2_I8MM 0x00040000 /* ARMv8.6A i8mm. */
/* Co-processor space extensions. */
#define ARM_CEXT_XSCALE 0x00000001 /* Allow MIA etc. */

View File

@ -1,3 +1,8 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* arm-dis.c (neon_opcodes): Add i8mm SIMD instructions.
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve,

View File

@ -1471,6 +1471,20 @@ static const struct opcode32 neon_opcodes[] =
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_BF16),
0xfe300810, 0xffb00f10, "vfma%6?tb.bf16\t%12-15,22Q, %16-19,7Q, %0-2D[%3,5d]"},
/* Matrix Multiply instructions. */
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
0xfc200c40, 0xffb00f50, "vsmmla.s8\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
0xfc200c50, 0xffb00f50, "vummla.u8\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
0xfca00c40, 0xffb00f50, "vusmmla.s8\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
0xfca00d00, 0xffb00f10, "vusdot.s8\t%12-15,22R, %16-19,7R, %0-3,5R"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
0xfe800d00, 0xffb00f10, "vusdot.s8\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
{ARM_FEATURE_CORE_HIGH (ARM_EXT2_I8MM),
0xfe800d10, 0xffb00f10, "vsudot.u8\t%12-15,22R, %16-19,7R, d%0-3d[%5d]"},
/* Two registers, miscellaneous. */
{ARM_FEATURE_COPROC (FPU_NEON_EXT_ARMV8),
0xf3ba0400, 0xffbf0c10, "vrint%7-9?p?m?zaxn%u.f32\t%12-15,22R, %0-3,5R"},