[binutils][aarch64] Matrix Multiply extension enablement [8/X]

Hi,

This patch is part of a series that adds support for Armv8.6-A
(Matrix Multiply and BFloat16 extensions) to binutils.

This patch introduces the Matrix Multiply (Int8, F32, F64) extensions
to the aarch64 backend.

The following instructions are added: {s/u}mmla, usmmla, {us/su}dot,
fmmla, ld1rob, ld1roh, ld1row, ld1rod, zip{1/2}, uzip{1/2}, trn{1/2}.

Committed on behalf of Mihail Ionescu.

gas/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

	* config/tc-aarch64.c: Add new architecture features to support
	the Matrix Multiply extensions.
	(parse_operands): Add new operand.
	* testsuite/gas/aarch64/i8mm.s: New test.
	* testsuite/gas/aarch64/i8mm.d: New test.
	* testsuite/gas/aarch64/f32mm.s: New test.
	* testsuite/gas/aarch64/f32mm.d: New test.
	* testsuite/gas/aarch64/f64mm.s: New test.
	* testsuite/gas/aarch64/f64mm.d: New test.
	* testsuite/gas/aarch64/sve-movprfx-mm.s: New test.
	* testsuite/gas/aarch64/sve-movprfx-mm.d: New test.

include/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

	* opcode/aarch64.h (AARCH64_FEATURE_I8MM): New.
	(AARCH64_FEATURE_F32MM): New.
	(AARCH64_FEATURE_F64MM): New.
	(AARCH64_OPND_SVE_ADDR_RI_S4x32): New.
	(enum aarch64_insn_class): Add new instruction class "aarch64_misc" for
	instructions that do not require special handling.

opcodes/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

	* aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve,
	aarch64_feature_f64mm_sve, aarch64_feature_i8mm, aarch64_feature_f32mm,
	aarch64_feature_f64mm): New feature sets.
	(INT8MATMUL_INSN, F64MATMUL_SVE_INSN, F64MATMUL_INSN,
	F32MATMUL_SVE_INSN, F32MATMUL_INSN): New macros to define matrix multiply
	instructions.
	(I8MM_SVE, F32MM_SVE, F64MM_SVE, I8MM, F32MM, F64MM): New feature set
	macros.
	(QL_MMLA64, OP_SVE_SBB, OP_SVE_QQQ): New qualifiers.
	(INT8MATMUL_SVE_INSNC, F64MATMUL_SVE_INSNC,
	F32MATMUL_SVE_INSNC): New macros to define matrix multiply
	instructions that support the movprfx constraint.
	(aarch64_opcode_table): Support for SVE_ADDR_RI_S4x32.
	(aarch64_opcode_table): Define new instructions smmla,
	ummla, usmmla, usdot, sudot, fmmla, ld1rob, ld1roh, ld1row, ld1rod,
	zip{1/2}, uzip{1/2}, trn{1/2}.
	* aarch64-opc.c (operand_general_constraint_met_p): Handle
	AARCH64_OPND_SVE_ADDR_RI_S4x32.
	(aarch64_print_operand): Handle AARCH64_OPND_SVE_ADDR_RI_S4x32.
	* aarch64-dis-2.c (aarch64_opcode_lookup_1, aarch64_find_next_opcode):
	Account for new instructions.
	* aarch64-asm-2.c (aarch64_insert_operand): Support the new
	S4x32 operand.
	* aarch64-opc-2.c (aarch64_operands): Support the new S4x32 operand.
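For reference, the new SVE encodings compose in the usual way: register
numbers are ORed into the base opcode from the table entry.  A minimal
standalone sketch (not part of the patch) that reproduces the smmla
encoding checked by i8mm.d:

#include <assert.h>
#include <stdint.h>

/* Sketch: assemble SVE "smmla zd.s, zn.b, zm.b" from the table entry
   (base opcode 0x45009800, mask 0xffe0fc00).  Zd lives in bits 0-4,
   Zn in bits 5-9 and Zm in bits 16-20.  */
static uint32_t
encode_smmla (unsigned zd, unsigned zn, unsigned zm)
{
  return 0x45009800u | (zm << 16) | (zn << 5) | zd;
}

int
main (void)
{
  /* Matches the expected disassembly in i8mm.d:
     451b9ab1  smmla z17.s, z21.b, z27.b  */
  assert (encode_smmla (17, 21, 27) == 0x451b9ab1u);
  return 0;
}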

Regression tested on arm-none-eabi.

Is it ok for trunk?

Regards,
Mihail
commit 8382113fdb (parent eb5bbc4821)
Matthew Malcomson, 2019-11-07 17:10:01 +0000
19 changed files with 1234 additions and 472 deletions

gas/ChangeLog

@@ -1,3 +1,16 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* config/tc-aarch64.c: Add new architecture features to support the Matrix Multiply extensions.
(parse_operands): Add new operand.
* testsuite/gas/aarch64/i8mm.s: New test.
* testsuite/gas/aarch64/i8mm.d: New test.
* testsuite/gas/aarch64/f32mm.s: New test.
* testsuite/gas/aarch64/f32mm.d: New test.
* testsuite/gas/aarch64/f64mm.s: New test.
* testsuite/gas/aarch64/f64mm.d: New test.
* testsuite/gas/aarch64/sve-movprfx-mm.s: New test.
* testsuite/gas/aarch64/sve-movprfx-mm.d: New test.
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Barnaby Wilks <barnaby.wilks@arm.com>

gas/config/tc-aarch64.c

@@ -6433,6 +6433,7 @@ parse_operands (char *str, const aarch64_opcode *opcode)
break;
case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
case AARCH64_OPND_SVE_ADDR_RI_S4xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x2xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x3xVL:
@@ -9058,6 +9059,12 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
AARCH64_FEATURE (AARCH64_FEATURE_SVE2, 0)},
{"bf16", AARCH64_FEATURE (AARCH64_FEATURE_BFLOAT16, 0),
AARCH64_ARCH_NONE},
{"i8mm", AARCH64_FEATURE (AARCH64_FEATURE_I8MM, 0),
AARCH64_ARCH_NONE},
{"f32mm", AARCH64_FEATURE (AARCH64_FEATURE_F32MM, 0),
AARCH64_ARCH_NONE},
{"f64mm", AARCH64_FEATURE (AARCH64_FEATURE_F64MM, 0),
AARCH64_ARCH_NONE},
{NULL, AARCH64_ARCH_NONE, AARCH64_ARCH_NONE},
};
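For context, each "+ext" suffix of -march is resolved by name against
this table.  A simplified standalone sketch of that lookup, using plain
uint64_t masks instead of the real aarch64_feature_set type (the actual
parser also handles "no"-prefixes and feature dependencies):

#include <stdint.h>
#include <string.h>

#define FEATURE_I8MM  0x10000000000000ULL  /* from include/opcode/aarch64.h */
#define FEATURE_F32MM 0x20000000000000ULL
#define FEATURE_F64MM 0x40000000000000ULL

struct feature_entry
{
  const char *name;
  uint64_t bits;
};

static const struct feature_entry features[] = {
  { "i8mm",  FEATURE_I8MM  },
  { "f32mm", FEATURE_F32MM },
  { "f64mm", FEATURE_F64MM },
  { NULL, 0 }
};

/* OR the named extension's bits into *isa; return 0 if unknown.  */
static int
enable_extension (uint64_t *isa, const char *name)
{
  const struct feature_entry *e;
  for (e = features; e->name != NULL; e++)
    if (strcmp (e->name, name) == 0)
      {
	*isa |= e->bits;
	return 1;
      }
  return 0;
}

int
main (void)
{
  uint64_t isa = 0;
  return (enable_extension (&isa, "i8mm")
	  && (isa & FEATURE_I8MM) != 0) ? 0 : 1;
}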

gas/doc/c-aarch64.texi

@@ -144,6 +144,12 @@ automatically cause those extensions to be disabled.
@multitable @columnfractions .12 .17 .17 .54
@headitem Extension @tab Minimum Architecture @tab Enabled by default
@tab Description
@item @code{i8mm} @tab ARMv8.2-A @tab ARMv8.6-A or later
@tab Enable Int8 Matrix Multiply extension.
@item @code{f32mm} @tab ARMv8.2-A @tab No
@tab Enable F32 Matrix Multiply extension.
@item @code{f64mm} @tab ARMv8.2-A @tab No
@tab Enable F64 Matrix Multiply extension.
@item @code{bf16} @tab ARMv8.2-A @tab ARMv8.6-A or later
@tab Enable BFloat16 extension.
@item @code{compnum} @tab ARMv8.2-A @tab ARMv8.3-A or later

gas/testsuite/gas/aarch64/f32mm.d

@@ -0,0 +1,11 @@
#as: -march=armv8.6-a+sve+f32mm
#objdump: -dr
.*: file format .*
Disassembly of section \.text:
0000000000000000 <\.text>:
*[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
*[0-9a-f]+: 64a0e400 fmmla z0\.s, z0\.s, z0\.s

gas/testsuite/gas/aarch64/f32mm.s

@@ -0,0 +1,12 @@
/* The instructions with non-zero register numbers are there to ensure we have
   the correct argument positioning (i.e. check that the first argument is at
   the end of the word etc).
   The instructions with all-zero register numbers are there to ensure the
   previous encoding didn't just "happen" to fit -- so that changing the
   registers changes the correct part of the word.
   Each of the numbered patterns begins and ends with a 1, so we can replace
   them with all-zeros and see that the entire range has changed.  */
// SVE
fmmla z17.s, z21.s, z27.s
fmmla z0.s, z0.s, z0.s

gas/testsuite/gas/aarch64/f64mm.d

@@ -0,0 +1,62 @@
#as: -march=armv8.6-a+sve+f64mm
#objdump: -dr
.*: file format .*
Disassembly of section \.text:
0000000000000000 <\.text>:
*[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
*[0-9a-f]+: 64c0e400 fmmla z0\.d, z0\.d, z0\.d
*[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
*[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
*[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
*[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0\]
*[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27\]
*[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0\]
*[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27\]
*[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0\]
*[0-9a-f]+: a43b1411 ld1rob {z17\.b}, p5/z, \[x0, x27\]
*[0-9a-f]+: a4200000 ld1rob {z0\.b}, p0/z, \[x0, x0\]
*[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27\]
*[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0\]
*[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27\]
*[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0\]
*[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27\]
*[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0\]
*[0-9a-f]+: a42037f1 ld1rob {z17\.b}, p5/z, \[sp\]
*[0-9a-f]+: a42723e0 ld1rob {z0\.b}, p0/z, \[sp, #224\]
*[0-9a-f]+: a42823e0 ld1rob {z0\.b}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a4a037f1 ld1roh {z17\.h}, p5/z, \[sp\]
*[0-9a-f]+: a4a723e0 ld1roh {z0\.h}, p0/z, \[sp, #224\]
*[0-9a-f]+: a4a823e0 ld1roh {z0\.h}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a52037f1 ld1row {z17\.s}, p5/z, \[sp\]
*[0-9a-f]+: a52723e0 ld1row {z0\.s}, p0/z, \[sp, #224\]
*[0-9a-f]+: a52823e0 ld1row {z0\.s}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a5a037f1 ld1rod {z17\.d}, p5/z, \[sp\]
*[0-9a-f]+: a5a723e0 ld1rod {z0\.d}, p0/z, \[sp, #224\]
*[0-9a-f]+: a5a823e0 ld1rod {z0\.d}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a4203411 ld1rob {z17\.b}, p5/z, \[x0\]
*[0-9a-f]+: a4272000 ld1rob {z0\.b}, p0/z, \[x0, #224\]
*[0-9a-f]+: a4282000 ld1rob {z0\.b}, p0/z, \[x0, #-256\]
*[0-9a-f]+: a4a03411 ld1roh {z17\.h}, p5/z, \[x0\]
*[0-9a-f]+: a4a72000 ld1roh {z0\.h}, p0/z, \[x0, #224\]
*[0-9a-f]+: a4a82000 ld1roh {z0\.h}, p0/z, \[x0, #-256\]
*[0-9a-f]+: a5203411 ld1row {z17\.s}, p5/z, \[x0\]
*[0-9a-f]+: a5272000 ld1row {z0\.s}, p0/z, \[x0, #224\]
*[0-9a-f]+: a5282000 ld1row {z0\.s}, p0/z, \[x0, #-256\]
*[0-9a-f]+: a5a03411 ld1rod {z17\.d}, p5/z, \[x0\]
*[0-9a-f]+: a5a72000 ld1rod {z0\.d}, p0/z, \[x0, #224\]
*[0-9a-f]+: a5a82000 ld1rod {z0\.d}, p0/z, \[x0, #-256\]
*[0-9a-f]+: 05a502b1 zip1 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00000 zip1 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a506b1 zip2 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00400 zip2 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a50ab1 uzip1 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00800 uzip1 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a50eb1 uzip2 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00c00 uzip2 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a51ab1 trn1 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a01800 trn1 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a51eb1 trn2 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a01c00 trn2 z0\.q, z0\.q, z0\.q

gas/testsuite/gas/aarch64/f64mm.s

@@ -0,0 +1,71 @@
/* The instructions with non-zero register numbers are there to ensure we have
   the correct argument positioning (i.e. check that the first argument is at
   the end of the word etc).
   The instructions with all-zero register numbers are there to ensure the
   previous encoding didn't just "happen" to fit -- so that changing the
   registers changes the correct part of the word.
   Each of the numbered patterns begins and ends with a 1, so we can replace
   them with all-zeros and see that the entire range has changed.  */
// SVE
fmmla z17.d, z21.d, z27.d
fmmla z0.d, z0.d, z0.d
ld1rob { z17.b }, p5/z, [sp, x27]
ld1rob { z0.b }, p0/z, [sp, x0]
ld1roh { z17.h }, p5/z, [sp, x27]
ld1roh { z0.h }, p0/z, [sp, x0]
ld1row { z17.s }, p5/z, [sp, x27]
ld1row { z0.s }, p0/z, [sp, x0]
ld1rod { z17.d }, p5/z, [sp, x27]
ld1rod { z0.d }, p0/z, [sp, x0]
ld1rob { z17.b }, p5/z, [x0, x27]
ld1rob { z0.b }, p0/z, [x0, x0]
ld1roh { z17.h }, p5/z, [x0, x27]
ld1roh { z0.h }, p0/z, [x0, x0]
ld1row { z17.s }, p5/z, [x0, x27]
ld1row { z0.s }, p0/z, [x0, x0]
ld1rod { z17.d }, p5/z, [x0, x27]
ld1rod { z0.d }, p0/z, [x0, x0]
ld1rob { z17.b }, p5/z, [sp, #0]
ld1rob { z0.b }, p0/z, [sp, #224]
ld1rob { z0.b }, p0/z, [sp, #-256]
ld1roh { z17.h }, p5/z, [sp, #0]
ld1roh { z0.h }, p0/z, [sp, #224]
ld1roh { z0.h }, p0/z, [sp, #-256]
ld1row { z17.s }, p5/z, [sp, #0]
ld1row { z0.s }, p0/z, [sp, #224]
ld1row { z0.s }, p0/z, [sp, #-256]
ld1rod { z17.d }, p5/z, [sp, #0]
ld1rod { z0.d }, p0/z, [sp, #224]
ld1rod { z0.d }, p0/z, [sp, #-256]
ld1rob { z17.b }, p5/z, [x0, #0]
ld1rob { z0.b }, p0/z, [x0, #224]
ld1rob { z0.b }, p0/z, [x0, #-256]
ld1roh { z17.h }, p5/z, [x0, #0]
ld1roh { z0.h }, p0/z, [x0, #224]
ld1roh { z0.h }, p0/z, [x0, #-256]
ld1row { z17.s }, p5/z, [x0, #0]
ld1row { z0.s }, p0/z, [x0, #224]
ld1row { z0.s }, p0/z, [x0, #-256]
ld1rod { z17.d }, p5/z, [x0, #0]
ld1rod { z0.d }, p0/z, [x0, #224]
ld1rod { z0.d }, p0/z, [x0, #-256]
zip1 z17.q, z21.q, z5.q
zip1 z0.q, z0.q, z0.q
zip2 z17.q, z21.q, z5.q
zip2 z0.q, z0.q, z0.q
uzip1 z17.q, z21.q, z5.q
uzip1 z0.q, z0.q, z0.q
uzip2 z17.q, z21.q, z5.q
uzip2 z0.q, z0.q, z0.q
trn1 z17.q, z21.q, z5.q
trn1 z0.q, z0.q, z0.q
trn2 z17.q, z21.q, z5.q
trn2 z0.q, z0.q, z0.q

gas/testsuite/gas/aarch64/i8mm.d

@@ -0,0 +1,43 @@
#as: -march=armv8.6-a+sve
#objdump: -dr
.*: file format .*
Disassembly of section \.text:
0000000000000000 <\.text>:
*[0-9a-f]+: 451b9ab1 smmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 45009800 smmla z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 45db9ab1 ummla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 45c09800 ummla z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 459b9ab1 usmmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 45809800 usmmla z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 449b7ab1 usdot z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 44807800 usdot z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 44bf1ab1 usdot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 44b81800 usdot z0\.s, z0\.b, z0\.b\[3\]
*[0-9a-f]+: 44a71ab1 usdot z17\.s, z21\.b, z7\.b\[0\]
*[0-9a-f]+: 44a01800 usdot z0\.s, z0\.b, z0\.b\[0\]
*[0-9a-f]+: 44bf1eb1 sudot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 44b81c00 sudot z0\.s, z0\.b, z0\.b\[3\]
*[0-9a-f]+: 44a71eb1 sudot z17\.s, z21\.b, z7\.b\[0\]
*[0-9a-f]+: 44a01c00 sudot z0\.s, z0\.b, z0\.b\[0\]
*[0-9a-f]+: 4e9ba6b1 smmla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 4e9ba6b1 smmla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 6e9ba6b1 ummla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 6e80a400 ummla v0\.4s, v0\.16b, v0\.16b
*[0-9a-f]+: 4e80ac00 usmmla v0\.4s, v0\.16b, v0\.16b
*[0-9a-f]+: 4e9baeb1 usmmla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
*[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
*[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
*[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
*[0-9a-f]+: 4fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
*[0-9a-f]+: 4fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
*[0-9a-f]+: 4f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
*[0-9a-f]+: 4f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
*[0-9a-f]+: 4f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
*[0-9a-f]+: 4f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
*[0-9a-f]+: 4f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
*[0-9a-f]+: 4f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]

gas/testsuite/gas/aarch64/i8mm.s

@@ -0,0 +1,56 @@
/* The instructions with non-zero register numbers are there to ensure we have
   the correct argument positioning (i.e. check that the first argument is at
   the end of the word etc).
   The instructions with all-zero register numbers are there to ensure the
   previous encoding didn't just "happen" to fit -- so that changing the
   registers changes the correct part of the word.
   Each of the numbered patterns begins and ends with a 1, so we can replace
   them with all-zeros and see that the entire range has changed.  */
// SVE
smmla z17.s, z21.b, z27.b
smmla z0.s, z0.b, z0.b
ummla z17.s, z21.b, z27.b
ummla z0.s, z0.b, z0.b
usmmla z17.s, z21.b, z27.b
usmmla z0.s, z0.b, z0.b
usdot z17.s, z21.b, z27.b
usdot z0.s, z0.b, z0.b
usdot z17.s, z21.b, z7.b[3]
usdot z0.s, z0.b, z0.b[3]
usdot z17.s, z21.b, z7.b[0]
usdot z0.s, z0.b, z0.b[0]
sudot z17.s, z21.b, z7.b[3]
sudot z0.s, z0.b, z0.b[3]
sudot z17.s, z21.b, z7.b[0]
sudot z0.s, z0.b, z0.b[0]
// SIMD
smmla v17.4s, v21.16b, v27.16b
smmla v17.4s, v21.16b, v27.16b
ummla v17.4s, v21.16b, v27.16b
ummla v0.4s, v0.16b, v0.16b
usmmla v0.4s, v0.16b, v0.16b
usmmla v17.4s, v21.16b, v27.16b
usdot v17.2s, v21.8b, v27.8b
usdot v0.2s, v0.8b, v0.8b
usdot v17.4s, v21.16b, v27.16b
usdot v0.4s, v0.16b, v0.16b
usdot v17.2s, v21.8b, v27.4b[3]
usdot v0.2s, v0.8b, v0.4b[3]
usdot v17.2s, v21.8b, v27.4b[0]
usdot v0.2s, v0.8b, v0.4b[0]
sudot v17.4s, v21.16b, v27.4b[3]
sudot v0.4s, v0.16b, v0.4b[3]
sudot v17.4s, v21.16b, v27.4b[0]
sudot v0.4s, v0.16b, v0.4b[0]

gas/testsuite/gas/aarch64/sve-movprfx-mm.d

@@ -0,0 +1,24 @@
#as: -march=armv8.6-a+f32mm+f64mm+sve
#objdump: -dr
.*: file format .*
Disassembly of section \.text:
0000000000000000 <\.text>:
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 451b9ab1 smmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 45db9ab1 ummla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 459b9ab1 usmmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 449b7ab1 usdot z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 44bf1ab1 usdot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 44bf1eb1 sudot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d

gas/testsuite/gas/aarch64/sve-movprfx-mm.s

@@ -0,0 +1,25 @@
/* MOVPRFX tests for matrix multiply instructions */
movprfx z17, z0
smmla z17.s, z21.b, z27.b
movprfx z17, z0
ummla z17.s, z21.b, z27.b
movprfx z17, z0
usmmla z17.s, z21.b, z27.b
movprfx z17, z0
usdot z17.s, z21.b, z27.b
movprfx z17, z0
usdot z17.s, z21.b, z7.b[3]
movprfx z17, z0
sudot z17.s, z21.b, z7.b[3]
movprfx z17, z0
fmmla z17.s, z21.s, z27.s
movprfx z17, z0
fmmla z17.d, z21.d, z27.d
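These pairs exercise the C_SCAN_MOVPRFX constraint: the instruction
that follows a movprfx must write the register movprfx initialised.  A
rough standalone sketch of that rule (the real checks in
opcodes/aarch64-opc.c also cover qualifiers, predicates and register
overlap):

/* Sketch of the pairing rule C_SCAN_MOVPRFX asks the checker to
   enforce: the destination of movprfx must be the destination of the
   immediately following instruction.  */
struct decoded_insn
{
  int is_movprfx;
  unsigned dest_regno;
};

static int
movprfx_pair_ok (const struct decoded_insn *prfx,
		 const struct decoded_insn *next)
{
  if (!prfx->is_movprfx)
    return 1;				/* Nothing to check.  */
  return prfx->dest_regno == next->dest_regno;
}

int
main (void)
{
  struct decoded_insn prfx = { 1, 17 };	/* movprfx z17, z0  */
  struct decoded_insn mmla = { 0, 17 };	/* smmla z17.s, z21.b, z27.b  */
  return movprfx_pair_ok (&prfx, &mmla) ? 0 : 1;
}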

include/ChangeLog

@@ -1,3 +1,12 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* opcode/aarch64.h (AARCH64_FEATURE_I8MM): New.
(AARCH64_FEATURE_F32MM): New.
(AARCH64_FEATURE_F64MM): New.
(AARCH64_OPND_SVE_ADDR_RI_S4x32): New.
(enum aarch64_insn_class): Add new instruction class "aarch64_misc" for
instructions that do not require special handling.
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>

include/opcode/aarch64.h

@@ -91,6 +91,11 @@ typedef uint32_t aarch64_insn;
/* Transactional Memory Extension. */
#define AARCH64_FEATURE_TME 0x2000000000000ULL
/* Matrix Multiply instructions. */
#define AARCH64_FEATURE_I8MM 0x10000000000000ULL
#define AARCH64_FEATURE_F32MM 0x20000000000000ULL
#define AARCH64_FEATURE_F64MM 0x40000000000000ULL
/* SVE2 instructions. */
#define AARCH64_FEATURE_SVE2 0x000000010
#define AARCH64_FEATURE_SVE2_AES 0x000000080
@@ -133,7 +138,8 @@ typedef uint32_t aarch64_insn;
| AARCH64_FEATURE_SSBS)
#define AARCH64_ARCH_V8_6 AARCH64_FEATURE (AARCH64_ARCH_V8_5, \
AARCH64_FEATURE_V8_6 \
| AARCH64_FEATURE_BFLOAT16)
| AARCH64_FEATURE_BFLOAT16 \
| AARCH64_FEATURE_I8MM)
#define AARCH64_ARCH_NONE AARCH64_FEATURE (0, 0)
#define AARCH64_ANY AARCH64_FEATURE (-1, 0) /* Any basic core. */
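Since feature sets are bit masks, folding I8MM into AARCH64_ARCH_V8_6
is plain bitwise arithmetic.  A toy standalone illustration, using a
hypothetical value for the BFloat16 bit and the I8MM value from this
patch:

#include <assert.h>
#include <stdint.h>

#define FEAT_BFLOAT16 0x8000000000000ULL   /* hypothetical value */
#define FEAT_I8MM     0x10000000000000ULL  /* value from this patch */

int
main (void)
{
  /* ... | earlier architecture bits, omitted here.  */
  uint64_t armv8_6 = FEAT_BFLOAT16 | FEAT_I8MM;

  /* -march=armv8.6-a therefore enables i8mm by default.  */
  assert ((armv8_6 & FEAT_I8MM) != 0);
  return 0;
}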
@@ -322,6 +328,7 @@ enum aarch64_opnd
AARCH64_OPND_BTI_TARGET, /* BTI {<target>}. */
AARCH64_OPND_SVE_ADDR_RI_S4x16, /* SVE [<Xn|SP>, #<simm4>*16]. */
AARCH64_OPND_SVE_ADDR_RI_S4x32, /* SVE [<Xn|SP>, #<simm4>*32]. */
AARCH64_OPND_SVE_ADDR_RI_S4xVL, /* SVE [<Xn|SP>, #<simm4>, MUL VL]. */
AARCH64_OPND_SVE_ADDR_RI_S4x2xVL, /* SVE [<Xn|SP>, #<simm4>*2, MUL VL]. */
AARCH64_OPND_SVE_ADDR_RI_S4x3xVL, /* SVE [<Xn|SP>, #<simm4>*3, MUL VL]. */
@@ -520,6 +527,7 @@ enum aarch64_opnd_qualifier
enum aarch64_insn_class
{
aarch64_misc,
addsub_carry,
addsub_ext,
addsub_imm,

opcodes/ChangeLog

@@ -1,3 +1,31 @@
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
* aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve,
aarch64_feature_f64mm_sve, aarch64_feature_i8mm, aarch64_feature_f32mm,
aarch64_feature_f64mm): New feature sets.
(INT8MATMUL_INSN, F64MATMUL_SVE_INSN, F64MATMUL_INSN,
F32MATMUL_SVE_INSN, F32MATMUL_INSN): New macros to define matrix multiply
instructions.
(I8MM_SVE, F32MM_SVE, F64MM_SVE, I8MM, F32MM, F64MM): New feature set
macros.
(QL_MMLA64, OP_SVE_SBB, OP_SVE_QQQ): New qualifiers.
(INT8MATMUL_SVE_INSNC, F64MATMUL_SVE_INSNC, F32MATMUL_SVE_INSNC): New
macros to define matrix multiply instructions that support the movprfx
constraint.
(aarch64_opcode_table): Support for SVE_ADDR_RI_S4x32.
(aarch64_opcode_table): Define new instructions smmla,
ummla, usmmla, usdot, sudot, fmmla, ld1rob, ld1roh, ld1row, ld1rod,
zip{1/2}, uzip{1/2}, trn{1/2}.
* aarch64-opc.c (operand_general_constraint_met_p): Handle
AARCH64_OPND_SVE_ADDR_RI_S4x32.
(aarch64_print_operand): Handle AARCH64_OPND_SVE_ADDR_RI_S4x32.
* aarch64-dis-2.c (aarch64_opcode_lookup_1, aarch64_find_next_opcode):
Account for new instructions.
* aarch64-asm-2.c (aarch64_insert_operand): Support the new
S4x32 operand.
* aarch64-opc-2.c (aarch64_operands): Support the new S4x32 operand.
2019-11-07 Mihail Ionescu <mihail.ionescu@arm.com>
2019-11-07 Matthew Malcomson <matthew.malcomson@arm.com>

opcodes/aarch64-asm-2.c

@@ -628,7 +628,6 @@ aarch64_insert_operand (const aarch64_operand *self,
case 28:
case 29:
case 30:
case 162:
case 163:
case 164:
case 165:
@@ -638,7 +637,7 @@ aarch64_insert_operand (const aarch64_operand *self,
case 169:
case 170:
case 171:
case 186:
case 172:
case 187:
case 188:
case 189:
@@ -647,8 +646,9 @@ aarch64_insert_operand (const aarch64_operand *self,
case 192:
case 193:
case 194:
case 200:
case 203:
case 195:
case 201:
case 204:
return aarch64_ins_regno (self, info, code, inst, errors);
case 14:
return aarch64_ins_reg_extended (self, info, code, inst, errors);
@@ -660,7 +660,7 @@ aarch64_insert_operand (const aarch64_operand *self,
case 32:
case 33:
case 34:
case 206:
case 207:
return aarch64_ins_reglane (self, info, code, inst, errors);
case 35:
return aarch64_ins_reglist (self, info, code, inst, errors);
@@ -694,9 +694,8 @@ aarch64_insert_operand (const aarch64_operand *self,
case 80:
case 81:
case 82:
case 159:
case 161:
case 178:
case 160:
case 162:
case 179:
case 180:
case 181:
@@ -704,7 +703,8 @@ aarch64_insert_operand (const aarch64_operand *self,
case 183:
case 184:
case 185:
case 205:
case 186:
case 206:
return aarch64_ins_imm (self, info, code, inst, errors);
case 43:
case 44:
@@ -714,10 +714,10 @@ aarch64_insert_operand (const aarch64_operand *self,
case 47:
return aarch64_ins_advsimd_imm_modified (self, info, code, inst, errors);
case 51:
case 149:
case 150:
return aarch64_ins_fpimm (self, info, code, inst, errors);
case 68:
case 157:
case 158:
return aarch64_ins_limm (self, info, code, inst, errors);
case 69:
return aarch64_ins_aimm (self, info, code, inst, errors);
@@ -727,11 +727,11 @@ aarch64_insert_operand (const aarch64_operand *self,
return aarch64_ins_fbits (self, info, code, inst, errors);
case 73:
case 74:
case 154:
case 155:
return aarch64_ins_imm_rotate2 (self, info, code, inst, errors);
case 75:
case 153:
case 155:
case 154:
case 156:
return aarch64_ins_imm_rotate1 (self, info, code, inst, errors);
case 76:
case 77:
@@ -774,22 +774,22 @@ aarch64_insert_operand (const aarch64_operand *self,
case 106:
return aarch64_ins_hint (self, info, code, inst, errors);
case 107:
return aarch64_ins_sve_addr_ri_s4 (self, info, code, inst, errors);
case 108:
return aarch64_ins_sve_addr_ri_s4 (self, info, code, inst, errors);
case 109:
case 110:
case 111:
return aarch64_ins_sve_addr_ri_s4xvl (self, info, code, inst, errors);
case 112:
return aarch64_ins_sve_addr_ri_s6xvl (self, info, code, inst, errors);
return aarch64_ins_sve_addr_ri_s4xvl (self, info, code, inst, errors);
case 113:
return aarch64_ins_sve_addr_ri_s9xvl (self, info, code, inst, errors);
return aarch64_ins_sve_addr_ri_s6xvl (self, info, code, inst, errors);
case 114:
return aarch64_ins_sve_addr_ri_s9xvl (self, info, code, inst, errors);
case 115:
case 116:
case 117:
return aarch64_ins_sve_addr_ri_u6 (self, info, code, inst, errors);
case 118:
return aarch64_ins_sve_addr_ri_u6 (self, info, code, inst, errors);
case 119:
case 120:
case 121:
@@ -803,8 +803,8 @@ aarch64_insert_operand (const aarch64_operand *self,
case 129:
case 130:
case 131:
return aarch64_ins_sve_addr_rr_lsl (self, info, code, inst, errors);
case 132:
return aarch64_ins_sve_addr_rr_lsl (self, info, code, inst, errors);
case 133:
case 134:
case 135:
@@ -812,52 +812,53 @@ aarch64_insert_operand (const aarch64_operand *self,
case 137:
case 138:
case 139:
return aarch64_ins_sve_addr_rz_xtw (self, info, code, inst, errors);
case 140:
return aarch64_ins_sve_addr_rz_xtw (self, info, code, inst, errors);
case 141:
case 142:
case 143:
return aarch64_ins_sve_addr_zi_u5 (self, info, code, inst, errors);
case 144:
return aarch64_ins_sve_addr_zz_lsl (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zi_u5 (self, info, code, inst, errors);
case 145:
return aarch64_ins_sve_addr_zz_sxtw (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zz_lsl (self, info, code, inst, errors);
case 146:
return aarch64_ins_sve_addr_zz_uxtw (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zz_sxtw (self, info, code, inst, errors);
case 147:
return aarch64_ins_sve_aimm (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zz_uxtw (self, info, code, inst, errors);
case 148:
return aarch64_ins_sve_aimm (self, info, code, inst, errors);
case 149:
return aarch64_ins_sve_asimm (self, info, code, inst, errors);
case 150:
return aarch64_ins_sve_float_half_one (self, info, code, inst, errors);
case 151:
return aarch64_ins_sve_float_half_two (self, info, code, inst, errors);
return aarch64_ins_sve_float_half_one (self, info, code, inst, errors);
case 152:
return aarch64_ins_sve_float_half_two (self, info, code, inst, errors);
case 153:
return aarch64_ins_sve_float_zero_one (self, info, code, inst, errors);
case 156:
case 157:
return aarch64_ins_inv_limm (self, info, code, inst, errors);
case 158:
case 159:
return aarch64_ins_sve_limm_mov (self, info, code, inst, errors);
case 160:
case 161:
return aarch64_ins_sve_scale (self, info, code, inst, errors);
case 172:
case 173:
case 174:
return aarch64_ins_sve_shlimm (self, info, code, inst, errors);
case 175:
return aarch64_ins_sve_shlimm (self, info, code, inst, errors);
case 176:
case 177:
case 178:
return aarch64_ins_sve_shrimm (self, info, code, inst, errors);
case 195:
case 196:
case 197:
case 198:
case 199:
case 200:
return aarch64_ins_sve_quad_index (self, info, code, inst, errors);
case 201:
return aarch64_ins_sve_index (self, info, code, inst, errors);
case 202:
case 204:
return aarch64_ins_sve_index (self, info, code, inst, errors);
case 203:
case 205:
return aarch64_ins_sve_reglist (self, info, code, inst, errors);
default: assert (0); abort ();
}
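Most of the churn in this generated file is mechanical:
SVE_ADDR_RI_S4x32 is inserted into enum aarch64_opnd right after
SVE_ADDR_RI_S4x16, so every later enumerator, and hence every later
case label, shifts up by one.  In miniature:

#include <assert.h>

/* Inserting one enumerator mid-enum renumbers every later one, which
   is why the case labels above shift by one without any behavioural
   change.  */
enum opnd_before { B_A, B_S4x16, B_S4xVL };		/* B_S4xVL == 2 */
enum opnd_after  { A_A, A_S4x16, A_S4x32, A_S4xVL };	/* A_S4xVL == 3 */

int
main (void)
{
  assert (B_S4xVL == 2 && A_S4xVL == 3);
  return 0;
}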

opcodes/aarch64-dis-2.c
(generated file; diff suppressed because it is too large)

opcodes/aarch64-opc-2.c

@@ -132,6 +132,7 @@ const struct aarch64_operand aarch64_operands[] =
{AARCH64_OPND_CLASS_SYSTEM, "BARRIER_PSB", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {}, "the PSB option name CSYNC"},
{AARCH64_OPND_CLASS_SYSTEM, "BTI", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {}, "BTI targets j/c/jc"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x16", 4 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 16"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x32", 5 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 32"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4xVL", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by VL"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x2xVL", 1 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 2*VL"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x3xVL", 2 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 3*VL"},

opcodes/aarch64-opc.c

@@ -1899,6 +1899,7 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
break;
case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
min_value = -8;
max_value = 7;
goto sve_imm_offset;
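With min_value -8, max_value 7 and a x32 scale, the accepted byte
offsets for the ld1ro forms run from -256 to 224 in steps of 32,
exactly the bounds the f64mm tests probe (#224, #-256).  A standalone
sketch of that range check:

#include <assert.h>

/* A 4-bit signed immediate scaled by 32: byte offsets -256..224 in
   steps of 32.  */
static int
s4x32_offset_ok (long off)
{
  return off % 32 == 0 && off / 32 >= -8 && off / 32 <= 7;
}

int
main (void)
{
  assert (s4x32_offset_ok (224) && s4x32_offset_ok (-256));
  assert (!s4x32_offset_ok (256) && !s4x32_offset_ok (16));
  return 0;
}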
@@ -3644,6 +3645,7 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_ADDR_SIMM13:
case AARCH64_OPND_ADDR_OFFSET:
case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
case AARCH64_OPND_SVE_ADDR_RI_S4xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x2xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x3xVL:

opcodes/aarch64-tbl.h

@@ -1519,6 +1519,10 @@
{ \
QLF3(S_D,S_D,S_D), \
}
#define OP_SVE_QQQ \
{ \
QLF3(S_Q,S_Q,S_Q), \
}
#define OP_SVE_DDDD \
{ \
QLF4(S_D,S_D,S_D,S_D), \
@@ -1663,6 +1667,10 @@
{ \
QLF3(S_S,P_Z,S_S), \
}
#define OP_SVE_SBB \
{ \
QLF3(S_S,S_B,S_B), \
}
#define OP_SVE_SBBU \
{ \
QLF4(S_S,S_B,S_B,NIL), \
@@ -2272,6 +2280,12 @@
QLF3(V_4S, V_8H, S_2H),\
}
/* e.g. SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B */
#define QL_MMLA64 \
{ \
QLF3(V_4S, V_16B, V_16B),\
}
/* e.g. BFMMLA <Vd>.4s, <Vn>.8h, <Vm>.8h */
#define QL_BFMMLA \
{ \
@@ -2393,6 +2407,17 @@ static const aarch64_feature_set aarch64_feature_sve2bitperm =
AARCH64_FEATURE (AARCH64_FEATURE_SVE2 | AARCH64_FEATURE_SVE2_BITPERM, 0);
static const aarch64_feature_set aarch64_feature_v8_6 =
AARCH64_FEATURE (AARCH64_FEATURE_V8_6, 0);
static const aarch64_feature_set aarch64_feature_i8mm =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_I8MM, 0);
static const aarch64_feature_set aarch64_feature_i8mm_sve =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_I8MM
| AARCH64_FEATURE_SVE, 0);
static const aarch64_feature_set aarch64_feature_f32mm_sve =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_F32MM
| AARCH64_FEATURE_SVE, 0);
static const aarch64_feature_set aarch64_feature_f64mm_sve =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_F64MM
| AARCH64_FEATURE_SVE, 0);
#define CORE &aarch64_feature_v8
@@ -2435,8 +2460,13 @@ static const aarch64_feature_set aarch64_feature_v8_6 =
#define SVE2_SM4 &aarch64_feature_sve2sm4
#define SVE2_BITPERM &aarch64_feature_sve2bitperm
#define ARMV8_6 &aarch64_feature_v8_6
#define ARMV8_6_SVE &aarch64_feature_v8_6
#define BFLOAT16_SVE &aarch64_feature_bfloat16_sve
#define BFLOAT16 &aarch64_feature_bfloat16
#define I8MM_SVE &aarch64_feature_i8mm_sve
#define F32MM_SVE &aarch64_feature_f32mm_sve
#define F64MM_SVE &aarch64_feature_f64mm_sve
#define I8MM &aarch64_feature_i8mm
#define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS, 0, 0, NULL }
@@ -2534,6 +2564,16 @@ static const aarch64_feature_set aarch64_feature_v8_6 =
CONSTRAINTS, TIED, NULL }
#define BFLOAT16_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, 0, BFLOAT16, OPS, QUALS, FLAGS, 0, 0, NULL }
#define INT8MATMUL_SVE_INSNC(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS, CONSTRAINTS, TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, I8MM_SVE, OPS, QUALS, FLAGS, CONSTRAINTS, TIED, NULL }
#define INT8MATMUL_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, 0, I8MM, OPS, QUALS, FLAGS, 0, 0, NULL }
#define F64MATMUL_SVE_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS,TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, F64MM_SVE, OPS, QUALS, FLAGS, 0, TIED, NULL }
#define F64MATMUL_SVE_INSNC(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS, CONSTRAINTS, TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, F64MM_SVE, OPS, QUALS, FLAGS, CONSTRAINTS, TIED, NULL }
#define F32MATMUL_SVE_INSNC(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS, CONSTRAINTS, TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, F32MM_SVE, OPS, QUALS, FLAGS, CONSTRAINTS, TIED, NULL }
struct aarch64_opcode aarch64_opcode_table[] =
{
@@ -5032,6 +5072,37 @@ struct aarch64_opcode aarch64_opcode_table[] =
V8_4_INSN ("stlur", 0xd9000000, 0xffe00c00, ldst_unscaled, OP2 (Rt, ADDR_OFFSET), QL_STLX, 0),
V8_4_INSN ("ldapur", 0xd9400000, 0xffe00c00, ldst_unscaled, OP2 (Rt, ADDR_OFFSET), QL_STLX, 0),
/* Matrix Multiply instructions. */
INT8MATMUL_SVE_INSNC ("smmla", 0x45009800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("ummla", 0x45c09800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("usmmla", 0x45809800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("usdot", 0x44807800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("usdot", 0x44a01800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("sudot", 0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
F32MATMUL_SVE_INSNC ("fmmla", 0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0),
F64MATMUL_SVE_INSNC ("fmmla", 0x64c0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
F64MATMUL_SVE_INSN ("ld1rob", 0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1roh", 0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_HZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1row", 0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_SZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1rod", 0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_DZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1rob", 0xa4202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_BZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1roh", 0xa4a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_HZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1row", 0xa5202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_SZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1rod", 0xa5a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_DZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("zip1", 0x05a00000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("zip2", 0x05a00400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("uzip1", 0x05a00800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("uzip2", 0x05a00c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("trn1", 0x05a01800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("trn2", 0x05a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
/* Matrix Multiply advanced SIMD instructions. */
INT8MATMUL_INSN ("smmla", 0x4e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
INT8MATMUL_INSN ("ummla", 0x6e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
INT8MATMUL_INSN ("usmmla", 0x4e80ac00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
INT8MATMUL_INSN ("usdot", 0x4e809c00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
INT8MATMUL_INSN ("usdot", 0x4f80f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
INT8MATMUL_INSN ("sudot", 0x4f00f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
/* BFloat instructions. */
BFLOAT16_SVE_INSNC ("bfdot", 0x64608000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),
BFLOAT16_SVE_INSNC ("bfdot", 0x64604000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),
@@ -5263,6 +5334,9 @@ struct aarch64_opcode aarch64_opcode_table[] =
Y(ADDRESS, sve_addr_ri_s4, "SVE_ADDR_RI_S4x16", \
4 << OPD_F_OD_LSB, F(FLD_Rn), \
"an address with a 4-bit signed offset, multiplied by 16") \
Y(ADDRESS, sve_addr_ri_s4, "SVE_ADDR_RI_S4x32", \
5 << OPD_F_OD_LSB, F(FLD_Rn), \
"an address with a 4-bit signed offset, multiplied by 32") \
Y(ADDRESS, sve_addr_ri_s4xvl, "SVE_ADDR_RI_S4xVL", \
0 << OPD_F_OD_LSB, F(FLD_Rn), \
"an address with a 4-bit signed offset, multiplied by VL") \