[binutils][aarch64] Matrix Multiply extension enablement [8/X]
Hi,

This patch is part of a series that adds support for Armv8.6-A (the Matrix Multiply and BFloat16 extensions) to binutils.

This patch introduces the Matrix Multiply (Int8, F32, F64) extensions to the aarch64 backend. The following instructions are added: {s/u}mmla, usmmla, {us/su}dot, fmmla, ld1rob, ld1roh, ld1row, ld1rod, uzip{1/2}, trn{1/2}.

Committed on behalf of Mihail Ionescu.

gas/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

* config/tc-aarch64.c: Add new arch features to support the mm extension.
(parse_operands): Add new operand.
* testsuite/gas/aarch64/i8mm.s: New test.
* testsuite/gas/aarch64/i8mm.d: New test.
* testsuite/gas/aarch64/f32mm.s: New test.
* testsuite/gas/aarch64/f32mm.d: New test.
* testsuite/gas/aarch64/f64mm.s: New test.
* testsuite/gas/aarch64/f64mm.d: New test.
* testsuite/gas/aarch64/sve-movprfx-mm.s: New test.
* testsuite/gas/aarch64/sve-movprfx-mm.d: New test.

include/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

* opcode/aarch64.h (AARCH64_FEATURE_I8MM): New.
(AARCH64_FEATURE_F32MM): New.
(AARCH64_FEATURE_F64MM): New.
(AARCH64_OPND_SVE_ADDR_RI_S4x32): New.
(enum aarch64_insn_class): Add new instruction class "aarch64_misc" for
instructions that do not require special handling.

opcodes/ChangeLog:

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

* aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve,
aarch64_feature_f64mm_sve, aarch64_feature_i8mm, aarch64_feature_f32mm,
aarch64_feature_f64mm): New feature sets.
(INT8MATMUL_INSN, F64MATMUL_SVE_INSN, F64MATMUL_INSN,
F32MATMUL_SVE_INSN, F32MATMUL_INSN): New macros to define matrix
multiply instructions.
(I8MM_SVE, F32MM_SVE, F64MM_SVE, I8MM, F32MM, F64MM): New feature set
macros.
(QL_MMLA64, OP_SVE_SBB): New qualifiers.
(OP_SVE_QQQ): New qualifier.
(INT8MATMUL_SVE_INSNC, F64MATMUL_SVE_INSNC, F32MATMUL_SVE_INSNC): New
macros to define matrix multiply instructions that support the movprfx
constraint.
(aarch64_opcode_table): Support for SVE_ADDR_RI_S4x32.
(aarch64_opcode_table): Define new instructions smmla, ummla, usmmla,
usdot, sudot, fmmla, ld1rob, ld1roh, ld1row, ld1rod, uzip{1/2}, trn{1/2}.
* aarch64-opc.c (operand_general_constraint_met_p): Handle
AARCH64_OPND_SVE_ADDR_RI_S4x32.
(aarch64_print_operand): Handle AARCH64_OPND_SVE_ADDR_RI_S4x32.
* aarch64-dis-2.c (aarch64_opcode_lookup_1, aarch64_find_next_opcode):
Account for new instructions.
* aarch64-asm-2.c (aarch64_insert_operand): Support the new S4x32
operand.
* aarch64-opc-2.c (aarch64_operands): Support the new S4x32 operand.

Regression tested on arm-none-eabi.

Is it ok for trunk?

Regards,
Mihail
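
As a quick orientation before the per-file changes, here is a minimal usage sketch (not part of the patch); the mnemonics, operands and -march strings are taken from the new test cases below.

    // Assemble with: as -march=armv8.6-a+sve+f32mm+f64mm mm.s
    // (i8mm is pulled in by default from armv8.6-a; f32mm and f64mm must be
    //  requested explicitly, as the documentation table below notes.)
    smmla  z17.s, z21.b, z27.b          // Int8 matrix multiply, SVE form
    smmla  v17.4s, v21.16b, v27.16b     // Int8 matrix multiply, Advanced SIMD form
    fmmla  z17.s, z21.s, z27.s          // F32 matrix multiply (+f32mm)
    fmmla  z17.d, z21.d, z27.d          // F64 matrix multiply (+f64mm)
    ld1rob { z17.b }, p5/z, [sp, x27]   // 256-bit load-and-replicate (+f64mm)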

gas/ChangeLog:
@@ -1,3 +1,16 @@
2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

* config/tc-aarch64.c: Add new arch features to support the mm extension.
(parse_operands): Add new operand.
* testsuite/gas/aarch64/i8mm.s: New test.
* testsuite/gas/aarch64/i8mm.d: New test.
* testsuite/gas/aarch64/f32mm.s: New test.
* testsuite/gas/aarch64/f32mm.d: New test.
* testsuite/gas/aarch64/f64mm.s: New test.
* testsuite/gas/aarch64/f64mm.d: New test.
* testsuite/gas/aarch64/sve-movprfx-mm.s: New test.
* testsuite/gas/aarch64/sve-movprfx-mm.d: New test.

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>
2019-11-07  Barnaby Wilks  <barnaby.wilks@arm.com>

gas/config/tc-aarch64.c:
@@ -6433,6 +6433,7 @@ parse_operands (char *str, const aarch64_opcode *opcode)
break;

case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
case AARCH64_OPND_SVE_ADDR_RI_S4xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x2xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x3xVL:

@@ -9058,6 +9059,12 @@ static const struct aarch64_option_cpu_value_table aarch64_features[] = {
AARCH64_FEATURE (AARCH64_FEATURE_SVE2, 0)},
{"bf16", AARCH64_FEATURE (AARCH64_FEATURE_BFLOAT16, 0),
AARCH64_ARCH_NONE},
{"i8mm", AARCH64_FEATURE (AARCH64_FEATURE_I8MM, 0),
AARCH64_ARCH_NONE},
{"f32mm", AARCH64_FEATURE (AARCH64_FEATURE_F32MM, 0),
AARCH64_ARCH_NONE},
{"f64mm", AARCH64_FEATURE (AARCH64_FEATURE_F64MM, 0),
AARCH64_ARCH_NONE},
{NULL, AARCH64_ARCH_NONE, AARCH64_ARCH_NONE},
};

gas/doc/c-aarch64.texi:
@@ -144,6 +144,12 @@ automatically cause those extensions to be disabled.
@multitable @columnfractions .12 .17 .17 .54
@headitem Extension @tab Minimum Architecture @tab Enabled by default
@tab Description
@item @code{i8mm} @tab ARMv8.2-A @tab ARMv8.6-A or later
@tab Enable Int8 Matrix Multiply extension.
@item @code{f32mm} @tab ARMv8.2-A @tab No
@tab Enable F32 Matrix Multiply extension.
@item @code{f64mm} @tab ARMv8.2-A @tab No
@tab Enable F64 Matrix Multiply extension.
@item @code{bf16} @tab ARMv8.2-A @tab ARMv8.6-A or later
@tab Enable BFloat16 extension.
@item @code{compnum} @tab ARMv8.2-A @tab ARMv8.3-A or later
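
As an illustration of the table above (a sketch, not part of the patch), the new extensions can be requested on the command line or with the corresponding assembler directives; the -march strings below are the ones used by the new tests.

    // as -march=armv8.2-a+i8mm file.s             -- minimum architecture for i8mm
    // as -march=armv8.6-a+sve file.s              -- i8mm is enabled by default from Armv8.6-A
    // as -march=armv8.6-a+f32mm+f64mm+sve file.s  -- f32mm/f64mm are never enabled by default
    .arch armv8.2-a+sve
    .arch_extension f64mm        // equivalent directive form
    fmmla z0.d, z0.d, z0.d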

gas/testsuite/gas/aarch64/f32mm.d (new file):
@@ -0,0 +1,11 @@
#as: -march=armv8.6-a+sve+f32mm
#objdump: -dr

.*: file format .*


Disassembly of section \.text:

0000000000000000 <\.text>:
*[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
*[0-9a-f]+: 64a0e400 fmmla z0\.s, z0\.s, z0\.s

gas/testsuite/gas/aarch64/f32mm.s (new file):
@@ -0,0 +1,12 @@
/* The instructions with non-zero register numbers are there to ensure we have
   the correct argument positioning (i.e. check that the first argument is at
   the end of the word etc).
   The instructions with all-zero register numbers are to ensure the previous
   encoding didn't just "happen" to fit -- so that if we change the registers
   that changes the correct part of the word.
   Each of the numbered patterns begins and ends with a 1, so we can replace
   them with all-zeros and see that the entire range has changed.  */

// SVE
fmmla z17.s, z21.s, z27.s
fmmla z0.s, z0.s, z0.s

gas/testsuite/gas/aarch64/f64mm.d (new file):
@@ -0,0 +1,62 @@
#as: -march=armv8.6-a+sve+f64mm
#objdump: -dr

.*: file format .*

Disassembly of section \.text:

0000000000000000 <\.text>:
*[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d
*[0-9a-f]+: 64c0e400 fmmla z0\.d, z0\.d, z0\.d
*[0-9a-f]+: a43b17f1 ld1rob {z17\.b}, p5/z, \[sp, x27\]
*[0-9a-f]+: a42003e0 ld1rob {z0\.b}, p0/z, \[sp, x0\]
*[0-9a-f]+: a4bb17f1 ld1roh {z17\.h}, p5/z, \[sp, x27\]
*[0-9a-f]+: a4a003e0 ld1roh {z0\.h}, p0/z, \[sp, x0\]
*[0-9a-f]+: a53b17f1 ld1row {z17\.s}, p5/z, \[sp, x27\]
*[0-9a-f]+: a52003e0 ld1row {z0\.s}, p0/z, \[sp, x0\]
*[0-9a-f]+: a5bb17f1 ld1rod {z17\.d}, p5/z, \[sp, x27\]
*[0-9a-f]+: a5a003e0 ld1rod {z0\.d}, p0/z, \[sp, x0\]
*[0-9a-f]+: a43b1411 ld1rob {z17\.b}, p5/z, \[x0, x27\]
*[0-9a-f]+: a4200000 ld1rob {z0\.b}, p0/z, \[x0, x0\]
*[0-9a-f]+: a4bb1411 ld1roh {z17\.h}, p5/z, \[x0, x27\]
*[0-9a-f]+: a4a00000 ld1roh {z0\.h}, p0/z, \[x0, x0\]
*[0-9a-f]+: a53b1411 ld1row {z17\.s}, p5/z, \[x0, x27\]
*[0-9a-f]+: a5200000 ld1row {z0\.s}, p0/z, \[x0, x0\]
*[0-9a-f]+: a5bb1411 ld1rod {z17\.d}, p5/z, \[x0, x27\]
*[0-9a-f]+: a5a00000 ld1rod {z0\.d}, p0/z, \[x0, x0\]
*[0-9a-f]+: a42037f1 ld1rob {z17\.b}, p5/z, \[sp\]
*[0-9a-f]+: a42723e0 ld1rob {z0\.b}, p0/z, \[sp, #224\]
*[0-9a-f]+: a42823e0 ld1rob {z0\.b}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a4a037f1 ld1roh {z17\.h}, p5/z, \[sp\]
*[0-9a-f]+: a4a723e0 ld1roh {z0\.h}, p0/z, \[sp, #224\]
*[0-9a-f]+: a4a823e0 ld1roh {z0\.h}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a52037f1 ld1row {z17\.s}, p5/z, \[sp\]
*[0-9a-f]+: a52723e0 ld1row {z0\.s}, p0/z, \[sp, #224\]
*[0-9a-f]+: a52823e0 ld1row {z0\.s}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a5a037f1 ld1rod {z17\.d}, p5/z, \[sp\]
*[0-9a-f]+: a5a723e0 ld1rod {z0\.d}, p0/z, \[sp, #224\]
*[0-9a-f]+: a5a823e0 ld1rod {z0\.d}, p0/z, \[sp, #-256\]
*[0-9a-f]+: a4203411 ld1rob {z17\.b}, p5/z, \[x0\]
*[0-9a-f]+: a4272000 ld1rob {z0\.b}, p0/z, \[x0, #224\]
*[0-9a-f]+: a4282000 ld1rob {z0\.b}, p0/z, \[x0, #-256\]
*[0-9a-f]+: a4a03411 ld1roh {z17\.h}, p5/z, \[x0\]
*[0-9a-f]+: a4a72000 ld1roh {z0\.h}, p0/z, \[x0, #224\]
*[0-9a-f]+: a4a82000 ld1roh {z0\.h}, p0/z, \[x0, #-256\]
*[0-9a-f]+: a5203411 ld1row {z17\.s}, p5/z, \[x0\]
*[0-9a-f]+: a5272000 ld1row {z0\.s}, p0/z, \[x0, #224\]
*[0-9a-f]+: a5282000 ld1row {z0\.s}, p0/z, \[x0, #-256\]
*[0-9a-f]+: a5a03411 ld1rod {z17\.d}, p5/z, \[x0\]
*[0-9a-f]+: a5a72000 ld1rod {z0\.d}, p0/z, \[x0, #224\]
*[0-9a-f]+: a5a82000 ld1rod {z0\.d}, p0/z, \[x0, #-256\]
*[0-9a-f]+: 05a502b1 zip1 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00000 zip1 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a506b1 zip2 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00400 zip2 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a50ab1 uzip1 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00800 uzip1 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a50eb1 uzip2 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a00c00 uzip2 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a51ab1 trn1 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a01800 trn1 z0\.q, z0\.q, z0\.q
*[0-9a-f]+: 05a51eb1 trn2 z17\.q, z21\.q, z5\.q
*[0-9a-f]+: 05a01c00 trn2 z0\.q, z0\.q, z0\.q

gas/testsuite/gas/aarch64/f64mm.s (new file):
@@ -0,0 +1,71 @@
/* The instructions with non-zero register numbers are there to ensure we have
   the correct argument positioning (i.e. check that the first argument is at
   the end of the word etc).
   The instructions with all-zero register numbers are to ensure the previous
   encoding didn't just "happen" to fit -- so that if we change the registers
   that changes the correct part of the word.
   Each of the numbered patterns begins and ends with a 1, so we can replace
   them with all-zeros and see that the entire range has changed.  */

// SVE
fmmla z17.d, z21.d, z27.d
fmmla z0.d, z0.d, z0.d

ld1rob { z17.b }, p5/z, [sp, x27]
ld1rob { z0.b }, p0/z, [sp, x0]
ld1roh { z17.h }, p5/z, [sp, x27]
ld1roh { z0.h }, p0/z, [sp, x0]
ld1row { z17.s }, p5/z, [sp, x27]
ld1row { z0.s }, p0/z, [sp, x0]
ld1rod { z17.d }, p5/z, [sp, x27]
ld1rod { z0.d }, p0/z, [sp, x0]

ld1rob { z17.b }, p5/z, [x0, x27]
ld1rob { z0.b }, p0/z, [x0, x0]
ld1roh { z17.h }, p5/z, [x0, x27]
ld1roh { z0.h }, p0/z, [x0, x0]
ld1row { z17.s }, p5/z, [x0, x27]
ld1row { z0.s }, p0/z, [x0, x0]
ld1rod { z17.d }, p5/z, [x0, x27]
ld1rod { z0.d }, p0/z, [x0, x0]

ld1rob { z17.b }, p5/z, [sp, #0]
ld1rob { z0.b }, p0/z, [sp, #224]
ld1rob { z0.b }, p0/z, [sp, #-256]
ld1roh { z17.h }, p5/z, [sp, #0]
ld1roh { z0.h }, p0/z, [sp, #224]
ld1roh { z0.h }, p0/z, [sp, #-256]
ld1row { z17.s }, p5/z, [sp, #0]
ld1row { z0.s }, p0/z, [sp, #224]
ld1row { z0.s }, p0/z, [sp, #-256]
ld1rod { z17.d }, p5/z, [sp, #0]
ld1rod { z0.d }, p0/z, [sp, #224]
ld1rod { z0.d }, p0/z, [sp, #-256]

ld1rob { z17.b }, p5/z, [x0, #0]
ld1rob { z0.b }, p0/z, [x0, #224]
ld1rob { z0.b }, p0/z, [x0, #-256]
ld1roh { z17.h }, p5/z, [x0, #0]
ld1roh { z0.h }, p0/z, [x0, #224]
ld1roh { z0.h }, p0/z, [x0, #-256]
ld1row { z17.s }, p5/z, [x0, #0]
ld1row { z0.s }, p0/z, [x0, #224]
ld1row { z0.s }, p0/z, [x0, #-256]
ld1rod { z17.d }, p5/z, [x0, #0]
ld1rod { z0.d }, p0/z, [x0, #224]
ld1rod { z0.d }, p0/z, [x0, #-256]

zip1 z17.q, z21.q, z5.q
zip1 z0.q, z0.q, z0.q
zip2 z17.q, z21.q, z5.q
zip2 z0.q, z0.q, z0.q

uzip1 z17.q, z21.q, z5.q
uzip1 z0.q, z0.q, z0.q
uzip2 z17.q, z21.q, z5.q
uzip2 z0.q, z0.q, z0.q

trn1 z17.q, z21.q, z5.q
trn1 z0.q, z0.q, z0.q
trn2 z17.q, z21.q, z5.q
trn2 z0.q, z0.q, z0.q

gas/testsuite/gas/aarch64/i8mm.d (new file):
@@ -0,0 +1,43 @@
#as: -march=armv8.6-a+sve
#objdump: -dr

.*: file format .*


Disassembly of section \.text:

0000000000000000 <\.text>:
*[0-9a-f]+: 451b9ab1 smmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 45009800 smmla z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 45db9ab1 ummla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 45c09800 ummla z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 459b9ab1 usmmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 45809800 usmmla z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 449b7ab1 usdot z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 44807800 usdot z0\.s, z0\.b, z0\.b
*[0-9a-f]+: 44bf1ab1 usdot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 44b81800 usdot z0\.s, z0\.b, z0\.b\[3\]
*[0-9a-f]+: 44a71ab1 usdot z17\.s, z21\.b, z7\.b\[0\]
*[0-9a-f]+: 44a01800 usdot z0\.s, z0\.b, z0\.b\[0\]
*[0-9a-f]+: 44bf1eb1 sudot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 44b81c00 sudot z0\.s, z0\.b, z0\.b\[3\]
*[0-9a-f]+: 44a71eb1 sudot z17\.s, z21\.b, z7\.b\[0\]
*[0-9a-f]+: 44a01c00 sudot z0\.s, z0\.b, z0\.b\[0\]
*[0-9a-f]+: 4e9ba6b1 smmla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 4e9ba6b1 smmla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 6e9ba6b1 ummla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 6e80a400 ummla v0\.4s, v0\.16b, v0\.16b
*[0-9a-f]+: 4e80ac00 usmmla v0\.4s, v0\.16b, v0\.16b
*[0-9a-f]+: 4e9baeb1 usmmla v17\.4s, v21\.16b, v27\.16b
*[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
*[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
*[0-9a-f]+: 4e9b9eb1 usdot v17\.2s, v21\.8b, v27\.8b
*[0-9a-f]+: 4e809c00 usdot v0\.2s, v0\.8b, v0\.8b
*[0-9a-f]+: 4fbbfab1 usdot v17\.2s, v21\.8b, v27\.4b\[3\]
*[0-9a-f]+: 4fa0f800 usdot v0\.2s, v0\.8b, v0\.4b\[3\]
*[0-9a-f]+: 4f9bf2b1 usdot v17\.2s, v21\.8b, v27\.4b\[0\]
*[0-9a-f]+: 4f80f000 usdot v0\.2s, v0\.8b, v0\.4b\[0\]
*[0-9a-f]+: 4f3bfab1 sudot v17\.2s, v21\.8b, v27\.4b\[3\]
*[0-9a-f]+: 4f20f800 sudot v0\.2s, v0\.8b, v0\.4b\[3\]
*[0-9a-f]+: 4f1bf2b1 sudot v17\.2s, v21\.8b, v27\.4b\[0\]
*[0-9a-f]+: 4f00f000 sudot v0\.2s, v0\.8b, v0\.4b\[0\]

gas/testsuite/gas/aarch64/i8mm.s (new file):
@@ -0,0 +1,56 @@
/* The instructions with non-zero register numbers are there to ensure we have
   the correct argument positioning (i.e. check that the first argument is at
   the end of the word etc).
   The instructions with all-zero register numbers are to ensure the previous
   encoding didn't just "happen" to fit -- so that if we change the registers
   that changes the correct part of the word.
   Each of the numbered patterns begins and ends with a 1, so we can replace
   them with all-zeros and see that the entire range has changed.  */

// SVE
smmla z17.s, z21.b, z27.b
smmla z0.s, z0.b, z0.b

ummla z17.s, z21.b, z27.b
ummla z0.s, z0.b, z0.b

usmmla z17.s, z21.b, z27.b
usmmla z0.s, z0.b, z0.b

usdot z17.s, z21.b, z27.b
usdot z0.s, z0.b, z0.b

usdot z17.s, z21.b, z7.b[3]
usdot z0.s, z0.b, z0.b[3]
usdot z17.s, z21.b, z7.b[0]
usdot z0.s, z0.b, z0.b[0]

sudot z17.s, z21.b, z7.b[3]
sudot z0.s, z0.b, z0.b[3]
sudot z17.s, z21.b, z7.b[0]
sudot z0.s, z0.b, z0.b[0]

// SIMD
smmla v17.4s, v21.16b, v27.16b
smmla v17.4s, v21.16b, v27.16b

ummla v17.4s, v21.16b, v27.16b
ummla v0.4s, v0.16b, v0.16b

usmmla v0.4s, v0.16b, v0.16b
usmmla v17.4s, v21.16b, v27.16b

usdot v17.2s, v21.8b, v27.8b
usdot v0.2s, v0.8b, v0.8b
usdot v17.4s, v21.16b, v27.16b
usdot v0.4s, v0.16b, v0.16b

usdot v17.2s, v21.8b, v27.4b[3]
usdot v0.2s, v0.8b, v0.4b[3]
usdot v17.2s, v21.8b, v27.4b[0]
usdot v0.2s, v0.8b, v0.4b[0]

sudot v17.4s, v21.16b, v27.4b[3]
sudot v0.4s, v0.16b, v0.4b[3]
sudot v17.4s, v21.16b, v27.4b[0]
sudot v0.4s, v0.16b, v0.4b[0]

gas/testsuite/gas/aarch64/sve-movprfx-mm.d (new file):
@@ -0,0 +1,24 @@
#as: -march=armv8.6-a+f32mm+f64mm+sve
#objdump: -dr

.* file format .*

Disassembly of section \.text:

0000000000000000 <\.text>:
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 451b9ab1 smmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 45db9ab1 ummla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 459b9ab1 usmmla z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 449b7ab1 usdot z17\.s, z21\.b, z27\.b
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 44bf1ab1 usdot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 44bf1eb1 sudot z17\.s, z21\.b, z7\.b\[3\]
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 64bbe6b1 fmmla z17\.s, z21\.s, z27\.s
*[0-9a-f]+: 0420bc11 movprfx z17, z0
*[0-9a-f]+: 64dbe6b1 fmmla z17\.d, z21\.d, z27\.d

gas/testsuite/gas/aarch64/sve-movprfx-mm.s (new file):
@@ -0,0 +1,25 @@
/* MOVPRFX tests for matrix multiply instructions.  */

movprfx z17, z0
smmla z17.s, z21.b, z27.b

movprfx z17, z0
ummla z17.s, z21.b, z27.b

movprfx z17, z0
usmmla z17.s, z21.b, z27.b

movprfx z17, z0
usdot z17.s, z21.b, z27.b

movprfx z17, z0
usdot z17.s, z21.b, z7.b[3]

movprfx z17, z0
sudot z17.s, z21.b, z7.b[3]

movprfx z17, z0
fmmla z17.s, z21.s, z27.s

movprfx z17, z0
fmmla z17.d, z21.d, z27.d
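
The pairings in this test work because each of these instructions accumulates into its destination and is defined below with the C_SCAN_MOVPRFX constraint, so a MOVPRFX that writes the same Z register may precede it. For contrast, a hypothetical pairing the assembler should diagnose (not from the patch) is one whose destination does not match the MOVPRFX destination:

    movprfx z1, z0
    smmla z17.s, z21.b, z27.b    // expected to be diagnosed: z17 does not match z1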

include/ChangeLog:
@@ -1,3 +1,12 @@
2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

* opcode/aarch64.h (AARCH64_FEATURE_I8MM): New.
(AARCH64_FEATURE_F32MM): New.
(AARCH64_FEATURE_F64MM): New.
(AARCH64_OPND_SVE_ADDR_RI_S4x32): New.
(enum aarch64_insn_class): Add new instruction class "aarch64_misc" for
instructions that do not require special handling.

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>
2019-11-07  Matthew Malcomson  <matthew.malcomson@arm.com>

include/opcode/aarch64.h:
@@ -91,6 +91,11 @@ typedef uint32_t aarch64_insn;
/* Transactional Memory Extension. */
#define AARCH64_FEATURE_TME 0x2000000000000ULL

/* Matrix Multiply instructions */
#define AARCH64_FEATURE_I8MM 0x10000000000000ULL
#define AARCH64_FEATURE_F32MM 0x20000000000000ULL
#define AARCH64_FEATURE_F64MM 0x40000000000000ULL

/* SVE2 instructions. */
#define AARCH64_FEATURE_SVE2 0x000000010
#define AARCH64_FEATURE_SVE2_AES 0x000000080

@@ -133,7 +138,8 @@ typedef uint32_t aarch64_insn;
| AARCH64_FEATURE_SSBS)
#define AARCH64_ARCH_V8_6 AARCH64_FEATURE (AARCH64_ARCH_V8_5, \
AARCH64_FEATURE_V8_6 \
| AARCH64_FEATURE_BFLOAT16)
| AARCH64_FEATURE_BFLOAT16 \
| AARCH64_FEATURE_I8MM)

#define AARCH64_ARCH_NONE AARCH64_FEATURE (0, 0)
#define AARCH64_ANY AARCH64_FEATURE (-1, 0) /* Any basic core. */

@@ -322,6 +328,7 @@ enum aarch64_opnd
AARCH64_OPND_BTI_TARGET, /* BTI {<target>}. */

AARCH64_OPND_SVE_ADDR_RI_S4x16, /* SVE [<Xn|SP>, #<simm4>*16]. */
AARCH64_OPND_SVE_ADDR_RI_S4x32, /* SVE [<Xn|SP>, #<simm4>*32]. */
AARCH64_OPND_SVE_ADDR_RI_S4xVL, /* SVE [<Xn|SP>, #<simm4>, MUL VL]. */
AARCH64_OPND_SVE_ADDR_RI_S4x2xVL, /* SVE [<Xn|SP>, #<simm4>*2, MUL VL]. */
AARCH64_OPND_SVE_ADDR_RI_S4x3xVL, /* SVE [<Xn|SP>, #<simm4>*3, MUL VL]. */

@@ -520,6 +527,7 @@ enum aarch64_opnd_qualifier

enum aarch64_insn_class
{
aarch64_misc,
addsub_carry,
addsub_ext,
addsub_imm,

opcodes/ChangeLog:
@@ -1,3 +1,31 @@
2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>

* aarch64-tbl.h (aarch64_feature_i8mm_sve, aarch64_feature_f32mm_sve,
aarch64_feature_f64mm_sve, aarch64_feature_i8mm, aarch64_feature_f32mm,
aarch64_feature_f64mm): New feature sets.
(INT8MATMUL_INSN, F64MATMUL_SVE_INSN, F64MATMUL_INSN,
F32MATMUL_SVE_INSN, F32MATMUL_INSN): New macros to define matrix multiply
instructions.
(I8MM_SVE, F32MM_SVE, F64MM_SVE, I8MM, F32MM, F64MM): New feature set
macros.
(QL_MMLA64, OP_SVE_SBB): New qualifiers.
(OP_SVE_QQQ): New qualifier.
(INT8MATMUL_SVE_INSNC, F64MATMUL_SVE_INSNC,
F32MATMUL_SVE_INSNC): New macros to define matrix multiply instructions
that support the movprfx constraint.
(aarch64_opcode_table): Support for SVE_ADDR_RI_S4x32.
(aarch64_opcode_table): Define new instructions smmla,
ummla, usmmla, usdot, sudot, fmmla, ld1rob, ld1roh, ld1row, ld1rod,
uzip{1/2}, trn{1/2}.
* aarch64-opc.c (operand_general_constraint_met_p): Handle
AARCH64_OPND_SVE_ADDR_RI_S4x32.
(aarch64_print_operand): Handle AARCH64_OPND_SVE_ADDR_RI_S4x32.
* aarch64-dis-2.c (aarch64_opcode_lookup_1, aarch64_find_next_opcode):
Account for new instructions.
* aarch64-asm-2.c (aarch64_insert_operand): Support the new
S4x32 operand.
* aarch64-opc-2.c (aarch64_operands): Support the new S4x32 operand.

2019-11-07  Mihail Ionescu  <mihail.ionescu@arm.com>
2019-11-07  Matthew Malcomson  <matthew.malcomson@arm.com>

opcodes/aarch64-asm-2.c:
@@ -628,7 +628,6 @@ aarch64_insert_operand (const aarch64_operand *self,
case 28:
case 29:
case 30:
case 162:
case 163:
case 164:
case 165:

@@ -638,7 +637,7 @@ aarch64_insert_operand (const aarch64_operand *self,
case 169:
case 170:
case 171:
case 186:
case 172:
case 187:
case 188:
case 189:

@@ -647,8 +646,9 @@ aarch64_insert_operand (const aarch64_operand *self,
case 192:
case 193:
case 194:
case 200:
case 203:
case 195:
case 201:
case 204:
return aarch64_ins_regno (self, info, code, inst, errors);
case 14:
return aarch64_ins_reg_extended (self, info, code, inst, errors);

@@ -660,7 +660,7 @@ aarch64_insert_operand (const aarch64_operand *self,
case 32:
case 33:
case 34:
case 206:
case 207:
return aarch64_ins_reglane (self, info, code, inst, errors);
case 35:
return aarch64_ins_reglist (self, info, code, inst, errors);

@@ -694,9 +694,8 @@ aarch64_insert_operand (const aarch64_operand *self,
case 80:
case 81:
case 82:
case 159:
case 161:
case 178:
case 160:
case 162:
case 179:
case 180:
case 181:

@@ -704,7 +703,8 @@ aarch64_insert_operand (const aarch64_operand *self,
case 183:
case 184:
case 185:
case 205:
case 186:
case 206:
return aarch64_ins_imm (self, info, code, inst, errors);
case 43:
case 44:

@@ -714,10 +714,10 @@ aarch64_insert_operand (const aarch64_operand *self,
case 47:
return aarch64_ins_advsimd_imm_modified (self, info, code, inst, errors);
case 51:
case 149:
case 150:
return aarch64_ins_fpimm (self, info, code, inst, errors);
case 68:
case 157:
case 158:
return aarch64_ins_limm (self, info, code, inst, errors);
case 69:
return aarch64_ins_aimm (self, info, code, inst, errors);

@@ -727,11 +727,11 @@ aarch64_insert_operand (const aarch64_operand *self,
return aarch64_ins_fbits (self, info, code, inst, errors);
case 73:
case 74:
case 154:
case 155:
return aarch64_ins_imm_rotate2 (self, info, code, inst, errors);
case 75:
case 153:
case 155:
case 154:
case 156:
return aarch64_ins_imm_rotate1 (self, info, code, inst, errors);
case 76:
case 77:

@@ -774,22 +774,22 @@ aarch64_insert_operand (const aarch64_operand *self,
case 106:
return aarch64_ins_hint (self, info, code, inst, errors);
case 107:
return aarch64_ins_sve_addr_ri_s4 (self, info, code, inst, errors);
case 108:
return aarch64_ins_sve_addr_ri_s4 (self, info, code, inst, errors);
case 109:
case 110:
case 111:
return aarch64_ins_sve_addr_ri_s4xvl (self, info, code, inst, errors);
case 112:
return aarch64_ins_sve_addr_ri_s6xvl (self, info, code, inst, errors);
return aarch64_ins_sve_addr_ri_s4xvl (self, info, code, inst, errors);
case 113:
return aarch64_ins_sve_addr_ri_s9xvl (self, info, code, inst, errors);
return aarch64_ins_sve_addr_ri_s6xvl (self, info, code, inst, errors);
case 114:
return aarch64_ins_sve_addr_ri_s9xvl (self, info, code, inst, errors);
case 115:
case 116:
case 117:
return aarch64_ins_sve_addr_ri_u6 (self, info, code, inst, errors);
case 118:
return aarch64_ins_sve_addr_ri_u6 (self, info, code, inst, errors);
case 119:
case 120:
case 121:

@@ -803,8 +803,8 @@ aarch64_insert_operand (const aarch64_operand *self,
case 129:
case 130:
case 131:
return aarch64_ins_sve_addr_rr_lsl (self, info, code, inst, errors);
case 132:
return aarch64_ins_sve_addr_rr_lsl (self, info, code, inst, errors);
case 133:
case 134:
case 135:

@@ -812,52 +812,53 @@ aarch64_insert_operand (const aarch64_operand *self,
case 137:
case 138:
case 139:
return aarch64_ins_sve_addr_rz_xtw (self, info, code, inst, errors);
case 140:
return aarch64_ins_sve_addr_rz_xtw (self, info, code, inst, errors);
case 141:
case 142:
case 143:
return aarch64_ins_sve_addr_zi_u5 (self, info, code, inst, errors);
case 144:
return aarch64_ins_sve_addr_zz_lsl (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zi_u5 (self, info, code, inst, errors);
case 145:
return aarch64_ins_sve_addr_zz_sxtw (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zz_lsl (self, info, code, inst, errors);
case 146:
return aarch64_ins_sve_addr_zz_uxtw (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zz_sxtw (self, info, code, inst, errors);
case 147:
return aarch64_ins_sve_aimm (self, info, code, inst, errors);
return aarch64_ins_sve_addr_zz_uxtw (self, info, code, inst, errors);
case 148:
return aarch64_ins_sve_aimm (self, info, code, inst, errors);
case 149:
return aarch64_ins_sve_asimm (self, info, code, inst, errors);
case 150:
return aarch64_ins_sve_float_half_one (self, info, code, inst, errors);
case 151:
return aarch64_ins_sve_float_half_two (self, info, code, inst, errors);
return aarch64_ins_sve_float_half_one (self, info, code, inst, errors);
case 152:
return aarch64_ins_sve_float_half_two (self, info, code, inst, errors);
case 153:
return aarch64_ins_sve_float_zero_one (self, info, code, inst, errors);
case 156:
case 157:
return aarch64_ins_inv_limm (self, info, code, inst, errors);
case 158:
case 159:
return aarch64_ins_sve_limm_mov (self, info, code, inst, errors);
case 160:
case 161:
return aarch64_ins_sve_scale (self, info, code, inst, errors);
case 172:
case 173:
case 174:
return aarch64_ins_sve_shlimm (self, info, code, inst, errors);
case 175:
return aarch64_ins_sve_shlimm (self, info, code, inst, errors);
case 176:
case 177:
case 178:
return aarch64_ins_sve_shrimm (self, info, code, inst, errors);
case 195:
case 196:
case 197:
case 198:
case 199:
case 200:
return aarch64_ins_sve_quad_index (self, info, code, inst, errors);
case 201:
return aarch64_ins_sve_index (self, info, code, inst, errors);
case 202:
case 204:
return aarch64_ins_sve_index (self, info, code, inst, errors);
case 203:
case 205:
return aarch64_ins_sve_reglist (self, info, code, inst, errors);
default: assert (0); abort ();
}

(File diff suppressed because it is too large; per the ChangeLog this is the regenerated opcodes/aarch64-dis-2.c.)

opcodes/aarch64-opc-2.c:
@@ -132,6 +132,7 @@ const struct aarch64_operand aarch64_operands[] =
{AARCH64_OPND_CLASS_SYSTEM, "BARRIER_PSB", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {}, "the PSB option name CSYNC"},
{AARCH64_OPND_CLASS_SYSTEM, "BTI", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {}, "BTI targets j/c/jc"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x16", 4 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 16"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x32", 5 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 32"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4xVL", OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by VL"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x2xVL", 1 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 2*VL"},
{AARCH64_OPND_CLASS_ADDRESS, "SVE_ADDR_RI_S4x3xVL", 2 << OPD_F_OD_LSB | OPD_F_HAS_INSERTER | OPD_F_HAS_EXTRACTOR, {FLD_Rn}, "an address with a 4-bit signed offset, multiplied by 3*VL"},

opcodes/aarch64-opc.c:
@@ -1899,6 +1899,7 @@ operand_general_constraint_met_p (const aarch64_opnd_info *opnds, int idx,
break;

case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
min_value = -8;
max_value = 7;
goto sve_imm_offset;

@@ -3644,6 +3645,7 @@ aarch64_print_operand (char *buf, size_t size, bfd_vma pc,
case AARCH64_OPND_ADDR_SIMM13:
case AARCH64_OPND_ADDR_OFFSET:
case AARCH64_OPND_SVE_ADDR_RI_S4x16:
case AARCH64_OPND_SVE_ADDR_RI_S4x32:
case AARCH64_OPND_SVE_ADDR_RI_S4xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x2xVL:
case AARCH64_OPND_SVE_ADDR_RI_S4x3xVL:
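
For SVE_ADDR_RI_S4x32 the shared simm4 bounds above (min_value = -8, max_value = 7) combine with the x32 scaling, so the ld1ro* immediate forms accept byte offsets from -256 to 224 in steps of 32, which is exactly the range the f64mm tests exercise. A small sketch of what should be accepted and rejected under this constraint (the rejected lines are hypothetical, not from the patch):

    ld1rob { z0.b }, p0/z, [x0, #-256]    // accepted: -8 * 32
    ld1rob { z0.b }, p0/z, [x0, #224]     // accepted:  7 * 32
    // ld1rob { z0.b }, p0/z, [x0, #256]  // rejected: 8 * 32 is out of range
    // ld1rob { z0.b }, p0/z, [x0, #16]   // rejected: not a multiple of 32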

opcodes/aarch64-tbl.h:
@@ -1519,6 +1519,10 @@
{ \
QLF3(S_D,S_D,S_D), \
}
#define OP_SVE_QQQ \
{ \
QLF3(S_Q,S_Q,S_Q), \
}
#define OP_SVE_DDDD \
{ \
QLF4(S_D,S_D,S_D,S_D), \

@@ -1663,6 +1667,10 @@
{ \
QLF3(S_S,P_Z,S_S), \
}
#define OP_SVE_SBB \
{ \
QLF3(S_S,S_B,S_B), \
}
#define OP_SVE_SBBU \
{ \
QLF4(S_S,S_B,S_B,NIL), \

@@ -2272,6 +2280,12 @@
QLF3(V_4S, V_8H, S_2H),\
}

/* e.g. SMMLA <Vd>.4S, <Vn>.16B, <Vm>.16B */
#define QL_MMLA64 \
{ \
QLF3(V_4S, V_16B, V_16B),\
}

/* e.g. BFMMLA <Vd>.4s, <Vn>.8h, <Vm>.8h */
#define QL_BFMMLA \
{ \

@@ -2393,6 +2407,17 @@ static const aarch64_feature_set aarch64_feature_sve2bitperm =
AARCH64_FEATURE (AARCH64_FEATURE_SVE2 | AARCH64_FEATURE_SVE2_BITPERM, 0);
static const aarch64_feature_set aarch64_feature_v8_6 =
AARCH64_FEATURE (AARCH64_FEATURE_V8_6, 0);
static const aarch64_feature_set aarch64_feature_i8mm =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_I8MM, 0);
static const aarch64_feature_set aarch64_feature_i8mm_sve =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_I8MM
| AARCH64_FEATURE_SVE, 0);
static const aarch64_feature_set aarch64_feature_f32mm_sve =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_F32MM
| AARCH64_FEATURE_SVE, 0);
static const aarch64_feature_set aarch64_feature_f64mm_sve =
AARCH64_FEATURE (AARCH64_FEATURE_V8_2 | AARCH64_FEATURE_F64MM
| AARCH64_FEATURE_SVE, 0);

#define CORE &aarch64_feature_v8

@@ -2435,8 +2460,13 @@ static const aarch64_feature_set aarch64_feature_v8_6 =
#define SVE2_SM4 &aarch64_feature_sve2sm4
#define SVE2_BITPERM &aarch64_feature_sve2bitperm
#define ARMV8_6 &aarch64_feature_v8_6
#define ARMV8_6_SVE &aarch64_feature_v8_6
#define BFLOAT16_SVE &aarch64_feature_bfloat16_sve
#define BFLOAT16 &aarch64_feature_bfloat16
#define I8MM_SVE &aarch64_feature_i8mm_sve
#define F32MM_SVE &aarch64_feature_f32mm_sve
#define F64MM_SVE &aarch64_feature_f64mm_sve
#define I8MM &aarch64_feature_i8mm

#define CORE_INSN(NAME,OPCODE,MASK,CLASS,OP,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, OP, CORE, OPS, QUALS, FLAGS, 0, 0, NULL }

@@ -2534,6 +2564,16 @@ static const aarch64_feature_set aarch64_feature_v8_6 =
CONSTRAINTS, TIED, NULL }
#define BFLOAT16_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, 0, BFLOAT16, OPS, QUALS, FLAGS, 0, 0, NULL }
#define INT8MATMUL_SVE_INSNC(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS, CONSTRAINTS, TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, I8MM_SVE, OPS, QUALS, FLAGS, CONSTRAINTS, TIED, NULL }
#define INT8MATMUL_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS) \
{ NAME, OPCODE, MASK, CLASS, 0, I8MM, OPS, QUALS, FLAGS, 0, 0, NULL }
#define F64MATMUL_SVE_INSN(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS,TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, F64MM_SVE, OPS, QUALS, FLAGS, 0, TIED, NULL }
#define F64MATMUL_SVE_INSNC(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS, CONSTRAINTS, TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, F64MM_SVE, OPS, QUALS, FLAGS, CONSTRAINTS, TIED, NULL }
#define F32MATMUL_SVE_INSNC(NAME,OPCODE,MASK,CLASS,OPS,QUALS,FLAGS, CONSTRAINTS, TIED) \
{ NAME, OPCODE, MASK, CLASS, 0, F32MM_SVE, OPS, QUALS, FLAGS, CONSTRAINTS, TIED, NULL }

struct aarch64_opcode aarch64_opcode_table[] =
{

@@ -5032,6 +5072,37 @@ struct aarch64_opcode aarch64_opcode_table[] =
V8_4_INSN ("stlur", 0xd9000000, 0xffe00c00, ldst_unscaled, OP2 (Rt, ADDR_OFFSET), QL_STLX, 0),
V8_4_INSN ("ldapur", 0xd9400000, 0xffe00c00, ldst_unscaled, OP2 (Rt, ADDR_OFFSET), QL_STLX, 0),

/* Matrix Multiply instructions. */
INT8MATMUL_SVE_INSNC ("smmla", 0x45009800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("ummla", 0x45c09800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("usmmla", 0x45809800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("usdot", 0x44807800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("usdot", 0x44a01800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
INT8MATMUL_SVE_INSNC ("sudot", 0x44a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SBB, 0, C_SCAN_MOVPRFX, 0),
F32MATMUL_SVE_INSNC ("fmmla", 0x64a0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_S, 0, C_SCAN_MOVPRFX, 0),
F64MATMUL_SVE_INSNC ("fmmla", 0x64c0e400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_VVV_D, 0, C_SCAN_MOVPRFX, 0),
F64MATMUL_SVE_INSN ("ld1rob", 0xa4200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_BZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1roh", 0xa4a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_HZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1row", 0xa5200000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_SZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1rod", 0xa5a00000, 0xffe0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RX), OP_SVE_DZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1rob", 0xa4202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_BZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1roh", 0xa4a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_HZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1row", 0xa5202000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_SZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("ld1rod", 0xa5a02000, 0xfff0e000, sve_misc, OP3 (SVE_ZtxN, SVE_Pg3, SVE_ADDR_RI_S4x32), OP_SVE_DZU, F_OD(1), 0),
F64MATMUL_SVE_INSN ("zip1", 0x05a00000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("zip2", 0x05a00400, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("uzip1", 0x05a00800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("uzip2", 0x05a00c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("trn1", 0x05a01800, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
F64MATMUL_SVE_INSN ("trn2", 0x05a01c00, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_QQQ, 0, 0),
/* Matrix Multiply advanced SIMD instructions. */
INT8MATMUL_INSN ("smmla", 0x4e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
INT8MATMUL_INSN ("ummla", 0x6e80a400, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
INT8MATMUL_INSN ("usmmla", 0x4e80ac00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_MMLA64, 0),
INT8MATMUL_INSN ("usdot", 0x4e809c00, 0xffe0fc00, aarch64_misc, OP3 (Vd, Vn, Vm), QL_V3DOT, F_SIZEQ),
INT8MATMUL_INSN ("usdot", 0x4f80f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),
INT8MATMUL_INSN ("sudot", 0x4f00f000, 0xffc0f400, dotproduct, OP3 (Vd, Vn, Em), QL_V2DOT, F_SIZEQ),

/* BFloat instructions. */
BFLOAT16_SVE_INSNC ("bfdot", 0x64608000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm_16), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),
BFLOAT16_SVE_INSNC ("bfdot", 0x64604000, 0xffe0fc00, sve_misc, OP3 (SVE_Zd, SVE_Zn, SVE_Zm3_INDEX), OP_SVE_SHH, 0, C_SCAN_MOVPRFX, 0),

@@ -5263,6 +5334,9 @@ struct aarch64_opcode aarch64_opcode_table[] =
Y(ADDRESS, sve_addr_ri_s4, "SVE_ADDR_RI_S4x16", \
4 << OPD_F_OD_LSB, F(FLD_Rn), \
"an address with a 4-bit signed offset, multiplied by 16") \
Y(ADDRESS, sve_addr_ri_s4, "SVE_ADDR_RI_S4x32", \
5 << OPD_F_OD_LSB, F(FLD_Rn), \
"an address with a 4-bit signed offset, multiplied by 32") \
Y(ADDRESS, sve_addr_ri_s4xvl, "SVE_ADDR_RI_S4xVL", \
0 << OPD_F_OD_LSB, F(FLD_Rn), \
"an address with a 4-bit signed offset, multiplied by VL") \
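
The qualifier lists above gate the accepted operand forms: QL_MMLA64 allows only the 4S/16B/16B combination for the Advanced SIMD smmla/ummla/usmmla entries, and OP_SVE_QQQ restricts the new zip/uzip/trn variants to .q elements. A brief sketch (the commented-out negative case is hypothetical, not from the patch):

    smmla v0.4s, v1.16b, v2.16b    // accepted: matches QL_MMLA64
    // smmla v0.2s, v1.8b, v2.8b   // rejected: no such qualifier combination
    trn1 z0.q, z1.q, z2.q          // accepted: matches OP_SVE_QQQ
    trn1 z0.s, z1.s, z2.s          // handled by the existing base SVE trn1 entry, not these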