lavu/aes: add x86 AESNI optimizations

crypto_bench comparison for AES-128-ECB:

lavu_aesni AES-128-ECB  size: 1048576  runs:   1024  time:    0.596 +- 0.081
lavu_c     AES-128-ECB  size: 1048576  runs:   1024  time:   17.007 +- 2.131
crypto     AES-128-ECB  size: 1048576  runs:   1024  time:    0.612 +- 1.857
gcrypt     AES-128-ECB  size: 1048576  runs:   1024  time:    1.123 +- 0.224
tomcrypt   AES-128-ECB  size: 1048576  runs:   1024  time:    9.038 +- 0.790

Improved-By: Henrik Gramner <henrik@gramner.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
Rodger Combs
2015-10-28 05:39:33 -05:00
committed by James Almer
parent 2daaafafc6
commit 2ea3c51795
5 changed files with 139 additions and 2 deletions

95
libavutil/x86/aes.asm Normal file
View File

@ -0,0 +1,95 @@
;*****************************************************************************
;* Copyright (c) 2015 Rodger Combs <rodger.combs@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86util.asm"
SECTION .text
;-----------------------------------------------------------------------------
; void ff_aes_encrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
;                     int count, uint8_t *iv, int rounds)
; void ff_aes_decrypt(AVAES *a, uint8_t *dst, const uint8_t *src,
;                     int count, uint8_t *iv, int rounds)
;
; AES_CRYPT <enc|dec> emits one of the two functions above (the symbol is
; declared through cglobal as aes_%1rypt).
;
; Processes `count` 16-byte blocks from src to dst.  Round key i is read
; from a + 16*i: key [rounds] is XORed into the state first, then keys
; [rounds-1 .. 1] are applied with aes%1 and key [0] with aes%1last.
;
; If iv is non-NULL the blocks are chained through it:
;   enc: state = src ^ chain before the rounds; chain = output block
;   dec: output = state ^ chain after the rounds; chain = input block
; and the final chain value is stored back to iv on return.
;
; Register roles (x86inc names):
;   r0 = a + 0x60 (biased key pointer)   r3 = negative byte offset, counts up to 0
;   r1 = dst end                         r4 = iv (may be NULL)
;   r2 = src end                         r5 = 2 * rounds
;   m0 = block state                     m1 = chain value (zero when iv == NULL)
;-----------------------------------------------------------------------------
%macro AES_CRYPT 1
cglobal aes_%1rypt, 6,6,2               ; x86inc: 6 args, 6 GPRs, 2 vector regs
    ; count is a 32-bit int: test only r3d.  The upper half of the 64-bit
    ; argument register is not guaranteed to be zero on x86-64, so a
    ; full-width test could miss count == 0 and process one bogus block.
    test       r3d, r3d
    je .ret
    shl        r3d, 4                   ; r3 = count * 16 (bytes), zero-extended
    add        r5d, r5d                 ; r5 = 2 * rounds, so 8*r5 = 16 * rounds
    ; Bias the key pointer; presumably so that every round-key offset used
    ; below (-0x60 .. +0x70) fits in a signed 8-bit displacement.
    add         r0, 0x60
    add         r2, r3                  ; r2 = src + bytes
    add         r1, r3                  ; r1 = dst + bytes
    neg         r3                      ; r3 = -bytes; [r2+r3] / [r1+r3] = current block
    pxor        m1, m1                  ; chain = 0 unless an IV is supplied
    test        r4, r4
    je .block
    movu        m1, [r4] ; iv
.block:
    movu        m0, [r2+r3] ; state
%ifidn %1, enc
    pxor        m0, m1                  ; chain in; no-op when m1 is zero (iv == NULL)
%endif
    pxor        m0, [r0+8*r5-0x60]      ; initial AddRoundKey with key [rounds]
    ; Enter the unrolled round sequence according to 2*rounds:
    ; == 24 -> 12 rounds, < 24 -> 10 rounds, otherwise the full 14-round path.
    cmp        r5d, 24
    je .rounds12
    jl .rounds10
    aes%1       m0, [r0+0x70]
    aes%1       m0, [r0+0x60]
.rounds12:
    aes%1       m0, [r0+0x50]
    aes%1       m0, [r0+0x40]
.rounds10:
    aes%1       m0, [r0+0x30]
    aes%1       m0, [r0+0x20]
    aes%1       m0, [r0+0x10]
    aes%1       m0, [r0+0x00]
    aes%1       m0, [r0-0x10]
    aes%1       m0, [r0-0x20]
    aes%1       m0, [r0-0x30]
    aes%1       m0, [r0-0x40]
    aes%1       m0, [r0-0x50]
    aes%1last   m0, [r0-0x60]           ; final round with key [0]
    test        r4, r4
    je .noiv
%ifidn %1, enc
    mova        m1, m0                  ; next chain value = block just produced
%else
    pxor        m0, m1                  ; undo chaining with the previous input block
    movu        m1, [r2+r3]             ; next chain value = this input block; loaded
                                        ; before the store below, so dst may alias src
%endif
.noiv:
    movu   [r1+r3], m0
    add         r3, 16
    jl .block                           ; loop while the offset is still negative
    test        r4, r4
    je .ret
    ; Write the final chain value back to the caller's iv.
%ifidn %1, dec
    movu      [r4], m1
%else
    movu      [r4], m0                  ; equals m1 here (copied after the last round)
%endif
.ret:
    REP_RET
%endmacro
; Instantiate the AES_CRYPT macro twice, but only when HAVE_AESNI_EXTERNAL
; evaluates to non-zero at assembly time.
%if HAVE_AESNI_EXTERNAL
; NOTE(review): INIT_XMM is defined outside this file; presumably it selects
; XMM registers and the "aesni" CPU flavour for the functions declared
; below -- confirm against x86inc.asm.
INIT_XMM aesni
; %1 = enc: declares aes_encrypt, built from aesenc / aesenclast.
AES_CRYPT enc
; %1 = dec: declares aes_decrypt, built from aesdec / aesdeclast.
AES_CRYPT dec
%endif