x86inc improvements for 64-bit
Add support for all x86-64 registers.
Prefer caller-saved registers over callee-saved on WIN64.
Support up to 15 function arguments.

Also (by Ronald S. Bultje): fix up our asm to work with the new x86inc.asm.

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
committed by Justin Ruggles
parent e1ce756844
commit 729f90e268
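At the caller level, the effect of these changes can be sketched with a hypothetical routine (the name example_sum8 and its body are illustrative, not part of this commit; x86-64 only, and it assumes pointer-sized arguments): up to 15 arguments and GPRs may now be declared, and the prologue pushes and loads only what is actually used.

INIT_XMM
cglobal example_sum8, 8, 9, 0 ; 8 args, 9 GPRs, 0 XMM - the old ASSERT capped this at 7
    mov  r8, r0     ; r8 is the 9th GPR: pure scratch, never an argument here
    add  r8, r1
    add  r8, r2
    add  r8, r3
    add  r8, r4
    add  r8, r5
    add  r8, r6     ; on both 64-bit ABIs some of these arrive on the stack and
    add  r8, r7     ; are loaded into registers by the new LOAD_IF_USED
    mov  rax, r8    ; return the sum
    RET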
@@ -1,11 +1,12 @@
 ;*****************************************************************************
 ;* x86inc.asm: x264asm abstraction layer
 ;*****************************************************************************
-;* Copyright (C) 2005-2011 x264 project
+;* Copyright (C) 2005-2012 x264 project
 ;*
 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
 ;*          Anton Mitrofanov <BugMaster@narod.ru>
 ;*          Jason Garrett-Glaser <darkshikari@gmail.com>
+;*          Henrik Gramner <hengar-6@student.ltu.se>
 ;*
 ;* Permission to use, copy, modify, and/or distribute this software for any
 ;* purpose with or without fee is hereby granted, provided that the above
@@ -95,6 +96,9 @@
     default rel
 %endif

+; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
+CPU amdnop
+
 ; Macros to eliminate most code duplication between x86_32 and x86_64:
 ; Currently this works only for leaf functions which load all their arguments
 ; into registers at the start, and make no other use of the stack. Luckily that
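To make the leaf-function restriction above concrete, here is a hypothetical function in that style (scale_int16 is not part of the patch; it assumes a 16-byte-aligned buffer whose length is a non-zero multiple of 8 words): every argument is loaded into a register by the prologue, and the body never touches the stack.

INIT_XMM
cglobal scale_int16, 3, 3, 2 ; src, len (in words), mul
    movd      m1, r2d        ; broadcast the 16-bit multiplier into all words
    punpcklwd m1, m1
    pshufd    m1, m1, 0
.loop:
    mova      m0, [r0]
    pmullw    m0, m1
    mova      [r0], m0
    add       r0, mmsize
    sub       r1d, mmsize/2  ; 8 words per iteration
    jg .loop
    REP_RET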
@@ -128,18 +132,20 @@
 ; rNm is the original location of arg N (a register or on the stack), dword
 ; rNmp is native size

-%macro DECLARE_REG 6
+%macro DECLARE_REG 5-6
     %define r%1q %2
     %define r%1d %3
     %define r%1w %4
     %define r%1b %5
-    %define r%1m %6
-    %ifid %6 ; i.e. it's a register
+    %if %0 == 5
         %define r%1m %3
         %define r%1mp %2
     %elif ARCH_X86_64 ; memory
-        %define r%1mp qword %6
+        %define r%1m [rsp + stack_offset + %6]
+        %define r%1mp qword r %+ %1m
     %else
-        %define r%1mp dword %6
+        %define r%1m [esp + stack_offset + %6]
+        %define r%1mp dword r %+ %1m
     %endif
     %define r%1 %2
 %endmacro
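A paraphrase of what the two forms define, using declarations that appear later in this diff as examples (the expansions are my reading of the macro, not literal output):

; 5-argument form: the argument lives in a register, so rNm/rNmp alias it.
;   DECLARE_REG 0, rcx, ecx, cx, cl
;     -> r0 = rcx, r0d = ecx, r0w = cx, r0b = cl, r0m = ecx, r0mp = rcx
; 6-argument form: the sixth parameter is the argument's stack offset.
;   DECLARE_REG 4, R10, R10D, R10W, R10B, 40
;     -> r4 = r10, r4d = r10d, r4w = r10w, r4b = r10b,
;        r4m = [rsp + stack_offset + 40], r4mp = qword r4m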
@@ -187,7 +193,7 @@ DECLARE_REG_SIZE bp, bpl
 %endrep
 %endmacro

-DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9
+DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14

 %if ARCH_X86_64
     %define gprsize 8
@@ -205,6 +211,33 @@
     %assign stack_offset stack_offset-gprsize
 %endmacro

+%macro PUSH_IF_USED 1-*
+    %rep %0
+        %if %1 < regs_used
+            PUSH r%1
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro POP_IF_USED 1-*
+    %rep %0
+        %if %1 < regs_used
+            pop r%1
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
+%macro LOAD_IF_USED 1-*
+    %rep %0
+        %if %1 < num_args
+            mov r%1, r %+ %1 %+ mp
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
 %macro SUB 2
     sub %1, %2
     %ifidn %1, rsp
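As an assumed illustration of how the three variadic helpers behave (the numbers are invented; compare the x86_32 PROLOGUE further down), with num_args = 3 and regs_used = 5:

;   PUSH_IF_USED 3, 4, 5, 6          -> PUSH r3
;                                       PUSH r4          ; 5 and 6 >= regs_used
;   LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 -> mov r0, r0mp
;                                       mov r1, r1mp
;                                       mov r2, r2mp     ; 3..6 >= num_args
;   POP_IF_USED 6, 5, 4, 3           -> pop r4
;                                       pop r3           ; reverse order of the pushes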
@@ -272,39 +305,34 @@

 %if WIN64 ; Windows x64 ;=================================================

-DECLARE_REG 0, rcx, ecx, cx, cl, ecx
-DECLARE_REG 1, rdx, edx, dx, dl, edx
-DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
-DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
-DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
-DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
-DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
-%define r7m [rsp + stack_offset + 64]
-%define r8m [rsp + stack_offset + 72]
-
-%macro LOAD_IF_USED 2 ; reg_id, number_of_args
-    %if %1 < %2
-        mov r%1, [rsp + stack_offset + 8 + %1*8]
-    %endif
-%endmacro
+DECLARE_REG 0, rcx, ecx, cx, cl
+DECLARE_REG 1, rdx, edx, dx, dl
+DECLARE_REG 2, R8, R8D, R8W, R8B
+DECLARE_REG 3, R9, R9D, R9W, R9B
+DECLARE_REG 4, R10, R10D, R10W, R10B, 40
+DECLARE_REG 5, R11, R11D, R11W, R11B, 48
+DECLARE_REG 6, rax, eax, ax, al, 56
+DECLARE_REG 7, rdi, edi, di, dil, 64
+DECLARE_REG 8, rsi, esi, si, sil, 72
+DECLARE_REG 9, rbx, ebx, bx, bl, 80
+DECLARE_REG 10, rbp, ebp, bp, bpl, 88
+DECLARE_REG 11, R12, R12D, R12W, R12B, 96
+DECLARE_REG 12, R13, R13D, R13W, R13B, 104
+DECLARE_REG 13, R14, R14D, R14W, R14B, 112
+DECLARE_REG 14, R15, R15D, R15W, R15B, 120

 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
-    ASSERT %2 >= %1
+    %assign num_args %1
     %assign regs_used %2
-    ASSERT regs_used <= 7
-    %if regs_used > 4
-        push r4
-        push r5
-        %assign stack_offset stack_offset+16
-    %endif
+    ASSERT regs_used >= num_args
+    ASSERT regs_used <= 15
+    PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14
     %if mmsize == 8
         %assign xmm_regs_used 0
     %else
         WIN64_SPILL_XMM %3
     %endif
-    LOAD_IF_USED 4, %1
-    LOAD_IF_USED 5, %1
-    LOAD_IF_USED 6, %1
+    LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
     DEFINE_ARGS %4
 %endmacro

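A sketch of what the new WIN64 prologue amounts to for a hypothetical `cglobal foo, 6, 8, 7` (the numbers are illustrative, and the expansion is paraphrased):

;   PUSH r7               ; rdi is callee-saved and 7 < regs_used
;   WIN64_SPILL_XMM 7     ; xmm6 is callee-saved, so spill it
;   mov r4, r4mp          ; only four arguments arrive in registers on WIN64,
;   mov r5, r5mp          ; so args 4 and 5 are loaded from the stack
; r0-r3 already sit in rcx, rdx, r8 and r9, and r4/r5 now map to the
; caller-saved r10/r11, so a function using <= 7 GPRs pushes nothing at all,
; where the old code had to push rdi and rsi as soon as regs_used > 4.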
@@ -312,12 +340,11 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
     %assign xmm_regs_used %1
     ASSERT xmm_regs_used <= 16
     %if xmm_regs_used > 6
-        sub rsp, (xmm_regs_used-6)*16+16
-        %assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
+        SUB rsp, (xmm_regs_used-6)*16+16
         %assign %%i xmm_regs_used
         %rep (xmm_regs_used-6)
             %assign %%i %%i-1
-            movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i
+            movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i
         %endrep
     %endif
 %endmacro
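My reading of the new (~stack_offset&8) term, sketched numerically (an assumption, not text from the patch):

; On entry rsp is 8 mod 16 (return address) and stack_offset starts at 0, so:
;   stack_offset % 16 == 8  ->  rsp is 16-byte aligned  ->  ~stack_offset&8 == 0
;   stack_offset % 16 == 0  ->  rsp is 8 mod 16         ->  ~stack_offset&8 == 8
; Either way rsp + (%%i-6)*16 + (~stack_offset&8) is 16-byte aligned, keeping the
; aligned movdqa valid, and the capitalized SUB updates stack_offset itself,
; which is why the separate %assign could be dropped.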
@@ -327,7 +354,7 @@
         %assign %%i xmm_regs_used
         %rep (xmm_regs_used-6)
             %assign %%i %%i-1
-            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8]
+            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)]
         %endrep
         add %1, (xmm_regs_used-6)*16+16
     %endif
@@ -341,15 +368,12 @@

 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
-    %if regs_used > 4
-        pop r5
-        pop r4
-    %endif
+    POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
     ret
 %endmacro

 %macro REP_RET 0
-    %if regs_used > 4 || xmm_regs_used > 6
+    %if regs_used > 7 || xmm_regs_used > 6
         RET
     %else
         rep ret
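For the changed REP_RET condition, an assumed summary of the two expansions on WIN64:

;   regs_used <= 7 and xmm_regs_used <= 6  ->  rep ret  ; nothing to restore; the
;                                                       ; rep prefix only sidesteps
;                                                       ; AMD's branch-target penalty
;   otherwise                              ->  RET      ; must pop r7..r14 and/or
;                                                       ; restore xmm6+ first
; The threshold moves from 4 to 7 because r0-r6 now map to caller-saved
; registers; the first register that actually needs pushing is r7 (rdi).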
@@ -358,92 +382,80 @@

 %elif ARCH_X86_64 ; *nix x64 ;=============================================

-DECLARE_REG 0, rdi, edi, di, dil, edi
-DECLARE_REG 1, rsi, esi, si, sil, esi
-DECLARE_REG 2, rdx, edx, dx, dl, edx
-DECLARE_REG 3, rcx, ecx, cx, cl, ecx
-DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
-DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
-DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
-%define r7m [rsp + stack_offset + 16]
-%define r8m [rsp + stack_offset + 24]
-
-%macro LOAD_IF_USED 2 ; reg_id, number_of_args
-    %if %1 < %2
-        mov r%1, [rsp - 40 + %1*8]
-    %endif
-%endmacro
+DECLARE_REG 0, rdi, edi, di, dil
+DECLARE_REG 1, rsi, esi, si, sil
+DECLARE_REG 2, rdx, edx, dx, dl
+DECLARE_REG 3, rcx, ecx, cx, cl
+DECLARE_REG 4, R8, R8D, R8W, R8B
+DECLARE_REG 5, R9, R9D, R9W, R9B
+DECLARE_REG 6, rax, eax, ax, al, 8
+DECLARE_REG 7, R10, R10D, R10W, R10B, 16
+DECLARE_REG 8, R11, R11D, R11W, R11B, 24
+DECLARE_REG 9, rbx, ebx, bx, bl, 32
+DECLARE_REG 10, rbp, ebp, bp, bpl, 40
+DECLARE_REG 11, R12, R12D, R12W, R12B, 48
+DECLARE_REG 12, R13, R13D, R13W, R13B, 56
+DECLARE_REG 13, R14, R14D, R14W, R14B, 64
+DECLARE_REG 14, R15, R15D, R15W, R15B, 72

 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
-    ASSERT %2 >= %1
-    ASSERT %2 <= 7
-    LOAD_IF_USED 6, %1
+    %assign num_args %1
+    %assign regs_used %2
+    ASSERT regs_used >= num_args
+    ASSERT regs_used <= 15
+    PUSH_IF_USED 9, 10, 11, 12, 13, 14
+    LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14
     DEFINE_ARGS %4
 %endmacro

 %macro RET 0
+    POP_IF_USED 14, 13, 12, 11, 10, 9
     ret
 %endmacro

 %macro REP_RET 0
-    rep ret
+    %if regs_used > 9
+        RET
+    %else
+        rep ret
+    %endif
 %endmacro

 %else ; X86_32 ;==============================================================

-DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
-DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
-DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
-DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
-DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
-DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
-DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
-%define r7m [esp + stack_offset + 32]
-%define r8m [esp + stack_offset + 36]
+DECLARE_REG 0, eax, eax, ax, al, 4
+DECLARE_REG 1, ecx, ecx, cx, cl, 8
+DECLARE_REG 2, edx, edx, dx, dl, 12
+DECLARE_REG 3, ebx, ebx, bx, bl, 16
+DECLARE_REG 4, esi, esi, si, null, 20
+DECLARE_REG 5, edi, edi, di, null, 24
+DECLARE_REG 6, ebp, ebp, bp, null, 28
 %define rsp esp

-%macro PUSH_IF_USED 1 ; reg_id
-    %if %1 < regs_used
-        push r%1
-        %assign stack_offset stack_offset+4
-    %endif
+%macro DECLARE_ARG 1-*
+    %rep %0
+        %define r%1m [esp + stack_offset + 4*%1 + 4]
+        %define r%1mp dword r%1m
+        %rotate 1
+    %endrep
 %endmacro

-%macro POP_IF_USED 1 ; reg_id
-    %if %1 < regs_used
-        pop r%1
-    %endif
-%endmacro
-
-%macro LOAD_IF_USED 2 ; reg_id, number_of_args
-    %if %1 < %2
-        mov r%1, [esp + stack_offset + 4 + %1*4]
-    %endif
-%endmacro
+DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14

 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
-    ASSERT %2 >= %1
+    %assign num_args %1
     %assign regs_used %2
-    ASSERT regs_used <= 7
-    PUSH_IF_USED 3
-    PUSH_IF_USED 4
-    PUSH_IF_USED 5
-    PUSH_IF_USED 6
-    LOAD_IF_USED 0, %1
-    LOAD_IF_USED 1, %1
-    LOAD_IF_USED 2, %1
-    LOAD_IF_USED 3, %1
-    LOAD_IF_USED 4, %1
-    LOAD_IF_USED 5, %1
-    LOAD_IF_USED 6, %1
+    %if regs_used > 7
+        %assign regs_used 7
+    %endif
+    ASSERT regs_used >= num_args
+    PUSH_IF_USED 3, 4, 5, 6
+    LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6
     DEFINE_ARGS %4
 %endmacro

 %macro RET 0
-    POP_IF_USED 6
-    POP_IF_USED 5
-    POP_IF_USED 4
-    POP_IF_USED 3
+    POP_IF_USED 6, 5, 4, 3
     ret
 %endmacro

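On x86_32 the same r7-r14 argument names now exist only as stack locations; a paraphrased expansion of the new DECLARE_ARG (illustrative):

;   r7m  = [esp + stack_offset + 4*7 + 4]   ; 4 bytes per argument, +4 skips
;   r7mp = dword r7m                        ; the return address
;   r8m  = [esp + stack_offset + 4*8 + 4]
;   r8mp = dword r8m
;   ...up to r14m/r14mp
; so code that reads r7m..r14m assembles unchanged on x86_32; the values are
; simply always fetched from the stack.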
@@ -464,8 +476,6 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
 %endmacro
 %endif

-
-
 ;=============================================================================
 ; arch-independent part
 ;=============================================================================