mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-06 15:49:50 +08:00
x86/intreadwrite: use intrinsics instead of inline asm for AV_COPY128
This has the benefit of removing any SSE -> AVX transition penalty that may occur when the compiler emits VEX-encoded instructions. Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@ -22,29 +22,25 @@
|
||||
#define AVUTIL_X86_INTREADWRITE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#if HAVE_INTRINSICS_SSE
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
#if HAVE_INTRINSICS_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include "config.h"
|
||||
#include "libavutil/attributes.h"
|
||||
|
||||
#if HAVE_MMX
|
||||
|
||||
#ifdef __SSE__
|
||||
#if HAVE_INTRINSICS_SSE
|
||||
|
||||
#define AV_COPY128 AV_COPY128
|
||||
/**
 * Copy 128 bits (16 bytes) from s to d using a 128-bit SSE load and store.
 *
 * @param d destination; written with an aligned 128-bit store
 * @param s source; read with an aligned 128-bit load
 *
 * movaps and _mm_load_ps()/_mm_store_ps() are aligned accesses, so both
 * pointers must be 16-byte aligned.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. Going
 * by the commit title, the __asm__ statement appears to be the removed
 * implementation and the _mm_load_ps()/_mm_store_ps() pair its replacement;
 * confirm against the actual header before relying on either form.
 */
static av_always_inline void AV_COPY128(void *d, const void *s)
|
||||
{
|
||||
struct v {uint64_t v[2];};
|
||||
|
||||
__asm__("movaps %1, %%xmm0 \n\t"
|
||||
"movaps %%xmm0, %0 \n\t"
|
||||
: "=m"(*(struct v*)d)
|
||||
: "m" (*(const struct v*)s)
|
||||
: "xmm0");
|
||||
__m128 tmp = _mm_load_ps(s);
|
||||
_mm_store_ps(d, tmp);
|
||||
}
|
||||
|
||||
#endif /* __SSE__ */
|
||||
#endif /* HAVE_INTRINSICS_SSE */
|
||||
|
||||
#if HAVE_INTRINSICS_SSE2
|
||||
|
||||
@ -57,6 +53,4 @@ static av_always_inline void AV_ZERO128(void *d)
|
||||
|
||||
#endif /* HAVE_INTRINSICS_SSE2 */
|
||||
|
||||
#endif /* HAVE_MMX */
|
||||
|
||||
#endif /* AVUTIL_X86_INTREADWRITE_H */
|
||||
|
Reference in New Issue
Block a user