x86/intreadwrite: use intrinsics instead of inline asm for AV_COPY128

This has the benefit of removing the SSE -> AVX transition penalty that may
happen when the legacy-encoded inline asm is mixed with the VEX-encoded
instructions emitted by the compiler.

Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
James Almer
2024-07-10 13:00:20 -03:00
parent 4a04cca69a
commit bd1bcb07e0
2 changed files with 11 additions and 14 deletions

View File

@ -22,29 +22,25 @@
#define AVUTIL_X86_INTREADWRITE_H
#include <stdint.h>
#if HAVE_INTRINSICS_SSE
#include <immintrin.h>
#endif
#if HAVE_INTRINSICS_SSE2
#include <emmintrin.h>
#endif
#include "config.h"
#include "libavutil/attributes.h"
#if HAVE_MMX
#ifdef __SSE__
#if HAVE_INTRINSICS_SSE
#define AV_COPY128 AV_COPY128
/**
 * Copy 128 bits (16 bytes) from s to d through one SSE register.
 *
 * The aligned load/store intrinsics are used, so both pointers are
 * expected to be 16-byte aligned.
 *
 * @param d destination buffer, at least 16 bytes
 * @param s source buffer, at least 16 bytes
 */
static av_always_inline void AV_COPY128(void *d, const void *s)
{
    /* Intrinsics rather than inline asm: the compiler picks the instruction
     * encoding, so no legacy SSE instruction is forced into code that is
     * otherwise VEX-encoded. */
    __m128 tmp = _mm_load_ps(s);
    _mm_store_ps(d, tmp);
}
#endif /* __SSE__ */
#endif /* HAVE_INTRINSICS_SSE */
#if HAVE_INTRINSICS_SSE2
@ -57,6 +53,4 @@ static av_always_inline void AV_ZERO128(void *d)
#endif /* HAVE_INTRINSICS_SSE2 */
#endif /* HAVE_MMX */
#endif /* AVUTIL_X86_INTREADWRITE_H */