mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-08-06 15:49:50 +08:00
x86/float_dsp: add ff_vector_fmul_reverse_avx2
~20% faster than AVX.
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
@@ -67,6 +67,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
void ff_vector_fmul_reverse_avx2(float *dst, const float *src0,
|
||||
const float *src1, int len);
|
||||
|
||||
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
|
||||
|
||||
@@ -101,6 +103,9 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
|
||||
}
|
||||
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx2;
|
||||
}
|
||||
if (EXTERNAL_FMA3_FAST(cpu_flags)) {
|
||||
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
|
||||
fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
|
||||
|
Reference in New Issue
Block a user