diff --git a/libavcodec/h264qpel.c b/libavcodec/h264qpel.c index faca1e8953..be80203c4b 100644 --- a/libavcodec/h264qpel.c +++ b/libavcodec/h264qpel.c @@ -20,11 +20,71 @@ */ #include "libavutil/attributes.h" +#include "libavutil/common.h" /* NOTE(review): presumably added for av_clip_uint8() used by the new functions below; confirm */ #include "h264qpel.h" #define pixeltmp int16_t #define BIT_DEPTH 8 #include "h264qpel_template.c" + /* Horizontal lowpass for a 2-wide, 2-high block. On each of the 2 rows, dst[0] and dst[1] are the 6-tap sums with weights (1, -5, 20, 20, -5, 1) over src[-2..3] and src[-1..4] respectively, plus 16, shifted right by 5, then passed through av_clip_uint8(). Every row reads src[-2] through src[4]; dst and src advance by dstStride and srcStride per row. */ +static void put_h264_qpel2_h_lowpass_8(uint8_t *dst, const uint8_t *restrict src, int dstStride, int srcStride) +{ + const int h = 2; + for (int i = 0; i < h; ++i) { + dst[0] = av_clip_uint8(((src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + 16) >> 5); + dst[1] = av_clip_uint8(((src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + 16) >> 5); + dst += dstStride; + src += srcStride; + } +} + /* Vertical lowpass for a 2-wide, 2-high block. For each of the 2 columns it loads the seven samples at rows -2..4 (srcB, srcA, src0..src4), applies the same (1, -5, 20, 20, -5, 1) weights with +16 and a right shift by 5, and stores the av_clip_uint8() results to rows 0 and 1 of dst. dst and src then step one sample to the right. */ +static void put_h264_qpel2_v_lowpass_8(uint8_t *dst, const uint8_t *restrict src, int dstStride, int srcStride) +{ + const int w = 2; + for (int i = 0; i < w; ++i) { + const int srcB = src[-2*srcStride]; + const int srcA = src[-1*srcStride]; + const int src0 = src[0 *srcStride]; + const int src1 = src[1 *srcStride]; + const int src2 = src[2 *srcStride]; + const int src3 = src[3 *srcStride]; + const int src4 = src[4 *srcStride]; + dst[0*dstStride] = av_clip_uint8(((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3) + 16) >> 5); + dst[1*dstStride] = av_clip_uint8(((src1+src2)*20 - (src0+src3)*5 + (srcA+src4) + 16) >> 5); + dst++; + src++; + } +} + /* Two-pass (horizontal, then vertical) lowpass for a 2-wide, 2-high block. Pass 1 writes the unrounded, unshifted horizontal 6-tap sums for h + 5 = 7 source rows into tmp, two values per row. Pass 2 runs the same 6-tap weights down each tmp column, adds 512, shifts right by 10 and stores the av_clip_uint8() result to dst. The caller supplies tmp; it must hold 7 rows of tmpStride elements with at least 2 used per row. */ +static void put_h264_qpel2_hv_lowpass_8(uint8_t *dst, pixeltmp *tmp, const uint8_t *restrict src, int dstStride, int tmpStride, int srcStride) +{ + const int h = 2; + const int w = 2; + src -= 2*srcStride; /* start two rows above the block so the vertical pass has its upper taps */ + for (int i = 0; i < h + 5; ++i) { + tmp[0] = (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]); + tmp[1] = (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]); + tmp += tmpStride; + src += srcStride; + } + tmp -= tmpStride*(h+5-2); /* tmp advanced 7 rows above; stepping back 5 leaves it on stored row 2, so offsets -2..4 below cover exactly the 7 rows just written */ + for (int i = 0; i < w; ++i) { + const int tmpB = tmp[-2*tmpStride]; + const int tmpA = tmp[-1*tmpStride]; + const int tmp0 = tmp[0 
*tmpStride]; + const int tmp1 = tmp[1 *tmpStride]; + const int tmp2 = tmp[2 *tmpStride]; + const int tmp3 = tmp[3 *tmpStride]; + const int tmp4 = tmp[4 *tmpStride]; + dst[0*dstStride] = av_clip_uint8(((tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3) + 512) >> 10); + dst[1*dstStride] = av_clip_uint8(((tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4) + 512) >> 10); + dst++; + tmp++; + } +} + /* NOTE(review): H264_MC is defined outside this view; presumably it expands to the put_ size-2 mcXY functions built on the three lowpass helpers above. Confirm in h264qpel_template.c. */ +H264_MC(put_, 2) + #undef BIT_DEPTH #define BIT_DEPTH 9 @@ -73,7 +133,6 @@ av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth) dspfunc2(put_h264_qpel, 0, 16, depth); \ dspfunc2(put_h264_qpel, 1, 8, depth); \ dspfunc2(put_h264_qpel, 2, 4, depth); \ - dspfunc2(put_h264_qpel, 3, 2, depth); \ dspfunc2(avg_h264_qpel, 0, 16, depth); \ dspfunc2(avg_h264_qpel, 1, 8, depth); \ dspfunc2(avg_h264_qpel, 2, 4, depth) @@ -81,6 +140,7 @@ av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth) switch (bit_depth) { default: SET_QPEL(8); + /* The size-2 put entry was dropped from SET_QPEL above and is now set up only here, in the branch that uses depth 8. */ dspfunc2(put_h264_qpel, 3, 2, 8); // only used by Snow break; case 9: SET_QPEL(9); diff --git a/libavcodec/h264qpel_template.c b/libavcodec/h264qpel_template.c index b71710e6db..a55b45e824 100644 --- a/libavcodec/h264qpel_template.c +++ b/libavcodec/h264qpel_template.c @@ -75,81 +75,6 @@ static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *restrict src, } #define H264_LOWPASS(OPNAME, OP, OP2) \ -av_unused static void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *p_dst, const uint8_t *restrict p_src, int dstStride, int srcStride)\ -{\ - const int h=2;\ - int i;\ - pixel *dst = (pixel*)p_dst;\ - const pixel *restrict src = (const pixel*)p_src;\ - dstStride >>= sizeof(pixel)-1;\ - srcStride >>= sizeof(pixel)-1;\ - for(i=0; i>= sizeof(pixel)-1;\ - srcStride >>= sizeof(pixel)-1;\ - for(i=0; i>= sizeof(pixel)-1;\ - srcStride >>= sizeof(pixel)-1;\ - src -= 2*srcStride;\ - for(i=0; i> i; for (j = 0; j < 16; j++) if (check_func(tab[i][j], "%s_h264_qpel_%d_mc%d%d_%d", op_name, size, j & 3, j >> 2, bit_depth)) {