|
|
|
@ -55,7 +55,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
vector unsigned char *tv;
|
|
|
|
|
vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
|
|
|
|
|
vector unsigned int sad;
|
|
|
|
@ -102,7 +102,7 @@ int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
vector unsigned char *tv;
|
|
|
|
|
vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
|
|
|
|
|
vector unsigned int sad;
|
|
|
|
@ -163,8 +163,8 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
uint8_t *pix3 = pix2 + line_size;
|
|
|
|
|
const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
const_vector unsigned short two = (const_vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
vector unsigned char *tv, avgv, t5;
|
|
|
|
|
vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
|
|
|
|
|
vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
|
|
|
|
@ -263,7 +263,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
vector unsigned char perm1, perm2, *pix1v, *pix2v;
|
|
|
|
|
vector unsigned char t1, t2, t3,t4, t5;
|
|
|
|
|
vector unsigned int sad;
|
|
|
|
@ -305,7 +305,7 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
|
|
|
|
|
vector unsigned char t1, t2, t3,t4, t5;
|
|
|
|
|
vector unsigned int sad;
|
|
|
|
@ -350,7 +350,7 @@ int pix_norm1_altivec(uint8_t *pix, int line_size)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
vector unsigned char *tv;
|
|
|
|
|
vector unsigned char pixv;
|
|
|
|
|
vector unsigned int sv;
|
|
|
|
@ -386,7 +386,7 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
|
|
|
|
|
vector unsigned char t1, t2, t3,t4, t5;
|
|
|
|
|
vector unsigned int sum;
|
|
|
|
@ -442,7 +442,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
DECLARE_ALIGNED_16(int, s);
|
|
|
|
|
const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
vector unsigned char perm1, perm2, *pix1v, *pix2v;
|
|
|
|
|
vector unsigned char t1, t2, t3,t4, t5;
|
|
|
|
|
vector unsigned int sum;
|
|
|
|
@ -486,7 +486,7 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
|
|
|
|
|
|
|
|
|
|
int pix_sum_altivec(uint8_t * pix, int line_size)
|
|
|
|
|
{
|
|
|
|
|
const_vector unsigned int zero = (const_vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
|
|
|
|
|
vector unsigned char perm, *pixv;
|
|
|
|
|
vector unsigned char t1;
|
|
|
|
|
vector unsigned int sad;
|
|
|
|
@ -521,7 +521,7 @@ void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
vector unsigned char perm, bytes, *pixv;
|
|
|
|
|
const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
vector signed short shorts;
|
|
|
|
|
|
|
|
|
|
for(i=0;i<8;i++)
|
|
|
|
@ -548,7 +548,7 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
vector unsigned char perm, bytes, *pixv;
|
|
|
|
|
const_vector unsigned char zero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
vector signed short shorts1, shorts2;
|
|
|
|
|
|
|
|
|
|
for(i=0;i<4;i++)
|
|
|
|
@ -767,8 +767,8 @@ POWERPC_PERF_DECLARE(altivec_put_pixels8_xy2_num, 1);
|
|
|
|
|
blockv, temp1, temp2;
|
|
|
|
|
register vector unsigned short
|
|
|
|
|
pixelssum1, pixelssum2, temp3;
|
|
|
|
|
register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
|
|
|
|
|
temp1 = vec_ld(0, pixels);
|
|
|
|
|
temp2 = vec_ld(16, pixels);
|
|
|
|
@ -843,9 +843,9 @@ POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels8_xy2_num, 1);
|
|
|
|
|
blockv, temp1, temp2;
|
|
|
|
|
register vector unsigned short
|
|
|
|
|
pixelssum1, pixelssum2, temp3;
|
|
|
|
|
register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
|
|
|
|
|
register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
|
|
|
|
|
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
|
|
|
|
|
temp1 = vec_ld(0, pixels);
|
|
|
|
|
temp2 = vec_ld(16, pixels);
|
|
|
|
@ -920,8 +920,8 @@ POWERPC_PERF_DECLARE(altivec_put_pixels16_xy2_num, 1);
|
|
|
|
|
register vector unsigned short
|
|
|
|
|
pixelssum1, pixelssum2, temp3,
|
|
|
|
|
pixelssum3, pixelssum4, temp4;
|
|
|
|
|
register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
|
|
|
|
|
POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
|
|
|
|
|
|
|
|
|
@ -1002,9 +1002,9 @@ POWERPC_PERF_DECLARE(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|
|
|
|
register vector unsigned short
|
|
|
|
|
pixelssum1, pixelssum2, temp3,
|
|
|
|
|
pixelssum3, pixelssum4, temp4;
|
|
|
|
|
register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
|
|
|
|
|
register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
|
|
|
|
|
register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);
|
|
|
|
|
|
|
|
|
|
POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|
|
|
|
|
|
|
|
@ -1076,25 +1076,25 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
|
|
|
|
|
int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
|
|
|
|
|
POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1);
|
|
|
|
|
int sum;
|
|
|
|
|
register const_vector unsigned char vzero =
|
|
|
|
|
(const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const vector unsigned char vzero =
|
|
|
|
|
(const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register vector signed short temp0, temp1, temp2, temp3, temp4,
|
|
|
|
|
temp5, temp6, temp7;
|
|
|
|
|
POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
|
|
|
|
|
{
|
|
|
|
|
register const_vector signed short vprod1 =(const_vector signed short)
|
|
|
|
|
register const vector signed short vprod1 =(const vector signed short)
|
|
|
|
|
AVV( 1,-1, 1,-1, 1,-1, 1,-1);
|
|
|
|
|
register const_vector signed short vprod2 =(const_vector signed short)
|
|
|
|
|
register const vector signed short vprod2 =(const vector signed short)
|
|
|
|
|
AVV( 1, 1,-1,-1, 1, 1,-1,-1);
|
|
|
|
|
register const_vector signed short vprod3 =(const_vector signed short)
|
|
|
|
|
register const vector signed short vprod3 =(const vector signed short)
|
|
|
|
|
AVV( 1, 1, 1, 1,-1,-1,-1,-1);
|
|
|
|
|
register const_vector unsigned char perm1 = (const_vector unsigned char)
|
|
|
|
|
register const vector unsigned char perm1 = (const vector unsigned char)
|
|
|
|
|
AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
|
|
|
|
|
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
|
|
|
|
|
register const_vector unsigned char perm2 = (const_vector unsigned char)
|
|
|
|
|
register const vector unsigned char perm2 = (const vector unsigned char)
|
|
|
|
|
AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
|
|
|
|
|
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
|
|
|
|
|
register const_vector unsigned char perm3 = (const_vector unsigned char)
|
|
|
|
|
register const vector unsigned char perm3 = (const vector unsigned char)
|
|
|
|
|
AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
|
|
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
|
|
|
|
|
|
|
|
|
@ -1224,25 +1224,25 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
|
|
|
|
|
temp5S REG_v(v13),
|
|
|
|
|
temp6S REG_v(v14),
|
|
|
|
|
temp7S REG_v(v15);
|
|
|
|
|
register const_vector unsigned char vzero REG_v(v31)=
|
|
|
|
|
(const_vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
register const vector unsigned char vzero REG_v(v31)=
|
|
|
|
|
(const vector unsigned char)vec_splat_u8(0);
|
|
|
|
|
{
|
|
|
|
|
register const_vector signed short vprod1 REG_v(v16)=
|
|
|
|
|
(const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
|
|
|
|
|
register const_vector signed short vprod2 REG_v(v17)=
|
|
|
|
|
(const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
|
|
|
|
|
register const_vector signed short vprod3 REG_v(v18)=
|
|
|
|
|
(const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
|
|
|
|
|
register const_vector unsigned char perm1 REG_v(v19)=
|
|
|
|
|
(const_vector unsigned char)
|
|
|
|
|
register const vector signed short vprod1 REG_v(v16)=
|
|
|
|
|
(const vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1);
|
|
|
|
|
register const vector signed short vprod2 REG_v(v17)=
|
|
|
|
|
(const vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1);
|
|
|
|
|
register const vector signed short vprod3 REG_v(v18)=
|
|
|
|
|
(const vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1);
|
|
|
|
|
register const vector unsigned char perm1 REG_v(v19)=
|
|
|
|
|
(const vector unsigned char)
|
|
|
|
|
AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
|
|
|
|
|
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D);
|
|
|
|
|
register const_vector unsigned char perm2 REG_v(v20)=
|
|
|
|
|
(const_vector unsigned char)
|
|
|
|
|
register const vector unsigned char perm2 REG_v(v20)=
|
|
|
|
|
(const vector unsigned char)
|
|
|
|
|
AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
|
|
|
|
|
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B);
|
|
|
|
|
register const_vector unsigned char perm3 REG_v(v21)=
|
|
|
|
|
(const_vector unsigned char)
|
|
|
|
|
register const vector unsigned char perm3 REG_v(v21)=
|
|
|
|
|
(const vector unsigned char)
|
|
|
|
|
AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
|
|
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
|
|
|
|
|
|
|
|
|
@ -1490,9 +1490,9 @@ POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
|
|
|
|
|
register vector unsigned char blockv, temp1, temp2, blocktemp;
|
|
|
|
|
register vector unsigned short pixelssum1, pixelssum2, temp3;
|
|
|
|
|
|
|
|
|
|
register const_vector unsigned char vczero = (const_vector unsigned char)
|
|
|
|
|
register const vector unsigned char vczero = (const vector unsigned char)
|
|
|
|
|
vec_splat_u8(0);
|
|
|
|
|
register const_vector unsigned short vctwo = (const_vector unsigned short)
|
|
|
|
|
register const vector unsigned short vctwo = (const vector unsigned short)
|
|
|
|
|
vec_splat_u16(2);
|
|
|
|
|
|
|
|
|
|
temp1 = vec_ld(0, pixels);
|
|
|
|
|