h264dsp: merge some asm blocks
Some code initialized MMX/XMM registers in one asm block and used them in the following asm block, assuming the registers would not be changed between the two blocks. Merge those blocks so that each register is set up and used within a single asm statement. Originally committed as revision 25568 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
3ab354d777
commit
b32c9ca9a3
1 changed file with 20 additions and 26 deletions
|
|
@@ -299,11 +299,8 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst,
|
|||
int h=8;\
|
||||
__asm__ volatile(\
|
||||
"pxor %%mm7, %%mm7 \n\t"\
|
||||
"movq %0, %%mm6 \n\t"\
|
||||
:: "m"(ff_pw_5)\
|
||||
);\
|
||||
do{\
|
||||
__asm__ volatile(\
|
||||
"movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
|
||||
"1: \n\t"\
|
||||
"movq (%0), %%mm0 \n\t"\
|
||||
"movq 1(%0), %%mm2 \n\t"\
|
||||
"movq %%mm0, %%mm1 \n\t"\
|
||||
|
|
@@ -336,7 +333,7 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst,
|
|||
"punpcklbw %%mm7, %%mm5 \n\t"\
|
||||
"paddw %%mm3, %%mm2 \n\t"\
|
||||
"paddw %%mm5, %%mm4 \n\t"\
|
||||
"movq %5, %%mm5 \n\t"\
|
||||
"movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
|
||||
"paddw %%mm5, %%mm2 \n\t"\
|
||||
"paddw %%mm5, %%mm4 \n\t"\
|
||||
"paddw %%mm2, %%mm0 \n\t"\
|
||||
|
|
@@ -347,15 +344,15 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst,
|
|||
"packuswb %%mm1, %%mm0 \n\t"\
|
||||
PAVGB" %%mm4, %%mm0 \n\t"\
|
||||
OP(%%mm0, (%1),%%mm5, q)\
|
||||
"add %4, %0 \n\t"\
|
||||
"add %4, %1 \n\t"\
|
||||
"add %3, %2 \n\t"\
|
||||
: "+a"(src), "+c"(dst), "+d"(src2)\
|
||||
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
|
||||
"m"(ff_pw_16)\
|
||||
"add %5, %0 \n\t"\
|
||||
"add %5, %1 \n\t"\
|
||||
"add %4, %2 \n\t"\
|
||||
"decl %3 \n\t"\
|
||||
"jg 1b \n\t"\
|
||||
: "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
|
||||
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
|
||||
: "memory"\
|
||||
);\
|
||||
}while(--h);\
|
||||
}\
|
||||
\
|
||||
static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
|
||||
|
|
@@ -697,11 +694,8 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst,
|
|||
int h=8;\
|
||||
__asm__ volatile(\
|
||||
"pxor %%xmm7, %%xmm7 \n\t"\
|
||||
"movdqa %0, %%xmm6 \n\t"\
|
||||
:: "m"(ff_pw_5)\
|
||||
);\
|
||||
do{\
|
||||
__asm__ volatile(\
|
||||
"movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
|
||||
"1: \n\t"\
|
||||
"lddqu -2(%0), %%xmm1 \n\t"\
|
||||
"movdqa %%xmm1, %%xmm0 \n\t"\
|
||||
"punpckhbw %%xmm7, %%xmm1 \n\t"\
|
||||
|
|
@@ -721,22 +715,22 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst,
|
|||
"psllw $2, %%xmm2 \n\t"\
|
||||
"movq (%2), %%xmm3 \n\t"\
|
||||
"psubw %%xmm1, %%xmm2 \n\t"\
|
||||
"paddw %5, %%xmm0 \n\t"\
|
||||
"paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\
|
||||
"pmullw %%xmm6, %%xmm2 \n\t"\
|
||||
"paddw %%xmm0, %%xmm2 \n\t"\
|
||||
"psraw $5, %%xmm2 \n\t"\
|
||||
"packuswb %%xmm2, %%xmm2 \n\t"\
|
||||
"pavgb %%xmm3, %%xmm2 \n\t"\
|
||||
OP(%%xmm2, (%1), %%xmm4, q)\
|
||||
"add %4, %0 \n\t"\
|
||||
"add %4, %1 \n\t"\
|
||||
"add %3, %2 \n\t"\
|
||||
: "+a"(src), "+c"(dst), "+d"(src2)\
|
||||
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
|
||||
"m"(ff_pw_16)\
|
||||
"add %5, %0 \n\t"\
|
||||
"add %5, %1 \n\t"\
|
||||
"add %4, %2 \n\t"\
|
||||
"decl %3 \n\t"\
|
||||
"jg 1b \n\t"\
|
||||
: "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
|
||||
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
|
||||
: "memory"\
|
||||
);\
|
||||
}while(--h);\
|
||||
}\
|
||||
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
|
||||
\
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue