diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm index d38a1b1035..ebe8e43750 100644 --- a/libavcodec/x86/fpel.asm +++ b/libavcodec/x86/fpel.asm @@ -91,7 +91,6 @@ cglobal %1_pixels%2, 4,5,4 INIT_MMX mmx OP_PIXELS put, 4 OP_PIXELS put, 8 -OP_PIXELS avg, 8 OP_PIXELS put, 16 OP_PIXELS avg, 16 diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index ce5d7a4e28..b3a270a173 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -83,8 +83,6 @@ cglobal put_pixels8_x2, 4,5 INIT_MMX mmxext PUT_PIXELS8_X2 -INIT_MMX 3dnow -PUT_PIXELS8_X2 ; void ff_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -127,15 +125,13 @@ cglobal put_pixels16_x2, 4,5 INIT_MMX mmxext PUT_PIXELS_16 -INIT_MMX 3dnow -PUT_PIXELS_16 ; The 8_X2 macro can easily be used here INIT_XMM sse2 PUT_PIXELS8_X2 ; void ff_put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_X2 0 +INIT_MMX mmxext cglobal put_no_rnd_pixels8_x2, 4,5 mova m6, [pb_1] lea r4, [r2*2] @@ -167,12 +163,6 @@ cglobal put_no_rnd_pixels8_x2, 4,5 sub r3d, 4 jne .loop REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_X2 -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_X2 ; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -209,15 +199,13 @@ cglobal put_pixels8_y2, 4,5 INIT_MMX mmxext PUT_PIXELS8_Y2 -INIT_MMX 3dnow -PUT_PIXELS8_Y2 ; actually, put_pixels16_y2_sse2 INIT_XMM sse2 PUT_PIXELS8_Y2 ; void ff_put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro PUT_NO_RND_PIXELS8_Y2 0 +INIT_MMX mmxext cglobal put_no_rnd_pixels8_y2, 4,5 mova m6, [pb_1] lea r4, [r2+r2] @@ -245,42 +233,6 @@ cglobal put_no_rnd_pixels8_y2, 4,5 sub r3d, 4 jne .loop REP_RET -%endmacro - -INIT_MMX mmxext -PUT_NO_RND_PIXELS8_Y2 -INIT_MMX 3dnow -PUT_NO_RND_PIXELS8_Y2 - - -; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) -%macro AVG_PIXELS8 0 -cglobal avg_pixels8, 4,5 - lea r4, [r2*2] -.loop: - mova m0, [r0] - mova m1, [r0+r2] - PAVGB m0, [r1] - PAVGB m1, [r1+r2] - mova [r0], m0 - mova [r0+r2], m1 - add r1, r4 - add r0, r4 - mova m0, [r0] - mova m1, [r0+r2] - PAVGB m0, [r1] - PAVGB m1, [r1+r2] - add r1, r4 - mova [r0], m0 - mova [r0+r2], m1 - add r0, r4 - sub r3d, 4 - jne .loop - REP_RET -%endmacro - -INIT_MMX 3dnow -AVG_PIXELS8 ; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -291,10 +243,6 @@ cglobal avg_pixels16_x2, 4,5,4 cglobal avg_pixels8_x2, 4,5 %endif lea r4, [r2*2] -%if notcpuflag(mmxext) - pcmpeqd m5, m5 - paddb m5, m5 -%endif .loop: movu m0, [r1] movu m2, [r1+r2] @@ -335,12 +283,8 @@ cglobal avg_pixels8_x2, 4,5 REP_RET %endmacro -INIT_MMX mmx -AVG_PIXELS8_X2 INIT_MMX mmxext AVG_PIXELS8_X2 -INIT_MMX 3dnow -AVG_PIXELS8_X2 ; actually avg_pixels16_x2 INIT_XMM sse2 AVG_PIXELS8_X2 @@ -384,8 +328,6 @@ cglobal avg_pixels8_y2, 4,5 INIT_MMX mmxext AVG_PIXELS8_Y2 -INIT_MMX 3dnow -AVG_PIXELS8_Y2 ; actually avg_pixels16_y2 INIT_XMM sse2 AVG_PIXELS8_Y2 @@ -394,7 +336,7 @@ AVG_PIXELS8_Y2 ; void ff_avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; Note this is not correctly rounded, and is therefore used for ; not-bitexact output -%macro AVG_APPROX_PIXELS8_XY2 0 +INIT_MMX mmxext cglobal avg_approx_pixels8_xy2, 4,5 mova m6, [pb_1] lea r4, [r2*2] @@ -429,12 +371,6 @@ cglobal avg_approx_pixels8_xy2, 4,5 sub r3d, 4 jne .loop REP_RET -%endmacro - -INIT_MMX mmxext -AVG_APPROX_PIXELS8_XY2 -INIT_MMX 3dnow -AVG_APPROX_PIXELS8_XY2 ; void ff_avg_pixels16_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) @@ -517,8 +453,6 @@ cglobal %1_pixels8_xy2, 4,5 INIT_MMX mmxext SET_PIXELS_XY2 avg -INIT_MMX 3dnow -SET_PIXELS_XY2 avg INIT_XMM sse2 SET_PIXELS_XY2 put SET_PIXELS_XY2 avg diff --git a/libavcodec/x86/hpeldsp.h b/libavcodec/x86/hpeldsp.h index bf97029b57..fd740da72e 100644 --- a/libavcodec/x86/hpeldsp.h +++ b/libavcodec/x86/hpeldsp.h @@ -45,8 +45,6 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_pixels16_xy2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index 6336587281..09c48c341e 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -35,12 +35,8 @@ void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_pixels16_x2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels, @@ -51,42 +47,21 @@ void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_put_no_rnd_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_avg_pixels8_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_avg_pixels8_x2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_avg_pixels8_y2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -void ff_avg_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); -void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); -#define avg_pixels8_mmx ff_avg_pixels8_mmx -#define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx #define avg_pixels16_mmx ff_avg_pixels16_mmx -#define avg_pixels8_xy2_mmx ff_avg_pixels8_xy2_mmx -#define avg_pixels16_xy2_mmx ff_avg_pixels16_xy2_mmx #define put_pixels8_mmx ff_put_pixels8_mmx #define put_pixels16_mmx ff_put_pixels16_mmx #define put_pixels8_xy2_mmx ff_put_pixels8_xy2_mmx -#define put_pixels16_xy2_mmx ff_put_pixels16_xy2_mmx #define avg_no_rnd_pixels16_mmx ff_avg_pixels16_mmx #define put_no_rnd_pixels8_mmx ff_put_pixels8_mmx #define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx @@ -121,30 +96,19 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8) /***********************************/ /* MMX rounding */ -#define DEF(x, y) x ## _ ## y ## _mmx #define SET_RND MOVQ_WTWO -#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) -#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) - -#include "hpeldsp_rnd_template.c" - -#undef DEF #define DEF(x, y) ff_ ## x ## _ ## y ## _mmx #define STATIC +#define NO_AVG #include "rnd_template.c" +#undef NO_AVG #undef DEF #undef SET_RND -#undef PAVGBP -#undef PAVGB #if HAVE_MMX -CALL_2X_PIXELS(avg_pixels16_y2_mmx, avg_pixels8_y2_mmx, 8) -CALL_2X_PIXELS(put_pixels16_y2_mmx, put_pixels8_y2_mmx, 8) - -CALL_2X_PIXELS_EXPORT(ff_avg_pixels16_xy2_mmx, ff_avg_pixels8_xy2_mmx, 8) -CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8) +CALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8) #endif #endif /* HAVE_INLINE_ASM */ @@ -162,46 +126,42 @@ CALL_2X_PIXELS_EXPORT(ff_put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8) CALL_2X_PIXELS(avg_pixels16_xy2 ## CPUEXT, ff_avg_pixels8_xy2 ## CPUEXT, 8) \ CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8) -HPELDSP_AVG_PIXELS16(_3dnow) HPELDSP_AVG_PIXELS16(_mmxext) #endif /* HAVE_X86ASM */ #define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \ if (HAVE_MMX_EXTERNAL) \ - c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; + c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU #if HAVE_MMX_INLINE -#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ +#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) \ do { \ - SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \ - c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ - c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ + SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU); \ c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \ } while (0) +#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) \ + do { \ + c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ + c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ + } while (0) #else +#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU) SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) +#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU) ((void)0) +#endif #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ do { \ - SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU) \ + SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU); \ + SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU); \ } while (0) -#endif static void hpeldsp_init_mmx(HpelDSPContext *c, int flags) { - SET_HPEL_FUNCS(put, [0], 16, mmx); + SET_HPEL_FUNCS03(put, [0], 16, mmx); SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx); - SET_HPEL_FUNCS(avg, [0], 16, mmx); SET_HPEL_FUNCS(avg_no_rnd, , 16, mmx); - SET_HPEL_FUNCS(put, [1], 8, mmx); + SET_HPEL_FUNCS03(put, [1], 8, mmx); SET_HPEL_FUNCS(put_no_rnd, [1], 8, mmx); - if (HAVE_MMX_EXTERNAL) { - c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmx; - c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmx; - } -#if HAVE_MMX_INLINE - c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; - c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmx; -#endif } static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) @@ -235,37 +195,6 @@ static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags) #endif /* HAVE_MMXEXT_EXTERNAL */ } -static void hpeldsp_init_3dnow(HpelDSPContext *c, int flags) -{ -#if HAVE_AMD3DNOW_EXTERNAL - c->put_pixels_tab[0][1] = ff_put_pixels16_x2_3dnow; - c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; - - c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; - - c->put_pixels_tab[1][1] = ff_put_pixels8_x2_3dnow; - c->put_pixels_tab[1][2] = ff_put_pixels8_y2_3dnow; - - c->avg_pixels_tab[1][0] = ff_avg_pixels8_3dnow; - c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_3dnow; - c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_3dnow; - c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_3dnow; - - if (!(flags & AV_CODEC_FLAG_BITEXACT)){ - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; - c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_3dnow; - c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_3dnow; - - c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_3dnow; - c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_3dnow; - } -#endif /* HAVE_AMD3DNOW_EXTERNAL */ -} - static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags) { #if HAVE_SSE2_EXTERNAL @@ -298,9 +227,6 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) if (INLINE_MMX(cpu_flags)) hpeldsp_init_mmx(c, flags); - if (EXTERNAL_AMD3DNOW(cpu_flags)) - hpeldsp_init_3dnow(c, flags); - if (EXTERNAL_MMXEXT(cpu_flags)) hpeldsp_init_mmxext(c, flags); diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c index 09946bd23f..b825eeba6e 100644 --- a/libavcodec/x86/rnd_template.c +++ b/libavcodec/x86/rnd_template.c @@ -97,6 +97,7 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel :FF_REG_a, "memory"); } +#ifndef NO_AVG // avg_pixels // this routine is 'slightly' suboptimal but mostly unused av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, @@ -173,3 +174,4 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel :"D"(block), "r"((x86_reg)line_size) :FF_REG_a, "memory"); } +#endif