avfilter/vf_overlay: Hoist calculations out of loop

Also use const where appropriate.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2025-06-04 20:27:46 +02:00
parent a4b3474de6
commit 730ffc5d35

View file

@ -470,16 +470,13 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub); \ int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub); \
int yp = y>>vsub; \ int yp = y>>vsub; \
int xp = x>>hsub; \ int xp = x>>hsub; \
int jmax, j, k, kmax; \
int slice_start, slice_end; \
const T max = (1 << nbits) - 1; \ const T max = (1 << nbits) - 1; \
const T mid = (1 << (nbits - 1)); \ const T mid = (1 << (nbits - 1)); \
\ \
j = FFMAX(-yp, 0); \ const int jmin = FFMAX(-yp, 0), jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp); \
jmax = FFMIN3(-yp + dst_hp, FFMIN(src_hp, dst_hp), yp + src_hp); \ const int kmin = FFMAX(-xp, 0), kmax = FFMIN(-xp + dst_wp, src_wp); \
\ const int slice_start = jmin + (jmax * jobnr) / nb_jobs; \
slice_start = j + (jmax * jobnr) / nb_jobs; \ const int slice_end = jmin + (jmax * (jobnr + 1)) / nb_jobs; \
slice_end = j + (jmax * (jobnr+1)) / nb_jobs; \
\ \
const uint8_t *sp = src->data[i] + (slice_start) * src->linesize[i]; \ const uint8_t *sp = src->data[i] + (slice_start) * src->linesize[i]; \
uint8_t *dp = dst->data[dst_plane] \ uint8_t *dp = dst->data[dst_plane] \
@ -488,13 +485,12 @@ static av_always_inline void blend_plane_##depth##_##nbits##bits(AVFilterContext
const uint8_t *ap = src->data[3] + (slice_start << vsub) * src->linesize[3]; \ const uint8_t *ap = src->data[3] + (slice_start << vsub) * src->linesize[3]; \
const uint8_t *dap = main_has_alpha ? dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3] : NULL; \ const uint8_t *dap = main_has_alpha ? dst->data[3] + ((yp + slice_start) << vsub) * dst->linesize[3] : NULL; \
\ \
for (j = slice_start; j < slice_end; j++) { \ for (int j = slice_start; j < slice_end; ++j) { \
k = FFMAX(-xp, 0); \ int k = kmin; \
const T *s = (const T *)sp + k; \ const T *s = (const T *)sp + k; \
const T *a = (const T *)ap + (k << hsub); \ const T *a = (const T *)ap + (k << hsub); \
const T *da = main_has_alpha ? (T *)dap + ((xp + k) << hsub) : NULL; \ const T *da = main_has_alpha ? (T *)dap + ((xp + k) << hsub) : NULL; \
T *d = (T *)(dp + (xp + k) * dst_step); \ T *d = (T *)(dp + (xp + k) * dst_step); \
kmax = FFMIN(-xp + dst_wp, src_wp); \
\ \
if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \ if (nbits == 8 && ((vsub && j+1 < src_hp) || !vsub) && octx->blend_row[i]) { \
int c = octx->blend_row[i]((uint8_t*)d, (uint8_t*)da, (uint8_t*)s, \ int c = octx->blend_row[i]((uint8_t*)d, (uint8_t*)da, (uint8_t*)s, \
@ -585,25 +581,21 @@ static inline void alpha_composite_##depth##_##nbits##bits(const AVFrame *src, c
int jobnr, int nb_jobs) \ int jobnr, int nb_jobs) \
{ \ { \
T alpha; /* the amount of overlay to blend on to main */ \ T alpha; /* the amount of overlay to blend on to main */ \
int i, imax, j, jmax; \
int slice_start, slice_end; \
const T max = (1 << nbits) - 1; \ const T max = (1 << nbits) - 1; \
\ \
imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \ const int imin = FFMAX(-y, 0), imax = FFMIN3(-y + dst_h, FFMIN(src_h, dst_h), y + src_h); \
i = FFMAX(-y, 0); \ const int jmin = FFMAX(-x, 0), jmax = FFMIN(-x + dst_w, src_w); \
\ const int slice_start = imin + ( imax * jobnr) / nb_jobs; \
slice_start = i + (imax * jobnr) / nb_jobs; \ const int slice_end = imin + ((imax * (jobnr + 1)) / nb_jobs); \
slice_end = i + ((imax * (jobnr+1)) / nb_jobs); \
\ \
const uint8_t *sa = src->data[3] + (slice_start) * src->linesize[3]; \ const uint8_t *sa = src->data[3] + (slice_start) * src->linesize[3]; \
uint8_t *da = dst->data[3] + (y + slice_start) * dst->linesize[3]; \ uint8_t *da = dst->data[3] + (y + slice_start) * dst->linesize[3]; \
\ \
for (i = slice_start; i < slice_end; i++) { \ for (int i = slice_start; i < slice_end; ++i) { \
j = FFMAX(-x, 0); \ const T *s = (const T *)sa + jmin; \
const T *s = (const T *)sa + j; \ T *d = (T *)da + x + jmin; \
T *d = (T *)da + x + j; \
\ \
for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) { \ for (int j = jmin; j < jmax; ++j) { \
alpha = *s; \ alpha = *s; \
if (alpha != 0 && alpha != max) { \ if (alpha != 0 && alpha != max) { \
uint8_t alpha_d = *d; \ uint8_t alpha_d = *d; \