avfilter/interlace: add support for 10 and 12 bit
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Thomas Mundt <tmundt75@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
58ca446672
commit
40bfaa190c
11 changed files with 355 additions and 56 deletions
|
|
@ -25,9 +25,11 @@
|
|||
#ifndef AVFILTER_INTERLACE_H
|
||||
#define AVFILTER_INTERLACE_H
|
||||
|
||||
#include "libavutil/bswap.h"
|
||||
#include "libavutil/common.h"
|
||||
#include "libavutil/imgutils.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
|
||||
#include "avfilter.h"
|
||||
#include "formats.h"
|
||||
|
|
@ -55,8 +57,9 @@ typedef struct InterlaceContext {
|
|||
enum ScanMode scan; // top or bottom field first scanning
|
||||
int lowpass; // enable or disable low pass filtering
|
||||
AVFrame *cur, *next; // the two frames from which the new one is obtained
|
||||
const AVPixFmtDescriptor *csp;
|
||||
void (*lowpass_line)(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max);
|
||||
} InterlaceContext;
|
||||
|
||||
void ff_interlace_init_x86(InterlaceContext *interlace);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,9 @@
|
|||
#ifndef AVFILTER_TINTERLACE_H
|
||||
#define AVFILTER_TINTERLACE_H
|
||||
|
||||
#include "libavutil/bswap.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/pixdesc.h"
|
||||
#include "drawutils.h"
|
||||
#include "avfilter.h"
|
||||
|
||||
|
|
@ -60,8 +62,9 @@ typedef struct TInterlaceContext {
|
|||
int black_linesize[4];
|
||||
FFDrawContext draw;
|
||||
FFDrawColor color;
|
||||
const AVPixFmtDescriptor *csp;
|
||||
void (*lowpass_line)(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max);
|
||||
} TInterlaceContext;
|
||||
|
||||
void ff_tinterlace_init_x86(TInterlaceContext *interlace);
|
||||
|
|
|
|||
|
|
@ -61,8 +61,8 @@ static const AVOption interlace_options[] = {
|
|||
AVFILTER_DEFINE_CLASS(interlace);
|
||||
|
||||
static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref)
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max)
|
||||
{
|
||||
const uint8_t *srcp_above = srcp + mref;
|
||||
const uint8_t *srcp_below = srcp + pref;
|
||||
|
|
@ -75,9 +75,28 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t linesize,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Linear vertical low-pass filter for one line of 16-bit samples.
 *
 * Each output sample is the rounded integer form of
 * 0.5 * current + 0.25 * above + 0.25 * below.
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param linesize number of samples to process
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset to the line above; halved before being added to the
 *                 16-bit pointer, i.e. it is treated as a byte offset
 * @param pref     offset to the line below, handled like mref
 * @param clip_max not used by this filter; present so the signature matches
 *                 the other lowpass_line implementations
 */
static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t linesize,
                              const uint8_t *src8, ptrdiff_t mref,
                              ptrdiff_t pref, int clip_max)
{
    const uint16_t *cur   = (const uint16_t *)src8;
    const uint16_t *above = cur + mref / 2;
    const uint16_t *below = cur + pref / 2;
    uint16_t *out         = (uint16_t *)dst8;
    int x;

    for (x = 0; x < linesize; x++) {
        // samples are converted from little endian to native order first
        const int center = av_le2ne16(cur[x]);
        const int upper  = av_le2ne16(above[x]);
        const int lower  = av_le2ne16(below[x]);

        // (2 * center + upper + lower) / 4, the '+ 1' rounds the result
        out[x] = av_le2ne16((1 + 2 * center + upper + lower) >> 2);
    }
}
|
||||
|
||||
static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref)
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max)
|
||||
{
|
||||
const uint8_t *srcp_above = srcp + mref;
|
||||
const uint8_t *srcp_below = srcp + pref;
|
||||
|
|
@ -103,11 +122,51 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Complex vertical low-pass filter for one line of 16-bit samples.
 *
 * Each output sample is the rounded integer form of
 * 0.75 * current + 0.25 * above + 0.25 * below
 * - 0.125 * above2 - 0.125 * below2, clipped to [0, clip_max] and then
 * limited against the current sample so the filter cannot over-sharpen.
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param linesize number of samples to process
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset to the line above; halved for the adjacent line and
 *                 added unscaled to the 16-bit pointer for the line two above
 * @param pref     offset to the line below, handled like mref
 * @param clip_max largest value an output sample may take
 */
static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t linesize,
                                      const uint8_t *src8, ptrdiff_t mref,
                                      ptrdiff_t pref, int clip_max)
{
    const uint16_t *cur    = (const uint16_t *)src8;
    const uint16_t *above  = cur + mref / 2;
    const uint16_t *below  = cur + pref / 2;
    // unscaled offsets on a uint16_t pointer advance twice as many bytes
    const uint16_t *above2 = cur + mref;
    const uint16_t *below2 = cur + pref;
    uint16_t *out          = (uint16_t *)dst8;
    int x;

    for (x = 0; x < linesize; x++) {
        const int center  = av_le2ne16(cur[x]);
        const int center2 = center << 1;
        const int neigh   = av_le2ne16(above[x]) + av_le2ne16(below[x]);
        // '4 +' is for rounding before the division by 8
        const int filtered = av_clip((4 + ((center + center2 + neigh) << 1)
                                      - av_le2ne16(above2[x])
                                      - av_le2ne16(below2[x])) >> 3,
                                     0, clip_max);
        int result;

        // Prevent over-sharpening: the result may not drop below the source
        // when the neighbours average above it, nor rise above the source
        // otherwise.
        if (neigh > center2)
            result = filtered < center ? center : filtered;
        else
            result = filtered > center ? center : filtered;

        out[x] = av_le2ne16(result);
    }
}
|
||||
|
||||
static const enum AVPixelFormat formats_supported[] = {
|
||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
|
||||
AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUVA420P,
|
||||
AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
|
||||
AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE
|
||||
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
|
||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
|
||||
AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV444P10LE,
|
||||
AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE, AV_PIX_FMT_YUV444P12LE,
|
||||
AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
|
||||
AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE,
|
||||
AV_PIX_FMT_GRAY8, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
|
||||
AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE
|
||||
};
|
||||
|
||||
static int query_formats(AVFilterContext *ctx)
|
||||
|
|
@ -150,12 +209,19 @@ static int config_out_props(AVFilterLink *outlink)
|
|||
outlink->time_base.num *= 2;
|
||||
outlink->frame_rate.den *= 2;
|
||||
|
||||
|
||||
s->csp = av_pix_fmt_desc_get(outlink->format);
|
||||
if (s->lowpass) {
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = lowpass_line_c;
|
||||
else if (s->lowpass == VLPF_CMP)
|
||||
s->lowpass_line = lowpass_line_complex_c;
|
||||
if (s->lowpass == VLPF_LIN) {
|
||||
if (s->csp->comp[0].depth > 8)
|
||||
s->lowpass_line = lowpass_line_c_16;
|
||||
else
|
||||
s->lowpass_line = lowpass_line_c;
|
||||
} else if (s->lowpass == VLPF_CMP) {
|
||||
if (s->csp->comp[0].depth > 8)
|
||||
s->lowpass_line = lowpass_line_complex_c_16;
|
||||
else
|
||||
s->lowpass_line = lowpass_line_complex_c;
|
||||
}
|
||||
if (ARCH_X86)
|
||||
ff_interlace_init_x86(s);
|
||||
}
|
||||
|
|
@ -183,6 +249,7 @@ static void copy_picture_field(InterlaceContext *s,
|
|||
const uint8_t *srcp = src_frame->data[plane];
|
||||
int srcp_linesize = src_frame->linesize[plane] * 2;
|
||||
int dstp_linesize = dst_frame->linesize[plane] * 2;
|
||||
int clip_max = (1 << s->csp->comp[plane].depth) - 1;
|
||||
|
||||
av_assert0(cols >= 0 || lines >= 0);
|
||||
|
||||
|
|
@ -202,11 +269,13 @@ static void copy_picture_field(InterlaceContext *s,
|
|||
mref = 0;
|
||||
else if (j <= (1 + x))
|
||||
pref = 0;
|
||||
s->lowpass_line(dstp, cols, srcp, mref, pref);
|
||||
s->lowpass_line(dstp, cols, srcp, mref, pref, clip_max);
|
||||
dstp += dstp_linesize;
|
||||
srcp += srcp_linesize;
|
||||
}
|
||||
} else {
|
||||
if (s->csp->comp[plane].depth > 8)
|
||||
cols *= 2;
|
||||
av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,7 +78,12 @@ static int query_formats(AVFilterContext *ctx)
|
|||
AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
|
||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
|
||||
AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
|
||||
AV_PIX_FMT_YUV420P10LE, AV_PIX_FMT_YUV422P10LE,
|
||||
AV_PIX_FMT_YUV440P10LE, AV_PIX_FMT_YUV444P10LE,
|
||||
AV_PIX_FMT_YUV420P12LE, AV_PIX_FMT_YUV422P12LE,
|
||||
AV_PIX_FMT_YUV440P12LE, AV_PIX_FMT_YUV444P12LE,
|
||||
AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
|
||||
AV_PIX_FMT_YUVA420P10LE, AV_PIX_FMT_YUVA422P10LE, AV_PIX_FMT_YUVA444P10LE,
|
||||
AV_PIX_FMT_GRAY8, FULL_SCALE_YUVJ_FORMATS,
|
||||
AV_PIX_FMT_NONE
|
||||
};
|
||||
|
|
@ -90,7 +95,7 @@ static int query_formats(AVFilterContext *ctx)
|
|||
}
|
||||
|
||||
static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref)
|
||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max)
|
||||
{
|
||||
const uint8_t *srcp_above = srcp + mref;
|
||||
const uint8_t *srcp_below = srcp + pref;
|
||||
|
|
@ -103,8 +108,26 @@ static void lowpass_line_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Linear vertical low-pass filter for one line of 16-bit samples.
 *
 * Each output sample is the rounded integer form of
 * 0.5 * current + 0.25 * above + 0.25 * below.
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param width    number of samples to process
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset to the line above; halved before being added to the
 *                 16-bit pointer, i.e. it is treated as a byte offset
 * @param pref     offset to the line below, handled like mref
 * @param clip_max not used by this filter; present so the signature matches
 *                 the other lowpass_line implementations
 */
static void lowpass_line_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
                              ptrdiff_t mref, ptrdiff_t pref, int clip_max)
{
    const uint16_t *cur   = (const uint16_t *)src8;
    const uint16_t *above = cur + mref / 2;
    const uint16_t *below = cur + pref / 2;
    uint16_t *out         = (uint16_t *)dst8;
    int x;

    for (x = 0; x < width; x++) {
        // samples are converted from little endian to native order first
        const int center = av_le2ne16(cur[x]);
        const int upper  = av_le2ne16(above[x]);
        const int lower  = av_le2ne16(below[x]);

        // (2 * center + upper + lower) / 4, the '+ 1' rounds the result
        out[x] = av_le2ne16((1 + 2 * center + upper + lower) >> 2);
    }
}
|
||||
|
||||
static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref)
|
||||
ptrdiff_t mref, ptrdiff_t pref, int clip_max)
|
||||
{
|
||||
const uint8_t *srcp_above = srcp + mref;
|
||||
const uint8_t *srcp_below = srcp + pref;
|
||||
|
|
@ -130,6 +153,41 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Complex vertical low-pass filter for one line of 16-bit samples.
 *
 * Each output sample is the rounded integer form of
 * 0.75 * current + 0.25 * above + 0.25 * below
 * - 0.125 * above2 - 0.125 * below2, clipped to [0, clip_max] and then
 * limited against the current sample so the filter cannot over-sharpen.
 *
 * @param dst8     destination line, accessed as uint16_t samples
 * @param width    number of samples to process
 * @param src8     current source line, accessed as uint16_t samples
 * @param mref     offset to the line above; halved for the adjacent line and
 *                 added unscaled to the 16-bit pointer for the line two above
 * @param pref     offset to the line below, handled like mref
 * @param clip_max largest value an output sample may take
 */
static void lowpass_line_complex_c_16(uint8_t *dst8, ptrdiff_t width, const uint8_t *src8,
                                      ptrdiff_t mref, ptrdiff_t pref, int clip_max)
{
    const uint16_t *cur    = (const uint16_t *)src8;
    const uint16_t *above  = cur + mref / 2;
    const uint16_t *below  = cur + pref / 2;
    // unscaled offsets on a uint16_t pointer advance twice as many bytes
    const uint16_t *above2 = cur + mref;
    const uint16_t *below2 = cur + pref;
    uint16_t *out          = (uint16_t *)dst8;
    int x;

    for (x = 0; x < width; x++) {
        const int center  = av_le2ne16(cur[x]);
        const int center2 = center << 1;
        const int neigh   = av_le2ne16(above[x]) + av_le2ne16(below[x]);
        // '4 +' is for rounding before the division by 8
        const int filtered = av_clip((4 + ((center + center2 + neigh) << 1)
                                      - av_le2ne16(above2[x])
                                      - av_le2ne16(below2[x])) >> 3,
                                     0, clip_max);
        int result;

        // Prevent over-sharpening: the result may not drop below the source
        // when the neighbours average above it, nor rise above the source
        // otherwise.
        if (neigh > center2)
            result = filtered < center ? center : filtered;
        else
            result = filtered > center ? center : filtered;

        out[x] = av_le2ne16(result);
    }
}
|
||||
|
||||
static av_cold void uninit(AVFilterContext *ctx)
|
||||
{
|
||||
TInterlaceContext *tinterlace = ctx->priv;
|
||||
|
|
@ -198,12 +256,19 @@ static int config_out_props(AVFilterLink *outlink)
|
|||
(tinterlace->flags & TINTERLACE_FLAG_EXACT_TB))
|
||||
outlink->time_base = tinterlace->preout_time_base;
|
||||
|
||||
tinterlace->csp = av_pix_fmt_desc_get(outlink->format);
|
||||
if (tinterlace->flags & TINTERLACE_FLAG_CVLPF) {
|
||||
tinterlace->lowpass_line = lowpass_line_complex_c;
|
||||
if (tinterlace->csp->comp[0].depth > 8)
|
||||
tinterlace->lowpass_line = lowpass_line_complex_c_16;
|
||||
else
|
||||
tinterlace->lowpass_line = lowpass_line_complex_c;
|
||||
if (ARCH_X86)
|
||||
ff_tinterlace_init_x86(tinterlace);
|
||||
} else if (tinterlace->flags & TINTERLACE_FLAG_VLPF) {
|
||||
tinterlace->lowpass_line = lowpass_line_c;
|
||||
if (tinterlace->csp->comp[0].depth > 8)
|
||||
tinterlace->lowpass_line = lowpass_line_c_16;
|
||||
else
|
||||
tinterlace->lowpass_line = lowpass_line_c;
|
||||
if (ARCH_X86)
|
||||
ff_tinterlace_init_x86(tinterlace);
|
||||
}
|
||||
|
|
@ -250,6 +315,7 @@ void copy_picture_field(TInterlaceContext *tinterlace,
|
|||
const uint8_t *srcp = src[plane];
|
||||
int srcp_linesize = src_linesize[plane] * k;
|
||||
int dstp_linesize = dst_linesize[plane] * (interleave ? 2 : 1);
|
||||
int clip_max = (1 << tinterlace->csp->comp[plane].depth) - 1;
|
||||
|
||||
lines = (lines + (src_field == FIELD_UPPER)) / k;
|
||||
if (src_field == FIELD_LOWER)
|
||||
|
|
@ -267,11 +333,13 @@ void copy_picture_field(TInterlaceContext *tinterlace,
|
|||
if (h >= (lines - x)) mref = 0; // there is no line above
|
||||
else if (h <= (1 + x)) pref = 0; // there is no line below
|
||||
|
||||
tinterlace->lowpass_line(dstp, cols, srcp, mref, pref);
|
||||
tinterlace->lowpass_line(dstp, cols, srcp, mref, pref, clip_max);
|
||||
dstp += dstp_linesize;
|
||||
srcp += srcp_linesize;
|
||||
}
|
||||
} else {
|
||||
if (tinterlace->csp->comp[plane].depth > 8)
|
||||
cols *= 2;
|
||||
av_image_copy_plane(dstp, dstp_linesize, srcp, srcp_linesize, cols, lines);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,27 +30,26 @@ pw_4: times 8 dw 4
|
|||
|
||||
SECTION .text
|
||||
|
||||
%macro LOWPASS_LINE 0
|
||||
cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
|
||||
%macro LOWPASS 1
|
||||
add dstq, hq
|
||||
add srcq, hq
|
||||
add mrefq, srcq
|
||||
add prefq, srcq
|
||||
neg hq
|
||||
|
||||
pcmpeqb m6, m6
|
||||
pcmpeq%1 m6, m6
|
||||
|
||||
.loop:
|
||||
mova m0, [mrefq+hq]
|
||||
mova m1, [mrefq+hq+mmsize]
|
||||
pavgb m0, [prefq+hq]
|
||||
pavgb m1, [prefq+hq+mmsize]
|
||||
pavg%1 m0, [prefq+hq]
|
||||
pavg%1 m1, [prefq+hq+mmsize]
|
||||
pxor m0, m6
|
||||
pxor m1, m6
|
||||
pxor m2, m6, [srcq+hq]
|
||||
pxor m3, m6, [srcq+hq+mmsize]
|
||||
pavgb m0, m2
|
||||
pavgb m1, m3
|
||||
pavg%1 m0, m2
|
||||
pavg%1 m1, m3
|
||||
pxor m0, m6
|
||||
pxor m1, m6
|
||||
mova [dstq+hq], m0
|
||||
|
|
@ -59,7 +58,15 @@ cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
|
|||
add hq, 2*mmsize
|
||||
jl .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
; Emits two entry points that share the LOWPASS macro body:
; lowpass_line works on byte samples, lowpass_line_16 on word samples.
%macro LOWPASS_LINE 0
|
||||
cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
|
||||
; 'b' selects the byte forms of the pcmpeq/pavg instructions inside LOWPASS
LOWPASS b
|
||||
|
||||
cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref
|
||||
; double h before it is used as an offset by LOWPASS
; NOTE(review): presumably h arrives as a sample count and this converts it
; to bytes for 16-bit samples - confirm against the C caller
shl hq, 1
|
||||
; 'w' selects the word forms of the pcmpeq/pavg instructions inside LOWPASS
LOWPASS w
|
||||
%endmacro
|
||||
|
||||
%macro LOWPASS_LINE_COMPLEX 0
|
||||
|
|
@ -124,6 +131,65 @@ cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
|
|||
jg .loop
|
||||
REP_RET
|
||||
|
||||
; Complex vertical low-pass filter on word (16-bit) samples.
; Per sample it computes
;   (2 * (3 * src + above + below) + 4 - (above2 + below2)) >> 3,
; limits the result to clip_max, then selects max(result, src) where
; above + below > 2 * src and min(result, src) elsewhere.
; All arithmetic is done in 16-bit lanes.
; NOTE(review): the "_12" suffix suggests the 16-bit intermediate sums are
; only safe for samples of at most 12 bits - confirm.
; NOTE(review): the numeric cglobal arguments presumably follow the x86inc
; convention (args loaded, GPRs, vector regs, stack bytes) - confirm.
cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
|
||||
movd m7, DWORD clip_maxm ; fetch the clip_max argument
|
||||
SPLATW m7, m7, 0 ; NOTE(review): presumably broadcasts word 0 to every lane - confirm
|
||||
mova [rsp], m7 ; keep clip_max on the stack, m7 is reused in the loop
|
||||
.loop:
|
||||
mova m0, [srcq+mrefq] ; line above
|
||||
mova m1, [srcq+mrefq+mmsize]
|
||||
mova m2, [srcq+prefq] ; line below
|
||||
mova m3, [srcq+prefq+mmsize]
|
||||
paddw m0, m2 ; m0/m1 = above + below
|
||||
paddw m1, m3
|
||||
mova m6, m0 ; m6/m7 = copy of above + below for the comparison
|
||||
mova m7, m1
|
||||
mova m2, [srcq] ; current line
|
||||
mova m3, [srcq+mmsize]
|
||||
paddw m0, m2 ; m0/m1 = src + above + below
|
||||
paddw m1, m3
|
||||
psllw m2, 1 ; m2/m3 = 2 * src
|
||||
psllw m3, 1
|
||||
paddw m0, m2 ; m0/m1 = 3 * src + above + below
|
||||
paddw m1, m3
|
||||
psllw m0, 1 ; m0/m1 = 2 * (3 * src + above + below)
|
||||
psllw m1, 1
|
||||
pcmpgtw m6, m2 ; m6/m7 = mask of lanes where above + below > 2 * src
|
||||
pcmpgtw m7, m3
|
||||
mova m2, [srcq+2*mrefq] ; line two above
|
||||
mova m3, [srcq+2*mrefq+mmsize]
|
||||
mova m4, [srcq+2*prefq] ; line two below
|
||||
mova m5, [srcq+2*prefq+mmsize]
|
||||
paddw m2, m4 ; m2/m3 = above2 + below2
|
||||
paddw m3, m5
|
||||
paddw m0, [pw_4] ; add 4 for rounding
|
||||
paddw m1, [pw_4]
|
||||
psubusw m0, m2 ; unsigned saturating subtract, so the result cannot go below 0
|
||||
psubusw m1, m3
|
||||
psrlw m0, 3 ; divide by 8
|
||||
psrlw m1, 3
|
||||
pminsw m0, [rsp] ; limit to clip_max
|
||||
pminsw m1, [rsp]
|
||||
mova m2, m0 ; second copy of the filtered value
|
||||
mova m3, m1
|
||||
pmaxsw m0, [srcq] ; m0/m1 = max(filtered, src)
|
||||
pmaxsw m1, [srcq+mmsize]
|
||||
pminsw m2, [srcq] ; m2/m3 = min(filtered, src)
|
||||
pminsw m3, [srcq+mmsize]
|
||||
pand m0, m6 ; keep the max in lanes where the mask is set
|
||||
pand m1, m7
|
||||
pandn m6, m2 ; keep the min in the remaining lanes
|
||||
pandn m7, m3
|
||||
por m0, m6 ; merge both selections
|
||||
por m1, m7
|
||||
mova [dstq], m0
|
||||
mova [dstq+mmsize], m1
|
||||
|
||||
|
||||
add dstq, 2*mmsize ; each iteration handles two vectors
|
||||
add srcq, 2*mmsize
|
||||
sub hd, mmsize ; two vectors of words hold mmsize samples
|
||||
jg .loop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
|
|
|
|||
|
|
@ -27,27 +27,50 @@
|
|||
#include "libavfilter/interlace.h"
|
||||
|
||||
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
av_cold void ff_interlace_init_x86(InterlaceContext *s)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = ff_lowpass_line_sse2;
|
||||
else if (s->lowpass == VLPF_CMP)
|
||||
s->lowpass_line = ff_lowpass_line_complex_sse2;
|
||||
if (s->csp->comp[0].depth > 8) {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = ff_lowpass_line_16_sse2;
|
||||
else if (s->lowpass == VLPF_CMP)
|
||||
s->lowpass_line = ff_lowpass_line_complex_12_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = ff_lowpass_line_16_avx;
|
||||
} else {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = ff_lowpass_line_sse2;
|
||||
else if (s->lowpass == VLPF_CMP)
|
||||
s->lowpass_line = ff_lowpass_line_complex_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = ff_lowpass_line_avx;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (s->lowpass == VLPF_LIN)
|
||||
s->lowpass_line = ff_lowpass_line_avx;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,27 +28,50 @@
|
|||
#include "libavfilter/tinterlace.h"
|
||||
|
||||
void ff_lowpass_line_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
void ff_lowpass_line_avx (uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
void ff_lowpass_line_16_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
void ff_lowpass_line_16_avx (uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
void ff_lowpass_line_complex_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp,
|
||||
ptrdiff_t mref, ptrdiff_t pref);
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
void ff_lowpass_line_complex_12_sse2(uint8_t *dstp, ptrdiff_t linesize,
|
||||
const uint8_t *srcp, ptrdiff_t mref,
|
||||
ptrdiff_t pref, int clip_max);
|
||||
|
||||
av_cold void ff_tinterlace_init_x86(TInterlaceContext *s)
|
||||
{
|
||||
int cpu_flags = av_get_cpu_flags();
|
||||
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
|
||||
s->lowpass_line = ff_lowpass_line_sse2;
|
||||
else
|
||||
s->lowpass_line = ff_lowpass_line_complex_sse2;
|
||||
if (s->csp->comp[0].depth > 8) {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
|
||||
s->lowpass_line = ff_lowpass_line_16_sse2;
|
||||
else
|
||||
s->lowpass_line = ff_lowpass_line_complex_12_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
|
||||
s->lowpass_line = ff_lowpass_line_16_avx;
|
||||
} else {
|
||||
if (EXTERNAL_SSE2(cpu_flags)) {
|
||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
|
||||
s->lowpass_line = ff_lowpass_line_sse2;
|
||||
else
|
||||
s->lowpass_line = ff_lowpass_line_complex_sse2;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
|
||||
s->lowpass_line = ff_lowpass_line_avx;
|
||||
}
|
||||
if (EXTERNAL_AVX(cpu_flags))
|
||||
if (!(s->flags & TINTERLACE_FLAG_CVLPF))
|
||||
s->lowpass_line = ff_lowpass_line_avx;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue