avcodec/lagarith: switch to planar rgb
Decoding speed improves from 363 fps to 428 fps for 640x480 video.
This commit is contained in:
parent
fb9d4f5c3e
commit
b2ffecbd0c
8 changed files with 118 additions and 164 deletions
|
|
@ -53,9 +53,6 @@ typedef struct LagarithContext {
|
|||
LLVidDSPContext llviddsp;
|
||||
int zeros; /**< number of consecutive zero bytes encountered */
|
||||
int zeros_rem; /**< number of zero bytes remaining to output */
|
||||
uint8_t *rgb_planes;
|
||||
int rgb_planes_allocated;
|
||||
int rgb_stride;
|
||||
} LagarithContext;
|
||||
|
||||
/**
|
||||
|
|
@ -544,7 +541,7 @@ static int lag_decode_frame(AVCodecContext *avctx,
|
|||
uint8_t frametype;
|
||||
uint32_t offset_gu = 0, offset_bv = 0, offset_ry = 9;
|
||||
uint32_t offs[4];
|
||||
uint8_t *srcs[4], *dst;
|
||||
uint8_t *srcs[4];
|
||||
int i, j, planes = 3;
|
||||
int ret;
|
||||
|
||||
|
|
@ -557,70 +554,60 @@ static int lag_decode_frame(AVCodecContext *avctx,
|
|||
|
||||
switch (frametype) {
|
||||
case FRAME_SOLID_RGBA:
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGB32;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
|
||||
case FRAME_SOLID_GRAY:
|
||||
if (frametype == FRAME_SOLID_GRAY)
|
||||
if (avctx->bits_per_coded_sample == 24) {
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGB24;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRP;
|
||||
} else {
|
||||
avctx->pix_fmt = AV_PIX_FMT_0RGB32;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
|
||||
planes = 4;
|
||||
}
|
||||
|
||||
if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
|
||||
return ret;
|
||||
|
||||
dst = p->data[0];
|
||||
if (frametype == FRAME_SOLID_RGBA) {
|
||||
int qwidth = avctx->width>>2;
|
||||
uint64_t c = ((uint64_t)offset_gu << 32) | offset_gu;
|
||||
for (j = 0; j < avctx->height; j++) {
|
||||
for (i = 0; i < qwidth; i++) {
|
||||
AV_WN64(dst + i * 16 , c);
|
||||
AV_WN64(dst + i * 16 + 8, c);
|
||||
for (i = 0; i < avctx->height; i++) {
|
||||
memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
|
||||
memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
|
||||
memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
|
||||
memset(p->data[3] + i * p->linesize[3], buf[4], avctx->width);
|
||||
}
|
||||
for (i = 4*qwidth; i < avctx->width; i++)
|
||||
AV_WN32(dst + i * 4, offset_gu);
|
||||
dst += p->linesize[0];
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < avctx->height; j++) {
|
||||
memset(dst, buf[1], avctx->width * planes);
|
||||
dst += p->linesize[0];
|
||||
for (i = 0; i < avctx->height; i++) {
|
||||
for (j = 0; j < planes; j++)
|
||||
memset(p->data[j] + i * p->linesize[j], buf[1], avctx->width);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case FRAME_SOLID_COLOR:
|
||||
if (avctx->bits_per_coded_sample == 24) {
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGB24;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRP;
|
||||
} else {
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGB32;
|
||||
offset_gu |= 0xFFU << 24;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
|
||||
}
|
||||
|
||||
if ((ret = ff_thread_get_buffer(avctx, &frame,0)) < 0)
|
||||
return ret;
|
||||
|
||||
dst = p->data[0];
|
||||
for (j = 0; j < avctx->height; j++) {
|
||||
for (i = 0; i < avctx->width; i++)
|
||||
if (avctx->bits_per_coded_sample == 24) {
|
||||
AV_WB24(dst + i * 3, offset_gu);
|
||||
} else {
|
||||
AV_WN32(dst + i * 4, offset_gu);
|
||||
}
|
||||
dst += p->linesize[0];
|
||||
for (i = 0; i < avctx->height; i++) {
|
||||
memset(p->data[0] + i * p->linesize[0], buf[2], avctx->width);
|
||||
memset(p->data[1] + i * p->linesize[1], buf[1], avctx->width);
|
||||
memset(p->data[2] + i * p->linesize[2], buf[3], avctx->width);
|
||||
if (avctx->pix_fmt == AV_PIX_FMT_GBRAP)
|
||||
memset(p->data[3] + i * p->linesize[3], 0xFFu, avctx->width);
|
||||
}
|
||||
break;
|
||||
case FRAME_ARITH_RGBA:
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGB32;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRAP;
|
||||
planes = 4;
|
||||
offset_ry += 4;
|
||||
offs[3] = AV_RL32(buf + 9);
|
||||
case FRAME_ARITH_RGB24:
|
||||
case FRAME_U_RGB24:
|
||||
if (frametype == FRAME_ARITH_RGB24 || frametype == FRAME_U_RGB24)
|
||||
avctx->pix_fmt = AV_PIX_FMT_RGB24;
|
||||
avctx->pix_fmt = AV_PIX_FMT_GBRP;
|
||||
|
||||
if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
|
||||
return ret;
|
||||
|
|
@ -629,15 +616,8 @@ static int lag_decode_frame(AVCodecContext *avctx,
|
|||
offs[1] = offset_gu;
|
||||
offs[2] = offset_ry;
|
||||
|
||||
l->rgb_stride = FFALIGN(avctx->width, 16);
|
||||
av_fast_malloc(&l->rgb_planes, &l->rgb_planes_allocated,
|
||||
l->rgb_stride * avctx->height * planes + 1);
|
||||
if (!l->rgb_planes) {
|
||||
av_log(avctx, AV_LOG_ERROR, "cannot allocate temporary buffer\n");
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
for (i = 0; i < planes; i++)
|
||||
srcs[i] = l->rgb_planes + (i + 1) * l->rgb_stride * avctx->height - l->rgb_stride;
|
||||
srcs[i] = p->data[i] + (avctx->height - 1) * p->linesize[i];
|
||||
for (i = 0; i < planes; i++)
|
||||
if (buf_size <= offs[i]) {
|
||||
av_log(avctx, AV_LOG_ERROR,
|
||||
|
|
@ -648,32 +628,16 @@ static int lag_decode_frame(AVCodecContext *avctx,
|
|||
for (i = 0; i < planes; i++)
|
||||
lag_decode_arith_plane(l, srcs[i],
|
||||
avctx->width, avctx->height,
|
||||
-l->rgb_stride, buf + offs[i],
|
||||
-p->linesize[i], buf + offs[i],
|
||||
buf_size - offs[i]);
|
||||
dst = p->data[0];
|
||||
for (i = 0; i < planes; i++)
|
||||
srcs[i] = l->rgb_planes + i * l->rgb_stride * avctx->height;
|
||||
for (j = 0; j < avctx->height; j++) {
|
||||
for (i = 0; i < avctx->width; i++) {
|
||||
uint8_t r, g, b, a;
|
||||
r = srcs[0][i];
|
||||
g = srcs[1][i];
|
||||
b = srcs[2][i];
|
||||
r += g;
|
||||
b += g;
|
||||
if (frametype == FRAME_ARITH_RGBA) {
|
||||
a = srcs[3][i];
|
||||
AV_WN32(dst + i * 4, MKBETAG(a, r, g, b));
|
||||
} else {
|
||||
dst[i * 3 + 0] = r;
|
||||
dst[i * 3 + 1] = g;
|
||||
dst[i * 3 + 2] = b;
|
||||
}
|
||||
}
|
||||
dst += p->linesize[0];
|
||||
for (i = 0; i < planes; i++)
|
||||
srcs[i] += l->rgb_stride;
|
||||
for (i = 0; i < avctx->height; i++) {
|
||||
l->llviddsp.add_bytes(p->data[0] + i * p->linesize[0], p->data[1] + i * p->linesize[1], avctx->width);
|
||||
l->llviddsp.add_bytes(p->data[2] + i * p->linesize[2], p->data[1] + i * p->linesize[1], avctx->width);
|
||||
}
|
||||
FFSWAP(uint8_t*, p->data[0], p->data[1]);
|
||||
FFSWAP(int, p->linesize[0], p->linesize[1]);
|
||||
FFSWAP(uint8_t*, p->data[2], p->data[1]);
|
||||
FFSWAP(int, p->linesize[2], p->linesize[1]);
|
||||
break;
|
||||
case FRAME_ARITH_YUY2:
|
||||
avctx->pix_fmt = AV_PIX_FMT_YUV422P;
|
||||
|
|
@ -757,15 +721,6 @@ static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
|
|||
}
|
||||
#endif
|
||||
|
||||
static av_cold int lag_decode_end(AVCodecContext *avctx)
|
||||
{
|
||||
LagarithContext *l = avctx->priv_data;
|
||||
|
||||
av_freep(&l->rgb_planes);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
AVCodec ff_lagarith_decoder = {
|
||||
.name = "lagarith",
|
||||
.long_name = NULL_IF_CONFIG_SMALL("Lagarith lossless"),
|
||||
|
|
@ -774,7 +729,6 @@ AVCodec ff_lagarith_decoder = {
|
|||
.priv_data_size = sizeof(LagarithContext),
|
||||
.init = lag_decode_init,
|
||||
.init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
|
||||
.close = lag_decode_end,
|
||||
.decode = lag_decode_frame,
|
||||
.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue