Reduced memory requirements.

Buffer allocation is based on the minimum level required for the
input resolution rather than on the input max level.

The maximum number of context sets can be set to 1 to reduce
memory usage.

Added a macro ENC_MIN_PU_SIZE for the minimum size of an
inter prediction unit supported by the encoder.

Changed the maximum constraint on the number of MBs for NMB
processing to the width in MBs.

Change-Id: I5a9255e93935d90c13262681aafc772aedf8ae81
This commit is contained in:
Harinarayanan K K 2015-06-19 14:44:42 +05:30 committed by Marco Nelissen
parent 461adb94c9
commit 6cb6772805
8 changed files with 70 additions and 65 deletions

View file

@ -2502,7 +2502,7 @@ static WORD32 ih264e_init(codec_t *ps_codec)
{
WORD32 max_mb_rows = ps_cfg->i4_ht_mbs;
WORD32 num_jobs = max_mb_rows * 2;
WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
WORD32 clz;
/* Use next power of two number of entries*/
@ -2674,8 +2674,6 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
/* error status */
IV_STATUS_T status = IV_SUCCESS;
/* profile / level info */
level = ps_ip->s_ive_ip.u4_max_level;
num_reorder_frames = ps_ip->s_ive_ip.u4_max_reorder_cnt;
num_ref_frames = ps_ip->s_ive_ip.u4_max_ref_cnt;
@ -2692,6 +2690,9 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
max_mb_cols = max_wd_luma / MB_SIZE;
max_mb_cnt = max_mb_rows * max_mb_cols;
/* profile / level info */
level = ih264e_get_min_level(max_ht_luma, max_wd_luma);
/* validate params */
if ((level < MIN_LEVEL) || (level > MAX_LEVEL))
{
@ -3062,7 +3063,7 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
{
/* One process job per row of MBs */
/* Allocate for two pictures, so that wrap around can be handled easily */
WORD32 num_jobs = max_mb_rows * 2;
WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
@ -3077,7 +3078,7 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
{
/* One process job per row of MBs */
/* Allocate for two pictures, so that wrap around can be handled easily */
WORD32 num_jobs = max_mb_rows * 2;
WORD32 num_jobs = max_mb_rows * MAX_CTXT_SETS;
WORD32 job_queue_size = ih264_list_size(num_jobs, sizeof(job_t));
@ -3464,9 +3465,9 @@ static WORD32 ih264e_fill_num_mem_rec(void *pv_api_ip, void *pv_api_op)
************************************************************************/
ps_mem_rec = &ps_mem_rec_base[MEM_REC_MB_INFO_NMB];
{
ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * MAX_NMB
* (sizeof(mb_info_nmb_t)
+ MB_SIZE * MB_SIZE * sizeof(UWORD8));
ps_mem_rec->u4_mem_size = MAX_PROCESS_CTXT * max_mb_cols *
(sizeof(mb_info_nmb_t) + MB_SIZE * MB_SIZE
* sizeof(UWORD8));
}
DEBUG("\nMemory record Id %d = %d \n", MEM_REC_MB_INFO_NMB, ps_mem_rec->u4_mem_size);
@ -3641,7 +3642,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
/* base ptr */
UWORD8 *pu1_buf = ps_mem_rec->pv_base;
@ -3756,7 +3757,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pv_pic_mb_coeff_data = pu1_buf;
ps_codec->as_process[i].s_entropy.pv_pic_mb_coeff_data =
@ -3794,7 +3795,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pv_pic_mb_header_data = pu1_buf;
ps_codec->as_process[i].s_entropy.pv_pic_mb_header_data =
@ -3874,7 +3875,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].ps_slice_hdr_base = ps_mem_rec->pv_base;
}
@ -3896,7 +3897,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_is_intra_coded = pu1_buf;
}
@ -3921,7 +3922,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_slice_idx = pu1_buf_ping;
}
@ -3981,7 +3982,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_proc_map = pu1_buf + max_mb_cols;
}
@ -4012,7 +4013,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_deblk_map = pu1_buf + max_mb_cols;
@ -4042,7 +4043,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].pu1_me_map = pu1_buf + max_mb_cols;
}
@ -4238,7 +4239,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
ps_codec->as_process[i].ps_top_row_mb_syntax_ele_base =
(mb_info_t *) pu1_buf;
@ -4289,7 +4290,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
for (i = 0; i < MAX_PROCESS_CTXT; i++)
{
if (i < MAX_PROCESS_CTXT / 2)
if (i < MAX_PROCESS_CTXT / MAX_CTXT_SETS)
{
pu1_buf_ping = (UWORD8 *) ps_mem_rec->pv_base;
@ -4370,9 +4371,9 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
UWORD8 *pu1_buf = ps_mem_rec->pv_base;
/* size of nmb ctxt */
WORD32 size = MAX_NMB * sizeof(mb_info_nmb_t);
WORD32 size = max_mb_cols * sizeof(mb_info_nmb_t);
UWORD32 nmb_cntr, subpel_buf_size;
WORD32 nmb_cntr, subpel_buf_size;
/* init nmb info structure pointer in all proc ctxts */
for (i = 0; i < MAX_PROCESS_CTXT; i++)
@ -4390,7 +4391,7 @@ static WORD32 ih264e_init_mem_rec(iv_obj_t *ps_codec_obj,
mb_info_nmb_t* ps_mb_info_nmb =
&ps_codec->as_process[i].ps_nmb_info[0];
for (nmb_cntr = 0; nmb_cntr < MAX_NMB; nmb_cntr++)
for (nmb_cntr = 0; nmb_cntr < max_mb_cols; nmb_cntr++)
{
ps_mb_info_nmb[nmb_cntr].pu1_best_sub_pel_buf = pu1_buf;

View file

@ -139,6 +139,11 @@
*/
#define MAX_REF_CNT 32
/*****************************************************************************/
/* Minimum size of inter prediction unit supported by encoder */
/*****************************************************************************/
#define ENC_MIN_PU_SIZE 16
/*****************************************************************************/
/* Num cores related defs */
/*****************************************************************************/
@ -156,7 +161,7 @@
* Maximum process context sets
* Used to stagger encoding of MAX_CTXT_SETS in parallel
*/
#define MAX_CTXT_SETS 2
#define MAX_CTXT_SETS 1
/**
* Maximum number of contexts
* Kept as twice the number of threads, to make it easier to initialize the contexts
@ -529,8 +534,6 @@ enum
#define MIN_RAW_BUFS_RGBA8888_COMP 1
#define MIN_RAW_BUFS_420SP_COMP 2
#define MAX_NMB 120
/** Maximum number of active config parameter sets */
#define MAX_ACTIVE_CONFIG_PARAMS 32

View file

@ -236,7 +236,7 @@ WORD32 ih264e_encode(iv_obj_t *ps_codec_obj, void *pv_api_ip, void *pv_api_op)
ps_codec->i4_encode_api_call_cnt += 1;
/* codec context selector */
ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* reset status flags */
ps_codec->ai4_pic_cnt[ctxt_sel] = -1;

View file

@ -90,6 +90,7 @@
#include "ih264e_encode_header.h"
#include "ih264_common_tables.h"
#include "ih264_macros.h"
#include "ih264e_utils.h"
/*****************************************************************************/
@ -686,17 +687,8 @@ IH264E_ERROR_T ih264e_populate_sps(codec_t *ps_codec, sps_t *ps_sps)
}
/* level */
ps_sps->u1_level_idc = ps_cfg->u4_max_level;
// i4_err_code = ih264e_get_level(ps_cfg, &level_idc);
// if (i4_err_code == IH264E_SUCCESS)
// {
// ps_sps->u1_level_idc = level_idc;
//
// }
// else
// {
// return i4_err_code;
// }
ps_sps->u1_level_idc = MAX(ps_cfg->u4_max_level,
(UWORD32)ih264e_get_min_level(ps_cfg->u4_max_wd, ps_cfg->u4_max_ht));
/* constrained flags */
/*

View file

@ -138,7 +138,7 @@
IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
{
/* choose between ping-pong process buffer set */
WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* entropy ctxt */
entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
@ -308,7 +308,7 @@ IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
UWORD8 *pu1_entropy_map_curr;
/* proc base idx */
WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* temp var */
WORD32 i4_wd_mbs, i4_ht_mbs;
@ -1037,7 +1037,7 @@ WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
s_job.i2_mb_y = ps_proc->i4_mb_y;
/* proc base idx */
s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt & 1) ? (MAX_PROCESS_CTXT / 2): 0 ;
s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
/* queue the job */
error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
@ -1182,8 +1182,8 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
i4_mb_y = ps_proc->i4_mb_y;
/* Number of mbs processed in one loop of process function */
ps_proc->i4_nmb_ntrpy = (ps_proc->i4_wd_mbs > MAX_NMB) ? MAX_NMB : ps_proc->i4_wd_mbs;
ps_proc->u4_nmb_me = (ps_proc->i4_wd_mbs > MAX_NMB)? MAX_NMB : ps_proc->i4_wd_mbs;
ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
/* init buffer pointers */
convert_uv_only = 1;
@ -1371,10 +1371,12 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
/*********************************************************************/
/* init mv buffer ptr */
ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
/* Init co-located mv buffer */
ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
if (i4_mb_y == 0)
{
@ -1382,7 +1384,8 @@ IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
}
else
{
ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * (MIN_PU_SIZE * MIN_PU_SIZE));
ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
}
ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
@ -1911,7 +1914,7 @@ WORD32 ih264e_process(process_ctxt_t *ps_proc)
WORD32 luma_idx, chroma_idx, is_intra;
/* temp variables */
WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt & 1;
WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* list of modes for evaluation */
if (ps_proc->i4_slice_type == ISLICE)
@ -2435,7 +2438,7 @@ WORD32 ih264e_process_thread(void *pv_proc)
int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
/* codec context selector */
WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];

View file

@ -331,7 +331,7 @@ WORD32 ih264e_input_queue_update(codec_t *ps_codec,
/* Mark the skip flag */
i4_skip = 0;
ctxt_sel = ps_codec->i4_encode_api_call_cnt & 0x01;
ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
ps_codec->s_rate_control.pre_encode_skip[ctxt_sel] = i4_skip;
/* Get a buffer to encode */
@ -375,13 +375,15 @@ WORD32 ih264e_input_queue_update(codec_t *ps_codec,
*
*******************************************************************************
*/
WORD32 ih264e_get_min_level(WORD32 pic_size)
WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht)
{
WORD32 lvl_idx = MAX_LEVEL, i;
WORD32 pic_size = wd * ht;
WORD32 max = MAX(wd, ht);
for (i = 0; i < MAX_LEVEL; i++)
{
if (pic_size <= gai4_ih264_max_luma_pic_size[i])
if ((pic_size <= gai4_ih264_max_luma_pic_size[i]) &&
(max <= gai4_ih264_max_wd_ht[i]))
{
lvl_idx = i;
break;
@ -645,7 +647,7 @@ WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
WORD32 mv_bank_size = 0;
/* number of sub mb partitions possible */
WORD32 num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
WORD32 num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
/* number of mbs */
WORD32 num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
@ -655,10 +657,10 @@ WORD32 ih264e_get_pic_mv_bank_size(WORD32 num_luma_samples)
mv_bank_size += num_mb * sizeof(WORD32);
/* Size for pu_map */
mv_bank_size += num_pu;
mv_bank_size += ALIGN4(num_pu);
/* Size for storing enc_pu_t for each PU */
mv_bank_size += num_pu * sizeof(enc_pu_t);
mv_bank_size += ALIGN4(num_pu * sizeof(enc_pu_t));
return mv_bank_size;
}
@ -789,7 +791,7 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
/* num of luma samples */
WORD32 num_luma_samples = ALIGN16(ps_codec->s_cfg.u4_wd)
* ALIGN16(ps_codec->s_cfg.u4_ht);
* ALIGN16(ps_codec->s_cfg.u4_ht);
/* number of mb's & frame partitions */
WORD32 num_pu, num_mb;
@ -815,7 +817,7 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
/* compute MV bank size per picture */
pic_mv_bank_size = ih264e_get_pic_mv_bank_size(num_luma_samples);
num_pu = num_luma_samples / (MIN_PU_SIZE * MIN_PU_SIZE);
num_pu = num_luma_samples / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE);
num_mb = num_luma_samples / (MB_SIZE * MB_SIZE);
i = 0;
ps_mv_buf = ps_codec->pv_mv_bank_buf_base;
@ -834,11 +836,13 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
}
ps_mv_buf->pu4_mb_pu_cnt = (UWORD32 *) pu1_buf;
pu1_buf += num_mb * sizeof(WORD32);
ps_mv_buf->pu1_pic_pu_map = (pu1_buf + num_mb * sizeof(WORD32));
ps_mv_buf->pu1_pic_pu_map = pu1_buf;
pu1_buf += ALIGN4(num_pu);
ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf + num_mb * sizeof(WORD32)
+ num_pu);
ps_mv_buf->ps_pic_pu = (enc_pu_t *) (pu1_buf);
pu1_buf += ALIGN4(num_pu * sizeof(enc_pu_t));
ret = ih264_buf_mgr_add((buf_mgr_t *) ps_codec->pv_mv_buf_mgr,
ps_mv_buf, i);
@ -850,7 +854,6 @@ IH264E_ERROR_T ih264e_mv_buf_mgr_add_bufs(codec_t *ps_codec)
return error_status;
}
pu1_buf += pic_mv_bank_size;
ps_mv_buf++;
i++;
}
@ -1321,7 +1324,7 @@ IH264E_ERROR_T ih264e_pic_init(codec_t *ps_codec, inp_buf_t *ps_inp_buf)
UWORD32 u4_timestamp_low = ps_inp_buf->u4_timestamp_low;
/* indices to access curr/prev frame info */
WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt & 1;
WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
/* curr pic type */
PIC_TYPE_T *pic_type = &ps_codec->pic_type;

View file

@ -91,8 +91,11 @@ WORD32 ih264e_input_queue_update(codec_t *ps_codec,
* Gets the minimum level index and then gets corresponding level.
* Also used to ignore invalid levels like 2.3, 3.3 etc
*
* @param[in] level
* Level of the stream
* @param[in] wd
* Width
*
* @param[in] ht
* Height
*
* @returns Level index for a given level
*
@ -100,7 +103,7 @@ WORD32 ih264e_input_queue_update(codec_t *ps_codec,
*
*******************************************************************************
*/
WORD32 ih264e_get_min_level(WORD32 pic_size);
WORD32 ih264e_get_min_level(WORD32 wd, WORD32 ht);
/**
*******************************************************************************

View file

@ -161,7 +161,7 @@ void init_raw_buf_descr(app_ctxt_t *ps_app_ctxt, iv_raw_buf_t *ps_raw_buf, UWORD
/* All the pointers and dimensions are initialized here
* to support change in resolution from the application */
luma_size = ALIGN16(ps_app_ctxt->u4_max_wd) * ALIGN16(ps_app_ctxt->u4_max_ht);
luma_size = ps_app_ctxt->u4_max_wd * ps_app_ctxt->u4_max_ht;
chroma_size = (luma_size) / 4;
ps_raw_buf->apv_bufs[0] = pu1_buf;