libhevc-mirror/decoder/ihevcd_iquant_itrans_recon_ctb.c
Ram Mohan M e0aec79c0d
Some checks are pending
CMake / build (cmake, aarch64-linux-gnu-gcc, -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64_toolchain.cmake, aarch64-linux-gnu-g++, ubuntu-latest-cross-aarch64-cmake, ubuntu-latest) (push) Waiting to run
CMake / build (cmake, arm-linux-gnueabihf-gcc, -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch32_toolchain.cmake, arm-linux-gnueabihf-g++, ubuntu-latest-cross-aarch32-cmake, ubuntu-latest) (push) Waiting to run
CMake / build (cmake, clang, , clang++, macos-latest-clang-cmake, macos-latest) (push) Waiting to run
CMake / build (cmake, clang, , clang++, ubuntu-24.04-arm-clang-cmake, ubuntu-24.04-arm) (push) Waiting to run
CMake / build (cmake, clang, , clang++, ubuntu-latest-clang-cmake, ubuntu-latest) (push) Waiting to run
CMake / build (cmake, clang, -DSANITIZE=fuzzer-no-link,address, clang++, ubuntu-latest-clang-cmake-asan-fuzzer, ubuntu-latest) (push) Waiting to run
CMake / build (cmake, clang, -G Ninja, clang++, ubuntu-latest-clang-cmake-ninja, ubuntu-latest) (push) Waiting to run
CMake / build (cmake, gcc, , g++, ubuntu-latest-gcc-cmake, ubuntu-latest) (push) Waiting to run
hevcdec: add support for explicit rdpcm
Test: ./hevcdec

Change-Id: I89365bb544c3a9dc6ffc25f86f3cbf568dd2ecf4
2026-03-06 14:33:49 -08:00

1604 lines
68 KiB
C

/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
*******************************************************************************
* @file
* ihevcd_iquant_itrans_recon_ctb.c
*
* @brief
* Contains functions for inverse quantization, inverse transform and recon
*
* @author
* Ittiam
*
* @par List of Functions:
* - ihevcd_iquant_itrans_recon_ctb()
*
* @remarks
* None
*
*******************************************************************************
*/
/*****************************************************************************/
/* File Includes */
/*****************************************************************************/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "ihevc_typedefs.h"
#include "iv.h"
#include "ivd.h"
#include "ihevcd_cxa.h"
#include "ihevc_defs.h"
#include "ihevc_debug.h"
#include "ihevc_structs.h"
#include "ihevc_cabac_tables.h"
#include "ihevc_macros.h"
#include "ihevc_platform_macros.h"
#include "ihevcd_defs.h"
#include "ihevcd_function_selector.h"
#include "ihevcd_structs.h"
#include "ihevcd_error.h"
#include "ihevcd_bitstream.h"
#include "ihevc_common_tables.h"
/* Intra pred includes */
#include "ihevc_intra_pred.h"
/* Inverse transform common module includes */
#include "ihevc_trans_tables.h"
#include "ihevc_trans_macros.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_recon.h"
#include "ihevc_chroma_itrans_recon.h"
#include "ihevc_chroma_recon.h"
/* Decoder includes */
#include "ihevcd_common_tables.h"
#include "ihevcd_iquant_itrans_recon_ctb.h"
#include "ihevcd_debug.h"
#include "ihevcd_profile.h"
#include "ihevcd_statistics.h"
#include "ihevcd_itrans_recon_dc.h"
/* Globals */
/*
 * Lookup from intra prediction mode (array index, modes 0..34 as listed in the
 * per-entry comments) to the IP_FUNC_* selector that handles it. Groups of
 * angular modes share one selector (3-9, 11-17, 19-25, 27-33), and modes 18
 * and 34 share IP_FUNC_MODE_18_34.
 */
static const WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] =
{ IP_FUNC_MODE_0, /* Mode 0 */
IP_FUNC_MODE_1, /* Mode 1 */
IP_FUNC_MODE_2, /* Mode 2 */
IP_FUNC_MODE_3TO9, /* Mode 3 */
IP_FUNC_MODE_3TO9, /* Mode 4 */
IP_FUNC_MODE_3TO9, /* Mode 5 */
IP_FUNC_MODE_3TO9, /* Mode 6 */
IP_FUNC_MODE_3TO9, /* Mode 7 */
IP_FUNC_MODE_3TO9, /* Mode 8 */
IP_FUNC_MODE_3TO9, /* Mode 9 */
IP_FUNC_MODE_10, /* Mode 10 */
IP_FUNC_MODE_11TO17, /* Mode 11 */
IP_FUNC_MODE_11TO17, /* Mode 12 */
IP_FUNC_MODE_11TO17, /* Mode 13 */
IP_FUNC_MODE_11TO17, /* Mode 14 */
IP_FUNC_MODE_11TO17, /* Mode 15 */
IP_FUNC_MODE_11TO17, /* Mode 16 */
IP_FUNC_MODE_11TO17, /* Mode 17 */
IP_FUNC_MODE_18_34, /* Mode 18 */
IP_FUNC_MODE_19TO25, /* Mode 19 */
IP_FUNC_MODE_19TO25, /* Mode 20 */
IP_FUNC_MODE_19TO25, /* Mode 21 */
IP_FUNC_MODE_19TO25, /* Mode 22 */
IP_FUNC_MODE_19TO25, /* Mode 23 */
IP_FUNC_MODE_19TO25, /* Mode 24 */
IP_FUNC_MODE_19TO25, /* Mode 25 */
IP_FUNC_MODE_26, /* Mode 26 */
IP_FUNC_MODE_27TO33, /* Mode 27 */
IP_FUNC_MODE_27TO33, /* Mode 28 */
IP_FUNC_MODE_27TO33, /* Mode 29 */
IP_FUNC_MODE_27TO33, /* Mode 30 */
IP_FUNC_MODE_27TO33, /* Mode 31 */
IP_FUNC_MODE_27TO33, /* Mode 32 */
IP_FUNC_MODE_27TO33, /* Mode 33 */
IP_FUNC_MODE_18_34, /* Mode 34 */
};
/*
 * Pointers to the first element of each transform matrix, in the order:
 * 4x4 DST, 4x4, 8x8, 16x16, 32x32.
 * NOTE(review): the order appears to match the TRANSFORM_TYPE / func_idx
 * numbering used in this file (DST_4x4 first) - confirm against the enum.
 */
const WORD16 *g_ai2_ihevc_trans_tables[] =
{ &g_ai2_ihevc_trans_dst_4[0][0],
&g_ai2_ihevc_trans_4[0][0],
&g_ai2_ihevc_trans_8[0][0],
&g_ai2_ihevc_trans_16[0][0],
&g_ai2_ihevc_trans_32[0][0]
};
/*****************************************************************************/
/* Structures */
/*****************************************************************************/
/**
* Per-plane working state for one TU: everything the inverse-quant /
* inverse-transform / recon step needs for a single colour plane.
*/
typedef struct
{
/**
* coefficient buffer for this plane (filled by ihevcd_unpack_coeffs)
*/
WORD16 *pi2_tu_coeff;
/**
* prediction samples buffer
*/
UWORD8 *pu1_pred;
/**
* reconstructed samples (output) buffer
*/
UWORD8 *pu1_dst;
/**
* stride of pi2_tu_coeff, in WORD16 elements
*/
WORD32 tu_coeff_stride;
/**
* stride of pu1_pred
*/
WORD32 pred_strd;
/**
* stride of pu1_dst
*/
WORD32 dst_strd;
/**
* bitmasks used to shortcut the inverse transform: bit i set means
* column i (zero_cols) / row i (zero_rows) holds no non-zero coefficient
*/
UWORD32 zero_cols;
UWORD32 zero_rows;
/**
* coeff_type is 1 when only the DC coefficient is coded, 0 otherwise;
* coeff_value then holds that (already dequantized) DC value
*/
UWORD32 coeff_type;
WORD16 coeff_value;
/**
* cbf of this plane; residual processing is only done when it is 1
*/
UWORD8 cbf;
/**
* non-zero when the TU is coded with transform skip
*/
UWORD8 transform_skip_flag;
#ifdef ENABLE_MAIN_REXT_PROFILE
/**
* non-zero when explicit rdpcm is signalled for this TU
*/
UWORD8 explicit_rdpcm_flag;
/**
* explicit rdpcm direction: 0 selects the horizontal variant,
* any other value the vertical variant
*/
UWORD8 explicit_rdpcm_dir;
#endif
} tu_plane_iq_it_recon_ctxt_t;
/*****************************************************************************/
/* Function Prototypes */
/*****************************************************************************/
/**
* Signature shared by the per-plane inverse-quant / inverse-transform / recon
* routines in this file.
*
* ps_proc          : process context (codec function tables, scratch buffers)
* ps_tu            : TU being reconstructed
* ps_pl_tu_ctxt    : per-plane buffers, strides and flags for that TU
* func_idx         : index into the codec's recon / itrans-recon function tables
* log2_trans_size  : log2 of the transform size of this plane
* chroma_plane     : NULL_PLANE for luma, otherwise the chroma plane id
* intra_flag       : non-zero when the TU belongs to an intra coded block
* intra_pred_mode  : intra prediction mode of the plane
*/
typedef void (*PF_IQITRECON_PLANE)(process_ctxt_t *ps_proc,
tu_t *ps_tu,
tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
WORD32 func_idx,
WORD32 log2_trans_size,
CHROMA_PLANE_ID_T chroma_plane,
WORD8 intra_flag,
WORD8 intra_pred_mode);
/**
 * Unpacks the packed coefficient data of one TU plane into a raster
 * trans_size x trans_size buffer, inverse-quantizing each level on the way
 * (unless transquant bypass is set).
 *
 * Packed layout read from pu1_tu_coeff_data:
 *   byte 0 : number of coded 4x4 sub-blocks
 *   byte 1 : bit 0 = transform skip, scan type in the bits above it
 *            (bits 1..3 when ENABLE_MAIN_REXT_PROFILE is defined)
 *   then   : one tu_sblk_coeff_data_t record per coded sub-block, each
 *            followed directly by as many levels as its sig-coeff map has bits
 *
 * @param pi2_tu_coeff        output coefficient buffer, stride = trans_size
 * @param log2_trans_size     log2 of the transform size
 * @param pu1_tu_coeff_data   packed coefficient data (see layout above)
 * @param pi2_dequant_matrix  scaling matrix, raster order, stride = trans_size
 * @param qp_rem              index into g_ihevc_iquant_scales
 * @param qp_div              forwarded to the IQUANT macros
 * @param e_trans_type        transform type; DST_4x4 never takes the DC-only path
 * @param trans_quant_bypass  non-zero: levels are copied without dequantization
 * @param pu4_zero_cols       out: bit i set => column i has no non-zero coeff
 * @param pu4_zero_rows       out: bit i set => row i has no non-zero coeff
 * @param pu4_coeff_type      out: 1 for the DC-only case, 0 otherwise
 * @param pi2_coeff_value     out: DC value (written only in the DC-only case)
 *
 * @return pointer to the first byte after the data consumed for this TU
 */
UWORD8* ihevcd_unpack_coeffs(WORD16 *pi2_tu_coeff,
                             WORD32 log2_trans_size,
                             UWORD8 *pu1_tu_coeff_data,
                             WORD16 *pi2_dequant_matrix,
                             WORD32 qp_rem,
                             WORD32 qp_div,
                             TRANSFORM_TYPE e_trans_type,
                             WORD32 trans_quant_bypass,
                             UWORD32 *pu4_zero_cols,
                             UWORD32 *pu4_zero_rows,
                             UWORD32 *pu4_coeff_type,
                             WORD16 *pi2_coeff_value)
{
    /* Generating coeffs from coeff-map */
    WORD32 i;
    WORD16 *pi2_sblk_ptr;
    WORD32 subblk_pos_x, subblk_pos_y;
    WORD32 sblk_scan_idx, coeff_raster_idx;
    WORD32 sblk_non_zero_coeff_idx;
    tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
    UWORD8 u1_num_coded_sblks, u1_scan_type;
    UWORD8 *pu1_new_tu_coeff_data;
    WORD32 trans_size;
    WORD32 xs, ys;
    WORD32 trans_skip;
    WORD16 iquant_out;
    WORD32 shift_iq;

    {
        WORD32 bit_depth;

        bit_depth = 8 + 0;
        shift_iq = bit_depth + log2_trans_size - 5;
    }
    trans_size = (1 << log2_trans_size);

    /* First byte points to number of coded blocks */
    u1_num_coded_sblks = *pu1_tu_coeff_data++;

    /* Next byte points to scan type */
    u1_scan_type = *pu1_tu_coeff_data++;
    /* 0th bit has trans_skip */
    trans_skip = u1_scan_type & 1;
#ifdef ENABLE_MAIN_REXT_PROFILE
    /* Upper nibble carries other flags in this build; keep bits 1..3 only */
    u1_scan_type = (u1_scan_type & 0xF) >> 1;
#else
    u1_scan_type >>= 1;
#endif

    pi2_sblk_ptr = pi2_tu_coeff;

    /* Initially all columns are assumed to be zero */
    *pu4_zero_cols = 0xFFFFFFFF;
    /* Initially all rows are assumed to be zero */
    *pu4_zero_rows = 0xFFFFFFFF;

    ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)(pu1_tu_coeff_data);

    /* Transform-skip blocks are always cleared up front, even for DC only */
    if(trans_skip)
    {
        memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
    }

    STATS_INIT_SBLK_AND_COEFF_POS();

    /* DC only case */
    if((e_trans_type != DST_4x4) && (1 == u1_num_coded_sblks)
                    && (0 == ps_tu_sblk_coeff_data->u2_subblk_pos)
                    && (1 == ps_tu_sblk_coeff_data->u2_sig_coeff_map))
    {
        *pu4_coeff_type = 1;

        if(!trans_quant_bypass)
        {
            if(4 == trans_size)
            {
                IQUANT_4x4(iquant_out,
                           ps_tu_sblk_coeff_data->ai2_level[0],
                           pi2_dequant_matrix[0]
                                           * g_ihevc_iquant_scales[qp_rem],
                           shift_iq, qp_div);
            }
            else
            {
                IQUANT(iquant_out, ps_tu_sblk_coeff_data->ai2_level[0],
                       pi2_dequant_matrix[0] * g_ihevc_iquant_scales[qp_rem],
                       shift_iq, qp_div);
            }
            if(trans_skip)
                iquant_out = (iquant_out + 16) >> 5;
        }
        else
        {
            /* setting the column to zero */
            /* Only column 0 is cleared here: it is the only column that */
            /* zero_cols reports as non-zero for this block              */
            for(i = 0; i < trans_size; i++)
                *(pi2_tu_coeff + i * trans_size) = 0;

            iquant_out = ps_tu_sblk_coeff_data->ai2_level[0];
        }
        *pi2_coeff_value = iquant_out;
        *pi2_tu_coeff = iquant_out;
        *pu4_zero_cols &= ~0x1;
        *pu4_zero_rows &= ~0x1;
        ps_tu_sblk_coeff_data =
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[1];

        STATS_UPDATE_COEFF_COUNT();
        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), 0, 0);
        STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));
        return ((UWORD8 *)ps_tu_sblk_coeff_data);
    }
    else
    {
        *pu4_coeff_type = 0;
        /* In case of trans skip, memset has already happened */
        if(!trans_skip)
            memset(pi2_tu_coeff, 0, trans_size * trans_size * sizeof(WORD16));
    }

    for(i = 0; i < u1_num_coded_sblks; i++)
    {
        UWORD32 u4_sig_coeff_map;

        /* Sub-block position: x in the low byte, y in the high byte */
        subblk_pos_x = ps_tu_sblk_coeff_data->u2_subblk_pos & 0x00FF;
        subblk_pos_y = (ps_tu_sblk_coeff_data->u2_subblk_pos & 0xFF00) >> 8;

        STATS_LAST_SBLK_POS_UPDATE(e_trans_type, (trans_skip || trans_quant_bypass), subblk_pos_x, subblk_pos_y);

        subblk_pos_x = subblk_pos_x * MIN_TU_SIZE;
        subblk_pos_y = subblk_pos_y * MIN_TU_SIZE;

        pi2_sblk_ptr = pi2_tu_coeff + subblk_pos_y * trans_size
                        + subblk_pos_x;

        sblk_non_zero_coeff_idx = 0;
        u4_sig_coeff_map = ps_tu_sblk_coeff_data->u2_sig_coeff_map;

        /* Walk the set bits of the map from the highest scan position down */
        sblk_scan_idx = 31;
        do
        {
            WORD32 clz = CLZ(u4_sig_coeff_map);

            sblk_scan_idx -= clz;
            /* when clz is 31, u4_sig_coeff_map << (clz+1) might result in unknown behaviour in some cases */
            /* Hence either use SHL which takes care of handling these issues based on platform or shift in two stages */
            u4_sig_coeff_map = u4_sig_coeff_map << clz;
            /* Copying coeffs and storing in reverse order */
            {
                STATS_UPDATE_COEFF_COUNT();
                coeff_raster_idx =
                                gau1_ihevc_invscan4x4[u1_scan_type][sblk_scan_idx];

                xs = coeff_raster_idx & 0x3;
                ys = coeff_raster_idx >> 2;

                if(!trans_quant_bypass)
                {
                    if(4 == trans_size)
                    {
                        IQUANT_4x4(iquant_out,
                                   ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
                                   pi2_dequant_matrix[(subblk_pos_x + xs)
                                                   + (subblk_pos_y + ys)
                                                   * trans_size]
                                   * g_ihevc_iquant_scales[qp_rem],
                                   shift_iq, qp_div);
                        sblk_non_zero_coeff_idx++;
                    }
                    else
                    {
                        IQUANT(iquant_out,
                               ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx],
                               pi2_dequant_matrix[(subblk_pos_x + xs)
                                               + (subblk_pos_y + ys)
                                               * trans_size]
                               * g_ihevc_iquant_scales[qp_rem],
                               shift_iq, qp_div);
                        sblk_non_zero_coeff_idx++;
                    }

                    if(trans_skip)
                        iquant_out = (iquant_out + 16) >> 5;
                }
                else
                {
                    iquant_out = ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx++];
                }

                /* Bit positions reach 31 for a 32x32 TU, so the shift must */
                /* be done on an unsigned operand (1 << 31 on int is UB)    */
                *pu4_zero_cols &= ~((UWORD32)1 << (subblk_pos_x + xs));
                *pu4_zero_rows &= ~((UWORD32)1 << (subblk_pos_y + ys));
                *(pi2_sblk_ptr + xs + ys * trans_size) = iquant_out;
            }
            sblk_scan_idx--;
            u4_sig_coeff_map <<= 1;

        }while(u4_sig_coeff_map);

        /* Updating the sblk pointer: the next record starts right after */
        /* the levels consumed for this sub-block                        */
        ps_tu_sblk_coeff_data =
                        (void *)&ps_tu_sblk_coeff_data->ai2_level[sblk_non_zero_coeff_idx];
    }

    STATS_UPDATE_SBLK_AND_COEFF_HISTOGRAM(e_trans_type, (trans_quant_bypass || trans_skip));

    pu1_new_tu_coeff_data = (UWORD8 *)ps_tu_sblk_coeff_data;

    return pu1_new_tu_coeff_data;
}
/**
 * Builds the neighbour-availability flags used for intra prediction of a TU.
 *
 * Availability of the five neighbour groups is first read from the per-CTB
 * bit map pu4_intra_nbr_avail. With constrained intra prediction enabled,
 * each available neighbour is further restricted to the 8x8 blocks marked in
 * the picture level intra flag map; otherwise an available neighbour is
 * taken as fully usable. Top-right and bottom-left are finally clipped to the
 * samples that lie inside the picture.
 *
 * Returned layout, MSB to LSB:
 *   Top-Left (1 bit) | Top-Right (4) | Top (4) | Left (4) | Bottom-Left (4)
 * Left and Bottom-Left are stored bit-reversed via gau4_ihevcd_4_bit_reverse.
 *
 * @param ps_proc                         process context (CTB position, sps, intra map)
 * @param ps_tu                           TU being predicted
 * @param pu4_intra_nbr_avail             per-CTB availability rows, one bit per min TU
 * @param i2_pic_width_in_luma_samples    picture width, sets the intra map row pitch
 * @param i1_constrained_intra_pred_flag  non-zero to honour the intra flag map
 * @param trans_size                      TU size in samples
 * @param ctb_size                        CTB size in samples
 *
 * @return packed neighbour flags as described above
 */
WORD32 ihevcd_get_intra_nbr_flag(process_ctxt_t *ps_proc,
                                 tu_t *ps_tu,
                                 UWORD32 *pu4_intra_nbr_avail,
                                 WORD16 i2_pic_width_in_luma_samples,
                                 UWORD8 i1_constrained_intra_pred_flag,
                                 WORD32 trans_size,
                                 WORD32 ctb_size)
{
    sps_t *ps_sps = ps_proc->ps_sps;
    UWORD8 *pu1_intra_map = ps_proc->pu1_pic_intra_flag;
    /* One byte of the intra map covers 64 luma samples horizontally */
    const WORD32 map_row_bytes = (i2_pic_width_in_luma_samples + 63) / 64;

    /* TU position and extent inside the CTB, in MIN_TU_SIZE units */
    const WORD32 tu_col = ps_tu->b4_pos_x;
    const WORD32 tu_row = ps_tu->b4_pos_y;
    const WORD32 tu_span = trans_size / MIN_TU_SIZE;

    /* TU position inside the picture, in luma samples */
    const WORD32 pic_x = ps_proc->i4_ctb_x * ctb_size + tu_col * MIN_TU_SIZE;
    const WORD32 pic_y = ps_proc->i4_ctb_y * ctb_size + tu_row * MIN_TU_SIZE;

    /* Availability rows: entry 0 is the row above the CTB, and within a  */
    /* row the MSB is the column left of the CTB                          */
    const UWORD32 u4_row_above = pu4_intra_nbr_avail[tu_row];
    const UWORD32 u4_row_cur = pu4_intra_nbr_avail[tu_row + 1];
    const UWORD32 u4_row_below = pu4_intra_nbr_avail[tu_row + 1 + tu_span];

    const UWORD8 u1_has_bot_left = (u4_row_below >> (31 - tu_col)) & 1;
    const UWORD8 u1_has_left = (u4_row_cur >> (31 - tu_col)) & 1;
    const UWORD8 u1_has_top = (u4_row_above >> (30 - tu_col)) & 1;
    const UWORD8 u1_has_top_right = (u4_row_above >> (30 - tu_col - tu_span)) & 1;
    const UWORD8 u1_has_top_left = (u4_row_above >> (31 - tu_col)) & 1;

    /* Number of 8x8 blocks along one TU edge and the matching bit mask */
    const WORD32 num_8_blks = (trans_size > 4) ? (trans_size / 8) : 1;
    const WORD32 blk_mask = (1 << num_8_blks) - 1;

    UWORD8 bot_left = 0;
    UWORD8 left = 0;
    UWORD8 top = 0;
    UWORD8 top_right = 0;
    UWORD8 top_left = 0;

    if(!i1_constrained_intra_pred_flag)
    {
        /* Any available neighbour is usable as a whole */
        if(u1_has_top)
            top = 0xF;
        if(u1_has_top_right)
            top_right = 0xF;
        if(u1_has_bot_left)
            bot_left = 0xF;
        if(u1_has_left)
            left = 0xF;
        if(u1_has_top_left)
            top_left = 0x1;
    }
    else
    {
        /* TODO: constrained intra pred not tested */
        WORD32 k;

        if(u1_has_bot_left)
        {
            const WORD32 nbr_x = pic_x - 1;
            const WORD32 nbr_y = pic_y + trans_size;
            const UWORD8 *pu1_flag = pu1_intra_map + nbr_y / 8 * map_row_bytes
                            + nbr_x / 64;
            const WORD32 bit_pos = ((nbr_x / 8) % 8);

            /* Vertical neighbour: one bit from each successive map row */
            for(k = 0; k < num_8_blks; k++)
            {
                bot_left |= ((*(pu1_flag + k * map_row_bytes) >> bit_pos) & 1) << k;
            }
            bot_left &= blk_mask;
        }

        if(u1_has_left)
        {
            const WORD32 nbr_x = pic_x - 1;
            const WORD32 nbr_y = pic_y;
            const UWORD8 *pu1_flag = pu1_intra_map + nbr_y / 8 * map_row_bytes
                            + nbr_x / 64;
            const WORD32 bit_pos = ((nbr_x / 8) % 8);

            for(k = 0; k < num_8_blks; k++)
            {
                left |= ((*(pu1_flag + k * map_row_bytes) >> bit_pos) & 1) << k;
            }
            left &= blk_mask;
        }

        if(u1_has_top)
        {
            const WORD32 nbr_x = pic_x;
            const WORD32 nbr_y = pic_y - 1;
            const UWORD8 *pu1_flag = pu1_intra_map + nbr_y / 8 * map_row_bytes
                            + nbr_x / 64;
            const WORD32 bit_pos = ((nbr_x / 8) % 8);

            /* Horizontal neighbour: consecutive bits of a single map byte */
            top = (*pu1_flag >> bit_pos);
            top &= blk_mask;
        }

        if(u1_has_top_right)
        {
            const WORD32 nbr_x = pic_x + trans_size;
            const WORD32 nbr_y = pic_y - 1;
            const UWORD8 *pu1_flag = pu1_intra_map + nbr_y / 8 * map_row_bytes
                            + nbr_x / 64;
            const WORD32 bit_pos = ((nbr_x / 8) % 8);

            top_right = (*pu1_flag >> bit_pos);
            top_right &= blk_mask;
        }

        if(u1_has_top_left)
        {
            const WORD32 nbr_x = pic_x - 1;
            const WORD32 nbr_y = pic_y - 1;
            const UWORD8 *pu1_flag = pu1_intra_map + nbr_y / 8 * map_row_bytes
                            + nbr_x / 64;
            const WORD32 bit_pos = ((nbr_x / 8) % 8);

            top_left = (*pu1_flag >> bit_pos) & 1;
        }
    }

    /* Handling incomplete CTBs: drop top-right / bottom-left units that */
    /* would fall outside the picture                                    */
    {
        const WORD32 unit_size = MIN(trans_size, 8);
        const WORD32 tu_extent = (1 << (ps_tu->b3_size + 2));

        /* Samples left in the picture to the right of this TU */
        const WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples
                        - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)
                        - (ps_tu->b4_pos_x * MIN_TU_SIZE)
                        - tu_extent;
        const WORD32 valid_cols = MIN(ctb_size, cols_remaining);
        const WORD32 valid_cols_bits = (1 << (valid_cols / unit_size)) - 1;

        /* Samples left in the picture below this TU */
        const WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples
                        - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)
                        - (ps_tu->b4_pos_y * MIN_TU_SIZE)
                        - tu_extent;
        const WORD32 valid_rows = MIN(ctb_size, rows_remaining);
        const WORD32 valid_rows_bits = (1 << (valid_rows / unit_size)) - 1;

        top_right &= valid_cols_bits;
        bot_left &= valid_rows_bits;
    }

    /* Top-Left | Top-Right | Top | Left | Bottom-Left
     *     1         4         4     4         4
     */
    return (top_left << 16) | (top_right << 12) | (top << 8)
                    | (gau4_ihevcd_4_bit_reverse[left] << 4)
                    | gau4_ihevcd_4_bit_reverse[bot_left];
}
/**
 * Default per-plane reconstruction: adds the residual of one TU plane to its
 * prediction, going through the inverse transform when one is needed.
 *
 * Nothing is done unless the plane's cbf is 1. For transquant-bypass or
 * transform-skip TUs the coefficient buffer already holds the residual and is
 * passed straight to the recon function; otherwise the full inverse
 * transform + recon is run, or the DC-only shortcut when coeff_type is set.
 *
 * @param ps_proc          process context (codec function tables, scratch buffer)
 * @param ps_tu            TU being reconstructed
 * @param ps_pl_tu_ctxt    per-plane buffers, strides and flags
 * @param func_idx         index into apf_recon / apf_itrans_recon
 * @param log2_trans_size  log2 of the transform size (used by the DC path)
 * @param chroma_plane     NULL_PLANE for luma; selects the luma or chroma DC routine
 * @param intra_flag       unused, present to match PF_IQITRECON_PLANE
 * @param intra_pred_mode  unused, present to match PF_IQITRECON_PLANE
 */
static void ihevcd_iquant_itrans_recon_tu_plane(process_ctxt_t *ps_proc,
                                                tu_t *ps_tu,
                                                tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
                                                WORD32 func_idx,
                                                WORD32 log2_trans_size,
                                                CHROMA_PLANE_ID_T chroma_plane,
                                                WORD8 intra_flag,
                                                WORD8 intra_pred_mode)
{
    codec_t *ps_codec = ps_proc->ps_codec;

    /* Only needed by the residual-domain variant of this routine */
    (void)intra_flag;
    (void)intra_pred_mode;

    /* No coded residual: the prediction already is the reconstruction */
    if(1 != ps_pl_tu_ctxt->cbf)
    {
        return;
    }

    if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag)
    {
        /* Recon */
        ps_codec->apf_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff, ps_pl_tu_ctxt->pu1_pred,
                                      ps_pl_tu_ctxt->pu1_dst, ps_pl_tu_ctxt->tu_coeff_stride,
                                      ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd,
                                      ps_pl_tu_ctxt->zero_cols);
    }
    else if(0 == ps_pl_tu_ctxt->coeff_type)
    {
        /* iQuant , iTrans and Recon */
        ps_codec->apf_itrans_recon[func_idx](ps_pl_tu_ctxt->pi2_tu_coeff,
                                             ps_proc->pi2_itrans_intrmd_buf,
                                             ps_pl_tu_ctxt->pu1_pred,
                                             ps_pl_tu_ctxt->pu1_dst,
                                             ps_pl_tu_ctxt->tu_coeff_stride,
                                             ps_pl_tu_ctxt->pred_strd,
                                             ps_pl_tu_ctxt->dst_strd,
                                             ps_pl_tu_ctxt->zero_cols,
                                             ps_pl_tu_ctxt->zero_rows);
    }
    else /* DC only */
    {
        ps_codec->apf_itrans_recon_dc[chroma_plane != NULL_PLANE](
                        ps_pl_tu_ctxt->pu1_pred, ps_pl_tu_ctxt->pu1_dst,
                        ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd, log2_trans_size,
                        ps_pl_tu_ctxt->coeff_value);
    }
}
#ifdef ENABLE_MAIN_REXT_PROFILE
/**
 * Residual-domain per-plane reconstruction. Unlike the default routine, the
 * residual is first materialised in a scratch buffer (luma or chroma residue
 * buffer of the process context) so that it can be post-processed before it
 * is added to the prediction:
 *   - transform-skip / bypass TUs: optional 4x4 rotation, then optional
 *     horizontal or vertical rdpcm, or a plain copy when neither applies;
 *   - transformed TUs: inverse transform (or DC shortcut) into the buffer;
 *   - chroma planes with a non-zero residual scale: reconstruction through
 *     ihevc_chroma_recon_nxn_ccp using both the luma and chroma residues.
 *
 * @param ps_proc          process context (codec tables, residue/scratch buffers)
 * @param ps_tu            TU being reconstructed
 * @param ps_pl_tu_ctxt    per-plane buffers, strides and flags; zero_cols is
 *                         updated to describe the post-processed residual
 * @param func_idx         index into apf_recon (and apf_itrans_res, see below)
 * @param log2_trans_size  log2 of the transform size
 * @param chroma_plane     NULL_PLANE for luma, U_PLANE / V_PLANE for chroma
 * @param intra_flag       non-zero when the TU belongs to an intra coded block
 * @param intra_pred_mode  intra prediction mode; 10 and 26 trigger implicit rdpcm
 */
static void ihevcd_iquant_itrans_resi_recon_tu_plane(process_ctxt_t *ps_proc,
                                                     tu_t *ps_tu,
                                                     tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
                                                     WORD32 func_idx,
                                                     WORD32 log2_trans_size,
                                                     CHROMA_PLANE_ID_T chroma_plane,
                                                     WORD8 intra_flag,
                                                     WORD8 intra_pred_mode)
{
    sps_t *ps_sps = ps_proc->ps_sps;
    codec_t *ps_codec = ps_proc->ps_codec;
    WORD8 trans_size = 1 << log2_trans_size;
    WORD16 *pi2_res = ps_proc->pi2_res_luma_buf;
    WORD16 *pi2_res_uv = ps_proc->pi2_res_chroma_buf;
    /* Signed residual scale for this chroma plane; stays 0 for luma */
    WORD32 alpha = 0;
    WORD16 *residue_out_base = chroma_plane == NULL_PLANE ? pi2_res : pi2_res_uv;
    WORD16 *residue_out = residue_out_base;
    // if both rdpcm and rotate are to be applied, share the output residue buffer between the
    // two transforms
    WORD16 *residue_out_intrmdt = residue_out_base + (TRANS_SIZE_4 * TRANS_SIZE_4);

    if(chroma_plane == U_PLANE && ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0)
    {
        alpha = (1 << (ps_tu->b3_cb_log2_res_scale_abs_plus1 - 1))
                        * (1 - 2 * ps_tu->b1_cb_log2_res_sign);
    }
    else if(chroma_plane == V_PLANE && ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0)
    {
        alpha = (1 << (ps_tu->b3_cr_log2_res_scale_abs_plus1 - 1))
                        * (1 - 2 * ps_tu->b1_cr_log2_res_sign);
    }

    if(1 == ps_pl_tu_ctxt->cbf)
    {
        if(ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag)
        {
            WORD8 rotate = ps_sps->i1_transform_skip_rotation_enabled_flag && trans_size == 4
                            && intra_flag;
            WORD8 rdpcm = (ps_sps->i1_implicit_rdpcm_enabled_flag && intra_flag
                            && (intra_pred_mode == 10 || intra_pred_mode == 26))
                            || ps_pl_tu_ctxt->explicit_rdpcm_flag;
            WORD16 *src_residue = ps_pl_tu_ctxt->pi2_tu_coeff;
            WORD16 src_residue_strd = ps_pl_tu_ctxt->tu_coeff_stride;

            if(rotate)
            {
                /* Rotate into the intermediate buffer when rdpcm follows, */
                /* otherwise straight into the final residue buffer        */
                ihevc_res_4x4_rotate(src_residue, rdpcm ? residue_out_intrmdt : residue_out,
                                     src_residue_strd, trans_size, ps_pl_tu_ctxt->zero_cols);
                /* Rotation mirrors the columns, so mirror the 4-bit mask too */
                ps_pl_tu_ctxt->zero_cols =
                                gau4_ihevcd_4_bit_reverse[ps_pl_tu_ctxt->zero_cols & 0xF];
                src_residue = residue_out_intrmdt;
                src_residue_strd = trans_size;
            }

            if(rdpcm)
            {
                /* Explicit direction wins; implicit: mode 10 -> 0, mode 26 -> 1 */
                WORD8 rdpcm_dir =
                                ps_pl_tu_ctxt->explicit_rdpcm_flag ?
                                                ps_pl_tu_ctxt->explicit_rdpcm_dir :
                                                intra_pred_mode != 10;

                if(rdpcm_dir == 0)
                {
                    ihevc_res_nxn_rdpcm_horz(src_residue, residue_out, src_residue_strd, trans_size,
                                             trans_size, ps_pl_tu_ctxt->zero_cols);
                    /* Horizontal accumulation can make every column from the  */
                    /* first non-zero one onwards non-zero: keep only the bits */
                    /* below it. Done unsigned since the first non-zero column */
                    /* can be 31, and (1 << 31) - 1 on int is undefined.       */
                    ps_pl_tu_ctxt->zero_cols =
                                    ((UWORD32)1 << CTZ(~ps_pl_tu_ctxt->zero_cols)) - 1;
                }
                else
                {
                    ihevc_res_nxn_rdpcm_vert(src_residue, residue_out, src_residue_strd, trans_size,
                                             trans_size, ps_pl_tu_ctxt->zero_cols);
                }
            }

            if(!rdpcm && !rotate)
            {
                ihevc_res_nxn_copy(src_residue, residue_out, src_residue_strd, trans_size,
                                   trans_size, ps_pl_tu_ctxt->zero_cols);
            }
        }
        else
        {
            /* iQuant, iTrans */
            if(0 == ps_pl_tu_ctxt->coeff_type)
            {
                /* NOTE(review): chroma func_idx is shifted down by 4 here, */
                /* presumably because apf_itrans_res has no separate chroma */
                /* entries - confirm against the table definition           */
                WORD32 func_tmp_idx = chroma_plane != NULL_PLANE ? func_idx - 4 : func_idx;

                ps_codec->apf_itrans_res[func_tmp_idx](ps_pl_tu_ctxt->pi2_tu_coeff,
                                                       ps_proc->pi2_itrans_intrmd_buf, residue_out,
                                                       ps_pl_tu_ctxt->tu_coeff_stride, trans_size,
                                                       ps_pl_tu_ctxt->zero_cols,
                                                       ps_pl_tu_ctxt->zero_rows);
            }
            else /* DC only */
            {
                ps_codec->apf_itrans_res_dc(residue_out, trans_size, log2_trans_size,
                                            ps_pl_tu_ctxt->coeff_value);
            }
            /* After the inverse transform no column can be assumed zero */
            ps_pl_tu_ctxt->zero_cols = 0;
        }

        if(!alpha)
        {
            ps_codec->apf_recon[func_idx](residue_out, ps_pl_tu_ctxt->pu1_pred,
                                          ps_pl_tu_ctxt->pu1_dst, trans_size,
                                          ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd,
                                          ps_pl_tu_ctxt->zero_cols);
        }
    }

    if(alpha)
    {
        /* With no coded chroma residual the chroma residue buffer must */
        /* still be defined before it is combined with the luma residue */
        if(0 == ps_pl_tu_ctxt->cbf)
        {
            memset(residue_out, 0, trans_size * trans_size * sizeof(WORD16));
        }
        ihevc_chroma_recon_nxn_ccp(pi2_res, pi2_res_uv, ps_pl_tu_ctxt->pu1_pred,
                                   ps_pl_tu_ctxt->pu1_dst, alpha, trans_size, trans_size,
                                   trans_size, ps_pl_tu_ctxt->pred_strd, ps_pl_tu_ctxt->dst_strd);
    }
}
/**
 * Selects the per-plane reconstruction routine for a TU plane.
 *
 * The residual-domain routine is returned when the residual needs
 * post-processing before recon:
 *   - a coded transform-skip / bypass plane that is rotated (4x4 intra with
 *     rotation enabled) or uses implicit or explicit rdpcm, or
 *   - cross component prediction is enabled and the plane takes part in it
 *     (luma when either chroma scale is non-zero, a chroma plane when its
 *     own scale is non-zero).
 * In every other case the default routine is returned.
 *
 * @param ps_proc          process context (gives access to sps / pps)
 * @param ps_tu            TU being reconstructed
 * @param ps_pl_tu_ctxt    per-plane flags (cbf, transform skip, explicit rdpcm)
 * @param log2_trans_size  log2 of the transform size
 * @param chroma_plane     NULL_PLANE for luma, U_PLANE / V_PLANE for chroma
 * @param intra_flag       non-zero when the TU belongs to an intra coded block
 * @param intra_pred_mode  intra prediction mode of the plane
 *
 * @return the routine to call for this plane
 */
PF_IQITRECON_PLANE get_iqitrec_func(process_ctxt_t *ps_proc,
                                    tu_t *ps_tu,
                                    tu_plane_iq_it_recon_ctxt_t *ps_pl_tu_ctxt,
                                    WORD32 log2_trans_size,
                                    CHROMA_PLANE_ID_T chroma_plane,
                                    WORD8 intra_flag,
                                    WORD8 intra_pred_mode)
{
    sps_t *ps_sps = ps_proc->ps_sps;
    pps_t *ps_pps = ps_proc->ps_pps;
    const WORD8 tu_dim = 1 << log2_trans_size;
    const WORD32 residual_is_spatial = (1 == ps_pl_tu_ctxt->cbf)
                    && (ps_tu->b1_transquant_bypass || ps_pl_tu_ctxt->transform_skip_flag);

    if(residual_is_spatial)
    {
        const WORD32 wants_rotation = ps_sps->i1_transform_skip_rotation_enabled_flag
                        && (tu_dim == 4) && intra_flag;
        const WORD32 wants_implicit_rdpcm = ps_sps->i1_implicit_rdpcm_enabled_flag
                        && intra_flag
                        && (intra_pred_mode == 10 || intra_pred_mode == 26);

        if(wants_rotation || wants_implicit_rdpcm || ps_pl_tu_ctxt->explicit_rdpcm_flag)
        {
            return ihevcd_iquant_itrans_resi_recon_tu_plane;
        }
    }

    if(ps_pps->i1_cross_component_prediction_enabled_flag)
    {
        const WORD32 cb_scaled = (ps_tu->b3_cb_log2_res_scale_abs_plus1 != 0);
        const WORD32 cr_scaled = (ps_tu->b3_cr_log2_res_scale_abs_plus1 != 0);
        WORD32 plane_uses_ccp;

        if(chroma_plane == NULL_PLANE)
        {
            /* Luma residue must be kept whenever any chroma plane needs it */
            plane_uses_ccp = cb_scaled || cr_scaled;
        }
        else if(chroma_plane == V_PLANE)
        {
            plane_uses_ccp = cr_scaled;
        }
        else if(chroma_plane == U_PLANE)
        {
            plane_uses_ccp = cb_scaled;
        }
        else
        {
            plane_uses_ccp = 0;
        }

        if(plane_uses_ccp)
        {
            return ihevcd_iquant_itrans_resi_recon_tu_plane;
        }
    }

    return ihevcd_iquant_itrans_recon_tu_plane;
}
#endif
WORD32 ihevcd_iquant_itrans_recon_ctb(process_ctxt_t *ps_proc)
{
WORD16 *pi2_scaling_mat;
UWORD8 *pu1_y_dst_ctb;
UWORD8 *pu1_uv_dst_ctb;
WORD32 ctb_size;
codec_t *ps_codec;
slice_header_t *ps_slice_hdr;
tu_t *ps_tu;
WORD16 *pi2_ctb_coeff;
WORD32 tu_cnt;
WORD16 *pi2_tu_coeff;
WORD32 pic_strd;
WORD32 luma_nbr_flags;
WORD32 luma_nbr_flags_4x4[4] = { 0 };
WORD32 chroma_nbr_flags = 0;
WORD32 chroma_nbr_flags_subtu = 0;
#ifdef ENABLE_MAIN_REXT_PROFILE
WORD32 disable_boundary_filter = 0;
#endif
UWORD8 u1_luma_pred_mode_first_tu = 0;
/* Pointers for generating 2d coeffs from coeff-map */
UWORD8 *pu1_tu_coeff_data;
/* nbr avail map for CTB */
/* 1st bit points to neighbor (left/top_left/bot_left) */
/* 1Tb starts at 2nd bit from msb of 2nd value in array, followed by number of min_tu's in that ctb */
UWORD32 au4_intra_nbr_avail[MAX_CTB_SIZE / MIN_TU_SIZE + 2 /* Top nbr + bot nbr */];
UWORD32 top_avail_bits;
sps_t *ps_sps;
pps_t *ps_pps;
WORD32 intra_flag;
UWORD8 *pu1_pic_intra_flag;
WORD32 h_samp_factor, v_samp_factor;
WORD32 chroma_pixel_strd = 2;
PF_IQITRECON_PLANE iqitrecon_fptr = ihevcd_iquant_itrans_recon_tu_plane;
/*************************************************************************/
/* Contains scaling matrix offset in the following order in a 1D buffer */
/* Entries that are listed as UNUSED are invalid combinations where */
/* scaling matrix is not used. eg: 64x64 SKIP CU, 64x64 PCM CU */
/* Intra 4 x 4 Y, 4 x 4 U, 4 x 4 V */
/* Inter 4 x 4 Y, 4 x 4 U, 4 x 4 V */
/* Intra 8 x 8 Y, 8 x 8 U, 8 x 8 V */
/* Inter 8 x 8 Y, 8 x 8 U, 8 x 8 V */
/* Intra 16x16 Y, 16x16 U, 16x16 V */
/* Inter 16x16 Y, 16x16 U, 16x16 V */
/* Intra 32x32 Y, 32x32 U, 32x32 V */
/* Inter 32x32 Y, 32x32 U, 32x32 V */
/* UNUSED, UNUSED, UNUSED */
/* UNUSED, UNUSED, UNUSED */
/*************************************************************************/
static const WORD32 scaling_mat_offset[] =
{ 0, 16, 32, 48, 64, 80, 96, 160, 224, 288, 352, 416, 480, 736, 992,
1248, 1504, 1760, 2016, 3040, 4064, 5088, 6112, 7136, 0, 0, 0, 0, 0, 0};
PROFILE_DISABLE_IQ_IT_RECON_INTRA_PRED();
ps_sps = ps_proc->ps_sps;
ps_pps = ps_proc->ps_pps;
ps_slice_hdr = ps_proc->ps_slice_hdr;
ps_codec = ps_proc->ps_codec;
pu1_y_dst_ctb = ps_proc->pu1_cur_ctb_luma;
pu1_uv_dst_ctb = ps_proc->pu1_cur_ctb_chroma;
pi2_ctb_coeff = ps_proc->pi2_invscan_out;
ctb_size = (1 << ps_sps->i1_log2_ctb_size);
pu1_tu_coeff_data = (UWORD8 *)ps_proc->pv_tu_coeff_data;
pic_strd = ps_codec->i4_strd;
pi2_tu_coeff = pi2_ctb_coeff;
ps_tu = ps_proc->ps_tu;
if((1 == ps_sps->i1_scaling_list_enable_flag) && (1 == ps_pps->i1_pps_scaling_list_data_present_flag))
{
pi2_scaling_mat = ps_pps->pi2_scaling_mat;
}
else
{
pi2_scaling_mat = ps_sps->pi2_scaling_mat;
}
{
/* Updating the initial availability map */
WORD32 i;
UWORD8 u1_left_ctb_avail, u1_top_lt_ctb_avail, u1_top_rt_ctb_avail,
u1_top_ctb_avail;
u1_left_ctb_avail = ps_proc->u1_left_ctb_avail;
u1_top_lt_ctb_avail = ps_proc->u1_top_lt_ctb_avail;
u1_top_ctb_avail = ps_proc->u1_top_ctb_avail;
u1_top_rt_ctb_avail = ps_proc->u1_top_rt_ctb_avail;
/* Initializing the availability array */
memset(au4_intra_nbr_avail, 0,
(MAX_CTB_SIZE / MIN_TU_SIZE + 2) * sizeof(UWORD32));
/* Initializing the availability array with CTB level availability flags */
{
WORD32 rows_remaining = ps_sps->i2_pic_height_in_luma_samples - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size);
WORD32 ctb_size_left = MIN(ctb_size, rows_remaining);
for(i = 0; i < ctb_size_left / MIN_TU_SIZE; i++)
{
au4_intra_nbr_avail[i + 1] = ((UWORD32)u1_left_ctb_avail << 31);
}
}
au4_intra_nbr_avail[0] |= (((UWORD32)u1_top_rt_ctb_avail << 31)
>> (1 + ctb_size / MIN_TU_SIZE)); /* 1+ctb_size/4 position bit pos from msb */
au4_intra_nbr_avail[0] |= ((UWORD32)u1_top_lt_ctb_avail << 31);
{
WORD32 cols_remaining = ps_sps->i2_pic_width_in_luma_samples - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size);
WORD32 ctb_size_top = MIN(ctb_size, cols_remaining);
WORD32 shift = (31 - (ctb_size / MIN_TU_SIZE));
/* ctb_size_top gives number of valid pixels remaining in the current row */
/* Since we need pattern of 1's starting from the MSB, an additional shift */
/* is needed */
shift += ((ctb_size - ctb_size_top) / MIN_TU_SIZE);
top_avail_bits = ((1 << (ctb_size_top / MIN_TU_SIZE)) - 1)
<< shift;
}
au4_intra_nbr_avail[0] |= (
(u1_top_ctb_avail == 1) ? top_avail_bits : 0x0);
/* Starting from msb 2nd bit to (1+ctb_size/4) bit, set 1 if top avail,or 0 */
}
h_samp_factor = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 2;
v_samp_factor = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 2 : 1;
/* Applying Inverse transform on all the TU's in CTB */
for(tu_cnt = 0; tu_cnt < ps_proc->i4_ctb_tu_cnt; tu_cnt++, ps_tu++)
{
tu_plane_iq_it_recon_ctxt_t y_cb_tu = { 0 };
tu_plane_iq_it_recon_ctxt_t cr_tu = { 0 };
tu_plane_iq_it_recon_ctxt_t *ps_cb_tu = &y_cb_tu;
tu_plane_iq_it_recon_ctxt_t *ps_cr_tu = &cr_tu;
#ifdef ENABLE_MAIN_REXT_PROFILE
tu_plane_iq_it_recon_ctxt_t cb_sub_tu = { 0 };
tu_plane_iq_it_recon_ctxt_t cr_sub_tu = { 0 };
#endif
WORD32 num_comp, c_idx, func_idx;
WORD32 qp_div = 0, qp_rem = 0;
WORD32 qp_div_v = 0, qp_rem_v = 0;
WORD32 chroma_qp_idx;
WORD8 i1_chroma_pic_qp_offset, i1_chroma_slice_qp_offset;
WORD16 *pi2_dequant_matrix = NULL, *pi2_dequant_matrix_v = NULL;
WORD32 trans_size = 0;
TRANSFORM_TYPE e_trans_type;
WORD32 log2_y_trans_size_minus_2, log2_uv_trans_size_minus_2;
WORD32 log2_trans_size;
WORD32 tu_x, tu_y;
WORD32 tu_y_offset, tu_uv_offset;
UWORD8 u1_luma_pred_mode, u1_chroma_pred_mode;
WORD32 offset;
WORD32 pcm_flag;
WORD32 chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
/* If 420SP_VU is chroma format, pred and dst pointer */
/* will be added +1 to point to U */
WORD32 chroma_yuv420sp_vu_u_offset = 1 * chroma_yuv420sp_vu;
/* If 420SP_VU is chroma format, pred and dst pointer */
/* will be added U offset of +1 and subtracted 2 */
/* to point to V */
WORD32 chroma_yuv420sp_vu_v_offset = -2 * chroma_yuv420sp_vu;
tu_x = ps_tu->b4_pos_x * 4; /* Converting minTU unit to pixel unit */
tu_y = ps_tu->b4_pos_y * 4; /* Converting minTU unit to pixel unit */
{
WORD32 tu_abs_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (tu_x);
WORD32 tu_abs_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (tu_y);
WORD32 numbytes_row = (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
pu1_pic_intra_flag = ps_proc->pu1_pic_intra_flag;
pu1_pic_intra_flag += (tu_abs_y >> 3) * numbytes_row;
pu1_pic_intra_flag += (tu_abs_x >> 6);
intra_flag = *pu1_pic_intra_flag;
intra_flag &= (1 << ((tu_abs_x >> 3) % 8));
}
u1_luma_pred_mode = ps_tu->b6_luma_intra_mode;
u1_chroma_pred_mode = ps_tu->b3_chroma_intra_mode_idx;
if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc && u1_chroma_pred_mode != 7)
num_comp = 2; /* Y and UV */
else
num_comp = 1; /* Y */
pcm_flag = 0;
if((intra_flag) && (u1_luma_pred_mode == INTRA_PRED_NONE))
{
UWORD8 *pu1_buf;
UWORD8 *pu1_y_dst = pu1_y_dst_ctb;
UWORD8 *pu1_uv_dst = pu1_uv_dst_ctb;
WORD32 i, j;
tu_sblk_coeff_data_t *ps_tu_sblk_coeff_data;
WORD32 cb_size = 1 << (ps_tu->b3_size + 2);
/* trans_size is used to update availability after reconstruction */
trans_size = cb_size;
pcm_flag = 1;
tu_y_offset = tu_x + tu_y * pic_strd;
pu1_y_dst += tu_x + tu_y * pic_strd;
/* First byte points to number of coded blocks */
pu1_tu_coeff_data++;
/* Next byte points to scan type */
pu1_tu_coeff_data++;
ps_tu_sblk_coeff_data = (tu_sblk_coeff_data_t *)pu1_tu_coeff_data;
pu1_buf = (UWORD8 *)&ps_tu_sblk_coeff_data->ai2_level[0];
{
for(i = 0; i < cb_size; i++)
{
//pu1_y_dst[i * pic_strd + j] = *pu1_buf++;
memcpy(&pu1_y_dst[i * pic_strd], pu1_buf, cb_size);
pu1_buf += cb_size;
}
if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_MONOCHROME)
{
WORD32 chroma_strd = (pic_strd * chroma_pixel_strd) / h_samp_factor;
pu1_uv_dst += (tu_x * chroma_pixel_strd / h_samp_factor)
+ (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor));
pu1_uv_dst = pu1_uv_dst + chroma_yuv420sp_vu_u_offset;
/* U */
for(i = 0; i < cb_size / v_samp_factor; i++)
{
for(j = 0; j < cb_size / h_samp_factor; j++)
{
pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++;
}
}
pu1_uv_dst = pu1_uv_dst + 1 + chroma_yuv420sp_vu_v_offset;
/* V */
for(i = 0; i < cb_size / v_samp_factor; i++)
{
for(j = 0; j < cb_size / h_samp_factor; j++)
{
pu1_uv_dst[i * chroma_strd + chroma_pixel_strd * j] = *pu1_buf++;
}
}
}
}
pu1_tu_coeff_data = pu1_buf;
}
for(c_idx = 0; c_idx < num_comp; c_idx++)
{
if(0 == pcm_flag)
{
if(c_idx == 0) /* Y */
{
/* Initializing variables */
log2_y_trans_size_minus_2 = ps_tu->b3_size;
trans_size = 1 << (log2_y_trans_size_minus_2 + 2);
log2_trans_size = log2_y_trans_size_minus_2 + 2;
tu_y_offset = tu_x + tu_y * pic_strd;
/* Calculating scaling matrix offset */
offset = log2_y_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx;
pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset];
/* 4x4 transform Luma in INTRA mode is DST */
if(log2_y_trans_size_minus_2 == 0 && intra_flag)
{
func_idx = log2_y_trans_size_minus_2;
e_trans_type = DST_4x4;
}
else
{
func_idx = log2_y_trans_size_minus_2 + 1;
e_trans_type = (TRANSFORM_TYPE)(log2_y_trans_size_minus_2 + 1);
}
qp_div = ps_tu->b7_qp / 6;
qp_rem = ps_tu->b7_qp % 6;
y_cb_tu.pi2_tu_coeff = pi2_tu_coeff;
y_cb_tu.pu1_pred = pu1_y_dst_ctb + tu_y_offset;
y_cb_tu.pu1_dst = pu1_y_dst_ctb + tu_y_offset;
y_cb_tu.tu_coeff_stride = trans_size;
y_cb_tu.pred_strd = pic_strd;
y_cb_tu.dst_strd = pic_strd;
y_cb_tu.cbf = ps_tu->b1_y_cbf;
y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
#ifdef ENABLE_MAIN_REXT_PROFILE
y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
#endif
/* Unpacking coeffs */
if(1 == y_cb_tu.cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
y_cb_tu.pi2_tu_coeff, log2_y_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix,
qp_rem, qp_div, e_trans_type,
ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols,
&y_cb_tu.zero_rows, &y_cb_tu.coeff_type,
&y_cb_tu.coeff_value);
}
}
else /* UV interleaved */
{
/* Initializing variables */
const WORD16 *pi2_ihevcd_chroma_qp =
CHROMA_FMT_IDC_YUV420 != ps_sps->i1_chroma_format_idc ?
gai2_ihevcd_chroma_qp_clip :
gai2_ihevcd_chroma_qp_420;
/* Chroma :If Transform size is 4x4, keep 4x4 else do transform on (trans_size/2 x trans_size/2) */
if(ps_tu->b3_size == 0)
{
log2_uv_trans_size_minus_2 = ps_tu->b3_size;
if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc)
{
tu_uv_offset = (tu_x * chroma_pixel_strd)
+ (tu_y * chroma_pixel_strd * pic_strd);
}
else
{
/* Chroma 4x4 is present with 4th luma 4x4 block. For this case chroma position has to be (luma pos x - 4, luma pos y - 4) */
tu_uv_offset = (tu_x - 4) + ((tu_y - 4) / v_samp_factor) * pic_strd;
}
}
else
{
if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc)
{
log2_uv_trans_size_minus_2 = ps_tu->b3_size;
}
else
{
log2_uv_trans_size_minus_2 = ps_tu->b3_size - 1;
}
tu_uv_offset = (tu_x * chroma_pixel_strd / h_samp_factor)
+ (tu_y * chroma_pixel_strd * pic_strd / (h_samp_factor * v_samp_factor));
}
trans_size = 1 << (log2_uv_trans_size_minus_2 + 2);
log2_trans_size = log2_uv_trans_size_minus_2 + 2;
/*TODO: Add support for choosing different tables for U and V,
* change this to a single array to handle flat/default/custom, intra/inter, luma/chroma and various sizes
*/
/* Calculating scaling matrix offset */
/* ((log2_uv_trans_size_minus_2 == 3) ? 1:3) condition check is not needed, since
* max uv trans size is 16x16
*/
offset = log2_uv_trans_size_minus_2 * 6 + (!intra_flag) * 3 + c_idx;
pi2_dequant_matrix = pi2_scaling_mat + scaling_mat_offset[offset];
pi2_dequant_matrix_v = pi2_scaling_mat + scaling_mat_offset[offset + 1];
func_idx = 1 + 4 + log2_uv_trans_size_minus_2; /* DST func + Y funcs + cur func index*/
/* Handle error cases where 64x64 TU is signalled which results in 32x32 chroma.
* Limit func_idx based on allowed max chroma tu size */
func_idx = MIN(func_idx, (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 8 : 7);
e_trans_type = (TRANSFORM_TYPE)(log2_uv_trans_size_minus_2 + 1);
/* QP for U */
i1_chroma_pic_qp_offset = ps_pps->i1_pic_cb_qp_offset;
i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset;
chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset;
chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
qp_div = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
qp_rem = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
/* QP for V */
i1_chroma_pic_qp_offset = ps_pps->i1_pic_cr_qp_offset;
i1_chroma_slice_qp_offset = ps_slice_hdr->i1_slice_cr_qp_offset;
chroma_qp_idx = ps_tu->b7_qp + i1_chroma_pic_qp_offset + i1_chroma_slice_qp_offset;
chroma_qp_idx = CLIP3(chroma_qp_idx, 0, 57);
qp_div_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] / 6;
qp_rem_v = pi2_ihevcd_chroma_qp[chroma_qp_idx] % 6;
y_cb_tu.pi2_tu_coeff = pi2_tu_coeff;
y_cb_tu.pu1_pred = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
y_cb_tu.pu1_dst = pu1_uv_dst_ctb + tu_uv_offset + chroma_yuv420sp_vu_u_offset; /* Pointing to start byte of U*/
y_cb_tu.tu_coeff_stride = trans_size;
y_cb_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
y_cb_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
y_cb_tu.cbf = ps_tu->b1_cb_cbf;
cr_tu.pi2_tu_coeff = pi2_tu_coeff + trans_size * trans_size;
cr_tu.pu1_pred = y_cb_tu.pu1_pred + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
cr_tu.pu1_dst = y_cb_tu.pu1_dst + 1 + chroma_yuv420sp_vu_v_offset; /* Pointing to start byte of V*/
cr_tu.tu_coeff_stride = trans_size;
cr_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
cr_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
cr_tu.cbf = ps_tu->b1_cr_cbf;
/* Unpacking coeffs */
y_cb_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
#ifdef ENABLE_MAIN_REXT_PROFILE
y_cb_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
y_cb_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
#endif
if(1 == y_cb_tu.cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
y_cb_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix,
qp_rem, qp_div, e_trans_type,
ps_tu->b1_transquant_bypass, &y_cb_tu.zero_cols,
&y_cb_tu.zero_rows, &y_cb_tu.coeff_type,
&y_cb_tu.coeff_value);
}
#ifdef ENABLE_MAIN_REXT_PROFILE
if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
{
cb_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu;
cb_sub_tu.pu1_pred = y_cb_tu.pu1_pred + trans_size * y_cb_tu.pred_strd;
cb_sub_tu.pu1_dst = y_cb_tu.pu1_dst + trans_size * y_cb_tu.dst_strd;
cb_sub_tu.tu_coeff_stride = trans_size;
cb_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
cb_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
cb_sub_tu.cbf = ps_tu->b1_cb_cbf_subtu1;
cb_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
cb_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
cb_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
if(1 == cb_sub_tu.cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
cb_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix,
qp_rem, qp_div, e_trans_type,
ps_tu->b1_transquant_bypass, &cb_sub_tu.zero_cols,
&cb_sub_tu.zero_rows, &cb_sub_tu.coeff_type,
&cb_sub_tu.coeff_value);
}
}
#endif
cr_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
#ifdef ENABLE_MAIN_REXT_PROFILE
cr_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
cr_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
#endif
if(1 == cr_tu.cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
cr_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix_v,
qp_rem_v, qp_div_v, e_trans_type,
ps_tu->b1_transquant_bypass, &cr_tu.zero_cols,
&cr_tu.zero_rows, &cr_tu.coeff_type, &cr_tu.coeff_value);
}
#ifdef ENABLE_MAIN_REXT_PROFILE
if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
{
cr_sub_tu.pi2_tu_coeff = ps_proc->pi2_invscan_out_subtu + trans_size * trans_size;
cr_sub_tu.pu1_pred = cr_tu.pu1_pred + trans_size * cr_tu.pred_strd;
cr_sub_tu.pu1_dst = cr_tu.pu1_dst + trans_size * cr_tu.dst_strd;
cr_sub_tu.tu_coeff_stride = trans_size;
cr_sub_tu.pred_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
cr_sub_tu.dst_strd = pic_strd * chroma_pixel_strd / h_samp_factor;
cr_sub_tu.cbf = ps_tu->b1_cr_cbf_subtu1;
cr_sub_tu.transform_skip_flag = pu1_tu_coeff_data[1] & 1;
cr_sub_tu.explicit_rdpcm_flag = (pu1_tu_coeff_data[1] >> 4) & 1;
cr_sub_tu.explicit_rdpcm_dir = (pu1_tu_coeff_data[1] >> 5) & 1;
if(1 == cr_sub_tu.cbf)
{
pu1_tu_coeff_data = ihevcd_unpack_coeffs(
cr_sub_tu.pi2_tu_coeff, log2_uv_trans_size_minus_2 + 2,
pu1_tu_coeff_data, pi2_dequant_matrix_v,
qp_rem_v, qp_div_v, e_trans_type,
ps_tu->b1_transquant_bypass, &cr_sub_tu.zero_cols,
&cr_sub_tu.zero_rows, &cr_sub_tu.coeff_type,
&cr_sub_tu.coeff_value);
}
}
#endif
}
WORD8 subtu_idx = 0;
do
{
/***************************************************************/
/****************** Intra Prediction **************************/
/***************************************************************/
if(intra_flag) /* Intra */
{
/* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
au1_ref_sub_out size is kept as multiple of 8,
so that SIMD functions can load 64 bits. Also some SIMD
modules read few bytes before the start of the array, so
allocate 16 extra bytes at the start */
UWORD8 au1_ref_sub_out[16 + (MAX_TU_SIZE * 2 * 2 * 2) + 8] = {0};
UWORD8 *pu1_ref_sub_out = &au1_ref_sub_out[16];
UWORD8 *pu1_top_left, *pu1_top, *pu1_left;
WORD32 luma_pred_func_idx, chroma_pred_func_idx;
/* Get the neighbour availability flags */
/* Done for only Y */
if(c_idx == 0)
{
/* Get neighbor availability for Y only */
luma_nbr_flags = ihevcd_get_intra_nbr_flag(ps_proc,
ps_tu,
au4_intra_nbr_avail,
ps_sps->i2_pic_width_in_luma_samples,
ps_pps->i1_constrained_intra_pred_flag,
trans_size,
ctb_size);
if(trans_size == 4)
luma_nbr_flags_4x4[(ps_tu->b4_pos_x % 2) + (ps_tu->b4_pos_y % 2) * 2] = luma_nbr_flags;
if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444)
{
chroma_nbr_flags = luma_nbr_flags;
}
else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
{
WORD32 bot_left, left, top, tp_right, tp_left;
tp_left = (luma_nbr_flags & 0x10000);
tp_right = (luma_nbr_flags & 0x0f000);
top = (luma_nbr_flags & 0x00f00);
left = (luma_nbr_flags & 0x000f0);
bot_left = (luma_nbr_flags & 0x0000f);
chroma_nbr_flags = tp_left | tp_right | top | left | (left >> 4);
chroma_nbr_flags_subtu = ((left != 0 ? 1 : 0) << 16) | (0xf << 8)
| left | bot_left;
}
else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420)
{
if(((ps_tu->b4_pos_x % 2 == 0) && (ps_tu->b4_pos_y % 2 == 0)))
chroma_nbr_flags = luma_nbr_flags;
}
/* Initializing nbr pointers */
pu1_top = y_cb_tu.pu1_pred - pic_strd;
pu1_left = y_cb_tu.pu1_pred - 1;
pu1_top_left = y_cb_tu.pu1_pred - pic_strd - 1;
/* call reference array substitution */
if(luma_nbr_flags == 0x1ffff)
ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_subst_all_avlble_fptr(
pu1_top_left,
pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1);
else
ps_codec->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr(
pu1_top_left,
pu1_top, pu1_left, y_cb_tu.pred_strd, trans_size, luma_nbr_flags, pu1_ref_sub_out, 1);
/* call reference filtering */
ps_codec->s_func_selector.ihevc_intra_pred_ref_filtering_fptr(
pu1_ref_sub_out,
trans_size,
pu1_ref_sub_out,
u1_luma_pred_mode,
#ifdef ENABLE_MAIN_REXT_PROFILE
(ps_sps->i1_intra_smoothing_disabled_flag << 3
| ps_sps->i1_strong_intra_smoothing_enable_flag)
#else
ps_sps->i1_strong_intra_smoothing_enable_flag
#endif
);
/* use the look up to get the function idx */
luma_pred_func_idx = g_i4_ip_funcs[u1_luma_pred_mode];
#ifdef ENABLE_MAIN_REXT_PROFILE
if(ps_sps->i1_implicit_rdpcm_enabled_flag && ps_tu->b1_transquant_bypass
&& (u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26))
disable_boundary_filter = 1;
#endif
/* call the intra prediction function */
ps_codec->apf_intra_pred_luma[luma_pred_func_idx](
pu1_ref_sub_out, 1,
y_cb_tu.pu1_pred,
y_cb_tu.pred_strd,
trans_size,
#ifdef ENABLE_MAIN_REXT_PROFILE
(u1_luma_pred_mode == 10 || u1_luma_pred_mode == 26) ?
disable_boundary_filter :
u1_luma_pred_mode
#else
u1_luma_pred_mode
#endif
);
}
else
{
#ifdef ENABLE_MAIN_REXT_PROFILE
if(subtu_idx != 0)
{
ps_cb_tu = &cb_sub_tu;
ps_cr_tu = &cr_sub_tu;
chroma_nbr_flags = chroma_nbr_flags_subtu;
}
#endif
/* In case of yuv420sp_vu, prediction happens as usual. */
/* So point the pu1_pred pointer to original prediction pointer */
UWORD8 *pu1_pred_orig = ps_cb_tu->pu1_pred - chroma_yuv420sp_vu_u_offset;
/* Top-Left | Top-Right | Top | Left | Bottom-Left
* 1 4 4 4 4
*
* Generating chroma_nbr_flags depending upon the transform size */
if(ps_tu->b3_size == 0)
{
if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV420)
{
/* Take TL,T,L flags of First luma 4x4 block */
chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
/* Take TR flags of Second luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
/* Take BL flags of Third luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
}
else if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
{
if(subtu_idx == 0)
{
/* Take TL,T,L flags of First luma 4x4 block */
chroma_nbr_flags = (luma_nbr_flags_4x4[0] & 0x10FF0);
/* Take TR flags of Second luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[1] & 0x0F000);
/* Take BL flags of first luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[0] & 0x0000F);
}
else
{
/* Take TL,T,L flags of Third luma 4x4 block */
chroma_nbr_flags = (luma_nbr_flags_4x4[2] & 0x10FF0);
/* Take BL flags of Third luma 4x4 block */
chroma_nbr_flags |= (luma_nbr_flags_4x4[2] & 0x0000F);
}
}
}
/* Initializing nbr pointers */
pu1_top = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor);
pu1_left = pu1_pred_orig - 2;
pu1_top_left = pu1_pred_orig - (pic_strd * chroma_pixel_strd / h_samp_factor) - 2;
if(subtu_idx == 0)
{
/* Chroma pred mode derivation from luma pred mode */
{
tu_t *ps_tu_tmp = ps_tu;
if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV444)
{
while(!ps_tu_tmp->b1_first_tu_in_cu)
{
ps_tu_tmp--;
}
}
u1_luma_pred_mode_first_tu = ps_tu_tmp->b6_luma_intra_mode;
}
if(4 == u1_chroma_pred_mode)
u1_chroma_pred_mode = u1_luma_pred_mode_first_tu;
else
{
u1_chroma_pred_mode = gau1_intra_pred_chroma_modes[u1_chroma_pred_mode];
if(u1_chroma_pred_mode == u1_luma_pred_mode_first_tu)
{
u1_chroma_pred_mode = INTRA_ANGULAR(34);
}
}
if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422)
{
u1_chroma_pred_mode = gau1_intra_pred_chroma_modes_422[u1_chroma_pred_mode];
}
}
/* call the chroma reference array substitution */
ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr(
pu1_top_left,
pu1_top, pu1_left,
ps_cb_tu->pred_strd,
trans_size, chroma_nbr_flags, pu1_ref_sub_out, 1,
ps_sps->i1_chroma_format_idc);
#ifdef ENABLE_MAIN_REXT_PROFILE
/* call reference filtering */
if(ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV444)
{
ps_codec->s_func_selector.ihevc_intra_pred_chroma_ref_filtering_fptr(
pu1_ref_sub_out,
trans_size,
pu1_ref_sub_out,
u1_chroma_pred_mode,
(ps_sps->i1_intra_smoothing_disabled_flag << 3
| ps_sps->i1_strong_intra_smoothing_enable_flag));
}
#endif
/* use the look up to get the function idx */
chroma_pred_func_idx = g_i4_ip_funcs[u1_chroma_pred_mode];
/* call the intra prediction function */
ps_codec->apf_intra_pred_chroma[chroma_pred_func_idx](pu1_ref_sub_out, 1, pu1_pred_orig, ps_cb_tu->pred_strd, trans_size, u1_chroma_pred_mode);
}
}
/* Updating number of transform types */
STATS_UPDATE_ALL_TRANS(e_trans_type, c_idx);
#ifdef ENABLE_MAIN_REXT_PROFILE
iqitrecon_fptr = get_iqitrec_func(
ps_proc, ps_tu, ps_cb_tu, log2_trans_size,
c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag,
c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode);
#endif
/* IQ, IT and Recon for Y if c_idx == 0, and U if c_idx !=0 */
iqitrecon_fptr(ps_proc, ps_tu, ps_cb_tu, func_idx, log2_trans_size,
c_idx != 0 ? U_PLANE : NULL_PLANE, intra_flag,
c_idx == 0 ? u1_luma_pred_mode : u1_chroma_pred_mode);
/* IQ, IT and Recon for V */
if(c_idx != 0)
{
#ifdef ENABLE_MAIN_REXT_PROFILE
iqitrecon_fptr = get_iqitrec_func(ps_proc, ps_tu, ps_cr_tu, log2_trans_size,
V_PLANE, intra_flag, u1_chroma_pred_mode);
#endif
iqitrecon_fptr(ps_proc, ps_tu, ps_cr_tu, func_idx, log2_trans_size, V_PLANE,
intra_flag, u1_chroma_pred_mode);
}
}
while(c_idx != 0 && ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_YUV422
&& ++subtu_idx < 2);
}
/* Neighbor availability inside CTB */
/* 1 bit per 4x4. Indicates whether that 4x4 block has been reconstructed (available) */
/* Used for neighbor availability in intra pred */
if(c_idx == 0)
{
WORD32 i;
WORD32 trans_in_min_tu;
UWORD32 cur_tu_in_bits;
UWORD32 cur_tu_avail_flag;
trans_in_min_tu = trans_size / MIN_TU_SIZE;
cur_tu_in_bits = (1 << trans_in_min_tu) - 1;
cur_tu_in_bits = cur_tu_in_bits << (32 - trans_in_min_tu);
cur_tu_avail_flag = cur_tu_in_bits >> (ps_tu->b4_pos_x + 1);
for(i = 0; i < trans_in_min_tu; i++)
au4_intra_nbr_avail[1 + ps_tu->b4_pos_y + i] |=
cur_tu_avail_flag;
}
}
}
ps_proc->pv_tu_coeff_data = pu1_tu_coeff_data;
return ps_proc->i4_ctb_tu_cnt;
}