mirror of
https://github.com/ittiam-systems/libhevc.git
synced 2026-04-06 06:10:50 +07:00
encoder: Update chroma modules to avoid reading an extra byte
When processing the V plane, an extra byte was being read in some of the NEON modules. These modules are now updated to avoid that extra-byte read.

Bug: 177433559
Bug: 183012467
Test: poc in bug
Test: atest CtsMediaTestCases:VideoEncoderTest
Test: atest CtsMediaV2TestCases:CodecEncoderTest
Test: atest VtsHalMediaC2V1_0TargetVideoEncTest
Change-Id: I598c50f727b4d62f19523cbb008482e27de5e3cc
This commit is contained in:
parent
847af0f209
commit
45fb34a5a5
14 changed files with 231 additions and 170 deletions
|
|
@ -66,10 +66,9 @@ UWORD32 ihevc_resi_trans_4x4_neon(
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 chroma_flag = dst_strd_chr_flag & 1;
|
||||
WORD32 dst_strd = dst_strd_chr_flag >> 16;
|
||||
UWORD32 sad;
|
||||
uint8x16_t inp_buf, pred_buf;
|
||||
int16x8_t diff_1, diff_2;
|
||||
|
|
@ -86,15 +85,15 @@ UWORD32 ihevc_resi_trans_4x4_neon(
|
|||
uint64x2_t c;
|
||||
|
||||
(void)pi4_temp;
|
||||
if(chroma_flag == 0)
|
||||
if(e_chroma_plane == NULL_PLANE)
|
||||
{
|
||||
inp_buf = load_unaligned_u8q(pu1_src, src_strd);
|
||||
pred_buf = load_unaligned_u8q(pu1_pred, pred_strd);
|
||||
}
|
||||
else
|
||||
{
|
||||
inp_buf = load_unaligned_u8qi(pu1_src, src_strd);
|
||||
pred_buf = load_unaligned_u8qi(pu1_pred, pred_strd);
|
||||
inp_buf = load_unaligned_u8qi(pu1_src + e_chroma_plane, src_strd);
|
||||
pred_buf = load_unaligned_u8qi(pu1_pred + e_chroma_plane, pred_strd);
|
||||
}
|
||||
|
||||
abs = vabdl_u8(vget_low_u8(inp_buf), vget_low_u8(pred_buf));
|
||||
|
|
@ -198,9 +197,11 @@ UWORD32 ihevc_resi_trans_4x4_neon(
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* 0 - luma transform, 1 - chroma transform. Not used for 4x4 ttype1
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns block sad
|
||||
*
|
||||
|
|
@ -216,9 +217,9 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon(
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 dst_strd;
|
||||
UWORD32 sad;
|
||||
int16x4_t src0_4x16b;
|
||||
int16x4_t src1_4x16b;
|
||||
|
|
@ -242,7 +243,7 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon(
|
|||
uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(pred_u8));
|
||||
uint32x4_t b;
|
||||
uint64x2_t c;
|
||||
|
||||
UNUSED(e_chroma_plane);
|
||||
abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(pred_u8));
|
||||
b = vpaddlq_u16(abs);
|
||||
c = vpaddlq_u32(b);
|
||||
|
|
@ -251,7 +252,6 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon(
|
|||
0);
|
||||
|
||||
(void)pi4_temp;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
|
||||
/************************* 4x4 16bit Transpose ***********************/
|
||||
src0_4x16b = vget_low_s16(src_reg0);
|
||||
|
|
@ -379,8 +379,11 @@ UWORD32 ihevc_resi_trans_4x4_ttype1_neon(
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns block sad
|
||||
*
|
||||
|
|
@ -396,7 +399,8 @@ UWORD32 ihevc_resi_trans_8x8_neon(
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
int16x8_t diff_16[8];
|
||||
int16x8_t abs = vdupq_n_s16(0);
|
||||
|
|
@ -404,13 +408,11 @@ UWORD32 ihevc_resi_trans_8x8_neon(
|
|||
int64x2_t tmp_b;
|
||||
int32x2_t sad_v;
|
||||
int32x4x2_t a0, a1, a2, a3, a4, a5, a6, a7;
|
||||
int chroma_flag = dst_strd_chr_flag & 1;
|
||||
int dst_strd = dst_strd_chr_flag >> 16;
|
||||
UWORD32 sad;
|
||||
|
||||
(void)pi4_temp;
|
||||
#define RESIDUE(k, is_chroma) \
|
||||
if(!is_chroma) \
|
||||
#define RESIDUE(k) \
|
||||
if(NULL_PLANE == e_chroma_plane) \
|
||||
{ \
|
||||
const uint8x8_t s##k = vld1_u8(pu1_src); \
|
||||
const uint8x8_t p##k = vld1_u8(pu1_pred); \
|
||||
|
|
@ -421,8 +423,8 @@ UWORD32 ihevc_resi_trans_8x8_neon(
|
|||
} \
|
||||
else \
|
||||
{ \
|
||||
const uint8x8_t s##k = vld2_u8(pu1_src).val[0]; \
|
||||
const uint8x8_t p##k = vld2_u8(pu1_pred).val[0]; \
|
||||
const uint8x8_t s##k = vld2_u8(pu1_src).val[e_chroma_plane]; \
|
||||
const uint8x8_t p##k = vld2_u8(pu1_pred).val[e_chroma_plane]; \
|
||||
diff_16[k] = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k)); \
|
||||
pu1_src += src_strd; \
|
||||
pu1_pred += pred_strd; \
|
||||
|
|
@ -430,14 +432,14 @@ UWORD32 ihevc_resi_trans_8x8_neon(
|
|||
}
|
||||
|
||||
// stage 1
|
||||
RESIDUE(0, chroma_flag);
|
||||
RESIDUE(1, chroma_flag);
|
||||
RESIDUE(2, chroma_flag);
|
||||
RESIDUE(3, chroma_flag);
|
||||
RESIDUE(4, chroma_flag);
|
||||
RESIDUE(5, chroma_flag);
|
||||
RESIDUE(6, chroma_flag);
|
||||
RESIDUE(7, chroma_flag);
|
||||
RESIDUE(0);
|
||||
RESIDUE(1);
|
||||
RESIDUE(2);
|
||||
RESIDUE(3);
|
||||
RESIDUE(4);
|
||||
RESIDUE(5);
|
||||
RESIDUE(6);
|
||||
RESIDUE(7);
|
||||
|
||||
tmp_a = vpaddlq_s16(abs);
|
||||
tmp_b = vpaddlq_s32(tmp_a);
|
||||
|
|
@ -792,11 +794,12 @@ UWORD32 ihevc_resi_trans_8x8_neon(
|
|||
return sad;
|
||||
}
|
||||
|
||||
static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, int is_chroma)
|
||||
static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
int i;
|
||||
|
||||
if(is_chroma == 0)
|
||||
if(e_chroma_plane == NULL_PLANE)
|
||||
{
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
|
|
@ -808,7 +811,7 @@ static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, int is_chrom
|
|||
{
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
b[i] = vld2_u8(a).val[0];
|
||||
b[i] = vld2_u8(a).val[e_chroma_plane];
|
||||
a += stride;
|
||||
}
|
||||
}
|
||||
|
|
@ -1261,8 +1264,11 @@ static void dct_body_32_32(int32x4x2_t *in /*[16]*/, int32x4x2_t *out /*[16]*/)
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
|
|
@ -1278,12 +1284,11 @@ UWORD32 ihevc_resi_trans_16x16_neon(
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
UWORD32 u4_blk_sad = 0;
|
||||
WORD32 chroma_flag;
|
||||
WORD32 dst_strd;
|
||||
|
||||
uint8x8_t temp0[16], temp1[16];
|
||||
int16x8_t temp2[16], temp3[16];
|
||||
int32x4_t tmp_a, tmp_b;
|
||||
|
|
@ -1292,21 +1297,19 @@ UWORD32 ihevc_resi_trans_16x16_neon(
|
|||
int32x4x2_t out0[16], out1[16], temp4[16], temp5[16];
|
||||
|
||||
(void)pi4_temp;
|
||||
chroma_flag = dst_strd_chr_flag & 1;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
|
||||
chroma_flag = e_chroma_plane != NULL_PLANE;
|
||||
/* Residue + Forward Transform 1st stage */
|
||||
// Left half.
|
||||
load(pu1_src, src_strd, temp0, chroma_flag);
|
||||
load(pu1_pred, pred_strd, temp1, chroma_flag);
|
||||
load(pu1_src, src_strd, temp0, e_chroma_plane);
|
||||
load(pu1_pred, pred_strd, temp1, e_chroma_plane);
|
||||
|
||||
tmp_a = diff(temp0, temp1, temp2);
|
||||
cross_input_16(temp2, temp3);
|
||||
dct_body_16_32(temp3, out0);
|
||||
|
||||
// Right half.
|
||||
load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, chroma_flag);
|
||||
load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, chroma_flag);
|
||||
load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, e_chroma_plane);
|
||||
load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, e_chroma_plane);
|
||||
|
||||
tmp_b = diff(temp0, temp1, temp2);
|
||||
cross_input_16(temp2, temp3);
|
||||
|
|
|
|||
|
|
@ -86,8 +86,11 @@
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
|
|
@ -98,18 +101,17 @@
|
|||
*/
|
||||
UWORD32 ihevc_resi_trans_32x32_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
|
||||
WORD32 *pi4_temp, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd, CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
int16x8_t diff_16[4][2];
|
||||
WORD32 i;
|
||||
int32x2_t sad;
|
||||
int64x2_t tmp_a;
|
||||
UWORD32 u4_blk_sad = 0;
|
||||
WORD32 dst_strd = dst_strd_chr_flag >> 16;
|
||||
WORD32 *pi4_temp_orig = pi4_temp;
|
||||
int16x8_t abs = vdupq_n_s16(0);
|
||||
int32x4_t sum_val = vdupq_n_s32(0);
|
||||
|
||||
UNUSED(e_chroma_plane);
|
||||
|
||||
// Stage 1
|
||||
for(i = 0; i < 16; i++)
|
||||
|
|
|
|||
|
|
@ -133,6 +133,13 @@ enum
|
|||
CHROMA_FMT_IDC_YUV444_PLANES = 4,
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NULL_PLANE = -1,
|
||||
U_PLANE = 0,
|
||||
V_PLANE = 1
|
||||
} CHROMA_PLANE_ID_T;
|
||||
|
||||
/* Pred Modes */
|
||||
/* Do not change enum values */
|
||||
enum
|
||||
|
|
|
|||
|
|
@ -81,8 +81,11 @@
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
*
|
||||
* @returns Void
|
||||
|
|
@ -99,7 +102,8 @@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 i, c[4];
|
||||
WORD32 add, shift;
|
||||
|
|
@ -107,11 +111,7 @@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src,
|
|||
WORD32 *pi4_tmp_orig;
|
||||
WORD16 *pi2_dst_orig;
|
||||
UWORD32 u4_blk_sad = 0;
|
||||
// WORD32 chroma_flag;
|
||||
WORD32 dst_strd;
|
||||
|
||||
// chroma_flag = dst_strd_chr_flag & 1;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
UNUSED(e_chroma_plane);
|
||||
|
||||
pi2_dst_orig = pi2_dst;
|
||||
pi4_tmp_orig = pi4_temp;
|
||||
|
|
@ -216,8 +216,11 @@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src,
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
|
|
@ -233,7 +236,8 @@ UWORD32 ihevc_resi_trans_4x4(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 i;
|
||||
WORD32 e[2], o[2];
|
||||
|
|
@ -242,11 +246,14 @@ UWORD32 ihevc_resi_trans_4x4(UWORD8 *pu1_src,
|
|||
WORD32 *pi4_tmp_orig;
|
||||
WORD16 *pi2_dst_orig;
|
||||
UWORD32 u4_blk_sad=0;
|
||||
WORD32 chroma_flag;
|
||||
WORD32 dst_strd;
|
||||
WORD32 chroma_flag = 0;
|
||||
|
||||
chroma_flag = dst_strd_chr_flag & 1;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
if (e_chroma_plane != NULL_PLANE)
|
||||
{
|
||||
chroma_flag = 1;
|
||||
pu1_src += e_chroma_plane;
|
||||
pu1_pred += e_chroma_plane;
|
||||
}
|
||||
|
||||
pi2_dst_orig = pi2_dst;
|
||||
pi4_tmp_orig = pi4_temp;
|
||||
|
|
@ -427,8 +434,11 @@ void ihevc_resi_trans_4x4_16bit(WORD16 *pi2_src,
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
|
|
@ -444,7 +454,8 @@ UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 i, k;
|
||||
WORD32 e[4], o[4];
|
||||
|
|
@ -455,11 +466,14 @@ UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src,
|
|||
// WORD16 *pi2_tmp;
|
||||
WORD16 *pi2_dst_orig;
|
||||
UWORD32 u4_blk_sad=0;
|
||||
WORD32 chroma_flag;
|
||||
WORD32 dst_strd;
|
||||
WORD32 chroma_flag = 0;
|
||||
|
||||
chroma_flag = dst_strd_chr_flag & 1;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
if (e_chroma_plane != NULL_PLANE)
|
||||
{
|
||||
chroma_flag = 1;
|
||||
pu1_src += e_chroma_plane;
|
||||
pu1_pred += e_chroma_plane;
|
||||
}
|
||||
|
||||
pi2_dst_orig = pi2_dst;
|
||||
pi4_tmp_orig = pi4_temp;
|
||||
|
|
@ -724,8 +738,11 @@ void ihevc_resi_trans_8x8_16bit(WORD16 *pi2_src,
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
|
|
@ -741,7 +758,8 @@ UWORD32 ihevc_resi_trans_16x16(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 i, k;
|
||||
WORD32 e[8], o[8];
|
||||
|
|
@ -752,11 +770,14 @@ UWORD32 ihevc_resi_trans_16x16(UWORD8 *pu1_src,
|
|||
WORD32 *pi4_tmp_orig;
|
||||
WORD16 *pi2_dst_orig;
|
||||
UWORD32 u4_blk_sad = 0;
|
||||
WORD32 chroma_flag;
|
||||
WORD32 dst_strd;
|
||||
WORD32 chroma_flag = 0;
|
||||
|
||||
chroma_flag = dst_strd_chr_flag & 1;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
if (e_chroma_plane != NULL_PLANE)
|
||||
{
|
||||
chroma_flag = 1;
|
||||
pu1_src += e_chroma_plane;
|
||||
pu1_pred += e_chroma_plane;
|
||||
}
|
||||
|
||||
pi2_dst_orig = pi2_dst;
|
||||
pi4_tmp_orig = pi4_temp;
|
||||
|
|
@ -1056,8 +1077,11 @@ void ihevc_resi_trans_16x16_16bit(WORD16 *pi2_src,
|
|||
* @param[in] pred_strd
|
||||
* Prediction Stride
|
||||
*
|
||||
* @param[in] dst_strd_chr_flag
|
||||
* Output Stride and Chroma Flag packed in the MS and LS 16-bit
|
||||
* @param[in] dst_strd
|
||||
* Output Stride
|
||||
*
|
||||
* @param[in] e_chroma_plane
|
||||
* Enum signalling chroma plane
|
||||
*
|
||||
* @returns Void
|
||||
*
|
||||
|
|
@ -1073,7 +1097,8 @@ UWORD32 ihevc_resi_trans_32x32(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag)
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane)
|
||||
{
|
||||
WORD32 i, k;
|
||||
WORD32 e[16], o[16];
|
||||
|
|
@ -1085,11 +1110,7 @@ UWORD32 ihevc_resi_trans_32x32(UWORD8 *pu1_src,
|
|||
WORD32 *pi4_tmp_orig;
|
||||
WORD16 *pi2_dst_orig;
|
||||
UWORD32 u4_blk_sad = 0 ;
|
||||
WORD32 chroma_flag;
|
||||
WORD32 dst_strd;
|
||||
|
||||
chroma_flag = dst_strd_chr_flag & 1;
|
||||
dst_strd = dst_strd_chr_flag >> 16;
|
||||
UNUSED(e_chroma_plane);
|
||||
|
||||
pi2_dst_orig = pi2_dst;
|
||||
pi4_tmp_orig = pi4_temp;
|
||||
|
|
|
|||
|
|
@ -42,7 +42,8 @@ typedef UWORD32 ihevc_resi_trans_4x4_ttype1_ft(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
typedef UWORD32 ihevc_hbd_resi_trans_4x4_ttype1_ft(UWORD16 *pu2_src,
|
||||
UWORD16 *pu2_pred,
|
||||
|
|
@ -50,7 +51,8 @@ typedef UWORD32 ihevc_hbd_resi_trans_4x4_ttype1_ft(UWORD16 *pu2_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag,
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane,
|
||||
UWORD8 bit_depth);
|
||||
|
||||
typedef UWORD32 ihevc_resi_trans_4x4_ft(UWORD8 *pu1_src,
|
||||
|
|
@ -59,7 +61,8 @@ typedef UWORD32 ihevc_resi_trans_4x4_ft(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
typedef UWORD32 ihevc_hbd_resi_trans_4x4_ft
|
||||
(
|
||||
|
|
@ -79,7 +82,8 @@ typedef UWORD32 ihevc_resi_trans_8x8_ft(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
typedef UWORD32 ihevc_hbd_resi_trans_8x8_ft
|
||||
(
|
||||
|
|
@ -100,7 +104,8 @@ typedef UWORD32 ihevc_resi_trans_16x16_ft(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
typedef UWORD32 ihevc_hbd_resi_trans_16x16_ft(UWORD16 *pu2_src,
|
||||
UWORD16 *pu2_pred,
|
||||
|
|
@ -108,7 +113,8 @@ typedef UWORD32 ihevc_hbd_resi_trans_16x16_ft(UWORD16 *pu2_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag,
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane,
|
||||
UWORD8 bit_depth);
|
||||
|
||||
typedef UWORD32 ihevc_resi_trans_32x32_ft(UWORD8 *pu1_src,
|
||||
|
|
@ -117,7 +123,8 @@ typedef UWORD32 ihevc_resi_trans_32x32_ft(UWORD8 *pu1_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
typedef UWORD32 ihevc_hbd_resi_trans_32x32_ft(UWORD16 *pu2_src,
|
||||
UWORD16 *pu2_pred,
|
||||
|
|
@ -125,7 +132,8 @@ typedef UWORD32 ihevc_hbd_resi_trans_32x32_ft(UWORD16 *pu2_src,
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag,
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane,
|
||||
UWORD8 bit_depth);
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -54,21 +54,22 @@
|
|||
/* Function Definitions */
|
||||
/*****************************************************************************/
|
||||
static INLINE uint32x4_t ihevce_4x4_ssd_computer_neon(
|
||||
UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 is_chroma)
|
||||
UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd,
|
||||
CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
uint32x4_t ssd_low, ssd_high;
|
||||
uint8x16_t src, pred, abs;
|
||||
uint16x8_t sqabs_low, sqabs_high;
|
||||
|
||||
if(!is_chroma)
|
||||
if(chroma_plane == NULL_PLANE)
|
||||
{
|
||||
src = load_unaligned_u8q(pu1_src, src_strd);
|
||||
pred = load_unaligned_u8q(pu1_pred, pred_strd);
|
||||
}
|
||||
else
|
||||
{
|
||||
src = load_unaligned_u8qi(pu1_src, src_strd);
|
||||
pred = load_unaligned_u8qi(pu1_pred, pred_strd);
|
||||
src = load_unaligned_u8qi(pu1_src + chroma_plane, src_strd);
|
||||
pred = load_unaligned_u8qi(pu1_pred + chroma_plane, pred_strd);
|
||||
}
|
||||
abs = vabdq_u8(src, pred);
|
||||
sqabs_low = vmull_u8(vget_low_u8(abs), vget_low_u8(abs));
|
||||
|
|
@ -80,21 +81,22 @@ static INLINE uint32x4_t ihevce_4x4_ssd_computer_neon(
|
|||
}
|
||||
|
||||
static INLINE uint32x4_t
|
||||
ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
|
||||
ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
|
||||
CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
uint32x4_t ssd_val;
|
||||
uint8x8_t src, pred, abs;
|
||||
uint16x8_t sqabs;
|
||||
|
||||
if(!is_chroma)
|
||||
if(chroma_plane == NULL_PLANE)
|
||||
{
|
||||
src = vld1_u8(pu1_src);
|
||||
pred = vld1_u8(pu1_pred);
|
||||
}
|
||||
else
|
||||
{
|
||||
src = vld2_u8(pu1_src).val[0];
|
||||
pred = vld2_u8(pu1_pred).val[0];
|
||||
src = vld2_u8(pu1_src).val[chroma_plane];
|
||||
pred = vld2_u8(pu1_pred).val[chroma_plane];
|
||||
}
|
||||
abs = vabd_u8(src, pred);
|
||||
sqabs = vmull_u8(abs, abs);
|
||||
|
|
@ -104,21 +106,22 @@ static INLINE uint32x4_t
|
|||
}
|
||||
|
||||
static INLINE uint32x4_t
|
||||
ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
|
||||
ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
|
||||
CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
uint32x4_t ssd_low, ssd_high;
|
||||
uint8x16_t src, pred, abs;
|
||||
uint16x8_t sqabs_low, sqabs_high;
|
||||
|
||||
if(!is_chroma)
|
||||
if(chroma_plane == NULL_PLANE)
|
||||
{
|
||||
src = vld1q_u8(pu1_src);
|
||||
pred = vld1q_u8(pu1_pred);
|
||||
}
|
||||
else
|
||||
{
|
||||
src = vld2q_u8(pu1_src).val[0];
|
||||
pred = vld2q_u8(pu1_pred).val[0];
|
||||
src = vld2q_u8(pu1_src).val[chroma_plane];
|
||||
pred = vld2q_u8(pu1_pred).val[chroma_plane];
|
||||
}
|
||||
abs = vabdq_u8(src, pred);
|
||||
sqabs_low = vmull_u8(vget_low_u8(abs), vget_low_u8(abs));
|
||||
|
|
@ -130,13 +133,14 @@ static INLINE uint32x4_t
|
|||
}
|
||||
|
||||
static INLINE uint32x4_t
|
||||
ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
|
||||
ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
|
||||
CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
uint32x4_t ssd_0, ssd_1, ssd_2, ssd_3;
|
||||
uint8x16_t src_0, pred_0, src_1, pred_1, abs_0, abs_1;
|
||||
uint16x8_t sqabs_0, sqabs_1, sqabs_2, sqabs_3;
|
||||
|
||||
if(!is_chroma)
|
||||
if(chroma_plane == NULL_PLANE)
|
||||
{
|
||||
src_0 = vld1q_u8(pu1_src);
|
||||
pred_0 = vld1q_u8(pu1_pred);
|
||||
|
|
@ -145,10 +149,10 @@ static INLINE uint32x4_t
|
|||
}
|
||||
else
|
||||
{
|
||||
src_0 = vld2q_u8(pu1_src).val[0];
|
||||
pred_0 = vld2q_u8(pu1_pred).val[0];
|
||||
src_1 = vld2q_u8(pu1_src + 32).val[0];
|
||||
pred_1 = vld2q_u8(pu1_pred + 32).val[0];
|
||||
src_0 = vld2q_u8(pu1_src).val[chroma_plane];
|
||||
pred_0 = vld2q_u8(pu1_pred).val[chroma_plane];
|
||||
src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane];
|
||||
pred_1 = vld2q_u8(pu1_pred + 32).val[chroma_plane];
|
||||
}
|
||||
abs_0 = vabdq_u8(src_0, pred_0);
|
||||
abs_1 = vabdq_u8(src_1, pred_1);
|
||||
|
|
@ -167,7 +171,8 @@ static INLINE uint32x4_t
|
|||
}
|
||||
|
||||
static INLINE uint32x4_t
|
||||
ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
|
||||
ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
|
||||
CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
uint32x4_t ssd_0, ssd_1, ssd_2, ssd_3;
|
||||
uint32x4_t ssd_4, ssd_5, ssd_6, ssd_7;
|
||||
|
|
@ -177,7 +182,7 @@ static INLINE uint32x4_t
|
|||
uint16x8_t sqabs_0, sqabs_1, sqabs_2, sqabs_3;
|
||||
uint16x8_t sqabs_4, sqabs_5, sqabs_6, sqabs_7;
|
||||
|
||||
if(!is_chroma)
|
||||
if(chroma_plane == NULL_PLANE)
|
||||
{
|
||||
src_0 = vld1q_u8(pu1_src);
|
||||
pred_0 = vld1q_u8(pu1_pred);
|
||||
|
|
@ -190,14 +195,14 @@ static INLINE uint32x4_t
|
|||
}
|
||||
else
|
||||
{
|
||||
src_0 = vld2q_u8(pu1_src).val[0];
|
||||
pred_0 = vld2q_u8(pu1_pred).val[0];
|
||||
src_1 = vld2q_u8(pu1_src + 32).val[0];
|
||||
pred_1 = vld2q_u8(pu1_pred + 32).val[0];
|
||||
src_2 = vld2q_u8(pu1_src + 64).val[0];
|
||||
pred_2 = vld2q_u8(pu1_pred + 64).val[0];
|
||||
src_3 = vld2q_u8(pu1_src + 96).val[0];
|
||||
pred_3 = vld2q_u8(pu1_pred + 96).val[0];
|
||||
src_0 = vld2q_u8(pu1_src).val[chroma_plane];
|
||||
pred_0 = vld2q_u8(pu1_pred).val[chroma_plane];
|
||||
src_1 = vld2q_u8(pu1_src + 32).val[chroma_plane];
|
||||
pred_1 = vld2q_u8(pu1_pred + 32).val[chroma_plane];
|
||||
src_2 = vld2q_u8(pu1_src + 64).val[chroma_plane];
|
||||
pred_2 = vld2q_u8(pu1_pred + 64).val[chroma_plane];
|
||||
src_3 = vld2q_u8(pu1_src + 96).val[chroma_plane];
|
||||
pred_3 = vld2q_u8(pu1_pred + 96).val[chroma_plane];
|
||||
}
|
||||
abs_0 = vabdq_u8(src_0, pred_0);
|
||||
abs_1 = vabdq_u8(src_1, pred_1);
|
||||
|
|
@ -236,7 +241,7 @@ static LWORD64 ihevce_ssd_calculator_plane_neon(
|
|||
UWORD32 ref_stride,
|
||||
UWORD32 wd,
|
||||
UWORD32 ht,
|
||||
WORD32 is_chroma)
|
||||
CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
uint32x4_t ssd = vdupq_n_u32(0);
|
||||
uint32x2_t sum;
|
||||
|
|
@ -248,13 +253,13 @@ static LWORD64 ihevce_ssd_calculator_plane_neon(
|
|||
for(row = ht; row > 0; row--)
|
||||
{
|
||||
if(wd == 8)
|
||||
ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
|
||||
ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
|
||||
else if(wd == 16)
|
||||
ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
|
||||
ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
|
||||
else if(wd == 32)
|
||||
ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
|
||||
ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
|
||||
else if(wd == 64)
|
||||
ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
|
||||
ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, chroma_plane));
|
||||
else if(wd % 8 == 0)
|
||||
{
|
||||
UWORD32 col;
|
||||
|
|
@ -262,7 +267,7 @@ static LWORD64 ihevce_ssd_calculator_plane_neon(
|
|||
|
||||
for(col = 0; col < wd; col += 8)
|
||||
{
|
||||
ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, is_chroma));
|
||||
ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, chroma_plane));
|
||||
ref = ref + 8;
|
||||
inp = inp + 8;
|
||||
}
|
||||
|
|
@ -275,7 +280,7 @@ static LWORD64 ihevce_ssd_calculator_plane_neon(
|
|||
else if(wd == 4)
|
||||
{
|
||||
assert(ht == 4);
|
||||
ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, is_chroma);
|
||||
ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, chroma_plane);
|
||||
}
|
||||
|
||||
sum = vadd_u32(vget_low_u32(ssd), vget_high_u32(ssd));
|
||||
|
|
@ -283,13 +288,17 @@ static LWORD64 ihevce_ssd_calculator_plane_neon(
|
|||
}
|
||||
|
||||
LWORD64 ihevce_ssd_calculator_neon(
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
|
||||
UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, 0);
|
||||
return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht,
|
||||
chroma_plane);
|
||||
}
|
||||
|
||||
LWORD64 ihevce_chroma_interleave_ssd_calculator_neon(
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
|
||||
UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, 1);
|
||||
return ihevce_ssd_calculator_plane_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht,
|
||||
chroma_plane);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@
|
|||
#define __IHEVCE_CMN_UTILS_INSTR_SET_ROUTER_H_
|
||||
|
||||
#include "ihevc_typedefs.h"
|
||||
#include "ihevc_defs.h"
|
||||
#include "ihevce_defs.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
|
|
@ -47,7 +48,8 @@
|
|||
/*****************************************************************************/
|
||||
typedef UWORD32 FT_CALC_HAD_SATD_8BIT(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD16 *, WORD32);
|
||||
|
||||
typedef LWORD64 FT_SSD_CALCULATOR(UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32);
|
||||
typedef LWORD64 FT_SSD_CALCULATOR(
|
||||
UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32, CHROMA_PLANE_ID_T);
|
||||
|
||||
typedef LWORD64 FT_SSD_AND_SAD_CALCULATOR(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, UWORD32 *);
|
||||
|
||||
|
|
|
|||
|
|
@ -858,11 +858,12 @@ WORD32 ihevce_osal_delete(void *pv_hle_ctxt)
|
|||
*******************************************************************************
|
||||
*/
|
||||
LWORD64 ihevce_ssd_calculator(
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
|
||||
UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
LWORD64 ssd = 0;
|
||||
|
||||
UNUSED(chroma_plane);
|
||||
for(i = 0; i < ht; i++)
|
||||
{
|
||||
for(j = 0; j < wd; j++)
|
||||
|
|
@ -910,10 +911,13 @@ LWORD64 ihevce_ssd_calculator(
|
|||
*******************************************************************************
|
||||
*/
|
||||
LWORD64 ihevce_chroma_interleave_ssd_calculator(
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
|
||||
UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd,
|
||||
UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane)
|
||||
{
|
||||
UWORD32 i, j;
|
||||
LWORD64 ssd = 0;
|
||||
pu1_inp += chroma_plane;
|
||||
pu1_ref += chroma_plane;
|
||||
|
||||
/* run a loop and find the ssd by doing diff followed by square */
|
||||
for(i = 0; i < ht; i++)
|
||||
|
|
|
|||
|
|
@ -966,13 +966,6 @@ typedef enum
|
|||
|
||||
} REF_LISTS_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NULL_PLANE = -1,
|
||||
U_PLANE = 0,
|
||||
V_PLANE = 1
|
||||
} CHROMA_PLANE_ID_T;
|
||||
|
||||
typedef enum SSD_TYPE_T
|
||||
{
|
||||
NULL_TYPE = -1,
|
||||
|
|
|
|||
|
|
@ -215,7 +215,8 @@ typedef UWORD32 (*pf_res_trans_chroma)(
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
/** \brief function pointer prototype for quantization and inv Quant for ssd
|
||||
calc. for all transform sizes */
|
||||
|
|
|
|||
|
|
@ -2188,7 +2188,8 @@ WORD32 ihevce_t_q_iq_ssd_scan_fxn(
|
|||
pi2_trans_values,
|
||||
src_strd,
|
||||
pred_strd,
|
||||
((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */
|
||||
trans_size,
|
||||
NULL_PLANE);
|
||||
|
||||
cbf = ps_ctxt->apf_quant_iquant_ssd
|
||||
[i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
|
||||
|
|
@ -2297,7 +2298,7 @@ WORD32 ihevce_t_q_iq_ssd_scan_fxn(
|
|||
zero_cbf_cost =
|
||||
|
||||
ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
|
||||
pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size);
|
||||
pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
|
|
@ -7601,7 +7602,8 @@ LWORD64 ihevce_chroma_cu_prcs_rdopt(
|
|||
pred_strd,
|
||||
chrm_src_stride,
|
||||
trans_size,
|
||||
trans_size);
|
||||
trans_size,
|
||||
U_PLANE);
|
||||
|
||||
if(u1_compute_spatial_ssd)
|
||||
{
|
||||
|
|
@ -7861,12 +7863,13 @@ LWORD64 ihevce_chroma_cu_prcs_rdopt(
|
|||
curr_cr_cod_cost = trans_ssd_v =
|
||||
|
||||
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
||||
pu1_cur_pred + 1,
|
||||
pu1_cur_src + 1,
|
||||
pu1_cur_pred,
|
||||
pu1_cur_src,
|
||||
pred_strd,
|
||||
chrm_src_stride,
|
||||
trans_size,
|
||||
trans_size);
|
||||
trans_size,
|
||||
V_PLANE);
|
||||
|
||||
if(u1_compute_spatial_ssd)
|
||||
{
|
||||
|
|
@ -10487,7 +10490,8 @@ LWORD64 ihevce_it_recon_ssd(
|
|||
i4_zero_row);
|
||||
|
||||
return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
|
||||
pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size);
|
||||
pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
|
||||
e_chroma_plane);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -10507,12 +10511,13 @@ LWORD64 ihevce_it_recon_ssd(
|
|||
e_chroma_plane);
|
||||
|
||||
return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
||||
pu1_recon + (e_chroma_plane == V_PLANE),
|
||||
pu1_src + (e_chroma_plane == V_PLANE),
|
||||
pu1_recon,
|
||||
pu1_src,
|
||||
i4_recon_stride,
|
||||
i4_src_strd,
|
||||
u1_trans_size,
|
||||
u1_trans_size);
|
||||
u1_trans_size,
|
||||
e_chroma_plane);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -10628,12 +10633,13 @@ WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|||
if(u1_is_skip)
|
||||
{
|
||||
pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
||||
pu1_pred + e_chroma_plane,
|
||||
pu1_src + e_chroma_plane,
|
||||
pu1_pred,
|
||||
pu1_src,
|
||||
pred_strd,
|
||||
src_strd,
|
||||
trans_size,
|
||||
trans_size);
|
||||
trans_size,
|
||||
e_chroma_plane);
|
||||
|
||||
if(e_ssd_type == SPATIAL_DOMAIN_SSD)
|
||||
{
|
||||
|
|
@ -10735,13 +10741,14 @@ WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|||
|
||||
/* ---------- call residue and transform block ------- */
|
||||
u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
|
||||
pu1_src + (e_chroma_plane == V_PLANE),
|
||||
pu1_pred + (e_chroma_plane == V_PLANE),
|
||||
pu1_src,
|
||||
pu1_pred,
|
||||
pi4_trans_scratch,
|
||||
pi2_trans_values,
|
||||
src_strd,
|
||||
pred_strd,
|
||||
((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */
|
||||
trans_size,
|
||||
e_chroma_plane);
|
||||
(void)u4_blk_sad;
|
||||
/* -------- calculate SSD calculation in Transform Domain ------ */
|
||||
|
||||
|
|
@ -10855,12 +10862,13 @@ WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
|
|||
zero_cbf_cost_u =
|
||||
|
||||
ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
|
||||
pu1_pred + (e_chroma_plane == V_PLANE),
|
||||
pu1_src + (e_chroma_plane == V_PLANE),
|
||||
pu1_pred,
|
||||
pu1_src,
|
||||
pred_strd,
|
||||
src_strd,
|
||||
trans_size,
|
||||
trans_size);
|
||||
trans_size,
|
||||
e_chroma_plane);
|
||||
}
|
||||
|
||||
/************************************************************************/
|
||||
|
|
|
|||
|
|
@ -221,7 +221,8 @@ typedef UWORD32 (*pf_res_trans_luma)(
|
|||
WORD16 *pi2_dst,
|
||||
WORD32 src_strd,
|
||||
WORD32 pred_strd,
|
||||
WORD32 dst_strd_chr_flag);
|
||||
WORD32 dst_strd,
|
||||
CHROMA_PLANE_ID_T e_chroma_plane);
|
||||
|
||||
typedef WORD32 (*pf_quant)(
|
||||
WORD16 *pi2_coeffs,
|
||||
|
|
|
|||
|
|
@ -451,7 +451,7 @@ void ihevce_pu_calc_4x4_blk(
|
|||
if(u1_use_satd)
|
||||
{
|
||||
ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr(
|
||||
pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, (4 << 16) | 0);
|
||||
pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, 4, NULL_PLANE);
|
||||
|
||||
sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -779,7 +779,7 @@ void ihevce_sao_analyse(
|
|||
distortion =
|
||||
ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
|
||||
s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
|
||||
s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht);
|
||||
s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE);
|
||||
// clang-format on
|
||||
|
||||
ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
|
||||
|
|
@ -1145,7 +1145,8 @@ void ihevce_sao_analyse(
|
|||
ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
|
||||
s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
|
||||
s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
|
||||
ctb_ht);
|
||||
ctb_ht,
|
||||
NULL_PLANE);
|
||||
} // clang-format on
|
||||
|
||||
if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
|
||||
|
|
@ -1155,7 +1156,8 @@ void ihevce_sao_analyse(
|
|||
s_sao_ctxt.pu1_cur_chroma_recon_buf,
|
||||
chroma_src_stride,
|
||||
s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
|
||||
(ctb_ht >> !u1_is_422));
|
||||
(ctb_ht >> !u1_is_422),
|
||||
NULL_PLANE);
|
||||
} // clang-format on
|
||||
|
||||
/*chroma distortion is added after correction because of lambda difference*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue